SCM Repository
View of /branches/pure-cfg/doc/probe/paper.tex
Parent Directory
|
Revision Log
Revision 629 -
(download)
(as text)
(annotate)
Wed Mar 16 23:14:00 2011 UTC (11 years, 3 months ago) by jhr
File size: 15688 byte(s)
Wed Mar 16 23:14:00 2011 UTC (11 years, 3 months ago) by jhr
File size: 15688 byte(s)
Fixed typos
\documentclass[11pt]{article} \input{defs} \usepackage{graphicx} \usepackage{listings} \lstset{ basicstyle=\ttfamily\footnotesize, keywordstyle=\bfseries, showstringspaces=false, } \lstdefinelanguage{OpenCL}[]{C}{% morekeywords={ char2,char4,char8,char16, uchar2,uchar4,uchar8,uchar16, short2,short4,short8,short16, ushort2,ushort4,ushort8,ushort16, int2,int4,int8,int16, uint2,uint4,uint8,uint16, long2,long4,long8,long16, ulong2,ulong4,ulong8,ulong16, float2,float4,float8,float16, ufloat2,ufloat4,ufloat8,ufloat16, double2,double4,double8,double16, udouble2,udouble4,udouble8,udouble16, constant,__constant,kernel,__kernel,private,__private}, moredirectives={version}, deletekeywords={} } \lstset{ language=C, } \setlength{\textwidth}{6in} \setlength{\oddsidemargin}{0.25in} \setlength{\evensidemargin}{0.25in} \setlength{\parskip}{5pt} \newcommand{\matM}{\mathbf{M}} \newcommand{\vecx}{\mathbf{x}} \newcommand{\vecp}{\mathbf{p}} \newcommand{\vecn}{\mathbf{n}} \newcommand{\vecf}{\mathbf{f}} \newcommand{\VEC}[1]{\left\langle{#1}\right\rangle} \newcommand{\FLOOR}[1]{\left\lfloor{#1}\right\rfloor} \title{Compiling probe operations for Diderot} \author{ John Reppy, Gordon Kindlmann \\ University of Chicago \\ {\small\tt{}\{jhr|glk\}@cs.uchicago.edu} \\ } \date{\today} \begin{document} \maketitle \thispagestyle{empty} \section{Introduction} This note describes the code needed to implement a probe of a field operation. In the discussion below, we use a number of notational conventions that are summarized in the following table: \begin{center} \begin{tabular}{cp{4in}} $V$ & the image data \\ $F$ & the field reconstructed from $V$ \\ $\vecp$ & the position vector in field-space \\ $\vecx$ & the position vector in image-space \\ $\matM^{-1}$ & homogeneous matrix that represents the transformation from world space to image space \\ $h$ & a piecewise polynomial convolution kernel \\ $s$ & the support of $h$ \\ $h_0,\,\ldots,h_{2s-1}$ & the polynomials that make up $h$ \\ \end{tabular}% \end{center}% \section{Probing a 1D scalar field} The simplest case is probing a 1D scalar field $F = V\circledast{}h$, where $s$ is the support of $h$. The probe $F\mkw{@}p$ is computed as follows: \begin{eqnarray*} \left[\begin{array}{c} x \\ 1 \end{array}\right] & = & \matM^{-1} \left[\begin{array}{c} p \\ 1 \end{array}\right] \qquad \text{\textit{transform to image space}} \\ n & = & \FLOOR{x} \qquad \text{\textit{integer part of position}} \\ f & = & x - n \qquad \text{\textit{fractional part of position}} \\ F\mkw{@}p & = & \sum_{i=1-s}^s {V(n+i) h(f - i)} \end{eqnarray*}% Note that the coordinate system of the convolution filter is flipped (negated) from the coordinate system of the image; this property is why we evaluate $h(f-i)$ for $V(n+i)$. The convolution $h$ is represented as a symmetric piecewise polynomial formed from $2s$ polynomials $h_0,\,\ldots,\,h_{2s-1}$. \begin{displaymath} h(x) = \left\{\begin{array}{ll} 0 & \text{$x \leq -s$ or $s \leq x$} \\ h_{\lfloor{}x\rfloor{}+s}(x) & \text{$-s < x < s$} \\ \end{array}\right. \end{displaymath}% Thus, we can rewrite the probe computation as \begin{displaymath} F\mkw{@}x = \sum_{i=1-s}^{s} {V(n+i) h_{s-i}(f - i)} \end{displaymath}% \figref{fig:1d-probe} illustrates the situation for a kernel $h$ with support $s = 2$, and \figref{fig:1d-probe-code} gives the C code for the probe operation, assuming that $h$ is represented by third-degree polynomials. \begin{figure}[t] \begin{center} \includegraphics[scale=0.8]{pictures/convo} \end{center}% \caption{1D scalar probe} \label{fig:1d-probe} \end{figure}% \begin{figure}[t] \begin{quote} \lstset{language=C} \begin{lstlisting} double V[]; // image data double h[4][4] = { // cubic B-spline ("bspln3") { 1.33333, 2.0, 1.0, 0.166667 }, // -2 .. -1 { 0.666667, 0.0, -1.0, -0.5 }, // -1 .. 0 { 0.666667, 0.0, -1.0, 0.5 }, // 0 .. 1 { 1.33333, -2.0, 1.0, -0.166667 }, // 1 .. 2 } double probe (double p) { double x = transform(p); // from world- to image-space position double nd, f; int n; f = modf (x, &nd); n = (int)nd; double value = 0.0, t; t = f + 1.0; value += V[n-1] * (h[3][0] + t*(h[3][1] + t*(h[3][2] + t*h[3][3]))); t = f; value += V[n] * (h[2][0] + t*(h[2][1] + t*(h[2][2] + t*h[2][3]))); t = f - 1.0; value += V[n+1] * (h[1][0] + t*(h[1][1] + t*(h[1][2] + t*h[1][3]))); t = f - 2.0; value += V[n+2] * (h[0][0] + t*(h[0][1] + t*(h[0][2] + t*h[0][3]))); return value; } \end{lstlisting}% \end{quote}% \caption{Computing $F\mkw{@}x$ for a 1D scalar field in C} \label{fig:1d-probe-code} \end{figure} If we look at the four lines that define \texttt{value}, we see an opportunity for using SIMD parallelism to speed the computation. \figref{fig:1d-probe-code-opencl} gives the OpenCL for for the probe function, where we have lifted the kernel coefficients into the vector constants \texttt{a}, \texttt{b}, \texttt{c}, and \texttt{d}, and put the \texttt{t} values into a vector too. We then use a dot product to compute the sum of the products of the convolution-filter values and the image values. \begin{figure}[t] \begin{quote} \lstset{language=OpenCL} \begin{lstlisting} double4 d = (double4)(h[3][0], h[2][0], h[1][0], h[0][0]); // x^0 coeffs double4 c = (double4)(h[3][1], h[2][1], h[2][1], h[0][1]); // x^1 coeffs double4 b = (double4)(h[3][2], h[2][2], h[1][2], h[0][2]); // x^2 coeffs double4 a = (double4)(h[3][3], h[2][3], h[1][3], h[0][3]); // x^3 coeffs double probe (double p) { double x = transform(p); // image-space position double n, f; f = modf (x, &n); double4 v = (double4)(V[n-1], V[n], V[n+1], V[n+2]); double4 t = (double4)(f + 1.0, f, f - 1.0, f - 2.0); return dot(v, d + t*(c + t*(b + t*a))); } \end{lstlisting}% \end{quote}% \caption{Computing $F\mkw{@}x$ for a 1D scalar field in OpenCL} \label{fig:1d-probe-code-opencl} \end{figure} \section{Probing a 3D scalar field} The more common case is when the field is a convolution of a scalar 3-dimensional field ($F = V\circledast{}h$). Let $s$ be the support of $h$. Then the probe $F\mkw{@}\vecp$ is computed as follows: \begin{eqnarray*} \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\ \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\ \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\ F\mkw{@}\vecp & = & \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}} \end{eqnarray*}% We can restructure this equation to highlight the loop-invariant computations.\footnote{ This restructuring assumes that the $Z$-axis of the data is the ``fastest.'' } \begin{displaymath} F\mkw{@}\vecp = \sum_{i=1-s}^s \left({ \sum_{j=1-s}^s \left({ \sum_{k=1-s}^s \left({ V(\vecn+\VEC{i,j,k}) h(\vecf_z - k) }\right) }\right) h(\vecf_y - j) }\right) h(\vecf_x - i) \end{displaymath}% Thus, we see that the kernel $h$ is evaluated $2s$ times per axis (\ie{}, $6s$ times in 3D). \figref{fig:3d-probe-code-c} gives C code for this operation. \begin{figure}[p] \begin{quote} \lstset{language=C} \begin{lstlisting} typedef double vec3[3]; double V[][][]; // image data double h[4][4]; // bslpn3 (as before) const int s = 2; // kernel support double probe (vec3 p) { double x[3] = transform(p); // image-space position double nxd, nyd, nzd, fx, fy, fz; int nx, ny, nz; fx = modf (x[0], &nxd); nx = (int)nxd; fy = modf (x[1], &nyd); ny = (int)nyd; fz = modf (x[2], &nzd); nz = (int)nzd; // compute kernel values for each axis double hx[4], hy[4], hz[4]; for (int i = 1-s; i <= s; i++) { double t; t = fx - i; hx[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); t = fy - i; hy[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); t = fz - i; hz[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); } float vx = 0.0; for (int i = 1-s; i <= s; i++) { float vy = 0.0; for (int j = 1-s; j <= s; j++) { float vz = 0.0; for (int k = 1-s; k <= s; k++) { vz += V[nx+i][ny+j][nz+k] * hz[k+s-1]; } vy += vz * hy[j+s-1]; } vx += vy * hx[i+s-1]; } return vx; } \end{lstlisting}% \end{quote}% \caption{Computing $F\mkw{@}\vecx$ for a 3D scalar field in C} \label{fig:3d-probe-code-c} \end{figure} \begin{figure}[p] \begin{quote} \lstset{language=OpenCL} \begin{lstlisting} double V[][][]; // image data double4 a, b, c, d; // kernel coefficients const int s = 2; // kernel support double probe (double4 p) { double4 x = transform(p); // image-space position double4 nd, f int4 n; f = modf (x, &nd); n = convert_int4(nd); // compute kernel values for each axis double4 t, hx, hy, hz; t = (double4)(f.x + 1, f.x, f.x - 1, f.x - 2); hx = d + t*(c + t*(b + t*a)); t = (double4)(f.y + 1, f.y, f.y - 1, f.y - 2); hy = d + t*(c + t*(b + t*a)); t = (double4)(f.z + 1, f.z, f.z - 1, f.z - 2); hz = d + t*(c + t*(b + t*a)); double vy[4], vz[4]; for (int i = 1-s; i <= s; i++) { for (int j = 1-s; j <= s; j++) { double4 v = (double4) ( V[n.x+i][n.y+j][n.z-1], V[n.x+i][n.y+j][n.z], V[n.x+i][n.y+j][n.z+1], V[n.x+i][n.y+j][n.z+2]); vz[j+s-1] += dot(v, hz); } vy[i+s-1] = dot((double4)(vz[0], vz[1], vz[2], vz[3]), hy); } return dot((double4)(vy[0], vy[1], vy[2], vy[3]), hx); } \end{lstlisting}% \end{quote}% \caption{Computing $F\mkw{@}\vecx$ for a 3D scalar field in OpenCL} \label{fig:3d-probe-code-opencl} \end{figure} \section{Generalizing to derivative fields} To generalize the probe operation to work on derivative fields (\eg{}, $(\nabla^k F)\mkw{@}\vecp$), we represent the $\nabla^k$ operation as a $k$-order tensor of partial derivative operators. To simplify the presentation, we will restrict the discussion to $3$-dimensional space, but it easily generalizes. In 3D, a partial-derivative operator has the form \begin{displaymath} \frac{\partial}{\partial{}x^i \partial{}y^j \partial{}z^k} \end{displaymath}% where $i$, $j$, and $k$ are the number of levels of differentiation in the $x$, $y$, and $z$-axes (respectively). The normal convention is to omit explicit mention of axes with zero levels of differentiation, but we will be explicit here. We use $\mathbf{Id}^n$ to represent the identity operator in $n$-dimensions (i.e., the one with zero=-levels of differentiation in all axes). The product of two partial-derivative operators is formed by summing the exponents; for example in 3D we have \begin{displaymath} \frac{\partial}{\partial{}x^i \partial{}y^j \partial{}z^k} \frac{\partial}{\partial{}x^{i'} \partial{}y^{j'} \partial{}z^{k'}} = \frac{\partial}{\partial{}x^{i+i'} \partial{}y^{j+j'} \partial{}z^{k+k'}} \end{displaymath}% The last bit of notation that we need is the $n$-vector of partial-derivative operators $\delta^n$, in which the $i$th element has one level of differentiation in the $i$th axis (and zero in all other axes). For example, \begin{displaymath} \delta^3 = \left[ \frac{\partial}{\partial{}x^1 \partial{}y^0 \partial{}z^0} \quad \frac{\partial}{\partial{}x^0 \partial{}y^1 \partial{}z^0} \quad \frac{\partial}{\partial{}x^0 \partial{}y^0 \partial{}z^1} \right] \end{displaymath}% For $k > 0$ levels of differentiation in $n$-dimensional space, we have \begin{displaymath} \nabla^k = \left\{\begin{array}{ll} \mathbf{Id}^n & \text{when $k = 0$} \\ \delta^n \otimes \nabla^{k-1} & \text{when $k > 0$} \\ \end{array}\right. \end{displaymath}% Consider, for example, the case where $k = 2$. \begin{displaymath} \nabla^2 = \delta^3 \otimes \delta^3 = \left[\begin{array}{ccc} \frac{\partial}{\partial{}x^2 \partial{}y^0 \partial{}z^0} & \frac{\partial}{\partial{}x^1 \partial{}y^1 \partial{}z^0} & \frac{\partial}{\partial{}x^1 \partial{}y^0 \partial{}z^1} \\ \frac{\partial}{\partial{}x^1 \partial{}y^1 \partial{}z^0} & \frac{\partial}{\partial{}x^0 \partial{}y^2 \partial{}z^0} & \frac{\partial}{\partial{}x^0 \partial{}y^1 \partial{}z^1} \\ \frac{\partial}{\partial{}x^1 \partial{}y^0 \partial{}z^1} & \frac{\partial}{\partial{}x^0 \partial{}y^1 \partial{}z^1} & \frac{\partial}{\partial{}x^0 \partial{}y^0 \partial{}z^2} \\ \end{array}\right] \end{displaymath}% When we have a probe of a derivative field $(\nabla^k F)\mkw{@}\vecp$, we will generate code for each partial derivative operator $\frac{\partial}{\partial{}x^i\partial{}y^j\partial{}z^k}$ separately. If $F = V\circledast{}h$, then we will use $h^i(x) h^j(y) h^k(z)$ as the interpolation coefficients, where $h^i$ is the $i$th derivative of $h$. \section{Probing a 3D derivative field} We next consider the case of probing the derivative of a scalar field $F = V\circledast{}h$, where $s$ is the support of $h$. The probe $(\nabla F)\mkw{@}\vecp$ produces a vector result as follows: \begin{displaymath} (\nabla F)\mkw{@}\vecp = \left[\begin{array}{c} \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_z - k) h(\vecf_y - j) h'(\vecf_x - i)}}} \\ \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_z - k) h'(\vecf_y - j) h(\vecf_x - i)}}} \\ \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h'(\vecf_z - k) h(\vecf_y - j) h(\vecf_x - i)}}} \\ \end{array}\right] \end{displaymath}% where \begin{eqnarray*} \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\ \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\ \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \end{eqnarray*}% \begin{figure}[p] \begin{quote} \lstset{language=C} \begin{lstlisting} vec3 probe (vec3 p) { double x[3] = transform(p); // image-space position double nxd, nyd, nzd, fx, fy, fz; int nx, ny, nz; fx = modf (x[0], &nxd); nx = (int)nxd; fy = modf (x[1], &nyd); ny = (int)nyd; fz = modf (x[2], &nzd); nz = (int)nzd; // compute kernel values for each axis double hx[4], dhx[4], hy[4], dhy[4], hz[4], dhz[4]; for (int i = 1-s; i <= s; i++) { double t; t = fx - i; hx[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); dhx[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]); t = fy - i; hy[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); dhy[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]); t = fz - i; hz[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); dhz[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]); } vec3 vx = {0, 0, 0}; for (int i = 1-s; i <= s; i++) { vec3 vy = {0, 0, 0}; for (int j = 1-s; j <= s; j++) { vec3 vz = {0, 0, 0}; for (int k = 1-s; k <= s; k++) { double v = V[nx+i][ny+j][nz+k]; vz[0] = v * hz[k+s-1]; vz[1] = v * hz[k+s-1]; vz[2] = v * dhz[k+s-1]; } vy[0] += vz[0] * hy[j+s-1]; vy[1] += vz[1] * dhy[j+s-1]; vy[2] += vz[2] * hy[j+s-1]; } vx[0] += vy[0] * dhx[i+s-1]; vx[1] += vy[1] * hx[i+s-1]; vx[2] += vy[2] * hx[i+s-1]; } return vx; } \end{lstlisting}% \end{quote}% \caption{Computing $(\nabla F)\mkw{@}\vecx$ for a 3D scalar field in C} \label{fig:3d-deriv-probe-code-c} \end{figure} \bibliographystyle{../common/alpha} \bibliography{../common/strings-short,../common/manticore} \end{document}
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |