Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /trunk/doc/probe/paper.tex
ViewVC logotype

Annotation of /trunk/doc/probe/paper.tex

Parent Directory Parent Directory | Revision Log Revision Log


Revision 289 - (view) (download) (as text)

1 : jhr 156 \documentclass[11pt]{article}
2 :    
3 :     \input{defs}
4 :     \usepackage{graphicx}
5 :     \usepackage{listings}
6 :     \lstset{
7 : jhr 270 basicstyle=\ttfamily\footnotesize,
8 : jhr 156 keywordstyle=\bfseries,
9 :     showstringspaces=false,
10 :     }
11 : jhr 160 \lstdefinelanguage{OpenCL}[]{C}{%
12 :     morekeywords={
13 :     char2,char4,char8,char16,
14 :     uchar2,uchar4,uchar8,uchar16,
15 :     short2,short4,short8,short16,
16 :     ushort2,ushort4,ushort8,ushort16,
17 :     int2,int4,int8,int16,
18 :     uint2,uint4,uint8,uint16,
19 :     long2,long4,long8,long16,
20 :     ulong2,ulong4,ulong8,ulong16,
21 :     float2,float4,float8,float16,
22 :     ufloat2,ufloat4,ufloat8,ufloat16,
23 :     double2,double4,double8,double16,
24 :     udouble2,udouble4,udouble8,udouble16,
25 :     constant,__constant,kernel,__kernel,private,__private},
26 :     moredirectives={version},
27 :     deletekeywords={}
28 :     }
29 : jhr 156
30 :     \lstset{
31 :     language=C,
32 :     }
33 :    
34 :     \setlength{\textwidth}{6in}
35 :     \setlength{\oddsidemargin}{0.25in}
36 :     \setlength{\evensidemargin}{0.25in}
37 :     \setlength{\parskip}{5pt}
38 :    
39 :     \newcommand{\matM}{\mathbf{M}}
40 : jhr 157 \newcommand{\vecx}{\mathbf{x}}
41 : jhr 159 \newcommand{\vecp}{\mathbf{p}}
42 : jhr 156 \newcommand{\vecn}{\mathbf{n}}
43 :     \newcommand{\vecf}{\mathbf{f}}
44 :     \newcommand{\VEC}[1]{\left\langle{#1}\right\rangle}
45 :     \newcommand{\FLOOR}[1]{\left\lfloor{#1}\right\rfloor}
46 :    
47 :     \title{Compiling probe operations for Diderot}
48 :     \author{
49 : glk 289 John Reppy, Gordon Kindlmann \\
50 : jhr 156 University of Chicago \\
51 : glk 289 {\small\tt{}\{jhr|glk\}@cs.uchicago.edu} \\
52 : jhr 156 }
53 :     \date{\today}
54 :    
55 :     \begin{document}
56 :    
57 :     \maketitle
58 :     \thispagestyle{empty}
59 :    
60 :     \bibliographystyle{../common/alpha}
61 :     \bibliography{../common/strings-short,../common/manticore}
62 :    
63 :     \section{Introduction}
64 :    
65 :     This note describes the code needed to implement a probe of a field operation.
66 : jhr 270 In the discussion below, we use a number of notational conventions that are
67 :     summarized in the following table:
68 :     \begin{center}
69 : jhr 274 \begin{tabular}{cp{4in}}
70 : jhr 270 $V$ & the image data \\
71 :     $F$ & the field reconstructed from $V$ \\
72 :     $\vecp$ & the position vector in field-space \\
73 :     $\vecx$ & the position vector in image-space \\
74 : jhr 274 $\matM^{-1}$ & homogeneous matrix that represents the transformation from world space to image space \\
75 : jhr 270 $h$ & a piecewise polynomial convolution kernel \\
76 :     $s$ & the support of $h$ \\
77 :     $h_0,\,\ldots,h_{2s-1}$ & the polynomials that make up $h$ \\
78 :     \end{tabular}%
79 :     \end{center}%
80 : jhr 156
81 :     \section{Probing a 1D scalar field}
82 :     The simplest case is probing a 1D scalar field $F = V\circledast{}h$, where $s$ is the support
83 :     of $h$.
84 : jhr 160 The probe $F\mkw{@}p$ is computed as follows:
85 : jhr 156 \begin{eqnarray*}
86 : jhr 160 \left[\begin{array}{c} x \\ 1 \end{array}\right] & = & \matM^{-1} \left[\begin{array}{c} p \\ 1 \end{array}\right] \qquad \text{\textit{transform to image space}} \\
87 :     n & = & \FLOOR{x} \qquad \text{\textit{integer part of position}} \\
88 :     f & = & x - f \qquad \text{\textit{fractional part of position}} \\
89 :     F\mkw{@}p & = & \sum_{i=1-s}^s {V(n+i) h(f - i)}
90 : jhr 156 \end{eqnarray*}%
91 : jhr 270 Note that the coordinate system of the convolution filter is flipped (negated) from the coordinate system
92 :     of the image; this property is why we evaluate $h(f-i)$ for $V(n+i)$.
93 : jhr 156 The convolution $h$ is represented as a symmetric piecewise polynomial formed
94 : jhr 270 from $2s$ polynomials $h_1,\,\ldots,\,h_{2s}$.
95 : jhr 156 \begin{displaymath}
96 :     h(x) = \left\{\begin{array}{ll}
97 : jhr 270 0 & \text{$x \leq -s$ or $s \leq x$} \\
98 :     h_{\lfloor{}x\rfloor{}+s}(x) & \text{$-s < x < s$} \\
99 : jhr 156 \end{array}\right.
100 :     \end{displaymath}%
101 :     Thus, we can rewrite the probe computation as
102 :     \begin{displaymath}
103 : jhr 270 F\mkw{@}x = \sum_{i=1-s}^{s} {V(n+i) h_{s-i}(i - f)}
104 : jhr 156 \end{displaymath}%
105 :     \figref{fig:1d-probe} illustrates the situation for a kernel $h$ with support $s = 2$,
106 :     and \figref{fig:1d-probe-code} gives the C code for the probe operation, assuming that
107 :     $h$ is represented by third-degree polynomials.
108 :     \begin{figure}[t]
109 :     \begin{center}
110 :     \includegraphics[scale=0.8]{pictures/convo}
111 :     \end{center}%
112 :     \caption{1D scalar probe}
113 :     \label{fig:1d-probe}
114 :     \end{figure}%
115 :    
116 : jhr 279 \begin{figure}[t]
117 : jhr 156 \begin{quote}
118 : jhr 160 \lstset{language=C}
119 : jhr 156 \begin{lstlisting}
120 : jhr 270 double V[]; // image data
121 : glk 289 double h[4][4] = { // cubic B-spline ("bspln3")
122 : jhr 288 { 1.33333, 2.0, 1.0, 0.166667 }, // -2 .. -1
123 :     { 0.666667, 0.0, -1.0, -0.5 }, // -1 .. 0
124 :     { 0.666667, 0.0, -1.0, 0.5 }, // 0 .. 1
125 :     { 1.33333, -2.0, 1.0, -0.166667 }, // 1 .. 2
126 :     }
127 : jhr 156
128 : jhr 270 double probe (double p)
129 : jhr 156 {
130 : glk 289 double x = transform(p); // from world- to image-space position
131 :     double nd, f; int n;
132 : jhr 156
133 : glk 289 f = modf (x, &nd);
134 :     n = (int)nd;
135 : jhr 156
136 : jhr 160 double value = 0.0, t;
137 : jhr 156
138 : jhr 270 t = f + 1.0;
139 :     value += V[n-1] * (h[3][0] + t*(h[3][1] + t*(h[3][2] + t*h[3][3])));
140 :     t = f;
141 :     value += V[n] * (h[2][0] + t*(h[2][1] + t*(h[2][2] + t*h[2][3])));
142 : jhr 160 t = f - 1.0;
143 : jhr 270 value += V[n+1] * (h[1][0] + t*(h[1][1] + t*(h[1][2] + t*h[1][3])));
144 : jhr 160 t = f - 2.0;
145 : jhr 270 value += V[n+2] * (h[0][0] + t*(h[0][1] + t*(h[0][2] + t*h[0][3])));
146 : jhr 156
147 :     return value;
148 :     }
149 :     \end{lstlisting}%
150 :     \end{quote}%
151 : jhr 157 \caption{Computing $F\mkw{@}x$ for a 1D scalar field in C}
152 : jhr 156 \label{fig:1d-probe-code}
153 :     \end{figure}
154 : jhr 160 If we look at the four lines that define \texttt{value}, we see an opportunity for
155 :     using SIMD parallelism to speed the computation.
156 :     \figref{fig:1d-probe-code-opencl} gives the OpenCL for for the probe function, where
157 :     we have lifted the kernel coefficients into the vector constants \texttt{a}, \texttt{b},
158 : jhr 270 \texttt{c}, and \texttt{d}, and put the \texttt{t} values into a vector too.
159 :     We then use a dot product to compute the sum of the products of the
160 :     convolution-filter values and the image values.
161 : jhr 279 \begin{figure}[t]
162 : jhr 156 \begin{quote}
163 : jhr 160 \lstset{language=OpenCL}
164 : jhr 156 \begin{lstlisting}
165 : jhr 270 double4 d = (double4)(h[3][0], h[2][0], h[1][0], h[0][0]); // x^0 coeffs
166 :     double4 c = (double4)(h[3][1], h[2][1], h[2][1], h[0][1]); // x^1 coeffs
167 :     double4 b = (double4)(h[3][2], h[2][2], h[1][2], h[0][2]); // x^2 coeffs
168 :     double4 a = (double4)(h[3][3], h[2][3], h[1][3], h[0][3]); // x^3 coeffs
169 : jhr 156
170 : jhr 270 double probe (double p)
171 : jhr 156 {
172 : jhr 270 double x = transform(p); // image-space position
173 : jhr 156 double n, f;
174 :    
175 : jhr 270 f = modf (x, &n);
176 : jhr 156
177 : jhr 270 double4 v = (double4)(V[n-1], V[n], V[n+1], V[n+2]);
178 :     double4 t = (double4)(f + 1.0, f, f - 1.0, f - 2.0);
179 : jhr 160 return dot(v, d + t*(c + t*(b + t*a)));
180 : jhr 156 }
181 :     \end{lstlisting}%
182 :     \end{quote}%
183 : jhr 157 \caption{Computing $F\mkw{@}x$ for a 1D scalar field in OpenCL}
184 : jhr 156 \label{fig:1d-probe-code-opencl}
185 :     \end{figure}
186 :    
187 :     \section{Probing a 3D scalar field}
188 :    
189 :     The more common case is when the field is a convolution of a scalar 3-dimensional
190 :     field ($F = V\circledast{}h$).
191 :     Let $s$ be the support of $h$.
192 : jhr 159 Then the probe $F\mkw{@}\vecp$ is computed as follows:
193 : jhr 156 \begin{eqnarray*}
194 : jhr 159 \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\
195 :     \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\
196 :     \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\
197 :     F\mkw{@}\vecp & = & \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}}
198 : jhr 156 \end{eqnarray*}%
199 : jhr 270 We can restructure this equation to highlight the loop-invariant computations.
200 :     \begin{displaymath}
201 :     F\mkw{@}\vecp =
202 :     \sum_{i=1-s}^s \left({
203 :     \sum_{j=1-s}^s \left({
204 :     \sum_{k=1-s}^s \left({
205 :     V(\vecn+\VEC{i,j,k}) h(\vecf_z - k)
206 :     }\right)
207 :     }\right) h(\vecf_y - j)
208 :     }\right) h(\vecf_x - i)
209 :     \end{displaymath}%
210 :     Thus, we see that the kernel $h$ is evaluated $2s$ times per axis (\ie{}, $6s$ times in 3D).
211 : jhr 271 \figref{fig:3d-probe-code-c} gives C code for this operation.
212 :     \begin{figure}[p]
213 : jhr 156 \begin{quote}
214 : jhr 160 \lstset{language=C}
215 : jhr 156 \begin{lstlisting}
216 :     typedef double vec3[3];
217 :    
218 : jhr 270 double V[][][]; // image data
219 : jhr 288 double h[4][4]; // bslpn3 (as before)
220 : glk 289 const int s = 2; // kernel support
221 : jhr 156
222 : jhr 272 double probe (vec3 p)
223 : jhr 270 {
224 : jhr 272 double x[3] = transform(p); // image-space position
225 : glk 289 double nxd, nyd, nzd, fx, fy, fz;
226 :     int nx, ny, nz, i, j, k;
227 :    
228 :     fx = modf (x[0], &nxd); nx = (int)nxd;
229 :     fy = modf (x[1], &nyd); ny = (int)nyd;
230 :     fz = modf (x[2], &nzd); nz = (int)nzd;
231 : jhr 156
232 : jhr 270 // compute kernel values for each axis
233 :     double hx[4], hy[4], hz[4];
234 : glk 289 for (i = 1-s; i <= s; i++) {
235 : jhr 271 double t;
236 :     t = fx - i;
237 :     hx[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
238 :     t = fy - i;
239 :     hy[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
240 :     t = fz - i;
241 :     hz[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
242 : jhr 270 }
243 :    
244 : glk 289 double vv = 0.0;
245 :     for (i = 1-s; i <= s; i++) {
246 :     for (j = 1-s; j <= s; j++) {
247 :     for (k = 1-s; k <= s; k++) {
248 :     vv += V[nx+i][ny+j][nz+k]*hz[k+s-1]*hy[j+s-1]*hx[i+s-1];
249 : jhr 270 }
250 :     }
251 :     }
252 :    
253 : glk 289 return vv;
254 : jhr 156 }
255 :     \end{lstlisting}%
256 :     \end{quote}%
257 : jhr 157 \caption{Computing $F\mkw{@}\vecx$ for a 3D scalar field in C}
258 : jhr 271 \label{fig:3d-probe-code-c}
259 : jhr 156 \end{figure}
260 :    
261 : jhr 272 \begin{figure}[p]
262 :     \begin{quote}
263 : jhr 273 \lstset{language=OpenCL}
264 : jhr 272 \begin{lstlisting}
265 :     double V[][][]; // image data
266 :     double h[4][4]; // kernel
267 :     const int s = 2; // kernel support
268 :    
269 :     double probe (double4 p)
270 :     {
271 :     double4 x = transform(p); // image-space position
272 :     double4 n, f
273 :    
274 :     f = modf (x, &n);
275 :    
276 :     // compute kernel values for each axis
277 :     double4 t, hx, hy, hz;
278 :     t = (double4)(f.x +1, f.x, f.x - 1, f.x - 2);
279 : jhr 276 hx = d + t*(c + t*(b + t*a));
280 : jhr 272 t = (double4)(f.y +1, f.y, f.y - 1, f.y - 2);
281 : jhr 276 hy = d + t*(c + t*(b + t*a));
282 : jhr 272 t = (double4)(f.z +1, f.z, f.z - 1, f.z - 2);
283 : jhr 276 hz = d + t*(c + t*(b + t*a));
284 : jhr 272
285 : jhr 274 double vy[4], vz[4];
286 : jhr 275 for (int i = 1-s; i <= s; i++) {
287 :     for (int j = 1-s; j <= s; j++) {
288 : jhr 272 double4 v = (double4) (
289 :     V[nx+i][ny+j][nz-1],
290 :     V[nx+i][ny+j][nz],
291 :     V[nx+i][ny+j][nz+1],
292 :     V[nx+i][ny+j][nz+2]);
293 : jhr 274 vz[j+s-1] += dot(v, hz);
294 : jhr 272 }
295 : jhr 274 vy[i+s-1] = dot((double4)(vz[0], vz[1], vz[2], vz[3]), hy);
296 : jhr 272 }
297 :    
298 : jhr 274 return dot((double4)(vy[0], vy[1], vy[2], vy[3]), hx);
299 : jhr 272 }
300 :     \end{lstlisting}%
301 :     \end{quote}%
302 :     \caption{Computing $F\mkw{@}\vecx$ for a 3D scalar field in OpenCL}
303 :     \label{fig:3d-probe-code-opencl}
304 :     \end{figure}
305 :    
306 : jhr 159 \section{Probing a 3D derivative field}
307 :     We next consider the case of probing the derivative of a scalar field $F = V\circledast{}h$, where $s$ is the support
308 :     of $h$.
309 : jhr 279 The probe $(\nabla F)\mkw{@}\vecp$ produces a vector result as follows:
310 : jhr 159 \begin{eqnarray*}
311 :     \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\
312 :     \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\
313 :     \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\
314 : jhr 279 (\nabla F)\mkw{@}\vecp & = & \left[\begin{array}{c}
315 : jhr 159 \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h'(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}} \\
316 :     \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h'(\vecf_y - j) h(\vecf_z - k)}}} \\
317 :     \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h'(\vecf_z - k)}}} \\
318 :     \end{array}\right]
319 :     \end{eqnarray*}%
320 :    
321 : jhr 279 \begin{figure}[p]
322 :     \begin{quote}
323 :     \lstset{language=C}
324 :     \begin{lstlisting}
325 :     double probe (vec3 p)
326 :     {
327 :     double x[3] = transform(p); // image-space position
328 :     double nx, ny, nz, fx, fy, fz;
329 :    
330 :     fx = modf (x[0], &nx);
331 :     fy = modf (x[1], &ny);
332 :     fz = modf (x[2], &nz);
333 :    
334 :     // compute kernel values for each axis
335 :     double hx[4], dhx[4], hy[4], dhy[4], hz[4], dhz[4];
336 :     for (int i = 1-s; i < s; i++) {
337 :     double t;
338 :     t = fx - i;
339 :     hx[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
340 :     dhx[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]);
341 :     t = fy - i;
342 :     hy[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
343 :     dhy[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]);
344 :     t = fz - i;
345 :     hz[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
346 :     dhz[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]);
347 :     }
348 :    
349 :     vec3 vx = {0, 0, 0};
350 :     for (int i = 1-s; i <= s; i++) {
351 :     vec3 vy = {0, 0, 0};
352 :     for (int j = 1-s; j <= s; j++) {
353 :     vec3 vz = {0, 0, 0};
354 :     for (int k = 1-s; k <= s; k++) {
355 :     double v = V[nx+i][ny+j][nz+k];
356 :     vz[0] = v * hz[k+s-1];
357 :     vz[1] = vz[0];
358 :     vz[2] = v * dhz[k+s-1];
359 :     }
360 :     vy[0] += vz[0] * hy[j+s-1];
361 :     vy[1] += vz[1] * dhy[j+s-1];
362 :     vy[2] += vz[2] * hy[j+s-1];
363 :     }
364 :     vx[0] += vx[0] * dhx[i+s-1];
365 :     vx[1] += vx[1] * hx[i+s-1];
366 :     vx[2] += vx[2] * hx[i+s-1];
367 :     }
368 :    
369 :     return vx;
370 :     }
371 :     \end{lstlisting}%
372 :     \end{quote}%
373 :     \caption{Computing $(\nabla F)\mkw{@}\vecx$ for a 3D scalar field in C}
374 :     \label{fig:3d-deriv-probe-code-c}
375 :     \end{figure}
376 :    
377 : jhr 156 \end{document}

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0