Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /trunk/doc/probe/paper.tex
ViewVC logotype

Annotation of /trunk/doc/probe/paper.tex

Parent Directory Parent Directory | Revision Log Revision Log


Revision 288 - (view) (download) (as text)

1 : jhr 156 \documentclass[11pt]{article}
2 :    
3 :     \input{defs}
4 :     \usepackage{graphicx}
5 :     \usepackage{listings}
6 :     \lstset{
7 : jhr 270 basicstyle=\ttfamily\footnotesize,
8 : jhr 156 keywordstyle=\bfseries,
9 :     showstringspaces=false,
10 :     }
11 : jhr 160 \lstdefinelanguage{OpenCL}[]{C}{%
12 :     morekeywords={
13 :     char2,char4,char8,char16,
14 :     uchar2,uchar4,uchar8,uchar16,
15 :     short2,short4,short8,short16,
16 :     ushort2,ushort4,ushort8,ushort16,
17 :     int2,int4,int8,int16,
18 :     uint2,uint4,uint8,uint16,
19 :     long2,long4,long8,long16,
20 :     ulong2,ulong4,ulong8,ulong16,
21 :     float2,float4,float8,float16,
22 :     ufloat2,ufloat4,ufloat8,ufloat16,
23 :     double2,double4,double8,double16,
24 :     udouble2,udouble4,udouble8,udouble16,
25 :     constant,__constant,kernel,__kernel,private,__private},
26 :     moredirectives={version},
27 :     deletekeywords={}
28 :     }
29 : jhr 156
30 :     \lstset{
31 :     language=C,
32 :     }
33 :    
34 :     \setlength{\textwidth}{6in}
35 :     \setlength{\oddsidemargin}{0.25in}
36 :     \setlength{\evensidemargin}{0.25in}
37 :     \setlength{\parskip}{5pt}
38 :    
39 :     \newcommand{\matM}{\mathbf{M}}
40 : jhr 157 \newcommand{\vecx}{\mathbf{x}}
41 : jhr 159 \newcommand{\vecp}{\mathbf{p}}
42 : jhr 156 \newcommand{\vecn}{\mathbf{n}}
43 :     \newcommand{\vecf}{\mathbf{f}}
44 :     \newcommand{\VEC}[1]{\left\langle{#1}\right\rangle}
45 :     \newcommand{\FLOOR}[1]{\left\lfloor{#1}\right\rfloor}
46 :    
47 :     \title{Compiling probe operations for Diderot}
48 :     \author{
49 :     John Reppy \\
50 :     University of Chicago \\
51 :     {\small\tt{}jhr@cs.uchicago.edu} \\
52 :     }
53 :     \date{\today}
54 :    
55 :     \begin{document}
56 :    
57 :     \maketitle
58 :     \thispagestyle{empty}
59 :    
60 :     \bibliographystyle{../common/alpha}
61 :     \bibliography{../common/strings-short,../common/manticore}
62 :    
63 :     \section{Introduction}
64 :    
65 :     This note describes the code needed to implement a probe of a field operation.
66 : jhr 270 In the discussion below, we use a number of notational conventions that are
67 :     summarized in the following table:
68 :     \begin{center}
69 : jhr 274 \begin{tabular}{cp{4in}}
70 : jhr 270 $V$ & the image data \\
71 :     $F$ & the field reconstructed from $V$ \\
72 :     $\vecp$ & the position vector in field-space \\
73 :     $\vecx$ & the position vector in image-space \\
74 : jhr 274 $\matM^{-1}$ & homogeneous matrix that represents the transformation from world space to image space \\
75 : jhr 270 $h$ & a piecewise polynomial convolution kernel \\
76 :     $s$ & the support of $h$ \\
77 :     $h_0,\,\ldots,h_{2s-1}$ & the polynomials that make up $h$ \\
78 :     \end{tabular}%
79 :     \end{center}%
80 : jhr 156
81 :     \section{Probing a 1D scalar field}
82 :     The simplest case is probing a 1D scalar field $F = V\circledast{}h$, where $s$ is the support
83 :     of $h$.
84 : jhr 160 The probe $F\mkw{@}p$ is computed as follows:
85 : jhr 156 \begin{eqnarray*}
86 : jhr 160 \left[\begin{array}{c} x \\ 1 \end{array}\right] & = & \matM^{-1} \left[\begin{array}{c} p \\ 1 \end{array}\right] \qquad \text{\textit{transform to image space}} \\
87 :     n & = & \FLOOR{x} \qquad \text{\textit{integer part of position}} \\
88 :     f & = & x - f \qquad \text{\textit{fractional part of position}} \\
89 :     F\mkw{@}p & = & \sum_{i=1-s}^s {V(n+i) h(f - i)}
90 : jhr 156 \end{eqnarray*}%
91 : jhr 270 Note that the coordinate system of the convolution filter is flipped (negated) from the coordinate system
92 :     of the image; this property is why we evaluate $h(f-i)$ for $V(n+i)$.
93 : jhr 156 The convolution $h$ is represented as a symmetric piecewise polynomial formed
94 : jhr 270 from $2s$ polynomials $h_1,\,\ldots,\,h_{2s}$.
95 : jhr 156 \begin{displaymath}
96 :     h(x) = \left\{\begin{array}{ll}
97 : jhr 270 0 & \text{$x \leq -s$ or $s \leq x$} \\
98 :     h_{\lfloor{}x\rfloor{}+s}(x) & \text{$-s < x < s$} \\
99 : jhr 156 \end{array}\right.
100 :     \end{displaymath}%
101 :     Thus, we can rewrite the probe computation as
102 :     \begin{displaymath}
103 : jhr 270 F\mkw{@}x = \sum_{i=1-s}^{s} {V(n+i) h_{s-i}(i - f)}
104 : jhr 156 \end{displaymath}%
105 :     \figref{fig:1d-probe} illustrates the situation for a kernel $h$ with support $s = 2$,
106 :     and \figref{fig:1d-probe-code} gives the C code for the probe operation, assuming that
107 :     $h$ is represented by third-degree polynomials.
108 :     \begin{figure}[t]
109 :     \begin{center}
110 :     \includegraphics[scale=0.8]{pictures/convo}
111 :     \end{center}%
112 :     \caption{1D scalar probe}
113 :     \label{fig:1d-probe}
114 :     \end{figure}%
115 :    
116 : jhr 279 \begin{figure}[t]
117 : jhr 156 \begin{quote}
118 : jhr 160 \lstset{language=C}
119 : jhr 156 \begin{lstlisting}
120 : jhr 270 double V[]; // image data
121 : jhr 288 double h[4][4] = { // bspln3
122 :     { 1.33333, 2.0, 1.0, 0.166667 }, // -2 .. -1
123 :     { 0.666667, 0.0, -1.0, -0.5 }, // -1 .. 0
124 :     { 0.666667, 0.0, -1.0, 0.5 }, // 0 .. 1
125 :     { 1.33333, -2.0, 1.0, -0.166667 }, // 1 .. 2
126 :     }
127 : jhr 156
128 : jhr 270 double probe (double p)
129 : jhr 156 {
130 : jhr 270 double x = transform(p); // image-space position
131 : jhr 156 double n, f;
132 :    
133 : jhr 270 f = modf (x, &n);
134 : jhr 156
135 : jhr 160 double value = 0.0, t;
136 : jhr 156
137 : jhr 270 t = f + 1.0;
138 :     value += V[n-1] * (h[3][0] + t*(h[3][1] + t*(h[3][2] + t*h[3][3])));
139 :     t = f;
140 :     value += V[n] * (h[2][0] + t*(h[2][1] + t*(h[2][2] + t*h[2][3])));
141 : jhr 160 t = f - 1.0;
142 : jhr 270 value += V[n+1] * (h[1][0] + t*(h[1][1] + t*(h[1][2] + t*h[1][3])));
143 : jhr 160 t = f - 2.0;
144 : jhr 270 value += V[n+2] * (h[0][0] + t*(h[0][1] + t*(h[0][2] + t*h[0][3])));
145 : jhr 156
146 :     return value;
147 :     }
148 :     \end{lstlisting}%
149 :     \end{quote}%
150 : jhr 157 \caption{Computing $F\mkw{@}x$ for a 1D scalar field in C}
151 : jhr 156 \label{fig:1d-probe-code}
152 :     \end{figure}
153 : jhr 160 If we look at the four lines that define \texttt{value}, we see an opportunity for
154 :     using SIMD parallelism to speed the computation.
155 :     \figref{fig:1d-probe-code-opencl} gives the OpenCL for for the probe function, where
156 :     we have lifted the kernel coefficients into the vector constants \texttt{a}, \texttt{b},
157 : jhr 270 \texttt{c}, and \texttt{d}, and put the \texttt{t} values into a vector too.
158 :     We then use a dot product to compute the sum of the products of the
159 :     convolution-filter values and the image values.
160 : jhr 279 \begin{figure}[t]
161 : jhr 156 \begin{quote}
162 : jhr 160 \lstset{language=OpenCL}
163 : jhr 156 \begin{lstlisting}
164 : jhr 270 double4 d = (double4)(h[3][0], h[2][0], h[1][0], h[0][0]); // x^0 coeffs
165 :     double4 c = (double4)(h[3][1], h[2][1], h[2][1], h[0][1]); // x^1 coeffs
166 :     double4 b = (double4)(h[3][2], h[2][2], h[1][2], h[0][2]); // x^2 coeffs
167 :     double4 a = (double4)(h[3][3], h[2][3], h[1][3], h[0][3]); // x^3 coeffs
168 : jhr 156
169 : jhr 270 double probe (double p)
170 : jhr 156 {
171 : jhr 270 double x = transform(p); // image-space position
172 : jhr 156 double n, f;
173 :    
174 : jhr 270 f = modf (x, &n);
175 : jhr 156
176 : jhr 270 double4 v = (double4)(V[n-1], V[n], V[n+1], V[n+2]);
177 :     double4 t = (double4)(f + 1.0, f, f - 1.0, f - 2.0);
178 : jhr 160 return dot(v, d + t*(c + t*(b + t*a)));
179 : jhr 156 }
180 :     \end{lstlisting}%
181 :     \end{quote}%
182 : jhr 157 \caption{Computing $F\mkw{@}x$ for a 1D scalar field in OpenCL}
183 : jhr 156 \label{fig:1d-probe-code-opencl}
184 :     \end{figure}
185 :    
186 :     \section{Probing a 3D scalar field}
187 :    
188 :     The more common case is when the field is a convolution of a scalar 3-dimensional
189 :     field ($F = V\circledast{}h$).
190 :     Let $s$ be the support of $h$.
191 : jhr 159 Then the probe $F\mkw{@}\vecp$ is computed as follows:
192 : jhr 156 \begin{eqnarray*}
193 : jhr 159 \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\
194 :     \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\
195 :     \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\
196 :     F\mkw{@}\vecp & = & \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}}
197 : jhr 156 \end{eqnarray*}%
198 : jhr 270 We can restructure this equation to highlight the loop-invariant computations.
199 :     \begin{displaymath}
200 :     F\mkw{@}\vecp =
201 :     \sum_{i=1-s}^s \left({
202 :     \sum_{j=1-s}^s \left({
203 :     \sum_{k=1-s}^s \left({
204 :     V(\vecn+\VEC{i,j,k}) h(\vecf_z - k)
205 :     }\right)
206 :     }\right) h(\vecf_y - j)
207 :     }\right) h(\vecf_x - i)
208 :     \end{displaymath}%
209 :     Thus, we see that the kernel $h$ is evaluated $2s$ times per axis (\ie{}, $6s$ times in 3D).
210 : jhr 271 \figref{fig:3d-probe-code-c} gives C code for this operation.
211 :     \begin{figure}[p]
212 : jhr 156 \begin{quote}
213 : jhr 160 \lstset{language=C}
214 : jhr 156 \begin{lstlisting}
215 :     typedef double vec3[3];
216 :    
217 : jhr 270 double V[][][]; // image data
218 : jhr 288 double h[4][4]; // bslpn3 (as before)
219 :     const int s = 2; // kernel support
220 : jhr 156
221 : jhr 272 double probe (vec3 p)
222 : jhr 270 {
223 : jhr 272 double x[3] = transform(p); // image-space position
224 : jhr 270 double nx, ny, nz, fx, fy, fz;
225 : jhr 156
226 : jhr 271 fx = modf (x[0], &nx);
227 :     fy = modf (x[1], &ny);
228 :     fz = modf (x[2], &nz);
229 :    
230 : jhr 270 // compute kernel values for each axis
231 :     double hx[4], hy[4], hz[4];
232 : jhr 271 for (int i = 1-s; i < s; i++) {
233 :     double t;
234 :     t = fx - i;
235 :     hx[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
236 :     t = fy - i;
237 :     hy[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
238 :     t = fz - i;
239 :     hz[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
240 : jhr 270 }
241 :    
242 : jhr 279 double vx = 0.0;
243 : jhr 275 for (int i = 1-s; i <= s; i++) {
244 : jhr 279 double vy = 0.0;
245 : jhr 275 for (int j = 1-s; j <= s; j++) {
246 : jhr 270 double vz = 0.0;
247 : jhr 275 for (int k = 1-s; k <= s; k++) {
248 : jhr 270 vz += V[nx+i][ny+j][nz+k] * hz[k+s-1];
249 :     }
250 :     vy += vz * hy[j+s-1];
251 :     }
252 : jhr 271 vx += vx * hx[i+s-1];
253 : jhr 270 }
254 :    
255 :     return vx;
256 : jhr 156 }
257 :     \end{lstlisting}%
258 :     \end{quote}%
259 : jhr 157 \caption{Computing $F\mkw{@}\vecx$ for a 3D scalar field in C}
260 : jhr 271 \label{fig:3d-probe-code-c}
261 : jhr 156 \end{figure}
262 :    
263 : jhr 272 \begin{figure}[p]
264 :     \begin{quote}
265 : jhr 273 \lstset{language=OpenCL}
266 : jhr 272 \begin{lstlisting}
267 :     double V[][][]; // image data
268 :     double h[4][4]; // kernel
269 :     const int s = 2; // kernel support
270 :    
271 :     double probe (double4 p)
272 :     {
273 :     double4 x = transform(p); // image-space position
274 :     double4 n, f
275 :    
276 :     f = modf (x, &n);
277 :    
278 :     // compute kernel values for each axis
279 :     double4 t, hx, hy, hz;
280 :     t = (double4)(f.x +1, f.x, f.x - 1, f.x - 2);
281 : jhr 276 hx = d + t*(c + t*(b + t*a));
282 : jhr 272 t = (double4)(f.y +1, f.y, f.y - 1, f.y - 2);
283 : jhr 276 hy = d + t*(c + t*(b + t*a));
284 : jhr 272 t = (double4)(f.z +1, f.z, f.z - 1, f.z - 2);
285 : jhr 276 hz = d + t*(c + t*(b + t*a));
286 : jhr 272
287 : jhr 274 double vy[4], vz[4];
288 : jhr 275 for (int i = 1-s; i <= s; i++) {
289 :     for (int j = 1-s; j <= s; j++) {
290 : jhr 272 double4 v = (double4) (
291 :     V[nx+i][ny+j][nz-1],
292 :     V[nx+i][ny+j][nz],
293 :     V[nx+i][ny+j][nz+1],
294 :     V[nx+i][ny+j][nz+2]);
295 : jhr 274 vz[j+s-1] += dot(v, hz);
296 : jhr 272 }
297 : jhr 274 vy[i+s-1] = dot((double4)(vz[0], vz[1], vz[2], vz[3]), hy);
298 : jhr 272 }
299 :    
300 : jhr 274 return dot((double4)(vy[0], vy[1], vy[2], vy[3]), hx);
301 : jhr 272 }
302 :     \end{lstlisting}%
303 :     \end{quote}%
304 :     \caption{Computing $F\mkw{@}\vecx$ for a 3D scalar field in OpenCL}
305 :     \label{fig:3d-probe-code-opencl}
306 :     \end{figure}
307 :    
308 : jhr 159 \section{Probing a 3D derivative field}
309 :     We next consider the case of probing the derivative of a scalar field $F = V\circledast{}h$, where $s$ is the support
310 :     of $h$.
311 : jhr 279 The probe $(\nabla F)\mkw{@}\vecp$ produces a vector result as follows:
312 : jhr 159 \begin{eqnarray*}
313 :     \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\
314 :     \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\
315 :     \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\
316 : jhr 279 (\nabla F)\mkw{@}\vecp & = & \left[\begin{array}{c}
317 : jhr 159 \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h'(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}} \\
318 :     \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h'(\vecf_y - j) h(\vecf_z - k)}}} \\
319 :     \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h'(\vecf_z - k)}}} \\
320 :     \end{array}\right]
321 :     \end{eqnarray*}%
322 :    
323 : jhr 279 \begin{figure}[p]
324 :     \begin{quote}
325 :     \lstset{language=C}
326 :     \begin{lstlisting}
327 :     double probe (vec3 p)
328 :     {
329 :     double x[3] = transform(p); // image-space position
330 :     double nx, ny, nz, fx, fy, fz;
331 :    
332 :     fx = modf (x[0], &nx);
333 :     fy = modf (x[1], &ny);
334 :     fz = modf (x[2], &nz);
335 :    
336 :     // compute kernel values for each axis
337 :     double hx[4], dhx[4], hy[4], dhy[4], hz[4], dhz[4];
338 :     for (int i = 1-s; i < s; i++) {
339 :     double t;
340 :     t = fx - i;
341 :     hx[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
342 :     dhx[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]);
343 :     t = fy - i;
344 :     hy[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
345 :     dhy[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]);
346 :     t = fz - i;
347 :     hz[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
348 :     dhz[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]);
349 :     }
350 :    
351 :     vec3 vx = {0, 0, 0};
352 :     for (int i = 1-s; i <= s; i++) {
353 :     vec3 vy = {0, 0, 0};
354 :     for (int j = 1-s; j <= s; j++) {
355 :     vec3 vz = {0, 0, 0};
356 :     for (int k = 1-s; k <= s; k++) {
357 :     double v = V[nx+i][ny+j][nz+k];
358 :     vz[0] = v * hz[k+s-1];
359 :     vz[1] = vz[0];
360 :     vz[2] = v * dhz[k+s-1];
361 :     }
362 :     vy[0] += vz[0] * hy[j+s-1];
363 :     vy[1] += vz[1] * dhy[j+s-1];
364 :     vy[2] += vz[2] * hy[j+s-1];
365 :     }
366 :     vx[0] += vx[0] * dhx[i+s-1];
367 :     vx[1] += vx[1] * hx[i+s-1];
368 :     vx[2] += vx[2] * hx[i+s-1];
369 :     }
370 :    
371 :     return vx;
372 :     }
373 :     \end{lstlisting}%
374 :     \end{quote}%
375 :     \caption{Computing $(\nabla F)\mkw{@}\vecx$ for a 3D scalar field in C}
376 :     \label{fig:3d-deriv-probe-code-c}
377 :     \end{figure}
378 :    
379 : jhr 156 \end{document}

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0