SCM Repository
Annotation of /trunk/doc/probe/paper.tex
Parent Directory
|
Revision Log
Revision 288 - (view) (download) (as text)
1 : | jhr | 156 | \documentclass[11pt]{article} |
2 : | |||
3 : | \input{defs} | ||
4 : | \usepackage{graphicx} | ||
5 : | \usepackage{listings} | ||
6 : | \lstset{ | ||
7 : | jhr | 270 | basicstyle=\ttfamily\footnotesize, |
8 : | jhr | 156 | keywordstyle=\bfseries, |
9 : | showstringspaces=false, | ||
10 : | } | ||
11 : | jhr | 160 | \lstdefinelanguage{OpenCL}[]{C}{% |
12 : | morekeywords={ | ||
13 : | char2,char4,char8,char16, | ||
14 : | uchar2,uchar4,uchar8,uchar16, | ||
15 : | short2,short4,short8,short16, | ||
16 : | ushort2,ushort4,ushort8,ushort16, | ||
17 : | int2,int4,int8,int16, | ||
18 : | uint2,uint4,uint8,uint16, | ||
19 : | long2,long4,long8,long16, | ||
20 : | ulong2,ulong4,ulong8,ulong16, | ||
21 : | float2,float4,float8,float16, | ||
22 : | ufloat2,ufloat4,ufloat8,ufloat16, | ||
23 : | double2,double4,double8,double16, | ||
24 : | udouble2,udouble4,udouble8,udouble16, | ||
25 : | constant,__constant,kernel,__kernel,private,__private}, | ||
26 : | moredirectives={version}, | ||
27 : | deletekeywords={} | ||
28 : | } | ||
29 : | jhr | 156 | |
30 : | \lstset{ | ||
31 : | language=C, | ||
32 : | } | ||
33 : | |||
34 : | \setlength{\textwidth}{6in} | ||
35 : | \setlength{\oddsidemargin}{0.25in} | ||
36 : | \setlength{\evensidemargin}{0.25in} | ||
37 : | \setlength{\parskip}{5pt} | ||
38 : | |||
39 : | \newcommand{\matM}{\mathbf{M}} | ||
40 : | jhr | 157 | \newcommand{\vecx}{\mathbf{x}} |
41 : | jhr | 159 | \newcommand{\vecp}{\mathbf{p}} |
42 : | jhr | 156 | \newcommand{\vecn}{\mathbf{n}} |
43 : | \newcommand{\vecf}{\mathbf{f}} | ||
44 : | \newcommand{\VEC}[1]{\left\langle{#1}\right\rangle} | ||
45 : | \newcommand{\FLOOR}[1]{\left\lfloor{#1}\right\rfloor} | ||
46 : | |||
47 : | \title{Compiling probe operations for Diderot} | ||
48 : | \author{ | ||
49 : | John Reppy \\ | ||
50 : | University of Chicago \\ | ||
51 : | {\small\tt{}jhr@cs.uchicago.edu} \\ | ||
52 : | } | ||
53 : | \date{\today} | ||
54 : | |||
55 : | \begin{document} | ||
56 : | |||
57 : | \maketitle | ||
58 : | \thispagestyle{empty} | ||
59 : | |||
60 : | \bibliographystyle{../common/alpha} | ||
61 : | \bibliography{../common/strings-short,../common/manticore} | ||
62 : | |||
63 : | \section{Introduction} | ||
64 : | |||
65 : | This note describes the code needed to implement a probe of a field operation. | ||
66 : | jhr | 270 | In the discussion below, we use a number of notational conventions that are |
67 : | summarized in the following table: | ||
68 : | \begin{center} | ||
69 : | jhr | 274 | \begin{tabular}{cp{4in}} |
70 : | jhr | 270 | $V$ & the image data \\ |
71 : | $F$ & the field reconstructed from $V$ \\ | ||
72 : | $\vecp$ & the position vector in field-space \\ | ||
73 : | $\vecx$ & the position vector in image-space \\ | ||
74 : | jhr | 274 | $\matM^{-1}$ & homogeneous matrix that represents the transformation from world space to image space \\ |
75 : | jhr | 270 | $h$ & a piecewise polynomial convolution kernel \\ |
76 : | $s$ & the support of $h$ \\ | ||
77 : | $h_0,\,\ldots,h_{2s-1}$ & the polynomials that make up $h$ \\ | ||
78 : | \end{tabular}% | ||
79 : | \end{center}% | ||
80 : | jhr | 156 | |
81 : | \section{Probing a 1D scalar field} | ||
82 : | The simplest case is probing a 1D scalar field $F = V\circledast{}h$, where $s$ is the support | ||
83 : | of $h$. | ||
84 : | jhr | 160 | The probe $F\mkw{@}p$ is computed as follows: |
85 : | jhr | 156 | \begin{eqnarray*} |
86 : | jhr | 160 | \left[\begin{array}{c} x \\ 1 \end{array}\right] & = & \matM^{-1} \left[\begin{array}{c} p \\ 1 \end{array}\right] \qquad \text{\textit{transform to image space}} \\ |
87 : | n & = & \FLOOR{x} \qquad \text{\textit{integer part of position}} \\ | ||
88 : | f & = & x - f \qquad \text{\textit{fractional part of position}} \\ | ||
89 : | F\mkw{@}p & = & \sum_{i=1-s}^s {V(n+i) h(f - i)} | ||
90 : | jhr | 156 | \end{eqnarray*}% |
91 : | jhr | 270 | Note that the coordinate system of the convolution filter is flipped (negated) from the coordinate system |
92 : | of the image; this property is why we evaluate $h(f-i)$ for $V(n+i)$. | ||
93 : | jhr | 156 | The convolution $h$ is represented as a symmetric piecewise polynomial formed |
94 : | jhr | 270 | from $2s$ polynomials $h_1,\,\ldots,\,h_{2s}$. |
95 : | jhr | 156 | \begin{displaymath} |
96 : | h(x) = \left\{\begin{array}{ll} | ||
97 : | jhr | 270 | 0 & \text{$x \leq -s$ or $s \leq x$} \\ |
98 : | h_{\lfloor{}x\rfloor{}+s}(x) & \text{$-s < x < s$} \\ | ||
99 : | jhr | 156 | \end{array}\right. |
100 : | \end{displaymath}% | ||
101 : | Thus, we can rewrite the probe computation as | ||
102 : | \begin{displaymath} | ||
103 : | jhr | 270 | F\mkw{@}x = \sum_{i=1-s}^{s} {V(n+i) h_{s-i}(i - f)} |
104 : | jhr | 156 | \end{displaymath}% |
105 : | \figref{fig:1d-probe} illustrates the situation for a kernel $h$ with support $s = 2$, | ||
106 : | and \figref{fig:1d-probe-code} gives the C code for the probe operation, assuming that | ||
107 : | $h$ is represented by third-degree polynomials. | ||
108 : | \begin{figure}[t] | ||
109 : | \begin{center} | ||
110 : | \includegraphics[scale=0.8]{pictures/convo} | ||
111 : | \end{center}% | ||
112 : | \caption{1D scalar probe} | ||
113 : | \label{fig:1d-probe} | ||
114 : | \end{figure}% | ||
115 : | |||
116 : | jhr | 279 | \begin{figure}[t] |
117 : | jhr | 156 | \begin{quote} |
118 : | jhr | 160 | \lstset{language=C} |
119 : | jhr | 156 | \begin{lstlisting} |
120 : | jhr | 270 | double V[]; // image data |
121 : | jhr | 288 | double h[4][4] = { // bspln3 |
122 : | { 1.33333, 2.0, 1.0, 0.166667 }, // -2 .. -1 | ||
123 : | { 0.666667, 0.0, -1.0, -0.5 }, // -1 .. 0 | ||
124 : | { 0.666667, 0.0, -1.0, 0.5 }, // 0 .. 1 | ||
125 : | { 1.33333, -2.0, 1.0, -0.166667 }, // 1 .. 2 | ||
126 : | } | ||
127 : | jhr | 156 | |
128 : | jhr | 270 | double probe (double p) |
129 : | jhr | 156 | { |
130 : | jhr | 270 | double x = transform(p); // image-space position |
131 : | jhr | 156 | double n, f; |
132 : | |||
133 : | jhr | 270 | f = modf (x, &n); |
134 : | jhr | 156 | |
135 : | jhr | 160 | double value = 0.0, t; |
136 : | jhr | 156 | |
137 : | jhr | 270 | t = f + 1.0; |
138 : | value += V[n-1] * (h[3][0] + t*(h[3][1] + t*(h[3][2] + t*h[3][3]))); | ||
139 : | t = f; | ||
140 : | value += V[n] * (h[2][0] + t*(h[2][1] + t*(h[2][2] + t*h[2][3]))); | ||
141 : | jhr | 160 | t = f - 1.0; |
142 : | jhr | 270 | value += V[n+1] * (h[1][0] + t*(h[1][1] + t*(h[1][2] + t*h[1][3]))); |
143 : | jhr | 160 | t = f - 2.0; |
144 : | jhr | 270 | value += V[n+2] * (h[0][0] + t*(h[0][1] + t*(h[0][2] + t*h[0][3]))); |
145 : | jhr | 156 | |
146 : | return value; | ||
147 : | } | ||
148 : | \end{lstlisting}% | ||
149 : | \end{quote}% | ||
150 : | jhr | 157 | \caption{Computing $F\mkw{@}x$ for a 1D scalar field in C} |
151 : | jhr | 156 | \label{fig:1d-probe-code} |
152 : | \end{figure} | ||
153 : | jhr | 160 | If we look at the four lines that define \texttt{value}, we see an opportunity for |
154 : | using SIMD parallelism to speed the computation. | ||
155 : | \figref{fig:1d-probe-code-opencl} gives the OpenCL for for the probe function, where | ||
156 : | we have lifted the kernel coefficients into the vector constants \texttt{a}, \texttt{b}, | ||
157 : | jhr | 270 | \texttt{c}, and \texttt{d}, and put the \texttt{t} values into a vector too. |
158 : | We then use a dot product to compute the sum of the products of the | ||
159 : | convolution-filter values and the image values. | ||
160 : | jhr | 279 | \begin{figure}[t] |
161 : | jhr | 156 | \begin{quote} |
162 : | jhr | 160 | \lstset{language=OpenCL} |
163 : | jhr | 156 | \begin{lstlisting} |
164 : | jhr | 270 | double4 d = (double4)(h[3][0], h[2][0], h[1][0], h[0][0]); // x^0 coeffs |
165 : | double4 c = (double4)(h[3][1], h[2][1], h[2][1], h[0][1]); // x^1 coeffs | ||
166 : | double4 b = (double4)(h[3][2], h[2][2], h[1][2], h[0][2]); // x^2 coeffs | ||
167 : | double4 a = (double4)(h[3][3], h[2][3], h[1][3], h[0][3]); // x^3 coeffs | ||
168 : | jhr | 156 | |
169 : | jhr | 270 | double probe (double p) |
170 : | jhr | 156 | { |
171 : | jhr | 270 | double x = transform(p); // image-space position |
172 : | jhr | 156 | double n, f; |
173 : | |||
174 : | jhr | 270 | f = modf (x, &n); |
175 : | jhr | 156 | |
176 : | jhr | 270 | double4 v = (double4)(V[n-1], V[n], V[n+1], V[n+2]); |
177 : | double4 t = (double4)(f + 1.0, f, f - 1.0, f - 2.0); | ||
178 : | jhr | 160 | return dot(v, d + t*(c + t*(b + t*a))); |
179 : | jhr | 156 | } |
180 : | \end{lstlisting}% | ||
181 : | \end{quote}% | ||
182 : | jhr | 157 | \caption{Computing $F\mkw{@}x$ for a 1D scalar field in OpenCL} |
183 : | jhr | 156 | \label{fig:1d-probe-code-opencl} |
184 : | \end{figure} | ||
185 : | |||
186 : | \section{Probing a 3D scalar field} | ||
187 : | |||
188 : | The more common case is when the field is a convolution of a scalar 3-dimensional | ||
189 : | field ($F = V\circledast{}h$). | ||
190 : | Let $s$ be the support of $h$. | ||
191 : | jhr | 159 | Then the probe $F\mkw{@}\vecp$ is computed as follows: |
192 : | jhr | 156 | \begin{eqnarray*} |
193 : | jhr | 159 | \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\ |
194 : | \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\ | ||
195 : | \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\ | ||
196 : | F\mkw{@}\vecp & = & \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}} | ||
197 : | jhr | 156 | \end{eqnarray*}% |
198 : | jhr | 270 | We can restructure this equation to highlight the loop-invariant computations. |
199 : | \begin{displaymath} | ||
200 : | F\mkw{@}\vecp = | ||
201 : | \sum_{i=1-s}^s \left({ | ||
202 : | \sum_{j=1-s}^s \left({ | ||
203 : | \sum_{k=1-s}^s \left({ | ||
204 : | V(\vecn+\VEC{i,j,k}) h(\vecf_z - k) | ||
205 : | }\right) | ||
206 : | }\right) h(\vecf_y - j) | ||
207 : | }\right) h(\vecf_x - i) | ||
208 : | \end{displaymath}% | ||
209 : | Thus, we see that the kernel $h$ is evaluated $2s$ times per axis (\ie{}, $6s$ times in 3D). | ||
210 : | jhr | 271 | \figref{fig:3d-probe-code-c} gives C code for this operation. |
211 : | \begin{figure}[p] | ||
212 : | jhr | 156 | \begin{quote} |
213 : | jhr | 160 | \lstset{language=C} |
214 : | jhr | 156 | \begin{lstlisting} |
215 : | typedef double vec3[3]; | ||
216 : | |||
217 : | jhr | 270 | double V[][][]; // image data |
218 : | jhr | 288 | double h[4][4]; // bslpn3 (as before) |
219 : | const int s = 2; // kernel support | ||
220 : | jhr | 156 | |
221 : | jhr | 272 | double probe (vec3 p) |
222 : | jhr | 270 | { |
223 : | jhr | 272 | double x[3] = transform(p); // image-space position |
224 : | jhr | 270 | double nx, ny, nz, fx, fy, fz; |
225 : | jhr | 156 | |
226 : | jhr | 271 | fx = modf (x[0], &nx); |
227 : | fy = modf (x[1], &ny); | ||
228 : | fz = modf (x[2], &nz); | ||
229 : | |||
230 : | jhr | 270 | // compute kernel values for each axis |
231 : | double hx[4], hy[4], hz[4]; | ||
232 : | jhr | 271 | for (int i = 1-s; i < s; i++) { |
233 : | double t; | ||
234 : | t = fx - i; | ||
235 : | hx[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); | ||
236 : | t = fy - i; | ||
237 : | hy[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); | ||
238 : | t = fz - i; | ||
239 : | hz[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); | ||
240 : | jhr | 270 | } |
241 : | |||
242 : | jhr | 279 | double vx = 0.0; |
243 : | jhr | 275 | for (int i = 1-s; i <= s; i++) { |
244 : | jhr | 279 | double vy = 0.0; |
245 : | jhr | 275 | for (int j = 1-s; j <= s; j++) { |
246 : | jhr | 270 | double vz = 0.0; |
247 : | jhr | 275 | for (int k = 1-s; k <= s; k++) { |
248 : | jhr | 270 | vz += V[nx+i][ny+j][nz+k] * hz[k+s-1]; |
249 : | } | ||
250 : | vy += vz * hy[j+s-1]; | ||
251 : | } | ||
252 : | jhr | 271 | vx += vx * hx[i+s-1]; |
253 : | jhr | 270 | } |
254 : | |||
255 : | return vx; | ||
256 : | jhr | 156 | } |
257 : | \end{lstlisting}% | ||
258 : | \end{quote}% | ||
259 : | jhr | 157 | \caption{Computing $F\mkw{@}\vecx$ for a 3D scalar field in C} |
260 : | jhr | 271 | \label{fig:3d-probe-code-c} |
261 : | jhr | 156 | \end{figure} |
262 : | |||
263 : | jhr | 272 | \begin{figure}[p] |
264 : | \begin{quote} | ||
265 : | jhr | 273 | \lstset{language=OpenCL} |
266 : | jhr | 272 | \begin{lstlisting} |
267 : | double V[][][]; // image data | ||
268 : | double h[4][4]; // kernel | ||
269 : | const int s = 2; // kernel support | ||
270 : | |||
271 : | double probe (double4 p) | ||
272 : | { | ||
273 : | double4 x = transform(p); // image-space position | ||
274 : | double4 n, f | ||
275 : | |||
276 : | f = modf (x, &n); | ||
277 : | |||
278 : | // compute kernel values for each axis | ||
279 : | double4 t, hx, hy, hz; | ||
280 : | t = (double4)(f.x +1, f.x, f.x - 1, f.x - 2); | ||
281 : | jhr | 276 | hx = d + t*(c + t*(b + t*a)); |
282 : | jhr | 272 | t = (double4)(f.y +1, f.y, f.y - 1, f.y - 2); |
283 : | jhr | 276 | hy = d + t*(c + t*(b + t*a)); |
284 : | jhr | 272 | t = (double4)(f.z +1, f.z, f.z - 1, f.z - 2); |
285 : | jhr | 276 | hz = d + t*(c + t*(b + t*a)); |
286 : | jhr | 272 | |
287 : | jhr | 274 | double vy[4], vz[4]; |
288 : | jhr | 275 | for (int i = 1-s; i <= s; i++) { |
289 : | for (int j = 1-s; j <= s; j++) { | ||
290 : | jhr | 272 | double4 v = (double4) ( |
291 : | V[nx+i][ny+j][nz-1], | ||
292 : | V[nx+i][ny+j][nz], | ||
293 : | V[nx+i][ny+j][nz+1], | ||
294 : | V[nx+i][ny+j][nz+2]); | ||
295 : | jhr | 274 | vz[j+s-1] += dot(v, hz); |
296 : | jhr | 272 | } |
297 : | jhr | 274 | vy[i+s-1] = dot((double4)(vz[0], vz[1], vz[2], vz[3]), hy); |
298 : | jhr | 272 | } |
299 : | |||
300 : | jhr | 274 | return dot((double4)(vy[0], vy[1], vy[2], vy[3]), hx); |
301 : | jhr | 272 | } |
302 : | \end{lstlisting}% | ||
303 : | \end{quote}% | ||
304 : | \caption{Computing $F\mkw{@}\vecx$ for a 3D scalar field in OpenCL} | ||
305 : | \label{fig:3d-probe-code-opencl} | ||
306 : | \end{figure} | ||
307 : | |||
308 : | jhr | 159 | \section{Probing a 3D derivative field} |
309 : | We next consider the case of probing the derivative of a scalar field $F = V\circledast{}h$, where $s$ is the support | ||
310 : | of $h$. | ||
311 : | jhr | 279 | The probe $(\nabla F)\mkw{@}\vecp$ produces a vector result as follows: |
312 : | jhr | 159 | \begin{eqnarray*} |
313 : | \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\ | ||
314 : | \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\ | ||
315 : | \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\ | ||
316 : | jhr | 279 | (\nabla F)\mkw{@}\vecp & = & \left[\begin{array}{c} |
317 : | jhr | 159 | \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h'(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}} \\ |
318 : | \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h'(\vecf_y - j) h(\vecf_z - k)}}} \\ | ||
319 : | \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h'(\vecf_z - k)}}} \\ | ||
320 : | \end{array}\right] | ||
321 : | \end{eqnarray*}% | ||
322 : | |||
323 : | jhr | 279 | \begin{figure}[p] |
324 : | \begin{quote} | ||
325 : | \lstset{language=C} | ||
326 : | \begin{lstlisting} | ||
327 : | double probe (vec3 p) | ||
328 : | { | ||
329 : | double x[3] = transform(p); // image-space position | ||
330 : | double nx, ny, nz, fx, fy, fz; | ||
331 : | |||
332 : | fx = modf (x[0], &nx); | ||
333 : | fy = modf (x[1], &ny); | ||
334 : | fz = modf (x[2], &nz); | ||
335 : | |||
336 : | // compute kernel values for each axis | ||
337 : | double hx[4], dhx[4], hy[4], dhy[4], hz[4], dhz[4]; | ||
338 : | for (int i = 1-s; i < s; i++) { | ||
339 : | double t; | ||
340 : | t = fx - i; | ||
341 : | hx[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); | ||
342 : | dhx[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]); | ||
343 : | t = fy - i; | ||
344 : | hy[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); | ||
345 : | dhy[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]); | ||
346 : | t = fz - i; | ||
347 : | hz[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); | ||
348 : | dhz[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]); | ||
349 : | } | ||
350 : | |||
351 : | vec3 vx = {0, 0, 0}; | ||
352 : | for (int i = 1-s; i <= s; i++) { | ||
353 : | vec3 vy = {0, 0, 0}; | ||
354 : | for (int j = 1-s; j <= s; j++) { | ||
355 : | vec3 vz = {0, 0, 0}; | ||
356 : | for (int k = 1-s; k <= s; k++) { | ||
357 : | double v = V[nx+i][ny+j][nz+k]; | ||
358 : | vz[0] = v * hz[k+s-1]; | ||
359 : | vz[1] = vz[0]; | ||
360 : | vz[2] = v * dhz[k+s-1]; | ||
361 : | } | ||
362 : | vy[0] += vz[0] * hy[j+s-1]; | ||
363 : | vy[1] += vz[1] * dhy[j+s-1]; | ||
364 : | vy[2] += vz[2] * hy[j+s-1]; | ||
365 : | } | ||
366 : | vx[0] += vx[0] * dhx[i+s-1]; | ||
367 : | vx[1] += vx[1] * hx[i+s-1]; | ||
368 : | vx[2] += vx[2] * hx[i+s-1]; | ||
369 : | } | ||
370 : | |||
371 : | return vx; | ||
372 : | } | ||
373 : | \end{lstlisting}% | ||
374 : | \end{quote}% | ||
375 : | \caption{Computing $(\nabla F)\mkw{@}\vecx$ for a 3D scalar field in C} | ||
376 : | \label{fig:3d-deriv-probe-code-c} | ||
377 : | \end{figure} | ||
378 : | |||
379 : | jhr | 156 | \end{document} |
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |