SCM Repository
Annotation of /trunk/doc/probe/paper.tex
Parent Directory
|
Revision Log
Revision 348 - (view) (download) (as text)
1 : | jhr | 156 | \documentclass[11pt]{article} |
2 : | |||
3 : | \input{defs} | ||
4 : | \usepackage{graphicx} | ||
5 : | \usepackage{listings} | ||
6 : | \lstset{ | ||
7 : | jhr | 270 | basicstyle=\ttfamily\footnotesize, |
8 : | jhr | 156 | keywordstyle=\bfseries, |
9 : | showstringspaces=false, | ||
10 : | } | ||
11 : | jhr | 160 | \lstdefinelanguage{OpenCL}[]{C}{% |
12 : | morekeywords={ | ||
13 : | char2,char4,char8,char16, | ||
14 : | uchar2,uchar4,uchar8,uchar16, | ||
15 : | short2,short4,short8,short16, | ||
16 : | ushort2,ushort4,ushort8,ushort16, | ||
17 : | int2,int4,int8,int16, | ||
18 : | uint2,uint4,uint8,uint16, | ||
19 : | long2,long4,long8,long16, | ||
20 : | ulong2,ulong4,ulong8,ulong16, | ||
21 : | float2,float4,float8,float16, | ||
22 : | ufloat2,ufloat4,ufloat8,ufloat16, | ||
23 : | double2,double4,double8,double16, | ||
24 : | udouble2,udouble4,udouble8,udouble16, | ||
25 : | constant,__constant,kernel,__kernel,private,__private}, | ||
26 : | moredirectives={version}, | ||
27 : | deletekeywords={} | ||
28 : | } | ||
29 : | jhr | 156 | |
30 : | \lstset{ | ||
31 : | language=C, | ||
32 : | } | ||
33 : | |||
34 : | \setlength{\textwidth}{6in} | ||
35 : | \setlength{\oddsidemargin}{0.25in} | ||
36 : | \setlength{\evensidemargin}{0.25in} | ||
37 : | \setlength{\parskip}{5pt} | ||
38 : | |||
39 : | \newcommand{\matM}{\mathbf{M}} | ||
40 : | jhr | 157 | \newcommand{\vecx}{\mathbf{x}} |
41 : | jhr | 159 | \newcommand{\vecp}{\mathbf{p}} |
42 : | jhr | 156 | \newcommand{\vecn}{\mathbf{n}} |
43 : | \newcommand{\vecf}{\mathbf{f}} | ||
44 : | \newcommand{\VEC}[1]{\left\langle{#1}\right\rangle} | ||
45 : | \newcommand{\FLOOR}[1]{\left\lfloor{#1}\right\rfloor} | ||
46 : | |||
47 : | \title{Compiling probe operations for Diderot} | ||
48 : | \author{ | ||
49 : | glk | 289 | John Reppy, Gordon Kindlmann \\ |
50 : | jhr | 156 | University of Chicago \\ |
51 : | glk | 289 | {\small\tt{}\{jhr|glk\}@cs.uchicago.edu} \\ |
52 : | jhr | 156 | } |
53 : | \date{\today} | ||
54 : | |||
55 : | \begin{document} | ||
56 : | |||
57 : | \maketitle | ||
58 : | \thispagestyle{empty} | ||
59 : | |||
60 : | \bibliographystyle{../common/alpha} | ||
61 : | \bibliography{../common/strings-short,../common/manticore} | ||
62 : | |||
63 : | \section{Introduction} | ||
64 : | |||
65 : | This note describes the code needed to implement a probe of a field operation. | ||
66 : | jhr | 270 | In the discussion below, we use a number of notational conventions that are |
67 : | summarized in the following table: | ||
68 : | \begin{center} | ||
69 : | jhr | 274 | \begin{tabular}{cp{4in}} |
70 : | jhr | 270 | $V$ & the image data \\ |
71 : | $F$ & the field reconstructed from $V$ \\ | ||
72 : | $\vecp$ & the position vector in field-space \\ | ||
73 : | $\vecx$ & the position vector in image-space \\ | ||
74 : | jhr | 274 | $\matM^{-1}$ & homogeneous matrix that represents the transformation from world space to image space \\ |
75 : | jhr | 270 | $h$ & a piecewise polynomial convolution kernel \\ |
76 : | $s$ & the support of $h$ \\ | ||
77 : | $h_0,\,\ldots,h_{2s-1}$ & the polynomials that make up $h$ \\ | ||
78 : | \end{tabular}% | ||
79 : | \end{center}% | ||
80 : | jhr | 156 | |
81 : | \section{Probing a 1D scalar field} | ||
82 : | The simplest case is probing a 1D scalar field $F = V\circledast{}h$, where $s$ is the support | ||
83 : | of $h$. | ||
84 : | jhr | 160 | The probe $F\mkw{@}p$ is computed as follows: |
85 : | jhr | 156 | \begin{eqnarray*} |
86 : | jhr | 160 | \left[\begin{array}{c} x \\ 1 \end{array}\right] & = & \matM^{-1} \left[\begin{array}{c} p \\ 1 \end{array}\right] \qquad \text{\textit{transform to image space}} \\ |
87 : | n & = & \FLOOR{x} \qquad \text{\textit{integer part of position}} \\ | ||
88 : | f & = & x - f \qquad \text{\textit{fractional part of position}} \\ | ||
89 : | F\mkw{@}p & = & \sum_{i=1-s}^s {V(n+i) h(f - i)} | ||
90 : | jhr | 156 | \end{eqnarray*}% |
91 : | jhr | 270 | Note that the coordinate system of the convolution filter is flipped (negated) from the coordinate system |
92 : | of the image; this property is why we evaluate $h(f-i)$ for $V(n+i)$. | ||
93 : | jhr | 156 | The convolution $h$ is represented as a symmetric piecewise polynomial formed |
94 : | jhr | 314 | from $2s$ polynomials $h_0,\,\ldots,\,h_{2s-1}$. |
95 : | jhr | 156 | \begin{displaymath} |
96 : | h(x) = \left\{\begin{array}{ll} | ||
97 : | jhr | 270 | 0 & \text{$x \leq -s$ or $s \leq x$} \\ |
98 : | h_{\lfloor{}x\rfloor{}+s}(x) & \text{$-s < x < s$} \\ | ||
99 : | jhr | 156 | \end{array}\right. |
100 : | \end{displaymath}% | ||
101 : | Thus, we can rewrite the probe computation as | ||
102 : | \begin{displaymath} | ||
103 : | jhr | 270 | F\mkw{@}x = \sum_{i=1-s}^{s} {V(n+i) h_{s-i}(i - f)} |
104 : | jhr | 156 | \end{displaymath}% |
105 : | \figref{fig:1d-probe} illustrates the situation for a kernel $h$ with support $s = 2$, | ||
106 : | and \figref{fig:1d-probe-code} gives the C code for the probe operation, assuming that | ||
107 : | $h$ is represented by third-degree polynomials. | ||
108 : | \begin{figure}[t] | ||
109 : | \begin{center} | ||
110 : | \includegraphics[scale=0.8]{pictures/convo} | ||
111 : | \end{center}% | ||
112 : | \caption{1D scalar probe} | ||
113 : | \label{fig:1d-probe} | ||
114 : | \end{figure}% | ||
115 : | |||
116 : | jhr | 279 | \begin{figure}[t] |
117 : | jhr | 156 | \begin{quote} |
118 : | jhr | 160 | \lstset{language=C} |
119 : | jhr | 156 | \begin{lstlisting} |
120 : | jhr | 270 | double V[]; // image data |
121 : | glk | 289 | double h[4][4] = { // cubic B-spline ("bspln3") |
122 : | jhr | 288 | { 1.33333, 2.0, 1.0, 0.166667 }, // -2 .. -1 |
123 : | { 0.666667, 0.0, -1.0, -0.5 }, // -1 .. 0 | ||
124 : | { 0.666667, 0.0, -1.0, 0.5 }, // 0 .. 1 | ||
125 : | { 1.33333, -2.0, 1.0, -0.166667 }, // 1 .. 2 | ||
126 : | } | ||
127 : | jhr | 156 | |
128 : | jhr | 270 | double probe (double p) |
129 : | jhr | 156 | { |
130 : | glk | 289 | double x = transform(p); // from world- to image-space position |
131 : | double nd, f; int n; | ||
132 : | jhr | 156 | |
133 : | glk | 289 | f = modf (x, &nd); |
134 : | n = (int)nd; | ||
135 : | jhr | 156 | |
136 : | jhr | 160 | double value = 0.0, t; |
137 : | jhr | 156 | |
138 : | jhr | 270 | t = f + 1.0; |
139 : | value += V[n-1] * (h[3][0] + t*(h[3][1] + t*(h[3][2] + t*h[3][3]))); | ||
140 : | t = f; | ||
141 : | value += V[n] * (h[2][0] + t*(h[2][1] + t*(h[2][2] + t*h[2][3]))); | ||
142 : | jhr | 160 | t = f - 1.0; |
143 : | jhr | 270 | value += V[n+1] * (h[1][0] + t*(h[1][1] + t*(h[1][2] + t*h[1][3]))); |
144 : | jhr | 160 | t = f - 2.0; |
145 : | jhr | 270 | value += V[n+2] * (h[0][0] + t*(h[0][1] + t*(h[0][2] + t*h[0][3]))); |
146 : | jhr | 156 | |
147 : | return value; | ||
148 : | } | ||
149 : | \end{lstlisting}% | ||
150 : | \end{quote}% | ||
151 : | jhr | 157 | \caption{Computing $F\mkw{@}x$ for a 1D scalar field in C} |
152 : | jhr | 156 | \label{fig:1d-probe-code} |
153 : | \end{figure} | ||
154 : | jhr | 160 | If we look at the four lines that define \texttt{value}, we see an opportunity for |
155 : | using SIMD parallelism to speed the computation. | ||
156 : | \figref{fig:1d-probe-code-opencl} gives the OpenCL for for the probe function, where | ||
157 : | we have lifted the kernel coefficients into the vector constants \texttt{a}, \texttt{b}, | ||
158 : | jhr | 270 | \texttt{c}, and \texttt{d}, and put the \texttt{t} values into a vector too. |
159 : | We then use a dot product to compute the sum of the products of the | ||
160 : | convolution-filter values and the image values. | ||
161 : | jhr | 279 | \begin{figure}[t] |
162 : | jhr | 156 | \begin{quote} |
163 : | jhr | 160 | \lstset{language=OpenCL} |
164 : | jhr | 156 | \begin{lstlisting} |
165 : | jhr | 270 | double4 d = (double4)(h[3][0], h[2][0], h[1][0], h[0][0]); // x^0 coeffs |
166 : | double4 c = (double4)(h[3][1], h[2][1], h[2][1], h[0][1]); // x^1 coeffs | ||
167 : | double4 b = (double4)(h[3][2], h[2][2], h[1][2], h[0][2]); // x^2 coeffs | ||
168 : | double4 a = (double4)(h[3][3], h[2][3], h[1][3], h[0][3]); // x^3 coeffs | ||
169 : | jhr | 156 | |
170 : | jhr | 270 | double probe (double p) |
171 : | jhr | 156 | { |
172 : | jhr | 270 | double x = transform(p); // image-space position |
173 : | jhr | 156 | double n, f; |
174 : | |||
175 : | jhr | 270 | f = modf (x, &n); |
176 : | jhr | 156 | |
177 : | jhr | 270 | double4 v = (double4)(V[n-1], V[n], V[n+1], V[n+2]); |
178 : | double4 t = (double4)(f + 1.0, f, f - 1.0, f - 2.0); | ||
179 : | jhr | 160 | return dot(v, d + t*(c + t*(b + t*a))); |
180 : | jhr | 156 | } |
181 : | \end{lstlisting}% | ||
182 : | \end{quote}% | ||
183 : | jhr | 157 | \caption{Computing $F\mkw{@}x$ for a 1D scalar field in OpenCL} |
184 : | jhr | 156 | \label{fig:1d-probe-code-opencl} |
185 : | \end{figure} | ||
186 : | |||
187 : | \section{Probing a 3D scalar field} | ||
188 : | |||
189 : | The more common case is when the field is a convolution of a scalar 3-dimensional | ||
190 : | field ($F = V\circledast{}h$). | ||
191 : | Let $s$ be the support of $h$. | ||
192 : | jhr | 159 | Then the probe $F\mkw{@}\vecp$ is computed as follows: |
193 : | jhr | 156 | \begin{eqnarray*} |
194 : | jhr | 159 | \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\ |
195 : | \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\ | ||
196 : | \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\ | ||
197 : | F\mkw{@}\vecp & = & \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}} | ||
198 : | jhr | 156 | \end{eqnarray*}% |
199 : | jhr | 314 | We can restructure this equation to highlight the loop-invariant computations.\footnote{ |
200 : | This restructuring assumes that the $Z$-axis of the data is the ``fastest.'' | ||
201 : | } | ||
202 : | jhr | 270 | \begin{displaymath} |
203 : | F\mkw{@}\vecp = | ||
204 : | \sum_{i=1-s}^s \left({ | ||
205 : | \sum_{j=1-s}^s \left({ | ||
206 : | \sum_{k=1-s}^s \left({ | ||
207 : | V(\vecn+\VEC{i,j,k}) h(\vecf_z - k) | ||
208 : | }\right) | ||
209 : | }\right) h(\vecf_y - j) | ||
210 : | }\right) h(\vecf_x - i) | ||
211 : | \end{displaymath}% | ||
212 : | Thus, we see that the kernel $h$ is evaluated $2s$ times per axis (\ie{}, $6s$ times in 3D). | ||
213 : | jhr | 271 | \figref{fig:3d-probe-code-c} gives C code for this operation. |
214 : | \begin{figure}[p] | ||
215 : | jhr | 156 | \begin{quote} |
216 : | jhr | 160 | \lstset{language=C} |
217 : | jhr | 156 | \begin{lstlisting} |
218 : | typedef double vec3[3]; | ||
219 : | |||
220 : | jhr | 270 | double V[][][]; // image data |
221 : | jhr | 288 | double h[4][4]; // bslpn3 (as before) |
222 : | glk | 289 | const int s = 2; // kernel support |
223 : | jhr | 156 | |
224 : | jhr | 272 | double probe (vec3 p) |
225 : | jhr | 270 | { |
226 : | jhr | 272 | double x[3] = transform(p); // image-space position |
227 : | jhr | 293 | double nxd, nyd, nzd, fx, fy, fz; |
228 : | int nx, ny, nz; | ||
229 : | |||
230 : | fx = modf (x[0], &nxd); nx = (int)nxd; | ||
231 : | fy = modf (x[1], &nyd); ny = (int)nyd; | ||
232 : | glk | 289 | fz = modf (x[2], &nzd); nz = (int)nzd; |
233 : | jhr | 156 | |
234 : | jhr | 270 | // compute kernel values for each axis |
235 : | double hx[4], hy[4], hz[4]; | ||
236 : | jhr | 293 | for (int i = 1-s; i <= s; i++) { |
237 : | jhr | 271 | double t; |
238 : | t = fx - i; | ||
239 : | hx[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); | ||
240 : | t = fy - i; | ||
241 : | hy[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); | ||
242 : | t = fz - i; | ||
243 : | hz[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); | ||
244 : | jhr | 270 | } |
245 : | |||
246 : | jhr | 293 | float vx = 0.0; |
247 : | for (int i = 1-s; i <= s; i++) { | ||
248 : | float vy = 0.0; | ||
249 : | for (int j = 1-s; j <= s; j++) { | ||
250 : | float vz = 0.0; | ||
251 : | for (int k = 1-s; k <= s; k++) { | ||
252 : | vz += V[nx+i][ny+j][nz+k] * hz[k+s-1]; | ||
253 : | jhr | 270 | } |
254 : | jhr | 293 | vy += vz * hy[j+s-1]; |
255 : | jhr | 270 | } |
256 : | jhr | 293 | vx += vy * hx[i+s-1]; |
257 : | jhr | 270 | } |
258 : | |||
259 : | jhr | 293 | return vx; |
260 : | jhr | 156 | } |
261 : | \end{lstlisting}% | ||
262 : | \end{quote}% | ||
263 : | jhr | 157 | \caption{Computing $F\mkw{@}\vecx$ for a 3D scalar field in C} |
264 : | jhr | 271 | \label{fig:3d-probe-code-c} |
265 : | jhr | 156 | \end{figure} |
266 : | |||
267 : | jhr | 272 | \begin{figure}[p] |
268 : | \begin{quote} | ||
269 : | jhr | 273 | \lstset{language=OpenCL} |
270 : | jhr | 272 | \begin{lstlisting} |
271 : | double V[][][]; // image data | ||
272 : | jhr | 314 | double4 a, b, c, d; // kernel coefficients |
273 : | const int s = 2; // kernel support | ||
274 : | jhr | 272 | |
275 : | double probe (double4 p) | ||
276 : | { | ||
277 : | double4 x = transform(p); // image-space position | ||
278 : | jhr | 293 | double4 nd, f |
279 : | int4 n; | ||
280 : | jhr | 272 | |
281 : | jhr | 293 | f = modf (x, &nd); |
282 : | n = convert_int4(nd); | ||
283 : | jhr | 272 | |
284 : | // compute kernel values for each axis | ||
285 : | double4 t, hx, hy, hz; | ||
286 : | jhr | 293 | t = (double4)(f.x + 1, f.x, f.x - 1, f.x - 2); |
287 : | jhr | 276 | hx = d + t*(c + t*(b + t*a)); |
288 : | jhr | 293 | t = (double4)(f.y + 1, f.y, f.y - 1, f.y - 2); |
289 : | jhr | 276 | hy = d + t*(c + t*(b + t*a)); |
290 : | jhr | 293 | t = (double4)(f.z + 1, f.z, f.z - 1, f.z - 2); |
291 : | jhr | 276 | hz = d + t*(c + t*(b + t*a)); |
292 : | jhr | 272 | |
293 : | jhr | 274 | double vy[4], vz[4]; |
294 : | jhr | 275 | for (int i = 1-s; i <= s; i++) { |
295 : | for (int j = 1-s; j <= s; j++) { | ||
296 : | jhr | 272 | double4 v = (double4) ( |
297 : | jhr | 293 | V[n.x+i][n.y+j][n.z-1], |
298 : | V[n.x+i][n.y+j][n.z], | ||
299 : | V[n.x+i][n.y+j][n.z+1], | ||
300 : | V[n.x+i][n.y+j][n.z+2]); | ||
301 : | jhr | 274 | vz[j+s-1] += dot(v, hz); |
302 : | jhr | 272 | } |
303 : | jhr | 274 | vy[i+s-1] = dot((double4)(vz[0], vz[1], vz[2], vz[3]), hy); |
304 : | jhr | 272 | } |
305 : | |||
306 : | jhr | 274 | return dot((double4)(vy[0], vy[1], vy[2], vy[3]), hx); |
307 : | jhr | 272 | } |
308 : | \end{lstlisting}% | ||
309 : | \end{quote}% | ||
310 : | \caption{Computing $F\mkw{@}\vecx$ for a 3D scalar field in OpenCL} | ||
311 : | \label{fig:3d-probe-code-opencl} | ||
312 : | \end{figure} | ||
313 : | |||
314 : | jhr | 348 | \section{Generalizing to derivative fields} |
315 : | To generalize the probe operation to work on derivative fields (\eg{}, $(\nabla^k F)\mkw{@}\vecp$), | ||
316 : | we represent the $\nabla^k$ operation as a $k$-order tensor of partial derivative operators. | ||
317 : | To simplify the presentation, we will restrict the discussion to $3$-dimensional space, | ||
318 : | but it easily generalizes. | ||
319 : | In 3D, a partial-derivative operator has the form | ||
320 : | \begin{displaymath} | ||
321 : | \frac{\partial}{\partial{}x^i \partial{}y^j \partial{}z^k} | ||
322 : | \end{displaymath}% | ||
323 : | where $i$, $j$, and $k$ are the number of levels of differentiation in the $x$, $y$, | ||
324 : | and $z$-axes (respectively). | ||
325 : | The normal convention is to omit explicit mention of axes with zero levels of differentiation, | ||
326 : | but we will be explicit here. | ||
327 : | We use $\mathbf{Id}^n$ to represent the identity operator in $n$-dimensions (i.e., the | ||
328 : | one with zero=-levels of differentiation in all axes). | ||
329 : | |||
330 : | The product of two partial-derivative operators is formed by summing the exponents; | ||
331 : | for example in 3D we have | ||
332 : | \begin{displaymath} | ||
333 : | \frac{\partial}{\partial{}x^i \partial{}y^j \partial{}z^k} | ||
334 : | \frac{\partial}{\partial{}x^{i'} \partial{}y^{j'} \partial{}z^{k'}} = | ||
335 : | \frac{\partial}{\partial{}x^{i+i'} \partial{}y^{j+j'} \partial{}z^{k+k'}} | ||
336 : | \end{displaymath}% | ||
337 : | The last bit of notation that we need is the $n$-vector of partial-derivative | ||
338 : | operators $\delta^n$, in which the $i$th element has one level of differentiation in the $i$th | ||
339 : | axis (and zero in all other axes). | ||
340 : | For example, | ||
341 : | \begin{displaymath} | ||
342 : | \delta^3 = \left[ | ||
343 : | \frac{\partial}{\partial{}x^1 \partial{}y^0 \partial{}z^0} \quad | ||
344 : | \frac{\partial}{\partial{}x^0 \partial{}y^1 \partial{}z^0} \quad | ||
345 : | \frac{\partial}{\partial{}x^0 \partial{}y^0 \partial{}z^1} | ||
346 : | \right] | ||
347 : | \end{displaymath}% | ||
348 : | For $k > 0$ levels of differentiation in $n$-dimensional space, we have | ||
349 : | \begin{displaymath} | ||
350 : | \nabla^k = \left\{\begin{array}{ll} | ||
351 : | \mathbf{Id}^n & \text{when $k = 0$} \\ | ||
352 : | \delta^n \otimes \nabla^{k-1} & \text{when $k > 0$} \\ | ||
353 : | \end{array}\right. | ||
354 : | \end{displaymath}% | ||
355 : | Consider, for example, the case where $k = 2$. | ||
356 : | \begin{displaymath} | ||
357 : | \nabla^2 = \delta^3 \otimes \delta^3 = \left[\begin{array}{ccc} | ||
358 : | \frac{\partial}{\partial{}x^2 \partial{}y^0 \partial{}z^0} | ||
359 : | & \frac{\partial}{\partial{}x^1 \partial{}y^1 \partial{}z^0} | ||
360 : | & \frac{\partial}{\partial{}x^1 \partial{}y^0 \partial{}z^1} \\ | ||
361 : | \frac{\partial}{\partial{}x^1 \partial{}y^1 \partial{}z^0} | ||
362 : | & \frac{\partial}{\partial{}x^0 \partial{}y^2 \partial{}z^0} | ||
363 : | & \frac{\partial}{\partial{}x^0 \partial{}y^1 \partial{}z^1} \\ | ||
364 : | \frac{\partial}{\partial{}x^1 \partial{}y^0 \partial{}z^1} | ||
365 : | & \frac{\partial}{\partial{}x^0 \partial{}y^1 \partial{}z^1} | ||
366 : | & \frac{\partial}{\partial{}x^0 \partial{}y^0 \partial{}z^2} \\ | ||
367 : | \end{array}\right] | ||
368 : | \end{displaymath}% | ||
369 : | |||
370 : | When we have a probe of a derivative field $(\nabla^k F)\mkw{@}\vecp$, we will generate code for | ||
371 : | each partial derivative operator $\frac{\partial}{\partial{}x^i\partial{}y^j\partial{}z^k}$ | ||
372 : | separately. | ||
373 : | If $F = V\circledast{}h$, then we will use $h^i(x) h^j(y) h^k(z)$ as the interpolation | ||
374 : | coefficients, where $h^i$ is the $i$th derivative of $h$. | ||
375 : | |||
376 : | jhr | 159 | \section{Probing a 3D derivative field} |
377 : | We next consider the case of probing the derivative of a scalar field $F = V\circledast{}h$, where $s$ is the support | ||
378 : | of $h$. | ||
379 : | jhr | 279 | The probe $(\nabla F)\mkw{@}\vecp$ produces a vector result as follows: |
380 : | jhr | 314 | \begin{displaymath} |
381 : | (\nabla F)\mkw{@}\vecp = \left[\begin{array}{c} | ||
382 : | \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_z - k) h(\vecf_y - j) h'(\vecf_x - i)}}} \\ | ||
383 : | \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_z - k) h'(\vecf_y - j) h(\vecf_x - i)}}} \\ | ||
384 : | \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h'(\vecf_z - k) h(\vecf_y - j) h(\vecf_x - i)}}} \\ | ||
385 : | \end{array}\right] | ||
386 : | \end{displaymath}% | ||
387 : | where | ||
388 : | jhr | 159 | \begin{eqnarray*} |
389 : | \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\ | ||
390 : | \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\ | ||
391 : | jhr | 314 | \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} |
392 : | jhr | 159 | \end{eqnarray*}% |
393 : | |||
394 : | jhr | 279 | \begin{figure}[p] |
395 : | \begin{quote} | ||
396 : | \lstset{language=C} | ||
397 : | \begin{lstlisting} | ||
398 : | glk | 301 | vec3 probe (vec3 p) |
399 : | jhr | 279 | { |
400 : | double x[3] = transform(p); // image-space position | ||
401 : | glk | 301 | double nxd, nyd, nzd, fx, fy, fz; |
402 : | int nx, ny, nz; | ||
403 : | jhr | 279 | |
404 : | jhr | 314 | fx = modf (x[0], &nxd); nx = (int)nxd; |
405 : | fy = modf (x[1], &nyd); ny = (int)nyd; | ||
406 : | glk | 301 | fz = modf (x[2], &nzd); nz = (int)nzd; |
407 : | jhr | 279 | |
408 : | // compute kernel values for each axis | ||
409 : | double hx[4], dhx[4], hy[4], dhy[4], hz[4], dhz[4]; | ||
410 : | glk | 301 | for (int i = 1-s; i <= s; i++) { |
411 : | jhr | 279 | double t; |
412 : | t = fx - i; | ||
413 : | hx[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); | ||
414 : | dhx[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]); | ||
415 : | t = fy - i; | ||
416 : | hy[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); | ||
417 : | dhy[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]); | ||
418 : | t = fz - i; | ||
419 : | hz[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3])); | ||
420 : | dhz[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]); | ||
421 : | } | ||
422 : | |||
423 : | vec3 vx = {0, 0, 0}; | ||
424 : | for (int i = 1-s; i <= s; i++) { | ||
425 : | vec3 vy = {0, 0, 0}; | ||
426 : | for (int j = 1-s; j <= s; j++) { | ||
427 : | vec3 vz = {0, 0, 0}; | ||
428 : | for (int k = 1-s; k <= s; k++) { | ||
429 : | double v = V[nx+i][ny+j][nz+k]; | ||
430 : | vz[0] = v * hz[k+s-1]; | ||
431 : | glk | 301 | vz[1] = v * hz[k+s-1]; |
432 : | jhr | 279 | vz[2] = v * dhz[k+s-1]; |
433 : | } | ||
434 : | vy[0] += vz[0] * hy[j+s-1]; | ||
435 : | vy[1] += vz[1] * dhy[j+s-1]; | ||
436 : | vy[2] += vz[2] * hy[j+s-1]; | ||
437 : | } | ||
438 : | glk | 301 | vx[0] += vy[0] * dhx[i+s-1]; |
439 : | vx[1] += vy[1] * hx[i+s-1]; | ||
440 : | vx[2] += vy[2] * hx[i+s-1]; | ||
441 : | jhr | 279 | } |
442 : | return vx; | ||
443 : | } | ||
444 : | \end{lstlisting}% | ||
445 : | \end{quote}% | ||
446 : | \caption{Computing $(\nabla F)\mkw{@}\vecx$ for a 3D scalar field in C} | ||
447 : | \label{fig:3d-deriv-probe-code-c} | ||
448 : | \end{figure} | ||
449 : | |||
450 : | jhr | 156 | \end{document} |
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |