Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /trunk/doc/probe/paper.tex
ViewVC logotype

Annotation of /trunk/doc/probe/paper.tex

Parent Directory Parent Directory | Revision Log Revision Log


Revision 348 - (view) (download) (as text)

1 : jhr 156 \documentclass[11pt]{article}
2 :    
3 :     \input{defs}
4 :     \usepackage{graphicx}
5 :     \usepackage{listings}
6 :     \lstset{
7 : jhr 270 basicstyle=\ttfamily\footnotesize,
8 : jhr 156 keywordstyle=\bfseries,
9 :     showstringspaces=false,
10 :     }
11 : jhr 160 \lstdefinelanguage{OpenCL}[]{C}{%
12 :     morekeywords={
13 :     char2,char4,char8,char16,
14 :     uchar2,uchar4,uchar8,uchar16,
15 :     short2,short4,short8,short16,
16 :     ushort2,ushort4,ushort8,ushort16,
17 :     int2,int4,int8,int16,
18 :     uint2,uint4,uint8,uint16,
19 :     long2,long4,long8,long16,
20 :     ulong2,ulong4,ulong8,ulong16,
21 :     float2,float4,float8,float16,
22 :     ufloat2,ufloat4,ufloat8,ufloat16,
23 :     double2,double4,double8,double16,
24 :     udouble2,udouble4,udouble8,udouble16,
25 :     constant,__constant,kernel,__kernel,private,__private},
26 :     moredirectives={version},
27 :     deletekeywords={}
28 :     }
29 : jhr 156
30 :     \lstset{
31 :     language=C,
32 :     }
33 :    
34 :     \setlength{\textwidth}{6in}
35 :     \setlength{\oddsidemargin}{0.25in}
36 :     \setlength{\evensidemargin}{0.25in}
37 :     \setlength{\parskip}{5pt}
38 :    
39 :     \newcommand{\matM}{\mathbf{M}}
40 : jhr 157 \newcommand{\vecx}{\mathbf{x}}
41 : jhr 159 \newcommand{\vecp}{\mathbf{p}}
42 : jhr 156 \newcommand{\vecn}{\mathbf{n}}
43 :     \newcommand{\vecf}{\mathbf{f}}
44 :     \newcommand{\VEC}[1]{\left\langle{#1}\right\rangle}
45 :     \newcommand{\FLOOR}[1]{\left\lfloor{#1}\right\rfloor}
46 :    
47 :     \title{Compiling probe operations for Diderot}
48 :     \author{
49 : glk 289 John Reppy, Gordon Kindlmann \\
50 : jhr 156 University of Chicago \\
51 : glk 289 {\small\tt{}\{jhr|glk\}@cs.uchicago.edu} \\
52 : jhr 156 }
53 :     \date{\today}
54 :    
55 :     \begin{document}
56 :    
57 :     \maketitle
58 :     \thispagestyle{empty}
59 :    
60 :     \bibliographystyle{../common/alpha}
61 :     \bibliography{../common/strings-short,../common/manticore}
62 :    
63 :     \section{Introduction}
64 :    
65 :     This note describes the code needed to implement a probe of a field operation.
66 : jhr 270 In the discussion below, we use a number of notational conventions that are
67 :     summarized in the following table:
68 :     \begin{center}
69 : jhr 274 \begin{tabular}{cp{4in}}
70 : jhr 270 $V$ & the image data \\
71 :     $F$ & the field reconstructed from $V$ \\
72 :     $\vecp$ & the position vector in field-space \\
73 :     $\vecx$ & the position vector in image-space \\
74 : jhr 274 $\matM^{-1}$ & homogeneous matrix that represents the transformation from world space to image space \\
75 : jhr 270 $h$ & a piecewise polynomial convolution kernel \\
76 :     $s$ & the support of $h$ \\
77 :     $h_0,\,\ldots,h_{2s-1}$ & the polynomials that make up $h$ \\
78 :     \end{tabular}%
79 :     \end{center}%
80 : jhr 156
81 :     \section{Probing a 1D scalar field}
82 :     The simplest case is probing a 1D scalar field $F = V\circledast{}h$, where $s$ is the support
83 :     of $h$.
84 : jhr 160 The probe $F\mkw{@}p$ is computed as follows:
85 : jhr 156 \begin{eqnarray*}
86 : jhr 160 \left[\begin{array}{c} x \\ 1 \end{array}\right] & = & \matM^{-1} \left[\begin{array}{c} p \\ 1 \end{array}\right] \qquad \text{\textit{transform to image space}} \\
87 :     n & = & \FLOOR{x} \qquad \text{\textit{integer part of position}} \\
88 :     f & = & x - f \qquad \text{\textit{fractional part of position}} \\
89 :     F\mkw{@}p & = & \sum_{i=1-s}^s {V(n+i) h(f - i)}
90 : jhr 156 \end{eqnarray*}%
91 : jhr 270 Note that the coordinate system of the convolution filter is flipped (negated) from the coordinate system
92 :     of the image; this property is why we evaluate $h(f-i)$ for $V(n+i)$.
93 : jhr 156 The convolution $h$ is represented as a symmetric piecewise polynomial formed
94 : jhr 314 from $2s$ polynomials $h_0,\,\ldots,\,h_{2s-1}$.
95 : jhr 156 \begin{displaymath}
96 :     h(x) = \left\{\begin{array}{ll}
97 : jhr 270 0 & \text{$x \leq -s$ or $s \leq x$} \\
98 :     h_{\lfloor{}x\rfloor{}+s}(x) & \text{$-s < x < s$} \\
99 : jhr 156 \end{array}\right.
100 :     \end{displaymath}%
101 :     Thus, we can rewrite the probe computation as
102 :     \begin{displaymath}
103 : jhr 270 F\mkw{@}x = \sum_{i=1-s}^{s} {V(n+i) h_{s-i}(i - f)}
104 : jhr 156 \end{displaymath}%
105 :     \figref{fig:1d-probe} illustrates the situation for a kernel $h$ with support $s = 2$,
106 :     and \figref{fig:1d-probe-code} gives the C code for the probe operation, assuming that
107 :     $h$ is represented by third-degree polynomials.
108 :     \begin{figure}[t]
109 :     \begin{center}
110 :     \includegraphics[scale=0.8]{pictures/convo}
111 :     \end{center}%
112 :     \caption{1D scalar probe}
113 :     \label{fig:1d-probe}
114 :     \end{figure}%
115 :    
116 : jhr 279 \begin{figure}[t]
117 : jhr 156 \begin{quote}
118 : jhr 160 \lstset{language=C}
119 : jhr 156 \begin{lstlisting}
120 : jhr 270 double V[]; // image data
121 : glk 289 double h[4][4] = { // cubic B-spline ("bspln3")
122 : jhr 288 { 1.33333, 2.0, 1.0, 0.166667 }, // -2 .. -1
123 :     { 0.666667, 0.0, -1.0, -0.5 }, // -1 .. 0
124 :     { 0.666667, 0.0, -1.0, 0.5 }, // 0 .. 1
125 :     { 1.33333, -2.0, 1.0, -0.166667 }, // 1 .. 2
126 :     }
127 : jhr 156
128 : jhr 270 double probe (double p)
129 : jhr 156 {
130 : glk 289 double x = transform(p); // from world- to image-space position
131 :     double nd, f; int n;
132 : jhr 156
133 : glk 289 f = modf (x, &nd);
134 :     n = (int)nd;
135 : jhr 156
136 : jhr 160 double value = 0.0, t;
137 : jhr 156
138 : jhr 270 t = f + 1.0;
139 :     value += V[n-1] * (h[3][0] + t*(h[3][1] + t*(h[3][2] + t*h[3][3])));
140 :     t = f;
141 :     value += V[n] * (h[2][0] + t*(h[2][1] + t*(h[2][2] + t*h[2][3])));
142 : jhr 160 t = f - 1.0;
143 : jhr 270 value += V[n+1] * (h[1][0] + t*(h[1][1] + t*(h[1][2] + t*h[1][3])));
144 : jhr 160 t = f - 2.0;
145 : jhr 270 value += V[n+2] * (h[0][0] + t*(h[0][1] + t*(h[0][2] + t*h[0][3])));
146 : jhr 156
147 :     return value;
148 :     }
149 :     \end{lstlisting}%
150 :     \end{quote}%
151 : jhr 157 \caption{Computing $F\mkw{@}x$ for a 1D scalar field in C}
152 : jhr 156 \label{fig:1d-probe-code}
153 :     \end{figure}
154 : jhr 160 If we look at the four lines that define \texttt{value}, we see an opportunity for
155 :     using SIMD parallelism to speed the computation.
156 :     \figref{fig:1d-probe-code-opencl} gives the OpenCL for for the probe function, where
157 :     we have lifted the kernel coefficients into the vector constants \texttt{a}, \texttt{b},
158 : jhr 270 \texttt{c}, and \texttt{d}, and put the \texttt{t} values into a vector too.
159 :     We then use a dot product to compute the sum of the products of the
160 :     convolution-filter values and the image values.
161 : jhr 279 \begin{figure}[t]
162 : jhr 156 \begin{quote}
163 : jhr 160 \lstset{language=OpenCL}
164 : jhr 156 \begin{lstlisting}
165 : jhr 270 double4 d = (double4)(h[3][0], h[2][0], h[1][0], h[0][0]); // x^0 coeffs
166 :     double4 c = (double4)(h[3][1], h[2][1], h[2][1], h[0][1]); // x^1 coeffs
167 :     double4 b = (double4)(h[3][2], h[2][2], h[1][2], h[0][2]); // x^2 coeffs
168 :     double4 a = (double4)(h[3][3], h[2][3], h[1][3], h[0][3]); // x^3 coeffs
169 : jhr 156
170 : jhr 270 double probe (double p)
171 : jhr 156 {
172 : jhr 270 double x = transform(p); // image-space position
173 : jhr 156 double n, f;
174 :    
175 : jhr 270 f = modf (x, &n);
176 : jhr 156
177 : jhr 270 double4 v = (double4)(V[n-1], V[n], V[n+1], V[n+2]);
178 :     double4 t = (double4)(f + 1.0, f, f - 1.0, f - 2.0);
179 : jhr 160 return dot(v, d + t*(c + t*(b + t*a)));
180 : jhr 156 }
181 :     \end{lstlisting}%
182 :     \end{quote}%
183 : jhr 157 \caption{Computing $F\mkw{@}x$ for a 1D scalar field in OpenCL}
184 : jhr 156 \label{fig:1d-probe-code-opencl}
185 :     \end{figure}
186 :    
187 :     \section{Probing a 3D scalar field}
188 :    
189 :     The more common case is when the field is a convolution of a scalar 3-dimensional
190 :     field ($F = V\circledast{}h$).
191 :     Let $s$ be the support of $h$.
192 : jhr 159 Then the probe $F\mkw{@}\vecp$ is computed as follows:
193 : jhr 156 \begin{eqnarray*}
194 : jhr 159 \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\
195 :     \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\
196 :     \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\
197 :     F\mkw{@}\vecp & = & \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}}
198 : jhr 156 \end{eqnarray*}%
199 : jhr 314 We can restructure this equation to highlight the loop-invariant computations.\footnote{
200 :     This restructuring assumes that the $Z$-axis of the data is the ``fastest.''
201 :     }
202 : jhr 270 \begin{displaymath}
203 :     F\mkw{@}\vecp =
204 :     \sum_{i=1-s}^s \left({
205 :     \sum_{j=1-s}^s \left({
206 :     \sum_{k=1-s}^s \left({
207 :     V(\vecn+\VEC{i,j,k}) h(\vecf_z - k)
208 :     }\right)
209 :     }\right) h(\vecf_y - j)
210 :     }\right) h(\vecf_x - i)
211 :     \end{displaymath}%
212 :     Thus, we see that the kernel $h$ is evaluated $2s$ times per axis (\ie{}, $6s$ times in 3D).
213 : jhr 271 \figref{fig:3d-probe-code-c} gives C code for this operation.
214 :     \begin{figure}[p]
215 : jhr 156 \begin{quote}
216 : jhr 160 \lstset{language=C}
217 : jhr 156 \begin{lstlisting}
218 :     typedef double vec3[3];
219 :    
220 : jhr 270 double V[][][]; // image data
221 : jhr 288 double h[4][4]; // bslpn3 (as before)
222 : glk 289 const int s = 2; // kernel support
223 : jhr 156
224 : jhr 272 double probe (vec3 p)
225 : jhr 270 {
226 : jhr 272 double x[3] = transform(p); // image-space position
227 : jhr 293 double nxd, nyd, nzd, fx, fy, fz;
228 :     int nx, ny, nz;
229 :    
230 :     fx = modf (x[0], &nxd); nx = (int)nxd;
231 :     fy = modf (x[1], &nyd); ny = (int)nyd;
232 : glk 289 fz = modf (x[2], &nzd); nz = (int)nzd;
233 : jhr 156
234 : jhr 270 // compute kernel values for each axis
235 :     double hx[4], hy[4], hz[4];
236 : jhr 293 for (int i = 1-s; i <= s; i++) {
237 : jhr 271 double t;
238 :     t = fx - i;
239 :     hx[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
240 :     t = fy - i;
241 :     hy[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
242 :     t = fz - i;
243 :     hz[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
244 : jhr 270 }
245 :    
246 : jhr 293 float vx = 0.0;
247 :     for (int i = 1-s; i <= s; i++) {
248 :     float vy = 0.0;
249 :     for (int j = 1-s; j <= s; j++) {
250 :     float vz = 0.0;
251 :     for (int k = 1-s; k <= s; k++) {
252 :     vz += V[nx+i][ny+j][nz+k] * hz[k+s-1];
253 : jhr 270 }
254 : jhr 293 vy += vz * hy[j+s-1];
255 : jhr 270 }
256 : jhr 293 vx += vy * hx[i+s-1];
257 : jhr 270 }
258 :    
259 : jhr 293 return vx;
260 : jhr 156 }
261 :     \end{lstlisting}%
262 :     \end{quote}%
263 : jhr 157 \caption{Computing $F\mkw{@}\vecx$ for a 3D scalar field in C}
264 : jhr 271 \label{fig:3d-probe-code-c}
265 : jhr 156 \end{figure}
266 :    
267 : jhr 272 \begin{figure}[p]
268 :     \begin{quote}
269 : jhr 273 \lstset{language=OpenCL}
270 : jhr 272 \begin{lstlisting}
271 :     double V[][][]; // image data
272 : jhr 314 double4 a, b, c, d; // kernel coefficients
273 :     const int s = 2; // kernel support
274 : jhr 272
275 :     double probe (double4 p)
276 :     {
277 :     double4 x = transform(p); // image-space position
278 : jhr 293 double4 nd, f
279 :     int4 n;
280 : jhr 272
281 : jhr 293 f = modf (x, &nd);
282 :     n = convert_int4(nd);
283 : jhr 272
284 :     // compute kernel values for each axis
285 :     double4 t, hx, hy, hz;
286 : jhr 293 t = (double4)(f.x + 1, f.x, f.x - 1, f.x - 2);
287 : jhr 276 hx = d + t*(c + t*(b + t*a));
288 : jhr 293 t = (double4)(f.y + 1, f.y, f.y - 1, f.y - 2);
289 : jhr 276 hy = d + t*(c + t*(b + t*a));
290 : jhr 293 t = (double4)(f.z + 1, f.z, f.z - 1, f.z - 2);
291 : jhr 276 hz = d + t*(c + t*(b + t*a));
292 : jhr 272
293 : jhr 274 double vy[4], vz[4];
294 : jhr 275 for (int i = 1-s; i <= s; i++) {
295 :     for (int j = 1-s; j <= s; j++) {
296 : jhr 272 double4 v = (double4) (
297 : jhr 293 V[n.x+i][n.y+j][n.z-1],
298 :     V[n.x+i][n.y+j][n.z],
299 :     V[n.x+i][n.y+j][n.z+1],
300 :     V[n.x+i][n.y+j][n.z+2]);
301 : jhr 274 vz[j+s-1] += dot(v, hz);
302 : jhr 272 }
303 : jhr 274 vy[i+s-1] = dot((double4)(vz[0], vz[1], vz[2], vz[3]), hy);
304 : jhr 272 }
305 :    
306 : jhr 274 return dot((double4)(vy[0], vy[1], vy[2], vy[3]), hx);
307 : jhr 272 }
308 :     \end{lstlisting}%
309 :     \end{quote}%
310 :     \caption{Computing $F\mkw{@}\vecx$ for a 3D scalar field in OpenCL}
311 :     \label{fig:3d-probe-code-opencl}
312 :     \end{figure}
313 :    
314 : jhr 348 \section{Generalizing to derivative fields}
315 :     To generalize the probe operation to work on derivative fields (\eg{}, $(\nabla^k F)\mkw{@}\vecp$),
316 :     we represent the $\nabla^k$ operation as a $k$-order tensor of partial derivative operators.
317 :     To simplify the presentation, we will restrict the discussion to $3$-dimensional space,
318 :     but it easily generalizes.
319 :     In 3D, a partial-derivative operator has the form
320 :     \begin{displaymath}
321 :     \frac{\partial}{\partial{}x^i \partial{}y^j \partial{}z^k}
322 :     \end{displaymath}%
323 :     where $i$, $j$, and $k$ are the number of levels of differentiation in the $x$, $y$,
324 :     and $z$-axes (respectively).
325 :     The normal convention is to omit explicit mention of axes with zero levels of differentiation,
326 :     but we will be explicit here.
327 :     We use $\mathbf{Id}^n$ to represent the identity operator in $n$-dimensions (i.e., the
328 :     one with zero=-levels of differentiation in all axes).
329 :    
330 :     The product of two partial-derivative operators is formed by summing the exponents;
331 :     for example in 3D we have
332 :     \begin{displaymath}
333 :     \frac{\partial}{\partial{}x^i \partial{}y^j \partial{}z^k}
334 :     \frac{\partial}{\partial{}x^{i'} \partial{}y^{j'} \partial{}z^{k'}} =
335 :     \frac{\partial}{\partial{}x^{i+i'} \partial{}y^{j+j'} \partial{}z^{k+k'}}
336 :     \end{displaymath}%
337 :     The last bit of notation that we need is the $n$-vector of partial-derivative
338 :     operators $\delta^n$, in which the $i$th element has one level of differentiation in the $i$th
339 :     axis (and zero in all other axes).
340 :     For example,
341 :     \begin{displaymath}
342 :     \delta^3 = \left[
343 :     \frac{\partial}{\partial{}x^1 \partial{}y^0 \partial{}z^0} \quad
344 :     \frac{\partial}{\partial{}x^0 \partial{}y^1 \partial{}z^0} \quad
345 :     \frac{\partial}{\partial{}x^0 \partial{}y^0 \partial{}z^1}
346 :     \right]
347 :     \end{displaymath}%
348 :     For $k > 0$ levels of differentiation in $n$-dimensional space, we have
349 :     \begin{displaymath}
350 :     \nabla^k = \left\{\begin{array}{ll}
351 :     \mathbf{Id}^n & \text{when $k = 0$} \\
352 :     \delta^n \otimes \nabla^{k-1} & \text{when $k > 0$} \\
353 :     \end{array}\right.
354 :     \end{displaymath}%
355 :     Consider, for example, the case where $k = 2$.
356 :     \begin{displaymath}
357 :     \nabla^2 = \delta^3 \otimes \delta^3 = \left[\begin{array}{ccc}
358 :     \frac{\partial}{\partial{}x^2 \partial{}y^0 \partial{}z^0}
359 :     & \frac{\partial}{\partial{}x^1 \partial{}y^1 \partial{}z^0}
360 :     & \frac{\partial}{\partial{}x^1 \partial{}y^0 \partial{}z^1} \\
361 :     \frac{\partial}{\partial{}x^1 \partial{}y^1 \partial{}z^0}
362 :     & \frac{\partial}{\partial{}x^0 \partial{}y^2 \partial{}z^0}
363 :     & \frac{\partial}{\partial{}x^0 \partial{}y^1 \partial{}z^1} \\
364 :     \frac{\partial}{\partial{}x^1 \partial{}y^0 \partial{}z^1}
365 :     & \frac{\partial}{\partial{}x^0 \partial{}y^1 \partial{}z^1}
366 :     & \frac{\partial}{\partial{}x^0 \partial{}y^0 \partial{}z^2} \\
367 :     \end{array}\right]
368 :     \end{displaymath}%
369 :    
370 :     When we have a probe of a derivative field $(\nabla^k F)\mkw{@}\vecp$, we will generate code for
371 :     each partial derivative operator $\frac{\partial}{\partial{}x^i\partial{}y^j\partial{}z^k}$
372 :     separately.
373 :     If $F = V\circledast{}h$, then we will use $h^i(x) h^j(y) h^k(z)$ as the interpolation
374 :     coefficients, where $h^i$ is the $i$th derivative of $h$.
375 :    
376 : jhr 159 \section{Probing a 3D derivative field}
377 :     We next consider the case of probing the derivative of a scalar field $F = V\circledast{}h$, where $s$ is the support
378 :     of $h$.
379 : jhr 279 The probe $(\nabla F)\mkw{@}\vecp$ produces a vector result as follows:
380 : jhr 314 \begin{displaymath}
381 :     (\nabla F)\mkw{@}\vecp = \left[\begin{array}{c}
382 :     \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_z - k) h(\vecf_y - j) h'(\vecf_x - i)}}} \\
383 :     \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_z - k) h'(\vecf_y - j) h(\vecf_x - i)}}} \\
384 :     \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h'(\vecf_z - k) h(\vecf_y - j) h(\vecf_x - i)}}} \\
385 :     \end{array}\right]
386 :     \end{displaymath}%
387 :     where
388 : jhr 159 \begin{eqnarray*}
389 :     \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\
390 :     \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\
391 : jhr 314 \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}}
392 : jhr 159 \end{eqnarray*}%
393 :    
394 : jhr 279 \begin{figure}[p]
395 :     \begin{quote}
396 :     \lstset{language=C}
397 :     \begin{lstlisting}
398 : glk 301 vec3 probe (vec3 p)
399 : jhr 279 {
400 :     double x[3] = transform(p); // image-space position
401 : glk 301 double nxd, nyd, nzd, fx, fy, fz;
402 :     int nx, ny, nz;
403 : jhr 279
404 : jhr 314 fx = modf (x[0], &nxd); nx = (int)nxd;
405 :     fy = modf (x[1], &nyd); ny = (int)nyd;
406 : glk 301 fz = modf (x[2], &nzd); nz = (int)nzd;
407 : jhr 279
408 :     // compute kernel values for each axis
409 :     double hx[4], dhx[4], hy[4], dhy[4], hz[4], dhz[4];
410 : glk 301 for (int i = 1-s; i <= s; i++) {
411 : jhr 279 double t;
412 :     t = fx - i;
413 :     hx[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
414 :     dhx[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]);
415 :     t = fy - i;
416 :     hy[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
417 :     dhy[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]);
418 :     t = fz - i;
419 :     hz[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
420 :     dhz[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]);
421 :     }
422 :    
423 :     vec3 vx = {0, 0, 0};
424 :     for (int i = 1-s; i <= s; i++) {
425 :     vec3 vy = {0, 0, 0};
426 :     for (int j = 1-s; j <= s; j++) {
427 :     vec3 vz = {0, 0, 0};
428 :     for (int k = 1-s; k <= s; k++) {
429 :     double v = V[nx+i][ny+j][nz+k];
430 :     vz[0] = v * hz[k+s-1];
431 : glk 301 vz[1] = v * hz[k+s-1];
432 : jhr 279 vz[2] = v * dhz[k+s-1];
433 :     }
434 :     vy[0] += vz[0] * hy[j+s-1];
435 :     vy[1] += vz[1] * dhy[j+s-1];
436 :     vy[2] += vz[2] * hy[j+s-1];
437 :     }
438 : glk 301 vx[0] += vy[0] * dhx[i+s-1];
439 :     vx[1] += vy[1] * hx[i+s-1];
440 :     vx[2] += vy[2] * hx[i+s-1];
441 : jhr 279 }
442 :     return vx;
443 :     }
444 :     \end{lstlisting}%
445 :     \end{quote}%
446 :     \caption{Computing $(\nabla F)\mkw{@}\vecx$ for a 3D scalar field in C}
447 :     \label{fig:3d-deriv-probe-code-c}
448 :     \end{figure}
449 :    
450 : jhr 156 \end{document}

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0