Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /branches/pure-cfg/doc/probe/paper.tex
ViewVC logotype

Annotation of /branches/pure-cfg/doc/probe/paper.tex

Parent Directory Parent Directory | Revision Log Revision Log


Revision 629 - (view) (download) (as text)

1 : jhr 156 \documentclass[11pt]{article}
2 :    
3 :     \input{defs}
4 :     \usepackage{graphicx}
5 :     \usepackage{listings}
6 :     \lstset{
7 : jhr 270 basicstyle=\ttfamily\footnotesize,
8 : jhr 156 keywordstyle=\bfseries,
9 :     showstringspaces=false,
10 :     }
11 : jhr 160 \lstdefinelanguage{OpenCL}[]{C}{%
12 :     morekeywords={
13 :     char2,char4,char8,char16,
14 :     uchar2,uchar4,uchar8,uchar16,
15 :     short2,short4,short8,short16,
16 :     ushort2,ushort4,ushort8,ushort16,
17 :     int2,int4,int8,int16,
18 :     uint2,uint4,uint8,uint16,
19 :     long2,long4,long8,long16,
20 :     ulong2,ulong4,ulong8,ulong16,
21 :     float2,float4,float8,float16,
22 :     ufloat2,ufloat4,ufloat8,ufloat16,
23 :     double2,double4,double8,double16,
24 :     udouble2,udouble4,udouble8,udouble16,
25 :     constant,__constant,kernel,__kernel,private,__private},
26 :     moredirectives={version},
27 :     deletekeywords={}
28 :     }
29 : jhr 156
30 :     \lstset{
31 :     language=C,
32 :     }
33 :    
34 :     \setlength{\textwidth}{6in}
35 :     \setlength{\oddsidemargin}{0.25in}
36 :     \setlength{\evensidemargin}{0.25in}
37 :     \setlength{\parskip}{5pt}
38 :    
39 :     \newcommand{\matM}{\mathbf{M}}
40 : jhr 157 \newcommand{\vecx}{\mathbf{x}}
41 : jhr 159 \newcommand{\vecp}{\mathbf{p}}
42 : jhr 156 \newcommand{\vecn}{\mathbf{n}}
43 :     \newcommand{\vecf}{\mathbf{f}}
44 :     \newcommand{\VEC}[1]{\left\langle{#1}\right\rangle}
45 :     \newcommand{\FLOOR}[1]{\left\lfloor{#1}\right\rfloor}
46 :    
47 :     \title{Compiling probe operations for Diderot}
48 :     \author{
49 : glk 289 John Reppy, Gordon Kindlmann \\
50 : jhr 156 University of Chicago \\
51 : glk 289 {\small\tt{}\{jhr|glk\}@cs.uchicago.edu} \\
52 : jhr 156 }
53 :     \date{\today}
54 :    
55 :     \begin{document}
56 :    
57 :     \maketitle
58 :     \thispagestyle{empty}
59 :    
60 :     \section{Introduction}
61 :    
62 :     This note describes the code needed to implement a probe of a field operation.
63 : jhr 270 In the discussion below, we use a number of notational conventions that are
64 :     summarized in the following table:
65 :     \begin{center}
66 : jhr 274 \begin{tabular}{cp{4in}}
67 : jhr 270 $V$ & the image data \\
68 :     $F$ & the field reconstructed from $V$ \\
69 :     $\vecp$ & the position vector in field-space \\
70 :     $\vecx$ & the position vector in image-space \\
71 : jhr 274 $\matM^{-1}$ & homogeneous matrix that represents the transformation from world space to image space \\
72 : jhr 270 $h$ & a piecewise polynomial convolution kernel \\
73 :     $s$ & the support of $h$ \\
74 :     $h_0,\,\ldots,h_{2s-1}$ & the polynomials that make up $h$ \\
75 :     \end{tabular}%
76 :     \end{center}%
77 : jhr 156
78 :     \section{Probing a 1D scalar field}
79 :     The simplest case is probing a 1D scalar field $F = V\circledast{}h$, where $s$ is the support
80 :     of $h$.
81 : jhr 160 The probe $F\mkw{@}p$ is computed as follows:
82 : jhr 156 \begin{eqnarray*}
83 : jhr 160 \left[\begin{array}{c} x \\ 1 \end{array}\right] & = & \matM^{-1} \left[\begin{array}{c} p \\ 1 \end{array}\right] \qquad \text{\textit{transform to image space}} \\
84 :     n & = & \FLOOR{x} \qquad \text{\textit{integer part of position}} \\
85 : jhr 629 f & = & x - n \qquad \text{\textit{fractional part of position}} \\
86 : jhr 160 F\mkw{@}p & = & \sum_{i=1-s}^s {V(n+i) h(f - i)}
87 : jhr 156 \end{eqnarray*}%
88 : jhr 270 Note that the coordinate system of the convolution filter is flipped (negated) from the coordinate system
89 :     of the image; this property is why we evaluate $h(f-i)$ for $V(n+i)$.
90 : jhr 156 The convolution $h$ is represented as a symmetric piecewise polynomial formed
91 : jhr 314 from $2s$ polynomials $h_0,\,\ldots,\,h_{2s-1}$.
92 : jhr 156 \begin{displaymath}
93 :     h(x) = \left\{\begin{array}{ll}
94 : jhr 270 0 & \text{$x \leq -s$ or $s \leq x$} \\
95 :     h_{\lfloor{}x\rfloor{}+s}(x) & \text{$-s < x < s$} \\
96 : jhr 156 \end{array}\right.
97 :     \end{displaymath}%
98 :     Thus, we can rewrite the probe computation as
99 :     \begin{displaymath}
100 : jhr 629 F\mkw{@}x = \sum_{i=1-s}^{s} {V(n+i) h_{s-i}(f - i)}
101 : jhr 156 \end{displaymath}%
102 :     \figref{fig:1d-probe} illustrates the situation for a kernel $h$ with support $s = 2$,
103 :     and \figref{fig:1d-probe-code} gives the C code for the probe operation, assuming that
104 :     $h$ is represented by third-degree polynomials.
105 :     \begin{figure}[t]
106 :     \begin{center}
107 :     \includegraphics[scale=0.8]{pictures/convo}
108 :     \end{center}%
109 :     \caption{1D scalar probe}
110 :     \label{fig:1d-probe}
111 :     \end{figure}%
112 :    
113 : jhr 279 \begin{figure}[t]
114 : jhr 156 \begin{quote}
115 : jhr 160 \lstset{language=C}
116 : jhr 156 \begin{lstlisting}
117 : jhr 270 double V[]; // image data
118 : glk 289 double h[4][4] = { // cubic B-spline ("bspln3")
119 : jhr 288 { 1.33333, 2.0, 1.0, 0.166667 }, // -2 .. -1
120 :     { 0.666667, 0.0, -1.0, -0.5 }, // -1 .. 0
121 :     { 0.666667, 0.0, -1.0, 0.5 }, // 0 .. 1
122 :     { 1.33333, -2.0, 1.0, -0.166667 }, // 1 .. 2
123 :     }
124 : jhr 156
125 : jhr 270 double probe (double p)
126 : jhr 156 {
127 : glk 289 double x = transform(p); // from world- to image-space position
128 :     double nd, f; int n;
129 : jhr 156
130 : glk 289 f = modf (x, &nd);
131 :     n = (int)nd;
132 : jhr 156
133 : jhr 160 double value = 0.0, t;
134 : jhr 156
135 : jhr 270 t = f + 1.0;
136 :     value += V[n-1] * (h[3][0] + t*(h[3][1] + t*(h[3][2] + t*h[3][3])));
137 :     t = f;
138 :     value += V[n] * (h[2][0] + t*(h[2][1] + t*(h[2][2] + t*h[2][3])));
139 : jhr 160 t = f - 1.0;
140 : jhr 270 value += V[n+1] * (h[1][0] + t*(h[1][1] + t*(h[1][2] + t*h[1][3])));
141 : jhr 160 t = f - 2.0;
142 : jhr 270 value += V[n+2] * (h[0][0] + t*(h[0][1] + t*(h[0][2] + t*h[0][3])));
143 : jhr 156
144 :     return value;
145 :     }
146 :     \end{lstlisting}%
147 :     \end{quote}%
148 : jhr 157 \caption{Computing $F\mkw{@}x$ for a 1D scalar field in C}
149 : jhr 156 \label{fig:1d-probe-code}
150 :     \end{figure}
151 : jhr 160 If we look at the four lines that define \texttt{value}, we see an opportunity for
152 :     using SIMD parallelism to speed the computation.
153 :     \figref{fig:1d-probe-code-opencl} gives the OpenCL for for the probe function, where
154 :     we have lifted the kernel coefficients into the vector constants \texttt{a}, \texttt{b},
155 : jhr 270 \texttt{c}, and \texttt{d}, and put the \texttt{t} values into a vector too.
156 :     We then use a dot product to compute the sum of the products of the
157 :     convolution-filter values and the image values.
158 : jhr 279 \begin{figure}[t]
159 : jhr 156 \begin{quote}
160 : jhr 160 \lstset{language=OpenCL}
161 : jhr 156 \begin{lstlisting}
162 : jhr 270 double4 d = (double4)(h[3][0], h[2][0], h[1][0], h[0][0]); // x^0 coeffs
163 :     double4 c = (double4)(h[3][1], h[2][1], h[2][1], h[0][1]); // x^1 coeffs
164 :     double4 b = (double4)(h[3][2], h[2][2], h[1][2], h[0][2]); // x^2 coeffs
165 :     double4 a = (double4)(h[3][3], h[2][3], h[1][3], h[0][3]); // x^3 coeffs
166 : jhr 156
167 : jhr 270 double probe (double p)
168 : jhr 156 {
169 : jhr 270 double x = transform(p); // image-space position
170 : jhr 156 double n, f;
171 :    
172 : jhr 270 f = modf (x, &n);
173 : jhr 156
174 : jhr 270 double4 v = (double4)(V[n-1], V[n], V[n+1], V[n+2]);
175 :     double4 t = (double4)(f + 1.0, f, f - 1.0, f - 2.0);
176 : jhr 160 return dot(v, d + t*(c + t*(b + t*a)));
177 : jhr 156 }
178 :     \end{lstlisting}%
179 :     \end{quote}%
180 : jhr 157 \caption{Computing $F\mkw{@}x$ for a 1D scalar field in OpenCL}
181 : jhr 156 \label{fig:1d-probe-code-opencl}
182 :     \end{figure}
183 :    
184 :     \section{Probing a 3D scalar field}
185 :    
186 :     The more common case is when the field is a convolution of a scalar 3-dimensional
187 :     field ($F = V\circledast{}h$).
188 :     Let $s$ be the support of $h$.
189 : jhr 159 Then the probe $F\mkw{@}\vecp$ is computed as follows:
190 : jhr 156 \begin{eqnarray*}
191 : jhr 159 \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\
192 :     \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\
193 :     \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\
194 :     F\mkw{@}\vecp & = & \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}}
195 : jhr 156 \end{eqnarray*}%
196 : jhr 314 We can restructure this equation to highlight the loop-invariant computations.\footnote{
197 :     This restructuring assumes that the $Z$-axis of the data is the ``fastest.''
198 :     }
199 : jhr 270 \begin{displaymath}
200 :     F\mkw{@}\vecp =
201 :     \sum_{i=1-s}^s \left({
202 :     \sum_{j=1-s}^s \left({
203 :     \sum_{k=1-s}^s \left({
204 :     V(\vecn+\VEC{i,j,k}) h(\vecf_z - k)
205 :     }\right)
206 :     }\right) h(\vecf_y - j)
207 :     }\right) h(\vecf_x - i)
208 :     \end{displaymath}%
209 :     Thus, we see that the kernel $h$ is evaluated $2s$ times per axis (\ie{}, $6s$ times in 3D).
210 : jhr 271 \figref{fig:3d-probe-code-c} gives C code for this operation.
211 :     \begin{figure}[p]
212 : jhr 156 \begin{quote}
213 : jhr 160 \lstset{language=C}
214 : jhr 156 \begin{lstlisting}
215 :     typedef double vec3[3];
216 :    
217 : jhr 270 double V[][][]; // image data
218 : jhr 288 double h[4][4]; // bslpn3 (as before)
219 : glk 289 const int s = 2; // kernel support
220 : jhr 156
221 : jhr 272 double probe (vec3 p)
222 : jhr 270 {
223 : jhr 272 double x[3] = transform(p); // image-space position
224 : jhr 293 double nxd, nyd, nzd, fx, fy, fz;
225 :     int nx, ny, nz;
226 :    
227 :     fx = modf (x[0], &nxd); nx = (int)nxd;
228 :     fy = modf (x[1], &nyd); ny = (int)nyd;
229 : glk 289 fz = modf (x[2], &nzd); nz = (int)nzd;
230 : jhr 156
231 : jhr 270 // compute kernel values for each axis
232 :     double hx[4], hy[4], hz[4];
233 : jhr 293 for (int i = 1-s; i <= s; i++) {
234 : jhr 271 double t;
235 :     t = fx - i;
236 :     hx[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
237 :     t = fy - i;
238 :     hy[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
239 :     t = fz - i;
240 :     hz[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
241 : jhr 270 }
242 :    
243 : jhr 293 float vx = 0.0;
244 :     for (int i = 1-s; i <= s; i++) {
245 :     float vy = 0.0;
246 :     for (int j = 1-s; j <= s; j++) {
247 :     float vz = 0.0;
248 :     for (int k = 1-s; k <= s; k++) {
249 :     vz += V[nx+i][ny+j][nz+k] * hz[k+s-1];
250 : jhr 270 }
251 : jhr 293 vy += vz * hy[j+s-1];
252 : jhr 270 }
253 : jhr 293 vx += vy * hx[i+s-1];
254 : jhr 270 }
255 :    
256 : jhr 293 return vx;
257 : jhr 156 }
258 :     \end{lstlisting}%
259 :     \end{quote}%
260 : jhr 157 \caption{Computing $F\mkw{@}\vecx$ for a 3D scalar field in C}
261 : jhr 271 \label{fig:3d-probe-code-c}
262 : jhr 156 \end{figure}
263 :    
264 : jhr 272 \begin{figure}[p]
265 :     \begin{quote}
266 : jhr 273 \lstset{language=OpenCL}
267 : jhr 272 \begin{lstlisting}
268 :     double V[][][]; // image data
269 : jhr 314 double4 a, b, c, d; // kernel coefficients
270 :     const int s = 2; // kernel support
271 : jhr 272
272 :     double probe (double4 p)
273 :     {
274 :     double4 x = transform(p); // image-space position
275 : jhr 293 double4 nd, f
276 :     int4 n;
277 : jhr 272
278 : jhr 293 f = modf (x, &nd);
279 :     n = convert_int4(nd);
280 : jhr 272
281 :     // compute kernel values for each axis
282 :     double4 t, hx, hy, hz;
283 : jhr 293 t = (double4)(f.x + 1, f.x, f.x - 1, f.x - 2);
284 : jhr 276 hx = d + t*(c + t*(b + t*a));
285 : jhr 293 t = (double4)(f.y + 1, f.y, f.y - 1, f.y - 2);
286 : jhr 276 hy = d + t*(c + t*(b + t*a));
287 : jhr 293 t = (double4)(f.z + 1, f.z, f.z - 1, f.z - 2);
288 : jhr 276 hz = d + t*(c + t*(b + t*a));
289 : jhr 272
290 : jhr 274 double vy[4], vz[4];
291 : jhr 275 for (int i = 1-s; i <= s; i++) {
292 :     for (int j = 1-s; j <= s; j++) {
293 : jhr 272 double4 v = (double4) (
294 : jhr 293 V[n.x+i][n.y+j][n.z-1],
295 :     V[n.x+i][n.y+j][n.z],
296 :     V[n.x+i][n.y+j][n.z+1],
297 :     V[n.x+i][n.y+j][n.z+2]);
298 : jhr 274 vz[j+s-1] += dot(v, hz);
299 : jhr 272 }
300 : jhr 274 vy[i+s-1] = dot((double4)(vz[0], vz[1], vz[2], vz[3]), hy);
301 : jhr 272 }
302 :    
303 : jhr 274 return dot((double4)(vy[0], vy[1], vy[2], vy[3]), hx);
304 : jhr 272 }
305 :     \end{lstlisting}%
306 :     \end{quote}%
307 :     \caption{Computing $F\mkw{@}\vecx$ for a 3D scalar field in OpenCL}
308 :     \label{fig:3d-probe-code-opencl}
309 :     \end{figure}
310 :    
311 : jhr 348 \section{Generalizing to derivative fields}
312 :     To generalize the probe operation to work on derivative fields (\eg{}, $(\nabla^k F)\mkw{@}\vecp$),
313 :     we represent the $\nabla^k$ operation as a $k$-order tensor of partial derivative operators.
314 :     To simplify the presentation, we will restrict the discussion to $3$-dimensional space,
315 :     but it easily generalizes.
316 :     In 3D, a partial-derivative operator has the form
317 :     \begin{displaymath}
318 :     \frac{\partial}{\partial{}x^i \partial{}y^j \partial{}z^k}
319 :     \end{displaymath}%
320 :     where $i$, $j$, and $k$ are the number of levels of differentiation in the $x$, $y$,
321 :     and $z$-axes (respectively).
322 :     The normal convention is to omit explicit mention of axes with zero levels of differentiation,
323 :     but we will be explicit here.
324 :     We use $\mathbf{Id}^n$ to represent the identity operator in $n$-dimensions (i.e., the
325 :     one with zero=-levels of differentiation in all axes).
326 :    
327 :     The product of two partial-derivative operators is formed by summing the exponents;
328 :     for example in 3D we have
329 :     \begin{displaymath}
330 :     \frac{\partial}{\partial{}x^i \partial{}y^j \partial{}z^k}
331 :     \frac{\partial}{\partial{}x^{i'} \partial{}y^{j'} \partial{}z^{k'}} =
332 :     \frac{\partial}{\partial{}x^{i+i'} \partial{}y^{j+j'} \partial{}z^{k+k'}}
333 :     \end{displaymath}%
334 :     The last bit of notation that we need is the $n$-vector of partial-derivative
335 :     operators $\delta^n$, in which the $i$th element has one level of differentiation in the $i$th
336 :     axis (and zero in all other axes).
337 :     For example,
338 :     \begin{displaymath}
339 :     \delta^3 = \left[
340 :     \frac{\partial}{\partial{}x^1 \partial{}y^0 \partial{}z^0} \quad
341 :     \frac{\partial}{\partial{}x^0 \partial{}y^1 \partial{}z^0} \quad
342 :     \frac{\partial}{\partial{}x^0 \partial{}y^0 \partial{}z^1}
343 :     \right]
344 :     \end{displaymath}%
345 :     For $k > 0$ levels of differentiation in $n$-dimensional space, we have
346 :     \begin{displaymath}
347 :     \nabla^k = \left\{\begin{array}{ll}
348 :     \mathbf{Id}^n & \text{when $k = 0$} \\
349 :     \delta^n \otimes \nabla^{k-1} & \text{when $k > 0$} \\
350 :     \end{array}\right.
351 :     \end{displaymath}%
352 :     Consider, for example, the case where $k = 2$.
353 :     \begin{displaymath}
354 :     \nabla^2 = \delta^3 \otimes \delta^3 = \left[\begin{array}{ccc}
355 :     \frac{\partial}{\partial{}x^2 \partial{}y^0 \partial{}z^0}
356 :     & \frac{\partial}{\partial{}x^1 \partial{}y^1 \partial{}z^0}
357 :     & \frac{\partial}{\partial{}x^1 \partial{}y^0 \partial{}z^1} \\
358 :     \frac{\partial}{\partial{}x^1 \partial{}y^1 \partial{}z^0}
359 :     & \frac{\partial}{\partial{}x^0 \partial{}y^2 \partial{}z^0}
360 :     & \frac{\partial}{\partial{}x^0 \partial{}y^1 \partial{}z^1} \\
361 :     \frac{\partial}{\partial{}x^1 \partial{}y^0 \partial{}z^1}
362 :     & \frac{\partial}{\partial{}x^0 \partial{}y^1 \partial{}z^1}
363 :     & \frac{\partial}{\partial{}x^0 \partial{}y^0 \partial{}z^2} \\
364 :     \end{array}\right]
365 :     \end{displaymath}%
366 :    
367 :     When we have a probe of a derivative field $(\nabla^k F)\mkw{@}\vecp$, we will generate code for
368 :     each partial derivative operator $\frac{\partial}{\partial{}x^i\partial{}y^j\partial{}z^k}$
369 :     separately.
370 :     If $F = V\circledast{}h$, then we will use $h^i(x) h^j(y) h^k(z)$ as the interpolation
371 :     coefficients, where $h^i$ is the $i$th derivative of $h$.
372 :    
373 : jhr 159 \section{Probing a 3D derivative field}
374 :     We next consider the case of probing the derivative of a scalar field $F = V\circledast{}h$, where $s$ is the support
375 :     of $h$.
376 : jhr 279 The probe $(\nabla F)\mkw{@}\vecp$ produces a vector result as follows:
377 : jhr 314 \begin{displaymath}
378 :     (\nabla F)\mkw{@}\vecp = \left[\begin{array}{c}
379 :     \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_z - k) h(\vecf_y - j) h'(\vecf_x - i)}}} \\
380 :     \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_z - k) h'(\vecf_y - j) h(\vecf_x - i)}}} \\
381 :     \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h'(\vecf_z - k) h(\vecf_y - j) h(\vecf_x - i)}}} \\
382 :     \end{array}\right]
383 :     \end{displaymath}%
384 :     where
385 : jhr 159 \begin{eqnarray*}
386 :     \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\
387 :     \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\
388 : jhr 314 \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}}
389 : jhr 159 \end{eqnarray*}%
390 :    
391 : jhr 279 \begin{figure}[p]
392 :     \begin{quote}
393 :     \lstset{language=C}
394 :     \begin{lstlisting}
395 : glk 301 vec3 probe (vec3 p)
396 : jhr 279 {
397 :     double x[3] = transform(p); // image-space position
398 : glk 301 double nxd, nyd, nzd, fx, fy, fz;
399 :     int nx, ny, nz;
400 : jhr 279
401 : jhr 314 fx = modf (x[0], &nxd); nx = (int)nxd;
402 :     fy = modf (x[1], &nyd); ny = (int)nyd;
403 : glk 301 fz = modf (x[2], &nzd); nz = (int)nzd;
404 : jhr 279
405 :     // compute kernel values for each axis
406 :     double hx[4], dhx[4], hy[4], dhy[4], hz[4], dhz[4];
407 : glk 301 for (int i = 1-s; i <= s; i++) {
408 : jhr 279 double t;
409 :     t = fx - i;
410 :     hx[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
411 :     dhx[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]);
412 :     t = fy - i;
413 :     hy[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
414 :     dhy[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]);
415 :     t = fz - i;
416 :     hz[i+s-1] = h[s-i][0] + t*(h[s-i][1] + t*(h[s-i][2] + t*h[s-i][3]));
417 :     dhz[i+s-1] = h[s-i][1] + t*(2*h[s-i][2] + t*3*h[s-i][3]);
418 :     }
419 :    
420 :     vec3 vx = {0, 0, 0};
421 :     for (int i = 1-s; i <= s; i++) {
422 :     vec3 vy = {0, 0, 0};
423 :     for (int j = 1-s; j <= s; j++) {
424 :     vec3 vz = {0, 0, 0};
425 :     for (int k = 1-s; k <= s; k++) {
426 :     double v = V[nx+i][ny+j][nz+k];
427 :     vz[0] = v * hz[k+s-1];
428 : glk 301 vz[1] = v * hz[k+s-1];
429 : jhr 279 vz[2] = v * dhz[k+s-1];
430 :     }
431 :     vy[0] += vz[0] * hy[j+s-1];
432 :     vy[1] += vz[1] * dhy[j+s-1];
433 :     vy[2] += vz[2] * hy[j+s-1];
434 :     }
435 : glk 301 vx[0] += vy[0] * dhx[i+s-1];
436 :     vx[1] += vy[1] * hx[i+s-1];
437 :     vx[2] += vy[2] * hx[i+s-1];
438 : jhr 279 }
439 :     return vx;
440 :     }
441 :     \end{lstlisting}%
442 :     \end{quote}%
443 :     \caption{Computing $(\nabla F)\mkw{@}\vecx$ for a 3D scalar field in C}
444 :     \label{fig:3d-deriv-probe-code-c}
445 :     \end{figure}
446 :    
447 : jhr 629
448 :     \bibliographystyle{../common/alpha}
449 :     \bibliography{../common/strings-short,../common/manticore}
450 :    
451 : jhr 156 \end{document}

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0