SCM Repository
Annotation of /trunk/doc/probe/paper.tex
Parent Directory
|
Revision Log
Revision 270 - (view) (download) (as text)
1 : | jhr | 156 | \documentclass[11pt]{article} |
2 : | |||
3 : | \input{defs} | ||
4 : | \usepackage{graphicx} | ||
5 : | \usepackage{listings} | ||
6 : | \lstset{ | ||
7 : | jhr | 270 | basicstyle=\ttfamily\footnotesize, |
8 : | jhr | 156 | keywordstyle=\bfseries, |
9 : | showstringspaces=false, | ||
10 : | } | ||
11 : | jhr | 160 | \lstdefinelanguage{OpenCL}[]{C}{% |
12 : | morekeywords={ | ||
13 : | char2,char4,char8,char16, | ||
14 : | uchar2,uchar4,uchar8,uchar16, | ||
15 : | short2,short4,short8,short16, | ||
16 : | ushort2,ushort4,ushort8,ushort16, | ||
17 : | int2,int4,int8,int16, | ||
18 : | uint2,uint4,uint8,uint16, | ||
19 : | long2,long4,long8,long16, | ||
20 : | ulong2,ulong4,ulong8,ulong16, | ||
21 : | float2,float4,float8,float16, | ||
22 : | ufloat2,ufloat4,ufloat8,ufloat16, | ||
23 : | double2,double4,double8,double16, | ||
24 : | udouble2,udouble4,udouble8,udouble16, | ||
25 : | constant,__constant,kernel,__kernel,private,__private}, | ||
26 : | moredirectives={version}, | ||
27 : | deletekeywords={} | ||
28 : | } | ||
29 : | jhr | 156 | |
30 : | \lstset{ | ||
31 : | language=C, | ||
32 : | } | ||
33 : | |||
34 : | \setlength{\textwidth}{6in} | ||
35 : | \setlength{\oddsidemargin}{0.25in} | ||
36 : | \setlength{\evensidemargin}{0.25in} | ||
37 : | \setlength{\parskip}{5pt} | ||
38 : | |||
39 : | \newcommand{\matM}{\mathbf{M}} | ||
40 : | jhr | 157 | \newcommand{\vecx}{\mathbf{x}} |
41 : | jhr | 159 | \newcommand{\vecp}{\mathbf{p}} |
42 : | jhr | 156 | \newcommand{\vecn}{\mathbf{n}} |
43 : | \newcommand{\vecf}{\mathbf{f}} | ||
44 : | \newcommand{\VEC}[1]{\left\langle{#1}\right\rangle} | ||
45 : | \newcommand{\FLOOR}[1]{\left\lfloor{#1}\right\rfloor} | ||
46 : | |||
47 : | \title{Compiling probe operations for Diderot} | ||
48 : | \author{ | ||
49 : | John Reppy \\ | ||
50 : | University of Chicago \\ | ||
51 : | {\small\tt{}jhr@cs.uchicago.edu} \\ | ||
52 : | } | ||
53 : | \date{\today} | ||
54 : | |||
55 : | \begin{document} | ||
56 : | |||
57 : | \maketitle | ||
58 : | \thispagestyle{empty} | ||
59 : | |||
60 : | \bibliographystyle{../common/alpha} | ||
61 : | \bibliography{../common/strings-short,../common/manticore} | ||
62 : | |||
63 : | \section{Introduction} | ||
64 : | |||
65 : | This note describes the code needed to implement a probe of a field operation. | ||
66 : | jhr | 270 | In the discussion below, we use a number of notational conventions that are |
67 : | summarized in the following table: | ||
68 : | \begin{center} | ||
69 : | \begin{tabular}{cl} | ||
70 : | $V$ & the image data \\ | ||
71 : | $F$ & the field reconstructed from $V$ \\ | ||
72 : | $\vecp$ & the position vector in field-space \\ | ||
73 : | $\vecx$ & the position vector in image-space \\ | ||
74 : | $\matM^{-1}$ & \\ | ||
75 : | $h$ & a piecewise polynomial convolution kernel \\ | ||
76 : | $s$ & the support of $h$ \\ | ||
77 : | $h_0,\,\ldots,h_{2s-1}$ & the polynomials that make up $h$ \\ | ||
78 : | \end{tabular}% | ||
79 : | \end{center}% | ||
80 : | jhr | 156 | |
81 : | \section{Probing a 1D scalar field} | ||
82 : | The simplest case is probing a 1D scalar field $F = V\circledast{}h$, where $s$ is the support | ||
83 : | of $h$. | ||
84 : | jhr | 160 | The probe $F\mkw{@}p$ is computed as follows: |
85 : | jhr | 156 | \begin{eqnarray*} |
86 : | jhr | 160 | \left[\begin{array}{c} x \\ 1 \end{array}\right] & = & \matM^{-1} \left[\begin{array}{c} p \\ 1 \end{array}\right] \qquad \text{\textit{transform to image space}} \\ |
87 : | n & = & \FLOOR{x} \qquad \text{\textit{integer part of position}} \\ | ||
88 : | f & = & x - f \qquad \text{\textit{fractional part of position}} \\ | ||
89 : | F\mkw{@}p & = & \sum_{i=1-s}^s {V(n+i) h(f - i)} | ||
90 : | jhr | 156 | \end{eqnarray*}% |
91 : | jhr | 270 | Note that the coordinate system of the convolution filter is flipped (negated) from the coordinate system |
92 : | of the image; this property is why we evaluate $h(f-i)$ for $V(n+i)$. | ||
93 : | jhr | 156 | The convolution $h$ is represented as a symmetric piecewise polynomial formed |
94 : | jhr | 270 | from $2s$ polynomials $h_1,\,\ldots,\,h_{2s}$. |
95 : | jhr | 156 | \begin{displaymath} |
96 : | h(x) = \left\{\begin{array}{ll} | ||
97 : | jhr | 270 | 0 & \text{$x \leq -s$ or $s \leq x$} \\ |
98 : | h_{\lfloor{}x\rfloor{}+s}(x) & \text{$-s < x < s$} \\ | ||
99 : | jhr | 156 | \end{array}\right. |
100 : | \end{displaymath}% | ||
101 : | Thus, we can rewrite the probe computation as | ||
102 : | \begin{displaymath} | ||
103 : | jhr | 270 | F\mkw{@}x = \sum_{i=1-s}^{s} {V(n+i) h_{s-i}(i - f)} |
104 : | jhr | 156 | \end{displaymath}% |
105 : | \figref{fig:1d-probe} illustrates the situation for a kernel $h$ with support $s = 2$, | ||
106 : | and \figref{fig:1d-probe-code} gives the C code for the probe operation, assuming that | ||
107 : | $h$ is represented by third-degree polynomials. | ||
108 : | \begin{figure}[t] | ||
109 : | \begin{center} | ||
110 : | \includegraphics[scale=0.8]{pictures/convo} | ||
111 : | \end{center}% | ||
112 : | \caption{1D scalar probe} | ||
113 : | \label{fig:1d-probe} | ||
114 : | \end{figure}% | ||
115 : | |||
116 : | jhr | 270 | \begin{figure}[p] |
117 : | jhr | 156 | \begin{quote} |
118 : | jhr | 160 | \lstset{language=C} |
119 : | jhr | 156 | \begin{lstlisting} |
120 : | jhr | 270 | double V[]; // image data |
121 : | double h[4][4]; // kernel | ||
122 : | jhr | 156 | |
123 : | jhr | 270 | double probe (double p) |
124 : | jhr | 156 | { |
125 : | jhr | 270 | double x = transform(p); // image-space position |
126 : | jhr | 156 | double n, f; |
127 : | |||
128 : | jhr | 270 | f = modf (x, &n); |
129 : | jhr | 156 | |
130 : | jhr | 160 | double value = 0.0, t; |
131 : | jhr | 156 | |
132 : | jhr | 270 | t = f + 1.0; |
133 : | value += V[n-1] * (h[3][0] + t*(h[3][1] + t*(h[3][2] + t*h[3][3]))); | ||
134 : | t = f; | ||
135 : | value += V[n] * (h[2][0] + t*(h[2][1] + t*(h[2][2] + t*h[2][3]))); | ||
136 : | jhr | 160 | t = f - 1.0; |
137 : | jhr | 270 | value += V[n+1] * (h[1][0] + t*(h[1][1] + t*(h[1][2] + t*h[1][3]))); |
138 : | jhr | 160 | t = f - 2.0; |
139 : | jhr | 270 | value += V[n+2] * (h[0][0] + t*(h[0][1] + t*(h[0][2] + t*h[0][3]))); |
140 : | jhr | 156 | |
141 : | return value; | ||
142 : | } | ||
143 : | \end{lstlisting}% | ||
144 : | \end{quote}% | ||
145 : | jhr | 157 | \caption{Computing $F\mkw{@}x$ for a 1D scalar field in C} |
146 : | jhr | 156 | \label{fig:1d-probe-code} |
147 : | \end{figure} | ||
148 : | jhr | 160 | If we look at the four lines that define \texttt{value}, we see an opportunity for |
149 : | using SIMD parallelism to speed the computation. | ||
150 : | \figref{fig:1d-probe-code-opencl} gives the OpenCL for for the probe function, where | ||
151 : | we have lifted the kernel coefficients into the vector constants \texttt{a}, \texttt{b}, | ||
152 : | jhr | 270 | \texttt{c}, and \texttt{d}, and put the \texttt{t} values into a vector too. |
153 : | We then use a dot product to compute the sum of the products of the | ||
154 : | convolution-filter values and the image values. | ||
155 : | \begin{figure}[p] | ||
156 : | jhr | 156 | \begin{quote} |
157 : | jhr | 160 | \lstset{language=OpenCL} |
158 : | jhr | 156 | \begin{lstlisting} |
159 : | jhr | 270 | double4 d = (double4)(h[3][0], h[2][0], h[1][0], h[0][0]); // x^0 coeffs |
160 : | double4 c = (double4)(h[3][1], h[2][1], h[2][1], h[0][1]); // x^1 coeffs | ||
161 : | double4 b = (double4)(h[3][2], h[2][2], h[1][2], h[0][2]); // x^2 coeffs | ||
162 : | double4 a = (double4)(h[3][3], h[2][3], h[1][3], h[0][3]); // x^3 coeffs | ||
163 : | jhr | 156 | |
164 : | jhr | 270 | double probe (double p) |
165 : | jhr | 156 | { |
166 : | jhr | 270 | double x = transform(p); // image-space position |
167 : | jhr | 156 | double n, f; |
168 : | |||
169 : | jhr | 270 | f = modf (x, &n); |
170 : | jhr | 156 | |
171 : | jhr | 270 | double4 v = (double4)(V[n-1], V[n], V[n+1], V[n+2]); |
172 : | double4 t = (double4)(f + 1.0, f, f - 1.0, f - 2.0); | ||
173 : | jhr | 160 | return dot(v, d + t*(c + t*(b + t*a))); |
174 : | jhr | 156 | } |
175 : | \end{lstlisting}% | ||
176 : | \end{quote}% | ||
177 : | jhr | 157 | \caption{Computing $F\mkw{@}x$ for a 1D scalar field in OpenCL} |
178 : | jhr | 156 | \label{fig:1d-probe-code-opencl} |
179 : | \end{figure} | ||
180 : | |||
181 : | \section{Probing a 3D scalar field} | ||
182 : | |||
183 : | The more common case is when the field is a convolution of a scalar 3-dimensional | ||
184 : | field ($F = V\circledast{}h$). | ||
185 : | Let $s$ be the support of $h$. | ||
186 : | jhr | 159 | Then the probe $F\mkw{@}\vecp$ is computed as follows: |
187 : | jhr | 156 | \begin{eqnarray*} |
188 : | jhr | 159 | \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\ |
189 : | \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\ | ||
190 : | \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\ | ||
191 : | F\mkw{@}\vecp & = & \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}} | ||
192 : | jhr | 156 | \end{eqnarray*}% |
193 : | jhr | 270 | We can restructure this equation to highlight the loop-invariant computations. |
194 : | \begin{displaymath} | ||
195 : | F\mkw{@}\vecp = | ||
196 : | \sum_{i=1-s}^s \left({ | ||
197 : | \sum_{j=1-s}^s \left({ | ||
198 : | \sum_{k=1-s}^s \left({ | ||
199 : | V(\vecn+\VEC{i,j,k}) h(\vecf_z - k) | ||
200 : | }\right) | ||
201 : | }\right) h(\vecf_y - j) | ||
202 : | }\right) h(\vecf_x - i) | ||
203 : | \end{displaymath}% | ||
204 : | Thus, we see that the kernel $h$ is evaluated $2s$ times per axis (\ie{}, $6s$ times in 3D). | ||
205 : | jhr | 156 | |
206 : | \begin{figure}[t] | ||
207 : | \begin{quote} | ||
208 : | jhr | 160 | \lstset{language=C} |
209 : | jhr | 156 | \begin{lstlisting} |
210 : | typedef double vec3[3]; | ||
211 : | |||
212 : | jhr | 270 | double V[][][]; // image data |
213 : | double h[4][4]; // kernel | ||
214 : | const int s = 2; // kernel support | ||
215 : | jhr | 156 | |
216 : | jhr | 270 | double probe (vec3 pos) |
217 : | { | ||
218 : | double nx, ny, nz, fx, fy, fz; | ||
219 : | jhr | 156 | |
220 : | jhr | 270 | // compute kernel values for each axis |
221 : | double hx[4], hy[4], hz[4]; | ||
222 : | for (int k = 1-s; j < s; k++) { | ||
223 : | double t = fx - k; | ||
224 : | hx[k+s-1] = h[s-k][0] + t*(h[s-k][1] + t*(h[s-k][2] + t*h[s-k][3])); | ||
225 : | double t = fy - k; | ||
226 : | hy[k+s-1] = h[s-k][0] + t*(h[s-k][1] + t*(h[s-k][2] + t*h[s-k][3])); | ||
227 : | double t = fz - k; | ||
228 : | hz[k+s-1] = h[s-k][0] + t*(h[s-k][1] + t*(h[s-k][2] + t*h[s-k][3])); | ||
229 : | } | ||
230 : | |||
231 : | double vx = 0.0 | ||
232 : | for (int i = 1-s; i < s; i++) { | ||
233 : | double vy = 0.0 | ||
234 : | for (int j = 1-s; j < s; j++) { | ||
235 : | double vz = 0.0; | ||
236 : | for (int k = 1-s; k < s; k++) { | ||
237 : | vz += V[nx+i][ny+j][nz+k] * hz[k+s-1]; | ||
238 : | } | ||
239 : | vy += vz * hy[j+s-1]; | ||
240 : | } | ||
241 : | vx += vx * hy[i+s-1]; | ||
242 : | } | ||
243 : | |||
244 : | return vx; | ||
245 : | jhr | 156 | } |
246 : | \end{lstlisting}% | ||
247 : | \end{quote}% | ||
248 : | jhr | 157 | \caption{Computing $F\mkw{@}\vecx$ for a 3D scalar field in C} |
249 : | jhr | 156 | \end{figure} |
250 : | |||
251 : | jhr | 159 | \section{Probing a 3D derivative field} |
252 : | We next consider the case of probing the derivative of a scalar field $F = V\circledast{}h$, where $s$ is the support | ||
253 : | of $h$. | ||
254 : | The probe $(\mkw{D}\;F)\mkw{@}\vecp$ produces a vector result as follows: | ||
255 : | \begin{eqnarray*} | ||
256 : | \vecx & = & \matM^{-1} \vecp \qquad \text{\textit{transform to image space}} \\ | ||
257 : | \vecn & = & \FLOOR{\vecx} \qquad \text{\textit{integer part of position}} \\ | ||
258 : | \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\ | ||
259 : | (\mkw{D}\;F)\mkw{@}\vecp & = & \left[\begin{array}{c} | ||
260 : | \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h'(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}} \\ | ||
261 : | \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h'(\vecf_y - j) h(\vecf_z - k)}}} \\ | ||
262 : | \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h'(\vecf_z - k)}}} \\ | ||
263 : | \end{array}\right] | ||
264 : | \end{eqnarray*}% | ||
265 : | |||
266 : | jhr | 156 | \end{document} |
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |