Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Diff of /trunk/doc/probe/paper.tex
ViewVC logotype

Diff of /trunk/doc/probe/paper.tex

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 269, Wed Aug 11 04:42:30 2010 UTC revision 270, Wed Aug 11 15:37:10 2010 UTC
# Line 4  Line 4 
4  \usepackage{graphicx}  \usepackage{graphicx}
5  \usepackage{listings}  \usepackage{listings}
6  \lstset{  \lstset{
7    basicstyle=\ttfamily\small,    basicstyle=\ttfamily\footnotesize,
8    keywordstyle=\bfseries,    keywordstyle=\bfseries,
9    showstringspaces=false,    showstringspaces=false,
10  }  }
# Line 63  Line 63 
63  \section{Introduction}  \section{Introduction}
64    
65  This note describes the code needed to implement a probe of a field operation.  This note describes the code needed to implement a probe of a field operation.
66    In the discussion below, we  use a number of notational conventions that are
67  In the discussion below, we use $\matM^{-1}$ for the homogeneous matrix that maps from world  summarized in the following table:
68  coordinates to image coordinates and use $\vecx$ for the position vector.  \begin{center}
69      \begin{tabular}{cl}
70        $V$ & the image data \\
71        $F$ & the field reconstructed from $V$ \\
72        $\vecp$ & the position vector in field-space \\
73        $\vecx$ & the position vector in image-space \\
74        $\matM^{-1}$ & \\
75        $h$ & a piecewise polynomial convolution kernel \\
76        $s$ & the support of $h$ \\
77        $h_0,\,\ldots,h_{2s-1}$ & the polynomials that make up $h$ \\
78      \end{tabular}%
79    \end{center}%
80    
81  \section{Probing a 1D scalar field}  \section{Probing a 1D scalar field}
82  The simplest case is probing a 1D scalar field $F = V\circledast{}h$, where $s$ is the support  The simplest case is probing a 1D scalar field $F = V\circledast{}h$, where $s$ is the support
# Line 77  Line 88 
88    f  & = & x - f \qquad \text{\textit{fractional part of position}} \\    f  & = & x - f \qquad \text{\textit{fractional part of position}} \\
89    F\mkw{@}p & = & \sum_{i=1-s}^s {V(n+i) h(f - i)}    F\mkw{@}p & = & \sum_{i=1-s}^s {V(n+i) h(f - i)}
90  \end{eqnarray*}%  \end{eqnarray*}%
91    Note that the coordinate system of the convolution filter is flipped (negated) from the coordinate system
92    of the image; this property is why we evaluate $h(f-i)$ for $V(n+i)$.
93  The convolution $h$ is represented as a symmetric piecewise polynomial formed  The convolution $h$ is represented as a symmetric piecewise polynomial formed
94  from $s$ polynomials $h_1,\,\ldots,\,h_s$.  from $2s$ polynomials $h_1,\,\ldots,\,h_{2s}$.
95  \begin{displaymath}  \begin{displaymath}
96    h(x) = \left\{\begin{array}{ll}    h(x) = \left\{\begin{array}{ll}
97      0 & \text{$x < -s$} \\      0 & \text{$x \leq -s$ or $s \leq x$} \\
98      h_i(-x) & \text{$-s \leq -i \leq x < 1-i \leq 0$} \\      h_{\lfloor{}x\rfloor{}+s}(x) & \text{$-s < x < s$} \\
     h_i(x) & \text{$0 < i-1 \leq x < i \leq s$} \\  
     0 & \text{$x > s$} \\  
99    \end{array}\right.    \end{array}\right.
100  \end{displaymath}%  \end{displaymath}%
101  Thus, we can rewrite the probe computation as  Thus, we can rewrite the probe computation as
102  \begin{displaymath}  \begin{displaymath}
103    F\mkw{@}x = \sum_{i=1-s}^{0} {V(n+i) h_{1-i}(i - f)} + \sum_{i=1}^s {V(n+i) h_i(f - i)}    F\mkw{@}x = \sum_{i=1-s}^{s} {V(n+i) h_{s-i}(i - f)}
104  \end{displaymath}%  \end{displaymath}%
105  \figref{fig:1d-probe} illustrates the situation for a kernel $h$ with support $s = 2$,  \figref{fig:1d-probe} illustrates the situation for a kernel $h$ with support $s = 2$,
106  and \figref{fig:1d-probe-code} gives the C code for the probe operation, assuming that  and \figref{fig:1d-probe-code} gives the C code for the probe operation, assuming that
# Line 102  Line 113 
113    \label{fig:1d-probe}    \label{fig:1d-probe}
114  \end{figure}%  \end{figure}%
115    
116  \begin{figure}[t]  \begin{figure}[p]
117  \begin{quote}  \begin{quote}
118  \lstset{language=C}  \lstset{language=C}
119  \begin{lstlisting}  \begin{lstlisting}
120  double img[];                   // image data  double V[];                     // image data
121  double h1[4], h2[4];            // kernel  double h[4][4];                 // kernel
122    
123  double probe (double x)  double probe (double p)
124  {  {
125    double imgX = transform(x);   // image-space position    double x = transform(p);      // image-space position
126    double n, f;    double n, f;
127    
128    f = modf (imgPos, &n);    f = modf (x, &n);
129    
130    double value = 0.0, t;    double value = 0.0, t;
131    
132    t = -1.0 - f;    t = f + 1.0;
133    value += img[n-1] * (h2[0] + t*(h2[1] + t*(h2[2] + t*h2[3])));    value += V[n-1] * (h[3][0] + t*(h[3][1] + t*(h[3][2] + t*h[3][3])));
134    t = -f;    t = f;
135    value += img[n]   * (h1[0] + t*(h1[1] + t*(h1[2] + t*h1[3])));    value += V[n]   * (h[2][0] + t*(h[2][1] + t*(h[2][2] + t*h[2][3])));
136    t = f - 1.0;    t = f - 1.0;
137    value += img[n+1] * (h1[0] + t*(h1[1] + t*(h1[2] + t*h1[3])));    value += V[n+1] * (h[1][0] + t*(h[1][1] + t*(h[1][2] + t*h[1][3])));
138    t = f - 2.0;    t = f - 2.0;
139    value += img[n+2] * (h2[0] + t*(h2[1] + t*(h2[2] + t*h2[3])));    value += V[n+2] * (h[0][0] + t*(h[0][1] + t*(h[0][2] + t*h[0][3])));
140    
141    return value;    return value;
142  }  }
# Line 138  Line 149 
149  using SIMD parallelism to speed the computation.  using SIMD parallelism to speed the computation.
150  \figref{fig:1d-probe-code-opencl} gives the OpenCL for for the probe function, where  \figref{fig:1d-probe-code-opencl} gives the OpenCL for for the probe function, where
151  we have lifted the kernel coefficients into the vector constants \texttt{a}, \texttt{b},  we have lifted the kernel coefficients into the vector constants \texttt{a}, \texttt{b},
152  \texttt{c}, and \texttt{d}.  \texttt{c}, and \texttt{d}, and put the \texttt{t} values into a vector too.
153  \begin{figure}[t]  We then use a dot product to compute the sum of the products of the
154    convolution-filter values and the image values.
155    \begin{figure}[p]
156  \begin{quote}  \begin{quote}
157  \lstset{language=OpenCL}  \lstset{language=OpenCL}
158  \begin{lstlisting}  \begin{lstlisting}
159  double4 d = (double4)(h2[0], h1[0], h1[0], h2[0]); // x^0 coeffs  double4 d = (double4)(h[3][0], h[2][0], h[1][0], h[0][0]); // x^0 coeffs
160  double4 c = (double4)(h2[1], h1[1], h1[1], h2[1]); // x^1 coeffs  double4 c = (double4)(h[3][1], h[2][1], h[2][1], h[0][1]); // x^1 coeffs
161  double4 b = (double4)(h2[2], h1[2], h1[2], h2[2]); // x^2 coeffs  double4 b = (double4)(h[3][2], h[2][2], h[1][2], h[0][2]); // x^2 coeffs
162  double4 a = (double4)(h2[3], h1[3], h1[3], h2[3]); // x^3 coeffs  double4 a = (double4)(h[3][3], h[2][3], h[1][3], h[0][3]); // x^3 coeffs
163    
164  double probe (double x)  double probe (double p)
165  {  {
166    double imgX = transform(x);   // image-space position    double x = transform(p);      // image-space position
167    double n, f;    double n, f;
168    
169    f = modf (imgPos, &n);    f = modf (x, &n);
170    
171    double4 v = (double4)(img[n-1], img[n], img[n+1], img[n+2]);    double4 v = (double4)(V[n-1], V[n], V[n+1], V[n+2]);
172    double4 t = (double4)(-1.0 - f, -f, f - 1.0, f - 2.0);    double4 t = (double4)(f + 1.0, f, f - 1.0, f - 2.0);
173    return dot(v, d + t*(c + t*(b + t*a)));    return dot(v, d + t*(c + t*(b + t*a)));
174  }  }
175  \end{lstlisting}%  \end{lstlisting}%
# Line 177  Line 190 
190    \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\    \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\
191    F\mkw{@}\vecp & = & \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}}    F\mkw{@}\vecp & = & \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}}
192  \end{eqnarray*}%  \end{eqnarray*}%
193  Note that the kernel $h$ is evaluated $2s$ times per axis (\ie{}, $6s$ times in 3D).  We can restructure this equation to highlight the loop-invariant computations.
194    \begin{displaymath}
195      F\mkw{@}\vecp =
196        \sum_{i=1-s}^s \left({
197          \sum_{j=1-s}^s \left({
198            \sum_{k=1-s}^s \left({
199              V(\vecn+\VEC{i,j,k}) h(\vecf_z - k)
200            }\right)
201          }\right) h(\vecf_y - j)
202        }\right) h(\vecf_x - i)
203    \end{displaymath}%
204    Thus, we see that the kernel $h$ is evaluated $2s$ times per axis (\ie{}, $6s$ times in 3D).
205    
206  \begin{figure}[t]  \begin{figure}[t]
207  \begin{quote}  \begin{quote}
# Line 185  Line 209 
209  \begin{lstlisting}  \begin{lstlisting}
210  typedef double vec3[3];  typedef double vec3[3];
211    
212  typedef struct {  double V[][][];         // image data
213      int         degree;  double h[4][4];         // kernel
214      double      coeff[];  const int s = 2;                // kernel support
 } polynomial;  
   
 typedef struct {  
     int         support;  
     polynomial  *segments[];  
 } kernel;  
215    
216  double probe (vec3 ***img, kernel *h, vec3 pos)  double probe (vec3 pos)
217  {  {
218      double nx, ny, nz, fx, fy, fz;
219    
220      // compute kernel values for each axis
221      double hx[4], hy[4], hz[4];
222      for (int k = 1-s;  j < s;  k++) {
223        double t = fx - k;
224        hx[k+s-1] = h[s-k][0] + t*(h[s-k][1] + t*(h[s-k][2] + t*h[s-k][3]));
225        double t = fy - k;
226        hy[k+s-1] = h[s-k][0] + t*(h[s-k][1] + t*(h[s-k][2] + t*h[s-k][3]));
227        double t = fz - k;
228        hz[k+s-1] = h[s-k][0] + t*(h[s-k][1] + t*(h[s-k][2] + t*h[s-k][3]));
229      }
230    
231      double vx = 0.0
232      for (int i = 1-s;  i < s;  i++) {
233        double vy = 0.0
234        for (int j = 1-s;  j < s;  j++) {
235          double vz = 0.0;
236          for (int k = 1-s;  k < s;  k++) {
237            vz += V[nx+i][ny+j][nz+k] * hz[k+s-1];
238          }
239          vy += vz * hy[j+s-1];
240        }
241        vx += vx * hy[i+s-1];
242      }
243    
244      return vx;
245  }  }
246  \end{lstlisting}%  \end{lstlisting}%
247  \end{quote}%  \end{quote}%

Legend:
Removed from v.269  
changed lines
  Added in v.270

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0