Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Diff of /trunk/doc/probe/paper.tex
ViewVC logotype

Diff of /trunk/doc/probe/paper.tex

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 159, Tue Jul 13 02:46:31 2010 UTC revision 160, Tue Jul 13 21:25:19 2010 UTC
# Line 4  Line 4 
4  \usepackage{graphicx}  \usepackage{graphicx}
5  \usepackage{listings}  \usepackage{listings}
6  \lstset{  \lstset{
7    basicstyle=\ttfamily,    basicstyle=\ttfamily\small,
8    keywordstyle=\bfseries,    keywordstyle=\bfseries,
9    showstringspaces=false,    showstringspaces=false,
10  }  }
11    \lstdefinelanguage{OpenCL}[]{C}{%
12      morekeywords={
13        char2,char4,char8,char16,
14        uchar2,uchar4,uchar8,uchar16,
15        short2,short4,short8,short16,
16        ushort2,ushort4,ushort8,ushort16,
17        int2,int4,int8,int16,
18        uint2,uint4,uint8,uint16,
19        long2,long4,long8,long16,
20        ulong2,ulong4,ulong8,ulong16,
21        float2,float4,float8,float16,
22        ufloat2,ufloat4,ufloat8,ufloat16,
23        double2,double4,double8,double16,
24        udouble2,udouble4,udouble8,udouble16,
25        constant,__constant,kernel,__kernel,private,__private},
26      moredirectives={version},
27      deletekeywords={}
28    }
29    
30  \lstset{  \lstset{
31    language=C,    language=C,
# Line 52  Line 70 
70  \section{Probing a 1D scalar field}  \section{Probing a 1D scalar field}
71  The simplest case is probing a 1D scalar field $F = V\circledast{}h$, where $s$ is the support  The simplest case is probing a 1D scalar field $F = V\circledast{}h$, where $s$ is the support
72  of $h$.  of $h$.
73  The probe $F\mkw{@}x$ is computed as follows:  The probe $F\mkw{@}p$ is computed as follows:
74  \begin{eqnarray*}  \begin{eqnarray*}
75    \left[\begin{array}{c} x' \\ 1 \end{array}\right] & = & \matM^{-1} \left[\begin{array}{c} x \\ 1 \end{array}\right] \qquad \text{\textit{transform to image space}} \\    \left[\begin{array}{c} x \\ 1 \end{array}\right] & = & \matM^{-1} \left[\begin{array}{c} p \\ 1 \end{array}\right] \qquad \text{\textit{transform to image space}} \\
76    n  & = & \FLOOR{x'} \qquad \text{\textit{integer part of position}} \\    n  & = & \FLOOR{x} \qquad \text{\textit{integer part of position}} \\
77    f  & = & x' - f \qquad \text{\textit{fractional part of position}} \\    f  & = & x - f \qquad \text{\textit{fractional part of position}} \\
78    F\mkw{@}x & = & \sum_{i=1-s}^s {V(n+i) h(f - i)}    F\mkw{@}p & = & \sum_{i=1-s}^s {V(n+i) h(f - i)}
79  \end{eqnarray*}%  \end{eqnarray*}%
80  The convolution $h$ is represented as a symmetric piecewise polynomial formed  The convolution $h$ is represented as a symmetric piecewise polynomial formed
81  from $s$ polynomials $h_1,\,\ldots,\,h_s$.  from $s$ polynomials $h_1,\,\ldots,\,h_s$.
# Line 86  Line 104 
104    
105  \begin{figure}[t]  \begin{figure}[t]
106  \begin{quote}  \begin{quote}
107    \lstset{language=C}
108  \begin{lstlisting}  \begin{lstlisting}
109  double img[];                   // image data  double img[];                   // image data
110  double h1[4], h2[4];            // kernel  double h1[4], h2[4];            // kernel
# Line 97  Line 116 
116    
117    f = modf (imgPos, &n);    f = modf (imgPos, &n);
118    
119    double value = 0.0, x;    double value = 0.0, t;
120    
121    w = -1.0 - f;    t = -1.0 - f;
122    value += img[n-1] * (h2[0] + w*(h2[1] + w*(h2[2] + w*h2[3])));    value += img[n-1] * (h2[0] + t*(h2[1] + t*(h2[2] + t*h2[3])));
123    w = -f;    t = -f;
124    value += img[n] * (h1[0] + w*(h1[1] + w*(h1[2] + w*h1[3])));    value += img[n]   * (h1[0] + t*(h1[1] + t*(h1[2] + t*h1[3])));
125    w = f - 1.0;    t = f - 1.0;
126    value += img[n+1] * (h1[0] + w*(h1[1] + w*(h1[2] + w*h1[3])));    value += img[n+1] * (h1[0] + t*(h1[1] + t*(h1[2] + t*h1[3])));
127    w = f - 2.0;    t = f - 2.0;
128    value += img[n+2] * (h2[0] + w*(h2[1] + w*(h2[2] + w*h2[3])));    value += img[n+2] * (h2[0] + t*(h2[1] + t*(h2[2] + t*h2[3])));
129    
130    return value;    return value;
131  }  }
# Line 115  Line 134 
134  \caption{Computing $F\mkw{@}x$ for a 1D scalar field in C}  \caption{Computing $F\mkw{@}x$ for a 1D scalar field in C}
135  \label{fig:1d-probe-code}  \label{fig:1d-probe-code}
136  \end{figure}  \end{figure}
137    If we look at the four lines that define \texttt{value}, we see an opportunity for
138    using SIMD parallelism to speed the computation.
139    \figref{fig:1d-probe-code-opencl} gives the OpenCL for for the probe function, where
140    we have lifted the kernel coefficients into the vector constants \texttt{a}, \texttt{b},
141    \texttt{c}, and \texttt{d}.
142  \begin{figure}[t]  \begin{figure}[t]
143  \begin{quote}  \begin{quote}
144    \lstset{language=OpenCL}
145  \begin{lstlisting}  \begin{lstlisting}
146  double4 d = (double4)(h2[0], h1[0], h1[0], h2[0]); // x^0 coeffs  double4 d = (double4)(h2[0], h1[0], h1[0], h2[0]); // x^0 coeffs
147  double4 c = (double4)(h2[1], h1[1], h1[1], h2[1]); // x^1 coeffs  double4 c = (double4)(h2[1], h1[1], h1[1], h2[1]); // x^1 coeffs
# Line 132  Line 156 
156    f = modf (imgPos, &n);    f = modf (imgPos, &n);
157    
158    double4 v = (double4)(img[n-1], img[n], img[n+1], img[n+2]);    double4 v = (double4)(img[n-1], img[n], img[n+1], img[n+2]);
159    double4 w = (double4)(-1.0 - f, -f, f - 1.0, f - 2.0);    double4 t = (double4)(-1.0 - f, -f, f - 1.0, f - 2.0);
160    return dot(v, d + w*(c + w*(b + w*a)));    return dot(v, d + t*(c + t*(b + t*a)));
161  }  }
162  \end{lstlisting}%  \end{lstlisting}%
163  \end{quote}%  \end{quote}%
# Line 153  Line 177 
177    \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\    \vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\
178    F\mkw{@}\vecp & = & \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}}    F\mkw{@}\vecp & = & \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}}
179  \end{eqnarray*}%  \end{eqnarray*}%
180    Note that the kernel $h$ is evaluated $2s$ times per axis (\ie{}, $6s$ times in 3D).
181    
182  \begin{figure}[t]  \begin{figure}[t]
183  \begin{quote}  \begin{quote}
184    \lstset{language=C}
185  \begin{lstlisting}  \begin{lstlisting}
186  typedef double vec3[3];  typedef double vec3[3];
187    

Legend:
Removed from v.159  
changed lines
  Added in v.160

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0