4 |
\usepackage{graphicx} |
\usepackage{graphicx} |
5 |
\usepackage{listings} |
\usepackage{listings} |
6 |
\lstset{ |
\lstset{ |
7 |
basicstyle=\ttfamily, |
basicstyle=\ttfamily\small, |
8 |
keywordstyle=\bfseries, |
keywordstyle=\bfseries, |
9 |
showstringspaces=false, |
showstringspaces=false, |
10 |
} |
} |
11 |
|
\lstdefinelanguage{OpenCL}[]{C}{% |
12 |
|
morekeywords={ |
13 |
|
char2,char4,char8,char16, |
14 |
|
uchar2,uchar4,uchar8,uchar16, |
15 |
|
short2,short4,short8,short16, |
16 |
|
ushort2,ushort4,ushort8,ushort16, |
17 |
|
int2,int4,int8,int16, |
18 |
|
uint2,uint4,uint8,uint16, |
19 |
|
long2,long4,long8,long16, |
20 |
|
ulong2,ulong4,ulong8,ulong16, |
21 |
|
float2,float4,float8,float16, |
22 |
|
ufloat2,ufloat4,ufloat8,ufloat16, |
23 |
|
double2,double4,double8,double16, |
24 |
|
udouble2,udouble4,udouble8,udouble16, |
25 |
|
constant,__constant,kernel,__kernel,private,__private}, |
26 |
|
moredirectives={version}, |
27 |
|
deletekeywords={} |
28 |
|
} |
29 |
|
|
30 |
\lstset{ |
\lstset{ |
31 |
language=C, |
language=C, |
70 |
\section{Probing a 1D scalar field} |
\section{Probing a 1D scalar field} |
71 |
The simplest case is probing a 1D scalar field $F = V\circledast{}h$, where $s$ is the support |
The simplest case is probing a 1D scalar field $F = V\circledast{}h$, where $s$ is the support |
72 |
of $h$. |
of $h$. |
73 |
The probe $F\mkw{@}x$ is computed as follows: |
The probe $F\mkw{@}p$ is computed as follows: |
74 |
\begin{eqnarray*} |
\begin{eqnarray*} |
75 |
\left[\begin{array}{c} x' \\ 1 \end{array}\right] & = & \matM^{-1} \left[\begin{array}{c} x \\ 1 \end{array}\right] \qquad \text{\textit{transform to image space}} \\ |
\left[\begin{array}{c} x \\ 1 \end{array}\right] & = & \matM^{-1} \left[\begin{array}{c} p \\ 1 \end{array}\right] \qquad \text{\textit{transform to image space}} \\ |
76 |
n & = & \FLOOR{x'} \qquad \text{\textit{integer part of position}} \\ |
n & = & \FLOOR{x} \qquad \text{\textit{integer part of position}} \\ |
77 |
f & = & x' - f \qquad \text{\textit{fractional part of position}} \\ |
f & = & x - f \qquad \text{\textit{fractional part of position}} \\ |
78 |
F\mkw{@}x & = & \sum_{i=1-s}^s {V(n+i) h(f - i)} |
F\mkw{@}p & = & \sum_{i=1-s}^s {V(n+i) h(f - i)} |
79 |
\end{eqnarray*}% |
\end{eqnarray*}% |
80 |
The convolution $h$ is represented as a symmetric piecewise polynomial formed |
The convolution $h$ is represented as a symmetric piecewise polynomial formed |
81 |
from $s$ polynomials $h_1,\,\ldots,\,h_s$. |
from $s$ polynomials $h_1,\,\ldots,\,h_s$. |
104 |
|
|
105 |
\begin{figure}[t] |
\begin{figure}[t] |
106 |
\begin{quote} |
\begin{quote} |
107 |
|
\lstset{language=C} |
108 |
\begin{lstlisting} |
\begin{lstlisting} |
109 |
double img[]; // image data |
double img[]; // image data |
110 |
double h1[4], h2[4]; // kernel |
double h1[4], h2[4]; // kernel |
116 |
|
|
117 |
f = modf (imgPos, &n); |
f = modf (imgPos, &n); |
118 |
|
|
119 |
double value = 0.0, x; |
double value = 0.0, t; |
120 |
|
|
121 |
w = -1.0 - f; |
t = -1.0 - f; |
122 |
value += img[n-1] * (h2[0] + w*(h2[1] + w*(h2[2] + w*h2[3]))); |
value += img[n-1] * (h2[0] + t*(h2[1] + t*(h2[2] + t*h2[3]))); |
123 |
w = -f; |
t = -f; |
124 |
value += img[n] * (h1[0] + w*(h1[1] + w*(h1[2] + w*h1[3]))); |
value += img[n] * (h1[0] + t*(h1[1] + t*(h1[2] + t*h1[3]))); |
125 |
w = f - 1.0; |
t = f - 1.0; |
126 |
value += img[n+1] * (h1[0] + w*(h1[1] + w*(h1[2] + w*h1[3]))); |
value += img[n+1] * (h1[0] + t*(h1[1] + t*(h1[2] + t*h1[3]))); |
127 |
w = f - 2.0; |
t = f - 2.0; |
128 |
value += img[n+2] * (h2[0] + w*(h2[1] + w*(h2[2] + w*h2[3]))); |
value += img[n+2] * (h2[0] + t*(h2[1] + t*(h2[2] + t*h2[3]))); |
129 |
|
|
130 |
return value; |
return value; |
131 |
} |
} |
134 |
\caption{Computing $F\mkw{@}x$ for a 1D scalar field in C} |
\caption{Computing $F\mkw{@}x$ for a 1D scalar field in C} |
135 |
\label{fig:1d-probe-code} |
\label{fig:1d-probe-code} |
136 |
\end{figure} |
\end{figure} |
137 |
|
If we look at the four lines that define \texttt{value}, we see an opportunity for |
138 |
|
using SIMD parallelism to speed the computation. |
139 |
|
\figref{fig:1d-probe-code-opencl} gives the OpenCL for for the probe function, where |
140 |
|
we have lifted the kernel coefficients into the vector constants \texttt{a}, \texttt{b}, |
141 |
|
\texttt{c}, and \texttt{d}. |
142 |
\begin{figure}[t] |
\begin{figure}[t] |
143 |
\begin{quote} |
\begin{quote} |
144 |
|
\lstset{language=OpenCL} |
145 |
\begin{lstlisting} |
\begin{lstlisting} |
146 |
double4 d = (double4)(h2[0], h1[0], h1[0], h2[0]); // x^0 coeffs |
double4 d = (double4)(h2[0], h1[0], h1[0], h2[0]); // x^0 coeffs |
147 |
double4 c = (double4)(h2[1], h1[1], h1[1], h2[1]); // x^1 coeffs |
double4 c = (double4)(h2[1], h1[1], h1[1], h2[1]); // x^1 coeffs |
156 |
f = modf (imgPos, &n); |
f = modf (imgPos, &n); |
157 |
|
|
158 |
double4 v = (double4)(img[n-1], img[n], img[n+1], img[n+2]); |
double4 v = (double4)(img[n-1], img[n], img[n+1], img[n+2]); |
159 |
double4 w = (double4)(-1.0 - f, -f, f - 1.0, f - 2.0); |
double4 t = (double4)(-1.0 - f, -f, f - 1.0, f - 2.0); |
160 |
return dot(v, d + w*(c + w*(b + w*a))); |
return dot(v, d + t*(c + t*(b + t*a))); |
161 |
} |
} |
162 |
\end{lstlisting}% |
\end{lstlisting}% |
163 |
\end{quote}% |
\end{quote}% |
177 |
\vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\ |
\vecf & = & \vecx - \vecn \qquad \text{\textit{fractional part of position}} \\ |
178 |
F\mkw{@}\vecp & = & \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}} |
F\mkw{@}\vecp & = & \sum_{i=1-s}^s {\sum_{j=1-s}^s {\sum_{k=1-s}^s {V(\vecn+\VEC{i,j,k}) h(\vecf_x - i) h(\vecf_y - j) h(\vecf_z - k)}}} |
179 |
\end{eqnarray*}% |
\end{eqnarray*}% |
180 |
|
Note that the kernel $h$ is evaluated $2s$ times per axis (\ie{}, $6s$ times in 3D). |
181 |
|
|
182 |
\begin{figure}[t] |
\begin{figure}[t] |
183 |
\begin{quote} |
\begin{quote} |
184 |
|
\lstset{language=C} |
185 |
\begin{lstlisting} |
\begin{lstlisting} |
186 |
typedef double vec3[3]; |
typedef double vec3[3]; |
187 |
|
|