1 : |
lamonts |
302 |
/* mip_opencl.c
|
2 : |
lamonts |
203 |
*
|
3 : |
lamonts |
302 |
* COPYRIGHT (c) 2010 The Diderot Project (http://diderot.cs.uchicago.edu)
|
4 : |
|
|
* All rights reserved.
|
5 : |
|
|
*
|
6 : |
|
|
* An OpenCL mip implementation.
|
7 : |
|
|
*
|
8 : |
lamonts |
203 |
* To View the Image
|
9 : |
|
|
* =========================
|
10 : |
|
|
* ./unu reshape -i mip.txt -s 200 200 | ./unu quantize -b 8 -o new.png
|
11 : |
|
|
*/
|
12 : |
lamonts |
177 |
#include <OpenCL/OpenCl.h>
|
13 : |
|
|
#include <assert.h>
|
14 : |
|
|
#include <stdio.h>
|
15 : |
|
|
#include <stdlib.h>
|
16 : |
|
|
#include <sys/sysctl.h>
|
17 : |
|
|
#include <sys/stat.h>
|
18 : |
|
|
#include <teem/nrrd.h>
|
19 : |
|
|
|
20 : |
lamonts |
302 |
#define SIZE 200
|
21 : |
lamonts |
177 |
|
22 : |
|
|
//Loads the Kernel from a file
|
23 : |
|
|
char * loadKernel (const char * filename)
|
24 : |
|
|
{
|
25 : |
|
|
struct stat statbuf;
|
26 : |
|
|
FILE *fh;
|
27 : |
|
|
char *source;
|
28 : |
|
|
|
29 : |
|
|
fh = fopen(filename, "r");
|
30 : |
|
|
if (fh == 0)
|
31 : |
|
|
return 0;
|
32 : |
|
|
|
33 : |
|
|
stat(filename, &statbuf);
|
34 : |
|
|
source = (char *) malloc(statbuf.st_size + 1);
|
35 : |
|
|
fread(source, statbuf.st_size, 1, fh);
|
36 : |
|
|
source[statbuf.st_size] = '\0';
|
37 : |
|
|
|
38 : |
|
|
return source;
|
39 : |
|
|
}
|
40 : |
lamonts |
203 |
void saveResults (float * matrix, int size)
|
41 : |
|
|
{
|
42 : |
|
|
int i;
|
43 : |
|
|
float max = -INFINITY;
|
44 : |
|
|
FILE * out_file;
|
45 : |
|
|
out_file = fopen("mip.txt", "w");
|
46 : |
|
|
if (out_file == NULL) {
|
47 : |
|
|
fprintf(stderr,"Can not open output file\n");
|
48 : |
|
|
exit (8);
|
49 : |
|
|
}
|
50 : |
|
|
|
51 : |
|
|
for(i = 0; i < size; i++)
|
52 : |
|
|
{
|
53 : |
|
|
if(matrix[i] == -INFINITY || matrix[i] < 0)
|
54 : |
lamonts |
302 |
fprintf(out_file,"%.4f\n",0.0f);
|
55 : |
lamonts |
203 |
else
|
56 : |
lamonts |
302 |
fprintf(out_file,"%.4f\n",matrix[i]);
|
57 : |
lamonts |
203 |
|
58 : |
|
|
if(matrix[i] > max)
|
59 : |
|
|
max = matrix[i];
|
60 : |
|
|
|
61 : |
|
|
}
|
62 : |
|
|
printf("Max: %f\n",max);
|
63 : |
|
|
fclose(out_file);
|
64 : |
|
|
|
65 : |
|
|
|
66 : |
|
|
}
|
67 : |
lamonts |
177 |
float det3x3(float a, float b, float c, float d, float e, float f, float g, float h, float i)
|
68 : |
|
|
{
|
69 : |
|
|
return ( (a)*(e)*(i)
|
70 : |
|
|
+ (d)*(h)*(c)
|
71 : |
|
|
+ (g)*(b)*(f)
|
72 : |
|
|
- (g)*(e)*(c)
|
73 : |
|
|
- (d)*(b)*(i)
|
74 : |
|
|
- (a)*(h)*(f));
|
75 : |
|
|
}
|
76 : |
|
|
float det4x4(cl_float16 m)
|
77 : |
|
|
{
|
78 : |
|
|
return (m[ 0] * det3x3(m[ 5], m[ 6], m[ 7],
|
79 : |
|
|
m[ 9], m[10], m[11],
|
80 : |
|
|
m[13], m[14], m[15])
|
81 : |
|
|
|
82 : |
|
|
- m[ 1] * det3x3(m[ 4], m[ 6], m[ 7],
|
83 : |
|
|
m[ 8], m[10], m[11],
|
84 : |
|
|
m[12], m[14], m[15])
|
85 : |
|
|
+ m[ 2] * det3x3(m[ 4], m[ 5], m[ 7],
|
86 : |
|
|
m[ 8], m[ 9], m[11],
|
87 : |
|
|
m[12], m[13], m[15])
|
88 : |
|
|
|
89 : |
|
|
- m[ 3] * det3x3(m[ 4], m[ 5], m[ 6],
|
90 : |
|
|
m[ 8], m[ 9], m[10],
|
91 : |
|
|
m[12], m[13], m[14]));
|
92 : |
|
|
|
93 : |
|
|
|
94 : |
|
|
|
95 : |
|
|
}
|
96 : |
|
|
void invMatrix(cl_float16 m, cl_float16 i)
|
97 : |
|
|
{
|
98 : |
|
|
float det = det4x4(m);
|
99 : |
|
|
|
100 : |
|
|
|
101 : |
|
|
i[0] = det3x3(m[5],m[ 6],m[ 7],
|
102 : |
|
|
m[ 9],m[10],m[11],
|
103 : |
|
|
m[13],m[14],m[15])/det;
|
104 : |
|
|
|
105 : |
|
|
i[ 1] = -det3x3(m[ 1],m[ 2],m[ 3],
|
106 : |
|
|
m[ 9],m[10],m[11],
|
107 : |
|
|
m[13],m[14],m[15])/det;
|
108 : |
|
|
|
109 : |
|
|
i[ 2] = det3x3(m[ 1],m[ 2],m[ 3],
|
110 : |
|
|
m[ 5],m[ 6],m[ 7],
|
111 : |
|
|
m[13],m[14],m[15])/det;
|
112 : |
|
|
|
113 : |
|
|
i[ 3] = -det3x3(m[ 1],m[ 2],m[ 3],
|
114 : |
|
|
m[ 5],m[ 6],m[ 7],
|
115 : |
|
|
m[ 9],m[10],m[11])/det;
|
116 : |
|
|
|
117 : |
|
|
i[ 4] = -det3x3(m[ 4],m[ 6],m[ 7],
|
118 : |
|
|
m[ 8],m[10],m[11],
|
119 : |
|
|
m[12],m[14],m[15])/det;
|
120 : |
|
|
|
121 : |
|
|
i[ 5] = det3x3(m[ 0],m[ 2],m[ 3],
|
122 : |
|
|
m[ 8],m[10],m[11],
|
123 : |
|
|
m[12],m[14],m[15])/det;
|
124 : |
|
|
|
125 : |
|
|
i[ 6] = -det3x3(m[ 0],m[ 2],m[ 3],
|
126 : |
|
|
m[ 4],m[ 6],m[ 7],
|
127 : |
|
|
m[12],m[14],m[15])/det;
|
128 : |
|
|
|
129 : |
|
|
i[ 7] = det3x3(m[ 0],m[ 2],m[ 3],
|
130 : |
|
|
m[ 4],m[ 6],m[ 7],
|
131 : |
|
|
m[ 8],m[10],m[11])/det;
|
132 : |
|
|
|
133 : |
|
|
i[ 8] = det3x3(m[ 4],m[ 5],m[ 7],
|
134 : |
|
|
m[ 8],m[ 9],m[11],
|
135 : |
|
|
m[12],m[13],m[15])/det;
|
136 : |
|
|
|
137 : |
|
|
i[ 9] = -det3x3(m[ 0],m[ 1],m[ 3],
|
138 : |
|
|
m[ 8],m[ 9],m[11],
|
139 : |
|
|
m[12],m[13],m[15])/det;
|
140 : |
|
|
|
141 : |
|
|
i[10] = det3x3(m[ 0],m[ 1],m[ 3],
|
142 : |
|
|
m[ 4],m[ 5],m[ 7],
|
143 : |
|
|
m[12],m[13],m[15])/det;
|
144 : |
|
|
|
145 : |
|
|
i[11] = -det3x3(m[ 0],m[ 1],m[ 3],
|
146 : |
|
|
m[ 4],m[ 5],m[ 7],
|
147 : |
|
|
m[ 8],m[ 9],m[11])/det;
|
148 : |
|
|
|
149 : |
|
|
i[12] = -det3x3(m[ 4],m[ 5],m[ 6],
|
150 : |
|
|
m[ 8],m[ 9],m[10],
|
151 : |
|
|
m[12],m[13],m[14])/det;
|
152 : |
|
|
|
153 : |
|
|
i[13] = det3x3(m[ 0],m[ 1],m[ 2],
|
154 : |
|
|
m[ 8],m[ 9],m[10],
|
155 : |
|
|
m[12],m[13],m[14])/det;
|
156 : |
|
|
|
157 : |
|
|
i[14] = -det3x3(m[ 0],m[ 1],m[ 2],
|
158 : |
|
|
m[ 4],m[ 5],m[ 6],
|
159 : |
|
|
m[12],m[13],m[14])/det;
|
160 : |
|
|
|
161 : |
|
|
i[15] = det3x3(m[ 0],m[ 1],m[ 2],
|
162 : |
|
|
m[ 4],m[ 5],m[ 6],
|
163 : |
|
|
m[ 8],m[ 9],m[10])/det;
|
164 : |
|
|
}
|
165 : |
|
|
void printMatrix(float * matrix, int rowSize)
|
166 : |
|
|
{
|
167 : |
|
|
int index = 0, end = 1, arraySize = rowSize * rowSize;
|
168 : |
|
|
|
169 : |
jhr |
218 |
for(index = 0; index < arraySize; index++)
|
170 : |
lamonts |
177 |
{
|
171 : |
jhr |
218 |
if(end == rowSize)
|
172 : |
lamonts |
177 |
{
|
173 : |
|
|
printf(" %.2f\n",matrix[index]);
|
174 : |
|
|
end = 1;
|
175 : |
|
|
}
|
176 : |
|
|
else
|
177 : |
|
|
{
|
178 : |
|
|
printf(" %.2f ",matrix[index]);
|
179 : |
|
|
end++;
|
180 : |
|
|
}
|
181 : |
|
|
}
|
182 : |
|
|
printf("\n");
|
183 : |
|
|
}
|
184 : |
|
|
void loadTransformMatrix(Nrrd * nin, cl_float16 transformMatrix)
|
185 : |
|
|
{
|
186 : |
|
|
int i,j, size = nin->spaceDim;
|
187 : |
|
|
NrrdAxisInfo axisInfo;
|
188 : |
|
|
|
189 : |
|
|
//Image axis Scaling and Rotation
|
190 : |
|
|
for(i = 0; i < size; i++)
|
191 : |
|
|
{
|
192 : |
|
|
axisInfo = nin->axis[i];
|
193 : |
|
|
for(j = 0; j < size; j++)
|
194 : |
|
|
{
|
195 : |
|
|
transformMatrix[ (size+ 1) * j + i] = axisInfo.spaceDirection[j];
|
196 : |
|
|
}
|
197 : |
|
|
|
198 : |
|
|
//Image Location
|
199 : |
|
|
transformMatrix[ (i * (size + 1)) + size] = nin->spaceOrigin[i];
|
200 : |
|
|
|
201 : |
|
|
//Bottom row of the Transform Matrix
|
202 : |
|
|
transformMatrix[((size + 1) * (size)) + i ] = 0;
|
203 : |
|
|
}
|
204 : |
jhr |
218 |
transformMatrix[((size + 1) * (size)) + size ] = 1;
|
205 : |
|
|
|
206 : |
lamonts |
177 |
}
|
207 : |
|
|
Nrrd * loadNrrdFile(char * filename)
|
208 : |
|
|
{
|
209 : |
|
|
/* create a nrrd; at this point this is just an empty container */
|
210 : |
|
|
Nrrd * nin;
|
211 : |
|
|
|
212 : |
|
|
nin = nrrdNew();
|
213 : |
|
|
char *err;
|
214 : |
|
|
|
215 : |
|
|
/* read in the nrrd from file */
|
216 : |
|
|
if (nrrdLoad(nin, filename, NULL)) {
|
217 : |
|
|
err = biffGetDone(NRRD);
|
218 : |
|
|
fprintf(stderr, "Mip: trouble reading \"%s\":\n%s", filename, err);
|
219 : |
|
|
free(err);
|
220 : |
|
|
return NULL;
|
221 : |
|
|
}
|
222 : |
|
|
|
223 : |
|
|
/* say something about the array
|
224 : |
|
|
printf("Mip: \"%s\" is a %d-dimensional nrrd of type %d (%s)\n",
|
225 : |
|
|
filename, nin->dim, nin->type,
|
226 : |
|
|
airEnumStr(nrrdType, nin->type));
|
227 : |
|
|
printf("Mip: the array contains %d elements, each %d bytes in size\n",
|
228 : |
|
|
(int)nrrdElementNumber(nin), (int)nrrdElementSize(nin));*/
|
229 : |
|
|
|
230 : |
|
|
return nin;
|
231 : |
|
|
|
232 : |
|
|
}
|
233 : |
lamonts |
302 |
int exe_MIP_Kernel(float * img, int imageDataSize, float inverseMatrix[16], int sAxis[3], float * out)
|
234 : |
lamonts |
177 |
{
|
235 : |
|
|
|
236 : |
|
|
cl_program program;
|
237 : |
|
|
cl_kernel kernel;
|
238 : |
|
|
|
239 : |
|
|
cl_command_queue queue;
|
240 : |
|
|
cl_context context;
|
241 : |
|
|
|
242 : |
|
|
cl_device_id cpu = NULL, device = NULL;
|
243 : |
|
|
|
244 : |
|
|
cl_int err = 0;
|
245 : |
lamonts |
302 |
|
246 : |
lamonts |
177 |
|
247 : |
lamonts |
302 |
cl_mem imageData_mem, out_mem,sAxis_mem;
|
248 : |
lamonts |
177 |
|
249 : |
|
|
/** Setup Device **/
|
250 : |
|
|
err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL);
|
251 : |
|
|
assert(err==CL_SUCCESS);
|
252 : |
|
|
|
253 : |
|
|
err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_GPU,1,&device,NULL);
|
254 : |
|
|
//if(err != CL_SUCCESS)
|
255 : |
|
|
device = cpu;
|
256 : |
|
|
|
257 : |
|
|
assert(device);
|
258 : |
|
|
|
259 : |
|
|
/* Setup Context and Command Queue */
|
260 : |
|
|
context = clCreateContext(0,1,&device,NULL,NULL,&err);
|
261 : |
|
|
assert(err == CL_SUCCESS);
|
262 : |
|
|
|
263 : |
|
|
queue = clCreateCommandQueue(context,device,0,NULL);
|
264 : |
|
|
|
265 : |
|
|
/** Load the Kernel and Program **/
|
266 : |
|
|
const char * filename = "mip.cl";
|
267 : |
|
|
char * kernel_source = loadKernel(filename);
|
268 : |
|
|
|
269 : |
|
|
assert(kernel_source != 0);
|
270 : |
|
|
|
271 : |
|
|
program = clCreateProgramWithSource(context,1,(const char **)&kernel_source,NULL,&err);
|
272 : |
|
|
|
273 : |
|
|
assert(err == CL_SUCCESS);
|
274 : |
|
|
|
275 : |
|
|
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
276 : |
|
|
|
277 : |
|
|
|
278 : |
|
|
/** Retrieve information about the program build to check for any possible errors **/
|
279 : |
|
|
char * build_log;
|
280 : |
|
|
size_t log_size;
|
281 : |
|
|
|
282 : |
|
|
// First call to know the proper size
|
283 : |
|
|
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
|
284 : |
|
|
build_log = (char *) malloc(log_size+1);
|
285 : |
|
|
// Second call to get the log
|
286 : |
|
|
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
|
287 : |
|
|
build_log[log_size] = '\0';
|
288 : |
|
|
printf("\nBuild Log:\n%s\n",build_log);
|
289 : |
|
|
free(build_log);
|
290 : |
|
|
|
291 : |
|
|
assert(err == CL_SUCCESS);
|
292 : |
|
|
|
293 : |
lamonts |
302 |
kernel = clCreateKernel(program,"mip",&err);
|
294 : |
lamonts |
177 |
|
295 : |
|
|
assert(err == CL_SUCCESS);
|
296 : |
|
|
|
297 : |
|
|
/** Memory Allocation for the Matrices **/
|
298 : |
|
|
|
299 : |
lamonts |
191 |
imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL);
|
300 : |
|
|
err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize,
|
301 : |
lamonts |
302 |
(void *)img ,0,NULL,NULL);
|
302 : |
lamonts |
177 |
|
303 : |
lamonts |
302 |
sAxis_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(int) * 3,NULL,NULL);
|
304 : |
|
|
err |= clEnqueueWriteBuffer(queue,sAxis_mem,CL_TRUE,0,sizeof(int) * 3,
|
305 : |
|
|
(void *)sAxis ,0,NULL,NULL);
|
306 : |
lamonts |
177 |
|
307 : |
|
|
assert(err == CL_SUCCESS);
|
308 : |
|
|
|
309 : |
|
|
out_mem = clCreateBuffer(context,CL_MEM_READ_WRITE, sizeof(float) * (SIZE *SIZE),NULL,NULL);
|
310 : |
|
|
|
311 : |
|
|
clFinish(queue);
|
312 : |
|
|
|
313 : |
|
|
size_t global_work_size[2], local_work_size[2];
|
314 : |
|
|
|
315 : |
|
|
global_work_size[0] = 256;
|
316 : |
|
|
global_work_size[1] = 256;
|
317 : |
|
|
|
318 : |
|
|
local_work_size[0] = 1;
|
319 : |
|
|
local_work_size[1] = 1;
|
320 : |
|
|
|
321 : |
lamonts |
302 |
cl_int2 workDim = {SIZE,SIZE};
|
322 : |
lamonts |
177 |
|
323 : |
lamonts |
302 |
int index = 0;
|
324 : |
|
|
|
325 : |
|
|
err =clSetKernelArg(kernel,index++,sizeof(cl_mem), &imageData_mem);
|
326 : |
|
|
err |=clSetKernelArg(kernel,index++,sizeof(cl_mem), &out_mem);
|
327 : |
|
|
err |=clSetKernelArg(kernel,index++,sizeof(cl_float16), inverseMatrix);
|
328 : |
|
|
err |=clSetKernelArg(kernel,index++,sizeof(cl_int2), &workDim);
|
329 : |
|
|
err |=clSetKernelArg(kernel,index++,sizeof(cl_mem), &sAxis_mem);
|
330 : |
|
|
|
331 : |
|
|
printf("error:%d\n",err);
|
332 : |
lamonts |
177 |
assert(err == CL_SUCCESS);
|
333 : |
lamonts |
302 |
|
334 : |
lamonts |
177 |
|
335 : |
|
|
err = clEnqueueNDRangeKernel(queue,kernel,2,NULL,global_work_size,
|
336 : |
|
|
local_work_size,0,NULL,NULL);
|
337 : |
|
|
|
338 : |
|
|
assert(err == CL_SUCCESS);
|
339 : |
|
|
|
340 : |
|
|
clFinish(queue);
|
341 : |
|
|
|
342 : |
|
|
err = clEnqueueReadBuffer(queue,out_mem,CL_TRUE,0, sizeof(float) * (SIZE *SIZE),out,0,NULL,NULL);
|
343 : |
lamonts |
191 |
|
344 : |
lamonts |
203 |
saveResults(out,SIZE * SIZE);
|
345 : |
lamonts |
177 |
|
346 : |
|
|
clReleaseKernel(kernel);
|
347 : |
|
|
clReleaseProgram(program);
|
348 : |
|
|
clReleaseCommandQueue(queue);
|
349 : |
|
|
clReleaseContext(context);
|
350 : |
|
|
clReleaseMemObject(imageData_mem);
|
351 : |
|
|
clReleaseMemObject(out_mem);
|
352 : |
|
|
|
353 : |
|
|
return CL_SUCCESS;
|
354 : |
jhr |
218 |
}
|
355 : |
lamonts |
177 |
int main (int argc, char ** argv)
|
356 : |
|
|
{
|
357 : |
lamonts |
302 |
char * dataFile = "txs.nrrd";
|
358 : |
|
|
float transformMatrix[16];
|
359 : |
|
|
float inverseMatrix[16];
|
360 : |
|
|
|
361 : |
lamonts |
177 |
float * out;
|
362 : |
|
|
out = (float *) malloc(sizeof(float) * (SIZE * SIZE));
|
363 : |
lamonts |
302 |
Nrrd * nin = loadNrrdFile(dataFile);
|
364 : |
lamonts |
177 |
|
365 : |
lamonts |
302 |
int sAxis[] = {nin->axis[0].size, nin->axis[1].size,nin->axis[2].size};
|
366 : |
|
|
|
367 : |
|
|
loadTransformMatrix(nin,transformMatrix);
|
368 : |
|
|
invMatrix(transformMatrix,inverseMatrix);
|
369 : |
|
|
|
370 : |
lamonts |
177 |
|
371 : |
lamonts |
302 |
exe_MIP_Kernel((float *)nin->data, (int)nrrdElementNumber(nin),inverseMatrix, sAxis, out);
|
372 : |
lamonts |
177 |
|
373 : |
|
|
return 0;
|
374 : |
|
|
}
|