1 : |
lamonts |
203 |
/**
|
2 : |
|
|
*
|
3 : |
|
|
* To View the Image
|
4 : |
|
|
* =========================
|
5 : |
|
|
* ./unu reshape -i mip.txt -s 200 200 | ./unu quantize -b 8 -o new.png
|
6 : |
|
|
*/
|
7 : |
lamonts |
177 |
#include <OpenCL/OpenCl.h>
|
8 : |
|
|
#include <assert.h>
|
9 : |
|
|
#include <stdio.h>
|
10 : |
|
|
#include <stdlib.h>
|
11 : |
|
|
#include <sys/sysctl.h>
|
12 : |
|
|
#include <sys/stat.h>
|
13 : |
|
|
|
14 : |
|
|
#include <teem/nrrd.h>
|
15 : |
|
|
|
16 : |
|
|
#define SIZE 200
|
17 : |
|
|
|
18 : |
|
|
/*typedef float vec3[3];
|
19 : |
|
|
|
20 : |
|
|
typedef struct {
|
21 : |
|
|
int degree;
|
22 : |
|
|
float coeff[];
|
23 : |
|
|
} polynomial;
|
24 : |
|
|
|
25 : |
|
|
typedef struct {
|
26 : |
|
|
int support;
|
27 : |
|
|
polynomial *segments[];
|
28 : |
|
|
} kernel; */
|
29 : |
|
|
|
30 : |
|
|
|
31 : |
|
|
int device_stats(cl_device_id device_id){
|
32 : |
|
|
|
33 : |
|
|
int err,i;
|
34 : |
|
|
size_t returned_size;
|
35 : |
|
|
|
36 : |
|
|
// Report the device vendor and device name
|
37 : |
|
|
//
|
38 : |
|
|
cl_char vendor_name[1024] = {0};
|
39 : |
|
|
cl_char device_name[1024] = {0};
|
40 : |
|
|
cl_char device_profile[1024] = {0};
|
41 : |
|
|
cl_char device_extensions[1024] = {0};
|
42 : |
|
|
cl_device_local_mem_type local_mem_type;
|
43 : |
|
|
|
44 : |
|
|
cl_ulong global_mem_size, global_mem_cache_size;
|
45 : |
|
|
cl_ulong max_mem_alloc_size;
|
46 : |
|
|
|
47 : |
|
|
cl_uint clock_frequency, vector_width, max_compute_units;
|
48 : |
|
|
|
49 : |
|
|
size_t max_work_item_dims,max_work_group_size, max_work_item_sizes[3];
|
50 : |
|
|
|
51 : |
|
|
cl_uint vector_types[] = {CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE};
|
52 : |
|
|
char *vector_type_names[] = {"char","short","int","long","float","double"};
|
53 : |
|
|
|
54 : |
|
|
err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);
|
55 : |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);
|
56 : |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(device_profile), device_profile, &returned_size);
|
57 : |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, sizeof(device_extensions), device_extensions, &returned_size);
|
58 : |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, &returned_size);
|
59 : |
|
|
|
60 : |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, &returned_size);
|
61 : |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(global_mem_cache_size), &global_mem_cache_size, &returned_size);
|
62 : |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, &returned_size);
|
63 : |
|
|
|
64 : |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, &returned_size);
|
65 : |
|
|
|
66 : |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, &returned_size);
|
67 : |
|
|
|
68 : |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_work_item_dims), &max_work_item_dims, &returned_size);
|
69 : |
|
|
|
70 : |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_work_item_sizes), max_work_item_sizes, &returned_size);
|
71 : |
|
|
|
72 : |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, &returned_size);
|
73 : |
|
|
|
74 : |
|
|
printf("Vendor: %s\n", vendor_name);
|
75 : |
|
|
printf("Device Name: %s\n", device_name);
|
76 : |
|
|
printf("Profile: %s\n", device_profile);
|
77 : |
|
|
printf("Supported Extensions: %s\n\n", device_extensions);
|
78 : |
|
|
|
79 : |
|
|
printf("Local Mem Type (Local=1, Global=2): %i\n",(int)local_mem_type);
|
80 : |
|
|
printf("Global Mem Size (MB): %i\n",(int)global_mem_size/(1024*1024));
|
81 : |
|
|
printf("Global Mem Cache Size (Bytes): %i\n",(int)global_mem_cache_size);
|
82 : |
|
|
printf("Max Mem Alloc Size (MB): %ld\n",(long int)max_mem_alloc_size/(1024*1024));
|
83 : |
|
|
|
84 : |
|
|
printf("Clock Frequency (MHz): %i\n\n",clock_frequency);
|
85 : |
|
|
|
86 : |
|
|
for(i=0;i<6;i++){
|
87 : |
|
|
err|= clGetDeviceInfo(device_id, vector_types[i], sizeof(clock_frequency), &vector_width, &returned_size);
|
88 : |
|
|
printf("Vector type width for: %s = %i\n",vector_type_names[i],vector_width);
|
89 : |
|
|
}
|
90 : |
|
|
|
91 : |
|
|
printf("\nMax Work Group Size: %lu\n",max_work_group_size);
|
92 : |
|
|
//printf("Max Work Item Dims: %lu\n",max_work_item_dims);
|
93 : |
|
|
//for(size_t i=0;i<max_work_item_dims;i++)
|
94 : |
|
|
// printf("Max Work Items in Dim %lu: %lu\n",(long unsigned)(i+1),(long unsigned)max_work_item_sizes[i]);
|
95 : |
|
|
|
96 : |
|
|
printf("Max Compute Units: %i\n",max_compute_units);
|
97 : |
|
|
printf("\n");
|
98 : |
|
|
|
99 : |
|
|
return CL_SUCCESS;
|
100 : |
|
|
}
|
101 : |
|
|
//Loads the Kernel from a file
|
102 : |
|
|
char * loadKernel (const char * filename)
|
103 : |
|
|
{
|
104 : |
|
|
struct stat statbuf;
|
105 : |
|
|
FILE *fh;
|
106 : |
|
|
char *source;
|
107 : |
|
|
|
108 : |
|
|
fh = fopen(filename, "r");
|
109 : |
|
|
if (fh == 0)
|
110 : |
|
|
return 0;
|
111 : |
|
|
|
112 : |
|
|
stat(filename, &statbuf);
|
113 : |
|
|
source = (char *) malloc(statbuf.st_size + 1);
|
114 : |
|
|
fread(source, statbuf.st_size, 1, fh);
|
115 : |
|
|
source[statbuf.st_size] = '\0';
|
116 : |
|
|
|
117 : |
|
|
return source;
|
118 : |
|
|
}
|
119 : |
lamonts |
203 |
void saveResults (float * matrix, int size)
|
120 : |
|
|
{
|
121 : |
|
|
int i;
|
122 : |
|
|
float max = -INFINITY;
|
123 : |
|
|
FILE * out_file;
|
124 : |
|
|
out_file = fopen("mip.txt", "w");
|
125 : |
|
|
if (out_file == NULL) {
|
126 : |
|
|
fprintf(stderr,"Can not open output file\n");
|
127 : |
|
|
exit (8);
|
128 : |
|
|
}
|
129 : |
|
|
|
130 : |
|
|
for(i = 0; i < size; i++)
|
131 : |
|
|
{
|
132 : |
|
|
if(matrix[i] == -INFINITY || matrix[i] < 0)
|
133 : |
|
|
fprintf(out_file,"%f\n",0.0f);
|
134 : |
|
|
else
|
135 : |
|
|
fprintf(out_file,"%f\n",matrix[i]);
|
136 : |
|
|
|
137 : |
|
|
if(matrix[i] > max)
|
138 : |
|
|
max = matrix[i];
|
139 : |
|
|
|
140 : |
|
|
}
|
141 : |
|
|
printf("Max: %f\n",max);
|
142 : |
|
|
fclose(out_file);
|
143 : |
|
|
|
144 : |
|
|
|
145 : |
|
|
}
|
146 : |
lamonts |
177 |
float det3x3(float a, float b, float c, float d, float e, float f, float g, float h, float i)
|
147 : |
|
|
{
|
148 : |
|
|
return ( (a)*(e)*(i)
|
149 : |
|
|
+ (d)*(h)*(c)
|
150 : |
|
|
+ (g)*(b)*(f)
|
151 : |
|
|
- (g)*(e)*(c)
|
152 : |
|
|
- (d)*(b)*(i)
|
153 : |
|
|
- (a)*(h)*(f));
|
154 : |
|
|
}
|
155 : |
|
|
float det4x4(cl_float16 m)
|
156 : |
|
|
{
|
157 : |
|
|
return (m[ 0] * det3x3(m[ 5], m[ 6], m[ 7],
|
158 : |
|
|
m[ 9], m[10], m[11],
|
159 : |
|
|
m[13], m[14], m[15])
|
160 : |
|
|
|
161 : |
|
|
- m[ 1] * det3x3(m[ 4], m[ 6], m[ 7],
|
162 : |
|
|
m[ 8], m[10], m[11],
|
163 : |
|
|
m[12], m[14], m[15])
|
164 : |
|
|
+ m[ 2] * det3x3(m[ 4], m[ 5], m[ 7],
|
165 : |
|
|
m[ 8], m[ 9], m[11],
|
166 : |
|
|
m[12], m[13], m[15])
|
167 : |
|
|
|
168 : |
|
|
- m[ 3] * det3x3(m[ 4], m[ 5], m[ 6],
|
169 : |
|
|
m[ 8], m[ 9], m[10],
|
170 : |
|
|
m[12], m[13], m[14]));
|
171 : |
|
|
|
172 : |
|
|
|
173 : |
|
|
|
174 : |
|
|
}
|
175 : |
|
|
void invMatrix(cl_float16 m, cl_float16 i)
|
176 : |
|
|
{
|
177 : |
|
|
float det = det4x4(m);
|
178 : |
|
|
|
179 : |
|
|
|
180 : |
|
|
i[0] = det3x3(m[5],m[ 6],m[ 7],
|
181 : |
|
|
m[ 9],m[10],m[11],
|
182 : |
|
|
m[13],m[14],m[15])/det;
|
183 : |
|
|
|
184 : |
|
|
i[ 1] = -det3x3(m[ 1],m[ 2],m[ 3],
|
185 : |
|
|
m[ 9],m[10],m[11],
|
186 : |
|
|
m[13],m[14],m[15])/det;
|
187 : |
|
|
|
188 : |
|
|
i[ 2] = det3x3(m[ 1],m[ 2],m[ 3],
|
189 : |
|
|
m[ 5],m[ 6],m[ 7],
|
190 : |
|
|
m[13],m[14],m[15])/det;
|
191 : |
|
|
|
192 : |
|
|
i[ 3] = -det3x3(m[ 1],m[ 2],m[ 3],
|
193 : |
|
|
m[ 5],m[ 6],m[ 7],
|
194 : |
|
|
m[ 9],m[10],m[11])/det;
|
195 : |
|
|
|
196 : |
|
|
i[ 4] = -det3x3(m[ 4],m[ 6],m[ 7],
|
197 : |
|
|
m[ 8],m[10],m[11],
|
198 : |
|
|
m[12],m[14],m[15])/det;
|
199 : |
|
|
|
200 : |
|
|
i[ 5] = det3x3(m[ 0],m[ 2],m[ 3],
|
201 : |
|
|
m[ 8],m[10],m[11],
|
202 : |
|
|
m[12],m[14],m[15])/det;
|
203 : |
|
|
|
204 : |
|
|
i[ 6] = -det3x3(m[ 0],m[ 2],m[ 3],
|
205 : |
|
|
m[ 4],m[ 6],m[ 7],
|
206 : |
|
|
m[12],m[14],m[15])/det;
|
207 : |
|
|
|
208 : |
|
|
i[ 7] = det3x3(m[ 0],m[ 2],m[ 3],
|
209 : |
|
|
m[ 4],m[ 6],m[ 7],
|
210 : |
|
|
m[ 8],m[10],m[11])/det;
|
211 : |
|
|
|
212 : |
|
|
i[ 8] = det3x3(m[ 4],m[ 5],m[ 7],
|
213 : |
|
|
m[ 8],m[ 9],m[11],
|
214 : |
|
|
m[12],m[13],m[15])/det;
|
215 : |
|
|
|
216 : |
|
|
i[ 9] = -det3x3(m[ 0],m[ 1],m[ 3],
|
217 : |
|
|
m[ 8],m[ 9],m[11],
|
218 : |
|
|
m[12],m[13],m[15])/det;
|
219 : |
|
|
|
220 : |
|
|
i[10] = det3x3(m[ 0],m[ 1],m[ 3],
|
221 : |
|
|
m[ 4],m[ 5],m[ 7],
|
222 : |
|
|
m[12],m[13],m[15])/det;
|
223 : |
|
|
|
224 : |
|
|
i[11] = -det3x3(m[ 0],m[ 1],m[ 3],
|
225 : |
|
|
m[ 4],m[ 5],m[ 7],
|
226 : |
|
|
m[ 8],m[ 9],m[11])/det;
|
227 : |
|
|
|
228 : |
|
|
i[12] = -det3x3(m[ 4],m[ 5],m[ 6],
|
229 : |
|
|
m[ 8],m[ 9],m[10],
|
230 : |
|
|
m[12],m[13],m[14])/det;
|
231 : |
|
|
|
232 : |
|
|
i[13] = det3x3(m[ 0],m[ 1],m[ 2],
|
233 : |
|
|
m[ 8],m[ 9],m[10],
|
234 : |
|
|
m[12],m[13],m[14])/det;
|
235 : |
|
|
|
236 : |
|
|
i[14] = -det3x3(m[ 0],m[ 1],m[ 2],
|
237 : |
|
|
m[ 4],m[ 5],m[ 6],
|
238 : |
|
|
m[12],m[13],m[14])/det;
|
239 : |
|
|
|
240 : |
|
|
i[15] = det3x3(m[ 0],m[ 1],m[ 2],
|
241 : |
|
|
m[ 4],m[ 5],m[ 6],
|
242 : |
|
|
m[ 8],m[ 9],m[10])/det;
|
243 : |
|
|
}
|
244 : |
|
|
void printMatrix(float * matrix, int rowSize)
|
245 : |
|
|
{
|
246 : |
|
|
int index = 0, end = 1, arraySize = rowSize * rowSize;
|
247 : |
|
|
|
248 : |
lamonts |
203 |
for(index = 1000; index < 1256; index++)
|
249 : |
lamonts |
177 |
{
|
250 : |
lamonts |
203 |
if(end == 16)
|
251 : |
lamonts |
177 |
{
|
252 : |
|
|
printf(" %.2f\n",matrix[index]);
|
253 : |
|
|
end = 1;
|
254 : |
|
|
}
|
255 : |
|
|
else
|
256 : |
|
|
{
|
257 : |
|
|
printf(" %.2f ",matrix[index]);
|
258 : |
|
|
end++;
|
259 : |
|
|
}
|
260 : |
|
|
}
|
261 : |
|
|
printf("\n");
|
262 : |
|
|
}
|
263 : |
|
|
void loadTransformMatrix(Nrrd * nin, cl_float16 transformMatrix)
|
264 : |
|
|
{
|
265 : |
|
|
int i,j, size = nin->spaceDim;
|
266 : |
|
|
NrrdAxisInfo axisInfo;
|
267 : |
|
|
|
268 : |
|
|
//Image axis Scaling and Rotation
|
269 : |
|
|
for(i = 0; i < size; i++)
|
270 : |
|
|
{
|
271 : |
|
|
axisInfo = nin->axis[i];
|
272 : |
|
|
for(j = 0; j < size; j++)
|
273 : |
|
|
{
|
274 : |
|
|
transformMatrix[ (size+ 1) * j + i] = axisInfo.spaceDirection[j];
|
275 : |
|
|
}
|
276 : |
|
|
|
277 : |
|
|
//Image Location
|
278 : |
|
|
transformMatrix[ (i * (size + 1)) + size] = nin->spaceOrigin[i];
|
279 : |
|
|
|
280 : |
|
|
//Bottom row of the Transform Matrix
|
281 : |
|
|
transformMatrix[((size + 1) * (size)) + i ] = 0;
|
282 : |
|
|
}
|
283 : |
|
|
transformMatrix[((size + 1) * (size)) + size ] = 1;
|
284 : |
|
|
}
|
285 : |
|
|
Nrrd * loadNrrdFile(char * filename)
|
286 : |
|
|
{
|
287 : |
|
|
/* create a nrrd; at this point this is just an empty container */
|
288 : |
|
|
Nrrd * nin;
|
289 : |
|
|
|
290 : |
|
|
nin = nrrdNew();
|
291 : |
|
|
char *err;
|
292 : |
|
|
|
293 : |
|
|
/* read in the nrrd from file */
|
294 : |
|
|
if (nrrdLoad(nin, filename, NULL)) {
|
295 : |
|
|
err = biffGetDone(NRRD);
|
296 : |
|
|
fprintf(stderr, "Mip: trouble reading \"%s\":\n%s", filename, err);
|
297 : |
|
|
free(err);
|
298 : |
|
|
return NULL;
|
299 : |
|
|
}
|
300 : |
|
|
|
301 : |
|
|
/* say something about the array
|
302 : |
|
|
printf("Mip: \"%s\" is a %d-dimensional nrrd of type %d (%s)\n",
|
303 : |
|
|
filename, nin->dim, nin->type,
|
304 : |
|
|
airEnumStr(nrrdType, nin->type));
|
305 : |
|
|
printf("Mip: the array contains %d elements, each %d bytes in size\n",
|
306 : |
|
|
(int)nrrdElementNumber(nin), (int)nrrdElementSize(nin));*/
|
307 : |
|
|
|
308 : |
|
|
return nin;
|
309 : |
|
|
|
310 : |
|
|
}
|
311 : |
|
|
int exe_MIP_Kernel(Nrrd * nin, float stepSize, cl_float4 eyeVec, cl_float4 origVec,
|
312 : |
|
|
cl_float4 cVec, cl_float4 rVec, float * h1, float * h2, float * out)
|
313 : |
|
|
{
|
314 : |
|
|
|
315 : |
|
|
cl_program program;
|
316 : |
|
|
cl_kernel kernel;
|
317 : |
|
|
|
318 : |
|
|
cl_command_queue queue;
|
319 : |
|
|
cl_context context;
|
320 : |
|
|
|
321 : |
|
|
cl_device_id cpu = NULL, device = NULL;
|
322 : |
|
|
|
323 : |
|
|
cl_int err = 0;
|
324 : |
|
|
|
325 : |
|
|
cl_float16 transformMatrix;
|
326 : |
|
|
cl_float16 inverseMatrix;
|
327 : |
|
|
|
328 : |
|
|
int imageDataSize = (int)nrrdElementNumber(nin);
|
329 : |
|
|
|
330 : |
lamonts |
191 |
float * data = (float *)nin->data;
|
331 : |
|
|
printf("Data Image: %f\n", (float)data[4* nin->axis[1].size * nin->axis[2].size + 5 * nin->axis[2].size + 2]);
|
332 : |
|
|
|
333 : |
lamonts |
177 |
cl_mem imageData_mem, out_mem, h1_mem, h2_mem;
|
334 : |
|
|
|
335 : |
|
|
|
336 : |
|
|
/** Setup Device **/
|
337 : |
|
|
err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL);
|
338 : |
|
|
assert(err==CL_SUCCESS);
|
339 : |
|
|
|
340 : |
|
|
err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_GPU,1,&device,NULL);
|
341 : |
|
|
//if(err != CL_SUCCESS)
|
342 : |
|
|
device = cpu;
|
343 : |
|
|
|
344 : |
|
|
assert(device);
|
345 : |
|
|
|
346 : |
|
|
|
347 : |
|
|
/** Retrieve Information about the device
|
348 : |
|
|
cl_char vendor_name[1024] = {0};
|
349 : |
|
|
cl_char device_name[1024] = {0};
|
350 : |
|
|
err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);
|
351 : |
|
|
err|= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);
|
352 : |
|
|
printf("Connecting to %s %s...\n", vendor_name, device_name);
|
353 : |
|
|
device_stats(device); */
|
354 : |
|
|
|
355 : |
|
|
|
356 : |
|
|
/* Setup Context and Command Queue */
|
357 : |
|
|
context = clCreateContext(0,1,&device,NULL,NULL,&err);
|
358 : |
|
|
assert(err == CL_SUCCESS);
|
359 : |
|
|
|
360 : |
|
|
queue = clCreateCommandQueue(context,device,0,NULL);
|
361 : |
|
|
|
362 : |
|
|
/** Load the Kernel and Program **/
|
363 : |
|
|
const char * filename = "mip.cl";
|
364 : |
|
|
char * kernel_source = loadKernel(filename);
|
365 : |
|
|
|
366 : |
|
|
assert(kernel_source != 0);
|
367 : |
|
|
|
368 : |
|
|
program = clCreateProgramWithSource(context,1,(const char **)&kernel_source,NULL,&err);
|
369 : |
|
|
|
370 : |
|
|
assert(err == CL_SUCCESS);
|
371 : |
|
|
|
372 : |
|
|
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
373 : |
|
|
|
374 : |
|
|
|
375 : |
|
|
/** Retrieve information about the program build to check for any possible errors **/
|
376 : |
|
|
char * build_log;
|
377 : |
|
|
size_t log_size;
|
378 : |
|
|
|
379 : |
|
|
// First call to know the proper size
|
380 : |
|
|
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
|
381 : |
|
|
build_log = (char *) malloc(log_size+1);
|
382 : |
|
|
// Second call to get the log
|
383 : |
|
|
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
|
384 : |
|
|
build_log[log_size] = '\0';
|
385 : |
|
|
printf("\nBuild Log:\n%s\n",build_log);
|
386 : |
|
|
free(build_log);
|
387 : |
|
|
|
388 : |
|
|
assert(err == CL_SUCCESS);
|
389 : |
|
|
|
390 : |
|
|
kernel = clCreateKernel(program,"raycast",&err);
|
391 : |
|
|
|
392 : |
|
|
assert(err == CL_SUCCESS);
|
393 : |
|
|
|
394 : |
|
|
/** Memory Allocation for the Matrices **/
|
395 : |
|
|
|
396 : |
|
|
h1_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL);
|
397 : |
|
|
err |= clEnqueueWriteBuffer(queue,h1_mem,CL_TRUE,0,sizeof(float) * 4,
|
398 : |
|
|
(void *)h1 ,0,NULL,NULL);
|
399 : |
|
|
|
400 : |
|
|
h2_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL);
|
401 : |
|
|
err |= clEnqueueWriteBuffer(queue,h2_mem,CL_TRUE,0,sizeof(float) * 4,
|
402 : |
|
|
(void *)h2 ,0,NULL,NULL);
|
403 : |
|
|
|
404 : |
|
|
|
405 : |
lamonts |
191 |
imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL);
|
406 : |
|
|
err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize,
|
407 : |
lamonts |
177 |
nin->data ,0,NULL,NULL);
|
408 : |
|
|
|
409 : |
|
|
//Load the transformMatrix
|
410 : |
|
|
loadTransformMatrix(nin,transformMatrix);
|
411 : |
|
|
invMatrix(transformMatrix,inverseMatrix);
|
412 : |
|
|
|
413 : |
|
|
err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,imageDataSize,
|
414 : |
|
|
nin->data ,0,NULL,NULL);
|
415 : |
|
|
|
416 : |
|
|
assert(err == CL_SUCCESS);
|
417 : |
|
|
|
418 : |
|
|
out_mem = clCreateBuffer(context,CL_MEM_READ_WRITE, sizeof(float) * (SIZE *SIZE),NULL,NULL);
|
419 : |
|
|
|
420 : |
|
|
clFinish(queue);
|
421 : |
|
|
|
422 : |
|
|
size_t global_work_size[2], local_work_size[2];
|
423 : |
|
|
|
424 : |
|
|
global_work_size[0] = 256;
|
425 : |
|
|
global_work_size[1] = 256;
|
426 : |
|
|
|
427 : |
|
|
local_work_size[0] = 1;
|
428 : |
|
|
local_work_size[1] = 1;
|
429 : |
|
|
|
430 : |
|
|
err =clSetKernelArg(kernel,0,sizeof(cl_mem), &imageData_mem);
|
431 : |
|
|
err |=clSetKernelArg(kernel,1,sizeof(cl_mem), &h1_mem);
|
432 : |
|
|
err |=clSetKernelArg(kernel,2,sizeof(cl_mem), &h2_mem);
|
433 : |
|
|
err |=clSetKernelArg(kernel,3,sizeof(cl_mem), &out_mem);
|
434 : |
|
|
err |=clSetKernelArg(kernel,4,sizeof(cl_float4), origVec);
|
435 : |
|
|
err |=clSetKernelArg(kernel,5,sizeof(cl_float4), eyeVec);
|
436 : |
|
|
err |=clSetKernelArg(kernel,6,sizeof(cl_float4), cVec);
|
437 : |
|
|
err |=clSetKernelArg(kernel,7,sizeof(cl_float4), rVec);
|
438 : |
|
|
err |=clSetKernelArg(kernel,8,sizeof(cl_float16), &inverseMatrix);
|
439 : |
|
|
err |=clSetKernelArg(kernel,9,sizeof(float), &stepSize);
|
440 : |
|
|
err |=clSetKernelArg(kernel,10,sizeof(int), &nin->axis[1].size);
|
441 : |
lamonts |
203 |
err |=clSetKernelArg(kernel,11,sizeof(int), &nin->axis[2].size);
|
442 : |
|
|
err |=clSetKernelArg(kernel,12,sizeof(int), &nin->axis[0].size);
|
443 : |
lamonts |
177 |
|
444 : |
lamonts |
203 |
printf("Error: %d\n",err);
|
445 : |
lamonts |
177 |
assert(err == CL_SUCCESS);
|
446 : |
|
|
|
447 : |
|
|
/** Retrieve the Recommend Work Group Size */
|
448 : |
|
|
size_t thread_size;
|
449 : |
|
|
clGetKernelWorkGroupInfo(kernel,device,CL_KERNEL_WORK_GROUP_SIZE,
|
450 : |
|
|
sizeof(size_t),&thread_size,NULL);
|
451 : |
|
|
printf("Recommended Size: %lu\n",thread_size);
|
452 : |
|
|
|
453 : |
|
|
|
454 : |
|
|
err = clEnqueueNDRangeKernel(queue,kernel,2,NULL,global_work_size,
|
455 : |
|
|
local_work_size,0,NULL,NULL);
|
456 : |
|
|
|
457 : |
|
|
assert(err == CL_SUCCESS);
|
458 : |
|
|
|
459 : |
|
|
clFinish(queue);
|
460 : |
|
|
|
461 : |
|
|
err = clEnqueueReadBuffer(queue,out_mem,CL_TRUE,0, sizeof(float) * (SIZE *SIZE),out,0,NULL,NULL);
|
462 : |
lamonts |
191 |
|
463 : |
lamonts |
203 |
saveResults(out,SIZE * SIZE);
|
464 : |
lamonts |
177 |
|
465 : |
|
|
clReleaseKernel(kernel);
|
466 : |
|
|
clReleaseProgram(program);
|
467 : |
|
|
clReleaseCommandQueue(queue);
|
468 : |
|
|
clReleaseContext(context);
|
469 : |
|
|
|
470 : |
|
|
clReleaseMemObject(imageData_mem);
|
471 : |
|
|
clReleaseMemObject(h1_mem);
|
472 : |
|
|
clReleaseMemObject(h2_mem);
|
473 : |
|
|
clReleaseMemObject(out_mem);
|
474 : |
|
|
|
475 : |
|
|
return CL_SUCCESS;
|
476 : |
|
|
}
|
477 : |
|
|
int main (int argc, char ** argv)
|
478 : |
|
|
{
|
479 : |
|
|
//Declaring and initializing input variables
|
480 : |
|
|
Nrrd * nin;
|
481 : |
|
|
char * dataFile = "txs.nrrd";
|
482 : |
|
|
cl_float4 eyeVector = {25,15,10};
|
483 : |
|
|
cl_float4 origVector = {8.83877,2.5911,7.65275};
|
484 : |
|
|
cl_float4 cVector = {-0.0151831,0.0278357,0};
|
485 : |
|
|
cl_float4 rVector = {0.0074887,0.00408474,-0.0305383};
|
486 : |
|
|
float stepSize = 0.1;
|
487 : |
|
|
float h1[] = {0.666667,0,-1,0.5};
|
488 : |
|
|
float h2[] = {1.33333, -2, 1,-0.166667};
|
489 : |
|
|
float * out;
|
490 : |
|
|
|
491 : |
|
|
out = (float *) malloc(sizeof(float) * (SIZE * SIZE));
|
492 : |
|
|
|
493 : |
|
|
nin = loadNrrdFile(dataFile);
|
494 : |
|
|
|
495 : |
|
|
exe_MIP_Kernel(nin,stepSize,eyeVector,origVector,
|
496 : |
|
|
cVector,rVector,h1,h2,out);
|
497 : |
|
|
|
498 : |
|
|
|
499 : |
|
|
return 0;
|
500 : |
|
|
}
|