SCM Repository
Annotation of /trunk/test/probe/probe.c
Parent Directory
|
Revision Log
Revision 310 - (view) (download) (as text)
1 : | lamonts | 259 | /** |
2 : | * | ||
3 : | * To View the Image | ||
4 : | * ========================= | ||
5 : | * ./unu reshape -i mip.txt -s 200 200 | ./unu quantize -b 8 -o new.png | ||
6 : | */ | ||
7 : | #include <OpenCL/OpenCl.h> | ||
8 : | #include <assert.h> | ||
9 : | #include <stdio.h> | ||
10 : | #include <stdlib.h> | ||
11 : | #include <string.h> | ||
12 : | #include <sys/sysctl.h> | ||
13 : | #include <sys/stat.h> | ||
14 : | |||
15 : | #include <teem/nrrd.h> | ||
16 : | |||
17 : | #define SIZE 21 | ||
18 : | |||
19 : | /*typedef float vec3[3]; | ||
20 : | |||
21 : | typedef struct { | ||
22 : | jhr | 262 | int degree; |
23 : | float coeff[]; | ||
24 : | lamonts | 259 | } polynomial; |
25 : | |||
26 : | typedef struct { | ||
27 : | jhr | 262 | int support; |
28 : | polynomial *segments[]; | ||
29 : | lamonts | 259 | } kernel; */ |
30 : | |||
31 : | |||
32 : | int device_stats(cl_device_id device_id){ | ||
33 : | jhr | 262 | |
34 : | int err,i; | ||
35 : | size_t returned_size; | ||
36 : | |||
37 : | // Report the device vendor and device name | ||
38 : | lamonts | 259 | // |
39 : | cl_char vendor_name[1024] = {0}; | ||
40 : | cl_char device_name[1024] = {0}; | ||
41 : | jhr | 262 | cl_char device_profile[1024] = {0}; |
42 : | cl_char device_extensions[1024] = {0}; | ||
43 : | cl_device_local_mem_type local_mem_type; | ||
44 : | |||
45 : | lamonts | 259 | cl_ulong global_mem_size, global_mem_cache_size; |
46 : | jhr | 262 | cl_ulong max_mem_alloc_size; |
47 : | |||
48 : | cl_uint clock_frequency, vector_width, max_compute_units; | ||
49 : | |||
50 : | size_t max_work_item_dims,max_work_group_size, max_work_item_sizes[3]; | ||
51 : | |||
52 : | cl_uint vector_types[] = {CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE}; | ||
53 : | char *vector_type_names[] = {"char","short","int","long","float","double"}; | ||
54 : | |||
55 : | err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size); | ||
56 : | lamonts | 259 | err|= clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size); |
57 : | jhr | 262 | err|= clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(device_profile), device_profile, &returned_size); |
58 : | err|= clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, sizeof(device_extensions), device_extensions, &returned_size); | ||
59 : | err|= clGetDeviceInfo(device_id, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, &returned_size); | ||
60 : | |||
61 : | err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, &returned_size); | ||
62 : | err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(global_mem_cache_size), &global_mem_cache_size, &returned_size); | ||
63 : | err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, &returned_size); | ||
64 : | |||
65 : | err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, &returned_size); | ||
66 : | |||
67 : | err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, &returned_size); | ||
68 : | |||
69 : | err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_work_item_dims), &max_work_item_dims, &returned_size); | ||
70 : | |||
71 : | err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_work_item_sizes), max_work_item_sizes, &returned_size); | ||
72 : | |||
73 : | err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, &returned_size); | ||
74 : | |||
75 : | printf("Vendor: %s\n", vendor_name); | ||
76 : | printf("Device Name: %s\n", device_name); | ||
77 : | printf("Profile: %s\n", device_profile); | ||
78 : | printf("Supported Extensions: %s\n\n", device_extensions); | ||
79 : | |||
80 : | printf("Local Mem Type (Local=1, Global=2): %i\n",(int)local_mem_type); | ||
81 : | printf("Global Mem Size (MB): %i\n",(int)global_mem_size/(1024*1024)); | ||
82 : | printf("Global Mem Cache Size (Bytes): %i\n",(int)global_mem_cache_size); | ||
83 : | printf("Max Mem Alloc Size (MB): %ld\n",(long int)max_mem_alloc_size/(1024*1024)); | ||
84 : | |||
85 : | printf("Clock Frequency (MHz): %i\n\n",clock_frequency); | ||
86 : | |||
87 : | for(i=0;i<6;i++){ | ||
88 : | err|= clGetDeviceInfo(device_id, vector_types[i], sizeof(clock_frequency), &vector_width, &returned_size); | ||
89 : | printf("Vector type width for: %s = %i\n",vector_type_names[i],vector_width); | ||
90 : | } | ||
91 : | |||
92 : | printf("\nMax Work Group Size: %lu\n",max_work_group_size); | ||
93 : | //printf("Max Work Item Dims: %lu\n",max_work_item_dims); | ||
94 : | //for(size_t i=0;i<max_work_item_dims;i++) | ||
95 : | // printf("Max Work Items in Dim %lu: %lu\n",(long unsigned)(i+1),(long unsigned)max_work_item_sizes[i]); | ||
96 : | |||
97 : | printf("Max Compute Units: %i\n",max_compute_units); | ||
98 : | printf("\n"); | ||
99 : | |||
100 : | return CL_SUCCESS; | ||
101 : | lamonts | 259 | } |
102 : | //Loads the Kernel from a file | ||
103 : | char * loadKernel (const char * filename) | ||
104 : | { | ||
105 : | jhr | 262 | struct stat statbuf; |
106 : | FILE *fh; | ||
107 : | char *source; | ||
108 : | |||
109 : | fh = fopen(filename, "r"); | ||
110 : | if (fh == 0) | ||
111 : | return 0; | ||
112 : | |||
113 : | stat(filename, &statbuf); | ||
114 : | source = (char *) malloc(statbuf.st_size + 1); | ||
115 : | fread(source, statbuf.st_size, 1, fh); | ||
116 : | source[statbuf.st_size] = '\0'; | ||
117 : | |||
118 : | return source; | ||
119 : | lamonts | 259 | } |
120 : | void saveResults (float * matrix, int size) | ||
121 : | { | ||
122 : | jhr | 262 | int i; |
123 : | float max = -INFINITY; | ||
124 : | FILE * out_file; | ||
125 : | out_file = fopen("mip.txt", "w"); | ||
126 : | lamonts | 259 | if (out_file == NULL) { |
127 : | fprintf(stderr,"Can not open output file\n"); | ||
128 : | exit (8); | ||
129 : | } | ||
130 : | |||
131 : | for(i = 0; i < size; i++) | ||
132 : | { | ||
133 : | jhr | 262 | if(matrix[i] == -INFINITY || matrix[i] < 0) |
134 : | fprintf(out_file,"%f\n",0.0f); | ||
135 : | else | ||
136 : | fprintf(out_file,"%f\n",matrix[i]); | ||
137 : | |||
138 : | if(matrix[i] > max) | ||
139 : | max = matrix[i]; | ||
140 : | lamonts | 259 | |
141 : | } | ||
142 : | printf("Max: %f\n",max); | ||
143 : | fclose(out_file); | ||
144 : | |||
145 : | |||
146 : | } | ||
147 : | float det3x3(float a, float b, float c, float d, float e, float f, float g, float h, float i) | ||
148 : | { | ||
149 : | return ( (a)*(e)*(i) | ||
150 : | jhr | 262 | + (d)*(h)*(c) |
151 : | + (g)*(b)*(f) | ||
152 : | - (g)*(e)*(c) | ||
153 : | - (d)*(b)*(i) | ||
154 : | - (a)*(h)*(f)); | ||
155 : | lamonts | 259 | } |
156 : | float det4x4(cl_float16 m) | ||
157 : | { | ||
158 : | jhr | 262 | return (m[ 0] * det3x3(m[ 5], m[ 6], m[ 7], |
159 : | m[ 9], m[10], m[11], | ||
160 : | m[13], m[14], m[15]) | ||
161 : | |||
162 : | - m[ 1] * det3x3(m[ 4], m[ 6], m[ 7], | ||
163 : | m[ 8], m[10], m[11], | ||
164 : | m[12], m[14], m[15]) | ||
165 : | + m[ 2] * det3x3(m[ 4], m[ 5], m[ 7], | ||
166 : | m[ 8], m[ 9], m[11], | ||
167 : | m[12], m[13], m[15]) | ||
168 : | |||
169 : | - m[ 3] * det3x3(m[ 4], m[ 5], m[ 6], | ||
170 : | m[ 8], m[ 9], m[10], | ||
171 : | m[12], m[13], m[14])); | ||
172 : | lamonts | 259 | |
173 : | |||
174 : | |||
175 : | } | ||
176 : | void invMatrix(cl_float16 m, cl_float16 i) | ||
177 : | { | ||
178 : | jhr | 262 | float det = det4x4(m); |
179 : | lamonts | 259 | |
180 : | jhr | 262 | |
181 : | i[0] = det3x3(m[5],m[ 6],m[ 7], | ||
182 : | m[ 9],m[10],m[11], | ||
183 : | m[13],m[14],m[15])/det; | ||
184 : | |||
185 : | i[ 1] = -det3x3(m[ 1],m[ 2],m[ 3], | ||
186 : | m[ 9],m[10],m[11], | ||
187 : | m[13],m[14],m[15])/det; | ||
188 : | |||
189 : | i[ 2] = det3x3(m[ 1],m[ 2],m[ 3], | ||
190 : | m[ 5],m[ 6],m[ 7], | ||
191 : | m[13],m[14],m[15])/det; | ||
192 : | |||
193 : | i[ 3] = -det3x3(m[ 1],m[ 2],m[ 3], | ||
194 : | m[ 5],m[ 6],m[ 7], | ||
195 : | m[ 9],m[10],m[11])/det; | ||
196 : | |||
197 : | i[ 4] = -det3x3(m[ 4],m[ 6],m[ 7], | ||
198 : | m[ 8],m[10],m[11], | ||
199 : | m[12],m[14],m[15])/det; | ||
200 : | |||
201 : | i[ 5] = det3x3(m[ 0],m[ 2],m[ 3], | ||
202 : | m[ 8],m[10],m[11], | ||
203 : | m[12],m[14],m[15])/det; | ||
204 : | |||
205 : | i[ 6] = -det3x3(m[ 0],m[ 2],m[ 3], | ||
206 : | m[ 4],m[ 6],m[ 7], | ||
207 : | m[12],m[14],m[15])/det; | ||
208 : | |||
209 : | i[ 7] = det3x3(m[ 0],m[ 2],m[ 3], | ||
210 : | m[ 4],m[ 6],m[ 7], | ||
211 : | m[ 8],m[10],m[11])/det; | ||
212 : | |||
213 : | i[ 8] = det3x3(m[ 4],m[ 5],m[ 7], | ||
214 : | m[ 8],m[ 9],m[11], | ||
215 : | m[12],m[13],m[15])/det; | ||
216 : | |||
217 : | i[ 9] = -det3x3(m[ 0],m[ 1],m[ 3], | ||
218 : | m[ 8],m[ 9],m[11], | ||
219 : | m[12],m[13],m[15])/det; | ||
220 : | |||
221 : | i[10] = det3x3(m[ 0],m[ 1],m[ 3], | ||
222 : | m[ 4],m[ 5],m[ 7], | ||
223 : | m[12],m[13],m[15])/det; | ||
224 : | |||
225 : | i[11] = -det3x3(m[ 0],m[ 1],m[ 3], | ||
226 : | m[ 4],m[ 5],m[ 7], | ||
227 : | m[ 8],m[ 9],m[11])/det; | ||
228 : | |||
229 : | i[12] = -det3x3(m[ 4],m[ 5],m[ 6], | ||
230 : | m[ 8],m[ 9],m[10], | ||
231 : | m[12],m[13],m[14])/det; | ||
232 : | |||
233 : | i[13] = det3x3(m[ 0],m[ 1],m[ 2], | ||
234 : | m[ 8],m[ 9],m[10], | ||
235 : | m[12],m[13],m[14])/det; | ||
236 : | |||
237 : | i[14] = -det3x3(m[ 0],m[ 1],m[ 2], | ||
238 : | m[ 4],m[ 5],m[ 6], | ||
239 : | m[12],m[13],m[14])/det; | ||
240 : | |||
241 : | i[15] = det3x3(m[ 0],m[ 1],m[ 2], | ||
242 : | m[ 4],m[ 5],m[ 6], | ||
243 : | m[ 8],m[ 9],m[10])/det; | ||
244 : | lamonts | 259 | } |
245 : | void printMatrix(float * matrix, int rowSize) | ||
246 : | { | ||
247 : | jhr | 262 | int index = 0, end = 1, arraySize = SIZE; |
248 : | |||
249 : | for(index = 0; index < arraySize; index++) | ||
250 : | { | ||
251 : | if(end == rowSize) | ||
252 : | { | ||
253 : | printf(" %.2f\n",matrix[index]); | ||
254 : | end = 1; | ||
255 : | } | ||
256 : | else | ||
257 : | { | ||
258 : | printf(" %.2f ",matrix[index]); | ||
259 : | end++; | ||
260 : | } | ||
261 : | } | ||
262 : | printf("\n"); | ||
263 : | lamonts | 259 | } |
264 : | void loadTransformMatrix(Nrrd * nin, cl_float16 transformMatrix) | ||
265 : | { | ||
266 : | jhr | 262 | int i,j, size = nin->spaceDim; |
267 : | NrrdAxisInfo axisInfo; | ||
268 : | lamonts | 259 | |
269 : | jhr | 262 | //Image axis Scaling and Rotation |
270 : | for(i = 0; i < size; i++) | ||
271 : | { | ||
272 : | axisInfo = nin->axis[i]; | ||
273 : | for(j = 0; j < size; j++) | ||
274 : | { | ||
275 : | transformMatrix[ (size+ 1) * j + i] = axisInfo.spaceDirection[j]; | ||
276 : | } | ||
277 : | |||
278 : | //Image Location | ||
279 : | transformMatrix[ (i * (size + 1)) + size] = nin->spaceOrigin[i]; | ||
280 : | |||
281 : | //Bottom row of the Transform Matrix | ||
282 : | transformMatrix[((size + 1) * (size)) + i ] = 0; | ||
283 : | } | ||
284 : | transformMatrix[((size + 1) * (size)) + size ] = 1; | ||
285 : | lamonts | 259 | |
286 : | } | ||
287 : | Nrrd * loadNrrdFile(char * filename) | ||
288 : | { | ||
289 : | jhr | 262 | /* create a nrrd; at this point this is just an empty container */ |
290 : | Nrrd * nin; | ||
291 : | |||
292 : | nin = nrrdNew(); | ||
293 : | char *err; | ||
294 : | lamonts | 259 | |
295 : | jhr | 262 | /* read in the nrrd from file */ |
296 : | if (nrrdLoad(nin, filename, NULL)) { | ||
297 : | err = biffGetDone(NRRD); | ||
298 : | fprintf(stderr, "Mip: trouble reading \"%s\":\n%s", filename, err); | ||
299 : | free(err); | ||
300 : | exit(1); | ||
301 : | lamonts | 259 | } |
302 : | |||
303 : | /* say something about the array | ||
304 : | printf("Mip: \"%s\" is a %d-dimensional nrrd of type %d (%s)\n", | ||
305 : | filename, nin->dim, nin->type, | ||
306 : | airEnumStr(nrrdType, nin->type)); | ||
307 : | printf("Mip: the array contains %d elements, each %d bytes in size\n", | ||
308 : | (int)nrrdElementNumber(nin), (int)nrrdElementSize(nin));*/ | ||
309 : | |||
310 : | return nin; | ||
311 : | |||
312 : | } | ||
313 : | void loadProbePositions(char * filename, float * posValues) | ||
314 : | { | ||
315 : | jhr | 262 | FILE * infile; |
316 : | |||
317 : | int i = 0; | ||
318 : | char line[80]; | ||
319 : | |||
320 : | if(!(infile = fopen(filename,"r"))) | ||
321 : | { | ||
322 : | fprintf(stderr, "Could not load file:\"%s\n", filename); | ||
323 : | exit(1); | ||
324 : | } | ||
325 : | |||
326 : | lamonts | 259 | while(fgets(line, 80, infile) != NULL) |
327 : | { | ||
328 : | jhr | 262 | sscanf (line, "%f %f %f", &posValues[i],&posValues[i+1],&posValues[i+2]); |
329 : | i+=3; | ||
330 : | lamonts | 259 | } |
331 : | jhr | 262 | |
332 : | fclose(infile); | ||
333 : | lamonts | 259 | } |
334 : | jhr | 262 | int exe_Probe_Kernel(Nrrd * nin, float * probedPositions, float *h[4], float * out) |
335 : | lamonts | 259 | { |
336 : | |||
337 : | jhr | 262 | cl_program program; |
338 : | cl_kernel kernel; | ||
339 : | lamonts | 259 | |
340 : | jhr | 262 | cl_command_queue queue; |
341 : | cl_context context; | ||
342 : | |||
343 : | cl_device_id cpu = NULL, device = NULL; | ||
344 : | |||
345 : | cl_int err = 0; | ||
346 : | |||
347 : | cl_float16 transformMatrix; | ||
348 : | cl_float16 inverseMatrix; | ||
349 : | |||
350 : | int imageDataSize = (int)nrrdElementNumber(nin); | ||
351 : | |||
352 : | cl_mem imageData_mem, out_mem, h_mem[4], positions_mem; | ||
353 : | |||
354 : | /** Setup Device **/ | ||
355 : | err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL); | ||
356 : | assert(err==CL_SUCCESS); | ||
357 : | |||
358 : | err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_GPU,1,&device,NULL); | ||
359 : | //if(err != CL_SUCCESS) | ||
360 : | device = cpu; | ||
361 : | |||
362 : | assert(device); | ||
363 : | |||
364 : | /** Retrieve Information about the device | ||
365 : | cl_char vendor_name[1024] = {0}; | ||
366 : | lamonts | 259 | cl_char device_name[1024] = {0}; |
367 : | err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size); | ||
368 : | err|= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size); | ||
369 : | jhr | 262 | printf("Connecting to %s %s...\n", vendor_name, device_name); |
370 : | device_stats(device); */ | ||
371 : | lamonts | 259 | |
372 : | jhr | 262 | |
373 : | /* Setup Context and Command Queue */ | ||
374 : | context = clCreateContext(0,1,&device,NULL,NULL,&err); | ||
375 : | assert(err == CL_SUCCESS); | ||
376 : | |||
377 : | queue = clCreateCommandQueue(context,device,0,NULL); | ||
378 : | /** Load the Kernel and Program **/ | ||
379 : | const char * filename = "probe.cl"; | ||
380 : | lamonts | 259 | |
381 : | jhr | 262 | char * kernel_source = loadKernel(filename); |
382 : | lamonts | 259 | |
383 : | jhr | 262 | assert(kernel_source != 0); |
384 : | |||
385 : | program = clCreateProgramWithSource(context,1,(const char **)&kernel_source,NULL,&err); | ||
386 : | |||
387 : | assert(err == CL_SUCCESS); | ||
388 : | lamonts | 259 | |
389 : | jhr | 310 | const char *options = 0; |
390 : | err = clBuildProgram(program, 0, NULL, options, NULL, NULL); | ||
391 : | lamonts | 259 | |
392 : | jhr | 310 | |
393 : | lamonts | 259 | /** Retrieve information about the program build to check for any possible errors **/ |
394 : | char * build_log; | ||
395 : | size_t log_size; | ||
396 : | |||
397 : | // First call to know the proper size | ||
398 : | clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); | ||
399 : | build_log = (char *) malloc(log_size+1); | ||
400 : | // Second call to get the log | ||
401 : | clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL); | ||
402 : | build_log[log_size] = '\0'; | ||
403 : | printf("\nBuild Log:\n%s\n",build_log); | ||
404 : | free(build_log); | ||
405 : | |||
406 : | assert(err == CL_SUCCESS); | ||
407 : | |||
408 : | jhr | 310 | kernel = clCreateKernel(program, "diderot", &err); |
409 : | jhr | 262 | |
410 : | assert(err == CL_SUCCESS); | ||
411 : | |||
412 : | /** Memory Allocation for the Matrices **/ | ||
413 : | lamonts | 278 | |
414 : | for (int i = 0; i < 4; i++) { | ||
415 : | jhr | 262 | h_mem[i] = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL); |
416 : | err |= clEnqueueWriteBuffer(queue,h_mem[i],CL_TRUE,0,sizeof(float) * 4, | ||
417 : | (void *)h[i] ,0,NULL,NULL); | ||
418 : | } | ||
419 : | |||
420 : | positions_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * (SIZE *3),NULL,NULL); | ||
421 : | err |= clEnqueueWriteBuffer(queue,positions_mem,CL_TRUE,0,sizeof(float) * (SIZE *3), | ||
422 : | (void *)probedPositions ,0,NULL,NULL); | ||
423 : | |||
424 : | imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL); | ||
425 : | err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize, | ||
426 : | nin->data ,0,NULL,NULL); | ||
427 : | |||
428 : | //Load the transformMatrix | ||
429 : | loadTransformMatrix(nin,transformMatrix); | ||
430 : | invMatrix(transformMatrix,inverseMatrix); | ||
431 : | lamonts | 259 | |
432 : | jhr | 262 | err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,imageDataSize, |
433 : | nin->data ,0,NULL,NULL); | ||
434 : | |||
435 : | assert(err == CL_SUCCESS); | ||
436 : | |||
437 : | out_mem = clCreateBuffer(context,CL_MEM_READ_WRITE, sizeof(float) * (SIZE),NULL,NULL); | ||
438 : | |||
439 : | clFinish(queue); | ||
440 : | |||
441 : | size_t global_work_size[1], local_work_size[1]; | ||
442 : | |||
443 : | global_work_size[0] = SIZE; | ||
444 : | local_work_size[0] = 1; | ||
445 : | lamonts | 259 | |
446 : | jhr | 262 | int idx = 0; |
447 : | err =clSetKernelArg(kernel, idx++, sizeof(cl_mem), &imageData_mem); | ||
448 : | err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &(h_mem[0])); | ||
449 : | err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &(h_mem[1])); | ||
450 : | err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &(h_mem[2])); | ||
451 : | err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &(h_mem[3])); | ||
452 : | err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &positions_mem); | ||
453 : | err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &out_mem); | ||
454 : | err |=clSetKernelArg(kernel, idx++, sizeof(cl_float16), &inverseMatrix); | ||
455 : | err |=clSetKernelArg(kernel, idx++, sizeof(int), &nin->axis[1].size); | ||
456 : | err |=clSetKernelArg(kernel, idx++, sizeof(int), &nin->axis[2].size); | ||
457 : | err |=clSetKernelArg(kernel, idx++, sizeof(int), &nin->axis[0].size); | ||
458 : | |||
459 : | assert(err == CL_SUCCESS); | ||
460 : | |||
461 : | /** Retrieve the Recommend Work Group Size | ||
462 : | size_t thread_size; | ||
463 : | clGetKernelWorkGroupInfo(kernel,device,CL_KERNEL_WORK_GROUP_SIZE, | ||
464 : | sizeof(size_t),&thread_size,NULL); | ||
465 : | printf("Recommended Size: %lu\n",thread_size);*/ | ||
466 : | |||
467 : | |||
468 : | err = clEnqueueNDRangeKernel(queue,kernel,1,NULL,global_work_size, | ||
469 : | local_work_size,0,NULL,NULL); | ||
470 : | |||
471 : | assert(err == CL_SUCCESS); | ||
472 : | |||
473 : | clFinish(queue); | ||
474 : | |||
475 : | err = clEnqueueReadBuffer(queue,out_mem,CL_TRUE,0, sizeof(float) * SIZE,out,0,NULL,NULL); | ||
476 : | |||
477 : | // saveResults(out,SIZE * SIZE); | ||
478 : | lamonts | 278 | printMatrix(out,1); |
479 : | jhr | 262 | |
480 : | clReleaseKernel(kernel); | ||
481 : | clReleaseProgram(program); | ||
482 : | clReleaseCommandQueue(queue); | ||
483 : | clReleaseContext(context); | ||
484 : | |||
485 : | clReleaseMemObject(imageData_mem); | ||
486 : | lamonts | 278 | for (int i = 0; i < 4; i++) |
487 : | jhr | 262 | clReleaseMemObject(h_mem[i]); |
488 : | clReleaseMemObject(out_mem); | ||
489 : | clReleaseMemObject(positions_mem); | ||
490 : | return CL_SUCCESS; | ||
491 : | lamonts | 259 | } |
492 : | |||
493 : | int main (int argc, char ** argv) | ||
494 : | { | ||
495 : | jhr | 262 | //Declaring and initializing input variables |
496 : | Nrrd * nin; | ||
497 : | lamonts | 278 | char * probeValuesFile = "../../data/plane-probe-pos-x.txt"; |
498 : | char * dataFile = "../../data/plane-x9.nrrd"; | ||
499 : | jhr | 262 | float h0[] = {1.33333, 2, 1, 0.166667}; /* -2..-1 */ |
500 : | float h1[] = {0.666667, 0, -1, -0.5}; /* -1..0 */ | ||
501 : | float h2[] = {0.666667, 0, -1, 0.5}; /* 0..1 */ | ||
502 : | float h3[] = {1.33333, -2, 1, -0.166667}; /* 1..2 */ | ||
503 : | float *h[4] = {h0, h1, h2, h3}; | ||
504 : | float * out; | ||
505 : | float * posValues; | ||
506 : | lamonts | 259 | |
507 : | jhr | 262 | if (argc == 3) { |
508 : | probeValuesFile = argv[1]; | ||
509 : | dataFile = argv[2]; | ||
510 : | } | ||
511 : | else if (argc > 1) { | ||
512 : | fprintf (stderr, "usage: probe values-file data-file\n"); | ||
513 : | exit (1); | ||
514 : | } | ||
515 : | |||
516 : | out = (float *) malloc(sizeof(float) * SIZE); | ||
517 : | posValues = (float *) malloc(sizeof(float) * (SIZE * 3)); | ||
518 : | |||
519 : | nin = loadNrrdFile(dataFile); | ||
520 : | loadProbePositions(probeValuesFile,posValues); | ||
521 : | |||
522 : | exe_Probe_Kernel(nin,posValues,h,out); | ||
523 : | |||
524 : | return 0; | ||
525 : | lamonts | 259 | } |
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |