1 |
/** |
/* mip_opencl.c |
2 |
|
* |
3 |
|
* COPYRIGHT (c) 2010 The Diderot Project (http://diderot.cs.uchicago.edu) |
4 |
|
* All rights reserved. |
5 |
|
* |
6 |
|
* An OpenCL mip implementation. |
7 |
* |
* |
8 |
* To View the Image |
* To View the Image |
9 |
* ========================= |
* ========================= |
15 |
#include <stdlib.h> |
#include <stdlib.h> |
16 |
#include <sys/sysctl.h> |
#include <sys/sysctl.h> |
17 |
#include <sys/stat.h> |
#include <sys/stat.h> |
|
|
|
18 |
#include <teem/nrrd.h> |
#include <teem/nrrd.h> |
19 |
|
|
20 |
#define SIZE 201 |
#define SIZE 200 |
|
|
|
|
/*typedef float vec3[3]; |
|
|
|
|
|
typedef struct { |
|
|
int degree; |
|
|
float coeff[]; |
|
|
} polynomial; |
|
|
|
|
|
typedef struct { |
|
|
int support; |
|
|
polynomial *segments[]; |
|
|
} kernel; */ |
|
|
|
|
|
|
|
|
int device_stats(cl_device_id device_id){ |
|
|
|
|
|
int err,i; |
|
|
size_t returned_size; |
|
|
|
|
|
// Report the device vendor and device name |
|
|
// |
|
|
cl_char vendor_name[1024] = {0}; |
|
|
cl_char device_name[1024] = {0}; |
|
|
cl_char device_profile[1024] = {0}; |
|
|
cl_char device_extensions[1024] = {0}; |
|
|
cl_device_local_mem_type local_mem_type; |
|
|
|
|
|
cl_ulong global_mem_size, global_mem_cache_size; |
|
|
cl_ulong max_mem_alloc_size; |
|
|
|
|
|
cl_uint clock_frequency, vector_width, max_compute_units; |
|
|
|
|
|
size_t max_work_item_dims,max_work_group_size, max_work_item_sizes[3]; |
|
|
|
|
|
cl_uint vector_types[] = {CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE}; |
|
|
char *vector_type_names[] = {"char","short","int","long","float","double"}; |
|
|
|
|
|
err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size); |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size); |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(device_profile), device_profile, &returned_size); |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, sizeof(device_extensions), device_extensions, &returned_size); |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, &returned_size); |
|
|
|
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, &returned_size); |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(global_mem_cache_size), &global_mem_cache_size, &returned_size); |
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, &returned_size); |
|
|
|
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, &returned_size); |
|
|
|
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, &returned_size); |
|
|
|
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_work_item_dims), &max_work_item_dims, &returned_size); |
|
|
|
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_work_item_sizes), max_work_item_sizes, &returned_size); |
|
|
|
|
|
err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, &returned_size); |
|
|
|
|
|
printf("Vendor: %s\n", vendor_name); |
|
|
printf("Device Name: %s\n", device_name); |
|
|
printf("Profile: %s\n", device_profile); |
|
|
printf("Supported Extensions: %s\n\n", device_extensions); |
|
|
|
|
|
printf("Local Mem Type (Local=1, Global=2): %i\n",(int)local_mem_type); |
|
|
printf("Global Mem Size (MB): %i\n",(int)global_mem_size/(1024*1024)); |
|
|
printf("Global Mem Cache Size (Bytes): %i\n",(int)global_mem_cache_size); |
|
|
printf("Max Mem Alloc Size (MB): %ld\n",(long int)max_mem_alloc_size/(1024*1024)); |
|
|
|
|
|
printf("Clock Frequency (MHz): %i\n\n",clock_frequency); |
|
|
|
|
|
for(i=0;i<6;i++){ |
|
|
err|= clGetDeviceInfo(device_id, vector_types[i], sizeof(clock_frequency), &vector_width, &returned_size); |
|
|
printf("Vector type width for: %s = %i\n",vector_type_names[i],vector_width); |
|
|
} |
|
|
|
|
|
printf("\nMax Work Group Size: %lu\n",max_work_group_size); |
|
|
//printf("Max Work Item Dims: %lu\n",max_work_item_dims); |
|
|
//for(size_t i=0;i<max_work_item_dims;i++) |
|
|
// printf("Max Work Items in Dim %lu: %lu\n",(long unsigned)(i+1),(long unsigned)max_work_item_sizes[i]); |
|
|
|
|
|
printf("Max Compute Units: %i\n",max_compute_units); |
|
|
printf("\n"); |
|
21 |
|
|
|
return CL_SUCCESS; |
|
|
} |
|
22 |
//Loads the Kernel from a file |
//Loads the Kernel from a file |
23 |
char * loadKernel (const char * filename) |
char * loadKernel (const char * filename) |
24 |
{ |
{ |
51 |
for(i = 0; i < size; i++) |
for(i = 0; i < size; i++) |
52 |
{ |
{ |
53 |
if(matrix[i] == -INFINITY || matrix[i] < 0) |
if(matrix[i] == -INFINITY || matrix[i] < 0) |
54 |
fprintf(out_file,"%f\n",0.0f); |
fprintf(out_file,"%.4f\n",0.0f); |
55 |
else |
else |
56 |
fprintf(out_file,"%f\n",matrix[i]); |
fprintf(out_file,"%.4f\n",matrix[i]); |
57 |
|
|
58 |
if(matrix[i] > max) |
if(matrix[i] > max) |
59 |
max = matrix[i]; |
max = matrix[i]; |
203 |
} |
} |
204 |
transformMatrix[((size + 1) * (size)) + size ] = 1; |
transformMatrix[((size + 1) * (size)) + size ] = 1; |
205 |
|
|
|
printMatrix (transformMatrix, 4); |
|
206 |
} |
} |
207 |
Nrrd * loadNrrdFile(char * filename) |
Nrrd * loadNrrdFile(char * filename) |
208 |
{ |
{ |
230 |
return nin; |
return nin; |
231 |
|
|
232 |
} |
} |
233 |
int exe_MIP_Kernel(Nrrd * nin, float stepSize, cl_float4 eyeVec, cl_float4 origVec, |
int exe_MIP_Kernel(float * img, int imageDataSize, float inverseMatrix[16], int sAxis[3], float * out) |
|
cl_float4 cVec, cl_float4 rVec, float * h1, float * h2, float * out) |
|
234 |
{ |
{ |
235 |
|
|
236 |
cl_program program; |
cl_program program; |
243 |
|
|
244 |
cl_int err = 0; |
cl_int err = 0; |
245 |
|
|
|
cl_float16 transformMatrix; |
|
|
cl_float16 inverseMatrix; |
|
|
|
|
|
int imageDataSize = (int)nrrdElementNumber(nin); |
|
|
|
|
|
float * data = (float *)nin->data; |
|
|
printf("Data Image: %f\n", (float)data[4* nin->axis[1].size * nin->axis[2].size + 5 * nin->axis[2].size + 2]); |
|
|
|
|
|
cl_mem imageData_mem, out_mem, h1_mem, h2_mem; |
|
246 |
|
|
247 |
|
cl_mem imageData_mem, out_mem,sAxis_mem; |
248 |
|
|
249 |
/** Setup Device **/ |
/** Setup Device **/ |
250 |
err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL); |
err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL); |
256 |
|
|
257 |
assert(device); |
assert(device); |
258 |
|
|
|
|
|
|
/** Retrieve Information about the device |
|
|
cl_char vendor_name[1024] = {0}; |
|
|
cl_char device_name[1024] = {0}; |
|
|
err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size); |
|
|
err|= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size); |
|
|
printf("Connecting to %s %s...\n", vendor_name, device_name); |
|
|
device_stats(device); */ |
|
|
|
|
|
|
|
259 |
/* Setup Context and Command Queue */ |
/* Setup Context and Command Queue */ |
260 |
context = clCreateContext(0,1,&device,NULL,NULL,&err); |
context = clCreateContext(0,1,&device,NULL,NULL,&err); |
261 |
assert(err == CL_SUCCESS); |
assert(err == CL_SUCCESS); |
290 |
|
|
291 |
assert(err == CL_SUCCESS); |
assert(err == CL_SUCCESS); |
292 |
|
|
293 |
kernel = clCreateKernel(program,"raycast",&err); |
kernel = clCreateKernel(program,"mip",&err); |
294 |
|
|
295 |
assert(err == CL_SUCCESS); |
assert(err == CL_SUCCESS); |
296 |
|
|
297 |
/** Memory Allocation for the Matrices **/ |
/** Memory Allocation for the Matrices **/ |
298 |
|
|
|
h1_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL); |
|
|
err |= clEnqueueWriteBuffer(queue,h1_mem,CL_TRUE,0,sizeof(float) * 4, |
|
|
(void *)h1 ,0,NULL,NULL); |
|
|
|
|
|
h2_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL); |
|
|
err |= clEnqueueWriteBuffer(queue,h2_mem,CL_TRUE,0,sizeof(float) * 4, |
|
|
(void *)h2 ,0,NULL,NULL); |
|
|
|
|
|
|
|
299 |
imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL); |
imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL); |
300 |
err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize, |
err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize, |
301 |
nin->data ,0,NULL,NULL); |
(void *)img ,0,NULL,NULL); |
302 |
|
|
303 |
//Load the transformMatrix |
sAxis_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(int) * 3,NULL,NULL); |
304 |
loadTransformMatrix(nin,transformMatrix); |
err |= clEnqueueWriteBuffer(queue,sAxis_mem,CL_TRUE,0,sizeof(int) * 3, |
305 |
invMatrix(transformMatrix,inverseMatrix); |
(void *)sAxis ,0,NULL,NULL); |
|
printf("Inverse\n"); printMatrix(inverseMatrix, 4); |
|
|
|
|
|
err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,imageDataSize, |
|
|
nin->data ,0,NULL,NULL); |
|
306 |
|
|
307 |
assert(err == CL_SUCCESS); |
assert(err == CL_SUCCESS); |
308 |
|
|
318 |
local_work_size[0] = 1; |
local_work_size[0] = 1; |
319 |
local_work_size[1] = 1; |
local_work_size[1] = 1; |
320 |
|
|
321 |
err =clSetKernelArg(kernel,0,sizeof(cl_mem), &imageData_mem); |
cl_int2 workDim = {SIZE,SIZE}; |
|
err |=clSetKernelArg(kernel,1,sizeof(cl_mem), &h1_mem); |
|
|
err |=clSetKernelArg(kernel,2,sizeof(cl_mem), &h2_mem); |
|
|
err |=clSetKernelArg(kernel,3,sizeof(cl_mem), &out_mem); |
|
|
err |=clSetKernelArg(kernel,4,sizeof(cl_float4), origVec); |
|
|
err |=clSetKernelArg(kernel,5,sizeof(cl_float4), eyeVec); |
|
|
err |=clSetKernelArg(kernel,6,sizeof(cl_float4), cVec); |
|
|
err |=clSetKernelArg(kernel,7,sizeof(cl_float4), rVec); |
|
|
err |=clSetKernelArg(kernel,8,sizeof(cl_float16), &inverseMatrix); |
|
|
err |=clSetKernelArg(kernel,9,sizeof(float), &stepSize); |
|
|
err |=clSetKernelArg(kernel,10,sizeof(int), &nin->axis[1].size); |
|
|
err |=clSetKernelArg(kernel,11,sizeof(int), &nin->axis[2].size); |
|
|
err |=clSetKernelArg(kernel,12,sizeof(int), &nin->axis[0].size); |
|
322 |
|
|
323 |
printf("Error: %d\n",err); |
int index = 0; |
|
assert(err == CL_SUCCESS); |
|
324 |
|
|
325 |
/** Retrieve the Recommend Work Group Size */ |
err =clSetKernelArg(kernel,index++,sizeof(cl_mem), &imageData_mem); |
326 |
size_t thread_size; |
err |=clSetKernelArg(kernel,index++,sizeof(cl_mem), &out_mem); |
327 |
clGetKernelWorkGroupInfo(kernel,device,CL_KERNEL_WORK_GROUP_SIZE, |
err |=clSetKernelArg(kernel,index++,sizeof(cl_float16), inverseMatrix); |
328 |
sizeof(size_t),&thread_size,NULL); |
err |=clSetKernelArg(kernel,index++,sizeof(cl_int2), &workDim); |
329 |
printf("Recommended Size: %lu\n",thread_size); |
err |=clSetKernelArg(kernel,index++,sizeof(cl_mem), &sAxis_mem); |
330 |
|
|
331 |
|
printf("error:%d\n",err); |
332 |
|
assert(err == CL_SUCCESS); |
333 |
|
|
334 |
|
|
335 |
err = clEnqueueNDRangeKernel(queue,kernel,2,NULL,global_work_size, |
err = clEnqueueNDRangeKernel(queue,kernel,2,NULL,global_work_size, |
347 |
clReleaseProgram(program); |
clReleaseProgram(program); |
348 |
clReleaseCommandQueue(queue); |
clReleaseCommandQueue(queue); |
349 |
clReleaseContext(context); |
clReleaseContext(context); |
|
|
|
350 |
clReleaseMemObject(imageData_mem); |
clReleaseMemObject(imageData_mem); |
|
clReleaseMemObject(h1_mem); |
|
|
clReleaseMemObject(h2_mem); |
|
351 |
clReleaseMemObject(out_mem); |
clReleaseMemObject(out_mem); |
352 |
|
|
353 |
return CL_SUCCESS; |
return CL_SUCCESS; |
354 |
} |
} |
|
#define VOX1_Z |
|
355 |
int main (int argc, char ** argv) |
int main (int argc, char ** argv) |
356 |
{ |
{ |
|
//Declaring and initializing input variables |
|
|
Nrrd * nin; |
|
|
#ifdef TXS |
|
357 |
char * dataFile = "txs.nrrd"; |
char * dataFile = "txs.nrrd"; |
358 |
cl_float4 eyeVector = {25,15,10, 1}; |
float transformMatrix[16]; |
359 |
cl_float4 origVector = {8.83877,2.5911,7.65275}; |
float inverseMatrix[16]; |
|
cl_float4 cVector = {-0.0151831,0.0278357,0}; |
|
|
cl_float4 rVector = {0.0074887,0.00408474,-0.0305383}; |
|
|
#endif |
|
|
#ifdef VOX1_X |
|
|
char * dataFile = "../../data/vox1.nrrd"; |
|
|
cl_float4 eyeVector = {-8,2,2, 1}; |
|
|
cl_float4 origVector = {0,3.4036,3.4036}; |
|
|
cl_float4 cVector = {0,-0.014036,0}; |
|
|
cl_float4 rVector = {0,0,-0.014036}; |
|
|
#endif |
|
|
#ifdef VOX1_Y |
|
|
char * dataFile = "../../data/vox1.nrrd"; |
|
|
cl_float4 eyeVector = {2,-8,2, 1}; |
|
|
cl_float4 origVector = {0.596402,0,3.4036, 1}; |
|
|
cl_float4 cVector = {0.014036,0,0}; |
|
|
cl_float4 rVector = {0,0,-0.014036}; |
|
|
#endif |
|
|
#ifdef VOX1_Z |
|
|
char * dataFile = "../../data/vox1-11.nrrd"; |
|
|
cl_float4 eyeVector = {2,2,-8, 1}; |
|
|
cl_float4 origVector = {3.4036,3.4036,0, 1}; |
|
|
cl_float4 cVector = {-0.014036,0,0,0}; |
|
|
cl_float4 rVector = {0,-0.014036,0}; |
|
|
#endif |
|
|
float stepSize = 0.1; |
|
|
float h1[] = {0.666667,0,-1,0.5}; |
|
|
float h2[] = {1.33333, -2, 1,-0.166667}; |
|
|
float * out; |
|
360 |
|
|
361 |
|
float * out; |
362 |
out = (float *) malloc(sizeof(float) * (SIZE * SIZE)); |
out = (float *) malloc(sizeof(float) * (SIZE * SIZE)); |
363 |
|
Nrrd * nin = loadNrrdFile(dataFile); |
364 |
|
|
365 |
nin = loadNrrdFile(dataFile); |
int sAxis[] = {nin->axis[0].size, nin->axis[1].size,nin->axis[2].size}; |
366 |
|
|
367 |
|
loadTransformMatrix(nin,transformMatrix); |
368 |
|
invMatrix(transformMatrix,inverseMatrix); |
369 |
|
|
|
exe_MIP_Kernel(nin,stepSize,eyeVector,origVector, |
|
|
cVector,rVector,h1,h2,out); |
|
370 |
|
|
371 |
|
exe_MIP_Kernel((float *)nin->data, (int)nrrdElementNumber(nin),inverseMatrix, sAxis, out); |
372 |
|
|
373 |
return 0; |
return 0; |
374 |
} |
} |