SCM Repository
[diderot] / trunk / test / MIP / mip_opencl.c |
View of /trunk/test/MIP/mip_opencl.c
Parent Directory
|
Revision Log
Revision 218 -
(download)
(as text)
(annotate)
Tue Aug 3 19:40:56 2010 UTC (10 years, 7 months ago) by jhr
File size: 15990 byte(s)
Tue Aug 3 19:40:56 2010 UTC (10 years, 7 months ago) by jhr
File size: 15990 byte(s)
Debugging
/** * * To View the Image * ========================= * ./unu reshape -i mip.txt -s 200 200 | ./unu quantize -b 8 -o new.png */ #include <OpenCL/OpenCl.h> #include <assert.h> #include <stdio.h> #include <stdlib.h> #include <sys/sysctl.h> #include <sys/stat.h> #include <teem/nrrd.h> #define SIZE 201 /*typedef float vec3[3]; typedef struct { int degree; float coeff[]; } polynomial; typedef struct { int support; polynomial *segments[]; } kernel; */ int device_stats(cl_device_id device_id){ int err,i; size_t returned_size; // Report the device vendor and device name // cl_char vendor_name[1024] = {0}; cl_char device_name[1024] = {0}; cl_char device_profile[1024] = {0}; cl_char device_extensions[1024] = {0}; cl_device_local_mem_type local_mem_type; cl_ulong global_mem_size, global_mem_cache_size; cl_ulong max_mem_alloc_size; cl_uint clock_frequency, vector_width, max_compute_units; size_t max_work_item_dims,max_work_group_size, max_work_item_sizes[3]; cl_uint vector_types[] = {CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE}; char *vector_type_names[] = {"char","short","int","long","float","double"}; err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(device_profile), device_profile, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, sizeof(device_extensions), device_extensions, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(global_mem_cache_size), &global_mem_cache_size, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_work_item_dims), &max_work_item_dims, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_work_item_sizes), max_work_item_sizes, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, &returned_size); printf("Vendor: %s\n", vendor_name); printf("Device Name: %s\n", device_name); printf("Profile: %s\n", device_profile); printf("Supported Extensions: %s\n\n", device_extensions); printf("Local Mem Type (Local=1, Global=2): %i\n",(int)local_mem_type); printf("Global Mem Size (MB): %i\n",(int)global_mem_size/(1024*1024)); printf("Global Mem Cache Size (Bytes): %i\n",(int)global_mem_cache_size); printf("Max Mem Alloc Size (MB): %ld\n",(long int)max_mem_alloc_size/(1024*1024)); printf("Clock Frequency (MHz): %i\n\n",clock_frequency); for(i=0;i<6;i++){ err|= clGetDeviceInfo(device_id, vector_types[i], sizeof(clock_frequency), &vector_width, &returned_size); printf("Vector type width for: %s = %i\n",vector_type_names[i],vector_width); } printf("\nMax Work Group Size: %lu\n",max_work_group_size); //printf("Max Work Item Dims: %lu\n",max_work_item_dims); //for(size_t i=0;i<max_work_item_dims;i++) // printf("Max Work Items in Dim %lu: %lu\n",(long unsigned)(i+1),(long unsigned)max_work_item_sizes[i]); printf("Max Compute Units: %i\n",max_compute_units); printf("\n"); return CL_SUCCESS; } //Loads the Kernel from a file char * loadKernel (const char * filename) { struct stat statbuf; FILE *fh; char *source; fh = fopen(filename, "r"); if (fh == 0) return 0; stat(filename, &statbuf); source = (char *) malloc(statbuf.st_size + 1); fread(source, statbuf.st_size, 1, fh); source[statbuf.st_size] = '\0'; return source; } void saveResults (float * matrix, int size) { int i; float max = -INFINITY; FILE * out_file; out_file = fopen("mip.txt", "w"); if (out_file == NULL) { fprintf(stderr,"Can not open output file\n"); exit (8); } for(i = 0; i < size; i++) { if(matrix[i] == -INFINITY || matrix[i] < 0) fprintf(out_file,"%f\n",0.0f); else fprintf(out_file,"%f\n",matrix[i]); if(matrix[i] > max) max = matrix[i]; } printf("Max: %f\n",max); fclose(out_file); } float det3x3(float a, float b, float c, float d, float e, float f, float g, float h, float i) { return ( (a)*(e)*(i) + (d)*(h)*(c) + (g)*(b)*(f) - (g)*(e)*(c) - (d)*(b)*(i) - (a)*(h)*(f)); } float det4x4(cl_float16 m) { return (m[ 0] * det3x3(m[ 5], m[ 6], m[ 7], m[ 9], m[10], m[11], m[13], m[14], m[15]) - m[ 1] * det3x3(m[ 4], m[ 6], m[ 7], m[ 8], m[10], m[11], m[12], m[14], m[15]) + m[ 2] * det3x3(m[ 4], m[ 5], m[ 7], m[ 8], m[ 9], m[11], m[12], m[13], m[15]) - m[ 3] * det3x3(m[ 4], m[ 5], m[ 6], m[ 8], m[ 9], m[10], m[12], m[13], m[14])); } void invMatrix(cl_float16 m, cl_float16 i) { float det = det4x4(m); i[0] = det3x3(m[5],m[ 6],m[ 7], m[ 9],m[10],m[11], m[13],m[14],m[15])/det; i[ 1] = -det3x3(m[ 1],m[ 2],m[ 3], m[ 9],m[10],m[11], m[13],m[14],m[15])/det; i[ 2] = det3x3(m[ 1],m[ 2],m[ 3], m[ 5],m[ 6],m[ 7], m[13],m[14],m[15])/det; i[ 3] = -det3x3(m[ 1],m[ 2],m[ 3], m[ 5],m[ 6],m[ 7], m[ 9],m[10],m[11])/det; i[ 4] = -det3x3(m[ 4],m[ 6],m[ 7], m[ 8],m[10],m[11], m[12],m[14],m[15])/det; i[ 5] = det3x3(m[ 0],m[ 2],m[ 3], m[ 8],m[10],m[11], m[12],m[14],m[15])/det; i[ 6] = -det3x3(m[ 0],m[ 2],m[ 3], m[ 4],m[ 6],m[ 7], m[12],m[14],m[15])/det; i[ 7] = det3x3(m[ 0],m[ 2],m[ 3], m[ 4],m[ 6],m[ 7], m[ 8],m[10],m[11])/det; i[ 8] = det3x3(m[ 4],m[ 5],m[ 7], m[ 8],m[ 9],m[11], m[12],m[13],m[15])/det; i[ 9] = -det3x3(m[ 0],m[ 1],m[ 3], m[ 8],m[ 9],m[11], m[12],m[13],m[15])/det; i[10] = det3x3(m[ 0],m[ 1],m[ 3], m[ 4],m[ 5],m[ 7], m[12],m[13],m[15])/det; i[11] = -det3x3(m[ 0],m[ 1],m[ 3], m[ 4],m[ 5],m[ 7], m[ 8],m[ 9],m[11])/det; i[12] = -det3x3(m[ 4],m[ 5],m[ 6], m[ 8],m[ 9],m[10], m[12],m[13],m[14])/det; i[13] = det3x3(m[ 0],m[ 1],m[ 2], m[ 8],m[ 9],m[10], m[12],m[13],m[14])/det; i[14] = -det3x3(m[ 0],m[ 1],m[ 2], m[ 4],m[ 5],m[ 6], m[12],m[13],m[14])/det; i[15] = det3x3(m[ 0],m[ 1],m[ 2], m[ 4],m[ 5],m[ 6], m[ 8],m[ 9],m[10])/det; } void printMatrix(float * matrix, int rowSize) { int index = 0, end = 1, arraySize = rowSize * rowSize; for(index = 0; index < arraySize; index++) { if(end == rowSize) { printf(" %.2f\n",matrix[index]); end = 1; } else { printf(" %.2f ",matrix[index]); end++; } } printf("\n"); } void loadTransformMatrix(Nrrd * nin, cl_float16 transformMatrix) { int i,j, size = nin->spaceDim; NrrdAxisInfo axisInfo; //Image axis Scaling and Rotation for(i = 0; i < size; i++) { axisInfo = nin->axis[i]; for(j = 0; j < size; j++) { transformMatrix[ (size+ 1) * j + i] = axisInfo.spaceDirection[j]; } //Image Location transformMatrix[ (i * (size + 1)) + size] = nin->spaceOrigin[i]; //Bottom row of the Transform Matrix transformMatrix[((size + 1) * (size)) + i ] = 0; } transformMatrix[((size + 1) * (size)) + size ] = 1; printMatrix (transformMatrix, 4); } Nrrd * loadNrrdFile(char * filename) { /* create a nrrd; at this point this is just an empty container */ Nrrd * nin; nin = nrrdNew(); char *err; /* read in the nrrd from file */ if (nrrdLoad(nin, filename, NULL)) { err = biffGetDone(NRRD); fprintf(stderr, "Mip: trouble reading \"%s\":\n%s", filename, err); free(err); return NULL; } /* say something about the array printf("Mip: \"%s\" is a %d-dimensional nrrd of type %d (%s)\n", filename, nin->dim, nin->type, airEnumStr(nrrdType, nin->type)); printf("Mip: the array contains %d elements, each %d bytes in size\n", (int)nrrdElementNumber(nin), (int)nrrdElementSize(nin));*/ return nin; } int exe_MIP_Kernel(Nrrd * nin, float stepSize, cl_float4 eyeVec, cl_float4 origVec, cl_float4 cVec, cl_float4 rVec, float * h1, float * h2, float * out) { cl_program program; cl_kernel kernel; cl_command_queue queue; cl_context context; cl_device_id cpu = NULL, device = NULL; cl_int err = 0; cl_float16 transformMatrix; cl_float16 inverseMatrix; int imageDataSize = (int)nrrdElementNumber(nin); float * data = (float *)nin->data; printf("Data Image: %f\n", (float)data[4* nin->axis[1].size * nin->axis[2].size + 5 * nin->axis[2].size + 2]); cl_mem imageData_mem, out_mem, h1_mem, h2_mem; /** Setup Device **/ err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL); assert(err==CL_SUCCESS); err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_GPU,1,&device,NULL); //if(err != CL_SUCCESS) device = cpu; assert(device); /** Retrieve Information about the device cl_char vendor_name[1024] = {0}; cl_char device_name[1024] = {0}; err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size); err|= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size); printf("Connecting to %s %s...\n", vendor_name, device_name); device_stats(device); */ /* Setup Context and Command Queue */ context = clCreateContext(0,1,&device,NULL,NULL,&err); assert(err == CL_SUCCESS); queue = clCreateCommandQueue(context,device,0,NULL); /** Load the Kernel and Program **/ const char * filename = "mip.cl"; char * kernel_source = loadKernel(filename); assert(kernel_source != 0); program = clCreateProgramWithSource(context,1,(const char **)&kernel_source,NULL,&err); assert(err == CL_SUCCESS); err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); /** Retrieve information about the program build to check for any possible errors **/ char * build_log; size_t log_size; // First call to know the proper size clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); build_log = (char *) malloc(log_size+1); // Second call to get the log clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL); build_log[log_size] = '\0'; printf("\nBuild Log:\n%s\n",build_log); free(build_log); assert(err == CL_SUCCESS); kernel = clCreateKernel(program,"raycast",&err); assert(err == CL_SUCCESS); /** Memory Allocation for the Matrices **/ h1_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL); err |= clEnqueueWriteBuffer(queue,h1_mem,CL_TRUE,0,sizeof(float) * 4, (void *)h1 ,0,NULL,NULL); h2_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL); err |= clEnqueueWriteBuffer(queue,h2_mem,CL_TRUE,0,sizeof(float) * 4, (void *)h2 ,0,NULL,NULL); imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL); err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize, nin->data ,0,NULL,NULL); //Load the transformMatrix loadTransformMatrix(nin,transformMatrix); invMatrix(transformMatrix,inverseMatrix); printf("Inverse\n"); printMatrix(inverseMatrix, 4); err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,imageDataSize, nin->data ,0,NULL,NULL); assert(err == CL_SUCCESS); out_mem = clCreateBuffer(context,CL_MEM_READ_WRITE, sizeof(float) * (SIZE *SIZE),NULL,NULL); clFinish(queue); size_t global_work_size[2], local_work_size[2]; global_work_size[0] = 256; global_work_size[1] = 256; local_work_size[0] = 1; local_work_size[1] = 1; err =clSetKernelArg(kernel,0,sizeof(cl_mem), &imageData_mem); err |=clSetKernelArg(kernel,1,sizeof(cl_mem), &h1_mem); err |=clSetKernelArg(kernel,2,sizeof(cl_mem), &h2_mem); err |=clSetKernelArg(kernel,3,sizeof(cl_mem), &out_mem); err |=clSetKernelArg(kernel,4,sizeof(cl_float4), origVec); err |=clSetKernelArg(kernel,5,sizeof(cl_float4), eyeVec); err |=clSetKernelArg(kernel,6,sizeof(cl_float4), cVec); err |=clSetKernelArg(kernel,7,sizeof(cl_float4), rVec); err |=clSetKernelArg(kernel,8,sizeof(cl_float16), &inverseMatrix); err |=clSetKernelArg(kernel,9,sizeof(float), &stepSize); err |=clSetKernelArg(kernel,10,sizeof(int), &nin->axis[1].size); err |=clSetKernelArg(kernel,11,sizeof(int), &nin->axis[2].size); err |=clSetKernelArg(kernel,12,sizeof(int), &nin->axis[0].size); printf("Error: %d\n",err); assert(err == CL_SUCCESS); /** Retrieve the Recommend Work Group Size */ size_t thread_size; clGetKernelWorkGroupInfo(kernel,device,CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t),&thread_size,NULL); printf("Recommended Size: %lu\n",thread_size); err = clEnqueueNDRangeKernel(queue,kernel,2,NULL,global_work_size, local_work_size,0,NULL,NULL); assert(err == CL_SUCCESS); clFinish(queue); err = clEnqueueReadBuffer(queue,out_mem,CL_TRUE,0, sizeof(float) * (SIZE *SIZE),out,0,NULL,NULL); saveResults(out,SIZE * SIZE); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(queue); clReleaseContext(context); clReleaseMemObject(imageData_mem); clReleaseMemObject(h1_mem); clReleaseMemObject(h2_mem); clReleaseMemObject(out_mem); return CL_SUCCESS; } #define VOX1_Z int main (int argc, char ** argv) { //Declaring and initializing input variables Nrrd * nin; #ifdef TXS char * dataFile = "txs.nrrd"; cl_float4 eyeVector = {25,15,10, 1}; cl_float4 origVector = {8.83877,2.5911,7.65275}; cl_float4 cVector = {-0.0151831,0.0278357,0}; cl_float4 rVector = {0.0074887,0.00408474,-0.0305383}; #endif #ifdef VOX1_X char * dataFile = "../../data/vox1.nrrd"; cl_float4 eyeVector = {-8,2,2, 1}; cl_float4 origVector = {0,3.4036,3.4036}; cl_float4 cVector = {0,-0.014036,0}; cl_float4 rVector = {0,0,-0.014036}; #endif #ifdef VOX1_Y char * dataFile = "../../data/vox1.nrrd"; cl_float4 eyeVector = {2,-8,2, 1}; cl_float4 origVector = {0.596402,0,3.4036, 1}; cl_float4 cVector = {0.014036,0,0}; cl_float4 rVector = {0,0,-0.014036}; #endif #ifdef VOX1_Z char * dataFile = "../../data/vox1-11.nrrd"; cl_float4 eyeVector = {2,2,-8, 1}; cl_float4 origVector = {3.4036,3.4036,0, 1}; cl_float4 cVector = {-0.014036,0,0,0}; cl_float4 rVector = {0,-0.014036,0}; #endif float stepSize = 0.1; float h1[] = {0.666667,0,-1,0.5}; float h2[] = {1.33333, -2, 1,-0.166667}; float * out; out = (float *) malloc(sizeof(float) * (SIZE * SIZE)); nin = loadNrrdFile(dataFile); exe_MIP_Kernel(nin,stepSize,eyeVector,origVector, cVector,rVector,h1,h2,out); return 0; }
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |