Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Diff of /trunk/test/MIP/mip_opencl.c
ViewVC logotype

Diff of /trunk/test/MIP/mip_opencl.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 301, Sun Aug 15 23:13:20 2010 UTC revision 302, Tue Aug 17 03:52:21 2010 UTC
# Line 1  Line 1 
1  /**  /* mip_opencl.c
2     *
3     * COPYRIGHT (c) 2010 The Diderot Project (http://diderot.cs.uchicago.edu)
4     * All rights reserved.
5     *
6     * An OpenCL mip implementation.
7   *   *
8   * To View the Image   * To View the Image
9   * =========================   * =========================
# Line 10  Line 15 
15  #include <stdlib.h>  #include <stdlib.h>
16  #include <sys/sysctl.h>  #include <sys/sysctl.h>
17  #include <sys/stat.h>  #include <sys/stat.h>
   
18  #include <teem/nrrd.h>  #include <teem/nrrd.h>
19    
20  #define SIZE 201  #define SIZE 200
   
 /*typedef float vec3[3];  
   
 typedef struct {  
         int degree;  
         float coeff[];  
 } polynomial;  
   
 typedef struct {  
         int support;  
         polynomial *segments[];  
 } kernel; */  
   
   
 int device_stats(cl_device_id device_id){  
   
         int err,i;  
         size_t returned_size;  
   
         // Report the device vendor and device name  
    //  
    cl_char vendor_name[1024] = {0};  
    cl_char device_name[1024] = {0};  
         cl_char device_profile[1024] = {0};  
         cl_char device_extensions[1024] = {0};  
         cl_device_local_mem_type local_mem_type;  
   
    cl_ulong global_mem_size, global_mem_cache_size;  
         cl_ulong max_mem_alloc_size;  
   
         cl_uint clock_frequency, vector_width, max_compute_units;  
   
         size_t max_work_item_dims,max_work_group_size, max_work_item_sizes[3];  
   
         cl_uint vector_types[] = {CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE};  
         char *vector_type_names[] = {"char","short","int","long","float","double"};  
   
         err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);  
    err|= clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(device_profile), device_profile, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, sizeof(device_extensions), device_extensions, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(global_mem_cache_size), &global_mem_cache_size, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_work_item_dims), &max_work_item_dims, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_work_item_sizes), max_work_item_sizes, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, &returned_size);  
   
         printf("Vendor: %s\n", vendor_name);  
         printf("Device Name: %s\n", device_name);  
         printf("Profile: %s\n", device_profile);  
         printf("Supported Extensions: %s\n\n", device_extensions);  
   
         printf("Local Mem Type (Local=1, Global=2): %i\n",(int)local_mem_type);  
         printf("Global Mem Size (MB): %i\n",(int)global_mem_size/(1024*1024));  
         printf("Global Mem Cache Size (Bytes): %i\n",(int)global_mem_cache_size);  
         printf("Max Mem Alloc Size (MB): %ld\n",(long int)max_mem_alloc_size/(1024*1024));  
   
         printf("Clock Frequency (MHz): %i\n\n",clock_frequency);  
   
         for(i=0;i<6;i++){  
                 err|= clGetDeviceInfo(device_id, vector_types[i], sizeof(clock_frequency), &vector_width, &returned_size);  
                 printf("Vector type width for: %s = %i\n",vector_type_names[i],vector_width);  
         }  
   
         printf("\nMax Work Group Size: %lu\n",max_work_group_size);  
         //printf("Max Work Item Dims: %lu\n",max_work_item_dims);  
         //for(size_t i=0;i<max_work_item_dims;i++)  
         //      printf("Max Work Items in Dim %lu: %lu\n",(long unsigned)(i+1),(long unsigned)max_work_item_sizes[i]);  
   
         printf("Max Compute Units: %i\n",max_compute_units);  
         printf("\n");  
21    
         return CL_SUCCESS;  
 }  
22  //Loads the Kernel from a file  //Loads the Kernel from a file
23  char * loadKernel (const char * filename)  char * loadKernel (const char * filename)
24  {  {
# Line 130  Line 51 
51      for(i = 0; i < size; i++)      for(i = 0; i < size; i++)
52      {      {
53          if(matrix[i] == -INFINITY || matrix[i] < 0)          if(matrix[i] == -INFINITY || matrix[i] < 0)
54             fprintf(out_file,"%f\n",0.0f);             fprintf(out_file,"%.4f\n",0.0f);
55          else          else
56             fprintf(out_file,"%f\n",matrix[i]);             fprintf(out_file,"%.4f\n",matrix[i]);
57    
58           if(matrix[i] > max)           if(matrix[i] > max)
59                  max = matrix[i];                  max = matrix[i];
# Line 282  Line 203 
203          }          }
204          transformMatrix[((size + 1) * (size)) + size ] = 1;          transformMatrix[((size + 1) * (size)) + size ] = 1;
205    
 printMatrix (transformMatrix, 4);  
206  }  }
207  Nrrd * loadNrrdFile(char * filename)  Nrrd * loadNrrdFile(char * filename)
208  {  {
# Line 310  Line 230 
230    return nin;    return nin;
231    
232  }  }
233  int exe_MIP_Kernel(Nrrd * nin, float stepSize, cl_float4 eyeVec, cl_float4 origVec,  int exe_MIP_Kernel(float * img, int imageDataSize, float inverseMatrix[16], int sAxis[3], float * out)
                                   cl_float4 cVec, cl_float4 rVec, float * h1, float * h2, float * out)  
234  {  {
235    
236          cl_program program;          cl_program program;
# Line 324  Line 243 
243    
244          cl_int err = 0;          cl_int err = 0;
245    
         cl_float16 transformMatrix;  
         cl_float16 inverseMatrix;  
   
         int imageDataSize =  (int)nrrdElementNumber(nin);  
   
         float * data = (float *)nin->data;  
         printf("Data Image: %f\n", (float)data[4* nin->axis[1].size * nin->axis[2].size + 5 * nin->axis[2].size + 2]);  
   
         cl_mem imageData_mem, out_mem, h1_mem, h2_mem;  
246    
247            cl_mem imageData_mem, out_mem,sAxis_mem;
248    
249          /** Setup Device **/          /** Setup Device **/
250          err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL);          err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL);
# Line 345  Line 256 
256    
257          assert(device);          assert(device);
258    
   
         /** Retrieve Information about the device  
         cl_char vendor_name[1024] = {0};  
     cl_char device_name[1024] = {0};  
     err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);  
     err|= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);  
         printf("Connecting to %s %s...\n", vendor_name, device_name);  
         device_stats(device); */  
   
   
259          /* Setup Context and Command Queue */          /* Setup Context and Command Queue */
260          context = clCreateContext(0,1,&device,NULL,NULL,&err);          context = clCreateContext(0,1,&device,NULL,NULL,&err);
261          assert(err == CL_SUCCESS);          assert(err == CL_SUCCESS);
# Line 389  Line 290 
290    
291      assert(err == CL_SUCCESS);      assert(err == CL_SUCCESS);
292    
293          kernel = clCreateKernel(program,"raycast",&err);          kernel = clCreateKernel(program,"mip",&err);
294    
295          assert(err == CL_SUCCESS);          assert(err == CL_SUCCESS);
296    
297          /** Memory Allocation for the Matrices **/          /** Memory Allocation for the Matrices **/
298    
         h1_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL);  
         err |= clEnqueueWriteBuffer(queue,h1_mem,CL_TRUE,0,sizeof(float) * 4,  
                                                                 (void *)h1 ,0,NULL,NULL);  
   
         h2_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL);  
         err |= clEnqueueWriteBuffer(queue,h2_mem,CL_TRUE,0,sizeof(float) * 4,  
                                                                 (void *)h2 ,0,NULL,NULL);  
   
   
299          imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL);          imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL);
300          err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize,          err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize,
301                                                                  nin->data ,0,NULL,NULL);                                                                  (void *)img ,0,NULL,NULL);
302    
303          //Load the transformMatrix          sAxis_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(int) * 3,NULL,NULL);
304          loadTransformMatrix(nin,transformMatrix);          err |= clEnqueueWriteBuffer(queue,sAxis_mem,CL_TRUE,0,sizeof(int) * 3,
305          invMatrix(transformMatrix,inverseMatrix);                                                                  (void *)sAxis ,0,NULL,NULL);
 printf("Inverse\n"); printMatrix(inverseMatrix, 4);  
   
         err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,imageDataSize,  
                                                                 nin->data ,0,NULL,NULL);  
306    
307          assert(err == CL_SUCCESS);          assert(err == CL_SUCCESS);
308    
# Line 430  Line 318 
318          local_work_size[0] = 1;          local_work_size[0] = 1;
319          local_work_size[1] = 1;          local_work_size[1] = 1;
320    
321          err  =clSetKernelArg(kernel,0,sizeof(cl_mem), &imageData_mem);          cl_int2 workDim = {SIZE,SIZE};
         err |=clSetKernelArg(kernel,1,sizeof(cl_mem), &h1_mem);  
         err |=clSetKernelArg(kernel,2,sizeof(cl_mem), &h2_mem);  
         err |=clSetKernelArg(kernel,3,sizeof(cl_mem), &out_mem);  
         err |=clSetKernelArg(kernel,4,sizeof(cl_float4), origVec);  
         err |=clSetKernelArg(kernel,5,sizeof(cl_float4), eyeVec);  
         err |=clSetKernelArg(kernel,6,sizeof(cl_float4), cVec);  
         err |=clSetKernelArg(kernel,7,sizeof(cl_float4), rVec);  
         err |=clSetKernelArg(kernel,8,sizeof(cl_float16), &inverseMatrix);  
         err |=clSetKernelArg(kernel,9,sizeof(float), &stepSize);  
         err |=clSetKernelArg(kernel,10,sizeof(int), &nin->axis[1].size);  
         err |=clSetKernelArg(kernel,11,sizeof(int), &nin->axis[2].size);  
         err |=clSetKernelArg(kernel,12,sizeof(int), &nin->axis[0].size);  
322    
323          printf("Error: %d\n",err);          int index = 0;
         assert(err == CL_SUCCESS);  
324    
325          /** Retrieve the Recommend Work Group Size */          err  =clSetKernelArg(kernel,index++,sizeof(cl_mem), &imageData_mem);
326          size_t thread_size;          err |=clSetKernelArg(kernel,index++,sizeof(cl_mem), &out_mem);
327          clGetKernelWorkGroupInfo(kernel,device,CL_KERNEL_WORK_GROUP_SIZE,          err |=clSetKernelArg(kernel,index++,sizeof(cl_float16), inverseMatrix);
328                                                           sizeof(size_t),&thread_size,NULL);          err |=clSetKernelArg(kernel,index++,sizeof(cl_int2), &workDim);
329          printf("Recommended Size: %lu\n",thread_size);          err |=clSetKernelArg(kernel,index++,sizeof(cl_mem), &sAxis_mem);
330    
331            printf("error:%d\n",err);
332            assert(err == CL_SUCCESS);
333    
334    
335          err = clEnqueueNDRangeKernel(queue,kernel,2,NULL,global_work_size,          err = clEnqueueNDRangeKernel(queue,kernel,2,NULL,global_work_size,
# Line 469  Line 347 
347          clReleaseProgram(program);          clReleaseProgram(program);
348          clReleaseCommandQueue(queue);          clReleaseCommandQueue(queue);
349          clReleaseContext(context);          clReleaseContext(context);
   
350          clReleaseMemObject(imageData_mem);          clReleaseMemObject(imageData_mem);
         clReleaseMemObject(h1_mem);  
         clReleaseMemObject(h2_mem);  
351          clReleaseMemObject(out_mem);          clReleaseMemObject(out_mem);
352    
353          return CL_SUCCESS;          return CL_SUCCESS;
354  }  }
 #define VOX1_Z  
355  int main (int argc, char ** argv)  int main (int argc, char ** argv)
356  {  {
         //Declaring and initializing input variables  
         Nrrd * nin;  
 #ifdef TXS  
357          char * dataFile = "txs.nrrd";          char * dataFile = "txs.nrrd";
358          cl_float4 eyeVector = {25,15,10, 1};          float transformMatrix[16];
359          cl_float4 origVector = {8.83877,2.5911,7.65275};          float inverseMatrix[16];
         cl_float4 cVector = {-0.0151831,0.0278357,0};  
         cl_float4 rVector = {0.0074887,0.00408474,-0.0305383};  
 #endif  
 #ifdef VOX1_X  
         char * dataFile = "../../data/vox1.nrrd";  
         cl_float4 eyeVector = {-8,2,2, 1};  
         cl_float4 origVector = {0,3.4036,3.4036};  
         cl_float4 cVector = {0,-0.014036,0};  
         cl_float4 rVector = {0,0,-0.014036};  
 #endif  
 #ifdef VOX1_Y  
         char * dataFile = "../../data/vox1.nrrd";  
         cl_float4 eyeVector = {2,-8,2, 1};  
         cl_float4 origVector = {0.596402,0,3.4036, 1};  
         cl_float4 cVector = {0.014036,0,0};  
         cl_float4 rVector = {0,0,-0.014036};  
 #endif  
 #ifdef VOX1_Z  
         char * dataFile = "../../data/vox1-11.nrrd";  
         cl_float4 eyeVector = {2,2,-8, 1};  
         cl_float4 origVector = {3.4036,3.4036,0, 1};  
         cl_float4 cVector = {-0.014036,0,0,0};  
         cl_float4 rVector = {0,-0.014036,0};  
 #endif  
         float stepSize = 0.1;  
         float h1[] = {0.666667,0,-1,0.5};  
         float h2[] = {1.33333, -2, 1,-0.166667};  
         float * out;  
360    
361            float * out;
362          out = (float *) malloc(sizeof(float) * (SIZE * SIZE));          out = (float *) malloc(sizeof(float) * (SIZE * SIZE));
363            Nrrd * nin = loadNrrdFile(dataFile);
364    
365          nin = loadNrrdFile(dataFile);          int sAxis[] = {nin->axis[0].size, nin->axis[1].size,nin->axis[2].size};
366    
367            loadTransformMatrix(nin,transformMatrix);
368            invMatrix(transformMatrix,inverseMatrix);
369    
         exe_MIP_Kernel(nin,stepSize,eyeVector,origVector,  
                                                         cVector,rVector,h1,h2,out);  
370    
371            exe_MIP_Kernel((float *)nin->data, (int)nrrdElementNumber(nin),inverseMatrix, sAxis, out);
372    
373          return 0;          return 0;
374  }  }

Legend:
Removed from v.301  
changed lines
  Added in v.302

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0