Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Diff of /branches/pure-cfg/test/MIP/mip_opencl.c
ViewVC logotype

Diff of /branches/pure-cfg/test/MIP/mip_opencl.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 203, Tue Aug 3 14:46:27 2010 UTC revision 310, Tue Aug 17 18:52:00 2010 UTC
# Line 1  Line 1 
1  /**  /* mip_opencl.c
2     *
3     * COPYRIGHT (c) 2010 The Diderot Project (http://diderot.cs.uchicago.edu)
4     * All rights reserved.
5     *
6     * An OpenCL mip implementation.
7   *   *
8   * To View the Image   * To View the Image
9   * =========================   * =========================
10   * ./unu reshape -i mip.txt -s 200 200 | ./unu quantize -b 8 -o new.png   * unu reshape -i mip.txt -s 200 200 | unu quantize -b 8 -o new.png
11   */   */
12  #include <OpenCL/OpenCl.h>  #include <OpenCL/OpenCl.h>
13  #include <assert.h>  #include <assert.h>
# Line 10  Line 15 
15  #include <stdlib.h>  #include <stdlib.h>
16  #include <sys/sysctl.h>  #include <sys/sysctl.h>
17  #include <sys/stat.h>  #include <sys/stat.h>
   
18  #include <teem/nrrd.h>  #include <teem/nrrd.h>
19    
20  #define SIZE 200  #define SIZE 640
   
 /*typedef float vec3[3];  
   
 typedef struct {  
         int degree;  
         float coeff[];  
 } polynomial;  
   
 typedef struct {  
         int support;  
         polynomial *segments[];  
 } kernel; */  
   
   
 int device_stats(cl_device_id device_id){  
   
         int err,i;  
         size_t returned_size;  
   
         // Report the device vendor and device name  
    //  
    cl_char vendor_name[1024] = {0};  
    cl_char device_name[1024] = {0};  
         cl_char device_profile[1024] = {0};  
         cl_char device_extensions[1024] = {0};  
         cl_device_local_mem_type local_mem_type;  
   
    cl_ulong global_mem_size, global_mem_cache_size;  
         cl_ulong max_mem_alloc_size;  
   
         cl_uint clock_frequency, vector_width, max_compute_units;  
   
         size_t max_work_item_dims,max_work_group_size, max_work_item_sizes[3];  
   
         cl_uint vector_types[] = {CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE};  
         char *vector_type_names[] = {"char","short","int","long","float","double"};  
   
         err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);  
    err|= clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(device_profile), device_profile, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, sizeof(device_extensions), device_extensions, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(global_mem_cache_size), &global_mem_cache_size, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_work_item_dims), &max_work_item_dims, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_work_item_sizes), max_work_item_sizes, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, &returned_size);  
   
         printf("Vendor: %s\n", vendor_name);  
         printf("Device Name: %s\n", device_name);  
         printf("Profile: %s\n", device_profile);  
         printf("Supported Extensions: %s\n\n", device_extensions);  
   
         printf("Local Mem Type (Local=1, Global=2): %i\n",(int)local_mem_type);  
         printf("Global Mem Size (MB): %i\n",(int)global_mem_size/(1024*1024));  
         printf("Global Mem Cache Size (Bytes): %i\n",(int)global_mem_cache_size);  
         printf("Max Mem Alloc Size (MB): %ld\n",(long int)max_mem_alloc_size/(1024*1024));  
   
         printf("Clock Frequency (MHz): %i\n\n",clock_frequency);  
   
         for(i=0;i<6;i++){  
                 err|= clGetDeviceInfo(device_id, vector_types[i], sizeof(clock_frequency), &vector_width, &returned_size);  
                 printf("Vector type width for: %s = %i\n",vector_type_names[i],vector_width);  
         }  
   
         printf("\nMax Work Group Size: %lu\n",max_work_group_size);  
         //printf("Max Work Item Dims: %lu\n",max_work_item_dims);  
         //for(size_t i=0;i<max_work_item_dims;i++)  
         //      printf("Max Work Items in Dim %lu: %lu\n",(long unsigned)(i+1),(long unsigned)max_work_item_sizes[i]);  
   
         printf("Max Compute Units: %i\n",max_compute_units);  
         printf("\n");  
21    
         return CL_SUCCESS;  
 }  
22  //Loads the Kernel from a file  //Loads the Kernel from a file
23  char * loadKernel (const char * filename)  char * loadKernel (const char * filename)
24  {  {
# Line 130  Line 51 
51      for(i = 0; i < size; i++)      for(i = 0; i < size; i++)
52      {      {
53          if(matrix[i] == -INFINITY || matrix[i] < 0)          if(matrix[i] == -INFINITY || matrix[i] < 0)
54             fprintf(out_file,"%f\n",0.0f);             fprintf(out_file,"%.4f\n",0.0f);
55          else          else
56             fprintf(out_file,"%f\n",matrix[i]);             fprintf(out_file,"%.4f\n",matrix[i]);
57    
58           if(matrix[i] > max)           if(matrix[i] > max)
59                  max = matrix[i];                  max = matrix[i];
60    
61     }     }
    printf("Max: %f\n",max);  
62     fclose(out_file);     fclose(out_file);
63    
64    
# Line 245  Line 165 
165  {  {
166          int index = 0, end = 1, arraySize = rowSize * rowSize;          int index = 0, end = 1, arraySize = rowSize * rowSize;
167    
168          for(index = 1000; index < 1256; index++)          for(index = 0; index < arraySize; index++)
169          {          {
170                  if(end == 16)                  if(end == rowSize)
171                  {                  {
172                          printf(" %.2f\n",matrix[index]);                          printf(" %.2f\n",matrix[index]);
173                          end = 1;                          end = 1;
# Line 281  Line 201 
201              transformMatrix[((size + 1) * (size)) + i ] = 0;              transformMatrix[((size + 1) * (size)) + i ] = 0;
202          }          }
203          transformMatrix[((size + 1) * (size)) + size ] = 1;          transformMatrix[((size + 1) * (size)) + size ] = 1;
204    
205  }  }
206  Nrrd * loadNrrdFile(char * filename)  Nrrd * loadNrrdFile(char * filename)
207  {  {
# Line 308  Line 229 
229    return nin;    return nin;
230    
231  }  }
232  int exe_MIP_Kernel(Nrrd * nin, float stepSize, cl_float4 eyeVec, cl_float4 origVec,  int exe_MIP_Kernel(float * img, int imageDataSize, float inverseMatrix[16], int sAxis[3], float * out)
                                   cl_float4 cVec, cl_float4 rVec, float * h1, float * h2, float * out)  
233  {  {
234    
235          cl_program program;          cl_program program;
# Line 322  Line 242 
242    
243          cl_int err = 0;          cl_int err = 0;
244    
         cl_float16 transformMatrix;  
         cl_float16 inverseMatrix;  
   
         int imageDataSize =  (int)nrrdElementNumber(nin);  
   
         float * data = (float *)nin->data;  
         printf("Data Image: %f\n", (float)data[4* nin->axis[1].size * nin->axis[2].size + 5 * nin->axis[2].size + 2]);  
   
         cl_mem imageData_mem, out_mem, h1_mem, h2_mem;  
245    
246            cl_mem imageData_mem, out_mem,sAxis_mem;
247    
248          /** Setup Device **/          /** Setup Device **/
249          err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL);          err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL);
# Line 343  Line 255 
255    
256          assert(device);          assert(device);
257    
   
         /** Retrieve Information about the device  
         cl_char vendor_name[1024] = {0};  
     cl_char device_name[1024] = {0};  
     err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);  
     err|= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);  
         printf("Connecting to %s %s...\n", vendor_name, device_name);  
         device_stats(device); */  
   
   
258          /* Setup Context and Command Queue */          /* Setup Context and Command Queue */
259          context = clCreateContext(0,1,&device,NULL,NULL,&err);          context = clCreateContext(0,1,&device,NULL,NULL,&err);
260          assert(err == CL_SUCCESS);          assert(err == CL_SUCCESS);
# Line 387  Line 289 
289    
290      assert(err == CL_SUCCESS);      assert(err == CL_SUCCESS);
291    
292          kernel = clCreateKernel(program,"raycast",&err);          kernel = clCreateKernel(program,"mip",&err);
293    
294          assert(err == CL_SUCCESS);          assert(err == CL_SUCCESS);
295    
296          /** Memory Allocation for the Matrices **/          /** Memory Allocation for the Matrices **/
297    
         h1_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL);  
         err |= clEnqueueWriteBuffer(queue,h1_mem,CL_TRUE,0,sizeof(float) * 4,  
                                                                 (void *)h1 ,0,NULL,NULL);  
   
         h2_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL);  
         err |= clEnqueueWriteBuffer(queue,h2_mem,CL_TRUE,0,sizeof(float) * 4,  
                                                                 (void *)h2 ,0,NULL,NULL);  
   
   
298          imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL);          imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL);
299          err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize,          err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize,
300                                                                  nin->data ,0,NULL,NULL);                                                                  (void *)img ,0,NULL,NULL);
   
         //Load the transformMatrix  
         loadTransformMatrix(nin,transformMatrix);  
         invMatrix(transformMatrix,inverseMatrix);  
301    
302          err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,imageDataSize,          sAxis_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(int) * 3,NULL,NULL);
303                                                                  nin->data ,0,NULL,NULL);          err |= clEnqueueWriteBuffer(queue,sAxis_mem,CL_TRUE,0,sizeof(int) * 3,
304                                                                    (void *)sAxis ,0,NULL,NULL);
305    
306          assert(err == CL_SUCCESS);          assert(err == CL_SUCCESS);
307    
# Line 421  Line 311 
311    
312          size_t global_work_size[2], local_work_size[2];          size_t global_work_size[2], local_work_size[2];
313    
314          global_work_size[0] = 256;          global_work_size[0] = SIZE;
315          global_work_size[1] = 256;          global_work_size[1] = SIZE;
316    
317          local_work_size[0] = 1;          local_work_size[0] = 1;
318          local_work_size[1] = 1;          local_work_size[1] = 1;
319    
320          err  =clSetKernelArg(kernel,0,sizeof(cl_mem), &imageData_mem);          cl_int2 workDim = {SIZE,SIZE};
         err |=clSetKernelArg(kernel,1,sizeof(cl_mem), &h1_mem);  
         err |=clSetKernelArg(kernel,2,sizeof(cl_mem), &h2_mem);  
         err |=clSetKernelArg(kernel,3,sizeof(cl_mem), &out_mem);  
         err |=clSetKernelArg(kernel,4,sizeof(cl_float4), origVec);  
         err |=clSetKernelArg(kernel,5,sizeof(cl_float4), eyeVec);  
         err |=clSetKernelArg(kernel,6,sizeof(cl_float4), cVec);  
         err |=clSetKernelArg(kernel,7,sizeof(cl_float4), rVec);  
         err |=clSetKernelArg(kernel,8,sizeof(cl_float16), &inverseMatrix);  
         err |=clSetKernelArg(kernel,9,sizeof(float), &stepSize);  
         err |=clSetKernelArg(kernel,10,sizeof(int), &nin->axis[1].size);  
         err |=clSetKernelArg(kernel,11,sizeof(int), &nin->axis[2].size);  
         err |=clSetKernelArg(kernel,12,sizeof(int), &nin->axis[0].size);  
321    
322          printf("Error: %d\n",err);          int index = 0;
         assert(err == CL_SUCCESS);  
323    
324          /** Retrieve the Recommend Work Group Size */          err  =clSetKernelArg(kernel,index++,sizeof(cl_mem), &imageData_mem);
325          size_t thread_size;          err |=clSetKernelArg(kernel,index++,sizeof(cl_mem), &out_mem);
326          clGetKernelWorkGroupInfo(kernel,device,CL_KERNEL_WORK_GROUP_SIZE,          err |=clSetKernelArg(kernel,index++,sizeof(cl_float16), inverseMatrix);
327                                                           sizeof(size_t),&thread_size,NULL);          err |=clSetKernelArg(kernel,index++,sizeof(cl_int2), &workDim);
328          printf("Recommended Size: %lu\n",thread_size);          err |=clSetKernelArg(kernel,index++,sizeof(cl_mem), &sAxis_mem);
329    
330            assert(err == CL_SUCCESS);
331    
332    
333          err = clEnqueueNDRangeKernel(queue,kernel,2,NULL,global_work_size,          err = clEnqueueNDRangeKernel(queue,kernel,2,NULL,global_work_size,
# Line 466  Line 345 
345          clReleaseProgram(program);          clReleaseProgram(program);
346          clReleaseCommandQueue(queue);          clReleaseCommandQueue(queue);
347          clReleaseContext(context);          clReleaseContext(context);
   
348          clReleaseMemObject(imageData_mem);          clReleaseMemObject(imageData_mem);
         clReleaseMemObject(h1_mem);  
         clReleaseMemObject(h2_mem);  
349          clReleaseMemObject(out_mem);          clReleaseMemObject(out_mem);
350    
351          return CL_SUCCESS;          return CL_SUCCESS;
352  }  }
353    
354  int main (int argc, char ** argv)  int main (int argc, char ** argv)
355  {  {
356          //Declaring and initializing input variables          char * dataFile = "../../data/txs.nrrd";
357          Nrrd * nin;          float transformMatrix[16];
358          char * dataFile = "txs.nrrd";          float inverseMatrix[16];
359          cl_float4 eyeVector = {25,15,10};  
360          cl_float4 origVector = {8.83877,2.5911,7.65275};          if (argc == 2) {
361          cl_float4 cVector = {-0.0151831,0.0278357,0};              dataFile = argv[1];
362          cl_float4 rVector = {0.0074887,0.00408474,-0.0305383};          }
         float stepSize = 0.1;  
         float h1[] = {0.666667,0,-1,0.5};  
         float h2[] = {1.33333, -2, 1,-0.166667};  
         float * out;  
363    
364            float * out;
365          out = (float *) malloc(sizeof(float) * (SIZE * SIZE));          out = (float *) malloc(sizeof(float) * (SIZE * SIZE));
366            Nrrd * nin = loadNrrdFile(dataFile);
367    
368          nin = loadNrrdFile(dataFile);          int sAxis[] = {nin->axis[0].size, nin->axis[1].size,nin->axis[2].size};
369    
370            loadTransformMatrix(nin,transformMatrix);
371            invMatrix(transformMatrix,inverseMatrix);
372    
         exe_MIP_Kernel(nin,stepSize,eyeVector,origVector,  
                                                         cVector,rVector,h1,h2,out);  
373    
374            exe_MIP_Kernel((float *)nin->data, (int)nrrdElementNumber(nin),inverseMatrix, sAxis, out);
375    
376          return 0;          return 0;
377  }  }

Legend:
Removed from v.203  
changed lines
  Added in v.310

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0