Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Diff of /branches/pure-cfg/test/MIP/mip_opencl.c
ViewVC logotype

Diff of /branches/pure-cfg/test/MIP/mip_opencl.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

trunk/test/MIP/mip_opencl.c revision 191, Mon Aug 2 14:05:11 2010 UTC branches/pure-cfg/test/MIP/mip_opencl.c revision 477, Sat Nov 13 16:02:07 2010 UTC
# Line 1  Line 1 
1    /* mip_opencl.c
2     *
3     * COPYRIGHT (c) 2010 The Diderot Project (http://diderot-language.cs.uchicago.edu)
4     * All rights reserved.
5     *
6     * An OpenCL mip implementation.
7     *
8     * To View the Image
9     * =========================
10     * unu reshape -i mip.txt -s 200 200 | unu quantize -b 8 -o new.png
11     */
12  #include <OpenCL/OpenCl.h>  #include <OpenCL/OpenCl.h>
13  #include <assert.h>  #include <assert.h>
14  #include <stdio.h>  #include <stdio.h>
15  #include <stdlib.h>  #include <stdlib.h>
16  #include <sys/sysctl.h>  #include <sys/sysctl.h>
17  #include <sys/stat.h>  #include <sys/stat.h>
   
18  #include <teem/nrrd.h>  #include <teem/nrrd.h>
19    
20  #define SIZE 200  #define SIZE 640
   
 /*typedef float vec3[3];  
   
 typedef struct {  
         int degree;  
         float coeff[];  
 } polynomial;  
   
 typedef struct {  
         int support;  
         polynomial *segments[];  
 } kernel; */  
   
   
 int device_stats(cl_device_id device_id){  
   
         int err,i;  
         size_t returned_size;  
   
         // Report the device vendor and device name  
    //  
    cl_char vendor_name[1024] = {0};  
    cl_char device_name[1024] = {0};  
         cl_char device_profile[1024] = {0};  
         cl_char device_extensions[1024] = {0};  
         cl_device_local_mem_type local_mem_type;  
   
    cl_ulong global_mem_size, global_mem_cache_size;  
         cl_ulong max_mem_alloc_size;  
   
         cl_uint clock_frequency, vector_width, max_compute_units;  
   
         size_t max_work_item_dims,max_work_group_size, max_work_item_sizes[3];  
   
         cl_uint vector_types[] = {CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE};  
         char *vector_type_names[] = {"char","short","int","long","float","double"};  
   
         err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);  
    err|= clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(device_profile), device_profile, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, sizeof(device_extensions), device_extensions, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(global_mem_cache_size), &global_mem_cache_size, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_work_item_dims), &max_work_item_dims, &returned_size);  
21    
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_work_item_sizes), max_work_item_sizes, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, &returned_size);  
   
         printf("Vendor: %s\n", vendor_name);  
         printf("Device Name: %s\n", device_name);  
         printf("Profile: %s\n", device_profile);  
         printf("Supported Extensions: %s\n\n", device_extensions);  
   
         printf("Local Mem Type (Local=1, Global=2): %i\n",(int)local_mem_type);  
         printf("Global Mem Size (MB): %i\n",(int)global_mem_size/(1024*1024));  
         printf("Global Mem Cache Size (Bytes): %i\n",(int)global_mem_cache_size);  
         printf("Max Mem Alloc Size (MB): %ld\n",(long int)max_mem_alloc_size/(1024*1024));  
   
         printf("Clock Frequency (MHz): %i\n\n",clock_frequency);  
   
         for(i=0;i<6;i++){  
                 err|= clGetDeviceInfo(device_id, vector_types[i], sizeof(clock_frequency), &vector_width, &returned_size);  
                 printf("Vector type width for: %s = %i\n",vector_type_names[i],vector_width);  
         }  
   
         printf("\nMax Work Group Size: %lu\n",max_work_group_size);  
         //printf("Max Work Item Dims: %lu\n",max_work_item_dims);  
         //for(size_t i=0;i<max_work_item_dims;i++)  
         //      printf("Max Work Items in Dim %lu: %lu\n",(long unsigned)(i+1),(long unsigned)max_work_item_sizes[i]);  
   
         printf("Max Compute Units: %i\n",max_compute_units);  
         printf("\n");  
   
         return CL_SUCCESS;  
 }  
22  //Loads the Kernel from a file  //Loads the Kernel from a file
23  char * loadKernel (const char * filename)  char * loadKernel (const char * filename)
24  {  {
# Line 110  Line 37 
37    
38          return source;          return source;
39  }  }
40    void saveResults (float * matrix, int size)
41    {
42            int i;
43            float max = -INFINITY;
44            FILE * out_file;
45            out_file = fopen("mip.txt", "w");
46        if (out_file == NULL) {
47            fprintf(stderr,"Can not open output file\n");
48            exit (8);
49        }
50    
51        for(i = 0; i < size; i++)
52        {
53            if(matrix[i] == -INFINITY || matrix[i] < 0)
54               fprintf(out_file,"%.4f\n",0.0f);
55            else
56               fprintf(out_file,"%.4f\n",matrix[i]);
57    
58             if(matrix[i] > max)
59                    max = matrix[i];
60    
61       }
62       fclose(out_file);
63    
64    
65    }
66  float det3x3(float a, float b, float c, float d, float e, float f, float g, float h, float i)  float det3x3(float a, float b, float c, float d, float e, float f, float g, float h, float i)
67  {  {
68     return ( (a)*(e)*(i)     return ( (a)*(e)*(i)
# Line 248  Line 201 
201              transformMatrix[((size + 1) * (size)) + i ] = 0;              transformMatrix[((size + 1) * (size)) + i ] = 0;
202          }          }
203          transformMatrix[((size + 1) * (size)) + size ] = 1;          transformMatrix[((size + 1) * (size)) + size ] = 1;
204    
205  }  }
206  Nrrd * loadNrrdFile(char * filename)  Nrrd * loadNrrdFile(char * filename)
207  {  {
# Line 275  Line 229 
229    return nin;    return nin;
230    
231  }  }
232  int exe_MIP_Kernel(Nrrd * nin, float stepSize, cl_float4 eyeVec, cl_float4 origVec,  int exe_MIP_Kernel(float * img, int imageDataSize, float inverseMatrix[16], int sAxis[3], float * out)
                                   cl_float4 cVec, cl_float4 rVec, float * h1, float * h2, float * out)  
233  {  {
234    
235          cl_program program;          cl_program program;
# Line 289  Line 242 
242    
243          cl_int err = 0;          cl_int err = 0;
244    
         cl_float16 transformMatrix;  
         cl_float16 inverseMatrix;  
   
         int imageDataSize =  (int)nrrdElementNumber(nin);  
   
         float * data = (float *)nin->data;  
         printf("Data Image: %f\n", (float)data[4* nin->axis[1].size * nin->axis[2].size + 5 * nin->axis[2].size + 2]);  
   
         cl_mem imageData_mem, out_mem, h1_mem, h2_mem;  
245    
246            cl_mem imageData_mem, out_mem,sAxis_mem;
247    
248          /** Setup Device **/          /** Setup Device **/
249          err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL);          err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL);
# Line 310  Line 255 
255    
256          assert(device);          assert(device);
257    
   
         /** Retrieve Information about the device  
         cl_char vendor_name[1024] = {0};  
     cl_char device_name[1024] = {0};  
     err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);  
     err|= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);  
         printf("Connecting to %s %s...\n", vendor_name, device_name);  
         device_stats(device); */  
   
   
258          /* Setup Context and Command Queue */          /* Setup Context and Command Queue */
259          context = clCreateContext(0,1,&device,NULL,NULL,&err);          context = clCreateContext(0,1,&device,NULL,NULL,&err);
260          assert(err == CL_SUCCESS);          assert(err == CL_SUCCESS);
# Line 354  Line 289 
289    
290      assert(err == CL_SUCCESS);      assert(err == CL_SUCCESS);
291    
292          kernel = clCreateKernel(program,"raycast",&err);          kernel = clCreateKernel(program,"mip",&err);
293    
294          assert(err == CL_SUCCESS);          assert(err == CL_SUCCESS);
295    
296          /** Memory Allocation for the Matrices **/          /** Memory Allocation for the Matrices **/
297    
         h1_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL);  
         err |= clEnqueueWriteBuffer(queue,h1_mem,CL_TRUE,0,sizeof(float) * 4,  
                                                                 (void *)h1 ,0,NULL,NULL);  
   
         h2_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL);  
         err |= clEnqueueWriteBuffer(queue,h2_mem,CL_TRUE,0,sizeof(float) * 4,  
                                                                 (void *)h2 ,0,NULL,NULL);  
   
   
298          imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL);          imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL);
299          err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize,          err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize,
300                                                                  nin->data ,0,NULL,NULL);                                                                  (void *)img ,0,NULL,NULL);
301    
302          //Load the transformMatrix          sAxis_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(int) * 3,NULL,NULL);
303          loadTransformMatrix(nin,transformMatrix);          err |= clEnqueueWriteBuffer(queue,sAxis_mem,CL_TRUE,0,sizeof(int) * 3,
304          invMatrix(transformMatrix,inverseMatrix);                                                                  (void *)sAxis ,0,NULL,NULL);
   
         err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,imageDataSize,  
                                                                 nin->data ,0,NULL,NULL);  
305    
306          assert(err == CL_SUCCESS);          assert(err == CL_SUCCESS);
307    
# Line 388  Line 311 
311    
312          size_t global_work_size[2], local_work_size[2];          size_t global_work_size[2], local_work_size[2];
313    
314          global_work_size[0] = 256;          global_work_size[0] = SIZE;
315          global_work_size[1] = 256;          global_work_size[1] = SIZE;
316    
317          local_work_size[0] = 1;          local_work_size[0] = 1;
318          local_work_size[1] = 1;          local_work_size[1] = 1;
319    
320          err  =clSetKernelArg(kernel,0,sizeof(cl_mem), &imageData_mem);          cl_int2 workDim = {SIZE,SIZE};
         err |=clSetKernelArg(kernel,1,sizeof(cl_mem), &h1_mem);  
         err |=clSetKernelArg(kernel,2,sizeof(cl_mem), &h2_mem);  
         err |=clSetKernelArg(kernel,3,sizeof(cl_mem), &out_mem);  
         err |=clSetKernelArg(kernel,4,sizeof(cl_float4), origVec);  
         err |=clSetKernelArg(kernel,5,sizeof(cl_float4), eyeVec);  
         err |=clSetKernelArg(kernel,6,sizeof(cl_float4), cVec);  
         err |=clSetKernelArg(kernel,7,sizeof(cl_float4), rVec);  
         err |=clSetKernelArg(kernel,8,sizeof(cl_float16), &inverseMatrix);  
         err |=clSetKernelArg(kernel,9,sizeof(float), &stepSize);  
         err |=clSetKernelArg(kernel,10,sizeof(int), &nin->axis[1].size);  
         err |=clSetKernelArg(kernel,11,sizeof(int), &nin->axis[2].size);  
321    
322          assert(err == CL_SUCCESS);          int index = 0;
323    
324          /** Retrieve the Recommend Work Group Size */          err  =clSetKernelArg(kernel,index++,sizeof(cl_mem), &imageData_mem);
325          size_t thread_size;          err |=clSetKernelArg(kernel,index++,sizeof(cl_mem), &out_mem);
326          clGetKernelWorkGroupInfo(kernel,device,CL_KERNEL_WORK_GROUP_SIZE,          err |=clSetKernelArg(kernel,index++,sizeof(cl_float16), inverseMatrix);
327                                                           sizeof(size_t),&thread_size,NULL);          err |=clSetKernelArg(kernel,index++,sizeof(cl_int2), &workDim);
328          printf("Recommended Size: %lu\n",thread_size);          err |=clSetKernelArg(kernel,index++,sizeof(cl_mem), &sAxis_mem);
329    
330            assert(err == CL_SUCCESS);
331    
332    
333          err = clEnqueueNDRangeKernel(queue,kernel,2,NULL,global_work_size,          err = clEnqueueNDRangeKernel(queue,kernel,2,NULL,global_work_size,
# Line 425  Line 339 
339    
340          err = clEnqueueReadBuffer(queue,out_mem,CL_TRUE,0, sizeof(float) * (SIZE *SIZE),out,0,NULL,NULL);          err = clEnqueueReadBuffer(queue,out_mem,CL_TRUE,0, sizeof(float) * (SIZE *SIZE),out,0,NULL,NULL);
341    
342          printMatrix(out,20);          saveResults(out,SIZE * SIZE);
343    
344          clReleaseKernel(kernel);          clReleaseKernel(kernel);
345          clReleaseProgram(program);          clReleaseProgram(program);
346          clReleaseCommandQueue(queue);          clReleaseCommandQueue(queue);
347          clReleaseContext(context);          clReleaseContext(context);
   
348          clReleaseMemObject(imageData_mem);          clReleaseMemObject(imageData_mem);
         clReleaseMemObject(h1_mem);  
         clReleaseMemObject(h2_mem);  
349          clReleaseMemObject(out_mem);          clReleaseMemObject(out_mem);
350    
351          return CL_SUCCESS;          return CL_SUCCESS;
352  }  }
353    
354  int main (int argc, char ** argv)  int main (int argc, char ** argv)
355  {  {
356          //Declaring and initializing input variables          char * dataFile = "../../data/txs.nrrd";
357          Nrrd * nin;          float transformMatrix[16];
358          char * dataFile = "txs.nrrd";          float inverseMatrix[16];
359          cl_float4 eyeVector = {25,15,10};  
360          cl_float4 origVector = {8.83877,2.5911,7.65275};          if (argc == 2) {
361          cl_float4 cVector = {-0.0151831,0.0278357,0};              dataFile = argv[1];
362          cl_float4 rVector = {0.0074887,0.00408474,-0.0305383};          }
         float stepSize = 0.1;  
         float h1[] = {0.666667,0,-1,0.5};  
         float h2[] = {1.33333, -2, 1,-0.166667};  
         float * out;  
363    
364            float * out;
365          out = (float *) malloc(sizeof(float) * (SIZE * SIZE));          out = (float *) malloc(sizeof(float) * (SIZE * SIZE));
366            Nrrd * nin = loadNrrdFile(dataFile);
367    
368            int sAxis[] = {nin->axis[0].size, nin->axis[1].size,nin->axis[2].size};
369    
370          nin = loadNrrdFile(dataFile);          loadTransformMatrix(nin,transformMatrix);
371            invMatrix(transformMatrix,inverseMatrix);
372    
         exe_MIP_Kernel(nin,stepSize,eyeVector,origVector,  
                                                         cVector,rVector,h1,h2,out);  
373    
374            exe_MIP_Kernel((float *)nin->data, (int)nrrdElementNumber(nin),inverseMatrix, sAxis, out);
375    
376          return 0;          return 0;
377  }  }

Legend:
Removed from v.191  
changed lines
  Added in v.477

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0