Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Diff of /trunk/test/MIP/mip_opencl.c
ViewVC logotype

Diff of /trunk/test/MIP/mip_opencl.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 191, Mon Aug 2 14:05:11 2010 UTC revision 302, Tue Aug 17 03:52:21 2010 UTC
# Line 1  Line 1 
1    /* mip_opencl.c
2     *
3     * COPYRIGHT (c) 2010 The Diderot Project (http://diderot.cs.uchicago.edu)
4     * All rights reserved.
5     *
6     * An OpenCL mip implementation.
7     *
8     * To View the Image
9     * =========================
10     * ./unu reshape -i mip.txt -s 200 200 | ./unu quantize -b 8 -o new.png
11     */
12  #include <OpenCL/OpenCl.h>  #include <OpenCL/OpenCl.h>
13  #include <assert.h>  #include <assert.h>
14  #include <stdio.h>  #include <stdio.h>
15  #include <stdlib.h>  #include <stdlib.h>
16  #include <sys/sysctl.h>  #include <sys/sysctl.h>
17  #include <sys/stat.h>  #include <sys/stat.h>
   
18  #include <teem/nrrd.h>  #include <teem/nrrd.h>
19    
20  #define SIZE 200  #define SIZE 200
21    
 /*typedef float vec3[3];  
   
 typedef struct {  
         int degree;  
         float coeff[];  
 } polynomial;  
   
 typedef struct {  
         int support;  
         polynomial *segments[];  
 } kernel; */  
   
   
 int device_stats(cl_device_id device_id){  
   
         int err,i;  
         size_t returned_size;  
   
         // Report the device vendor and device name  
    //  
    cl_char vendor_name[1024] = {0};  
    cl_char device_name[1024] = {0};  
         cl_char device_profile[1024] = {0};  
         cl_char device_extensions[1024] = {0};  
         cl_device_local_mem_type local_mem_type;  
   
    cl_ulong global_mem_size, global_mem_cache_size;  
         cl_ulong max_mem_alloc_size;  
   
         cl_uint clock_frequency, vector_width, max_compute_units;  
   
         size_t max_work_item_dims,max_work_group_size, max_work_item_sizes[3];  
   
         cl_uint vector_types[] = {CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE};  
         char *vector_type_names[] = {"char","short","int","long","float","double"};  
   
         err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);  
    err|= clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(device_profile), device_profile, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, sizeof(device_extensions), device_extensions, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(global_mem_cache_size), &global_mem_cache_size, &returned_size);  
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_work_item_dims), &max_work_item_dims, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_work_item_sizes), max_work_item_sizes, &returned_size);  
   
         err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, &returned_size);  
   
         printf("Vendor: %s\n", vendor_name);  
         printf("Device Name: %s\n", device_name);  
         printf("Profile: %s\n", device_profile);  
         printf("Supported Extensions: %s\n\n", device_extensions);  
   
         printf("Local Mem Type (Local=1, Global=2): %i\n",(int)local_mem_type);  
         printf("Global Mem Size (MB): %i\n",(int)global_mem_size/(1024*1024));  
         printf("Global Mem Cache Size (Bytes): %i\n",(int)global_mem_cache_size);  
         printf("Max Mem Alloc Size (MB): %ld\n",(long int)max_mem_alloc_size/(1024*1024));  
   
         printf("Clock Frequency (MHz): %i\n\n",clock_frequency);  
   
         for(i=0;i<6;i++){  
                 err|= clGetDeviceInfo(device_id, vector_types[i], sizeof(clock_frequency), &vector_width, &returned_size);  
                 printf("Vector type width for: %s = %i\n",vector_type_names[i],vector_width);  
         }  
   
         printf("\nMax Work Group Size: %lu\n",max_work_group_size);  
         //printf("Max Work Item Dims: %lu\n",max_work_item_dims);  
         //for(size_t i=0;i<max_work_item_dims;i++)  
         //      printf("Max Work Items in Dim %lu: %lu\n",(long unsigned)(i+1),(long unsigned)max_work_item_sizes[i]);  
   
         printf("Max Compute Units: %i\n",max_compute_units);  
         printf("\n");  
   
         return CL_SUCCESS;  
 }  
22  //Loads the Kernel from a file  //Loads the Kernel from a file
23  char * loadKernel (const char * filename)  char * loadKernel (const char * filename)
24  {  {
# Line 110  Line 37 
37    
38          return source;          return source;
39  }  }
40    void saveResults (float * matrix, int size)
41    {
42            int i;
43            float max = -INFINITY;
44            FILE * out_file;
45            out_file = fopen("mip.txt", "w");
46        if (out_file == NULL) {
47            fprintf(stderr,"Can not open output file\n");
48            exit (8);
49        }
50    
51        for(i = 0; i < size; i++)
52        {
53            if(matrix[i] == -INFINITY || matrix[i] < 0)
54               fprintf(out_file,"%.4f\n",0.0f);
55            else
56               fprintf(out_file,"%.4f\n",matrix[i]);
57    
58             if(matrix[i] > max)
59                    max = matrix[i];
60    
61       }
62       printf("Max: %f\n",max);
63       fclose(out_file);
64    
65    
66    }
67  float det3x3(float a, float b, float c, float d, float e, float f, float g, float h, float i)  float det3x3(float a, float b, float c, float d, float e, float f, float g, float h, float i)
68  {  {
69     return ( (a)*(e)*(i)     return ( (a)*(e)*(i)
# Line 248  Line 202 
202              transformMatrix[((size + 1) * (size)) + i ] = 0;              transformMatrix[((size + 1) * (size)) + i ] = 0;
203          }          }
204          transformMatrix[((size + 1) * (size)) + size ] = 1;          transformMatrix[((size + 1) * (size)) + size ] = 1;
205    
206  }  }
207  Nrrd * loadNrrdFile(char * filename)  Nrrd * loadNrrdFile(char * filename)
208  {  {
# Line 275  Line 230 
230    return nin;    return nin;
231    
232  }  }
233  int exe_MIP_Kernel(Nrrd * nin, float stepSize, cl_float4 eyeVec, cl_float4 origVec,  int exe_MIP_Kernel(float * img, int imageDataSize, float inverseMatrix[16], int sAxis[3], float * out)
                                   cl_float4 cVec, cl_float4 rVec, float * h1, float * h2, float * out)  
234  {  {
235    
236          cl_program program;          cl_program program;
# Line 289  Line 243 
243    
244          cl_int err = 0;          cl_int err = 0;
245    
         cl_float16 transformMatrix;  
         cl_float16 inverseMatrix;  
   
         int imageDataSize =  (int)nrrdElementNumber(nin);  
   
         float * data = (float *)nin->data;  
         printf("Data Image: %f\n", (float)data[4* nin->axis[1].size * nin->axis[2].size + 5 * nin->axis[2].size + 2]);  
   
         cl_mem imageData_mem, out_mem, h1_mem, h2_mem;  
246    
247            cl_mem imageData_mem, out_mem,sAxis_mem;
248    
249          /** Setup Device **/          /** Setup Device **/
250          err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL);          err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL);
# Line 310  Line 256 
256    
257          assert(device);          assert(device);
258    
   
         /** Retrieve Information about the device  
         cl_char vendor_name[1024] = {0};  
     cl_char device_name[1024] = {0};  
     err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);  
     err|= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);  
         printf("Connecting to %s %s...\n", vendor_name, device_name);  
         device_stats(device); */  
   
   
259          /* Setup Context and Command Queue */          /* Setup Context and Command Queue */
260          context = clCreateContext(0,1,&device,NULL,NULL,&err);          context = clCreateContext(0,1,&device,NULL,NULL,&err);
261          assert(err == CL_SUCCESS);          assert(err == CL_SUCCESS);
# Line 354  Line 290 
290    
291      assert(err == CL_SUCCESS);      assert(err == CL_SUCCESS);
292    
293          kernel = clCreateKernel(program,"raycast",&err);          kernel = clCreateKernel(program,"mip",&err);
294    
295          assert(err == CL_SUCCESS);          assert(err == CL_SUCCESS);
296    
297          /** Memory Allocation for the Matrices **/          /** Memory Allocation for the Matrices **/
298    
         h1_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL);  
         err |= clEnqueueWriteBuffer(queue,h1_mem,CL_TRUE,0,sizeof(float) * 4,  
                                                                 (void *)h1 ,0,NULL,NULL);  
   
         h2_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL);  
         err |= clEnqueueWriteBuffer(queue,h2_mem,CL_TRUE,0,sizeof(float) * 4,  
                                                                 (void *)h2 ,0,NULL,NULL);  
   
   
299          imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL);          imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL);
300          err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize,          err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize,
301                                                                  nin->data ,0,NULL,NULL);                                                                  (void *)img ,0,NULL,NULL);
   
         //Load the transformMatrix  
         loadTransformMatrix(nin,transformMatrix);  
         invMatrix(transformMatrix,inverseMatrix);  
302    
303          err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,imageDataSize,          sAxis_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(int) * 3,NULL,NULL);
304                                                                  nin->data ,0,NULL,NULL);          err |= clEnqueueWriteBuffer(queue,sAxis_mem,CL_TRUE,0,sizeof(int) * 3,
305                                                                    (void *)sAxis ,0,NULL,NULL);
306    
307          assert(err == CL_SUCCESS);          assert(err == CL_SUCCESS);
308    
# Line 394  Line 318 
318          local_work_size[0] = 1;          local_work_size[0] = 1;
319          local_work_size[1] = 1;          local_work_size[1] = 1;
320    
321          err  =clSetKernelArg(kernel,0,sizeof(cl_mem), &imageData_mem);          cl_int2 workDim = {SIZE,SIZE};
         err |=clSetKernelArg(kernel,1,sizeof(cl_mem), &h1_mem);  
         err |=clSetKernelArg(kernel,2,sizeof(cl_mem), &h2_mem);  
         err |=clSetKernelArg(kernel,3,sizeof(cl_mem), &out_mem);  
         err |=clSetKernelArg(kernel,4,sizeof(cl_float4), origVec);  
         err |=clSetKernelArg(kernel,5,sizeof(cl_float4), eyeVec);  
         err |=clSetKernelArg(kernel,6,sizeof(cl_float4), cVec);  
         err |=clSetKernelArg(kernel,7,sizeof(cl_float4), rVec);  
         err |=clSetKernelArg(kernel,8,sizeof(cl_float16), &inverseMatrix);  
         err |=clSetKernelArg(kernel,9,sizeof(float), &stepSize);  
         err |=clSetKernelArg(kernel,10,sizeof(int), &nin->axis[1].size);  
         err |=clSetKernelArg(kernel,11,sizeof(int), &nin->axis[2].size);  
322    
323          assert(err == CL_SUCCESS);          int index = 0;
324    
325          /** Retrieve the Recommend Work Group Size */          err  =clSetKernelArg(kernel,index++,sizeof(cl_mem), &imageData_mem);
326          size_t thread_size;          err |=clSetKernelArg(kernel,index++,sizeof(cl_mem), &out_mem);
327          clGetKernelWorkGroupInfo(kernel,device,CL_KERNEL_WORK_GROUP_SIZE,          err |=clSetKernelArg(kernel,index++,sizeof(cl_float16), inverseMatrix);
328                                                           sizeof(size_t),&thread_size,NULL);          err |=clSetKernelArg(kernel,index++,sizeof(cl_int2), &workDim);
329          printf("Recommended Size: %lu\n",thread_size);          err |=clSetKernelArg(kernel,index++,sizeof(cl_mem), &sAxis_mem);
330    
331            printf("error:%d\n",err);
332            assert(err == CL_SUCCESS);
333    
334    
335          err = clEnqueueNDRangeKernel(queue,kernel,2,NULL,global_work_size,          err = clEnqueueNDRangeKernel(queue,kernel,2,NULL,global_work_size,
# Line 425  Line 341 
341    
342          err = clEnqueueReadBuffer(queue,out_mem,CL_TRUE,0, sizeof(float) * (SIZE *SIZE),out,0,NULL,NULL);          err = clEnqueueReadBuffer(queue,out_mem,CL_TRUE,0, sizeof(float) * (SIZE *SIZE),out,0,NULL,NULL);
343    
344          printMatrix(out,20);          saveResults(out,SIZE * SIZE);
345    
346          clReleaseKernel(kernel);          clReleaseKernel(kernel);
347          clReleaseProgram(program);          clReleaseProgram(program);
348          clReleaseCommandQueue(queue);          clReleaseCommandQueue(queue);
349          clReleaseContext(context);          clReleaseContext(context);
   
350          clReleaseMemObject(imageData_mem);          clReleaseMemObject(imageData_mem);
         clReleaseMemObject(h1_mem);  
         clReleaseMemObject(h2_mem);  
351          clReleaseMemObject(out_mem);          clReleaseMemObject(out_mem);
352    
353          return CL_SUCCESS;          return CL_SUCCESS;
354  }  }
355  int main (int argc, char ** argv)  int main (int argc, char ** argv)
356  {  {
         //Declaring and initializing input variables  
         Nrrd * nin;  
357          char * dataFile = "txs.nrrd";          char * dataFile = "txs.nrrd";
358          cl_float4 eyeVector = {25,15,10};          float transformMatrix[16];
359          cl_float4 origVector = {8.83877,2.5911,7.65275};          float inverseMatrix[16];
         cl_float4 cVector = {-0.0151831,0.0278357,0};  
         cl_float4 rVector = {0.0074887,0.00408474,-0.0305383};  
         float stepSize = 0.1;  
         float h1[] = {0.666667,0,-1,0.5};  
         float h2[] = {1.33333, -2, 1,-0.166667};  
         float * out;  
360    
361            float * out;
362          out = (float *) malloc(sizeof(float) * (SIZE * SIZE));          out = (float *) malloc(sizeof(float) * (SIZE * SIZE));
363            Nrrd * nin = loadNrrdFile(dataFile);
364    
365          nin = loadNrrdFile(dataFile);          int sAxis[] = {nin->axis[0].size, nin->axis[1].size,nin->axis[2].size};
366    
367            loadTransformMatrix(nin,transformMatrix);
368            invMatrix(transformMatrix,inverseMatrix);
369    
         exe_MIP_Kernel(nin,stepSize,eyeVector,origVector,  
                                                         cVector,rVector,h1,h2,out);  
370    
371            exe_MIP_Kernel((float *)nin->data, (int)nrrdElementNumber(nin),inverseMatrix, sAxis, out);
372    
373          return 0;          return 0;
374  }  }

Legend:
Removed from v.191  
changed lines
  Added in v.302

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0