Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /trunk/test/probe/probe.c
ViewVC logotype

View of /trunk/test/probe/probe.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 310 - (download) (as text) (annotate)
Tue Aug 17 18:52:00 2010 UTC (8 years, 10 months ago) by jhr
File size: 17696 byte(s)
  updates to test code
/**
 *
 * To View the Image 
 * =========================
 * ./unu reshape -i mip.txt -s 200 200 | ./unu quantize -b 8 -o new.png
 */
#include <OpenCL/OpenCl.h> 
#include <assert.h> 
#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#include <sys/sysctl.h>
#include <sys/stat.h>

#include <teem/nrrd.h> 

#define SIZE 21

/*typedef float vec3[3]; 

typedef struct { 
    int degree; 
    float coeff[];
} polynomial; 

typedef struct { 
    int support; 
    polynomial *segments[]; 
} kernel; */ 


int device_stats(cl_device_id device_id){
    
    int err,i;
    size_t returned_size;
    
    // Report the device vendor and device name
   // 
   cl_char vendor_name[1024] = {0};
   cl_char device_name[1024] = {0};
    cl_char device_profile[1024] = {0};
    cl_char device_extensions[1024] = {0};
    cl_device_local_mem_type local_mem_type;
    
   cl_ulong global_mem_size, global_mem_cache_size;
    cl_ulong max_mem_alloc_size;
    
    cl_uint clock_frequency, vector_width, max_compute_units;
    
    size_t max_work_item_dims,max_work_group_size, max_work_item_sizes[3];
    
    cl_uint vector_types[] = {CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE}; 
    char *vector_type_names[] = {"char","short","int","long","float","double"};
    
    err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);
   err|= clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);
    err|= clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(device_profile), device_profile, &returned_size);
    err|= clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, sizeof(device_extensions), device_extensions, &returned_size);
    err|= clGetDeviceInfo(device_id, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, &returned_size);
    
    err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, &returned_size);
    err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(global_mem_cache_size), &global_mem_cache_size, &returned_size);
    err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, &returned_size);
    
    err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, &returned_size);
    
    err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, &returned_size);
    
    err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_work_item_dims), &max_work_item_dims, &returned_size);
    
    err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_work_item_sizes), max_work_item_sizes, &returned_size);
    
    err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, &returned_size);
    
    printf("Vendor: %s\n", vendor_name);
    printf("Device Name: %s\n", device_name);
    printf("Profile: %s\n", device_profile);
    printf("Supported Extensions: %s\n\n", device_extensions);
    
    printf("Local Mem Type (Local=1, Global=2): %i\n",(int)local_mem_type);
    printf("Global Mem Size (MB): %i\n",(int)global_mem_size/(1024*1024));
    printf("Global Mem Cache Size (Bytes): %i\n",(int)global_mem_cache_size);
    printf("Max Mem Alloc Size (MB): %ld\n",(long int)max_mem_alloc_size/(1024*1024));
    
    printf("Clock Frequency (MHz): %i\n\n",clock_frequency);
    
    for(i=0;i<6;i++){
        err|= clGetDeviceInfo(device_id, vector_types[i], sizeof(clock_frequency), &vector_width, &returned_size);
        printf("Vector type width for: %s = %i\n",vector_type_names[i],vector_width);
    }
    
    printf("\nMax Work Group Size: %lu\n",max_work_group_size);
    //printf("Max Work Item Dims: %lu\n",max_work_item_dims);
    //for(size_t i=0;i<max_work_item_dims;i++) 
    //  printf("Max Work Items in Dim %lu: %lu\n",(long unsigned)(i+1),(long unsigned)max_work_item_sizes[i]);
    
    printf("Max Compute Units: %i\n",max_compute_units);
    printf("\n");
    
    return CL_SUCCESS;
}
//Loads the Kernel from a file 
char * loadKernel (const char * filename) 
{  
    struct stat statbuf;
    FILE *fh; 
    char *source; 
    
    fh = fopen(filename, "r");
    if (fh == 0)
        return 0; 
    
    stat(filename, &statbuf);
    source = (char *) malloc(statbuf.st_size + 1);
    fread(source, statbuf.st_size, 1, fh);
    source[statbuf.st_size] = '\0'; 
    
    return source; 
} 
void saveResults (float * matrix, int size) 
{ 
    int i; 
    float max = -INFINITY; 
    FILE * out_file; 
    out_file = fopen("mip.txt", "w");
    if (out_file == NULL) {
        fprintf(stderr,"Can not open output file\n");
        exit (8);
    }
    
    for(i = 0; i < size; i++) 
    {
        if(matrix[i] == -INFINITY || matrix[i] < 0)
           fprintf(out_file,"%f\n",0.0f); 
        else
           fprintf(out_file,"%f\n",matrix[i]); 
           
         if(matrix[i] > max) 
            max = matrix[i]; 
   
   }
   printf("Max: %f\n",max); 
   fclose(out_file);


} 
float det3x3(float a, float b, float c, float d, float e, float f, float g, float h, float i) 
{
   return ( (a)*(e)*(i) 
          + (d)*(h)*(c) 
          + (g)*(b)*(f) 
          - (g)*(e)*(c) 
          - (d)*(b)*(i) 
          - (a)*(h)*(f)); 
}
float det4x4(cl_float16 m) 
{ 
    return  (m[ 0] * det3x3(m[ 5], m[ 6], m[ 7], 
                           m[ 9], m[10], m[11], 
                           m[13], m[14], m[15]) 
                           
    - m[ 1] * det3x3(m[ 4], m[ 6], m[ 7], 
                           m[ 8], m[10], m[11], 
                           m[12], m[14], m[15]) 
    + m[ 2] * det3x3(m[ 4], m[ 5], m[ 7], 
                           m[ 8], m[ 9], m[11], 
                           m[12], m[13], m[15]) 
                           
    - m[ 3] * det3x3(m[ 4], m[ 5], m[ 6], 
                           m[ 8], m[ 9], m[10], 
                           m[12], m[13], m[14])); 



} 
void invMatrix(cl_float16 m, cl_float16 i) 
{  
    float det = det4x4(m); 

    
    i[0] = det3x3(m[5],m[ 6],m[ 7], 
                       m[ 9],m[10],m[11], 
                       m[13],m[14],m[15])/det; 
    
    i[ 1] = -det3x3(m[ 1],m[ 2],m[ 3], 
                       m[ 9],m[10],m[11], 
                       m[13],m[14],m[15])/det; 
                       
    i[ 2] =  det3x3(m[ 1],m[ 2],m[ 3], 
                       m[ 5],m[ 6],m[ 7], 
                       m[13],m[14],m[15])/det; 
                       
    i[ 3] = -det3x3(m[ 1],m[ 2],m[ 3], 
                       m[ 5],m[ 6],m[ 7], 
                       m[ 9],m[10],m[11])/det; 
                       
    i[ 4] = -det3x3(m[ 4],m[ 6],m[ 7], 
                       m[ 8],m[10],m[11], 
                       m[12],m[14],m[15])/det; 
                       
    i[ 5] =  det3x3(m[ 0],m[ 2],m[ 3], 
                       m[ 8],m[10],m[11], 
                       m[12],m[14],m[15])/det; 
                       
    i[ 6] = -det3x3(m[ 0],m[ 2],m[ 3], 
                       m[ 4],m[ 6],m[ 7], 
                       m[12],m[14],m[15])/det; 
                       
    i[ 7] =  det3x3(m[ 0],m[ 2],m[ 3], 
                       m[ 4],m[ 6],m[ 7], 
                       m[ 8],m[10],m[11])/det; 
                       
    i[ 8] =  det3x3(m[ 4],m[ 5],m[ 7], 
                       m[ 8],m[ 9],m[11], 
                       m[12],m[13],m[15])/det; 
                       
    i[ 9] = -det3x3(m[ 0],m[ 1],m[ 3], 
                       m[ 8],m[ 9],m[11], 
                       m[12],m[13],m[15])/det; 
                       
    i[10] =  det3x3(m[ 0],m[ 1],m[ 3], 
                       m[ 4],m[ 5],m[ 7], 
                       m[12],m[13],m[15])/det; 
                       
    i[11] = -det3x3(m[ 0],m[ 1],m[ 3], 
                       m[ 4],m[ 5],m[ 7], 
                       m[ 8],m[ 9],m[11])/det; 
                       
    i[12] = -det3x3(m[ 4],m[ 5],m[ 6], 
                       m[ 8],m[ 9],m[10], 
                       m[12],m[13],m[14])/det; 
                       
    i[13] =  det3x3(m[ 0],m[ 1],m[ 2], 
                       m[ 8],m[ 9],m[10],
                       m[12],m[13],m[14])/det; 
                       
    i[14] = -det3x3(m[ 0],m[ 1],m[ 2], 
                       m[ 4],m[ 5],m[ 6], 
                       m[12],m[13],m[14])/det; 
                       
    i[15] =  det3x3(m[ 0],m[ 1],m[ 2], 
                       m[ 4],m[ 5],m[ 6], 
                       m[ 8],m[ 9],m[10])/det; 
} 
void printMatrix(float * matrix, int rowSize) 
{
    int index = 0, end = 1, arraySize = SIZE; 
    
    for(index = 0; index < arraySize; index++) 
    { 
        if(end == rowSize) 
        { 
            printf(" %.2f\n",matrix[index]);
            end = 1;
        } 
        else
        {
            printf(" %.2f ",matrix[index]); 
            end++; 
        } 
    }
    printf("\n"); 
}
void loadTransformMatrix(Nrrd * nin, cl_float16 transformMatrix) 
{ 
    int i,j, size =  nin->spaceDim; 
    NrrdAxisInfo axisInfo; 

    //Image axis Scaling and Rotation 
    for(i = 0; i < size; i++) 
    { 
        axisInfo = nin->axis[i];
        for(j = 0; j < size; j++) 
        {
            transformMatrix[ (size+ 1) * j + i] = axisInfo.spaceDirection[j]; 
        }
      
        //Image Location 
        transformMatrix[ (i * (size + 1)) + size] = nin->spaceOrigin[i];  
        
        //Bottom row of the Transform Matrix 
        transformMatrix[((size + 1) * (size)) + i ] = 0; 
    }
    transformMatrix[((size + 1) * (size)) + size ] = 1;

} 
Nrrd * loadNrrdFile(char * filename) 
{
    /* create a nrrd; at this point this is just an empty container */
    Nrrd * nin; 
    
    nin = nrrdNew();
    char *err;

    /* read in the nrrd from file */
    if (nrrdLoad(nin, filename, NULL)) {
        err = biffGetDone(NRRD);
        fprintf(stderr, "Mip: trouble reading \"%s\":\n%s", filename, err);
        free(err);
        exit(1); 
   }

  /* say something about the array 
  printf("Mip: \"%s\" is a %d-dimensional nrrd of type %d (%s)\n", 
         filename, nin->dim, nin->type,
         airEnumStr(nrrdType, nin->type));
  printf("Mip: the array contains %d elements, each %d bytes in size\n",
         (int)nrrdElementNumber(nin), (int)nrrdElementSize(nin));*/ 
         
  return nin; 
  
} 
void loadProbePositions(char * filename, float * posValues) 
{ 
    FILE * infile; 
    
    int i = 0; 
    char line[80]; 
    
    if(!(infile = fopen(filename,"r"))) 
    {
        fprintf(stderr, "Could not load file:\"%s\n", filename);
        exit(1); 
    }   
    
   while(fgets(line, 80, infile) != NULL)
   {
      sscanf (line, "%f %f %f", &posValues[i],&posValues[i+1],&posValues[i+2]);
      i+=3; 
    }
    
   fclose(infile);  
} 
int exe_Probe_Kernel(Nrrd * nin, float * probedPositions, float *h[4], float * out)
{ 

    cl_program program; 
    cl_kernel kernel; 

    cl_command_queue queue; 
    cl_context context; 
    
    cl_device_id cpu = NULL, device = NULL; 
    
    cl_int err = 0; 
    
    cl_float16 transformMatrix; 
    cl_float16 inverseMatrix; 
    
    int imageDataSize =  (int)nrrdElementNumber(nin);  
    
    cl_mem imageData_mem, out_mem, h_mem[4], positions_mem;  
    
    /** Setup Device **/ 
    err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL); 
    assert(err==CL_SUCCESS); 
    
    err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_GPU,1,&device,NULL); 
    //if(err != CL_SUCCESS)  
    device = cpu; 
    
    assert(device); 
    
    /** Retrieve Information about the device  
    cl_char vendor_name[1024] = {0};
    cl_char device_name[1024] = {0};
    err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);
    err|= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);
    printf("Connecting to %s %s...\n", vendor_name, device_name);
    device_stats(device); */ 

    
    /* Setup Context and Command Queue */ 
    context = clCreateContext(0,1,&device,NULL,NULL,&err); 
    assert(err == CL_SUCCESS); 
    
    queue = clCreateCommandQueue(context,device,0,NULL); 
    /** Load the Kernel and Program **/ 
    const char * filename = "probe.cl"; 

    char * kernel_source = loadKernel(filename); 

    assert(kernel_source != 0); 
    
    program = clCreateProgramWithSource(context,1,(const char **)&kernel_source,NULL,&err); 
    
    assert(err == CL_SUCCESS);

    const char *options = 0;
    err = clBuildProgram(program, 0, NULL, options, NULL, NULL);


   /** Retrieve information about the program build to check for any possible errors **/ 
   char * build_log;
   size_t log_size;
   
   // First call to know the proper size
   clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
   build_log = (char *) malloc(log_size+1);
   // Second call to get the log
   clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
   build_log[log_size] = '\0';
   printf("\nBuild Log:\n%s\n",build_log); 
   free(build_log); 
   
    assert(err == CL_SUCCESS); 
   
    kernel = clCreateKernel(program, "diderot", &err); 
        
    assert(err == CL_SUCCESS); 
    
    /** Memory Allocation for the Matrices **/    

    for (int i = 0;  i < 4;  i++) {
	h_mem[i] = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL); 
	err |= clEnqueueWriteBuffer(queue,h_mem[i],CL_TRUE,0,sizeof(float) * 4,
                                (void *)h[i] ,0,NULL,NULL);
    }
    
    positions_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * (SIZE *3),NULL,NULL); 
    err |= clEnqueueWriteBuffer(queue,positions_mem,CL_TRUE,0,sizeof(float) * (SIZE *3), 
                                (void *)probedPositions ,0,NULL,NULL); 
    
    imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL); 
    err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize, 
                                nin->data ,0,NULL,NULL);    
    
    //Load the transformMatrix 
    loadTransformMatrix(nin,transformMatrix); 
    invMatrix(transformMatrix,inverseMatrix);

    err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,imageDataSize, 
                                nin->data ,0,NULL,NULL);    
    
    assert(err == CL_SUCCESS); 
    
    out_mem = clCreateBuffer(context,CL_MEM_READ_WRITE, sizeof(float) * (SIZE),NULL,NULL); 
    
    clFinish(queue); 
    
    size_t global_work_size[1], local_work_size[1]; 
    
    global_work_size[0] = SIZE;  
    local_work_size[0] = 1; 

    int idx = 0;
    err  =clSetKernelArg(kernel, idx++, sizeof(cl_mem), &imageData_mem); 
    err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &(h_mem[0])); 
    err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &(h_mem[1])); 
    err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &(h_mem[2])); 
    err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &(h_mem[3])); 
    err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &positions_mem); 
    err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &out_mem); 
    err |=clSetKernelArg(kernel, idx++, sizeof(cl_float16), &inverseMatrix); 
    err |=clSetKernelArg(kernel, idx++, sizeof(int), &nin->axis[1].size); 
    err |=clSetKernelArg(kernel, idx++, sizeof(int), &nin->axis[2].size);
    err |=clSetKernelArg(kernel, idx++, sizeof(int), &nin->axis[0].size); 
    
    assert(err == CL_SUCCESS); 
    
    /** Retrieve the Recommend Work Group Size 
    size_t thread_size; 
    clGetKernelWorkGroupInfo(kernel,device,CL_KERNEL_WORK_GROUP_SIZE,
                             sizeof(size_t),&thread_size,NULL);
    printf("Recommended Size: %lu\n",thread_size);*/ 
    
    
    err = clEnqueueNDRangeKernel(queue,kernel,1,NULL,global_work_size,
                                local_work_size,0,NULL,NULL); 
    
    assert(err == CL_SUCCESS); 
        
    clFinish(queue); 
    
    err = clEnqueueReadBuffer(queue,out_mem,CL_TRUE,0, sizeof(float) * SIZE,out,0,NULL,NULL); 
     
//  saveResults(out,SIZE * SIZE); 
    printMatrix(out,1); 
    
    clReleaseKernel(kernel); 
    clReleaseProgram(program); 
    clReleaseCommandQueue(queue); 
    clReleaseContext(context); 
    
    clReleaseMemObject(imageData_mem); 
    for (int i = 0;  i < 4;  i++)
	clReleaseMemObject(h_mem[i]); 
    clReleaseMemObject(out_mem); 
    clReleaseMemObject(positions_mem); 
    return CL_SUCCESS;  
}

int main (int argc, char ** argv) 
{ 
    //Declaring and initializing input variables 
    Nrrd * nin;
    char * probeValuesFile = "../../data/plane-probe-pos-x.txt"; 
    char * dataFile = "../../data/plane-x9.nrrd"; 
    float h0[] = {1.33333,  2,  1,  0.166667};		/* -2..-1 */
    float h1[] = {0.666667, 0, -1, -0.5}; 		/* -1..0 */
    float h2[] = {0.666667, 0, -1,  0.5}; 		/* 0..1 */
    float h3[] = {1.33333, -2,  1, -0.166667}; 		/* 1..2 */
    float *h[4] = {h0, h1, h2, h3};
    float * out; 
    float * posValues; 

    if (argc == 3) {
	probeValuesFile = argv[1];
	dataFile = argv[2];
    }
    else if (argc > 1) {
	fprintf (stderr, "usage: probe values-file data-file\n");
	exit (1);
    }

    out = (float *) malloc(sizeof(float) * SIZE); 
    posValues = (float *) malloc(sizeof(float) * (SIZE * 3)); 
    
    nin = loadNrrdFile(dataFile); 
    loadProbePositions(probeValuesFile,posValues); 

    exe_Probe_Kernel(nin,posValues,h,out); 

    return 0; 
} 

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0