SCM Repository
View of /trunk/test/probe/probe.c
Parent Directory
|
Revision Log
Revision 310 -
(download)
(as text)
(annotate)
Tue Aug 17 18:52:00 2010 UTC (11 years, 9 months ago) by jhr
File size: 17696 byte(s)
Tue Aug 17 18:52:00 2010 UTC (11 years, 9 months ago) by jhr
File size: 17696 byte(s)
updates to test code
/** * * To View the Image * ========================= * ./unu reshape -i mip.txt -s 200 200 | ./unu quantize -b 8 -o new.png */ #include <OpenCL/OpenCl.h> #include <assert.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/sysctl.h> #include <sys/stat.h> #include <teem/nrrd.h> #define SIZE 21 /*typedef float vec3[3]; typedef struct { int degree; float coeff[]; } polynomial; typedef struct { int support; polynomial *segments[]; } kernel; */ int device_stats(cl_device_id device_id){ int err,i; size_t returned_size; // Report the device vendor and device name // cl_char vendor_name[1024] = {0}; cl_char device_name[1024] = {0}; cl_char device_profile[1024] = {0}; cl_char device_extensions[1024] = {0}; cl_device_local_mem_type local_mem_type; cl_ulong global_mem_size, global_mem_cache_size; cl_ulong max_mem_alloc_size; cl_uint clock_frequency, vector_width, max_compute_units; size_t max_work_item_dims,max_work_group_size, max_work_item_sizes[3]; cl_uint vector_types[] = {CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE}; char *vector_type_names[] = {"char","short","int","long","float","double"}; err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(device_profile), device_profile, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, sizeof(device_extensions), device_extensions, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(global_mem_cache_size), &global_mem_cache_size, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_work_item_dims), &max_work_item_dims, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_work_item_sizes), max_work_item_sizes, &returned_size); err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, &returned_size); printf("Vendor: %s\n", vendor_name); printf("Device Name: %s\n", device_name); printf("Profile: %s\n", device_profile); printf("Supported Extensions: %s\n\n", device_extensions); printf("Local Mem Type (Local=1, Global=2): %i\n",(int)local_mem_type); printf("Global Mem Size (MB): %i\n",(int)global_mem_size/(1024*1024)); printf("Global Mem Cache Size (Bytes): %i\n",(int)global_mem_cache_size); printf("Max Mem Alloc Size (MB): %ld\n",(long int)max_mem_alloc_size/(1024*1024)); printf("Clock Frequency (MHz): %i\n\n",clock_frequency); for(i=0;i<6;i++){ err|= clGetDeviceInfo(device_id, vector_types[i], sizeof(clock_frequency), &vector_width, &returned_size); printf("Vector type width for: %s = %i\n",vector_type_names[i],vector_width); } printf("\nMax Work Group Size: %lu\n",max_work_group_size); //printf("Max Work Item Dims: %lu\n",max_work_item_dims); //for(size_t i=0;i<max_work_item_dims;i++) // printf("Max Work Items in Dim %lu: %lu\n",(long unsigned)(i+1),(long unsigned)max_work_item_sizes[i]); printf("Max Compute Units: %i\n",max_compute_units); printf("\n"); return CL_SUCCESS; } //Loads the Kernel from a file char * loadKernel (const char * filename) { struct stat statbuf; FILE *fh; char *source; fh = fopen(filename, "r"); if (fh == 0) return 0; stat(filename, &statbuf); source = (char *) malloc(statbuf.st_size + 1); fread(source, statbuf.st_size, 1, fh); source[statbuf.st_size] = '\0'; return source; } void saveResults (float * matrix, int size) { int i; float max = -INFINITY; FILE * out_file; out_file = fopen("mip.txt", "w"); if (out_file == NULL) { fprintf(stderr,"Can not open output file\n"); exit (8); } for(i = 0; i < size; i++) { if(matrix[i] == -INFINITY || matrix[i] < 0) fprintf(out_file,"%f\n",0.0f); else fprintf(out_file,"%f\n",matrix[i]); if(matrix[i] > max) max = matrix[i]; } printf("Max: %f\n",max); fclose(out_file); } float det3x3(float a, float b, float c, float d, float e, float f, float g, float h, float i) { return ( (a)*(e)*(i) + (d)*(h)*(c) + (g)*(b)*(f) - (g)*(e)*(c) - (d)*(b)*(i) - (a)*(h)*(f)); } float det4x4(cl_float16 m) { return (m[ 0] * det3x3(m[ 5], m[ 6], m[ 7], m[ 9], m[10], m[11], m[13], m[14], m[15]) - m[ 1] * det3x3(m[ 4], m[ 6], m[ 7], m[ 8], m[10], m[11], m[12], m[14], m[15]) + m[ 2] * det3x3(m[ 4], m[ 5], m[ 7], m[ 8], m[ 9], m[11], m[12], m[13], m[15]) - m[ 3] * det3x3(m[ 4], m[ 5], m[ 6], m[ 8], m[ 9], m[10], m[12], m[13], m[14])); } void invMatrix(cl_float16 m, cl_float16 i) { float det = det4x4(m); i[0] = det3x3(m[5],m[ 6],m[ 7], m[ 9],m[10],m[11], m[13],m[14],m[15])/det; i[ 1] = -det3x3(m[ 1],m[ 2],m[ 3], m[ 9],m[10],m[11], m[13],m[14],m[15])/det; i[ 2] = det3x3(m[ 1],m[ 2],m[ 3], m[ 5],m[ 6],m[ 7], m[13],m[14],m[15])/det; i[ 3] = -det3x3(m[ 1],m[ 2],m[ 3], m[ 5],m[ 6],m[ 7], m[ 9],m[10],m[11])/det; i[ 4] = -det3x3(m[ 4],m[ 6],m[ 7], m[ 8],m[10],m[11], m[12],m[14],m[15])/det; i[ 5] = det3x3(m[ 0],m[ 2],m[ 3], m[ 8],m[10],m[11], m[12],m[14],m[15])/det; i[ 6] = -det3x3(m[ 0],m[ 2],m[ 3], m[ 4],m[ 6],m[ 7], m[12],m[14],m[15])/det; i[ 7] = det3x3(m[ 0],m[ 2],m[ 3], m[ 4],m[ 6],m[ 7], m[ 8],m[10],m[11])/det; i[ 8] = det3x3(m[ 4],m[ 5],m[ 7], m[ 8],m[ 9],m[11], m[12],m[13],m[15])/det; i[ 9] = -det3x3(m[ 0],m[ 1],m[ 3], m[ 8],m[ 9],m[11], m[12],m[13],m[15])/det; i[10] = det3x3(m[ 0],m[ 1],m[ 3], m[ 4],m[ 5],m[ 7], m[12],m[13],m[15])/det; i[11] = -det3x3(m[ 0],m[ 1],m[ 3], m[ 4],m[ 5],m[ 7], m[ 8],m[ 9],m[11])/det; i[12] = -det3x3(m[ 4],m[ 5],m[ 6], m[ 8],m[ 9],m[10], m[12],m[13],m[14])/det; i[13] = det3x3(m[ 0],m[ 1],m[ 2], m[ 8],m[ 9],m[10], m[12],m[13],m[14])/det; i[14] = -det3x3(m[ 0],m[ 1],m[ 2], m[ 4],m[ 5],m[ 6], m[12],m[13],m[14])/det; i[15] = det3x3(m[ 0],m[ 1],m[ 2], m[ 4],m[ 5],m[ 6], m[ 8],m[ 9],m[10])/det; } void printMatrix(float * matrix, int rowSize) { int index = 0, end = 1, arraySize = SIZE; for(index = 0; index < arraySize; index++) { if(end == rowSize) { printf(" %.2f\n",matrix[index]); end = 1; } else { printf(" %.2f ",matrix[index]); end++; } } printf("\n"); } void loadTransformMatrix(Nrrd * nin, cl_float16 transformMatrix) { int i,j, size = nin->spaceDim; NrrdAxisInfo axisInfo; //Image axis Scaling and Rotation for(i = 0; i < size; i++) { axisInfo = nin->axis[i]; for(j = 0; j < size; j++) { transformMatrix[ (size+ 1) * j + i] = axisInfo.spaceDirection[j]; } //Image Location transformMatrix[ (i * (size + 1)) + size] = nin->spaceOrigin[i]; //Bottom row of the Transform Matrix transformMatrix[((size + 1) * (size)) + i ] = 0; } transformMatrix[((size + 1) * (size)) + size ] = 1; } Nrrd * loadNrrdFile(char * filename) { /* create a nrrd; at this point this is just an empty container */ Nrrd * nin; nin = nrrdNew(); char *err; /* read in the nrrd from file */ if (nrrdLoad(nin, filename, NULL)) { err = biffGetDone(NRRD); fprintf(stderr, "Mip: trouble reading \"%s\":\n%s", filename, err); free(err); exit(1); } /* say something about the array printf("Mip: \"%s\" is a %d-dimensional nrrd of type %d (%s)\n", filename, nin->dim, nin->type, airEnumStr(nrrdType, nin->type)); printf("Mip: the array contains %d elements, each %d bytes in size\n", (int)nrrdElementNumber(nin), (int)nrrdElementSize(nin));*/ return nin; } void loadProbePositions(char * filename, float * posValues) { FILE * infile; int i = 0; char line[80]; if(!(infile = fopen(filename,"r"))) { fprintf(stderr, "Could not load file:\"%s\n", filename); exit(1); } while(fgets(line, 80, infile) != NULL) { sscanf (line, "%f %f %f", &posValues[i],&posValues[i+1],&posValues[i+2]); i+=3; } fclose(infile); } int exe_Probe_Kernel(Nrrd * nin, float * probedPositions, float *h[4], float * out) { cl_program program; cl_kernel kernel; cl_command_queue queue; cl_context context; cl_device_id cpu = NULL, device = NULL; cl_int err = 0; cl_float16 transformMatrix; cl_float16 inverseMatrix; int imageDataSize = (int)nrrdElementNumber(nin); cl_mem imageData_mem, out_mem, h_mem[4], positions_mem; /** Setup Device **/ err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL); assert(err==CL_SUCCESS); err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_GPU,1,&device,NULL); //if(err != CL_SUCCESS) device = cpu; assert(device); /** Retrieve Information about the device cl_char vendor_name[1024] = {0}; cl_char device_name[1024] = {0}; err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size); err|= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size); printf("Connecting to %s %s...\n", vendor_name, device_name); device_stats(device); */ /* Setup Context and Command Queue */ context = clCreateContext(0,1,&device,NULL,NULL,&err); assert(err == CL_SUCCESS); queue = clCreateCommandQueue(context,device,0,NULL); /** Load the Kernel and Program **/ const char * filename = "probe.cl"; char * kernel_source = loadKernel(filename); assert(kernel_source != 0); program = clCreateProgramWithSource(context,1,(const char **)&kernel_source,NULL,&err); assert(err == CL_SUCCESS); const char *options = 0; err = clBuildProgram(program, 0, NULL, options, NULL, NULL); /** Retrieve information about the program build to check for any possible errors **/ char * build_log; size_t log_size; // First call to know the proper size clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); build_log = (char *) malloc(log_size+1); // Second call to get the log clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL); build_log[log_size] = '\0'; printf("\nBuild Log:\n%s\n",build_log); free(build_log); assert(err == CL_SUCCESS); kernel = clCreateKernel(program, "diderot", &err); assert(err == CL_SUCCESS); /** Memory Allocation for the Matrices **/ for (int i = 0; i < 4; i++) { h_mem[i] = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL); err |= clEnqueueWriteBuffer(queue,h_mem[i],CL_TRUE,0,sizeof(float) * 4, (void *)h[i] ,0,NULL,NULL); } positions_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * (SIZE *3),NULL,NULL); err |= clEnqueueWriteBuffer(queue,positions_mem,CL_TRUE,0,sizeof(float) * (SIZE *3), (void *)probedPositions ,0,NULL,NULL); imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL); err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize, nin->data ,0,NULL,NULL); //Load the transformMatrix loadTransformMatrix(nin,transformMatrix); invMatrix(transformMatrix,inverseMatrix); err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,imageDataSize, nin->data ,0,NULL,NULL); assert(err == CL_SUCCESS); out_mem = clCreateBuffer(context,CL_MEM_READ_WRITE, sizeof(float) * (SIZE),NULL,NULL); clFinish(queue); size_t global_work_size[1], local_work_size[1]; global_work_size[0] = SIZE; local_work_size[0] = 1; int idx = 0; err =clSetKernelArg(kernel, idx++, sizeof(cl_mem), &imageData_mem); err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &(h_mem[0])); err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &(h_mem[1])); err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &(h_mem[2])); err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &(h_mem[3])); err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &positions_mem); err |=clSetKernelArg(kernel, idx++, sizeof(cl_mem), &out_mem); err |=clSetKernelArg(kernel, idx++, sizeof(cl_float16), &inverseMatrix); err |=clSetKernelArg(kernel, idx++, sizeof(int), &nin->axis[1].size); err |=clSetKernelArg(kernel, idx++, sizeof(int), &nin->axis[2].size); err |=clSetKernelArg(kernel, idx++, sizeof(int), &nin->axis[0].size); assert(err == CL_SUCCESS); /** Retrieve the Recommend Work Group Size size_t thread_size; clGetKernelWorkGroupInfo(kernel,device,CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t),&thread_size,NULL); printf("Recommended Size: %lu\n",thread_size);*/ err = clEnqueueNDRangeKernel(queue,kernel,1,NULL,global_work_size, local_work_size,0,NULL,NULL); assert(err == CL_SUCCESS); clFinish(queue); err = clEnqueueReadBuffer(queue,out_mem,CL_TRUE,0, sizeof(float) * SIZE,out,0,NULL,NULL); // saveResults(out,SIZE * SIZE); printMatrix(out,1); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(queue); clReleaseContext(context); clReleaseMemObject(imageData_mem); for (int i = 0; i < 4; i++) clReleaseMemObject(h_mem[i]); clReleaseMemObject(out_mem); clReleaseMemObject(positions_mem); return CL_SUCCESS; } int main (int argc, char ** argv) { //Declaring and initializing input variables Nrrd * nin; char * probeValuesFile = "../../data/plane-probe-pos-x.txt"; char * dataFile = "../../data/plane-x9.nrrd"; float h0[] = {1.33333, 2, 1, 0.166667}; /* -2..-1 */ float h1[] = {0.666667, 0, -1, -0.5}; /* -1..0 */ float h2[] = {0.666667, 0, -1, 0.5}; /* 0..1 */ float h3[] = {1.33333, -2, 1, -0.166667}; /* 1..2 */ float *h[4] = {h0, h1, h2, h3}; float * out; float * posValues; if (argc == 3) { probeValuesFile = argv[1]; dataFile = argv[2]; } else if (argc > 1) { fprintf (stderr, "usage: probe values-file data-file\n"); exit (1); } out = (float *) malloc(sizeof(float) * SIZE); posValues = (float *) malloc(sizeof(float) * (SIZE * 3)); nin = loadNrrdFile(dataFile); loadProbePositions(probeValuesFile,posValues); exe_Probe_Kernel(nin,posValues,h,out); return 0; }
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |