Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /trunk/test/probe/probe.c
ViewVC logotype

View of /trunk/test/probe/probe.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 259 - (download) (as text) (annotate)
Tue Aug 10 14:50:19 2010 UTC (9 years, 1 month ago) by lamonts
File size: 15280 byte(s)
Added the opencl version of the probe code.
/**
 *
 * To View the Image 
 * =========================
 * ./unu reshape -i mip.txt -s 200 200 | ./unu quantize -b 8 -o new.png
 */
#include <OpenCL/OpenCl.h> 
#include <assert.h> 
#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#include <sys/sysctl.h>
#include <sys/stat.h>

#include <teem/nrrd.h> 

#define SIZE 21

/*typedef float vec3[3]; 

typedef struct { 
	int degree; 
	float coeff[];
} polynomial; 

typedef struct { 
	int support; 
	polynomial *segments[]; 
} kernel; */ 


int device_stats(cl_device_id device_id){
	
	int err,i;
	size_t returned_size;
	
	// Report the device vendor and device name
   // 
   cl_char vendor_name[1024] = {0};
   cl_char device_name[1024] = {0};
	cl_char device_profile[1024] = {0};
	cl_char device_extensions[1024] = {0};
	cl_device_local_mem_type local_mem_type;
	
   cl_ulong global_mem_size, global_mem_cache_size;
	cl_ulong max_mem_alloc_size;
	
	cl_uint clock_frequency, vector_width, max_compute_units;
	
	size_t max_work_item_dims,max_work_group_size, max_work_item_sizes[3];
	
	cl_uint vector_types[] = {CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE}; 
	char *vector_type_names[] = {"char","short","int","long","float","double"};
	
	err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);
   err|= clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);
	err|= clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(device_profile), device_profile, &returned_size);
	err|= clGetDeviceInfo(device_id, CL_DEVICE_EXTENSIONS, sizeof(device_extensions), device_extensions, &returned_size);
	err|= clGetDeviceInfo(device_id, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(local_mem_type), &local_mem_type, &returned_size);
	
	err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, &returned_size);
	err|= clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(global_mem_cache_size), &global_mem_cache_size, &returned_size);
	err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_mem_alloc_size), &max_mem_alloc_size, &returned_size);
	
	err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clock_frequency), &clock_frequency, &returned_size);
	
	err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, &returned_size);
	
	err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_work_item_dims), &max_work_item_dims, &returned_size);
	
	err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_work_item_sizes), max_work_item_sizes, &returned_size);
	
	err|= clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), &max_compute_units, &returned_size);
	
	printf("Vendor: %s\n", vendor_name);
	printf("Device Name: %s\n", device_name);
	printf("Profile: %s\n", device_profile);
	printf("Supported Extensions: %s\n\n", device_extensions);
	
	printf("Local Mem Type (Local=1, Global=2): %i\n",(int)local_mem_type);
	printf("Global Mem Size (MB): %i\n",(int)global_mem_size/(1024*1024));
	printf("Global Mem Cache Size (Bytes): %i\n",(int)global_mem_cache_size);
	printf("Max Mem Alloc Size (MB): %ld\n",(long int)max_mem_alloc_size/(1024*1024));
	
	printf("Clock Frequency (MHz): %i\n\n",clock_frequency);
	
	for(i=0;i<6;i++){
		err|= clGetDeviceInfo(device_id, vector_types[i], sizeof(clock_frequency), &vector_width, &returned_size);
		printf("Vector type width for: %s = %i\n",vector_type_names[i],vector_width);
	}
	
	printf("\nMax Work Group Size: %lu\n",max_work_group_size);
	//printf("Max Work Item Dims: %lu\n",max_work_item_dims);
	//for(size_t i=0;i<max_work_item_dims;i++) 
	//	printf("Max Work Items in Dim %lu: %lu\n",(long unsigned)(i+1),(long unsigned)max_work_item_sizes[i]);
	
	printf("Max Compute Units: %i\n",max_compute_units);
	printf("\n");
	
	return CL_SUCCESS;
}
//Loads the Kernel from a file 
char * loadKernel (const char * filename) 
{  
	struct stat statbuf;
	FILE *fh; 
	char *source; 
	
	fh = fopen(filename, "r");
	if (fh == 0)
		return 0; 
	
	stat(filename, &statbuf);
	source = (char *) malloc(statbuf.st_size + 1);
	fread(source, statbuf.st_size, 1, fh);
	source[statbuf.st_size] = '\0'; 
	
	return source; 
} 
void saveResults (float * matrix, int size) 
{ 
	int i; 
	float max = -INFINITY; 
	FILE * out_file; 
	out_file = fopen("mip.txt", "w");
    if (out_file == NULL) {
        fprintf(stderr,"Can not open output file\n");
        exit (8);
    }
    
    for(i = 0; i < size; i++) 
    {
    	if(matrix[i] == -INFINITY || matrix[i] < 0)
    	   fprintf(out_file,"%f\n",0.0f); 
    	else
    	   fprintf(out_file,"%f\n",matrix[i]); 
    	   
    	 if(matrix[i] > max) 
    	 	max = matrix[i]; 
   
   }
   printf("Max: %f\n",max); 
   fclose(out_file);


} 
float det3x3(float a, float b, float c, float d, float e, float f, float g, float h, float i) 
{
   return ( (a)*(e)*(i) 
   		  + (d)*(h)*(c) 
   		  + (g)*(b)*(f) 
   		  - (g)*(e)*(c) 
   		  - (d)*(b)*(i) 
   		  - (a)*(h)*(f)); 
}
float det4x4(cl_float16 m) 
{ 
	return	(m[ 0] * det3x3(m[ 5], m[ 6], m[ 7], 
						   m[ 9], m[10], m[11], 
						   m[13], m[14], m[15]) 
						   
	- m[ 1] * det3x3(m[ 4], m[ 6], m[ 7], 
						   m[ 8], m[10], m[11], 
						   m[12], m[14], m[15]) 
	+ m[ 2] * det3x3(m[ 4], m[ 5], m[ 7], 
						   m[ 8], m[ 9], m[11], 
						   m[12], m[13], m[15]) 
						   
	- m[ 3] * det3x3(m[ 4], m[ 5], m[ 6], 
						   m[ 8], m[ 9], m[10], 
						   m[12], m[13], m[14])); 



} 
void invMatrix(cl_float16 m, cl_float16 i) 
{  
	float det = det4x4(m); 

	
	i[0] = det3x3(m[5],m[ 6],m[ 7], 
					   m[ 9],m[10],m[11], 
					   m[13],m[14],m[15])/det; 
	
	i[ 1] = -det3x3(m[ 1],m[ 2],m[ 3], 
					   m[ 9],m[10],m[11], 
					   m[13],m[14],m[15])/det; 
					   
	i[ 2] =  det3x3(m[ 1],m[ 2],m[ 3], 
					   m[ 5],m[ 6],m[ 7], 
					   m[13],m[14],m[15])/det; 
					   
	i[ 3] = -det3x3(m[ 1],m[ 2],m[ 3], 
					   m[ 5],m[ 6],m[ 7], 
					   m[ 9],m[10],m[11])/det; 
					   
	i[ 4] = -det3x3(m[ 4],m[ 6],m[ 7], 
					   m[ 8],m[10],m[11], 
					   m[12],m[14],m[15])/det; 
					   
	i[ 5] =  det3x3(m[ 0],m[ 2],m[ 3], 
					   m[ 8],m[10],m[11], 
					   m[12],m[14],m[15])/det; 
					   
	i[ 6] = -det3x3(m[ 0],m[ 2],m[ 3], 
					   m[ 4],m[ 6],m[ 7], 
					   m[12],m[14],m[15])/det; 
					   
	i[ 7] =  det3x3(m[ 0],m[ 2],m[ 3], 
					   m[ 4],m[ 6],m[ 7], 
					   m[ 8],m[10],m[11])/det; 
					   
	i[ 8] =  det3x3(m[ 4],m[ 5],m[ 7], 
					   m[ 8],m[ 9],m[11], 
					   m[12],m[13],m[15])/det; 
					   
	i[ 9] = -det3x3(m[ 0],m[ 1],m[ 3], 
					   m[ 8],m[ 9],m[11], 
					   m[12],m[13],m[15])/det; 
					   
	i[10] =  det3x3(m[ 0],m[ 1],m[ 3], 
					   m[ 4],m[ 5],m[ 7], 
					   m[12],m[13],m[15])/det; 
					   
	i[11] = -det3x3(m[ 0],m[ 1],m[ 3], 
					   m[ 4],m[ 5],m[ 7], 
					   m[ 8],m[ 9],m[11])/det; 
					   
	i[12] = -det3x3(m[ 4],m[ 5],m[ 6], 
					   m[ 8],m[ 9],m[10], 
					   m[12],m[13],m[14])/det; 
					   
	i[13] =  det3x3(m[ 0],m[ 1],m[ 2], 
					   m[ 8],m[ 9],m[10],
					   m[12],m[13],m[14])/det; 
					   
	i[14] = -det3x3(m[ 0],m[ 1],m[ 2], 
					   m[ 4],m[ 5],m[ 6], 
					   m[12],m[13],m[14])/det; 
					   
	i[15] =  det3x3(m[ 0],m[ 1],m[ 2], 
					   m[ 4],m[ 5],m[ 6], 
					   m[ 8],m[ 9],m[10])/det; 
} 
void printMatrix(float * matrix, int rowSize) 
{
	int index = 0, end = 1, arraySize = SIZE; 
	
	for(index = 0; index < arraySize; index++) 
	{ 
		if(end == rowSize) 
		{ 
			printf(" %.2f\n",matrix[index]);
			end = 1;
		} 
		else
		{
			printf(" %.2f ",matrix[index]); 
			end++; 
		} 
	}
	printf("\n"); 
}
void loadTransformMatrix(Nrrd * nin, cl_float16 transformMatrix) 
{ 
	int i,j, size =  nin->spaceDim; 
	NrrdAxisInfo axisInfo; 

	//Image axis Scaling and Rotation 
	for(i = 0; i < size; i++) 
	{ 
		axisInfo = nin->axis[i];
		for(j = 0; j < size; j++) 
		{
	     	transformMatrix[ (size+ 1) * j + i] = axisInfo.spaceDirection[j]; 
	    }
	  
		//Image Location 
	    transformMatrix[ (i * (size + 1)) + size] = nin->spaceOrigin[i];  
	    
	    //Bottom row of the Transform Matrix 
	    transformMatrix[((size + 1) * (size)) + i ] = 0; 
	}
	transformMatrix[((size + 1) * (size)) + size ] = 1;

} 
Nrrd * loadNrrdFile(char * filename) 
{
	/* create a nrrd; at this point this is just an empty container */
  	Nrrd * nin; 
  	
  	nin = nrrdNew();
	char *err;

  	/* read in the nrrd from file */
  	if (nrrdLoad(nin, filename, NULL)) {
    	err = biffGetDone(NRRD);
    	fprintf(stderr, "Mip: trouble reading \"%s\":\n%s", filename, err);
    	free(err);
   	    exit(1); 
   }

  /* say something about the array 
  printf("Mip: \"%s\" is a %d-dimensional nrrd of type %d (%s)\n", 
         filename, nin->dim, nin->type,
         airEnumStr(nrrdType, nin->type));
  printf("Mip: the array contains %d elements, each %d bytes in size\n",
         (int)nrrdElementNumber(nin), (int)nrrdElementSize(nin));*/ 
         
  return nin; 
  
} 
void loadProbePositions(char * filename, float * posValues) 
{ 
	FILE * infile; 
	
	int i = 0; 
	char line[80]; 
	
	if(!(infile = fopen(filename,"r"))) 
	{
    	fprintf(stderr, "Could not load file:\"%s\n", filename);
   	    exit(1); 
   	}   
	
   while(fgets(line, 80, infile) != NULL)
   {
	  sscanf (line, "%f %f %f", &posValues[i],&posValues[i+1],&posValues[i+2]);
	  i+=3; 
    }
	
   fclose(infile); 	
} 
int exe_Probe_Kernel(Nrrd * nin, float * probedPositions, float * h1, float * h2, float * out)
{ 

	cl_program program; 
	cl_kernel kernel; 

	cl_command_queue queue; 
	cl_context context; 
	
	cl_device_id cpu = NULL, device = NULL; 
	
	cl_int err = 0; 
	
	cl_float16 transformMatrix; 
	cl_float16 inverseMatrix; 
	
	int imageDataSize =  (int)nrrdElementNumber(nin);  
	
	cl_mem imageData_mem, out_mem, h1_mem, h2_mem, positions_mem;  
	
	/** Setup Device **/ 
	err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU,1,&cpu,NULL); 
	assert(err==CL_SUCCESS); 
	
	err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_GPU,1,&device,NULL); 
	//if(err != CL_SUCCESS)  
	device = cpu; 
	
	assert(device); 
	
	/** Retrieve Information about the device  
	cl_char vendor_name[1024] = {0};
    cl_char device_name[1024] = {0};
    err = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);
    err|= clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);
  	printf("Connecting to %s %s...\n", vendor_name, device_name);
	device_stats(device); */ 

	
	/* Setup Context and Command Queue */ 
	context = clCreateContext(0,1,&device,NULL,NULL,&err); 
	assert(err == CL_SUCCESS); 
	
	queue = clCreateCommandQueue(context,device,0,NULL); 
	/** Load the Kernel and Program **/ 
	const char * filename = "probe.cl"; 

	char * kernel_source = loadKernel(filename); 

	assert(kernel_source != 0); 
	
	program = clCreateProgramWithSource(context,1,(const char **)&kernel_source,NULL,&err); 
	
	assert(err == CL_SUCCESS);
	
	err = clBuildProgram(program, 0, NULL, NULL,NULL, NULL);


   /** Retrieve information about the program build to check for any possible errors **/ 
   char * build_log;
   size_t log_size;
   
   // First call to know the proper size
   clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
   build_log = (char *) malloc(log_size+1);
   // Second call to get the log
   clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
   build_log[log_size] = '\0';
   printf("\nBuild Log:\n%s\n",build_log); 
   free(build_log); 
   
    assert(err == CL_SUCCESS); 
   
	kernel = clCreateKernel(program,"probe",&err); 
		
	assert(err == CL_SUCCESS); 
	
	/** Memory Allocation for the Matrices **/    

	h1_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL); 
	err |= clEnqueueWriteBuffer(queue,h1_mem,CL_TRUE,0,sizeof(float) * 4,
								(void *)h1 ,0,NULL,NULL); 
								
	h2_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * 4,NULL,NULL); 
	err |= clEnqueueWriteBuffer(queue,h2_mem,CL_TRUE,0,sizeof(float) * 4, 
								(void *)h2 ,0,NULL,NULL); 

	
	positions_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * (SIZE *3),NULL,NULL); 
	err |= clEnqueueWriteBuffer(queue,positions_mem,CL_TRUE,0,sizeof(float) * (SIZE *3), 
								(void *)probedPositions ,0,NULL,NULL); 
	
	imageData_mem = clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float) * imageDataSize,NULL,NULL); 
	err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,sizeof(float) * imageDataSize, 
								nin->data ,0,NULL,NULL); 	
	
	//Load the transformMatrix 
	loadTransformMatrix(nin,transformMatrix); 
	invMatrix(transformMatrix,inverseMatrix);

	err |= clEnqueueWriteBuffer(queue,imageData_mem,CL_TRUE,0,imageDataSize, 
								nin->data ,0,NULL,NULL); 	
	
	assert(err == CL_SUCCESS); 
	
	out_mem = clCreateBuffer(context,CL_MEM_READ_WRITE, sizeof(float) * (SIZE),NULL,NULL); 
	
	clFinish(queue); 
	
	size_t global_work_size[1], local_work_size[1]; 
	
	global_work_size[0] = SIZE;  
	local_work_size[0] = 1; 
	
	err  =clSetKernelArg(kernel,0,sizeof(cl_mem), &imageData_mem); 
	err |=clSetKernelArg(kernel,1,sizeof(cl_mem), &h1_mem); 
	err |=clSetKernelArg(kernel,2,sizeof(cl_mem), &h2_mem); 
	err |=clSetKernelArg(kernel,3,sizeof(cl_mem), &positions_mem); 
	err |=clSetKernelArg(kernel,4,sizeof(cl_mem), &out_mem); 
	err |=clSetKernelArg(kernel,5,sizeof(cl_float16), &inverseMatrix); 
	err |=clSetKernelArg(kernel,6,sizeof(int), &nin->axis[1].size); 
	err |=clSetKernelArg(kernel,7,sizeof(int), &nin->axis[2].size);
	err |=clSetKernelArg(kernel,8,sizeof(int), &nin->axis[0].size); 
	
	assert(err == CL_SUCCESS); 
	
	/** Retrieve the Recommend Work Group Size 
	size_t thread_size; 
	clGetKernelWorkGroupInfo(kernel,device,CL_KERNEL_WORK_GROUP_SIZE,
							 sizeof(size_t),&thread_size,NULL);
	printf("Recommended Size: %lu\n",thread_size);*/ 
	
	
	err = clEnqueueNDRangeKernel(queue,kernel,1,NULL,global_work_size,
								local_work_size,0,NULL,NULL); 
	
	assert(err == CL_SUCCESS); 
		
	clFinish(queue); 
	
	err = clEnqueueReadBuffer(queue,out_mem,CL_TRUE,0, sizeof(float) * SIZE,out,0,NULL,NULL); 
	 
//	saveResults(out,SIZE * SIZE); 
	printMatrix(out,1); 
	
	clReleaseKernel(kernel); 
	clReleaseProgram(program); 
	clReleaseCommandQueue(queue); 
	clReleaseContext(context); 
	
	clReleaseMemObject(imageData_mem); 
	clReleaseMemObject(h1_mem); 
	clReleaseMemObject(h2_mem); 
	clReleaseMemObject(out_mem); 
	clReleaseMemObject(positions_mem); 
	return CL_SUCCESS; 	
}

int main (int argc, char ** argv) 
{ 
 	//Declaring and initializing input variables 
 	Nrrd * nin;
 	char * probeValuesFile = "../../../data/plane-probe-pos-x.txt"; 
 	char * dataFile = "../../../data/plane-x9.nrrd"; 
	float h1[] = {0.666667,0,-1,0.5}; 
	float h2[] = {1.33333, -2, 1,-0.166667}; 
	float * out; 
	float * posValues; 
	
	out = (float *) malloc(sizeof(float) * SIZE); 
	posValues = (float *) malloc(sizeof(float) * (SIZE * 3)); 
	
	nin = loadNrrdFile(dataFile); 
	loadProbePositions(probeValuesFile,posValues); 

	exe_Probe_Kernel(nin,posValues,h1,h2,out); 
	
	
	return 0; 
} 

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0