SCM Repository
View of /branches/pure-cfg/src/lib/cl-target/main.c
Parent Directory
|
Revision Log
Revision 1355 -
(download)
(as text)
(annotate)
Tue Jun 21 01:15:27 2011 UTC (10 years, 11 months ago) by jhr
File size: 14748 byte(s)
Tue Jun 21 01:15:27 2011 UTC (10 years, 11 months ago) by jhr
File size: 14748 byte(s)
Added functions to shadow vectors and matrices
/*! \file main.c * * \author John Reppy */ /* * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu) * All rights reserved. */ #include <Diderot/diderot.h> #include <string.h> #include <stdio.h> #include <assert.h> #include "clinfo.h" #include <sys/sysctl.h> #include <sys/stat.h> // NOTE: we probably should put this in a file that supports runtime printing static bool VerboseFlg = false; static bool TimingFlg = false; struct struct_world { const char *name; // the program name bool isArray; // is the initialization an array or collection? uint32_t nDims; // depth of iteration nesting int32_t *base; // nDims array of base indices uint32_t *size; // nDims array of iteration sizes uint32_t numStrands; // number of strands in the world unsigned int strandSize; // the sizeof of the strand buffers void *inState; void *outState; uint8_t *status; // array of strand status flags cl_device_id device; // OpenCL device cl_context context; // OpenCL execution context cl_command_queue cmdQ; // OpenCL command queue cl_kernel kernel; // OpenCL Kernel that implements the program }; static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld); extern void Diderot_LoadGlobals (cl_context context, cl_kernel kernel, cl_command_queue cmdQ, int argStart); extern void printArray(FILE *outS,void * array) ; int main (int argc, const char **argv) { // get information about OpenCL support CLInfo_t *clInfo = GetCLInfo(); if (clInfo == 0) { fprintf (stderr, "no OpenCL support\n"); exit (1); } Diderot_Options_t *opts = Diderot_OptNew (); Diderot_OptAddFlag (opts, "verbose", "enable runtime-system messages", &VerboseFlg); Diderot_RegisterGlobalOpts (opts); Diderot_OptProcess (opts, argc, argv); Diderot_OptFree (opts); // run the generated global initialization code if (VerboseFlg) printf("initializing globals ...\n"); Diderot_InitGlobals(); /***** FIXME: OpenCL specific stuff goes here. Things to do: ** ** 1) copy data to GPU ** 2) initialize strands ** 3) run strands to termination ** 4) load results from GPU **/ Diderot_World_t *wrld = Diderot_Initially (); // this may not be right for OpenCL if (! InitCL(clInfo, wrld)) exit (1); int argCount = 0; cl_int sts = CL_SUCCESS; /* Create the strand in-state and out-state buffers */ cl_mem stateInMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, wrld->strandSize, wrld->inState, &sts); clFinish (wrld->cmdQ); if (sts != CL_SUCCESS) { fprintf (stderr, "error creating OpenCL strand in-state buffer\n"); exit(1); } cl_mem stateOutMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE| CL_MEM_COPY_HOST_PTR, wrld->strandSize, wrld->outState, &sts); clFinish (wrld->cmdQ); if (sts != CL_SUCCESS) { fprintf (stderr, "error creating OpenCL strand in-state buffer\n"); exit(1); } cl_mem statusMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE| CL_MEM_COPY_HOST_PTR, sizeof(uint8_t) * wrld->numStrands, wrld->status, &sts); clFinish (wrld->cmdQ); if (sts != CL_SUCCESS) { fprintf (stderr, "error creating OpenCL world status buffer\n"); exit(1); } /* Set the in-state and out-state strand agruments */ sts = clSetKernelArg (wrld->kernel, argCount++, sizeof(cl_mem), &stateInMem); clFinish (wrld->cmdQ); if (sts != CL_SUCCESS) { fprintf (stderr, "error Setting OpenCL strand in-state argument\n"); exit(1); } sts = clSetKernelArg (wrld->kernel, argCount++, sizeof(cl_mem), &stateOutMem); clFinish(wrld->cmdQ); if (sts != CL_SUCCESS) { fprintf (stderr, "error Setting OpenCL strand out-state argument\n"); exit(1); } sts = clSetKernelArg (wrld->kernel, argCount++, sizeof(cl_mem), &statusMem); clFinish (wrld->cmdQ); if (sts != CL_SUCCESS) { fprintf (stderr, "error Setting OpenCL world status argument\n"); exit(1); } double t0 = GetTime(); /* FIXME: Also, what happens if nDims != 2? We never output results! */ if (wrld->nDims == 2) { size_t global_work_size[2], local_work_size[2]; global_work_size[0] = wrld->size[0]; global_work_size[1] = wrld->size[1]; local_work_size[0] = 1; local_work_size[1] = 1; cl_int width = global_work_size[1]; sts = clSetKernelArg(wrld->kernel, argCount++, sizeof(cl_int), &width); clFinish(wrld->cmdQ); if (sts != CL_SUCCESS) { fprintf (stderr, "error Setting OpenCL width argument\n"); exit(1); } Diderot_LoadGlobals(wrld->context, wrld->kernel,wrld->cmdQ, argCount); sts = clEnqueueNDRangeKernel(wrld->cmdQ, wrld->kernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL); clFinish(wrld->cmdQ); if (sts != CL_SUCCESS) { fprintf (stderr, "error in executing kernel code:%d\n",sts); exit(1); } sts = clEnqueueReadBuffer(wrld->cmdQ, stateOutMem, CL_TRUE, 0, wrld->strandSize, wrld->outState, 0, NULL, NULL); clFinish(wrld->cmdQ); if (sts != CL_SUCCESS) { fprintf (stderr, "error in reading back output code:%d\n",sts); exit(1); } sts = clEnqueueReadBuffer(wrld->cmdQ, statusMem, CL_TRUE, 0, sizeof(uint8_t) * wrld->numStrands, wrld->status, 0, NULL, NULL); clFinish(wrld->cmdQ); if (sts != CL_SUCCESS) { fprintf (stderr, "error in reading back output code:%d\n",sts); exit(1); } } /* FIXME: release all OpenCL objects */ clReleaseKernel(wrld->kernel); clReleaseCommandQueue(wrld->cmdQ); clReleaseContext(wrld->context); double totalTime = GetTime() - t0; if (VerboseFlg) fprintf (stderr, "done in %f seconds\n", totalTime); else if (TimingFlg) printf ("usr=%f\n", totalTime); // here we have the final state of all of the strands in the "in" buffer int outFileNameLen = strlen(wrld->name) + 5; char *outFileName = (char *)malloc(outFileNameLen); snprintf (outFileName, outFileNameLen, "%s.txt", wrld->name); FILE *outS = fopen(outFileName, "w"); if (outS == NULL) { fprintf (stderr, "Cannot open output file %s\n", outFileName); exit (8); } /*for (int i = 0; i < wrld->numStrands; i++) { if (wrld->status[i] == DIDEROT_STABILIZE) Diderot_Strands[0]->print (outS, &wrld->outState[i]); }*/ Diderot_Strands[0]->print (outS,wrld->status,wrld->numStrands, wrld->outState); fclose (outS); Diderot_Shutdown (wrld); return 0; } /*! \brief load OpenCL code from a file */ static char *LoadSource (const char *filename) { struct stat statbuf; if (stat(filename, &statbuf) < 0) { fprintf (stderr, "unable to stat OpenCL source file %s\n", filename); exit (1); } char *source = (char *) malloc(statbuf.st_size + 1); if (source == 0) { fprintf (stderr, "unable to allocate memory for OpenCL source\n"); exit (1); } FILE *fh = fopen(filename, "r"); if ((fh == 0) || (fread(source, statbuf.st_size, 1, fh) != 1)) { fprintf (stderr, "unable to read OpenCL source from %s\n", filename); exit (1); } source[statbuf.st_size] = '\0'; fclose (fh); return source; } static void LogMessagesToStderr (const char *errstr, const void *private_info, size_t cb, void *user_data) { fprintf(stderr, "%s\n", errstr); } /*! \brief build an OpenCL program from source. */ static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld) { cl_int sts; // find a GPU on platform[0] cl_device_id dev; int i; for (i = 0; i < clInfo->platforms[0].numDevices; i++) { if (clInfo->platforms[0].devices[i].ty == CL_DEVICE_TYPE_GPU) { dev = clInfo->platforms[0].devices[i].id; break; } } if (i == clInfo->platforms[0].numDevices) { fprintf (stderr, "unable to find GPU device\n"); return false; } // create the context cl_context cxt = clCreateContext(0, 1, &dev, LogMessagesToStderr, 0, &sts); if (sts != CL_SUCCESS) { fprintf (stderr, "error creating OpenCL context\n"); return false; } // create the command queue cl_command_queue q = clCreateCommandQueue(cxt, dev, 0, &sts); if (sts != CL_SUCCESS) { fprintf (stderr, "error creating OpenCL command queue\n"); return false; } // create the program from the source int fnameLen = strlen(wrld->name) + 4; // name + ".cl\0" char *fname = (char *)malloc(fnameLen); snprintf(fname, fnameLen, "%s.cl", wrld->name); const char *src = LoadSource (fname); free (fname); cl_program prog = clCreateProgramWithSource(cxt, 1, &src, NULL, &sts); if (sts != CL_SUCCESS) { fprintf (stderr, "error creating program\n"); return false; } free ((void *)src); // build the program const char *options = "-I " DIDEROT_INCLUDE_PATH; sts = clBuildProgram (prog, 1, &dev, options, 0, 0); if (sts != CL_SUCCESS) { size_t logSzb; clGetProgramBuildInfo (prog, dev, CL_PROGRAM_BUILD_LOG, 0, 0, &logSzb); char *log = malloc(logSzb+1); clGetProgramBuildInfo (prog, dev, CL_PROGRAM_BUILD_LOG, logSzb, log, &logSzb); log[logSzb] = '\0'; fprintf (stderr, "error compiling program:\n%s\n", log); free (log); return false; } // extract the kernel from the program cl_kernel kernel = clCreateKernel(prog, "Diderot_KernelMain", &sts); if (sts != CL_SUCCESS) { fprintf (stderr, "error getting kernel from program\n"); return false; } // initialize world info wrld->device = dev; wrld->context = cxt; wrld->cmdQ = q; wrld->kernel = kernel; return true; } // this should be the part of the scheduler void *Diderot_AllocStrand (Strand_t *strand) { return malloc(strand->stateSzb); } // block allocation of an initial collection of strands Diderot_World_t *Diderot_AllocInitially( const char *name, // the name of the program Strand_t *strand, // the type of strands being allocated bool isArray, // is the initialization an array or collection? uint32_t nDims, // depth of iteration nesting int32_t *base, // nDims array of base indices uint32_t *size) // nDims array of iteration sizes { Diderot_World_t *wrld = (Diderot_World_t *) malloc (sizeof(Diderot_World_t)); if (wrld == 0) { fprintf (stderr, "unable to allocate world\n"); exit (1); } wrld->name = name; /* NOTE: we are assuming that name is statically allocated! */ wrld->isArray = isArray; wrld->nDims = nDims; wrld->base = (int32_t *) malloc (nDims * sizeof(int32_t)); wrld->size = (uint32_t *) malloc (nDims * sizeof(uint32_t)); size_t numStrands = 1; for (int i = 0; i < wrld->nDims; i++) { numStrands *= size[i]; wrld->base[i] = base[i]; wrld->size[i] = size[i]; } if (VerboseFlg) { printf("AllocInitially: %d", size[0]); for (int i = 1; i < nDims; i++) printf(" x %d", size[i]); printf("\n"); } // allocate the strand state pointers wrld->numStrands = numStrands; wrld->strandSize = strand->stateSzb * numStrands; /* wrld->inState = (void **) malloc (numStrands * sizeof(void *)); wrld->outState = (void **) malloc (numStrands * sizeof(void *)); */ wrld->inState = malloc (wrld->strandSize); wrld->outState = malloc (wrld->strandSize); wrld->status = (uint8_t *) malloc (numStrands * sizeof(uint8_t)); if ((wrld->inState == 0) || (wrld->outState == 0) || (wrld->status == 0)) { fprintf (stderr, "unable to allocate strand states\n"); exit (1); } // initialize strand state pointers etc. for (size_t i = 0; i < numStrands; i++) { // wrld->inState[i] = Diderot_AllocStrand (strand); // wrld->outState[i] = Diderot_AllocStrand (strand); wrld->status[i] = DIDEROT_ACTIVE; } return wrld; } // get strand state pointers void *Diderot_InState (Diderot_World_t *wrld, uint32_t i) { assert (i < wrld->numStrands); return &wrld->inState[i]; } void *Diderot_OutState (Diderot_World_t *wrld, uint32_t i) { assert (i < wrld->numStrands); return &wrld->outState[i]; } bool Diderot_IsActive (Diderot_World_t *wrld, uint32_t i) { assert (i < wrld->numStrands); return !wrld->status[i]; } /***** Support for shadow image values *****/ void ShadowImage1D (cl_context cxt, Shadow_image1D_t *dst, Diderot_image1D_t *src) { cl_int sts; dst->data = clCreateBuffer ( cxt, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, src->dataSzb, src->data, &sts); if (sts != CL_SUCCESS) { fprintf(stderr, "error in creating buffer for 1D image data"); exit(1); } dst->size[0] = src->size[0]; dst->s = src->s; dst->t = dst->t; } void ShadowImage2D (cl_context cxt, Shadow_image2D_t *dst, Diderot_image2D_t *src) { cl_int sts; dst->data = clCreateBuffer ( cxt, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, src->dataSzb, src->data, &sts); if (sts != CL_SUCCESS) { fprintf(stderr, "error in creating buffer for 2D image data"); exit(1); } dst->size[0] = src->size[0]; dst->size[1] = src->size[1]; ShadowMat2x2 (dst->w2i, src->w2i); ShadowVec2 (dst->tVec, src->tVec); ShadowMat2x2 (dst->w2iT, src->w2iT); } void ShadowImage3D (cl_context cxt, Shadow_image3D_t *dst, Diderot_image3D_t *src) { cl_int sts; dst->data = clCreateBuffer ( cxt, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, src->dataSzb, src->data, &sts); if (sts != CL_SUCCESS) { fprintf(stderr, "error in creating buffer for 3D image data"); exit(1); } dst->size[0] = src->size[0]; dst->size[1] = src->size[1]; dst->size[2] = src->size[2]; ShadowMat3x3 (dst->w2i, src->w2i); ShadowVec3 (dst->tVec, src->tVec); ShadowMat3x3 (dst->w2iT, src->w2iT); }
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |