SCM Repository
View of /branches/vis12/src/lib/cl-target/main.c
Parent Directory
|
Revision Log
Revision 1685 -
(download)
(as text)
(annotate)
Sun Jan 22 15:23:36 2012 UTC (10 years, 3 months ago) by jhr
File size: 24444 byte(s)
Sun Jan 22 15:23:36 2012 UTC (10 years, 3 months ago) by jhr
File size: 24444 byte(s)
Create a branch to implement things that we need for the Vis 2012 paper
/*! \file main.c * * \author John Reppy */ /* * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu) * All rights reserved. */ #include <Diderot/diderot.h> #include "clinfo.h" #include <sys/sysctl.h> #include <sys/stat.h> typedef struct { cl_int blkIdx; //!< the id of this block cl_int nActive; //!< number of active (status != DIE or STABLE) strands cl_int nDead; //!< number of strands in the DIE state cl_int nStabilizing; //!< number of new strands in the STABILIZE state cl_int nDying; //!< number of new strands in the DIE state } StrandBlock_t; typedef struct { cl_int numStrands; //!< number of strands cl_int sId; //!< the index into the todo list or queue cl_int nextStrand; //!< index of the next strand to retrieve from the pool cl_int clearQueueSz; //!< an indicator on whether the queue size should be cleared cl_int queueSize; //!< number of blocks on the scheduler's queue cl_int todoSize; //!< number of blocks on the scheduler's todo list cl_int numAvailable; //!< number of active strands left to process } SchedState_t; typedef struct { cl_kernel kern; //!< OpenCL kernel that implements the program size_t workGrpSize; //!< size of workgroup for this kernel cl_ulong localSzb; //!< size of local memory used by kernel } GPUKernel_t; struct struct_world { STRUCT_WORLD_PREFIX // FIXME: document these fields! void *inState; void *outState; DeviceInfo_t *device; //!< info about OpenCL device that we are using. uint32_t nWorkers; //!< number of work groups to create cl_context context; //!< OpenCL execution context cl_command_queue cmdQ; //!< OpenCL command queue GPUKernel_t kernel[3]; //!< OpenCL kernel that implements the program }; // FIXME: need documentation for this stuct! typedef struct { cl_mem schedMem; cl_mem outMem; cl_mem inMem; cl_mem blocksMem; cl_mem strandBlocksIdxsMem; cl_mem queueMem; cl_mem todoMem; cl_mem statusMem; } KernelArgs_t; static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld); static void CheckErrorCode (cl_int sts, const char * msg); static void SetPhase1Args (cl_kernel kernel, int *argCount, KernelArgs_t *args); static void SetPhase2Args (cl_kernel kernel, int *argCount, KernelArgs_t *args, int blk_sz); static void SetScheduleKernelArgs (cl_kernel kernel, int *argCount, KernelArgs_t *args); static bool CreateKernel (cl_device_id dev, cl_program prog, const char *name, GPUKernel_t *kern); extern void Diderot_LoadGlobals (cl_context cxt, cl_kernel kernel, cl_command_queue cmdQ, int argStart); int main (int argc, const char **argv) { // get information about OpenCL support CLInfo_t *clInfo = GetCLInfo(); if (clInfo == 0) { fprintf (stderr, "no OpenCL support\n"); exit (1); } // run the generated global initialization code if (VerboseFlg) fprintf (stderr, "initializing globals ...\n"); Diderot_InitGlobals (); Diderot_int_t nWorkersPerCU = 4; Diderot_Options_t *opts = Diderot_OptNew (); Diderot_OptAddInt (opts, "np", "specify number of workers/CU", &nWorkersPerCU, true); Diderot_RegisterGlobalOpts (opts); Diderot_OptProcess (opts, argc, argv); Diderot_OptFree (opts); if (VerboseFlg) PrintCLInfo (stdout, clInfo); Diderot_World_t *wrld = Diderot_Initially (); // this may not be right for OpenCL if (! InitCL(clInfo, wrld)) exit (1); // set the number of work groups if ((0 < nWorkersPerCU) && (nWorkersPerCU < wrld->device->maxWISize[0] / wrld->device->cuWidth)) wrld->nWorkers = nWorkersPerCU * wrld->device->numCUs; else wrld->nWorkers = wrld->device->numCUs; // fallback to one worker per CU if (VerboseFlg) fprintf (stderr, "using %d x %d threads\n", wrld->nWorkers, wrld->device->cuWidth); // Conversion of strands from their host types to their shadow types void *shadowInState = CheckedAlloc(Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands); void *shadowOutState = CheckedAlloc(Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands); uint8_t *strandPtr = (uint8_t *)wrld->inState; uint8_t *strandShadowPtr = (uint8_t *)shadowInState; uint8_t *strandShadowOutPtr = (uint8_t *)shadowOutState; size_t shadowSize = Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands; for (int i = 0; i < wrld->numStrands; i++) { Diderot_Strands[0]->strandCopy (strandPtr, strandShadowPtr); Diderot_Strands[0]->strandCopy (strandPtr, strandShadowOutPtr); strandPtr += Diderot_Strands[0]->stateSzb; strandShadowPtr += Diderot_Strands[0]->shadowStrandSzb; strandShadowOutPtr += Diderot_Strands[0]->shadowStrandSzb; } // FIXME: it is confusing to use the inState/outState pointers for two different purposes. free (wrld->inState); free (wrld->outState); wrld->inState = shadowInState; wrld->outState = shadowOutState; // hack to make the invariant part of the state the same in both copies memcpy (wrld->outState, wrld->inState, shadowSize); int argCount = 0; cl_int sts = CL_SUCCESS; KernelArgs_t kernelArgs; SchedState_t scheduler; scheduler.numStrands = wrld->numStrands; scheduler.nextStrand = 0; scheduler.todoSize = 0; scheduler.sId = 0; scheduler.clearQueueSz= 1; scheduler.numAvailable = wrld->numStrands; size_t globalWorkSize[1] = {0}; size_t localWorkSize[1] = {0}; globalWorkSize[0] = wrld->nWorkers * wrld->device->cuWidth; localWorkSize[0] = wrld->device->cuWidth; // compute the number of scheduler blocks int numberOfBlocks = (wrld->numStrands + wrld->device->cuWidth - 1) / wrld->device->cuWidth; assert (numberOfBlocks * wrld->device->cuWidth >= wrld->numStrands); size_t strandBlkMemSize = sizeof(int) * numberOfBlocks * wrld->device->cuWidth; size_t schedListMemSize = sizeof(int) * numberOfBlocks; int *schedulerQueue = (int *)CheckedAlloc(schedListMemSize); int *schedulerTodoList = (int *)CheckedAlloc(schedListMemSize); int *strandBlocksIdxs = (int *)CheckedAlloc(strandBlkMemSize); scheduler.queueSize = numberOfBlocks; size_t strandblkMemSize = sizeof(StrandBlock_t) * numberOfBlocks; StrandBlock_t *strandBlks = (StrandBlock_t *)CheckedAlloc(strandblkMemSize); for (int i = 0; i < numberOfBlocks; i++) { schedulerQueue[i] = i; strandBlks[i].blkIdx = i; strandBlks[i].nActive = 0; strandBlks[i].nDead = 0; strandBlks[i].nStabilizing = 0; strandBlks[i].nDying = 0; } kernelArgs.schedMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(SchedState_t), &scheduler, &sts); CheckErrorCode (sts, "error creating OpenCL scheduler buffer\n"); kernelArgs.blocksMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, strandblkMemSize, strandBlks , &sts); CheckErrorCode (sts, "error creating OpenCL strand blocks buffer\n"); kernelArgs.strandBlocksIdxsMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, strandBlkMemSize, strandBlocksIdxs , &sts); CheckErrorCode (sts, "error creating OpenCL strand blocks indices buffer\n"); kernelArgs.queueMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, schedListMemSize, schedulerQueue , &sts); CheckErrorCode (sts, "error creating OpenCL scheduler queue buffer\n"); kernelArgs.todoMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, schedListMemSize, schedulerTodoList , &sts); CheckErrorCode (sts, "error creating OpenCL scheduler todo buffer\n"); kernelArgs.inMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, shadowSize, wrld->inState, &sts); CheckErrorCode (sts, "error creating OpenCL strand in-state buffer\n"); kernelArgs.outMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, shadowSize, wrld->outState, &sts); CheckErrorCode (sts, "error creating OpenCL strand out-state buffer\n"); kernelArgs.statusMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(int) * wrld->numStrands, wrld->status, &sts); CheckErrorCode (sts, "error creating OpenCL world status buffer\n"); //Setup the Update Kernel's arguments SetPhase1Args (wrld->kernel[0].kern, &argCount, &kernelArgs); Diderot_LoadGlobals (wrld->context, wrld->kernel[0].kern, wrld->cmdQ, argCount); //Setup the Compaction Kernel's arguments argCount=0; SetPhase2Args (wrld->kernel[1].kern, &argCount, &kernelArgs, wrld->device->cuWidth); //Setup the Scheduler Kernel's arguments argCount = 0; SetScheduleKernelArgs (wrld->kernel[2].kern, &argCount, &kernelArgs); double t0 = airTime(); while (scheduler.numAvailable > 0) { // Runs the update kernel on all strands sts = clEnqueueNDRangeKernel(wrld->cmdQ, wrld->kernel[0].kern, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL); CheckErrorCode (sts, "error in executing update kernel\n"); sts = clEnqueueTask(wrld->cmdQ,wrld->kernel[2].kern, 0, NULL, NULL); CheckErrorCode (sts, "error in executing scheduler update kernel before compaction\n"); // Run the compaction kernel on all strands sts = clEnqueueNDRangeKernel(wrld->cmdQ, wrld->kernel[1].kern, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL); CheckErrorCode (sts, "error ccccn executing compaction kernel\n"); sts = clEnqueueTask(wrld->cmdQ,wrld->kernel[2].kern,0,NULL,NULL); CheckErrorCode (sts, "error in executing scheduler update kernel after compaction\n"); sts = clEnqueueReadBuffer(wrld->cmdQ, kernelArgs.schedMem, CL_TRUE, 0, sizeof(SchedState_t), &scheduler, 0, NULL, NULL); CheckErrorCode (sts, "error reading back scheduler information\n"); } sts = clEnqueueReadBuffer(wrld->cmdQ, kernelArgs.outMem, CL_TRUE, 0,shadowSize, wrld->outState, 0, NULL, NULL); CheckErrorCode (sts, "error in reading back output\n"); sts = clEnqueueReadBuffer( wrld->cmdQ, kernelArgs.statusMem, CL_TRUE, 0, sizeof(int) * wrld->numStrands, wrld->status, 0, NULL, NULL); CheckErrorCode (sts, "error in reading back status\n"); wrld->outputSzb = Diderot_Strands[0]->shadowStrandSzb; clFinish (wrld->cmdQ); clReleaseKernel(wrld->kernel[0].kern); clReleaseKernel(wrld->kernel[1].kern); clReleaseCommandQueue(wrld->cmdQ); clReleaseContext(wrld->context); double totalTime = airTime() - t0; if (VerboseFlg) fprintf (stderr, "done in %f seconds\n", totalTime); else if (TimingFlg) printf ("usr=%f\n", totalTime); // here we have the final state of all of the strands in the "in" buffer // output the final strand states if (NrrdOutputFlg) Diderot_Output (wrld, Diderot_Strands[0]->shadowStrandSzb); else Diderot_Print (wrld); Diderot_Shutdown (wrld); return 0; } static void SetPhase1Args (cl_kernel kernel, int *argCount, KernelArgs_t *args) { int count = *argCount; cl_int sts = CL_SUCCESS; sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->inMem); CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand in-state argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->outMem); CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand out-state argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->statusMem); CheckErrorCode (sts, "Update Kernel: error Setting OpenCL world status argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->schedMem); CheckErrorCode (sts, "Update Kernel: error Setting OpenCL scheduler argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->blocksMem); CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand blocks argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->strandBlocksIdxsMem); CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand blocks' indices argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->queueMem); CheckErrorCode (sts, "Update Kernel: error Setting OpenCL scheduler queue argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->todoMem); CheckErrorCode (sts, "Update Kernel: error Setting OpenCL scheduler todo argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(StrandBlock_t), NULL); CheckErrorCode (sts, "Update Kernel: error Setting OpenCL local strand blockargument\n"); *argCount = count; } static void SetPhase2Args (cl_kernel kernel, int *argCount, KernelArgs_t *args, int blk_sz) { int count = *argCount; cl_int sts = CL_SUCCESS; sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->statusMem); CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL world status argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->schedMem); CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL scheduler argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->blocksMem); CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL strand blocks argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->strandBlocksIdxsMem); CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL strand blocks' indices argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->queueMem); CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL scheduler queue argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->todoMem); CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL scheduler todo argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(StrandBlock_t), NULL); CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local strand blockargument\n"); sts = clSetKernelArg (kernel, count++, sizeof(int) * blk_sz, NULL); CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local preStable argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(int) * blk_sz, NULL); CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local preDead argument\n"); sts = clSetKernelArg (kernel, count++, sizeof(int) * blk_sz * 2, NULL); CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local temporary array for the prefix scan of preStable preDead argument\n"); *argCount = count; } static void SetScheduleKernelArgs(cl_kernel kernel, int *argCount, KernelArgs_t *args) { int count = *argCount; cl_int sts = CL_SUCCESS; sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->schedMem); CheckErrorCode (sts, "Scheduler Kernel: error setting OpenCL scheduler argument\n"); *argCount = count; } static void CheckErrorCode (cl_int sts, const char *msg) { if (sts != CL_SUCCESS) { fprintf (stderr, "%s", msg); exit(1); } } /*! \brief load OpenCL code from a file */ static char *LoadSource (const char *filename) { struct stat statbuf; if (stat(filename, &statbuf) < 0) { fprintf (stderr, "unable to stat OpenCL source file %s\n", filename); exit (1); } char *source = (char *)CheckedAlloc(statbuf.st_size + 1); if (source == 0) { fprintf (stderr, "unable to allocate memory for OpenCL source\n"); exit (1); } FILE *fh = fopen(filename, "r"); if ((fh == 0) || (fread(source, statbuf.st_size, 1, fh) != 1)) { fprintf (stderr, "unable to read OpenCL source from %s\n", filename); exit (1); } source[statbuf.st_size] = '\0'; fclose (fh); return source; } //! create a kernel object from a program static bool CreateKernel (cl_device_id dev, cl_program prog, const char *name, GPUKernel_t *kern) { cl_int sts; kern->kern = clCreateKernel(prog, name, &sts); if (sts != CL_SUCCESS) { fprintf (stderr, "error getting %s from program\n", name); return false; } sts = clGetKernelWorkGroupInfo ( kern->kern, dev, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &(kern->workGrpSize), 0); if (sts != CL_SUCCESS) { fprintf (stderr, "error getting workgroup size for %s\n", name); return false; } sts = clGetKernelWorkGroupInfo ( kern->kern, dev, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(cl_ulong), &(kern->localSzb), 0); if (sts != CL_SUCCESS) { fprintf (stderr, "error getting local memory size for %s\n", name); return false; } if (VerboseFlg) { fprintf(stderr, "kernel %s: workgroup size = %d, local memory = %d bytes\n", name, (int)kern->workGrpSize, (int)kern->localSzb); } return true; } static void LogMessagesToStderr (const char *errstr, const void *private_info, size_t cb, void *user_data) { fprintf(stderr, "***** error log *****\n"); fprintf(stderr, "%s\n", errstr); fprintf(stderr, "***** end error log *****\n"); } /*! \brief initialize the OpenCL execution context, including loading and compiling the OpenCL * program. * \param clInfo points to the summary information about the available OpenCL devices. * \param wrld the Diderot execution information. * \return true if initialization is successful, otherwise false. */ static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld) { cl_int sts; int pltIx = 0; // main patform index // find a GPU on platform[0] DeviceInfo_t *dev = 0; int i; for (i = 0; i < clInfo->platforms[pltIx].numDevices; i++) { if (isGPUDevice (&(clInfo->platforms[pltIx].devices[i])) && clInfo->platforms[pltIx].devices[i].isAvail) { dev = &(clInfo->platforms[pltIx].devices[i]); break; } } if (dev == 0) { fprintf (stderr, "unable to find GPU device\n"); return false; } if (VerboseFlg) { fprintf (stderr, "using platform %d, device %d: %s\n", pltIx, i, clInfo->platforms[0].devices[i].name); } // create the context cl_context cxt = clCreateContext(0, 1, &(dev->id), LogMessagesToStderr, 0, &sts); if (sts != CL_SUCCESS) { fprintf (stderr, "error creating OpenCL context\n"); return false; } // create the program from the source int fnameLen = strlen(wrld->name) + 4; // name + ".cl\0" char *fname = (char *)CheckedAlloc(fnameLen); snprintf(fname, fnameLen, "%s.cl", wrld->name); char *updateSource = LoadSource (fname); free (fname); const char *src[1] = {updateSource}; cl_program prog = clCreateProgramWithSource(cxt, 1, src, NULL, &sts); free (updateSource); if (sts != CL_SUCCESS) { fprintf (stderr, "error creating program\n"); return false; } // build the program char options[1024]; snprintf (options, sizeof(options), "-D DIDEROT_CL_VERSION=%d -D DIDEROT_CU_WIDTH=%d -I %s -w", 100*dev->majorVersion + dev->minorVersion, dev->cuWidth, DIDEROT_INCLUDE_PATH); if (VerboseFlg) { fprintf (stderr, "clBuildProgram options: %s\n", options); } sts = clBuildProgram (prog, 1, &(dev->id), options, 0, 0); if (sts != CL_SUCCESS) { size_t logSzb; clGetProgramBuildInfo (prog, dev->id, CL_PROGRAM_BUILD_LOG, 0, 0, &logSzb); char *log = CheckedAlloc(logSzb+1); clGetProgramBuildInfo (prog, dev->id, CL_PROGRAM_BUILD_LOG, logSzb, log, &logSzb); log[logSzb] = '\0'; fprintf (stderr, "error compiling program:\n%s\n", log); free (log); return false; } // extract the kernels from the program if ((! CreateKernel (dev->id, prog, "Diderot_UpdateKernel", &(wrld->kernel[0]))) || (! CreateKernel (dev->id, prog, "Diderot_CompactionKernel", &(wrld->kernel[1]))) || (! CreateKernel (dev->id, prog, "Diderot_SchedUpdateKernel", &(wrld->kernel[2])))) return false; // create the command queue cl_command_queue q = clCreateCommandQueue(cxt, dev->id, 0, &sts); if (sts != CL_SUCCESS) { fprintf (stderr, "error creating OpenCL command queue\n"); return false; } // initialize world info wrld->device = dev; wrld->context = cxt; wrld->cmdQ = q; return true; } // this should be the part of the scheduler void *Diderot_AllocStrand (Strand_t *strand) { return CheckedAlloc(strand->stateSzb); } // block allocation of an initial collection of strands Diderot_World_t *Diderot_AllocInitially ( const char *name, // the name of the program Strand_t *strand, // the type of strands being allocated bool isArray, // is the initialization an array or collection? uint32_t nDims, // depth of iteration nesting int32_t *base, // nDims array of base indices uint32_t *size) // nDims array of iteration sizes { Diderot_World_t *wrld = NEW(Diderot_World_t); if (wrld == 0) { fprintf (stderr, "unable to allocate world\n"); exit (1); } wrld->name = name; /* NOTE: we are assuming that name is statically allocated! */ wrld->isArray = isArray; wrld->nDims = nDims; wrld->base = NEWVEC(int32_t, nDims); wrld->size = NEWVEC(uint32_t, nDims); size_t numStrands = 1; for (int i = 0; i < wrld->nDims; i++) { numStrands *= size[i]; wrld->base[i] = base[i]; wrld->size[i] = size[i]; } if (VerboseFlg) { printf("AllocInitially: %d", size[0]); for (int i = 1; i < nDims; i++) printf(" x %d", size[i]); printf("\n"); } // allocate the strand state pointers wrld->numStrands = numStrands; wrld->inState = CheckedAlloc (strand->stateSzb * numStrands); wrld->outState = CheckedAlloc (strand->shadowStrandSzb * numStrands); wrld->status = NEWVEC(StatusInt_t, numStrands); // initialize strand state pointers etc. for (size_t i = 0; i < numStrands; i++) { wrld->status[i] = DIDEROT_ACTIVE; } return wrld; } // get strand state pointers void *Diderot_InState (Diderot_World_t *wrld, uint32_t i) { assert (i < wrld->numStrands); return wrld->inState + (i * Diderot_Strands[0]->stateSzb); } void *Diderot_OutState (Diderot_World_t *wrld, uint32_t i) { assert (i < wrld->numStrands); // FIXME: this function is called when outState points to the shadow state, // but it is confusing to me -- JHR return wrld->outState + (i * Diderot_Strands[0]->shadowStrandSzb); } /***** Support for shadow image values *****/ void ShadowImage1D (cl_context cxt, Shadow_image1D_t *dst, Diderot_image1D_t *src) { dst->size[0] = src->size[0]; dst->s = src->s; dst->t = src->t; } void ShadowImage2D (cl_context cxt, Shadow_image2D_t *dst, Diderot_image2D_t *src) { dst->size[0] = src->size[0]; dst->size[1] = src->size[1]; ShadowMat2x2 (dst->w2i, src->w2i); ShadowVec2 (&dst->tVec, src->tVec); ShadowMat2x2 (dst->w2iT, src->w2iT); } void ShadowImage3D (cl_context cxt, Shadow_image3D_t *dst, Diderot_image3D_t *src) { dst->size[0] = src->size[0]; dst->size[1] = src->size[1]; dst->size[2] = src->size[2]; ShadowMat3x3 (dst->w2i, src->w2i); ShadowVec3 (&dst->tVec, src->tVec); ShadowMat3x3 (dst->w2iT, src->w2iT); }
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |