Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /branches/vis12-cl/src/compiler/cl-target/fragments/c-output-grid-fixed.in
ViewVC logotype

View of /branches/vis12-cl/src/compiler/cl-target/fragments/c-output-grid-fixed.in

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3137 - (download) (annotate)
Thu Mar 26 14:55:52 2015 UTC (4 years, 4 months ago) by jhr
File size: 2293 byte(s)
working on OpenCL scheduling
// helper function for getting fixed-size output values from a grid of strands
//
// Allocates a temporary device buffer, enqueues the copy kernel `kern` on
// wrld->cmdQ with arguments (wrld->schedBuf, device buffer, wrld->numStrands),
// allocates `nData` as a `dim`-dimensional nrrd of `nrrdType` elements, and
// then does a blocking read of the device buffer into nData->data.
//
// Returns false on success and true on error.  OpenCL failures are reported
// through CheckOCLStatus/ReportOCLError; a nrrd allocation failure is added to
// wrld->errors.  The temporary device buffer and the kernel-completion event
// are released on every exit path.
//
STATIC_INLINE bool OutputGridFixed (
    @PREFIX@World_t *wrld,	// the Diderot context
    int dim,			// number of output dimensions
    size_t *sizes,		// dim-sized array of sizes
    int nrrdType,		// Nrrd type of elements
    cl_kernel kern,		// copy kernel
    Nrrd *nData)		// output Nrrd
{
    cl_int sts;
    cl_mem gpuData = NULL;		// temporary device-side output buffer
    cl_event copyDoneEvt = NULL;	// signals completion of the copy kernel
    bool failed = true;			// cleared only after a successful read

  // work sizes for the program
    size_t globalWorkSize[2] = {
	wrld->device->numCUs * wrld->device->cuWidth,
	wrld->nWorkers
    };
    size_t localWorkSize[2] = {
	wrld->device->cuWidth,
	1
    };

  // compute the total size in bytes (element size times the product of the
  // axis sizes)
    size_t totalSz = nrrdTypeSize[nrrdType];
    for (int i = 0;  i < dim;  i++) {
	totalSz *= sizes[i];
    }

  // Allocate GPU buffer
    gpuData = clCreateBuffer (wrld->context, CL_MEM_WRITE_ONLY, totalSz, NULL, &sts);
    if (CheckOCLStatus (wrld, sts, "error allocating GPU memory for output")) {
	goto cleanup;
    }

  // Run the copy kernel; the first failing call short-circuits the chain and
  // leaves its status in sts
    if (((sts = clSetKernelArg (kern, 0, sizeof(cl_mem), &(wrld->schedBuf))) != CL_SUCCESS)
    ||  ((sts = clSetKernelArg (kern, 1, sizeof(cl_mem), &gpuData)) != CL_SUCCESS)
    ||  ((sts = clSetKernelArg (kern, 2, sizeof(cl_uint), &(wrld->numStrands))) != CL_SUCCESS)
    ||  ((sts = clEnqueueNDRangeKernel(wrld->cmdQ, kern, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, &copyDoneEvt)) != CL_SUCCESS)) {
	ReportOCLError (wrld, sts, "error enqueuing output kernel");
	goto cleanup;
    }

  // Allocate nData nrrd (done after the enqueue so that host-side allocation
  // can proceed while the kernel is in flight)
    if (nrrdMaybeAlloc_nva(nData, nrrdType, dim, sizes)!=0) {
	char *msg = biffGetDone(NRRD);
	biffMsgAdd(wrld->errors, msg);
	free(msg);
	goto cleanup;
    }

  // copy data to output nrrd; the read is blocking and waits on the kernel's
  // completion event, so no separate clFinish is required
    sts = clEnqueueReadBuffer (wrld->cmdQ, gpuData, CL_TRUE, 0, totalSz, nData->data, 1, &copyDoneEvt, NULL);
    if (CheckOCLStatus (wrld, sts, "error copying output from GPU")) {
	goto cleanup;
    }

    failed = false;

  cleanup:
  // release the OpenCL objects created by this call; both are
  // reference-counted handles, so releasing them here is safe even if
  // enqueued work still refers to them
    if (copyDoneEvt != NULL) {
	clReleaseEvent(copyDoneEvt);
    }
    if (gpuData != NULL) {
	clReleaseMemObject(gpuData);
    }
    return failed;

}

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0