Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /branches/vis12-cl/src/compiler/cl-target/fragments/c-output-collection-fixed.in
ViewVC logotype

View of /branches/vis12-cl/src/compiler/cl-target/fragments/c-output-collection-fixed.in

Parent Directory | Revision Log


Revision 3183 - (download) (annotate)
Tue Mar 31 20:47:38 2015 UTC (4 years, 5 months ago) by jhr
File size: 2397 byte(s)
  working on collection output support
// helper function for getting fixed-size output values from a collection of strands
//
// Allocates a GPU buffer big enough for `size` elements of Nrrd type `nrrdType`,
// allocates the output nrrd `nData` with the same number of elements, and then
// releases the GPU buffer.
//
// NOTE: this is work in progress.  The code that sets the kernel arguments,
// enqueues the copy kernel, and reads the GPU buffer back into `nData->data`
// is currently commented out, so on success `nData` is allocated but this
// function does not fill in its contents.
//
// Returns true if creating the GPU buffer or allocating the nrrd failed, and
// false otherwise.  Nrrd allocation errors are added to `wrld->errors`.
//
STATIC_INLINE bool OutputCollectionFixed (
    @PREFIX@World_t *wrld,	// the Diderot context
    size_t size,		// number of items in grid
    int nrrdType,		// Nrrd type of elements
    cl_kernel kern,		// copy kernel
    Nrrd *nData)		// output Nrrd
{
    cl_int sts;

  // compute the total size (in bytes) of the output data
    size_t totalSz = nrrdTypeSize[nrrdType] * size;

  // Allocate GPU buffer
    cl_mem gpuData = clCreateBuffer (wrld->context, CL_MEM_WRITE_ONLY, totalSz, 0, &sts);
    if (CheckOCLStatus (wrld, sts, "error allocating GPU memory for output")) {
        return true;
    }

  // work sizes for the program; these are only consumed by the (currently
  // disabled) clEnqueueNDRangeKernel call below
    size_t globalWorkSize[2];
    size_t localWorkSize[2];
    globalWorkSize[0] = wrld->device->numCUs * wrld->device->cuWidth;
    globalWorkSize[1] = wrld->nWorkers;
    localWorkSize[0] = wrld->device->cuWidth;
    localWorkSize[1] = 1;

    if (wrld->verboseFlg) {
        fprintf (stderr, "enqueuing output kernel for %d x %d x %d workers and %d strands\n",
            wrld->device->numCUs, wrld->device->cuWidth, wrld->nWorkers, wrld->numStrands);
    }

  // Run the copy kernel
//    if (((sts = clSetKernelArg (kern, 0, sizeof(cl_mem), &(wrld->schedBuf))) != CL_SUCCESS)
//    ||  ((sts = clSetKernelArg (kern, 1, sizeof(cl_mem), &gpuData)) != CL_SUCCESS)
//    ||  ((sts = clSetKernelArg (kern, 2, sizeof(cl_uint), &(wrld->numStrands))) != CL_SUCCESS)
//    ||  ((sts = clEnqueueNDRangeKernel(wrld->cmdQ, kern, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL)) != CL_SUCCESS)) {
//	ReportOCLError (wrld, sts, "error enqueuing output kernel");
//	return true;
//    }

//   if ((sts = clFinish (wrld->cmdQ)) != CL_SUCCESS) {
//        ReportOCLError (wrld, sts, "error finishing output kernel");
//        return true;
//   }

  // Allocate nData nrrd.  The axis description is defined locally: one axis
  // holding `size` elements, so that the nrrd's data is exactly `totalSz` bytes
  // and can receive the whole GPU buffer.
  // NOTE(review): a 1-D layout is assumed here -- confirm that callers do not
  // expect additional axes for the shape of the individual values.
    unsigned int dim = 1;
    size_t sizes[1];
    sizes[0] = size;
    if (nrrdMaybeAlloc_nva(nData, nrrdType, dim, sizes) != 0) {
      // move the pending nrrd error message into the world's error log
        char *msg = biffGetDone(NRRD);
        biffMsgAdd(wrld->errors, msg);
        free(msg);
        clReleaseMemObject(gpuData);
        return true;
    }

  // copy data to output nrrd
//    sts = clEnqueueReadBuffer (wrld->cmdQ, gpuData, CL_TRUE, 0, totalSz, nData->data, 0, 0, 0);
//    if (CheckOCLStatus (wrld, sts, "error copying output from GPU")) {
//	clReleaseMemObject(gpuData);
//	return true;
//    }

  // free GPU buffer
    clReleaseMemObject(gpuData);
    return false;

}

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0