Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Diff of /branches/pure-cfg/src/lib/cl-target/main.c
ViewVC logotype

Diff of /branches/pure-cfg/src/lib/cl-target/main.c

Parent Directory | Revision Log | View Patch

revision 1648, Mon Nov 21 15:52:35 2011 UTC revision 1649, Mon Nov 21 17:50:39 2011 UTC
# Line 42  Line 42 
42  // FIXME: document these fields!  // FIXME: document these fields!
43      void                *inState;      void                *inState;
44      void                *outState;      void                *outState;
45      cl_device_id        device;         //!< OpenCL device      DeviceInfo_t        *device;        //!< info about OpenCL device that we are using.
46        uint32_t            nWorkers;       //!< number of work groups to create
47      cl_context          context;        //!< OpenCL execution context      cl_context          context;        //!< OpenCL execution context
48      cl_command_queue    cmdQ;           //!< OpenCL command queue      cl_command_queue    cmdQ;           //!< OpenCL command queue
49      GPUKernel_t         kernel[3];      //!< OpenCL kernel that implements the program      GPUKernel_t         kernel[3];      //!< OpenCL kernel that implements the program
# Line 79  Line 80 
80          exit (1);          exit (1);
81      }      }
82    
83        Diderot_int_t nWorkers = 0;
84      Diderot_Options_t *opts = Diderot_OptNew ();      Diderot_Options_t *opts = Diderot_OptNew ();
85        Diderot_OptAddInt (opts, "np", "specify number of workers", &nWorkers, true);
86      Diderot_RegisterGlobalOpts (opts);      Diderot_RegisterGlobalOpts (opts);
87      Diderot_OptProcess (opts, argc, argv);      Diderot_OptProcess (opts, argc, argv);
88      Diderot_OptFree (opts);      Diderot_OptFree (opts);
# Line 97  Line 100 
100      if (! InitCL(clInfo, wrld))      if (! InitCL(clInfo, wrld))
101          exit (1);          exit (1);
102    
103      // set the number of work groups to the device's number of CUs.  This value
104      // can be overridden by a command-line option
105        if (nWorkers > 0)
106            wrld->nWorkers = nWorkers;
107        else
108            wrld->nWorkers = wrld->device->numCUs;
109        if (VerboseFlg)
110            fprintf (stderr, "using %d x %d threads\n",
111                wrld->nWorkers, wrld->device->cuWidth);
112    
113    // Conversion of strands from their host types to their shadow types    // Conversion of strands from their host types to their shadow types
114      void *shadowInState = CheckedAlloc(Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands);      void *shadowInState = CheckedAlloc(Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands);
115      void *shadowOutState = CheckedAlloc(Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands);      void *shadowOutState = CheckedAlloc(Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands);
116      uint8_t *strandPtr = (uint8_t *)wrld->inState;      uint8_t *strandPtr = (uint8_t *)wrld->inState;
117      uint8_t *strandShadowPtr = (uint8_t *)shadowInState;      uint8_t *strandShadowPtr = (uint8_t *)shadowInState;
118      uint8_t *strandShadowOutPtr = (uint8_t *)shadowInState;      uint8_t *strandShadowOutPtr = (uint8_t *)shadowOutState;
119      size_t shadowSize = Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands;      size_t shadowSize = Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands;
120    
121      for (int i = 0;  i < wrld->numStrands;  i++) {      for (int i = 0;  i < wrld->numStrands;  i++) {
# Line 136  Line 149 
149    
150      size_t globalWorkSize[1] = {0};      size_t globalWorkSize[1] = {0};
151      size_t localWorkSize[1] = {0};      size_t localWorkSize[1] = {0};
152      int dIdx = clInfo->mainDeviceIdx;      globalWorkSize[0] = wrld->nWorkers * wrld->device->cuWidth;
153      int pIdx = clInfo->mainPlatformIdx;      localWorkSize[0] = wrld->device->cuWidth;
     globalWorkSize[0] = clInfo->platforms[pIdx].devices[dIdx].numCUs * clInfo->platforms[pIdx].devices[dIdx].cuWidth;  
     localWorkSize[0] = clInfo->platforms[pIdx].devices[dIdx].cuWidth;  
154      int numberOfBlocks =  ceil((double)wrld->numStrands/localWorkSize[0]);      int numberOfBlocks =  ceil((double)wrld->numStrands/localWorkSize[0]);
155    
156      size_t strandBlkMemSize = sizeof(int) * numberOfBlocks * clInfo->platforms[pIdx].devices[dIdx].cuWidth;;      size_t strandBlkMemSize = sizeof(int) * numberOfBlocks * wrld->device->cuWidth;
157      size_t schedListMemSize = sizeof(int) * numberOfBlocks;      size_t schedListMemSize = sizeof(int) * numberOfBlocks;
158      int *schedulerQueue = (int *)CheckedAlloc(schedListMemSize);      int *schedulerQueue = (int *)CheckedAlloc(schedListMemSize);
159      int *schedulerTodoList = (int *)CheckedAlloc(schedListMemSize);      int *schedulerTodoList = (int *)CheckedAlloc(schedListMemSize);
# Line 199  Line 210 
210    
211      //Setup the Compaction Kernel's arguments      //Setup the Compaction Kernel's arguments
212      argCount=0;      argCount=0;
213      SetPhase2Args(wrld->kernel[1].kern, &argCount, &kernelArgs,clInfo->platforms[pIdx].devices[dIdx].cuWidth);      SetPhase2Args (wrld->kernel[1].kern, &argCount, &kernelArgs, wrld->device->cuWidth);
214    
215      //Setup the Scheduler Kernel's arguments      //Setup the Scheduler Kernel's arguments
216      argCount = 0;      argCount = 0;
217      SetScheduleKernelArgs(wrld->kernel[2].kern, &argCount, &kernelArgs);      SetScheduleKernelArgs(wrld->kernel[2].kern, &argCount, &kernelArgs);
218    
     clFinish(wrld->cmdQ);  
   
219      double t0 = airTime();      double t0 = airTime();
220    
     clFinish(wrld->cmdQ);  
   
221      while (scheduler.numAvailable > 0) {      while (scheduler.numAvailable > 0) {
222        // Runs the update kernel on all  strands        // Runs the update kernel on all  strands
223          sts = clEnqueueNDRangeKernel(wrld->cmdQ, wrld->kernel[0].kern, 1, NULL,          sts = clEnqueueNDRangeKernel(wrld->cmdQ, wrld->kernel[0].kern, 1, NULL,
# Line 231  Line 238 
238         sts = clEnqueueReadBuffer(wrld->cmdQ, kernelArgs.schedMem, CL_TRUE, 0, sizeof(SchedState_t),         sts = clEnqueueReadBuffer(wrld->cmdQ, kernelArgs.schedMem, CL_TRUE, 0, sizeof(SchedState_t),
239                  &scheduler, 0, NULL, NULL);                  &scheduler, 0, NULL, NULL);
240         CheckErrorCode (sts, "error reading back scheduler information\n");         CheckErrorCode (sts, "error reading back scheduler information\n");
   
   
241      }      }
242    
243      sts = clEnqueueReadBuffer(wrld->cmdQ, kernelArgs.outMem, CL_TRUE, 0,shadowSize,      sts = clEnqueueReadBuffer(wrld->cmdQ, kernelArgs.outMem, CL_TRUE, 0,shadowSize,
# Line 265  Line 270 
270      else      else
271          Diderot_Print (wrld);          Diderot_Print (wrld);
272    
   
273      Diderot_Shutdown (wrld);      Diderot_Shutdown (wrld);
274    
275      return 0;      return 0;
# Line 346  Line 350 
350    
351      *argCount = count;      *argCount = count;
352  }  }
353    
354  static void SetScheduleKernelArgs(cl_kernel kernel, int *argCount, KernelArgs_t *args)  static void SetScheduleKernelArgs(cl_kernel kernel, int *argCount, KernelArgs_t *args)
355  {  {
356      int count = *argCount;      int count = *argCount;
# Line 442  Line 447 
447  static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld)  static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld)
448  {  {
449      cl_int              sts;      cl_int              sts;
450        int                 pltIx = 0;  // main platform index
451    
452    // find a GPU on platform[0]    // find a GPU on platform[0]
453      DeviceInfo_t *dev = 0;      DeviceInfo_t *dev = 0;
     clInfo->mainPlatformIdx = 0;  
454      int i;      int i;
455      for (i = 0;  i < clInfo->platforms[0].numDevices;  i++) {      for (i = 0;  i < clInfo->platforms[pltIx].numDevices;  i++) {
456          if (isGPUDevice (&(clInfo->platforms[0].devices[i]))          if (isGPUDevice (&(clInfo->platforms[pltIx].devices[i]))
457          &&  clInfo->platforms[0].devices[i].isAvail) {          &&  clInfo->platforms[pltIx].devices[i].isAvail) {
458              dev = &(clInfo->platforms[0].devices[i]);              dev = &(clInfo->platforms[pltIx].devices[i]);
             clInfo->mainDeviceIdx = i;  
459              break;              break;
460          }          }
461      }      }
# Line 462  Line 466 
466      }      }
467    
468      if (VerboseFlg) {      if (VerboseFlg) {
469          fprintf (stderr, "using platform 0, device %d: %s\n",          fprintf (stderr, "using platform %d, device %d: %s\n",
470              i, clInfo->platforms[0].devices[i].name);              pltIx, i, clInfo->platforms[0].devices[i].name);
471      }      }
472    
473    // create the context    // create the context
# Line 524  Line 528 
528      }      }
529    
530    // initialize world info    // initialize world info
531      wrld->device = dev->id;      wrld->device = dev;
532      wrld->context = cxt;      wrld->context = cxt;
533      wrld->cmdQ = q;      wrld->cmdQ = q;
534    

Legend:
Removed from v.1648  
changed lines
  Added in v.1649

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0