Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /trunk/src/lib/cl-target/main.c
ViewVC logotype

Annotation of /trunk/src/lib/cl-target/main.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2109 - (view) (download) (as text)

1 : jhr 1671 /*! \file main.c
2 :     *
3 :     * \author John Reppy
4 :     */
5 :    
6 :     /*
7 :     * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
8 :     * All rights reserved.
9 :     */
10 :    
11 :     #include <Diderot/diderot.h>
12 :     #include "clinfo.h"
13 :     #include <sys/sysctl.h>
14 :     #include <sys/stat.h>
15 :    
16 :     typedef struct {
17 :     cl_int blkIdx; //!< the id of this block
18 :     cl_int nActive; //!< number of active (status != DIE or STABLE) strands
19 :     cl_int nDead; //!< number of strands in the DIE state
20 :     cl_int nStabilizing; //!< number of new strands in the STABILIZE state
21 :     cl_int nDying; //!< number of new strands in the DIE state
22 :     } StrandBlock_t;
23 :    
24 :     typedef struct {
25 :     cl_int numStrands; //!< number of strands
26 :     cl_int sId; //!< the index into the todo list or queue
27 :     cl_int nextStrand; //!< index of the next strand to retrieve from the pool
28 :     cl_int clearQueueSz; //!< an indicator on whether the queue size should be cleared
29 :     cl_int queueSize; //!< number of blocks on the scheduler's queue
30 :     cl_int todoSize; //!< number of blocks on the scheduler's todo list
31 :     cl_int numAvailable; //!< number of active strands left to process
32 :     } SchedState_t;
33 :    
34 :     typedef struct {
35 :     cl_kernel kern; //!< OpenCL kernel that implements the program
36 :     size_t workGrpSize; //!< size of workgroup for this kernel
37 :     cl_ulong localSzb; //!< size of local memory used by kernel
38 :     } GPUKernel_t;
39 :    
40 :     struct struct_world {
41 :     STRUCT_WORLD_PREFIX
42 :     // FIXME: document these fields!
43 :     void *inState;
44 :     void *outState;
45 :     DeviceInfo_t *device; //!< info about OpenCL device that we are using.
46 :     uint32_t nWorkers; //!< number of work groups to create
47 :     cl_context context; //!< OpenCL execution context
48 :     cl_command_queue cmdQ; //!< OpenCL command queue
49 :     GPUKernel_t kernel[3]; //!< OpenCL kernel that implements the program
50 :     };
51 :    
52 :     // FIXME: need documentation for this stuct!
53 :     typedef struct {
54 :     cl_mem schedMem;
55 :     cl_mem outMem;
56 :     cl_mem inMem;
57 :     cl_mem blocksMem;
58 :     cl_mem strandBlocksIdxsMem;
59 :     cl_mem queueMem;
60 :     cl_mem todoMem;
61 :     cl_mem statusMem;
62 :     } KernelArgs_t;
63 :    
64 : jhr 2109 static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld, int devID);
65 : jhr 1671 static void CheckErrorCode (cl_int sts, const char * msg);
66 :     static void SetPhase1Args (cl_kernel kernel, int *argCount, KernelArgs_t *args);
67 :     static void SetPhase2Args (cl_kernel kernel, int *argCount, KernelArgs_t *args, int blk_sz);
68 :     static void SetScheduleKernelArgs (cl_kernel kernel, int *argCount, KernelArgs_t *args);
69 :    
70 :     static bool CreateKernel (cl_device_id dev, cl_program prog, const char *name, GPUKernel_t *kern);
71 :    
72 :     extern void Diderot_LoadGlobals (cl_context cxt, cl_kernel kernel, cl_command_queue cmdQ, int argStart);
73 :    
74 :     int main (int argc, const char **argv)
75 :     {
76 :     // get information about OpenCL support
77 :     CLInfo_t *clInfo = GetCLInfo();
78 :     if (clInfo == 0) {
79 :     fprintf (stderr, "no OpenCL support\n");
80 :     exit (1);
81 :     }
82 :    
83 :     // run the generated global initialization code
84 :     if (VerboseFlg)
85 :     fprintf (stderr, "initializing globals ...\n");
86 :     Diderot_InitGlobals ();
87 :    
88 :     Diderot_int_t nWorkersPerCU = 4;
89 : jhr 2109 Diderot_int_t gpuDevID = -1;
90 : jhr 1671 Diderot_Options_t *opts = Diderot_OptNew ();
91 :     Diderot_OptAddInt (opts, "np", "specify number of workers/CU", &nWorkersPerCU, true);
92 : jhr 2109 Diderot_OptAddInt (opts, "devno", "specify GPU device ID", &gpuDevID, true);
93 : jhr 1671 Diderot_RegisterGlobalOpts (opts);
94 :     Diderot_OptProcess (opts, argc, argv);
95 :     Diderot_OptFree (opts);
96 :    
97 :     if (VerboseFlg)
98 :     PrintCLInfo (stdout, clInfo);
99 :    
100 :     Diderot_World_t *wrld = Diderot_Initially (); // this may not be right for OpenCL
101 :    
102 : jhr 2109 if (! InitCL(clInfo, wrld, gpuDevID))
103 : jhr 1671 exit (1);
104 :    
105 :     // set the number of work groups
106 :     if ((0 < nWorkersPerCU) && (nWorkersPerCU < wrld->device->maxWISize[0] / wrld->device->cuWidth))
107 :     wrld->nWorkers = nWorkersPerCU * wrld->device->numCUs;
108 :     else
109 :     wrld->nWorkers = wrld->device->numCUs; // fallback to one worker per CU
110 :     if (VerboseFlg)
111 :     fprintf (stderr, "using %d x %d threads\n",
112 :     wrld->nWorkers, wrld->device->cuWidth);
113 :    
114 :     // Conversion of strands from their host types to their shadow types
115 :     void *shadowInState = CheckedAlloc(Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands);
116 :     void *shadowOutState = CheckedAlloc(Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands);
117 :     uint8_t *strandPtr = (uint8_t *)wrld->inState;
118 :     uint8_t *strandShadowPtr = (uint8_t *)shadowInState;
119 :     uint8_t *strandShadowOutPtr = (uint8_t *)shadowOutState;
120 :     size_t shadowSize = Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands;
121 :    
122 :     for (int i = 0; i < wrld->numStrands; i++) {
123 :     Diderot_Strands[0]->strandCopy (strandPtr, strandShadowPtr);
124 :     Diderot_Strands[0]->strandCopy (strandPtr, strandShadowOutPtr);
125 :     strandPtr += Diderot_Strands[0]->stateSzb;
126 :     strandShadowPtr += Diderot_Strands[0]->shadowStrandSzb;
127 :     strandShadowOutPtr += Diderot_Strands[0]->shadowStrandSzb;
128 :     }
129 :     // FIXME: it is confusing to use the inState/outState pointers for two different purposes.
130 :     free (wrld->inState);
131 :     free (wrld->outState);
132 :     wrld->inState = shadowInState;
133 :     wrld->outState = shadowOutState;
134 :    
135 :     // hack to make the invariant part of the state the same in both copies
136 :     memcpy (wrld->outState, wrld->inState, shadowSize);
137 :    
138 :     int argCount = 0;
139 :     cl_int sts = CL_SUCCESS;
140 :    
141 :     KernelArgs_t kernelArgs;
142 :     SchedState_t scheduler;
143 :    
144 :     scheduler.numStrands = wrld->numStrands;
145 :     scheduler.nextStrand = 0;
146 :     scheduler.todoSize = 0;
147 :     scheduler.sId = 0;
148 :     scheduler.clearQueueSz= 1;
149 :     scheduler.numAvailable = wrld->numStrands;
150 :    
151 :     size_t globalWorkSize[1] = {0};
152 :     size_t localWorkSize[1] = {0};
153 :     globalWorkSize[0] = wrld->nWorkers * wrld->device->cuWidth;
154 :     localWorkSize[0] = wrld->device->cuWidth;
155 :    
156 :     // compute the number of scheduler blocks
157 :     int numberOfBlocks = (wrld->numStrands + wrld->device->cuWidth - 1) / wrld->device->cuWidth;
158 :     assert (numberOfBlocks * wrld->device->cuWidth >= wrld->numStrands);
159 :    
160 :     size_t strandBlkMemSize = sizeof(int) * numberOfBlocks * wrld->device->cuWidth;
161 :     size_t schedListMemSize = sizeof(int) * numberOfBlocks;
162 :     int *schedulerQueue = (int *)CheckedAlloc(schedListMemSize);
163 :     int *schedulerTodoList = (int *)CheckedAlloc(schedListMemSize);
164 :     int *strandBlocksIdxs = (int *)CheckedAlloc(strandBlkMemSize);
165 :     scheduler.queueSize = numberOfBlocks;
166 :    
167 :     size_t strandblkMemSize = sizeof(StrandBlock_t) * numberOfBlocks;
168 :     StrandBlock_t *strandBlks = (StrandBlock_t *)CheckedAlloc(strandblkMemSize);
169 :    
170 :     for (int i = 0; i < numberOfBlocks; i++) {
171 :     schedulerQueue[i] = i;
172 :     strandBlks[i].blkIdx = i;
173 :     strandBlks[i].nActive = 0;
174 :     strandBlks[i].nDead = 0;
175 :     strandBlks[i].nStabilizing = 0;
176 :     strandBlks[i].nDying = 0;
177 :     }
178 :    
179 :     kernelArgs.schedMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
180 :     sizeof(SchedState_t), &scheduler, &sts);
181 :     CheckErrorCode (sts, "error creating OpenCL scheduler buffer\n");
182 :    
183 :     kernelArgs.blocksMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
184 :     strandblkMemSize, strandBlks , &sts);
185 :     CheckErrorCode (sts, "error creating OpenCL strand blocks buffer\n");
186 :    
187 :     kernelArgs.strandBlocksIdxsMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
188 :     strandBlkMemSize, strandBlocksIdxs , &sts);
189 :     CheckErrorCode (sts, "error creating OpenCL strand blocks indices buffer\n");
190 :    
191 :     kernelArgs.queueMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
192 :     schedListMemSize, schedulerQueue , &sts);
193 :     CheckErrorCode (sts, "error creating OpenCL scheduler queue buffer\n");
194 :    
195 :     kernelArgs.todoMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
196 :     schedListMemSize, schedulerTodoList , &sts);
197 :     CheckErrorCode (sts, "error creating OpenCL scheduler todo buffer\n");
198 :    
199 :     kernelArgs.inMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
200 :     shadowSize, wrld->inState, &sts);
201 :     CheckErrorCode (sts, "error creating OpenCL strand in-state buffer\n");
202 :    
203 :     kernelArgs.outMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
204 :     shadowSize, wrld->outState, &sts);
205 :     CheckErrorCode (sts, "error creating OpenCL strand out-state buffer\n");
206 :    
207 :     kernelArgs.statusMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
208 :     sizeof(int) * wrld->numStrands, wrld->status, &sts);
209 :     CheckErrorCode (sts, "error creating OpenCL world status buffer\n");
210 :    
211 :     //Setup the Update Kernel's arguments
212 :     SetPhase1Args (wrld->kernel[0].kern, &argCount, &kernelArgs);
213 :     Diderot_LoadGlobals (wrld->context, wrld->kernel[0].kern, wrld->cmdQ, argCount);
214 :    
215 :     //Setup the Compaction Kernel's arguments
216 :     argCount=0;
217 :     SetPhase2Args (wrld->kernel[1].kern, &argCount, &kernelArgs, wrld->device->cuWidth);
218 :    
219 :     //Setup the Scheduler Kernel's arguments
220 :     argCount = 0;
221 :     SetScheduleKernelArgs (wrld->kernel[2].kern, &argCount, &kernelArgs);
222 :    
223 :     double t0 = airTime();
224 :    
225 :     while (scheduler.numAvailable > 0) {
226 :     // Runs the update kernel on all strands
227 :     sts = clEnqueueNDRangeKernel(wrld->cmdQ, wrld->kernel[0].kern, 1, NULL,
228 :     globalWorkSize, localWorkSize, 0, NULL, NULL);
229 :     CheckErrorCode (sts, "error in executing update kernel\n");
230 :    
231 :     sts = clEnqueueTask(wrld->cmdQ,wrld->kernel[2].kern, 0, NULL, NULL);
232 :     CheckErrorCode (sts, "error in executing scheduler update kernel before compaction\n");
233 :    
234 :     // Run the compaction kernel on all strands
235 :     sts = clEnqueueNDRangeKernel(wrld->cmdQ, wrld->kernel[1].kern, 1, NULL,
236 :     globalWorkSize, localWorkSize, 0, NULL, NULL);
237 :     CheckErrorCode (sts, "error ccccn executing compaction kernel\n");
238 :    
239 :     sts = clEnqueueTask(wrld->cmdQ,wrld->kernel[2].kern,0,NULL,NULL);
240 :     CheckErrorCode (sts, "error in executing scheduler update kernel after compaction\n");
241 :    
242 :     sts = clEnqueueReadBuffer(wrld->cmdQ, kernelArgs.schedMem, CL_TRUE, 0, sizeof(SchedState_t),
243 :     &scheduler, 0, NULL, NULL);
244 :     CheckErrorCode (sts, "error reading back scheduler information\n");
245 :     }
246 :    
247 :     sts = clEnqueueReadBuffer(wrld->cmdQ, kernelArgs.outMem, CL_TRUE, 0,shadowSize,
248 :     wrld->outState, 0, NULL, NULL);
249 :     CheckErrorCode (sts, "error in reading back output\n");
250 :    
251 :     sts = clEnqueueReadBuffer(
252 :     wrld->cmdQ, kernelArgs.statusMem, CL_TRUE, 0, sizeof(int) * wrld->numStrands,
253 :     wrld->status, 0, NULL, NULL);
254 :     CheckErrorCode (sts, "error in reading back status\n");
255 :    
256 :     wrld->outputSzb = Diderot_Strands[0]->shadowStrandSzb;
257 :     clFinish (wrld->cmdQ);
258 :     clReleaseKernel(wrld->kernel[0].kern);
259 :     clReleaseKernel(wrld->kernel[1].kern);
260 :     clReleaseCommandQueue(wrld->cmdQ);
261 :     clReleaseContext(wrld->context);
262 :    
263 :     double totalTime = airTime() - t0;
264 :    
265 :     if (VerboseFlg)
266 :     fprintf (stderr, "done in %f seconds\n", totalTime);
267 :     else if (TimingFlg)
268 :     printf ("usr=%f\n", totalTime);
269 :    
270 :     // here we have the final state of all of the strands in the "in" buffer
271 :     // output the final strand states
272 :     if (NrrdOutputFlg)
273 :     Diderot_Output (wrld, Diderot_Strands[0]->shadowStrandSzb);
274 :     else
275 :     Diderot_Print (wrld);
276 :    
277 :     Diderot_Shutdown (wrld);
278 :    
279 :     return 0;
280 :    
281 :     }
282 :    
283 :     static void SetPhase1Args (cl_kernel kernel, int *argCount, KernelArgs_t *args)
284 :     {
285 :     int count = *argCount;
286 :     cl_int sts = CL_SUCCESS;
287 :    
288 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->inMem);
289 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand in-state argument\n");
290 :    
291 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->outMem);
292 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand out-state argument\n");
293 :    
294 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->statusMem);
295 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL world status argument\n");
296 :    
297 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->schedMem);
298 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL scheduler argument\n");
299 :    
300 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->blocksMem);
301 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand blocks argument\n");
302 :    
303 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->strandBlocksIdxsMem);
304 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand blocks' indices argument\n");
305 :    
306 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->queueMem);
307 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL scheduler queue argument\n");
308 :    
309 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->todoMem);
310 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL scheduler todo argument\n");
311 :    
312 :     sts = clSetKernelArg (kernel, count++, sizeof(StrandBlock_t), NULL);
313 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL local strand blockargument\n");
314 :    
315 :     *argCount = count;
316 :    
317 :     }
318 :    
319 :     static void SetPhase2Args (cl_kernel kernel, int *argCount, KernelArgs_t *args, int blk_sz)
320 :     {
321 :     int count = *argCount;
322 :     cl_int sts = CL_SUCCESS;
323 :    
324 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->statusMem);
325 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL world status argument\n");
326 :    
327 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->schedMem);
328 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL scheduler argument\n");
329 :    
330 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->blocksMem);
331 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL strand blocks argument\n");
332 :    
333 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->strandBlocksIdxsMem);
334 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL strand blocks' indices argument\n");
335 :    
336 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->queueMem);
337 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL scheduler queue argument\n");
338 :    
339 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->todoMem);
340 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL scheduler todo argument\n");
341 :    
342 :     sts = clSetKernelArg (kernel, count++, sizeof(StrandBlock_t), NULL);
343 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local strand blockargument\n");
344 :    
345 :     sts = clSetKernelArg (kernel, count++, sizeof(int) * blk_sz, NULL);
346 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local preStable argument\n");
347 :    
348 :     sts = clSetKernelArg (kernel, count++, sizeof(int) * blk_sz, NULL);
349 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local preDead argument\n");
350 :    
351 :     sts = clSetKernelArg (kernel, count++, sizeof(int) * blk_sz * 2, NULL);
352 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local temporary array for the prefix scan of preStable preDead argument\n");
353 :    
354 :    
355 :     *argCount = count;
356 :     }
357 :    
358 :     static void SetScheduleKernelArgs(cl_kernel kernel, int *argCount, KernelArgs_t *args)
359 :     {
360 :     int count = *argCount;
361 :     cl_int sts = CL_SUCCESS;
362 :    
363 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->schedMem);
364 :     CheckErrorCode (sts, "Scheduler Kernel: error setting OpenCL scheduler argument\n");
365 :    
366 :     *argCount = count;
367 :     }
368 :    
369 :     static void CheckErrorCode (cl_int sts, const char *msg)
370 :     {
371 :     if (sts != CL_SUCCESS) {
372 :     fprintf (stderr, "%s", msg);
373 :     exit(1);
374 :     }
375 :     }
376 :    
377 :     /*! \brief load OpenCL code from a file
378 :     */
379 :     static char *LoadSource (const char *filename)
380 :     {
381 :     struct stat statbuf;
382 :     if (stat(filename, &statbuf) < 0) {
383 :     fprintf (stderr, "unable to stat OpenCL source file %s\n", filename);
384 :     exit (1);
385 :     }
386 :    
387 :     char *source = (char *)CheckedAlloc(statbuf.st_size + 1);
388 :     if (source == 0) {
389 :     fprintf (stderr, "unable to allocate memory for OpenCL source\n");
390 :     exit (1);
391 :     }
392 :    
393 :     FILE *fh = fopen(filename, "r");
394 :     if ((fh == 0)
395 :     || (fread(source, statbuf.st_size, 1, fh) != 1)) {
396 :     fprintf (stderr, "unable to read OpenCL source from %s\n", filename);
397 :     exit (1);
398 :     }
399 :     source[statbuf.st_size] = '\0';
400 :     fclose (fh);
401 :    
402 :     return source;
403 :     }
404 :    
405 :     //! create a kernel object from a program
406 :     static bool CreateKernel (cl_device_id dev, cl_program prog, const char *name, GPUKernel_t *kern)
407 :     {
408 :     cl_int sts;
409 :    
410 :     kern->kern = clCreateKernel(prog, name, &sts);
411 :     if (sts != CL_SUCCESS) {
412 :     fprintf (stderr, "error getting %s from program\n", name);
413 :     return false;
414 :     }
415 :     sts = clGetKernelWorkGroupInfo (
416 :     kern->kern, dev, CL_KERNEL_WORK_GROUP_SIZE,
417 :     sizeof(size_t), &(kern->workGrpSize), 0);
418 :     if (sts != CL_SUCCESS) {
419 :     fprintf (stderr, "error getting workgroup size for %s\n", name);
420 :     return false;
421 :     }
422 :     sts = clGetKernelWorkGroupInfo (
423 :     kern->kern, dev, CL_KERNEL_LOCAL_MEM_SIZE,
424 :     sizeof(cl_ulong), &(kern->localSzb), 0);
425 :     if (sts != CL_SUCCESS) {
426 :     fprintf (stderr, "error getting local memory size for %s\n", name);
427 :     return false;
428 :     }
429 :    
430 :     if (VerboseFlg) {
431 :     fprintf(stderr, "kernel %s: workgroup size = %d, local memory = %d bytes\n",
432 :     name, (int)kern->workGrpSize, (int)kern->localSzb);
433 :     }
434 :    
435 :     return true;
436 :     }
437 :    
438 :     static void LogMessagesToStderr (const char *errstr, const void *private_info, size_t cb, void *user_data)
439 :     {
440 :     fprintf(stderr, "***** error log *****\n");
441 :     fprintf(stderr, "%s\n", errstr);
442 :     fprintf(stderr, "***** end error log *****\n");
443 :     }
444 :    
445 :     /*! \brief initialize the OpenCL execution context, including loading and compiling the OpenCL
446 :     * program.
447 :     * \param clInfo points to the summary information about the available OpenCL devices.
448 :     * \param wrld the Diderot execution information.
449 :     * \return true if initialization is successful, otherwise false.
450 :     */
451 : jhr 2109 static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld, int devID)
452 : jhr 1671 {
453 :     cl_int sts;
454 :     int pltIx = 0; // main patform index
455 : jhr 2109 DeviceInfo_t *dev = 0;
456 : jhr 1671
457 : jhr 2109 if ((0 <= devID) && (devID < clInfo->platforms[pltIx].numDevices)) {
458 :     // use the user supplied device
459 :     if (isGPUDevice (&(clInfo->platforms[pltIx].devices[devID]))
460 :     && clInfo->platforms[pltIx].devices[devID].isAvail) {
461 :     dev = &(clInfo->platforms[pltIx].devices[devID]);
462 : jhr 1671 }
463 : jhr 2109 else {
464 :     fprintf (stderr, "specified OpenCL device %d is not a GPU or not available\n", devID);
465 :     return false;
466 :     }
467 : jhr 1671 }
468 : jhr 2109 else {
469 :     // find a GPU on platform[0]
470 :     for (int i = 0; i < clInfo->platforms[pltIx].numDevices; i++) {
471 :     if (isGPUDevice (&(clInfo->platforms[pltIx].devices[i]))
472 :     && clInfo->platforms[pltIx].devices[i].isAvail) {
473 :     devID = i;
474 :     dev = &(clInfo->platforms[pltIx].devices[i]);
475 :     break;
476 :     }
477 :     }
478 :     if (dev == 0) {
479 :     fprintf (stderr, "unable to find GPU device\n");
480 :     return false;
481 :     }
482 : jhr 1671 }
483 :    
484 :     if (VerboseFlg) {
485 :     fprintf (stderr, "using platform %d, device %d: %s\n",
486 : jhr 2109 pltIx, devID, dev->name);
487 : jhr 1671 }
488 :    
489 :     // create the context
490 :     cl_context cxt = clCreateContext(0, 1, &(dev->id), LogMessagesToStderr, 0, &sts);
491 :     if (sts != CL_SUCCESS) {
492 :     fprintf (stderr, "error creating OpenCL context\n");
493 :     return false;
494 :     }
495 :    
496 :     // create the program from the source
497 :     int fnameLen = strlen(wrld->name) + 4; // name + ".cl\0"
498 :     char *fname = (char *)CheckedAlloc(fnameLen);
499 :     snprintf(fname, fnameLen, "%s.cl", wrld->name);
500 :     char *updateSource = LoadSource (fname);
501 :     free (fname);
502 :    
503 :     const char *src[1] = {updateSource};
504 :     cl_program prog = clCreateProgramWithSource(cxt, 1, src, NULL, &sts);
505 :     free (updateSource);
506 :     if (sts != CL_SUCCESS) {
507 :     fprintf (stderr, "error creating program\n");
508 :     return false;
509 :     }
510 :    
511 :     // build the program
512 :     char options[1024];
513 :     snprintf (options, sizeof(options),
514 :     "-D DIDEROT_CL_VERSION=%d -D DIDEROT_CU_WIDTH=%d -I %s -w",
515 :     100*dev->majorVersion + dev->minorVersion,
516 :     dev->cuWidth,
517 :     DIDEROT_INCLUDE_PATH);
518 :     if (VerboseFlg) {
519 :     fprintf (stderr, "clBuildProgram options: %s\n", options);
520 :     }
521 :     sts = clBuildProgram (prog, 1, &(dev->id), options, 0, 0);
522 :     if (sts != CL_SUCCESS) {
523 :     size_t logSzb;
524 :     clGetProgramBuildInfo (prog, dev->id, CL_PROGRAM_BUILD_LOG, 0, 0, &logSzb);
525 :     char *log = CheckedAlloc(logSzb+1);
526 :     clGetProgramBuildInfo (prog, dev->id, CL_PROGRAM_BUILD_LOG, logSzb, log, &logSzb);
527 :     log[logSzb] = '\0';
528 :     fprintf (stderr, "error compiling program:\n%s\n", log);
529 :     free (log);
530 :     return false;
531 :     }
532 :    
533 :     // extract the kernels from the program
534 :     if ((! CreateKernel (dev->id, prog, "Diderot_UpdateKernel", &(wrld->kernel[0])))
535 :     || (! CreateKernel (dev->id, prog, "Diderot_CompactionKernel", &(wrld->kernel[1])))
536 :     || (! CreateKernel (dev->id, prog, "Diderot_SchedUpdateKernel", &(wrld->kernel[2]))))
537 :     return false;
538 :    
539 :     // create the command queue
540 :     cl_command_queue q = clCreateCommandQueue(cxt, dev->id, 0, &sts);
541 :     if (sts != CL_SUCCESS) {
542 :     fprintf (stderr, "error creating OpenCL command queue\n");
543 :     return false;
544 :     }
545 :    
546 :     // initialize world info
547 :     wrld->device = dev;
548 :     wrld->context = cxt;
549 :     wrld->cmdQ = q;
550 :    
551 :     return true;
552 :     }
553 :    
554 :     // this should be the part of the scheduler
555 :     void *Diderot_AllocStrand (Strand_t *strand)
556 :     {
557 :     return CheckedAlloc(strand->stateSzb);
558 :     }
559 :    
560 :     // block allocation of an initial collection of strands
561 :     Diderot_World_t *Diderot_AllocInitially (
562 :     const char *name, // the name of the program
563 :     Strand_t *strand, // the type of strands being allocated
564 :     bool isArray, // is the initialization an array or collection?
565 :     uint32_t nDims, // depth of iteration nesting
566 :     int32_t *base, // nDims array of base indices
567 :     uint32_t *size) // nDims array of iteration sizes
568 :     {
569 :     Diderot_World_t *wrld = NEW(Diderot_World_t);
570 :     if (wrld == 0) {
571 :     fprintf (stderr, "unable to allocate world\n");
572 :     exit (1);
573 :     }
574 :    
575 :     wrld->name = name; /* NOTE: we are assuming that name is statically allocated! */
576 :     wrld->isArray = isArray;
577 :     wrld->nDims = nDims;
578 :     wrld->base = NEWVEC(int32_t, nDims);
579 :     wrld->size = NEWVEC(uint32_t, nDims);
580 :     size_t numStrands = 1;
581 :     for (int i = 0; i < wrld->nDims; i++) {
582 :     numStrands *= size[i];
583 :     wrld->base[i] = base[i];
584 :     wrld->size[i] = size[i];
585 :     }
586 :    
587 :     if (VerboseFlg) {
588 :     printf("AllocInitially: %d", size[0]);
589 :     for (int i = 1; i < nDims; i++) printf(" x %d", size[i]);
590 :     printf("\n");
591 :     }
592 :    
593 :     // allocate the strand state pointers
594 :     wrld->numStrands = numStrands;
595 :     wrld->inState = CheckedAlloc (strand->stateSzb * numStrands);
596 :     wrld->outState = CheckedAlloc (strand->shadowStrandSzb * numStrands);
597 :     wrld->status = NEWVEC(StatusInt_t, numStrands);
598 :    
599 :     // initialize strand state pointers etc.
600 :     for (size_t i = 0; i < numStrands; i++) {
601 :     wrld->status[i] = DIDEROT_ACTIVE;
602 :     }
603 :    
604 :     return wrld;
605 :    
606 :     }
607 :    
608 :     // get strand state pointers
609 :     void *Diderot_InState (Diderot_World_t *wrld, uint32_t i)
610 :     {
611 :     assert (i < wrld->numStrands);
612 :     return wrld->inState + (i * Diderot_Strands[0]->stateSzb);
613 :     }
614 :    
615 :     void *Diderot_OutState (Diderot_World_t *wrld, uint32_t i)
616 :     {
617 :     assert (i < wrld->numStrands);
618 :     // FIXME: this function is called when outState points to the shadow state,
619 :     // but it is confusing to me -- JHR
620 :     return wrld->outState + (i * Diderot_Strands[0]->shadowStrandSzb);
621 :     }
622 :    
623 :     /***** Support for shadow image values *****/
624 :    
625 :     void ShadowImage1D (cl_context cxt, Shadow_image1D_t *dst, Diderot_image1D_t *src)
626 :     {
627 :     dst->size[0] = src->size[0];
628 :     dst->s = src->s;
629 :     dst->t = src->t;
630 :     }
631 :    
632 :     void ShadowImage2D (cl_context cxt, Shadow_image2D_t *dst, Diderot_image2D_t *src)
633 :     {
634 :     dst->size[0] = src->size[0];
635 :     dst->size[1] = src->size[1];
636 :     ShadowMat2x2 (dst->w2i, src->w2i);
637 :     ShadowVec2 (&dst->tVec, src->tVec);
638 :     ShadowMat2x2 (dst->w2iT, src->w2iT);
639 :     }
640 :    
641 :     void ShadowImage3D (cl_context cxt, Shadow_image3D_t *dst, Diderot_image3D_t *src)
642 :     {
643 :     dst->size[0] = src->size[0];
644 :     dst->size[1] = src->size[1];
645 :     dst->size[2] = src->size[2];
646 :     ShadowMat3x3 (dst->w2i, src->w2i);
647 :     ShadowVec3 (&dst->tVec, src->tVec);
648 :     ShadowMat3x3 (dst->w2iT, src->w2iT);
649 :     }

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0