Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /branches/vis12/src/lib/cl-target/main.c
ViewVC logotype

Annotation of /branches/vis12/src/lib/cl-target/main.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1685 - (view) (download) (as text)

1 : jhr 1671 /*! \file main.c
2 :     *
3 :     * \author John Reppy
4 :     */
5 :    
6 :     /*
7 :     * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
8 :     * All rights reserved.
9 :     */
10 :    
11 :     #include <Diderot/diderot.h>
12 :     #include "clinfo.h"
13 :     #include <sys/sysctl.h>
14 :     #include <sys/stat.h>
15 :    
16 :     typedef struct {
17 :     cl_int blkIdx; //!< the id of this block
18 :     cl_int nActive; //!< number of active (status != DIE or STABLE) strands
19 :     cl_int nDead; //!< number of strands in the DIE state
20 :     cl_int nStabilizing; //!< number of new strands in the STABILIZE state
21 :     cl_int nDying; //!< number of new strands in the DIE state
22 :     } StrandBlock_t;
23 :    
24 :     typedef struct {
25 :     cl_int numStrands; //!< number of strands
26 :     cl_int sId; //!< the index into the todo list or queue
27 :     cl_int nextStrand; //!< index of the next strand to retrieve from the pool
28 :     cl_int clearQueueSz; //!< an indicator on whether the queue size should be cleared
29 :     cl_int queueSize; //!< number of blocks on the scheduler's queue
30 :     cl_int todoSize; //!< number of blocks on the scheduler's todo list
31 :     cl_int numAvailable; //!< number of active strands left to process
32 :     } SchedState_t;
33 :    
34 :     typedef struct {
35 :     cl_kernel kern; //!< OpenCL kernel that implements the program
36 :     size_t workGrpSize; //!< size of workgroup for this kernel
37 :     cl_ulong localSzb; //!< size of local memory used by kernel
38 :     } GPUKernel_t;
39 :    
40 :     struct struct_world {
41 :     STRUCT_WORLD_PREFIX
42 :     // FIXME: document these fields!
43 :     void *inState;
44 :     void *outState;
45 :     DeviceInfo_t *device; //!< info about OpenCL device that we are using.
46 :     uint32_t nWorkers; //!< number of work groups to create
47 :     cl_context context; //!< OpenCL execution context
48 :     cl_command_queue cmdQ; //!< OpenCL command queue
49 :     GPUKernel_t kernel[3]; //!< OpenCL kernel that implements the program
50 :     };
51 :    
52 :     // FIXME: need documentation for this stuct!
53 :     typedef struct {
54 :     cl_mem schedMem;
55 :     cl_mem outMem;
56 :     cl_mem inMem;
57 :     cl_mem blocksMem;
58 :     cl_mem strandBlocksIdxsMem;
59 :     cl_mem queueMem;
60 :     cl_mem todoMem;
61 :     cl_mem statusMem;
62 :     } KernelArgs_t;
63 :    
64 :     static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld);
65 :     static void CheckErrorCode (cl_int sts, const char * msg);
66 :     static void SetPhase1Args (cl_kernel kernel, int *argCount, KernelArgs_t *args);
67 :     static void SetPhase2Args (cl_kernel kernel, int *argCount, KernelArgs_t *args, int blk_sz);
68 :     static void SetScheduleKernelArgs (cl_kernel kernel, int *argCount, KernelArgs_t *args);
69 :    
70 :     static bool CreateKernel (cl_device_id dev, cl_program prog, const char *name, GPUKernel_t *kern);
71 :    
72 :     extern void Diderot_LoadGlobals (cl_context cxt, cl_kernel kernel, cl_command_queue cmdQ, int argStart);
73 :    
74 :     int main (int argc, const char **argv)
75 :     {
76 :     // get information about OpenCL support
77 :     CLInfo_t *clInfo = GetCLInfo();
78 :     if (clInfo == 0) {
79 :     fprintf (stderr, "no OpenCL support\n");
80 :     exit (1);
81 :     }
82 :    
83 :     // run the generated global initialization code
84 :     if (VerboseFlg)
85 :     fprintf (stderr, "initializing globals ...\n");
86 :     Diderot_InitGlobals ();
87 :    
88 :     Diderot_int_t nWorkersPerCU = 4;
89 :     Diderot_Options_t *opts = Diderot_OptNew ();
90 :     Diderot_OptAddInt (opts, "np", "specify number of workers/CU", &nWorkersPerCU, true);
91 :     Diderot_RegisterGlobalOpts (opts);
92 :     Diderot_OptProcess (opts, argc, argv);
93 :     Diderot_OptFree (opts);
94 :    
95 :     if (VerboseFlg)
96 :     PrintCLInfo (stdout, clInfo);
97 :    
98 :     Diderot_World_t *wrld = Diderot_Initially (); // this may not be right for OpenCL
99 :    
100 :     if (! InitCL(clInfo, wrld))
101 :     exit (1);
102 :    
103 :     // set the number of work groups
104 :     if ((0 < nWorkersPerCU) && (nWorkersPerCU < wrld->device->maxWISize[0] / wrld->device->cuWidth))
105 :     wrld->nWorkers = nWorkersPerCU * wrld->device->numCUs;
106 :     else
107 :     wrld->nWorkers = wrld->device->numCUs; // fallback to one worker per CU
108 :     if (VerboseFlg)
109 :     fprintf (stderr, "using %d x %d threads\n",
110 :     wrld->nWorkers, wrld->device->cuWidth);
111 :    
112 :     // Conversion of strands from their host types to their shadow types
113 :     void *shadowInState = CheckedAlloc(Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands);
114 :     void *shadowOutState = CheckedAlloc(Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands);
115 :     uint8_t *strandPtr = (uint8_t *)wrld->inState;
116 :     uint8_t *strandShadowPtr = (uint8_t *)shadowInState;
117 :     uint8_t *strandShadowOutPtr = (uint8_t *)shadowOutState;
118 :     size_t shadowSize = Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands;
119 :    
120 :     for (int i = 0; i < wrld->numStrands; i++) {
121 :     Diderot_Strands[0]->strandCopy (strandPtr, strandShadowPtr);
122 :     Diderot_Strands[0]->strandCopy (strandPtr, strandShadowOutPtr);
123 :     strandPtr += Diderot_Strands[0]->stateSzb;
124 :     strandShadowPtr += Diderot_Strands[0]->shadowStrandSzb;
125 :     strandShadowOutPtr += Diderot_Strands[0]->shadowStrandSzb;
126 :     }
127 :     // FIXME: it is confusing to use the inState/outState pointers for two different purposes.
128 :     free (wrld->inState);
129 :     free (wrld->outState);
130 :     wrld->inState = shadowInState;
131 :     wrld->outState = shadowOutState;
132 :    
133 :     // hack to make the invariant part of the state the same in both copies
134 :     memcpy (wrld->outState, wrld->inState, shadowSize);
135 :    
136 :     int argCount = 0;
137 :     cl_int sts = CL_SUCCESS;
138 :    
139 :     KernelArgs_t kernelArgs;
140 :     SchedState_t scheduler;
141 :    
142 :     scheduler.numStrands = wrld->numStrands;
143 :     scheduler.nextStrand = 0;
144 :     scheduler.todoSize = 0;
145 :     scheduler.sId = 0;
146 :     scheduler.clearQueueSz= 1;
147 :     scheduler.numAvailable = wrld->numStrands;
148 :    
149 :     size_t globalWorkSize[1] = {0};
150 :     size_t localWorkSize[1] = {0};
151 :     globalWorkSize[0] = wrld->nWorkers * wrld->device->cuWidth;
152 :     localWorkSize[0] = wrld->device->cuWidth;
153 :    
154 :     // compute the number of scheduler blocks
155 :     int numberOfBlocks = (wrld->numStrands + wrld->device->cuWidth - 1) / wrld->device->cuWidth;
156 :     assert (numberOfBlocks * wrld->device->cuWidth >= wrld->numStrands);
157 :    
158 :     size_t strandBlkMemSize = sizeof(int) * numberOfBlocks * wrld->device->cuWidth;
159 :     size_t schedListMemSize = sizeof(int) * numberOfBlocks;
160 :     int *schedulerQueue = (int *)CheckedAlloc(schedListMemSize);
161 :     int *schedulerTodoList = (int *)CheckedAlloc(schedListMemSize);
162 :     int *strandBlocksIdxs = (int *)CheckedAlloc(strandBlkMemSize);
163 :     scheduler.queueSize = numberOfBlocks;
164 :    
165 :     size_t strandblkMemSize = sizeof(StrandBlock_t) * numberOfBlocks;
166 :     StrandBlock_t *strandBlks = (StrandBlock_t *)CheckedAlloc(strandblkMemSize);
167 :    
168 :     for (int i = 0; i < numberOfBlocks; i++) {
169 :     schedulerQueue[i] = i;
170 :     strandBlks[i].blkIdx = i;
171 :     strandBlks[i].nActive = 0;
172 :     strandBlks[i].nDead = 0;
173 :     strandBlks[i].nStabilizing = 0;
174 :     strandBlks[i].nDying = 0;
175 :     }
176 :    
177 :     kernelArgs.schedMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
178 :     sizeof(SchedState_t), &scheduler, &sts);
179 :     CheckErrorCode (sts, "error creating OpenCL scheduler buffer\n");
180 :    
181 :     kernelArgs.blocksMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
182 :     strandblkMemSize, strandBlks , &sts);
183 :     CheckErrorCode (sts, "error creating OpenCL strand blocks buffer\n");
184 :    
185 :     kernelArgs.strandBlocksIdxsMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
186 :     strandBlkMemSize, strandBlocksIdxs , &sts);
187 :     CheckErrorCode (sts, "error creating OpenCL strand blocks indices buffer\n");
188 :    
189 :     kernelArgs.queueMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
190 :     schedListMemSize, schedulerQueue , &sts);
191 :     CheckErrorCode (sts, "error creating OpenCL scheduler queue buffer\n");
192 :    
193 :     kernelArgs.todoMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
194 :     schedListMemSize, schedulerTodoList , &sts);
195 :     CheckErrorCode (sts, "error creating OpenCL scheduler todo buffer\n");
196 :    
197 :     kernelArgs.inMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
198 :     shadowSize, wrld->inState, &sts);
199 :     CheckErrorCode (sts, "error creating OpenCL strand in-state buffer\n");
200 :    
201 :     kernelArgs.outMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
202 :     shadowSize, wrld->outState, &sts);
203 :     CheckErrorCode (sts, "error creating OpenCL strand out-state buffer\n");
204 :    
205 :     kernelArgs.statusMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
206 :     sizeof(int) * wrld->numStrands, wrld->status, &sts);
207 :     CheckErrorCode (sts, "error creating OpenCL world status buffer\n");
208 :    
209 :     //Setup the Update Kernel's arguments
210 :     SetPhase1Args (wrld->kernel[0].kern, &argCount, &kernelArgs);
211 :     Diderot_LoadGlobals (wrld->context, wrld->kernel[0].kern, wrld->cmdQ, argCount);
212 :    
213 :     //Setup the Compaction Kernel's arguments
214 :     argCount=0;
215 :     SetPhase2Args (wrld->kernel[1].kern, &argCount, &kernelArgs, wrld->device->cuWidth);
216 :    
217 :     //Setup the Scheduler Kernel's arguments
218 :     argCount = 0;
219 :     SetScheduleKernelArgs (wrld->kernel[2].kern, &argCount, &kernelArgs);
220 :    
221 :     double t0 = airTime();
222 :    
223 :     while (scheduler.numAvailable > 0) {
224 :     // Runs the update kernel on all strands
225 :     sts = clEnqueueNDRangeKernel(wrld->cmdQ, wrld->kernel[0].kern, 1, NULL,
226 :     globalWorkSize, localWorkSize, 0, NULL, NULL);
227 :     CheckErrorCode (sts, "error in executing update kernel\n");
228 :    
229 :     sts = clEnqueueTask(wrld->cmdQ,wrld->kernel[2].kern, 0, NULL, NULL);
230 :     CheckErrorCode (sts, "error in executing scheduler update kernel before compaction\n");
231 :    
232 :     // Run the compaction kernel on all strands
233 :     sts = clEnqueueNDRangeKernel(wrld->cmdQ, wrld->kernel[1].kern, 1, NULL,
234 :     globalWorkSize, localWorkSize, 0, NULL, NULL);
235 :     CheckErrorCode (sts, "error ccccn executing compaction kernel\n");
236 :    
237 :     sts = clEnqueueTask(wrld->cmdQ,wrld->kernel[2].kern,0,NULL,NULL);
238 :     CheckErrorCode (sts, "error in executing scheduler update kernel after compaction\n");
239 :    
240 :     sts = clEnqueueReadBuffer(wrld->cmdQ, kernelArgs.schedMem, CL_TRUE, 0, sizeof(SchedState_t),
241 :     &scheduler, 0, NULL, NULL);
242 :     CheckErrorCode (sts, "error reading back scheduler information\n");
243 :     }
244 :    
245 :     sts = clEnqueueReadBuffer(wrld->cmdQ, kernelArgs.outMem, CL_TRUE, 0,shadowSize,
246 :     wrld->outState, 0, NULL, NULL);
247 :     CheckErrorCode (sts, "error in reading back output\n");
248 :    
249 :     sts = clEnqueueReadBuffer(
250 :     wrld->cmdQ, kernelArgs.statusMem, CL_TRUE, 0, sizeof(int) * wrld->numStrands,
251 :     wrld->status, 0, NULL, NULL);
252 :     CheckErrorCode (sts, "error in reading back status\n");
253 :    
254 :     wrld->outputSzb = Diderot_Strands[0]->shadowStrandSzb;
255 :     clFinish (wrld->cmdQ);
256 :     clReleaseKernel(wrld->kernel[0].kern);
257 :     clReleaseKernel(wrld->kernel[1].kern);
258 :     clReleaseCommandQueue(wrld->cmdQ);
259 :     clReleaseContext(wrld->context);
260 :    
261 :     double totalTime = airTime() - t0;
262 :    
263 :     if (VerboseFlg)
264 :     fprintf (stderr, "done in %f seconds\n", totalTime);
265 :     else if (TimingFlg)
266 :     printf ("usr=%f\n", totalTime);
267 :    
268 :     // here we have the final state of all of the strands in the "in" buffer
269 :     // output the final strand states
270 :     if (NrrdOutputFlg)
271 :     Diderot_Output (wrld, Diderot_Strands[0]->shadowStrandSzb);
272 :     else
273 :     Diderot_Print (wrld);
274 :    
275 :     Diderot_Shutdown (wrld);
276 :    
277 :     return 0;
278 :    
279 :     }
280 :    
281 :     static void SetPhase1Args (cl_kernel kernel, int *argCount, KernelArgs_t *args)
282 :     {
283 :     int count = *argCount;
284 :     cl_int sts = CL_SUCCESS;
285 :    
286 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->inMem);
287 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand in-state argument\n");
288 :    
289 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->outMem);
290 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand out-state argument\n");
291 :    
292 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->statusMem);
293 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL world status argument\n");
294 :    
295 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->schedMem);
296 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL scheduler argument\n");
297 :    
298 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->blocksMem);
299 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand blocks argument\n");
300 :    
301 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->strandBlocksIdxsMem);
302 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand blocks' indices argument\n");
303 :    
304 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->queueMem);
305 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL scheduler queue argument\n");
306 :    
307 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->todoMem);
308 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL scheduler todo argument\n");
309 :    
310 :     sts = clSetKernelArg (kernel, count++, sizeof(StrandBlock_t), NULL);
311 :     CheckErrorCode (sts, "Update Kernel: error Setting OpenCL local strand blockargument\n");
312 :    
313 :     *argCount = count;
314 :    
315 :     }
316 :    
317 :     static void SetPhase2Args (cl_kernel kernel, int *argCount, KernelArgs_t *args, int blk_sz)
318 :     {
319 :     int count = *argCount;
320 :     cl_int sts = CL_SUCCESS;
321 :    
322 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->statusMem);
323 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL world status argument\n");
324 :    
325 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->schedMem);
326 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL scheduler argument\n");
327 :    
328 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->blocksMem);
329 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL strand blocks argument\n");
330 :    
331 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->strandBlocksIdxsMem);
332 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL strand blocks' indices argument\n");
333 :    
334 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->queueMem);
335 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL scheduler queue argument\n");
336 :    
337 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->todoMem);
338 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL scheduler todo argument\n");
339 :    
340 :     sts = clSetKernelArg (kernel, count++, sizeof(StrandBlock_t), NULL);
341 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local strand blockargument\n");
342 :    
343 :     sts = clSetKernelArg (kernel, count++, sizeof(int) * blk_sz, NULL);
344 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local preStable argument\n");
345 :    
346 :     sts = clSetKernelArg (kernel, count++, sizeof(int) * blk_sz, NULL);
347 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local preDead argument\n");
348 :    
349 :     sts = clSetKernelArg (kernel, count++, sizeof(int) * blk_sz * 2, NULL);
350 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local temporary array for the prefix scan of preStable preDead argument\n");
351 :    
352 :    
353 :     *argCount = count;
354 :     }
355 :    
356 :     static void SetScheduleKernelArgs(cl_kernel kernel, int *argCount, KernelArgs_t *args)
357 :     {
358 :     int count = *argCount;
359 :     cl_int sts = CL_SUCCESS;
360 :    
361 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->schedMem);
362 :     CheckErrorCode (sts, "Scheduler Kernel: error setting OpenCL scheduler argument\n");
363 :    
364 :     *argCount = count;
365 :     }
366 :    
367 :     static void CheckErrorCode (cl_int sts, const char *msg)
368 :     {
369 :     if (sts != CL_SUCCESS) {
370 :     fprintf (stderr, "%s", msg);
371 :     exit(1);
372 :     }
373 :     }
374 :    
375 :     /*! \brief load OpenCL code from a file
376 :     */
377 :     static char *LoadSource (const char *filename)
378 :     {
379 :     struct stat statbuf;
380 :     if (stat(filename, &statbuf) < 0) {
381 :     fprintf (stderr, "unable to stat OpenCL source file %s\n", filename);
382 :     exit (1);
383 :     }
384 :    
385 :     char *source = (char *)CheckedAlloc(statbuf.st_size + 1);
386 :     if (source == 0) {
387 :     fprintf (stderr, "unable to allocate memory for OpenCL source\n");
388 :     exit (1);
389 :     }
390 :    
391 :     FILE *fh = fopen(filename, "r");
392 :     if ((fh == 0)
393 :     || (fread(source, statbuf.st_size, 1, fh) != 1)) {
394 :     fprintf (stderr, "unable to read OpenCL source from %s\n", filename);
395 :     exit (1);
396 :     }
397 :     source[statbuf.st_size] = '\0';
398 :     fclose (fh);
399 :    
400 :     return source;
401 :     }
402 :    
403 :     //! create a kernel object from a program
404 :     static bool CreateKernel (cl_device_id dev, cl_program prog, const char *name, GPUKernel_t *kern)
405 :     {
406 :     cl_int sts;
407 :    
408 :     kern->kern = clCreateKernel(prog, name, &sts);
409 :     if (sts != CL_SUCCESS) {
410 :     fprintf (stderr, "error getting %s from program\n", name);
411 :     return false;
412 :     }
413 :     sts = clGetKernelWorkGroupInfo (
414 :     kern->kern, dev, CL_KERNEL_WORK_GROUP_SIZE,
415 :     sizeof(size_t), &(kern->workGrpSize), 0);
416 :     if (sts != CL_SUCCESS) {
417 :     fprintf (stderr, "error getting workgroup size for %s\n", name);
418 :     return false;
419 :     }
420 :     sts = clGetKernelWorkGroupInfo (
421 :     kern->kern, dev, CL_KERNEL_LOCAL_MEM_SIZE,
422 :     sizeof(cl_ulong), &(kern->localSzb), 0);
423 :     if (sts != CL_SUCCESS) {
424 :     fprintf (stderr, "error getting local memory size for %s\n", name);
425 :     return false;
426 :     }
427 :    
428 :     if (VerboseFlg) {
429 :     fprintf(stderr, "kernel %s: workgroup size = %d, local memory = %d bytes\n",
430 :     name, (int)kern->workGrpSize, (int)kern->localSzb);
431 :     }
432 :    
433 :     return true;
434 :     }
435 :    
436 :     static void LogMessagesToStderr (const char *errstr, const void *private_info, size_t cb, void *user_data)
437 :     {
438 :     fprintf(stderr, "***** error log *****\n");
439 :     fprintf(stderr, "%s\n", errstr);
440 :     fprintf(stderr, "***** end error log *****\n");
441 :     }
442 :    
443 :     /*! \brief initialize the OpenCL execution context, including loading and compiling the OpenCL
444 :     * program.
445 :     * \param clInfo points to the summary information about the available OpenCL devices.
446 :     * \param wrld the Diderot execution information.
447 :     * \return true if initialization is successful, otherwise false.
448 :     */
449 :     static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld)
450 :     {
451 :     cl_int sts;
452 :     int pltIx = 0; // main patform index
453 :    
454 :     // find a GPU on platform[0]
455 :     DeviceInfo_t *dev = 0;
456 :     int i;
457 :     for (i = 0; i < clInfo->platforms[pltIx].numDevices; i++) {
458 :     if (isGPUDevice (&(clInfo->platforms[pltIx].devices[i]))
459 :     && clInfo->platforms[pltIx].devices[i].isAvail) {
460 :     dev = &(clInfo->platforms[pltIx].devices[i]);
461 :     break;
462 :     }
463 :     }
464 :    
465 :     if (dev == 0) {
466 :     fprintf (stderr, "unable to find GPU device\n");
467 :     return false;
468 :     }
469 :    
470 :     if (VerboseFlg) {
471 :     fprintf (stderr, "using platform %d, device %d: %s\n",
472 :     pltIx, i, clInfo->platforms[0].devices[i].name);
473 :     }
474 :    
475 :     // create the context
476 :     cl_context cxt = clCreateContext(0, 1, &(dev->id), LogMessagesToStderr, 0, &sts);
477 :     if (sts != CL_SUCCESS) {
478 :     fprintf (stderr, "error creating OpenCL context\n");
479 :     return false;
480 :     }
481 :    
482 :     // create the program from the source
483 :     int fnameLen = strlen(wrld->name) + 4; // name + ".cl\0"
484 :     char *fname = (char *)CheckedAlloc(fnameLen);
485 :     snprintf(fname, fnameLen, "%s.cl", wrld->name);
486 :     char *updateSource = LoadSource (fname);
487 :     free (fname);
488 :    
489 :     const char *src[1] = {updateSource};
490 :     cl_program prog = clCreateProgramWithSource(cxt, 1, src, NULL, &sts);
491 :     free (updateSource);
492 :     if (sts != CL_SUCCESS) {
493 :     fprintf (stderr, "error creating program\n");
494 :     return false;
495 :     }
496 :    
497 :     // build the program
498 :     char options[1024];
499 :     snprintf (options, sizeof(options),
500 :     "-D DIDEROT_CL_VERSION=%d -D DIDEROT_CU_WIDTH=%d -I %s -w",
501 :     100*dev->majorVersion + dev->minorVersion,
502 :     dev->cuWidth,
503 :     DIDEROT_INCLUDE_PATH);
504 :     if (VerboseFlg) {
505 :     fprintf (stderr, "clBuildProgram options: %s\n", options);
506 :     }
507 :     sts = clBuildProgram (prog, 1, &(dev->id), options, 0, 0);
508 :     if (sts != CL_SUCCESS) {
509 :     size_t logSzb;
510 :     clGetProgramBuildInfo (prog, dev->id, CL_PROGRAM_BUILD_LOG, 0, 0, &logSzb);
511 :     char *log = CheckedAlloc(logSzb+1);
512 :     clGetProgramBuildInfo (prog, dev->id, CL_PROGRAM_BUILD_LOG, logSzb, log, &logSzb);
513 :     log[logSzb] = '\0';
514 :     fprintf (stderr, "error compiling program:\n%s\n", log);
515 :     free (log);
516 :     return false;
517 :     }
518 :    
519 :     // extract the kernels from the program
520 :     if ((! CreateKernel (dev->id, prog, "Diderot_UpdateKernel", &(wrld->kernel[0])))
521 :     || (! CreateKernel (dev->id, prog, "Diderot_CompactionKernel", &(wrld->kernel[1])))
522 :     || (! CreateKernel (dev->id, prog, "Diderot_SchedUpdateKernel", &(wrld->kernel[2]))))
523 :     return false;
524 :    
525 :     // create the command queue
526 :     cl_command_queue q = clCreateCommandQueue(cxt, dev->id, 0, &sts);
527 :     if (sts != CL_SUCCESS) {
528 :     fprintf (stderr, "error creating OpenCL command queue\n");
529 :     return false;
530 :     }
531 :    
532 :     // initialize world info
533 :     wrld->device = dev;
534 :     wrld->context = cxt;
535 :     wrld->cmdQ = q;
536 :    
537 :     return true;
538 :     }
539 :    
540 :     // this should be the part of the scheduler
541 :     void *Diderot_AllocStrand (Strand_t *strand)
542 :     {
543 :     return CheckedAlloc(strand->stateSzb);
544 :     }
545 :    
546 :     // block allocation of an initial collection of strands
547 :     Diderot_World_t *Diderot_AllocInitially (
548 :     const char *name, // the name of the program
549 :     Strand_t *strand, // the type of strands being allocated
550 :     bool isArray, // is the initialization an array or collection?
551 :     uint32_t nDims, // depth of iteration nesting
552 :     int32_t *base, // nDims array of base indices
553 :     uint32_t *size) // nDims array of iteration sizes
554 :     {
555 :     Diderot_World_t *wrld = NEW(Diderot_World_t);
556 :     if (wrld == 0) {
557 :     fprintf (stderr, "unable to allocate world\n");
558 :     exit (1);
559 :     }
560 :    
561 :     wrld->name = name; /* NOTE: we are assuming that name is statically allocated! */
562 :     wrld->isArray = isArray;
563 :     wrld->nDims = nDims;
564 :     wrld->base = NEWVEC(int32_t, nDims);
565 :     wrld->size = NEWVEC(uint32_t, nDims);
566 :     size_t numStrands = 1;
567 :     for (int i = 0; i < wrld->nDims; i++) {
568 :     numStrands *= size[i];
569 :     wrld->base[i] = base[i];
570 :     wrld->size[i] = size[i];
571 :     }
572 :    
573 :     if (VerboseFlg) {
574 :     printf("AllocInitially: %d", size[0]);
575 :     for (int i = 1; i < nDims; i++) printf(" x %d", size[i]);
576 :     printf("\n");
577 :     }
578 :    
579 :     // allocate the strand state pointers
580 :     wrld->numStrands = numStrands;
581 :     wrld->inState = CheckedAlloc (strand->stateSzb * numStrands);
582 :     wrld->outState = CheckedAlloc (strand->shadowStrandSzb * numStrands);
583 :     wrld->status = NEWVEC(StatusInt_t, numStrands);
584 :    
585 :     // initialize strand state pointers etc.
586 :     for (size_t i = 0; i < numStrands; i++) {
587 :     wrld->status[i] = DIDEROT_ACTIVE;
588 :     }
589 :    
590 :     return wrld;
591 :    
592 :     }
593 :    
594 :     // get strand state pointers
595 :     void *Diderot_InState (Diderot_World_t *wrld, uint32_t i)
596 :     {
597 :     assert (i < wrld->numStrands);
598 :     return wrld->inState + (i * Diderot_Strands[0]->stateSzb);
599 :     }
600 :    
601 :     void *Diderot_OutState (Diderot_World_t *wrld, uint32_t i)
602 :     {
603 :     assert (i < wrld->numStrands);
604 :     // FIXME: this function is called when outState points to the shadow state,
605 :     // but it is confusing to me -- JHR
606 :     return wrld->outState + (i * Diderot_Strands[0]->shadowStrandSzb);
607 :     }
608 :    
609 :     /***** Support for shadow image values *****/
610 :    
611 :     void ShadowImage1D (cl_context cxt, Shadow_image1D_t *dst, Diderot_image1D_t *src)
612 :     {
613 :     dst->size[0] = src->size[0];
614 :     dst->s = src->s;
615 :     dst->t = src->t;
616 :     }
617 :    
618 :     void ShadowImage2D (cl_context cxt, Shadow_image2D_t *dst, Diderot_image2D_t *src)
619 :     {
620 :     dst->size[0] = src->size[0];
621 :     dst->size[1] = src->size[1];
622 :     ShadowMat2x2 (dst->w2i, src->w2i);
623 :     ShadowVec2 (&dst->tVec, src->tVec);
624 :     ShadowMat2x2 (dst->w2iT, src->w2iT);
625 :     }
626 :    
627 :     void ShadowImage3D (cl_context cxt, Shadow_image3D_t *dst, Diderot_image3D_t *src)
628 :     {
629 :     dst->size[0] = src->size[0];
630 :     dst->size[1] = src->size[1];
631 :     dst->size[2] = src->size[2];
632 :     ShadowMat3x3 (dst->w2i, src->w2i);
633 :     ShadowVec3 (&dst->tVec, src->tVec);
634 :     ShadowMat3x3 (dst->w2iT, src->w2iT);
635 :     }

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0