Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /branches/pure-cfg/src/lib/cl-target/main.c
ViewVC logotype

Annotation of /branches/pure-cfg/src/lib/cl-target/main.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1642 - (view) (download) (as text)

1 : jhr 1267 /*! \file main.c
2 :     *
3 :     * \author John Reppy
4 :     */
5 :    
6 :     /*
7 :     * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
8 :     * All rights reserved.
9 :     */
10 :    
11 : jhr 1275 #include <Diderot/diderot.h>
12 : jhr 1411 #include "clinfo.h"
13 : lamonts 1271 #include <sys/sysctl.h>
14 :     #include <sys/stat.h>
15 : jhr 1267
16 : lamonts 1488 typedef struct {
17 : jhr 1492 cl_int blkIdx; //!< the id of this block
18 :     cl_int nActive; //!< number of active (status != DIE or STABLE) strands
19 :     cl_int nDead; //!< number of strands in the DIE state
20 :     cl_int nStabilizing; //!< number of new strands in the STABILIZE state
21 :     cl_int nDying; //!< number of new strands in the DIE state
22 : lamonts 1488 } StrandBlock_t;
23 :    
24 :     typedef struct {
25 : jhr 1492 cl_int numStrands; //!< number of strands
26 :     cl_int sId; //!< the index into the todo list or queue
27 : lamonts 1501 cl_int nextStrand; //!< index of the next strand to retrieve from the pool
28 :     cl_int clearQueueSz; //!< an indicator on whether the queue size should be cleared
29 : jhr 1492 cl_int queueSize; //!< number of blocks on the scheduler's queue
30 :     cl_int todoSize; //!< number of blocks on the scheduler's todo list
31 : jhr 1519 cl_int numAvailable; //!< number of active strands left to process
32 : lamonts 1488 } SchedState_t;
33 :    
34 :    
35 : jhr 1267 // NOTE: we probably should put this in a file that supports runtime printing
36 :     static bool VerboseFlg = false;
37 : jhr 1287 static bool TimingFlg = false;
38 : lamonts 1513 static bool NrrdOutputFlg = false;
39 : lamonts 1470 #define WARP_SIZE 32
40 : lamonts 1459
41 : jhr 1473 typedef struct {
42 :     cl_kernel kern; //!< OpenCL kernel that implements the program
43 :     size_t workGrpSize; //!< size of workgroup for this kernel
44 :     cl_ulong localSzb; //!< size of local memory used by kernel
45 :     } GPUKernel_t;
46 :    
47 : jhr 1267 struct struct_world {
48 : jhr 1519 STRUCT_WORLD_PREFIX
49 :     // FIXME: document these fields!
50 : lamonts 1341 void *inState;
51 :     void *outState;
52 : jhr 1471 cl_device_id device; //!< OpenCL device
53 :     cl_context context; //!< OpenCL execution context
54 :     cl_command_queue cmdQ; //!< OpenCL command queue
55 : lamonts 1500 GPUKernel_t kernel[3]; //!< OpenCL kernel that implements the program
56 : jhr 1267 };
57 : jhr 1471
58 : jhr 1519 // FIXME: need documentation for this stuct!
59 : lamonts 1459 typedef struct {
60 : jhr 1519 cl_mem schedMem;
61 :     cl_mem outMem;
62 :     cl_mem inMem;
63 :     cl_mem blocksMem;
64 :     cl_mem strandBlocksIdxsMem;
65 :     cl_mem queueMem;
66 :     cl_mem todoMem;
67 :     cl_mem statusMem;
68 :     } KernelArgs_t;
69 : jhr 1267
70 : jhr 1291 static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld);
71 : jhr 1473 static void CheckErrorCode (cl_int sts, const char * msg);
72 : jhr 1519 static void SetPhase1Args (cl_kernel kernel, int *argCount, KernelArgs_t *args);
73 :     static void SetPhase2Args (cl_kernel kernel, int *argCount, KernelArgs_t *args, int blk_sz);
74 :     static void SetScheduleKernelArgs (cl_kernel kernel, int *argCount, KernelArgs_t *args);
75 : lamonts 1500
76 : jhr 1473 static bool CreateKernel (cl_device_id dev, cl_program prog, const char *name, GPUKernel_t *kern);
77 :    
78 : jhr 1358 extern void Diderot_LoadGlobals (cl_context cxt, cl_kernel kernel, cl_command_queue cmdQ, int argStart);
79 :    
80 : jhr 1267 int main (int argc, const char **argv)
81 :     {
82 :     // get information about OpenCL support
83 :     CLInfo_t *clInfo = GetCLInfo();
84 :     if (clInfo == 0) {
85 : jhr 1327 fprintf (stderr, "no OpenCL support\n");
86 :     exit (1);
87 : jhr 1267 }
88 :    
89 : lamonts 1617
90 :     if (VerboseFlg)
91 :     PrintCLInfo (stdout, clInfo);
92 :    
93 :     // run the generated global initialization code
94 :     if (VerboseFlg)
95 :     fprintf (stderr, "initializing globals ...\n");
96 :     Diderot_InitGlobals ();
97 :    
98 :    
99 : jhr 1267 Diderot_Options_t *opts = Diderot_OptNew ();
100 :    
101 :     Diderot_OptAddFlag (opts, "verbose", "enable runtime-system messages", &VerboseFlg);
102 : jhr 1411 Diderot_OptAddFlag (opts, "timing", "enable execution timing", &TimingFlg);
103 : lamonts 1513 Diderot_OptAddFlag (opts, "nrrd", "enable nrrd output", &NrrdOutputFlg);
104 : jhr 1267 Diderot_RegisterGlobalOpts (opts);
105 :     Diderot_OptProcess (opts, argc, argv);
106 :     Diderot_OptFree (opts);
107 :    
108 : jhr 1291 Diderot_World_t *wrld = Diderot_Initially (); // this may not be right for OpenCL
109 : lamonts 1459
110 : jhr 1291 if (! InitCL(clInfo, wrld))
111 :     exit (1);
112 : jhr 1282
113 : lamonts 1488 // Conversion of strands from their host types to their shadow types
114 : jhr 1472 void *shadowInState = CheckedAlloc(Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands);
115 : lamonts 1488 void *shadowOutState = CheckedAlloc(Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands);
116 : lamonts 1462 uint8_t *strandPtr = (uint8_t *)wrld->inState;
117 :     uint8_t *strandShadowPtr = (uint8_t *)shadowInState;
118 : lamonts 1488 uint8_t *strandShadowOutPtr = (uint8_t *)shadowInState;
119 : lamonts 1462 size_t shadowSize = Diderot_Strands[0]->shadowStrandSzb * wrld->numStrands;
120 :    
121 : jhr 1469 for (int i = 0; i < wrld->numStrands; i++) {
122 :     Diderot_Strands[0]->strandCopy (strandPtr, strandShadowPtr);
123 : lamonts 1488 Diderot_Strands[0]->strandCopy (strandPtr, strandShadowOutPtr);
124 : jhr 1469 strandPtr += Diderot_Strands[0]->stateSzb;
125 :     strandShadowPtr += Diderot_Strands[0]->shadowStrandSzb;
126 : lamonts 1488 strandShadowOutPtr += Diderot_Strands[0]->shadowStrandSzb;
127 : lamonts 1462 }
128 : jhr 1519 // FIXME: it is confusing to use the inState/outState pointers for two different purposes.
129 : jhr 1471 free (wrld->inState);
130 : jhr 1519 free (wrld->outState);
131 : lamonts 1462 wrld->inState = shadowInState;
132 : lamonts 1488 wrld->outState = shadowOutState;
133 : lamonts 1462
134 : jhr 1469 // hack to make the invariant part of the state the same in both copies
135 : lamonts 1462 memcpy (wrld->outState, wrld->inState, shadowSize);
136 :    
137 : jhr 1327 int argCount = 0;
138 :     cl_int sts = CL_SUCCESS;
139 : lamonts 1316
140 : jhr 1519 KernelArgs_t kernelArgs;
141 : lamonts 1488 SchedState_t scheduler;
142 : lamonts 1346
143 : lamonts 1488 scheduler.numStrands = wrld->numStrands;
144 : lamonts 1500 scheduler.nextStrand = 0;
145 : lamonts 1488 scheduler.todoSize = 0;
146 :     scheduler.sId = 0;
147 : lamonts 1501 scheduler.clearQueueSz= 1;
148 : lamonts 1500 scheduler.numAvailable = wrld->numStrands;
149 : lamonts 1488
150 : lamonts 1465 size_t globalWorkSize[1] = {0};
151 : lamonts 1488 size_t localWorkSize[1] = {0};
152 : jhr 1492 int dIdx = clInfo->mainDeviceIdx;
153 : lamonts 1488 int pIdx = clInfo->mainPlatformIdx;
154 :     globalWorkSize[0] = clInfo->platforms[pIdx].devices[dIdx].numCUs * clInfo->platforms[pIdx].devices[dIdx].cuWidth;
155 :     localWorkSize[0] = clInfo->platforms[pIdx].devices[dIdx].cuWidth;
156 : lamonts 1502 int numberOfBlocks = ceil((double)wrld->numStrands/localWorkSize[0]);
157 : lamonts 1488
158 :     size_t strandBlkMemSize = sizeof(int) * numberOfBlocks * clInfo->platforms[pIdx].devices[dIdx].cuWidth;;
159 :     size_t schedListMemSize = sizeof(int) * numberOfBlocks;
160 : jhr 1492 int *schedulerQueue = (int *)CheckedAlloc(schedListMemSize);
161 :     int *schedulerTodoList = (int *)CheckedAlloc(schedListMemSize);
162 :     int *strandBlocksIdxs = (int *)CheckedAlloc(strandBlkMemSize);
163 : lamonts 1488 scheduler.queueSize = numberOfBlocks;
164 : lamonts 1513
165 : lamonts 1488 size_t strandblkMemSize = sizeof(StrandBlock_t) * numberOfBlocks;
166 :     StrandBlock_t * strandBlks = (StrandBlock_t *)CheckedAlloc(strandblkMemSize);
167 : lamonts 1470
168 : jhr 1492 for (int i = 0; i < numberOfBlocks; i++) {
169 : lamonts 1488 schedulerQueue[i] = i;
170 :     strandBlks[i].blkIdx = i;
171 :     strandBlks[i].nActive = 0;
172 :     strandBlks[i].nDead = 0;
173 :     strandBlks[i].nStabilizing = 0;
174 : jhr 1492 strandBlks[i].nDying = 0;
175 : jhr 1432 }
176 : lamonts 1459
177 : lamonts 1488 kernelArgs.schedMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
178 :     sizeof(SchedState_t), &scheduler, &sts);
179 :     CheckErrorCode (sts, "error creating OpenCL scheduler buffer\n");
180 : lamonts 1459
181 : lamonts 1488 kernelArgs.blocksMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
182 :     strandblkMemSize, strandBlks , &sts);
183 :     CheckErrorCode (sts, "error creating OpenCL strand blocks buffer\n");
184 : lamonts 1465
185 : lamonts 1488 kernelArgs.strandBlocksIdxsMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
186 :     strandBlkMemSize, strandBlocksIdxs , &sts);
187 :     CheckErrorCode (sts, "error creating OpenCL strand blocks indices buffer\n");
188 : lamonts 1425
189 : lamonts 1488 kernelArgs.queueMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
190 :     schedListMemSize, schedulerQueue , &sts);
191 :     CheckErrorCode (sts, "error creating OpenCL scheduler queue buffer\n");
192 :    
193 :     kernelArgs.todoMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
194 :     schedListMemSize, schedulerTodoList , &sts);
195 :     CheckErrorCode (sts, "error creating OpenCL scheduler todo buffer\n");
196 :    
197 :     kernelArgs.inMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
198 :     shadowSize, wrld->inState, &sts);
199 :     CheckErrorCode (sts, "error creating OpenCL strand in-state buffer\n");
200 :    
201 :     kernelArgs.outMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
202 :     shadowSize, wrld->outState, &sts);
203 :     CheckErrorCode (sts, "error creating OpenCL strand out-state buffer\n");
204 :    
205 :     kernelArgs.statusMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
206 :     sizeof(int) * wrld->numStrands, wrld->status, &sts);
207 :     CheckErrorCode (sts, "error creating OpenCL world status buffer\n");
208 :    
209 : lamonts 1513 //Setup the Update Kernel's arguments
210 : lamonts 1488 SetPhase1Args(wrld->kernel[0].kern, &argCount, &kernelArgs);
211 :     Diderot_LoadGlobals(wrld->context, wrld->kernel[0].kern, wrld->cmdQ, argCount);
212 : lamonts 1459
213 : lamonts 1513 //Setup the Compaction Kernel's arguments
214 : lamonts 1488 argCount=0;
215 :     SetPhase2Args(wrld->kernel[1].kern, &argCount, &kernelArgs,clInfo->platforms[pIdx].devices[dIdx].cuWidth);
216 :    
217 : lamonts 1513 //Setup the Scheduler Kernel's arguments
218 : lamonts 1500 argCount = 0;
219 :     SetScheduleKernelArgs(wrld->kernel[2].kern, &argCount, &kernelArgs);
220 :    
221 : lamonts 1459 clFinish(wrld->cmdQ);
222 :    
223 : jhr 1603 double t0 = airTime();
224 : lamonts 1459
225 : jhr 1469 clFinish(wrld->cmdQ);
226 : lamonts 1461
227 : lamonts 1500 while (scheduler.numAvailable > 0) {
228 : jhr 1519 // Runs the update kernel on all strands
229 : lamonts 1488 sts = clEnqueueNDRangeKernel(wrld->cmdQ, wrld->kernel[0].kern, 1, NULL,
230 : jhr 1519 globalWorkSize, localWorkSize, 0, NULL, NULL);
231 : lamonts 1500 CheckErrorCode (sts, "error in executing update kernel\n");
232 : lamonts 1488
233 : jhr 1519 sts = clEnqueueTask(wrld->cmdQ,wrld->kernel[2].kern, 0, NULL, NULL);
234 : lamonts 1500 CheckErrorCode (sts, "error in executing scheduler update kernel before compaction\n");
235 : lamonts 1513
236 : lamonts 1500 // Run the compaction kernel on all strands
237 :     sts = clEnqueueNDRangeKernel(wrld->cmdQ, wrld->kernel[1].kern, 1, NULL,
238 : jhr 1492 globalWorkSize, localWorkSize, 0, NULL, NULL);
239 : lamonts 1513 CheckErrorCode (sts, "error ccccn executing compaction kernel\n");
240 : lamonts 1501
241 : lamonts 1500 sts = clEnqueueTask(wrld->cmdQ,wrld->kernel[2].kern,0,NULL,NULL);
242 :     CheckErrorCode (sts, "error in executing scheduler update kernel after compaction\n");
243 : lamonts 1459
244 : lamonts 1513 sts = clEnqueueReadBuffer(wrld->cmdQ, kernelArgs.schedMem, CL_TRUE, 0, sizeof(SchedState_t),
245 : lamonts 1500 &scheduler, 0, NULL, NULL);
246 : lamonts 1513 CheckErrorCode (sts, "error reading back scheduler information\n");
247 : lamonts 1459 }
248 : lamonts 1470
249 : lamonts 1488 sts = clEnqueueReadBuffer(wrld->cmdQ, kernelArgs.outMem, CL_TRUE, 0,shadowSize,
250 : jhr 1469 wrld->outState, 0, NULL, NULL);
251 : jhr 1471 CheckErrorCode (sts, "error in reading back output\n");
252 : lamonts 1459
253 : jhr 1469 sts = clEnqueueReadBuffer(
254 : lamonts 1488 wrld->cmdQ, kernelArgs.statusMem, CL_TRUE, 0, sizeof(int) * wrld->numStrands,
255 : jhr 1469 wrld->status, 0, NULL, NULL);
256 : jhr 1471 CheckErrorCode (sts, "error in reading back status\n");
257 : jhr 1342
258 : lamonts 1526 wrld->outputSzb = Diderot_Strands[0]->shadowStrandSzb;
259 : jhr 1469 clFinish (wrld->cmdQ);
260 : jhr 1473 clReleaseKernel(wrld->kernel[0].kern);
261 :     clReleaseKernel(wrld->kernel[1].kern);
262 : lamonts 1316 clReleaseCommandQueue(wrld->cmdQ);
263 :     clReleaseContext(wrld->context);
264 :    
265 : jhr 1603 double totalTime = airTime() - t0;
266 : jhr 1287
267 :     if (VerboseFlg)
268 :     fprintf (stderr, "done in %f seconds\n", totalTime);
269 :     else if (TimingFlg)
270 :     printf ("usr=%f\n", totalTime);
271 :    
272 : nseltzer 1453 // here we have the final state of all of the strands in the "in" buffer
273 : lamonts 1513 // output the final strand states
274 :     if (NrrdOutputFlg)
275 : jhr 1519 Diderot_Output (wrld, Diderot_Strands[0]->shadowStrandSzb);
276 : lamonts 1513 else
277 : jhr 1519 Diderot_Print (wrld);
278 : jhr 1287
279 : lamonts 1526
280 : lamonts 1488 Diderot_Shutdown (wrld);
281 : jhr 1287
282 :     return 0;
283 :    
284 : lamonts 1271 }
285 : jhr 1469
286 : jhr 1519 static void SetPhase1Args (cl_kernel kernel, int *argCount, KernelArgs_t *args)
287 : lamonts 1459 {
288 : jhr 1492 int count = *argCount;
289 : lamonts 1459 cl_int sts = CL_SUCCESS;
290 : jhr 1282
291 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->inMem);
292 : lamonts 1500 CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand in-state argument\n");
293 : lamonts 1488
294 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->outMem);
295 : lamonts 1500 CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand out-state argument\n");
296 : lamonts 1459
297 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->statusMem);
298 : lamonts 1500 CheckErrorCode (sts, "Update Kernel: error Setting OpenCL world status argument\n");
299 : lamonts 1459
300 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->schedMem);
301 : lamonts 1500 CheckErrorCode (sts, "Update Kernel: error Setting OpenCL scheduler argument\n");
302 : lamonts 1459
303 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->blocksMem);
304 : lamonts 1500 CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand blocks argument\n");
305 : lamonts 1459
306 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->strandBlocksIdxsMem);
307 : lamonts 1500 CheckErrorCode (sts, "Update Kernel: error Setting OpenCL strand blocks' indices argument\n");
308 : lamonts 1459
309 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->queueMem);
310 : lamonts 1500 CheckErrorCode (sts, "Update Kernel: error Setting OpenCL scheduler queue argument\n");
311 : lamonts 1488
312 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->todoMem);
313 : lamonts 1500 CheckErrorCode (sts, "Update Kernel: error Setting OpenCL scheduler todo argument\n");
314 : lamonts 1488
315 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(StrandBlock_t), NULL);
316 : lamonts 1513 CheckErrorCode (sts, "Update Kernel: error Setting OpenCL local strand blockargument\n");
317 : lamonts 1488
318 : jhr 1492 *argCount = count;
319 :    
320 : jhr 1469 }
321 :    
322 : jhr 1519 static void SetPhase2Args (cl_kernel kernel, int *argCount, KernelArgs_t *args, int blk_sz)
323 : lamonts 1459 {
324 : jhr 1492 int count = *argCount;
325 :     cl_int sts = CL_SUCCESS;
326 : lamonts 1459
327 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->statusMem);
328 : lamonts 1500 CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL world status argument\n");
329 : lamonts 1459
330 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->schedMem);
331 : lamonts 1500 CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL scheduler argument\n");
332 : lamonts 1488
333 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->blocksMem);
334 : lamonts 1500 CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL strand blocks argument\n");
335 : lamonts 1488
336 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->strandBlocksIdxsMem);
337 : lamonts 1500 CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL strand blocks' indices argument\n");
338 : lamonts 1488
339 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->queueMem);
340 : lamonts 1500 CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL scheduler queue argument\n");
341 : lamonts 1488
342 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->todoMem);
343 : lamonts 1500 CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL scheduler todo argument\n");
344 : lamonts 1488
345 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(StrandBlock_t), NULL);
346 : lamonts 1500 CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local strand blockargument\n");
347 : lamonts 1488
348 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(int) * blk_sz, NULL);
349 : lamonts 1500 CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local preStable argument\n");
350 : lamonts 1488
351 : jhr 1492 sts = clSetKernelArg (kernel, count++, sizeof(int) * blk_sz, NULL);
352 : lamonts 1500 CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local preDead argument\n");
353 : lamonts 1488
354 : lamonts 1500 sts = clSetKernelArg (kernel, count++, sizeof(int) * blk_sz * 2, NULL);
355 :     CheckErrorCode (sts, "Compaction Kernel: error Setting OpenCL local temporary array for the prefix scan of preStable preDead argument\n");
356 : lamonts 1488
357 : lamonts 1513
358 : jhr 1492 *argCount = count;
359 : jhr 1469 }
360 : jhr 1519 static void SetScheduleKernelArgs(cl_kernel kernel, int *argCount, KernelArgs_t *args)
361 : lamonts 1500 {
362 :     int count = *argCount;
363 :     cl_int sts = CL_SUCCESS;
364 :    
365 :     sts = clSetKernelArg (kernel, count++, sizeof(cl_mem), &args->schedMem);
366 :     CheckErrorCode (sts, "Scheduler Kernel: error setting OpenCL scheduler argument\n");
367 : jhr 1469
368 : lamonts 1500 *argCount = count;
369 :     }
370 :    
371 : jhr 1471 static void CheckErrorCode (cl_int sts, const char *msg)
372 : lamonts 1459 {
373 :     if (sts != CL_SUCCESS) {
374 : jhr 1464 fprintf (stderr, "%s", msg);
375 : lamonts 1459 exit(1);
376 :     }
377 : jhr 1469 }
378 :    
379 : jhr 1287 /*! \brief load OpenCL code from a file
380 :     */
381 : jhr 1291 static char *LoadSource (const char *filename)
382 : lamonts 1271 {
383 :     struct stat statbuf;
384 : jhr 1287 if (stat(filename, &statbuf) < 0) {
385 : jhr 1327 fprintf (stderr, "unable to stat OpenCL source file %s\n", filename);
386 :     exit (1);
387 : jhr 1287 }
388 :    
389 : jhr 1472 char *source = (char *)CheckedAlloc(statbuf.st_size + 1);
390 : jhr 1287 if (source == 0) {
391 : jhr 1327 fprintf (stderr, "unable to allocate memory for OpenCL source\n");
392 :     exit (1);
393 : jhr 1287 }
394 :    
395 :     FILE *fh = fopen(filename, "r");
396 :     if ((fh == 0)
397 :     || (fread(source, statbuf.st_size, 1, fh) != 1)) {
398 : jhr 1327 fprintf (stderr, "unable to read OpenCL source from %s\n", filename);
399 :     exit (1);
400 : jhr 1287 }
401 :     source[statbuf.st_size] = '\0';
402 :     fclose (fh);
403 : lamonts 1271
404 :     return source;
405 : jhr 1342 }
406 :    
407 : jhr 1473 //! create a kernel object from a program
408 :     static bool CreateKernel (cl_device_id dev, cl_program prog, const char *name, GPUKernel_t *kern)
409 :     {
410 :     cl_int sts;
411 :    
412 :     kern->kern = clCreateKernel(prog, name, &sts);
413 :     if (sts != CL_SUCCESS) {
414 :     fprintf (stderr, "error getting %s from program\n", name);
415 :     return false;
416 :     }
417 :     sts = clGetKernelWorkGroupInfo (
418 :     kern->kern, dev, CL_KERNEL_WORK_GROUP_SIZE,
419 :     sizeof(size_t), &(kern->workGrpSize), 0);
420 :     if (sts != CL_SUCCESS) {
421 :     fprintf (stderr, "error getting workgroup size for %s\n", name);
422 :     return false;
423 :     }
424 :     sts = clGetKernelWorkGroupInfo (
425 :     kern->kern, dev, CL_KERNEL_LOCAL_MEM_SIZE,
426 :     sizeof(cl_ulong), &(kern->localSzb), 0);
427 :     if (sts != CL_SUCCESS) {
428 :     fprintf (stderr, "error getting local memory size for %s\n", name);
429 :     return false;
430 :     }
431 :    
432 :     if (VerboseFlg) {
433 :     fprintf(stderr, "kernel %s: workgroup size = %d, local memory = %d bytes\n",
434 :     name, (int)kern->workGrpSize, (int)kern->localSzb);
435 :     }
436 :    
437 :     return true;
438 :     }
439 :    
440 : jhr 1352 static void LogMessagesToStderr (const char *errstr, const void *private_info, size_t cb, void *user_data)
441 : lamonts 1328 {
442 : jhr 1480 fprintf(stderr, "***** error log *****\n");
443 : jhr 1342 fprintf(stderr, "%s\n", errstr);
444 : jhr 1480 fprintf(stderr, "***** end error log *****\n");
445 : jhr 1342 }
446 : jhr 1267
447 : jhr 1417 /*! \brief initialize the OpenCL execution context, including loading and compiling the OpenCL
448 :     * program.
449 :     * \param clInfo points to the summary information about the available OpenCL devices.
450 :     * \param wrld the Diderot execution information.
451 :     * \return true if initialization is successful, otherwise false.
452 : jhr 1291 */
453 :     static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld)
454 :     {
455 : jhr 1327 cl_int sts;
456 : jhr 1291
457 :     // find a GPU on platform[0]
458 : jhr 1420 DeviceInfo_t *dev = 0;
459 : lamonts 1488 clInfo->mainPlatformIdx = 0;
460 : jhr 1291 int i;
461 :     for (i = 0; i < clInfo->platforms[0].numDevices; i++) {
462 : jhr 1417 if (isGPUDevice (&(clInfo->platforms[0].devices[i]))
463 : jhr 1472 && clInfo->platforms[0].devices[i].isAvail) {
464 : jhr 1420 dev = &(clInfo->platforms[0].devices[i]);
465 : lamonts 1488 clInfo->mainDeviceIdx = i;
466 : jhr 1291 break;
467 :     }
468 :     }
469 : lamonts 1488
470 : jhr 1420 if (dev == 0) {
471 : jhr 1291 fprintf (stderr, "unable to find GPU device\n");
472 :     return false;
473 :     }
474 :    
475 : jhr 1469 if (VerboseFlg) {
476 :     fprintf (stderr, "using platform 0, device %d: %s\n",
477 :     i, clInfo->platforms[0].devices[i].name);
478 :     }
479 :    
480 : jhr 1291 // create the context
481 : jhr 1420 cl_context cxt = clCreateContext(0, 1, &(dev->id), LogMessagesToStderr, 0, &sts);
482 : jhr 1291 if (sts != CL_SUCCESS) {
483 : jhr 1327 fprintf (stderr, "error creating OpenCL context\n");
484 : jhr 1291 return false;
485 :     }
486 :    
487 :     // create the program from the source
488 :     int fnameLen = strlen(wrld->name) + 4; // name + ".cl\0"
489 : jhr 1472 char *fname = (char *)CheckedAlloc(fnameLen);
490 : jhr 1291 snprintf(fname, fnameLen, "%s.cl", wrld->name);
491 : lamonts 1459 char *updateSource = LoadSource (fname);
492 : jhr 1464 free (fname);
493 : lamonts 1459
494 : lamonts 1642 const char *src[1] = {updateSource};
495 :     cl_program prog = clCreateProgramWithSource(cxt, 1, src, NULL, &sts);
496 : jhr 1492 free (updateSource);
497 : jhr 1291 if (sts != CL_SUCCESS) {
498 : jhr 1327 fprintf (stderr, "error creating program\n");
499 : jhr 1291 return false;
500 :     }
501 :    
502 :     // build the program
503 : jhr 1420 char options[1024];
504 :     snprintf (options, sizeof(options),
505 : jhr 1472 "-D DIDEROT_CL_VERSION=%d -D DIDEROT_CU_WIDTH=%d -I %s -w",
506 : jhr 1420 100*dev->majorVersion + dev->minorVersion,
507 : jhr 1472 dev->cuWidth,
508 : jhr 1420 DIDEROT_INCLUDE_PATH);
509 : jhr 1472 if (VerboseFlg) {
510 :     fprintf (stderr, "clBuildProgram options: %s\n", options);
511 :     }
512 : jhr 1420 sts = clBuildProgram (prog, 1, &(dev->id), options, 0, 0);
513 : jhr 1291 if (sts != CL_SUCCESS) {
514 : jhr 1352 size_t logSzb;
515 : jhr 1420 clGetProgramBuildInfo (prog, dev->id, CL_PROGRAM_BUILD_LOG, 0, 0, &logSzb);
516 : jhr 1472 char *log = CheckedAlloc(logSzb+1);
517 : jhr 1420 clGetProgramBuildInfo (prog, dev->id, CL_PROGRAM_BUILD_LOG, logSzb, log, &logSzb);
518 : jhr 1352 log[logSzb] = '\0';
519 :     fprintf (stderr, "error compiling program:\n%s\n", log);
520 :     free (log);
521 : jhr 1291 return false;
522 :     }
523 :    
524 : jhr 1473 // extract the kernels from the program
525 : lamonts 1488 if ((! CreateKernel (dev->id, prog, "Diderot_UpdateKernel", &(wrld->kernel[0])))
526 : lamonts 1500 || (! CreateKernel (dev->id, prog, "Diderot_CompactionKernel", &(wrld->kernel[1])))
527 :     || (! CreateKernel (dev->id, prog, "Diderot_SchedUpdateKernel", &(wrld->kernel[2]))))
528 : jhr 1291 return false;
529 :    
530 : jhr 1417 // create the command queue
531 : jhr 1420 cl_command_queue q = clCreateCommandQueue(cxt, dev->id, 0, &sts);
532 : jhr 1417 if (sts != CL_SUCCESS) {
533 :     fprintf (stderr, "error creating OpenCL command queue\n");
534 :     return false;
535 :     }
536 :    
537 : jhr 1291 // initialize world info
538 : jhr 1420 wrld->device = dev->id;
539 : jhr 1291 wrld->context = cxt;
540 :     wrld->cmdQ = q;
541 :    
542 :     return true;
543 :     }
544 : jhr 1327
545 : jhr 1267 // this should be the part of the scheduler
546 :     void *Diderot_AllocStrand (Strand_t *strand)
547 :     {
548 : jhr 1472 return CheckedAlloc(strand->stateSzb);
549 : jhr 1267 }
550 :    
551 :     // block allocation of an initial collection of strands
552 : jhr 1519 Diderot_World_t *Diderot_AllocInitially (
553 : jhr 1287 const char *name, // the name of the program
554 : jhr 1267 Strand_t *strand, // the type of strands being allocated
555 :     bool isArray, // is the initialization an array or collection?
556 :     uint32_t nDims, // depth of iteration nesting
557 :     int32_t *base, // nDims array of base indices
558 :     uint32_t *size) // nDims array of iteration sizes
559 :     {
560 : jhr 1472 Diderot_World_t *wrld = NEW(Diderot_World_t);
561 : jhr 1267 if (wrld == 0) {
562 :     fprintf (stderr, "unable to allocate world\n");
563 :     exit (1);
564 :     }
565 :    
566 : jhr 1287 wrld->name = name; /* NOTE: we are assuming that name is statically allocated! */
567 : jhr 1267 wrld->isArray = isArray;
568 :     wrld->nDims = nDims;
569 : jhr 1472 wrld->base = NEWVEC(int32_t, nDims);
570 :     wrld->size = NEWVEC(uint32_t, nDims);
571 : jhr 1267 size_t numStrands = 1;
572 :     for (int i = 0; i < wrld->nDims; i++) {
573 :     numStrands *= size[i];
574 :     wrld->base[i] = base[i];
575 :     wrld->size[i] = size[i];
576 :     }
577 :    
578 : jhr 1378 if (VerboseFlg) {
579 :     printf("AllocInitially: %d", size[0]);
580 :     for (int i = 1; i < nDims; i++) printf(" x %d", size[i]);
581 :     printf("\n");
582 :     }
583 : jhr 1267
584 :     // allocate the strand state pointers
585 :     wrld->numStrands = numStrands;
586 : jhr 1519 wrld->inState = CheckedAlloc (strand->stateSzb * numStrands);
587 : jhr 1472 wrld->outState = CheckedAlloc (strand->shadowStrandSzb * numStrands);
588 : jhr 1519 wrld->status = NEWVEC(StatusInt_t, numStrands);
589 : jhr 1267
590 :     // initialize strand state pointers etc.
591 :     for (size_t i = 0; i < numStrands; i++) {
592 : lamonts 1351 wrld->status[i] = DIDEROT_ACTIVE;
593 : jhr 1267 }
594 :    
595 :     return wrld;
596 :    
597 :     }
598 :    
599 :     // get strand state pointers
600 :     void *Diderot_InState (Diderot_World_t *wrld, uint32_t i)
601 :     {
602 :     assert (i < wrld->numStrands);
603 : jhr 1519 return &(wrld->inState[i]);
604 : jhr 1267 }
605 :    
606 :     void *Diderot_OutState (Diderot_World_t *wrld, uint32_t i)
607 :     {
608 :     assert (i < wrld->numStrands);
609 : lamonts 1526 return wrld->outState + (i * wrld->outputSzb);
610 : jhr 1267 }
611 :    
612 : jhr 1354 /***** Support for shadow image values *****/
613 : jhr 1435
614 : jhr 1354 void ShadowImage1D (cl_context cxt, Shadow_image1D_t *dst, Diderot_image1D_t *src)
615 :     {
616 :     dst->size[0] = src->size[0];
617 :     dst->s = src->s;
618 : lamonts 1617 dst->t = src->t;
619 : jhr 1354 }
620 : jhr 1435
621 : jhr 1354 void ShadowImage2D (cl_context cxt, Shadow_image2D_t *dst, Diderot_image2D_t *src)
622 :     {
623 : lamonts 1461 dst->size[0] = src->size[0];
624 : jhr 1354 dst->size[1] = src->size[1];
625 : jhr 1355 ShadowMat2x2 (dst->w2i, src->w2i);
626 : lamonts 1461 ShadowVec2 (&dst->tVec, src->tVec);
627 : jhr 1355 ShadowMat2x2 (dst->w2iT, src->w2iT);
628 : jhr 1354 }
629 :    
630 :     void ShadowImage3D (cl_context cxt, Shadow_image3D_t *dst, Diderot_image3D_t *src)
631 :     {
632 :     dst->size[0] = src->size[0];
633 :     dst->size[1] = src->size[1];
634 :     dst->size[2] = src->size[2];
635 : jhr 1355 ShadowMat3x3 (dst->w2i, src->w2i);
636 : lamonts 1461 ShadowVec3 (&dst->tVec, src->tVec);
637 : jhr 1435 ShadowMat3x3 (dst->w2iT, src->w2iT);
638 : jhr 1354 }

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0