Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /branches/pure-cfg/src/lib/cl-target/main.c
ViewVC logotype

Annotation of /branches/pure-cfg/src/lib/cl-target/main.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1346 - (view) (download) (as text)

1 : jhr 1267 /*! \file main.c
2 :     *
3 :     * \author John Reppy
4 :     */
5 :    
6 :     /*
7 :     * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
8 :     * All rights reserved.
9 :     */
10 :    
11 : jhr 1275 #include <Diderot/diderot.h>
12 : jhr 1267 #include <string.h>
13 :     #include <stdio.h>
14 :     #include <assert.h>
15 :     #include "clinfo.h"
16 : lamonts 1271 #include <sys/sysctl.h>
17 :     #include <sys/stat.h>
18 : jhr 1267
19 :     // NOTE: we probably should put this in a file that supports runtime printing
20 :     static bool VerboseFlg = false;
21 : jhr 1287 static bool TimingFlg = false;
22 : jhr 1267
23 :     struct struct_world {
24 : jhr 1287 const char *name; // the program name
25 : jhr 1267 bool isArray; // is the initialization an array or collection?
26 :     uint32_t nDims; // depth of iteration nesting
27 :     int32_t *base; // nDims array of base indices
28 :     uint32_t *size; // nDims array of iteration sizes
29 :     uint32_t numStrands; // number of strands in the world
30 : lamonts 1346 unsigned int strandSize; // the sizeof of the strand buffers
31 : lamonts 1341 void *inState;
32 :     void *outState;
33 : jhr 1267 uint8_t *status; // array of strand status flags
34 : jhr 1291 cl_device_id device; // OpenCL device
35 :     cl_context context; // OpenCL execution context
36 :     cl_command_queue cmdQ; // OpenCL command queue
37 :     cl_kernel kernel; // OpenCL Kernel that implements the program
38 : jhr 1267 };
39 :    
40 : jhr 1291 static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld);
41 : jhr 1267
42 : lamonts 1341 extern void Diderot_LoadGlobals (cl_context context, cl_kernel kernel, cl_command_queue cmdQ, int argStart);
43 : lamonts 1346 extern void printArray(FILE *outS,void * array) ;
44 : jhr 1267 int main (int argc, const char **argv)
45 :     {
46 :     // get information about OpenCL support
47 :     CLInfo_t *clInfo = GetCLInfo();
48 :     if (clInfo == 0) {
49 : jhr 1327 fprintf (stderr, "no OpenCL support\n");
50 :     exit (1);
51 : jhr 1267 }
52 :    
53 :     Diderot_Options_t *opts = Diderot_OptNew ();
54 :    
55 :     Diderot_OptAddFlag (opts, "verbose", "enable runtime-system messages", &VerboseFlg);
56 :     Diderot_RegisterGlobalOpts (opts);
57 :     Diderot_OptProcess (opts, argc, argv);
58 :     Diderot_OptFree (opts);
59 :    
60 :     // run the generated global initialization code
61 : jhr 1275 if (VerboseFlg) printf("initializing globals ...\n");
62 : lamonts 1316
63 : jhr 1291 Diderot_InitGlobals();
64 : jhr 1267
65 :     /***** FIXME: OpenCL specific stuff goes here. Things to do:
66 :     **
67 :     ** 1) copy data to GPU
68 :     ** 2) initialize strands
69 :     ** 3) run strands to termination
70 :     ** 4) load results from GPU
71 :     **/
72 : lamonts 1271
73 : jhr 1291 Diderot_World_t *wrld = Diderot_Initially (); // this may not be right for OpenCL
74 :     if (! InitCL(clInfo, wrld))
75 :     exit (1);
76 : jhr 1282
77 : jhr 1327 int argCount = 0;
78 :     cl_int sts = CL_SUCCESS;
79 : lamonts 1316
80 : jhr 1342 /* Create the strand in-state and out-state buffers */
81 :     cl_mem stateInMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
82 : lamonts 1341 wrld->strandSize, wrld->inState, &sts);
83 : jhr 1342 clFinish (wrld->cmdQ);
84 : jhr 1327 if (sts != CL_SUCCESS) {
85 :     fprintf (stderr, "error creating OpenCL strand in-state buffer\n");
86 : lamonts 1316 exit(1);
87 :     }
88 :    
89 : jhr 1342 cl_mem stateOutMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE| CL_MEM_COPY_HOST_PTR,
90 : lamonts 1341 wrld->strandSize, wrld->outState, &sts);
91 : jhr 1342 clFinish (wrld->cmdQ);
92 : jhr 1327 if (sts != CL_SUCCESS) {
93 :     fprintf (stderr, "error creating OpenCL strand in-state buffer\n");
94 :     exit(1);
95 : lamonts 1316 }
96 : lamonts 1346
97 :     cl_mem statusMem = clCreateBuffer (wrld->context, CL_MEM_READ_WRITE| CL_MEM_COPY_HOST_PTR,
98 :     sizeof(uint8_t) * wrld->numStrands, wrld->status, &sts);
99 :    
100 :     clFinish (wrld->cmdQ);
101 :    
102 :     if (sts != CL_SUCCESS) {
103 :     fprintf (stderr, "error creating OpenCL world status buffer\n");
104 :     exit(1);
105 :     }
106 :    
107 : jhr 1327 /* Set the in-state and out-state strand agruments */
108 :     sts = clSetKernelArg (wrld->kernel, argCount++, sizeof(cl_mem), &stateInMem);
109 : jhr 1342 clFinish (wrld->cmdQ);
110 : jhr 1327 if (sts != CL_SUCCESS) {
111 :     fprintf (stderr, "error Setting OpenCL strand in-state argument\n");
112 :     exit(1);
113 : lamonts 1316 }
114 : lamonts 1346
115 : jhr 1327 sts = clSetKernelArg (wrld->kernel, argCount++, sizeof(cl_mem), &stateOutMem);
116 : jhr 1342 clFinish(wrld->cmdQ);
117 :     if (sts != CL_SUCCESS) {
118 : jhr 1327 fprintf (stderr, "error Setting OpenCL strand out-state argument\n");
119 :     exit(1);
120 :     }
121 : lamonts 1316
122 : lamonts 1346 sts = clSetKernelArg (wrld->kernel, argCount++, sizeof(cl_mem), &statusMem);
123 :     clFinish (wrld->cmdQ);
124 :     if (sts != CL_SUCCESS) {
125 :     fprintf (stderr, "error Setting OpenCL world status argument\n");
126 :     exit(1);
127 :     }
128 :    
129 :     double t0 = GetTime();
130 :     /* FIXME: Also, what happens if nDims != 2? We never output results! */
131 :    
132 : jhr 1327 if (wrld->nDims == 2) {
133 :     size_t global_work_size[2], local_work_size[2];
134 : jhr 1342
135 : jhr 1327 global_work_size[0] = wrld->size[0];
136 :     global_work_size[1] = wrld->size[1];
137 :     local_work_size[0] = 1;
138 :     local_work_size[1] = 1;
139 :     cl_int width = global_work_size[1];
140 : lamonts 1316
141 : jhr 1327 sts = clSetKernelArg(wrld->kernel, argCount++, sizeof(cl_int), &width);
142 : lamonts 1341 clFinish(wrld->cmdQ);
143 : jhr 1327 if (sts != CL_SUCCESS) {
144 :     fprintf (stderr, "error Setting OpenCL width argument\n");
145 :     exit(1);
146 :     }
147 : lamonts 1316
148 : lamonts 1341 Diderot_LoadGlobals(wrld->context, wrld->kernel,wrld->cmdQ, argCount);
149 : lamonts 1316
150 : jhr 1327 sts = clEnqueueNDRangeKernel(wrld->cmdQ, wrld->kernel, 2, NULL, global_work_size,
151 :     local_work_size, 0, NULL, NULL);
152 : lamonts 1341 clFinish(wrld->cmdQ);
153 : jhr 1342 if (sts != CL_SUCCESS) {
154 : jhr 1327 fprintf (stderr, "error in executing kernel code:%d\n",sts);
155 :     exit(1);
156 :     }
157 : lamonts 1316
158 : lamonts 1346 sts = clEnqueueReadBuffer(wrld->cmdQ, stateOutMem, CL_TRUE, 0, wrld->strandSize,
159 : jhr 1327 wrld->outState, 0, NULL, NULL);
160 : jhr 1342 clFinish(wrld->cmdQ);
161 : jhr 1327 if (sts != CL_SUCCESS) {
162 :     fprintf (stderr, "error in reading back output code:%d\n",sts);
163 :     exit(1);
164 : lamonts 1346 }
165 :    
166 :     sts = clEnqueueReadBuffer(wrld->cmdQ, statusMem, CL_TRUE, 0, sizeof(uint8_t) * wrld->numStrands,
167 :     wrld->status, 0, NULL, NULL);
168 :     clFinish(wrld->cmdQ);
169 :     if (sts != CL_SUCCESS) {
170 :     fprintf (stderr, "error in reading back output code:%d\n",sts);
171 :     exit(1);
172 :     }
173 :    
174 : jhr 1342 }
175 :    
176 : jhr 1327 /* FIXME: release all OpenCL objects */
177 : lamonts 1316 clReleaseKernel(wrld->kernel);
178 :     clReleaseCommandQueue(wrld->cmdQ);
179 :     clReleaseContext(wrld->context);
180 :    
181 : jhr 1267
182 : jhr 1287 double totalTime = GetTime() - t0;
183 :    
184 :     if (VerboseFlg)
185 :     fprintf (stderr, "done in %f seconds\n", totalTime);
186 :     else if (TimingFlg)
187 :     printf ("usr=%f\n", totalTime);
188 :    
189 : jhr 1267 // here we have the final state of all of the strands in the "in" buffer
190 : jhr 1287 int outFileNameLen = strlen(wrld->name) + 5;
191 :     char *outFileName = (char *)malloc(outFileNameLen);
192 :     snprintf (outFileName, outFileNameLen, "%s.txt", wrld->name);
193 :     FILE *outS = fopen(outFileName, "w");
194 : jhr 1267 if (outS == NULL) {
195 : jhr 1287 fprintf (stderr, "Cannot open output file %s\n", outFileName);
196 : jhr 1267 exit (8);
197 : jhr 1287 }
198 :    
199 : lamonts 1346 /*for (int i = 0; i < wrld->numStrands; i++) {
200 :     if (wrld->status[i] == DIDEROT_STABILIZE)
201 :     Diderot_Strands[0]->print (outS, &wrld->outState[i]);
202 :     }*/
203 :     Diderot_Strands[0]->print (outS,wrld->status,wrld->numStrands, wrld->outState);
204 : jhr 1287 fclose (outS);
205 :    
206 :     Diderot_Shutdown (wrld);
207 :    
208 :     return 0;
209 :    
210 : lamonts 1271 }
211 : jhr 1282
212 : jhr 1287 /*! \brief load OpenCL code from a file
213 :     */
214 : jhr 1291 static char *LoadSource (const char *filename)
215 : lamonts 1271 {
216 :     struct stat statbuf;
217 : jhr 1287 if (stat(filename, &statbuf) < 0) {
218 : jhr 1327 fprintf (stderr, "unable to stat OpenCL source file %s\n", filename);
219 :     exit (1);
220 : jhr 1287 }
221 :    
222 :     char *source = (char *) malloc(statbuf.st_size + 1);
223 :     if (source == 0) {
224 : jhr 1327 fprintf (stderr, "unable to allocate memory for OpenCL source\n");
225 :     exit (1);
226 : jhr 1287 }
227 :    
228 :     FILE *fh = fopen(filename, "r");
229 :     if ((fh == 0)
230 :     || (fread(source, statbuf.st_size, 1, fh) != 1)) {
231 : jhr 1327 fprintf (stderr, "unable to read OpenCL source from %s\n", filename);
232 :     exit (1);
233 : jhr 1287 }
234 :     source[statbuf.st_size] = '\0';
235 :     fclose (fh);
236 : lamonts 1271
237 :     return source;
238 : jhr 1342 }
239 :    
240 :     void clLogMessagesToStdoutAPPLE (const char *errstr, const void *private_info, size_t cb, void *user_data)
241 : lamonts 1328 {
242 : jhr 1342 fprintf(stderr, "%s\n", errstr);
243 :     }
244 : jhr 1267
245 : jhr 1291 /*! \brief build an OpenCL program from source.
246 :     */
247 :     static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld)
248 :     {
249 : jhr 1327 cl_int sts;
250 : jhr 1291
251 :     // find a GPU on platform[0]
252 :     cl_device_id dev;
253 :     int i;
254 :     for (i = 0; i < clInfo->platforms[0].numDevices; i++) {
255 :     if (clInfo->platforms[0].devices[i].ty == CL_DEVICE_TYPE_GPU) {
256 :     dev = clInfo->platforms[0].devices[i].id;
257 :     break;
258 :     }
259 :     }
260 :     if (i == clInfo->platforms[0].numDevices) {
261 :     fprintf (stderr, "unable to find GPU device\n");
262 :     return false;
263 :     }
264 :    
265 :     // create the context
266 : lamonts 1328 cl_context cxt = clCreateContext(0, 1, &dev, clLogMessagesToStdoutAPPLE, 0, &sts);
267 : jhr 1291 if (sts != CL_SUCCESS) {
268 : jhr 1327 fprintf (stderr, "error creating OpenCL context\n");
269 : jhr 1291 return false;
270 :     }
271 :    
272 :     // create the command queue
273 :     cl_command_queue q = clCreateCommandQueue(cxt, dev, 0, &sts);
274 :     if (sts != CL_SUCCESS) {
275 : jhr 1327 fprintf (stderr, "error creating OpenCL command queue\n");
276 : jhr 1291 return false;
277 :     }
278 :    
279 :     // create the program from the source
280 :     int fnameLen = strlen(wrld->name) + 4; // name + ".cl\0"
281 :     char *fname = (char *)malloc(fnameLen);
282 :     snprintf(fname, fnameLen, "%s.cl", wrld->name);
283 :     const char *src = LoadSource (fname);
284 :     free (fname);
285 :     cl_program prog = clCreateProgramWithSource(cxt, 1, &src, NULL, &sts);
286 :     if (sts != CL_SUCCESS) {
287 : jhr 1327 fprintf (stderr, "error creating program\n");
288 : jhr 1291 return false;
289 :     }
290 :     free ((void *)src);
291 :    
292 :     // build the program
293 : jhr 1327 const char *options = "-I " DIDEROT_INCLUDE_PATH;
294 : jhr 1291 sts = clBuildProgram (prog, 1, &dev, options, 0, 0);
295 :     if (sts != CL_SUCCESS) {
296 :     size_t logSzb;
297 :     clGetProgramBuildInfo(prog, dev, CL_PROGRAM_BUILD_LOG, 0, 0, &logSzb);
298 :     char *log = malloc(logSzb+1);
299 :     clGetProgramBuildInfo(prog, dev, CL_PROGRAM_BUILD_LOG, logSzb, log, &logSzb);
300 :     log[logSzb] = '\0';
301 : jhr 1327 fprintf (stderr, "error compiling program:\n%s\n", log);
302 : jhr 1291 free (log);
303 :     return false;
304 :     }
305 :    
306 :     // extract the kernel from the program
307 : jhr 1327 cl_kernel kernel = clCreateKernel(prog, "Diderot_KernelMain", &sts);
308 : jhr 1291 if (sts != CL_SUCCESS) {
309 : jhr 1327 fprintf (stderr, "error getting kernel from program\n");
310 : jhr 1291 return false;
311 :     }
312 :    
313 :     // initialize world info
314 :     wrld->device = dev;
315 :     wrld->context = cxt;
316 :     wrld->cmdQ = q;
317 :     wrld->kernel = kernel;
318 :    
319 :     return true;
320 :     }
321 : jhr 1327
322 : jhr 1267 // this should be the part of the scheduler
323 :     void *Diderot_AllocStrand (Strand_t *strand)
324 :     {
325 :     return malloc(strand->stateSzb);
326 :     }
327 :    
328 :     // block allocation of an initial collection of strands
329 : lamonts 1346 Diderot_World_t *Diderot_AllocInitially(
330 : jhr 1287 const char *name, // the name of the program
331 : jhr 1267 Strand_t *strand, // the type of strands being allocated
332 :     bool isArray, // is the initialization an array or collection?
333 :     uint32_t nDims, // depth of iteration nesting
334 :     int32_t *base, // nDims array of base indices
335 :     uint32_t *size) // nDims array of iteration sizes
336 :     {
337 :     Diderot_World_t *wrld = (Diderot_World_t *) malloc (sizeof(Diderot_World_t));
338 :     if (wrld == 0) {
339 :     fprintf (stderr, "unable to allocate world\n");
340 :     exit (1);
341 :     }
342 :    
343 : jhr 1287 wrld->name = name; /* NOTE: we are assuming that name is statically allocated! */
344 : jhr 1267 wrld->isArray = isArray;
345 :     wrld->nDims = nDims;
346 :     wrld->base = (int32_t *) malloc (nDims * sizeof(int32_t));
347 :     wrld->size = (uint32_t *) malloc (nDims * sizeof(uint32_t));
348 :     size_t numStrands = 1;
349 :     for (int i = 0; i < wrld->nDims; i++) {
350 :     numStrands *= size[i];
351 :     wrld->base[i] = base[i];
352 :     wrld->size[i] = size[i];
353 :     }
354 :    
355 :     if (VerboseFlg) {
356 :     printf("AllocInitially: %d", size[0]);
357 :     for (int i = 1; i < nDims; i++) printf(" x %d", size[i]);
358 :     printf("\n");
359 :     }
360 :    
361 :     // allocate the strand state pointers
362 :     wrld->numStrands = numStrands;
363 : lamonts 1346 wrld->strandSize = strand->stateSzb * numStrands;
364 : lamonts 1341 /* wrld->inState = (void **) malloc (numStrands * sizeof(void *));
365 :     wrld->outState = (void **) malloc (numStrands * sizeof(void *)); */
366 : jhr 1342 wrld->inState = malloc (wrld->strandSize);
367 :     wrld->outState = malloc (wrld->strandSize);
368 : jhr 1267 wrld->status = (uint8_t *) malloc (numStrands * sizeof(uint8_t));
369 :     if ((wrld->inState == 0) || (wrld->outState == 0) || (wrld->status == 0)) {
370 :     fprintf (stderr, "unable to allocate strand states\n");
371 :     exit (1);
372 :     }
373 :    
374 :     // initialize strand state pointers etc.
375 :     for (size_t i = 0; i < numStrands; i++) {
376 : lamonts 1341 // wrld->inState[i] = Diderot_AllocStrand (strand);
377 :     // wrld->outState[i] = Diderot_AllocStrand (strand);
378 : lamonts 1346 wrld->status[i] = DIDEROT_NEW;
379 : jhr 1267 }
380 :    
381 :     return wrld;
382 :    
383 :     }
384 :    
385 :     // get strand state pointers
386 :     void *Diderot_InState (Diderot_World_t *wrld, uint32_t i)
387 :     {
388 :     assert (i < wrld->numStrands);
389 : lamonts 1341 return &wrld->inState[i];
390 : jhr 1267 }
391 :    
392 :     void *Diderot_OutState (Diderot_World_t *wrld, uint32_t i)
393 :     {
394 :     assert (i < wrld->numStrands);
395 : lamonts 1341 return &wrld->outState[i];
396 : jhr 1267 }
397 :    
398 :     bool Diderot_IsActive (Diderot_World_t *wrld, uint32_t i)
399 :     {
400 :     assert (i < wrld->numStrands);
401 :     return !wrld->status[i];
402 : jhr 1287 }

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0