27 |
int32_t *base; // nDims array of base indices |
int32_t *base; // nDims array of base indices |
28 |
uint32_t *size; // nDims array of iteration sizes |
uint32_t *size; // nDims array of iteration sizes |
29 |
uint32_t numStrands; // number of strands in the world |
uint32_t numStrands; // number of strands in the world |
30 |
|
size_t strandSize; // the sizeof of the strand buffers |
31 |
void **inState; |
void **inState; |
32 |
void **outState; |
void **outState; |
33 |
uint8_t *status; // array of strand status flags |
uint8_t *status; // array of strand status flags |
39 |
|
|
40 |
static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld); |
static bool InitCL (CLInfo_t *clInfo, Diderot_World_t *wrld); |
41 |
|
|
42 |
extern void Diderot_GPU_Init (cl_device_id device); // obsolete |
extern void Diderot_LoadGlobals (cl_context context, cl_kernel kernel, int argStart); |
|
|
|
43 |
int main (int argc, const char **argv) |
int main (int argc, const char **argv) |
44 |
{ |
{ |
45 |
// get information about OpenCL support |
// get information about OpenCL support |
59 |
// run the generated global initialization code |
// run the generated global initialization code |
60 |
if (VerboseFlg) printf("initializing globals ...\n"); |
if (VerboseFlg) printf("initializing globals ...\n"); |
61 |
|
|
|
/* Globals are loaded from the openCL code */ |
|
62 |
Diderot_InitGlobals(); |
Diderot_InitGlobals(); |
63 |
|
|
64 |
/***** FIXME: OpenCL specific stuff goes here. Things to do: |
/***** FIXME: OpenCL specific stuff goes here. Things to do: |
73 |
if (! InitCL(clInfo, wrld)) |
if (! InitCL(clInfo, wrld)) |
74 |
exit (1); |
exit (1); |
75 |
|
|
76 |
Diderot_GPU_Init(clInfo->platforms[0].devices[0].id); // obsolete |
int argCount = 0; |
77 |
|
|
78 |
|
cl_int sts = CL_SUCCESS; |
79 |
|
|
80 |
|
/* Create the strand in-state and out-state buffers */ |
81 |
|
cl_mem in_state_mem, out_state_mem; |
82 |
|
|
83 |
|
in_state_mem = clCreateBuffer(wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, |
84 |
|
wrld->strandSize, *(wrld->inState), &sts); |
85 |
|
|
86 |
|
if (sts != CL_SUCCESS) { |
87 |
|
fprintf (stderr, "error creating OpenCL strand in-state buffer\n"); |
88 |
|
exit(1); |
89 |
|
} |
90 |
|
|
91 |
|
out_state_mem = clCreateBuffer(wrld->context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, |
92 |
|
wrld->strandSize, *(wrld->outState), &sts); |
93 |
|
|
94 |
|
if (sts != CL_SUCCESS) { |
95 |
|
fprintf (stderr, "error creating OpenCL strand in-state buffer\n"); |
96 |
|
exit(1); |
97 |
|
} |
98 |
|
|
99 |
|
/* Set the in-state and out-state strand agruments */ |
100 |
|
sts = clSetKernelArg(wrld->kernel, argCount++, sizeof(cl_mem), &in_state_mem); |
101 |
|
|
102 |
|
if (sts != CL_SUCCESS) { |
103 |
|
fprintf (stderr, "error Setting OpenCL strand in-state argument\n"); |
104 |
|
exit(1); |
105 |
|
} |
106 |
|
|
107 |
|
|
108 |
|
sts = clSetKernelArg(wrld->kernel, argCount++, sizeof(cl_mem), &out_state_mem); |
109 |
|
|
110 |
|
if (sts != CL_SUCCESS) { |
111 |
|
fprintf (stderr, "error Setting OpenCL strand out-state argument\n"); |
112 |
|
exit(1); |
113 |
|
} |
114 |
|
|
115 |
|
/* Setup up global/local work sizes and fire the kernel for execution */ |
116 |
|
if( wrld->nDims == 2) { |
117 |
|
size_t global_work_size[2], local_work_size[2]; |
118 |
|
global_work_size[0] = wrld->size[0]; |
119 |
|
global_work_size[1] = wrld->size[1]; |
120 |
|
local_work_size[0] = 16; |
121 |
|
local_work_size[1] = 16; |
122 |
|
|
123 |
|
cl_int width = global_work_size[1]; |
124 |
|
|
125 |
|
sts = clSetKernelArg(wrld->kernel, argCount++, sizeof(cl_int), &width); |
126 |
|
|
127 |
|
if (sts != CL_SUCCESS) { |
128 |
|
fprintf (stderr, "error Setting OpenCL width argument\n"); |
129 |
|
exit(1); |
130 |
|
} |
131 |
|
|
132 |
|
|
133 |
|
Diderot_LoadGlobals(wrld->context, wrld->kernel, argCount); |
134 |
|
|
135 |
|
sts = clEnqueueNDRangeKernel(wrld->cmdQ, wrld->kernel, 2, NULL, global_work_size, |
136 |
|
local_work_size, 0, NULL, NULL); |
137 |
|
|
138 |
|
|
139 |
|
if (sts != CL_SUCCESS) { |
140 |
|
fprintf (stderr, "error in executing kernel\n"); |
141 |
|
exit(1); |
142 |
|
} |
143 |
|
|
144 |
|
clFinish(wrld->cmdQ); |
145 |
|
|
146 |
|
sts = clEnqueueReadBuffer(wrld->cmdQ, out_state_mem, CL_TRUE, 0, wrld->strandSize, wrld->outState, 0, |
147 |
|
NULL, NULL); |
148 |
|
|
149 |
|
if (sts != CL_SUCCESS) { |
150 |
|
fprintf (stderr, "error in reading back output\n"); |
151 |
|
exit(1); |
152 |
|
} |
153 |
|
} |
154 |
|
|
155 |
|
/* FIXME: release all OpenCL objects */ |
156 |
|
clReleaseKernel(wrld->kernel); |
157 |
|
clReleaseCommandQueue(wrld->cmdQ); |
158 |
|
clReleaseContext(wrld->context); |
159 |
|
|
160 |
double t0 = GetTime(); |
double t0 = GetTime(); |
161 |
|
|
180 |
|
|
181 |
for (int i = 0; i < wrld->numStrands; i++) { |
for (int i = 0; i < wrld->numStrands; i++) { |
182 |
if (wrld->status[i] == DIDEROT_STABLE) |
if (wrld->status[i] == DIDEROT_STABLE) |
183 |
Diderot_Strands[0]->print (outS, wrld->inState[i]); |
Diderot_Strands[0]->print (outS, wrld->outState[i]); |
184 |
} |
} |
185 |
fclose (outS); |
fclose (outS); |
186 |
|
|
294 |
|
|
295 |
return true; |
return true; |
296 |
} |
} |
|
|
|
297 |
// this should be the part of the scheduler |
// this should be the part of the scheduler |
298 |
void *Diderot_AllocStrand (Strand_t *strand) |
void *Diderot_AllocStrand (Strand_t *strand) |
299 |
{ |
{ |
335 |
|
|
336 |
// allocate the strand state pointers |
// allocate the strand state pointers |
337 |
wrld->numStrands = numStrands; |
wrld->numStrands = numStrands; |
338 |
|
wrld->strandSize = sizeof(strand) * numStrands; |
339 |
wrld->inState = (void **) malloc (numStrands * sizeof(void *)); |
wrld->inState = (void **) malloc (numStrands * sizeof(void *)); |
340 |
wrld->outState = (void **) malloc (numStrands * sizeof(void *)); |
wrld->outState = (void **) malloc (numStrands * sizeof(void *)); |
341 |
wrld->status = (uint8_t *) malloc (numStrands * sizeof(uint8_t)); |
wrld->status = (uint8_t *) malloc (numStrands * sizeof(uint8_t)); |