Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /branches/lamont/src/compiler/c-target/fragments/par-kdtree.in
ViewVC logotype

Annotation of /branches/lamont/src/compiler/c-target/fragments/par-kdtree.in

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2565 - (view) (download)

1 : lamonts 2453 /* sequential version of the merge sort for the spatial indices */
2 :     void Diderot_Spatial_SeqMergeSort(@PREFIX@World_t *wrld, uint32_t * indices, uint32_t * tmpIndices, int low, int high, uint32_t dim)
3 : lamonts 2422 {
4 :    
5 : lamonts 2453 if (low >= high)
6 :     return;
7 : lamonts 2422
8 : lamonts 2453 int mid = (low + high) / 2;
9 :    
10 :     //Sort two sub-arrays first so that they are placed into the temporary
11 :     //indices array.
12 :     Diderot_Spatial_SeqMergeSort(wrld,tmpIndices,indices, low, mid,dim);
13 :     Diderot_Spatial_SeqMergeSort(wrld,tmpIndices,indices, mid + 1, high,dim);
14 :    
15 :     // Once temporary indice array contains the two sorted sub-arrays
16 :     // they are merged into indice array
17 :     Diderot_Spatial_SeqMerge(wrld,indices,tmpIndices,low, mid, mid + 1, high,low,dim);
18 :     }
19 :     /* sequential version of the merge operation for the spatial indices */
20 :     /* Note: Sub-arrays are adjacent to each other in the
21 :     the sequential version but not necessarily for the parallel version;
22 :     therefore, sub-arrays boundaries must be specified
23 :     explicitly.*/
24 :     void Diderot_Spatial_SeqMerge(@PREFIX@World_t *wrld, uint32_t * indices, uint32_t * tmpIndices, int lowL, int highL, int lowR, int highR, int low, uint32_t dim)
25 :     {
26 :     int high = low + highL - lowL + highR - lowR + 1;
27 :    
28 :     //Note: The offset is needed for the way the tree is built. We need to make sure the next axis to be sorted
29 :     //has a duplicate copy of the current sorted axis
30 :     int offSet = (dim + 1) == wrld->spatialTree->dim ? -(dim * wrld->numStrands) : ((dim + 1) * wrld->numStrands);
31 :    
32 :     // Bottom-up implementation
33 :     for (;low <= high; low++)
34 :     {
35 :     if (lowL > highL){
36 :     indices[low] = tmpIndices[lowR];
37 :     indices[low + offSet] = tmpIndices[lowR];
38 :     tmpIndices[low + offSet] = tmpIndices[lowR];
39 :     lowR++;
40 :     }else if (lowR > highR) {
41 :     indices[low] = tmpIndices[lowL];
42 :     indices[low + offSet] = tmpIndices[lowL];
43 :     tmpIndices[low + offSet] = tmpIndices[lowL];
44 :     lowL++;
45 :     }else {
46 :     Diderot_real_t lowLValue = Diderot_KDTree_GetPosValue(wrld->inState[tmpIndices[lowL]],dim);
47 :     Diderot_real_t lowRValue = Diderot_KDTree_GetPosValue(wrld->inState[tmpIndices[lowR]],dim);
48 :     if(lowLValue < lowRValue){
49 :     indices[low] = tmpIndices[lowL];
50 :     indices[low + offSet] = tmpIndices[lowL];
51 :     tmpIndices[low + offSet] = tmpIndices[lowL];
52 :     lowL++;
53 :     }else {
54 :     indices[low] = tmpIndices[lowR];
55 :     indices[low + offSet] = tmpIndices[lowR];
56 :     tmpIndices[low + offSet] = tmpIndices[lowR];
57 :     lowR++;
58 :     }
59 :     }
60 : lamonts 2422 }
61 : lamonts 2453 }
62 :     /* Swaps the values at two addresses */
63 :     void Diderot_KDTree_SwapValues(int * a, int *b){
64 :     int temp;
65 :     temp = *a;
66 :     *a = *b;
67 :     *b = temp;
68 :     }
69 :     int Diderot_BinarySearch(@PREFIX@World_t *wrld,uint32_t *tmpIndices, int low, int high, Diderot_real_t searchValue, int dim)
70 :     {
71 :     if(low > high + 1)
72 :     high = low;
73 :     else
74 :     high = high + 1;
75 : lamonts 2422
76 : lamonts 2453 while (low < high)
77 :     {
78 :     int mid = (low + high) / 2;
79 :     Diderot_real_t pivot = Diderot_KDTree_GetPosValue(wrld->inState[tmpIndices[mid]],dim);
80 :     if (pivot < searchValue)
81 :     low = mid + 1;
82 :     else
83 :     high = mid;
84 :     }
85 :     return low;
86 :     }
87 :     /* Assigns a worker to perform a spatial task (merge sort, merge,or tree node construction) */
88 : lamonts 2562 SpatialWorkerArgs_t * Diderot_Grab_SpatailWorker(SpatialScheduler_t* sched,ParSpatialWorkerType workerType,void * nodeData, bool * donePtr){
89 : lamonts 2422
90 : lamonts 2453 SpatialWorkerArgs_t * retWorker = 0;
91 :     pthread_mutex_lock (&sched->lock);
92 : lamonts 2565 for(int i = sched->nextWorker; i < sched->numWorkers; i++){
93 : lamonts 2453 if(sched->workerData[i].isIdle){
94 :     sched->workerData[i].isIdle = false;
95 :     sched->workerData[i].data = nodeData;
96 : lamonts 2562 sched->workerData[i].isDone = donePtr;
97 : lamonts 2453 sched->workerData[i].workerType = workerType;
98 :     retWorker = &sched->workerData[i];
99 :     pthread_cond_signal (&sched->workerData[i].runWait);
100 :     sched->nextWorker++;
101 : lamonts 2565 if(sched->nextWorker == sched->numWorkers-1)
102 : lamonts 2453 sched->nextWorker = 0;
103 :     break;
104 :     }
105 :     }
106 :     pthread_mutex_unlock (&sched->lock);
107 :     return retWorker;
108 : lamonts 2422 }
109 : lamonts 2453 /* parallel version of the merge sort for the spatial indices */
110 :     void Diderot_Spatial_ParMergeSort(@PREFIX@World_t *wrld,SpatialScheduler_t * sched,uint32_t * indices, uint32_t * tmpIndices, int low, int high, int depth, uint32_t dim)
111 : lamonts 2422 {
112 : lamonts 2562 bool isDone = false;
113 : lamonts 2453 // If the depth is less than zero or there's not enough work to perform in parallel
114 :     // then run the sequential version.
115 :     if (high - low + 1 <= sched->seqThreshold || depth <= 0)
116 :     {
117 :     Diderot_Spatial_SeqMergeSort(wrld,indices,tmpIndices, low, high,dim);
118 :     return;
119 :     }
120 :     if(low < high) {
121 :     int mid = (low + high) / 2;
122 :     depth--;
123 : lamonts 2422
124 : lamonts 2453 //Assign a worker to do the upper half the merge sort.
125 :     MergetSortNode_t node;
126 :     node.low=low;
127 :     node.high = mid;
128 :     node.indices = tmpIndices;
129 :     node.tmpIndices = indices;
130 :     node.depth = depth;
131 :     node.dim = dim;
132 :    
133 : lamonts 2562 SpatialWorkerArgs_t * worker = Diderot_Grab_SpatailWorker(sched,MERGESORT_WORKER,(void *)&node,&isDone);
134 : lamonts 2453
135 :     //If a worker is not avaliable then this worker will perform both halves.
136 :     if(!worker){
137 :     Diderot_Spatial_ParMergeSort(wrld,sched,tmpIndices,indices,low, mid,depth,dim);
138 :     Diderot_Spatial_ParMergeSort(wrld,sched,tmpIndices,indices,mid + 1, high, depth,dim);
139 :     }else {
140 :     Diderot_Spatial_ParMergeSort(wrld,sched,tmpIndices,indices,mid + 1, high, depth,dim);
141 :     //Wait until the worker has completed its task
142 :     pthread_mutex_lock (&worker->lock);
143 : lamonts 2562 while(!isDone)
144 : lamonts 2453 pthread_cond_wait (&worker->done,&worker->lock);
145 :     pthread_mutex_unlock (&worker->lock);
146 :     }
147 :     //Merge the two sorted lists.
148 :     Diderot_Spatial_ParMerge(wrld,sched,indices,tmpIndices,low, mid, mid + 1, high, low, depth,dim);
149 :     }
150 :    
151 :     }
152 :     /* parallel version of the merge operation for the spatial indices */
153 :     void Diderot_Spatial_ParMerge(@PREFIX@World_t *wrld,SpatialScheduler_t * sched,uint32_t * indices, uint32_t * tmpIndices,int lowL, int highL, int lowR, int highR, int low, int depth, int dim)
154 :     {
155 :     int nL = highL - lowL + 1;
156 :     int nR = highR - lowR + 1;
157 : lamonts 2562 bool isDone = false;
158 : lamonts 2453
159 :     // If the depth is less than zero or there's not enough work to perform in parallel
160 :     // then run the sequential version.
161 :     if (nL + nR <= sched->seqThreshold || depth <= 0)
162 :     {
163 :     Diderot_Spatial_SeqMerge(wrld,indices,tmpIndices,lowL, highL, lowR, highR,low,dim);
164 :     return;
165 :     }
166 :    
167 :     if (nL < nR)
168 :     {
169 :     Diderot_KDTree_SwapValues(&lowL,&lowR);
170 :     Diderot_KDTree_SwapValues(&highL,&highR);
171 :     Diderot_KDTree_SwapValues(&nL,&nR);
172 :     }
173 :     if(nL == 0) return;
174 :     else
175 :     {
176 :     int midL = (lowL + highL) / 2;
177 :    
178 :     Diderot_real_t pivot = Diderot_KDTree_GetPosValue(wrld->inState[tmpIndices[midL]],dim);
179 :    
180 :     int midR = Diderot_BinarySearch(wrld,tmpIndices, lowR, highR, pivot,dim);
181 :    
182 : lamonts 2562 int mid = low + (midL - lowL) + (midR - lowR);
183 : lamonts 2453
184 :     //Note: The offset is needed for the way the tree is built. We need to make sure the next axis to be sorted
185 :     //has a duplicate copy of the current sorted axis
186 :     int offSet = (dim + 1) == wrld->spatialTree->dim ? -(dim * wrld->numStrands) : ((dim + 1) * wrld->numStrands);
187 :     indices[mid] = tmpIndices[midL];
188 :     indices[mid + offSet] = tmpIndices[midL];
189 :     tmpIndices[mid + offSet] = tmpIndices[midL];
190 :    
191 :     depth--;
192 :    
193 :     //Have a worker perform the upper half of the merge
194 :     MergeNode_t node;
195 :     node.depth = depth;
196 :     node.lowL = lowL;
197 :     node.highL= midL - 1;
198 :     node.lowR = lowR;
199 : lamonts 2562 node.indices = indices;
200 :     node.tmpIndices = tmpIndices;
201 : lamonts 2453 node.highR = midR - 1;
202 :     node.low = low;
203 :     node.dim = dim;
204 :    
205 : lamonts 2562 SpatialWorkerArgs_t * worker = Diderot_Grab_SpatailWorker(sched,MERGE_WORKER,(void *)&node,&isDone);
206 : lamonts 2453
207 :     if(!worker){
208 :     Diderot_Spatial_ParMerge(wrld,sched,indices,tmpIndices, lowL, midL-1, lowR, midR -1, low, depth,dim);
209 :     Diderot_Spatial_ParMerge(wrld,sched,indices,tmpIndices, midL + 1, highL, midR, highR, mid + 1, depth,dim);
210 :     }else {
211 :     Diderot_Spatial_ParMerge(wrld,sched,indices,tmpIndices, midL + 1, highL, midR, highR, mid + 1, depth,dim);
212 :     pthread_mutex_lock (&worker->lock);
213 : lamonts 2562 while(!isDone)
214 : lamonts 2453 pthread_cond_wait (&worker->done,&worker->lock);
215 :     pthread_mutex_unlock (&worker->lock);
216 :     }
217 :     }
218 :     }
219 :     /*The worker function for all the workers */
220 :     void treeWorker(SpatialWorkerArgs_t * workerArg){
221 :    
222 :     while(!workerArg->sched->buildIsDone)
223 :     {
224 :     //Wait until a task is assigned to the worker
225 :     pthread_mutex_lock (&workerArg->sched->lock);
226 : lamonts 2562 workerArg->isIdle = true;
227 :     while(workerArg->isIdle && !workerArg->sched->buildIsDone){
228 :     pthread_cond_wait (&workerArg->runWait,&workerArg->sched->lock);
229 :     }
230 : lamonts 2453 pthread_mutex_unlock (&workerArg->sched->lock);
231 :    
232 : lamonts 2562 if(workerArg->sched->buildIsDone){
233 :     workerArg->isIdle = false;
234 :     break;
235 :     }
236 : lamonts 2453 //Based on the worker type perform the assigned task.
237 :     if(workerArg->workerType == MERGESORT_WORKER){
238 :     MergetSortNode_t * node = (MergetSortNode_t *)workerArg->data;
239 :     Diderot_Spatial_ParMergeSort(workerArg->wrld,
240 :     workerArg->sched,
241 :     node->indices,
242 :     node->tmpIndices,
243 :     node->low,
244 :     node->high,
245 :     node->depth,
246 :     node->dim);
247 :    
248 :     }else if(workerArg->workerType == MERGE_WORKER) {
249 :     MergeNode_t * node = (MergeNode_t *)workerArg->data;
250 :     Diderot_Spatial_ParMerge(workerArg->wrld,
251 :     workerArg->sched,
252 :     node->indices,
253 :     node->tmpIndices,
254 :     node->lowL,
255 :     node->highL,
256 :     node->lowR,
257 :     node->highR,
258 :     node->low,
259 :     node->depth,
260 :     node->dim);
261 :     }else if(workerArg->workerType == TREE_WORKER){
262 :     TreeNode_t * node = (TreeNode_t *)workerArg->data;
263 :     node->parent->right = Diderot_KDTree_BuildHelper(workerArg->wrld,
264 :     workerArg->sched,
265 :     node->start,
266 :     node->len,
267 :     node->axis);
268 :    
269 :     }
270 :     //Notify any worker waiting for this task to be completed.
271 :     pthread_mutex_lock (&workerArg->lock);
272 : lamonts 2562 *(workerArg->isDone)= true;
273 :     pthread_cond_signal (&workerArg->done);
274 : lamonts 2453 pthread_mutex_unlock (&workerArg->lock);
275 :     }
276 :     }
277 :     /* Finds the median strand in an array of strands based on the given axis*/
278 :     @STRANDTY@ * find_median(@PREFIX@World_t *wrld, SpatialScheduler_t * sched, int start, int end, int idx, int * mid){
279 :    
280 :     int startIdx = start + (idx * wrld->numStrands);
281 :     int endIdx = end + (idx * wrld->numStrands);
282 :     int midIdx = startIdx + (endIdx - startIdx)/2;
283 :    
284 :     if(end <= start) return NULL;
285 :     if(end == start + 1) {
286 :     *mid = start;
287 :     return wrld->inState[wrld->spatialIndices[startIdx]];
288 :     }
289 :     Diderot_Spatial_ParMergeSort(wrld,sched,wrld->spatialIndices, wrld->spatialTmpIndices, startIdx,endIdx-1,33453,idx);
290 :    
291 :     *mid = start + (end-start)/2;
292 :    
293 :     return wrld->inState[wrld->spatialIndices[midIdx]];
294 :     }
295 :     /* A recursive tree building function */
296 :     KDNode_t * Diderot_KDTree_BuildHelper(@PREFIX@World_t *wrld, SpatialScheduler_t * sched,int start, int len, int axis)
297 :     {
298 :     KDNode_t* newNode = NULL;
299 : lamonts 2562 bool isDone = false;
300 : lamonts 2453 @STRANDTY@ * n;
301 :     if (!len) return 0;
302 :     int mid;
303 :     if ((n = find_median(wrld,sched,start,start + len,axis,&mid))) {
304 : lamonts 2563 pthread_mutex_lock (&sched->lock);
305 :     newNode = Diderot_KDTree_GrabNode(wrld->spatialTree);
306 :     pthread_mutex_unlock (&sched->lock);
307 : lamonts 2453 newNode->strandId = n->strandId;
308 :     axis = (axis + 1) % wrld->spatialTree->dim;
309 :    
310 :     // If the depth is less than zero or there's not enough work to perform in parallel
311 :     // then run the sequential version.
312 :     if(len < sched->seqThreshold){
313 :     newNode->left = Diderot_KDTree_BuildHelper(wrld,sched,start,mid - start, axis);
314 :     newNode->right = Diderot_KDTree_BuildHelper(wrld,sched,mid + 1,start + len - (mid+ 1), axis);
315 :     }else{
316 :     //Have a woker build the right side of the tree.
317 :     TreeNode_t node;
318 : lamonts 2562 node.axis = axis;
319 : lamonts 2453 node.len = start + len - (mid + 1);
320 :     node.start = mid + 1;
321 : lamonts 2562 node.parent = newNode;
322 : lamonts 2453
323 : lamonts 2562 SpatialWorkerArgs_t * worker = Diderot_Grab_SpatailWorker(sched,TREE_WORKER,(void *)&node,&isDone);
324 : lamonts 2453
325 :     if(!worker){
326 :     newNode->left = Diderot_KDTree_BuildHelper(wrld,sched,start,mid - start, axis);
327 :     newNode->right = Diderot_KDTree_BuildHelper(wrld,sched,mid + 1,start + len - (mid+ 1), axis);
328 :     }else {
329 :     newNode->left = Diderot_KDTree_BuildHelper(wrld,sched,start,mid - start, axis);
330 :     pthread_mutex_lock (&worker->lock);
331 : lamonts 2562 while(!isDone)
332 : lamonts 2453 pthread_cond_wait (&worker->done,&worker->lock);
333 :     pthread_mutex_unlock (&worker->lock);
334 :     }
335 : lamonts 2422 }
336 : lamonts 2453 }
337 :     return newNode;
338 :     }
339 : lamonts 2565 //Debugging purpoes
340 : lamonts 2562 void printTree(KDNode_t* newNode){
341 :     if(newNode != NULL){
342 :     printf("id=%d\n",newNode->strandId);
343 :     printTree(newNode->right);
344 :     printTree(newNode->left);
345 :     }
346 : lamonts 2565 }
347 : lamonts 2562 void Diderot_KDTree_Build(@PREFIX@World_t *wrld, uint32_t id, double * spatialTotalTime)
348 : lamonts 2453 {
349 :     SpatialScheduler_t * sched = wrld->spatialSched;
350 : lamonts 2562 if(sched->leaderId != id) {
351 : lamonts 2453 treeWorker(&sched->workerData[id]);
352 :     return;
353 : lamonts 2422 }
354 : lamonts 2562 double spatialT0 = airTime();
355 : lamonts 2453 KDTree_t * tree = wrld->spatialTree;
356 :     memcpy(wrld->spatialTmpIndices,wrld->spatialIndices, sizeof(uint32_t)*@SPATIAL_DIMENSION@ * wrld->numStrands);
357 :     Diderot_KDTree_PoolClear(tree);
358 :     tree->root = Diderot_KDTree_BuildHelper(wrld, wrld->spatialSched,0,*(tree->numOfStrands),0);
359 :     wrld->spatialSched->buildIsDone=true;
360 : lamonts 2565 for(unsigned int i = 0; i < sched->numWorkers; i++){
361 : lamonts 2453 pthread_cond_signal (&sched->workerData[i].runWait);
362 :     }
363 : lamonts 2565 //printTree(tree->root);
364 :     sched->numWorkers+= 1;
365 : lamonts 2562 *(spatialTotalTime) = *(spatialTotalTime) + (airTime() - spatialT0);
366 : lamonts 2453 }
367 :     void Diderot_KDTree_Init(@PREFIX@World_t *wrld)
368 :     {
369 :     Diderot_KDTree_Alloc(&wrld->spatialTree,&wrld->numStrands,@SPATIAL_DIMENSION@);
370 :     wrld->spatialIndices= NEWVEC(uint32_t,@SPATIAL_DIMENSION@ * wrld->numStrands);
371 :     wrld->spatialTmpIndices= NEWVEC(uint32_t,@SPATIAL_DIMENSION@ * wrld->numStrands);
372 : lamonts 2422
373 : lamonts 2453 uint32_t len = @SPATIAL_DIMENSION@ * wrld->numStrands;
374 : lamonts 2422
375 : lamonts 2453 SpatialScheduler_t * scheduler = (SpatialScheduler_t *)CheckedAlloc(sizeof(SpatialScheduler_t));
376 :     if ((pthread_mutex_init (&scheduler->lock, 0)) != 0){
377 :     printf("Error in initializing the spatial scheduler's lock");
378 :     exit(EXIT_FAILURE);
379 :     }
380 :     int nWorkers = wrld->sched->numWorkers;
381 :     SpatialWorkerArgs_t * workerArgs = NEWVEC(SpatialWorkerArgs_t, nWorkers);
382 :     scheduler->workerData=workerArgs;
383 :     scheduler->nextWorker = 0;
384 : lamonts 2562 scheduler->numRunning= 0;
385 : lamonts 2453 scheduler->seqThreshold = SPATIAL_SEQ_THERSHOLD;
386 :     scheduler->numWorkers = nWorkers;
387 :     scheduler->buildIsDone = false;
388 :     wrld->spatialSched = scheduler;
389 : lamonts 2422
390 : lamonts 2453 for(unsigned int i = 0; i < nWorkers; i++){
391 : lamonts 2562 workerArgs[i].isIdle = false;
392 : lamonts 2453 workerArgs[i].data= 0;
393 :     workerArgs[i].sched = scheduler;
394 :     workerArgs[i].wrld = wrld;
395 :     workerArgs[i].id = i;
396 :     if ((pthread_cond_init (&(workerArgs[i].runWait), 0)) != 0){
397 :     printf("Error in initializing the scheduler lock");
398 :     exit(EXIT_FAILURE);
399 :     }
400 :     if ((pthread_cond_init (&(workerArgs[i].done), 0)) != 0){
401 :     printf("Error in initializing the done condition var ");
402 :     exit(EXIT_FAILURE);
403 :     }
404 :     if ((pthread_mutex_init (&(workerArgs[i].lock), 0)) != 0){
405 :     printf("Error in initializing the worker lock");
406 :     exit(EXIT_FAILURE);
407 :     }
408 :     }
409 :     for(uint32_t i = 0; i < len; i++){
410 :     wrld->spatialIndices[i]= i % wrld->numStrands;
411 :     }
412 : lamonts 2422 }
413 : lamonts 2453 void Diderot_KDTree_Realloc(@PREFIX@World_t *wrld)
414 :     {
415 :     wrld->spatialIndices= (uint32_t*)CheckedAlloc(sizeof(uint32_t)*@SPATIAL_DIMENSION@ * wrld->numStrands);
416 :     wrld->spatialTmpIndices= (uint32_t*)CheckedAlloc(sizeof(uint32_t)*@SPATIAL_DIMENSION@ * wrld->numStrands);
417 :     uint32_t len = @SPATIAL_DIMENSION@ * wrld->numStrands;
418 :     for(uint32_t i = 0; i < len; i++){
419 :     wrld->spatialIndices[i]= i % wrld->numStrands;
420 :     }
421 :     }

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0