Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /branches/lamont/src/compiler/c-target/fragments/par-kdtree.in
ViewVC logotype

View of /branches/lamont/src/compiler/c-target/fragments/par-kdtree.in

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2853 - (download) (annotate)
Mon Dec 15 16:31:25 2014 UTC (4 years, 7 months ago) by lamonts
File size: 19827 byte(s)
Readded spatial timing to runtime arguments for pthreads
 uint32_t * Diderot_Par_Median_Search(SpatialWorkerArgs_t * workerData, uint32_t start, uint32_t end, uint32_t dim) 
{
    SpatialScheduler_t * sched = workerData->sched;
    uint32_t nWorkers = sched->wrld->sched->numWorkers; 
    uint32_t workerId = workerData->id;  

    while(true) { 
      
      uint32_t blockSize = (end - start)/nWorkers; 
      workerData->lessCount = 0; 
      workerData->greaterCount = 0; 
 

      /* Phase #1: Gather Work */ 
      pthread_mutex_lock (&sched->barrier);

      workerData->start = sched->nextStrand;  

      if(sched->startRunning - 1 > 0){
        workerData->end = workerData->start + blockSize; 
        sched->nextStrand += blockSize; 
        sched->startRunning-=1; 
      }else {                               
        workerData->end = end; 
      }
      pthread_mutex_unlock (&sched->barrier); 


      /* Phase #2: Find the Medians of the workers block */ 
      uint32_t inc = 0; 
      uint32_t spiltterPerWorker = blockSize/PAR_MEDIANS_SIZE; 
      uint32_t seqStart = workerData->start; 
      uint32_t seqEnd = (seqStart + PAR_MEDIANS_SIZE) > workerData->end ? workerData->end : seqStart + PAR_MEDIANS_SIZE;
    
      do
      {
        seq_median(sched->wrld, sched->spatialIn, seqStart, seqEnd,dim);   
        uint32_t mid =  workerData->start + ((workerData->end-workerData->start)/2); 
        sched->spilttersIn[(workerId * spiltterPerWorker) + inc] = sched->spatialIn[mid]; 
        inc++; 
        seqStart = seqEnd; 
        seqEnd = (seqStart + PAR_MEDIANS_SIZE) > workerData->end ? workerData->end : seqStart + PAR_MEDIANS_SIZE;
      }
      while(inc < spiltterPerWorker); 


      pthread_mutex_lock (&sched->barrier);
      if(sched->numRunning - 1 > 0){
        sched->numRunning-=1; 
        pthread_cond_wait (&sched->runWait,&sched->barrier);
      }else {        

        /* Phase #3: Now find the medians of the medians */ 
        sched->globalMedian = seq_median(sched->wrld, sched->spilttersIn, 0, spiltterPerWorker * nWorkers,dim); 
        sched->numRunning = nWorkers; 

        pthread_cond_broadcast(&sched->runWait); 
      }
      pthread_mutex_unlock (&sched->barrier);


      /* Phase 4: Count the number of strands less than and greater than the global median */ 
      uint32_t mid =  (spiltterPerWorker * nWorkers)/2;
      if(sched->wrld->inState[sched->spilttersIn[mid]]->strandId != sched->globalMedian->strandId)
      {
        printf("The globalMedians are not equal to each other\n"); 
        exit(1); 
      }
      Diderot_real_t globalM = Diderot_KDTree_GetPosValue(sched->wrld->inState[sched->spilttersIn[mid]],dim); 

      for(int i = workerData->start; i < workerData->end; i++)
      {
        Diderot_real_t val = Diderot_KDTree_GetPosValue(sched->wrld->inState[sched->spatialIn[i]],dim); 
        if(globalM >= val) 
        {
            workerData->lessCount++; 
        }else 
        {
            workerData->greaterCount++; 
        }
    }
    pthread_mutex_lock (&sched->barrier);

    sched->lessCount += workerData->lessCount; 

    if(sched->numRunning - 1 > 0){ 
        sched->numRunning-=1; 
        pthread_cond_wait (&sched->runWait,&sched->barrier);
    }else {
        sched->numRunning = nWorkers;
        sched->greaterCount = (end - start) - sched->lessCount; 
        pthread_cond_broadcast(&sched->runWait); 
    }
    pthread_mutex_unlock (&sched->barrier);

    /* Phase 5: Now Reorder the spatialOut array to fill all the values less than global median in the correct spots */ 
    uint32_t less = 0, greater = 0;  

    for(uint32_t i = 0; i <  workerId; i++){
         greater += sched->workers[i].greaterCount; 
         less += sched->workers[i].lessCount;
    }
    greater += (sched->lessCount + start); 
    less += start; 


    for(int i = workerData->start; i < workerData->end; i++)
    {
        Diderot_real_t val = Diderot_KDTree_GetPosValue(sched->wrld->inState[sched->spatialIn[i]],dim); 
        if(globalM > val)  
        {
            sched->spatialOut[less++] = sched->spatialIn[i]; 
        }else if (globalM < val) 
        {
            sched->spatialOut[greater++] = sched->spatialIn[i]; 
        }else
        {
            sched->globMedIdx = less; 
            sched->spatialOut[less++] = sched->spatialIn[i]; 
        }
    }

    pthread_mutex_lock (&sched->barrier);
    if(sched->numRunning - 1 > 0){
        sched->numRunning-=1; 
        pthread_cond_wait (&sched->runWait,&sched->barrier);
    }else {
        uint32_t temp = sched->spatialOut[ (sched->lessCount - 1) + start]; 
        sched->spatialOut[ (sched->lessCount - 1) + start] = sched->spatialOut[sched->globMedIdx]; 
        sched->spatialOut[sched->globMedIdx] = temp; 
        sched->globMedIdx = start + (sched->lessCount - 1); 

       uint32_t newStart = 0, newEnd = 0, k = sched->wrld->numStrands/2; 

        if(k  == sched->globMedIdx){
            sched->state = DONE; 
            sched->isRunning = false; 
            sched->endTime = airTime(); 
        }
        else { 
 
            if (sched->globMedIdx < k) {
                sched->state = UPPER; 
                newStart = sched->globMedIdx + 1; 
                newEnd = end; 
                sched->copyStart = start; 
                sched->copyEnd = sched->globMedIdx + 1; 
            }
           else {
                sched->state = LOWER;  
                newStart = start;
                newEnd = sched->globMedIdx; 
                sched->copyStart = sched->globMedIdx; 
                sched->copyEnd = end; 
            }

            //Check if there's enough work for the next round 
            if(newEnd- newStart <= sched->seqThreshold){
                seq_median(sched->wrld, sched->spatialOut, newStart,newEnd, dim);
                sched->state = DONE;
                sched->isRunning = false; 
                sched->endTime = airTime(); 
            }else {
                uint32_t * temp = sched->spatialOut; 
                sched->spatialOut = sched->spatialIn; 
                sched->spatialIn = temp;
            }
        }

        sched->nextStrand = newStart; 
        sched->lessCount = 0; 
        sched->greaterCount = 0; 
        sched->startRunning = nWorkers; 
        sched->numRunning = nWorkers; 

        pthread_cond_broadcast(&sched->runWait); 
    }
    pthread_mutex_unlock (&sched->barrier);

    if(sched->state == DONE)
    {
        return sched->spatialOut; 
    }
    else 
    {
        uint32_t copySize = (sched->copyEnd - sched->copyStart)/nWorkers; 
        uint32_t copyStart = (copySize == 0) ? 1 : sched->copyStart  + (copySize * workerId); 

        uint32_t copyEnd = (nWorkers - 1 == workerId) ? sched->copyEnd : copyStart + copySize; 


        for(int index = copyStart; index < copyEnd;  index++)
        {
            sched->spatialOut[index] = sched->spatialIn[index]; 
        }

        if(sched->state == LOWER){
            end = sched->globMedIdx; 
        }else{
            start = sched->globMedIdx + 1; 
        }
    }

    }
    return NULL; 
}

void spatialWorker(SpatialWorkerArgs_t * worker){
   
   bool done = false; 
   do 
   { 
      SpatialScheduler_t * sched = worker->sched; 
      pthread_mutex_lock (&sched->barrier);
      if(sched->isTreeBuilt)
        done = true; 
      else {
        if(!sched->isRunning)
          pthread_cond_wait (&sched->runWait,&sched->barrier);
      }
      pthread_mutex_unlock (&sched->barrier);
      if(sched->isTreeBuilt)
        done = true;
      else{
        Diderot_Par_Median_Search(worker,sched->start,sched->end,sched->dim);
      }
    }
    while(!done); 
}
//Debugging purpoes 
void printTree(KDNode_t* newNode){
    if(newNode != NULL){
        printf("id=%d\n",newNode->strandId); 
        printTree(newNode->right); 
        printTree(newNode->left); 
    }
}
/* Swaps the values at two addresses */ 
void Diderot_KDTree_SwapValues(uint32_t * a, uint32_t *b){
    uint32_t temp;
    temp = *a;
    *a = *b;
    *b = temp;
}

@STRANDTY@ * seq_median(@PREFIX@World_t *wrld, uint32_t * spatialIndices, int start, int end, int dim)
{
    if(end <= start) return NULL;
    if(end == start + 1) return wrld->inState[spatialIndices[start]];;
    int curr, store, mid =  start + (end-start)/2;
    Diderot_real_t pivot;
    int count = 0; 
    while(true) {
        
        pivot = Diderot_KDTree_GetPosValue(wrld->inState[spatialIndices[mid]],dim);

        //printf("Pivot = %f\n",pivot); 

        Diderot_KDTree_SwapValues(&spatialIndices[mid],&spatialIndices[end -1]);

        for (store = curr = start; curr < end; curr++) {
            Diderot_real_t currVal = Diderot_KDTree_GetPosValue(wrld->inState[spatialIndices[curr]],dim); 
            if (currVal < pivot) {
                if (curr != store)
                    Diderot_KDTree_SwapValues(&spatialIndices[curr], &spatialIndices[store]);
                store++;
            }
        }
        Diderot_KDTree_SwapValues(&spatialIndices[store],&spatialIndices[end -1]);

        /* median has duplicate values */
        Diderot_real_t storeVal = Diderot_KDTree_GetPosValue(wrld->inState[spatialIndices[store]],dim); 
        Diderot_real_t midVal = Diderot_KDTree_GetPosValue(wrld->inState[spatialIndices[mid]],dim);
        if (storeVal  == midVal){
            return wrld->inState[spatialIndices[mid]];
        }
        if (store > mid)    end = store;
        else        start = store;


    }
    return NULL;
}

@STRANDTY@ * find_median(@PREFIX@World_t *wrld,SpatialWorkerArgs_t * worker,int start, int end, int dim)
{
      SpatialScheduler_t * sched  =  wrld->spatialSched; 

    if((end-start) <= sched->seqThreshold){
        return seq_median(wrld,wrld->spatialSched->spatialIn,start,end,dim); 
    }else {
        pthread_mutex_lock (&sched->barrier);
        sched->dim = dim; 
        sched->isRunning = true; 
        sched->start = start; 
        sched->end = end; 
        sched->nextStrand = start; 
        pthread_cond_broadcast(&sched->runWait); 
        pthread_mutex_unlock (&sched->barrier); 
        int mid = start + (end - start)/2; 
        uint32_t * spatial = Diderot_Par_Median_Search(worker,sched->start,sched->end,dim);
        @STRANDTY@ * m = wrld->inState[spatial[mid]]; 

        if(spatial == wrld->spatialSched->spatialOut) {
          uint32_t * temp = wrld->spatialSched->spatialOut; 
          wrld->spatialSched->spatialOut = wrld->spatialSched->spatialIn; 
          wrld->spatialSched->spatialIn = temp; 
        }
        return m; 
    }

}
/* A recursive tree building function */ 
KDNode_t * Diderot_KDTree_BuildHelper(@PREFIX@World_t *wrld, SpatialScheduler_t * sched,SpatialWorkerArgs_t * worker, int start, int len, int axis)
{
	KDNode_t* newNode = NULL;
	bool isDone = false; 
	@STRANDTY@ * n;

	if (!len) return NULL;
  int end = start + len; 
	int mid = start + (end-start)/2;

  if ((n = find_median(wrld,worker,start,end,axis))) {
      
      newNode = Diderot_KDTree_GrabNode(wrld->spatialTree); 
    
      newNode->strandId = n->strandId; 
      axis = (axis + 1) % wrld->spatialTree->dim;

      newNode->left  = Diderot_KDTree_BuildHelper(wrld,sched,worker,start,mid - start, axis);
      newNode->right = Diderot_KDTree_BuildHelper(wrld,sched,worker,mid + 1,start + len - (mid+ 1), axis);
  }

  return newNode;

	/*if ((n = find_median(wrld,start,start + len,axis))) {
        pthread_mutex_lock (&sched->lock);
            newNode = Diderot_KDTree_GrabNode(wrld->spatialTree); 
        pthread_mutex_unlock (&sched->lock);
		
        newNode->strandId = n->strandId; 
		  axis = (axis + 1) % wrld->spatialTree->dim;

        // If the depth is less than zero or there's not enough work to perform in parallel 
        // then run the sequential version.
        if(len < sched->seqThreshold){
    		    newNode->left  = Diderot_KDTree_BuildHelper(wrld,sched,start,mid - start, axis);
    		    newNode->right = Diderot_KDTree_BuildHelper(wrld,sched,mid + 1,start + len - (mid+ 1), axis);
        }else{
            //Have a woker build the right side of the tree. 
        	newNode->axis = axis;
     	    newNode->len = start + len - (mid + 1);
     	    newNode->start = mid + 1;


  	        SpatialWorkerArgs_t * worker = Diderot_Grab_SpatailWorker(sched,TREE_WORKER,newNode,&isDone);

        	    if(!worker){
    		        newNode->left  = Diderot_KDTree_BuildHelper(wrld,sched,start,mid - start, axis);
    		        newNode->right = Diderot_KDTree_BuildHelper(wrld,sched,mid + 1,start + len - (mid+ 1), axis);
    	        }else {
    		        newNode->left  = Diderot_KDTree_BuildHelper(wrld,sched,start,mid - start, axis);
    	        }
        }
	} */ 
}
KDNode_t * Diderot_KDTree_BuildHelper2(
  KDTree_t * tree,  
  @STRANDTY@ ** strands, 
  uint32_t len, 
  uint32_t axis,
  uint32_t dim)
{
  @STRANDTY@ ** median; 
  KDNode_t * newNode = 0; 

    if(!len) return 0; 

   // double spatialT0 = airTime();
  if((median = Diderot_KDTree_Seq_FindMedian(strands,strands + len,axis,dim))) { 
   // double spatialTotalTime = (airTime() - spatialT0);  
  //   if(spatialTotalTime >= 0.03)
    //    printf("time to sort=%f\n",spatialTotalTime); 

    newNode = Diderot_KDTree_GrabNode(tree); 
    newNode->strandId = median[0]->strandId; 
    axis = (axis + 1) % tree->dim; 
    newNode->left = Diderot_KDTree_BuildHelper2(tree,strands, median - strands, axis,dim); 
    newNode->right = Diderot_KDTree_BuildHelper2(tree,median + 1, strands + len - (median + 1),axis,dim); 
  }
  return newNode; 
} 
/* A recursive tree building function */ 
/*void Diderot_KDTree_Build(@PREFIX@World_t *wrld, uint32_t id, double * spatialTotalTime) 
{
    SpatialScheduler_t * sched  =  wrld->spatialSched; 
    static @STRANDTY@ ** spatialStrandsCpy=NULL; 

    if(sched->leaderId != id) { 
        spatialWorker(&sched->workers[id]); 
        return; 
    }

    if(!spatialStrandsCpy) { 
        spatialStrandsCpy = (@STRANDTY@**)CheckedAlloc(sizeof(@STRANDTY@ *)* wrld->numStrands);
    }

    KDTree_t * tree  = wrld->spatialTree; 
    memcpy(spatialStrandsCpy,wrld->inState, sizeof(@STRANDTY@ *) * wrld->numStrands);
    Diderot_KDTree_PoolClear(tree); 
    tree->root = Diderot_KDTree_BuildHelper2(tree,spatialStrandsCpy,*(tree->numOfStrands),0,tree->dim);
  
    pthread_mutex_lock (&sched->barrier);
    sched->isTreeBuilt = true; 
    pthread_cond_broadcast(&sched->runWait); 
    pthread_mutex_unlock (&sched->barrier); 
}*/
void Diderot_KDTree_Build(@PREFIX@World_t *wrld, uint32_t id, double * spatialTotalTime) 
{
    SpatialScheduler_t * sched  =  wrld->spatialSched; 
    uint32_t blockSize = wrld->numStrands/wrld->sched->numWorkers;
    uint32_t start = id * blockSize; 
    uint32_t end = (wrld->sched->numWorkers - 1 == id) ? wrld->numStrands : start + blockSize; 
    static bool isInited = false; 

    if(sched->leaderId != id) { 
        spatialWorker(&sched->workers[id]); 
     }else {
        sched->startTime = airTime();
        KDTree_t * tree  = wrld->spatialTree; 
        //int numStrands = *(tree->numOfStrands); 
      //  memcpy(wrld->spatialTmpIndices,wrld->spatialIndices, sizeof(uint32_t)*@SPATIAL_DIMENSION@ * wrld->numStrands);
        Diderot_KDTree_PoolClear(tree);   
        /* Sort the position data */ 
        /*double spatialST0 = airTime();
        Diderot_Spatial_ParMergeSort(wrld,sched,wrld->spatialIndices,wrld->spatialTmpIndices,0,numStrands -1,33390,0);
        if(@SPATIAL_DIMENSION@ > 1){
            Diderot_Spatial_ParMergeSort(wrld,sched,wrld->spatialIndices,wrld->spatialTmpIndices,wrld->numStrands,(2 * wrld->numStrands)-1,33390,1);
            if(@SPATIAL_DIMENSION@ > 2){
                Diderot_Spatial_ParMergeSort(wrld,sched,wrld->spatialIndices,wrld->spatialTmpIndices,wrld->numStrands * 2,(3 * wrld->numStrands)-1,33390,2);
            }
        }*/ 
       // double spatialTotalTimeS = (airTime() - spatialST0);   
        // if(spatialTotalTimeS >= 0.03)
        //    printf("maybe fixed parallel time to sort=%f\n",spatialTotalTimeS);  
       // double spatialST0 = airTime();
        //preSort(wrld,sched,0,*(tree->numOfStrands),0); 
       // double spatialTotalTimeS = (airTime() - spatialST0);   
        // if(spatialTotalTimeS >= 0.03) 

       // pthread_mutex_lock(&sched->lock); 
         //   sched->numWorkers +=1; 
        //pthread_mutex_unlock(&sched->lock); 
        //treeWorker(&sched->workerData[id]);  

        tree->root = Diderot_KDTree_BuildHelper(wrld, wrld->spatialSched,&sched->workers[id], 0,*(tree->numOfStrands),0);
        sched->endTime = airTime();
        pthread_mutex_lock (&sched->barrier);
        sched->isTreeBuilt = true; 
        pthread_cond_broadcast(&sched->runWait); 
        pthread_mutex_unlock (&sched->barrier); 
       // static int times = 0; 
       // times++; 
       // checkTree(wrld, tree->root, 0,*(tree->numOfStrands),0);
       // if(times == 2){
        //  printTree(tree->root); 

       //    exit(0); 
       // }
       // printf("!!!!!!DONE!!!!\n"); 
        //pthread_mutex_lock(&sched->lock); 
          //  sched->numWorkers +=1; 
        //pthread_mutex_unlock(&sched->lock); 
       // treeWorker(&sched->workerData[id]);  
        *(spatialTotalTime) = *(spatialTotalTime) + (sched->endTime - sched->startTime); 
      //printf("Build Done\n"); 
      }
      //Reset spatial in for the next iteration 
      for(int i = start; i < end; i++){
        wrld->spatialSched->spatialIn[i] = i; 
      }
}

void Diderot_KDTree_Init(@PREFIX@World_t *wrld) 
{
    Diderot_KDTree_Alloc(&wrld->spatialTree,&wrld->numStrands,@SPATIAL_DIMENSION@);
  //  wrld->spatialIndices= NEWVEC(uint32_t,@SPATIAL_DIMENSION@ * wrld->numStrands); 

    SpatialScheduler_t * scheduler = (SpatialScheduler_t *)CheckedAlloc(sizeof(SpatialScheduler_t));

    if ((pthread_cond_init (&(scheduler->runWait), 0)) != 0){
        printf("Error in initializing the spatial scheduler's run wait cond");
        exit(EXIT_FAILURE);
     }

    if ((pthread_mutex_init (&scheduler->barrier, 0)) != 0){
        printf("Error in initializing the spatial scheduler's barrier");
        exit(EXIT_FAILURE);
    }

    scheduler->spatialIn = NEWVEC(uint32_t, wrld->numStrands); 
    scheduler->spatialOut = NEWVEC(uint32_t, wrld->numStrands); 

    for(uint32_t i = 0; i < wrld->numStrands; i++){
            scheduler->spatialIn[i] = i; 
    }

    int nWorkers = wrld->sched->numWorkers;

    uint32_t splitterSize = (wrld->numStrands/nWorkers)/PAR_MEDIANS_SIZE; 
    
    scheduler->spilttersIn =  NEWVEC(uint32_t,  splitterSize * nWorkers);

    SpatialWorkerArgs_t * workerArgs = NEWVEC(SpatialWorkerArgs_t, nWorkers);
    scheduler->workers = workerArgs; 	
    scheduler->seqThreshold = SPATIAL_SEQ_THERSHOLD;
    scheduler->numRunning = nWorkers; 
    scheduler->nextStrand = 0; 
    scheduler->startRunning = nWorkers; 
    scheduler->endTime = 0.0; 
    scheduler->startTime = 0.0;  
    wrld->spatialSched = scheduler;
    scheduler->wrld = wrld; 
    for(uint32_t i = 0; i < nWorkers; i++){
          workerArgs[i].sched = scheduler;
          workerArgs[i].id = i; 
	 }   
    scheduler->lessCount = 0; 
    scheduler->greaterCount = 0;
    scheduler->isTreeBuilt = false; 

}
void Diderot_KDTree_Realloc(@PREFIX@World_t *wrld) 
{
    wrld->spatialSched->spatialIn = (uint32_t*)CheckedAlloc(sizeof(uint32_t)* wrld->numStrands);
    wrld->spatialSched->spatialIn = (uint32_t*)CheckedAlloc(sizeof(uint32_t)* wrld->numStrands);

    uint32_t nWorkers = wrld->sched->numWorkers;
    uint32_t splitterSize = (wrld->numStrands/nWorkers)/PAR_MEDIANS_SIZE; 
    wrld->spatialSched->spilttersIn =  (uint32_t*)CheckedAlloc(sizeof(uint32_t)* splitterSize * nWorkers);

    uint32_t len =  @SPATIAL_DIMENSION@ * wrld->numStrands; 

    for(uint32_t  i = 0; i < wrld->numStrands; i++){
	    wrld->spatialSched->spatialIn[i]= i; 
    }
}

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0