Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /branches/lamont/src/compiler/c-target/fragments/par-run.in
ViewVC logotype

View of /branches/lamont/src/compiler/c-target/fragments/par-run.in

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2548 - (download) (annotate)
Wed Feb 26 14:22:24 2014 UTC (5 years, 7 months ago) by lamonts
File size: 8214 byte(s)
Fixed global reduction block
/* Worker-thread entry point: repeatedly executes super steps over the strands
 * of a world until the computation is finished.
 *
 * Each iteration of the main loop, as written below:
 *   1. meets the other workers at a barrier; the last worker to arrive resets
 *      the shared scheduler counters (and swaps inState/outState when
 *      DIDEROT_DUAL_STATE is defined) and broadcasts on sched->barrier;
 *   2. terminates the thread if no strands are active or the step limit has
 *      been reached;
 *   3. pulls blocks of at most BLOCK_SIZE strand indices from the shared
 *      scheduler and calls the strand update function on every strand whose
 *      status entry is zero;
 *   4. meets at a second barrier, where the dead strands are subtracted from
 *      numActive and the last arrival accounts for newly created strands;
 *   5. runs the global phase;
 *   6. stabilizes the strands marked DIDEROT_STABILIZE inside this worker's
 *      own contiguous index range and subtracts them from numActive.
 *
 * arg points to this worker's WorkerArg_t; the fields read here are wrld, id
 * and maxNSteps.  The loop is only left through pthread_exit.
 */
static void *@PREFIX@Worker (void *arg)
{
    WorkerArg_t *myArg = (WorkerArg_t *)arg;
    @PREFIX@World_t *wrld = myArg->wrld;
    @PREFIX@Globals_t *glob = wrld->globals;
    Diderot_Sched_t *sched = wrld->sched;
    Strand_t *strand = &(wrld->strandDesc[0]);

  // Static partition of the strand indices used by the stabilization pass at the
  // end of each super step: worker `id` owns [start, limit), and the last worker
  // also takes the remainder of the integer division.
  // NOTE(review): this range is computed once, but wrld->numStrands is increased
  // below when new strands are created; indices added later appear to fall
  // outside every worker's range -- confirm whether that is intended.
    int nStrandsPerWorker = wrld->numStrands / sched->numWorkers;
    int start = myArg->id * nStrandsPerWorker;
    int limit;
  // this worker's ticket for the global-reduction phase; only assigned when
  // DIDEROT_PARALLEL_REDUCTION is defined
    uint32_t globalReduceId = 0; 
    if (sched->numWorkers-1 == myArg->id)
        limit = wrld->numStrands;
    else
        limit = start + nStrandsPerWorker;


    while (true) {
      // barrier synchronization at start of super step
      // NOTE(review): pthread_cond_wait is not wrapped in a predicate loop, so a
      // spurious wakeup would let a worker pass the barrier early.
        pthread_mutex_lock (&sched->lock);
            if (sched->numIdle+1 < sched->numWorkers) {
                sched->numIdle++;
                pthread_cond_wait (&sched->barrier, &sched->lock);
            }
            else {
              // all other workers are idle, so we can proceed after some initialization
                sched->numIdle = 0;
                sched->numAvail = wrld->numStrands;  // includes inactive strands
                sched->nextStrand = 0;
      #ifdef DIDEROT_SPATIAL_COM      
                wrld->spatialSched->numRunning = 0; 
                wrld->spatialSched->nextWorker = 0;
                wrld->spatialSched->buildIsDone = false;
#endif 

#ifdef DIDEROT_DUAL_STATE
              // swap in and out
                @STRANDTY@ **tmp = wrld->inState;
                wrld->inState = wrld->outState;
                wrld->outState = tmp;
#endif
              // release the workers blocked in the wait above
                pthread_cond_broadcast (&sched->barrier);
            }
        pthread_mutex_unlock (&sched->lock);

      // if there are no active strands left or we've hit the maximum number of steps, then we're done
      // (numActive and numSteps are read here without holding sched->lock)
        if ((sched->numActive == 0) || (sched->numSteps >= myArg->maxNSteps)) {
          // wake a thread blocked on runWait, then terminate this worker
            pthread_cond_signal (&sched->runWait);
            pthread_exit (0);
        }

      // template placeholder, replaced when this fragment is instantiated
        @CALL_BUILD_TREE_FN@

      // iterate until there is no more work to do
        int blkStart, blkSize;
        int numDead = 0;  // strands that died in this worker's blocks during this step
        do {
          // grab some work: claim the next block of at most BLOCK_SIZE indices; once
          // numAvail reaches zero the claimed block is empty and the loop ends
            pthread_mutex_lock (&sched->lock);
                blkStart = sched->nextStrand;
                blkSize = (sched->numAvail >= BLOCK_SIZE) ? BLOCK_SIZE : sched->numAvail;
                sched->numAvail -= blkSize;
                sched->nextStrand += blkSize;
            pthread_mutex_unlock (&sched->lock);
          // update the strands
            for (int i = blkStart;  i < blkStart+blkSize;  i++) {
              // only strands whose status entry is zero are updated
                if (! wrld->status[i]) {
                    #ifdef DIDEROT_DUAL_STATE
                    StrandStatus_t sts = @STRAND@_Update(wrld, wrld->inState[i]->strandId);
                    #else 
                    StrandStatus_t sts = @STRAND@_Update(wrld, wrld->state[i].strandId);
                    #endif 
                    switch (sts) {
                      case DIDEROT_STABILIZE:
                      // only marked here; the stabilize method runs after the global phase
                        wrld->status[i] = DIDEROT_STABILIZE;
                        break;
                      case DIDEROT_DIE:
                        wrld->status[i] = DIDEROT_DIE;
                        numDead++;
                        break;
                      default:
                        break;
                    }
                }
                else {
                    assert ((wrld->status[i] == DIDEROT_STABLE) || (wrld->status[i] == DIDEROT_DIE));
                }
            }
        } while (blkSize > 0);

      // barrier synchronization
      // NOTE(review): unlike the barrier at the top of the loop, the last-arrival
      // branch below does not broadcast on sched->barrier.  Nothing visible in this
      // function releases the workers blocked in the wait here before the next super
      // step's broadcast -- confirm where they are expected to be woken.
        pthread_mutex_lock (&sched->lock);
            sched->numActive -= numDead;
            if (sched->numIdle+1 < sched->numWorkers) {
                sched->numIdle++;
#ifdef DIDEROT_PARALLEL_REDUCTION
              // take a ticket from the shared counter (sched->lock is held here)
                globalReduceId = wrld->globSched->idCounter++; 
#endif 
                pthread_cond_wait (&sched->barrier, &sched->lock);
            }
            else {
              // all other workers are idle, so we can proceed
                // Increase the number of active strands if new strands were created
                if (wrld->poolInfo.nNewStrand > 0) { 
                    sched->numActive += wrld->poolInfo.nNewStrand;
                    wrld->numStrands += wrld->poolInfo.nNewStrand;
                    wrld->poolInfo.nNewStrand = 0; 
#ifdef DIDEROT_SPATIAL_COM
        			Diderot_KDTree_Realloc(wrld); 
#endif 
                }
                sched->numIdle = 0;
#ifdef DIDEROT_PARALLEL_REDUCTION
              // the last worker to arrive takes the final ticket, resets the
              // global-reduction scheduler and records its own ticket as the leader's
                globalReduceId = wrld->globSched->idCounter++; 
                wrld->globSched->idCounter = 0; 
                wrld->globSched->nextStrand = 0;
                wrld->globSched->numIdle = 0;
                wrld->globSched->done = false; 
                wrld->globSched->leaderId = globalReduceId; 
#endif 
                sched->numSteps++;
     
            }
        pthread_mutex_unlock (&sched->lock);

/**** If there is a global computation phase, it goes here ****/
#ifdef DIDEROT_PARALLEL_REDUCTION
      // the worker whose ticket equals leaderId calls Diderot_GlobalFun and afterwards
      // sets the done flag and broadcasts on the global scheduler's barrier; every
      // other worker calls Diderot_GlobalReduceRunFun
        if(wrld->globSched->leaderId != globalReduceId) {
            Diderot_GlobalReduceRunFun(wrld,&globalReduceId);  
        }else { 
            Diderot_GlobalFun(wrld,&globalReduceId); 
            pthread_mutex_lock (&wrld->globSched->lock);
            wrld->globSched->idCounter=0; 
            wrld->globSched->done = true; 
                    pthread_cond_broadcast(&wrld->globSched->barrier);
            pthread_mutex_unlock (&wrld->globSched->lock);
        }
#else 
      // without parallel reduction every worker that reaches this point calls the
      // global function itself
        Diderot_GlobalFun(wrld,0); 
#endif 
      // stabilize any threads that need stabilization.  Each worker is responsible for
      // a contiguous region of the strands
// FIXME: once we switch to dynamic lists of strand blocks, then we can use finer-grain tracking
        int numStabilized = 0;
        for (int i = start;  i < limit;  i++) {
            if (wrld->status[i] == DIDEROT_STABILIZE) {
#ifdef DIDEROT_DUAL_STATE
              // stabilize the strand's state.  Note that the outState has been set by
              // the last call to update, so we make the inState be the target of the
              // stabilize method.
		@STRAND@_Stabilize(glob, wrld->outState[i], wrld->inState[i]);
              // copy the stabilized state back so that both buffers hold the same bytes
                memcpy (wrld->outState[i], wrld->inState[i], strand->stateSzb);
#else
		@STRAND@_Stabilize(glob, &wrld->state[i]);
#endif
                wrld->status[i] = DIDEROT_STABLE;
                numStabilized++;
            }
        }
      // adjust the numActive count
#if defined(HAVE_BUILTIN_ATOMIC_OPS)
        __sync_fetch_and_sub(&sched->numActive, numStabilized);
#else
        pthread_mutex_lock (&sched->lock);
            sched->numActive -= numStabilized;
        pthread_mutex_unlock (&sched->lock);
#endif
    } // end while

  // not reached: the loop above is only left through pthread_exit
    return 0;

}

//! Run the Diderot program (parallel version)
//!
//! Creates one worker thread per scheduler worker, blocks until every worker
//! thread has terminated, and then reports the number of super steps executed.
//! Exits the process with status 1 if a worker thread cannot be created.
//! \param wrld the world-state of the Diderot program
//! \param maxNSteps the limit on the number of super steps; 0 means unlimited
//! \return the number of steps taken.
uint32_t @PREFIX@Run (@PREFIX@World_t *wrld, uint32_t maxNSteps)
{
    Diderot_Sched_t *sched = wrld->sched;

    if (maxNSteps == 0) maxNSteps = 0xffffffff;  // essentially unlimited

  // Start worker threads
    int nWorkers = sched->numWorkers;
    WorkerArg_t *args = NEWVEC(WorkerArg_t, nWorkers);
    pthread_t *workers = NEWVEC(pthread_t, nWorkers);
    if (wrld->verboseFlg) fprintf (stderr, "initializing %d workers ...\n", nWorkers);
    sched->numIdle = 0;
    sched->numSteps = 0;
#ifdef DIDEROT_DUAL_STATE
  // swap in and out; the workers swap them again at the start of each super step
    @STRANDTY@ **tmp = wrld->inState;
    wrld->inState = wrld->outState;
    wrld->outState = tmp;
#endif 
    for (int i = 0; i < nWorkers; i++) {
        args[i].wrld = wrld;
        args[i].maxNSteps = maxNSteps;
        args[i].id = i;
        if (pthread_create (&workers[i], NULL, @PREFIX@Worker, (void *)&(args[i])) != 0) {
            fprintf (stderr, "unable to create worker thread\n");
            exit (1);
        }
    }

  // wait for the computation to finish.  The workers are joined instead of being
  // detached and awaited through a bare condition wait on sched->runWait: a wait
  // without a predicate hangs forever if a worker signals before this thread starts
  // waiting, and it can also return on a spurious wakeup.  Joining also guarantees
  // that no worker is still reading its args entry when the array is released.
    for (int i = 0; i < nWorkers; i++) {
        pthread_join (workers[i], NULL);
    }

    FREE(workers);
    FREE(args);

  // every worker has terminated, so numSteps is no longer being modified
    return sched->numSteps;
}


root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0