Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /branches/vis12-cl/src/compiler/cl-target/fragments/cl-update-nobsp.in
ViewVC logotype

View of /branches/vis12-cl/src/compiler/cl-target/fragments/cl-update-nobsp.in

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3139 - (download) (annotate)
Fri Mar 27 14:03:38 2015 UTC (4 years, 3 months ago) by jhr
File size: 7733 byte(s)
  working on OpenCL issues
// number of strands to grab from global pool
//
#define GRAB_SZ		(2*BLK_SZ)

// update scheduler for no BSP
//
KERNEL void UpdateKern (__global @PREFIX@Sched_t *gSched, __global @PREFIX@Globals_t *glob, uint stepLimit)
{
    int myId = get_local_id(0);
    int myIdx;          // index of strand being executed by this lane
    int myNSteps;       // number of steps executed by the current strand in this lane
    int myMaxNSteps;    // max number of steps executed by this lane for a single strand

    if ((myId == 0) && (get_global_id(0) == 0)) {
      // global initialization
        gSched->nextStrand = 0;
    }
    barrier (CLK_GLOBAL_MEM_FENCE);

    __global StrandStatus_t *status = gSched->status;
    __global @STRANDTY@ *state = gSched->state;
    int myNStrands = gSched->nStrands;
    int myStabilized = 0;       // private count of # of stabilized strands run in this lane
#ifdef DIDEROT_HAS_DIE
    int myDied = 0;             // private count of # of dying strands run in this lane
#endif

    __local int nActive;                // number of currently active lanes
    __local int nIdle;                  // number of active lanes that need a strand
    __local int nextAvailStrand;        // next strand to schedule from locally allocated block
    __local int maxAvailStrand;         // max strand index of locally allocated block

  // used to compute per-scheduler stats
    __local int maxNSteps[BLK_SZ];
    __local int nStabilized[BLK_SZ];
#ifdef DIDEROT_HAS_DIE
    __local int nDied[BLK_SZ];
#endif

    if (myId == 0) {
      // grab initial block of strands
        nextAvailStrand = min (myNStrands, atomic_add (&(gSched->nextStrand), GRAB_SZ));
        maxAvailStrand = min (myNStrands, nextAvailStrand + GRAB_SZ);
      // initialize local scheduler state
        nActive = min(maxAvailStrand - nextAvailStrand, BLK_SZ);
        nIdle = 0;
    }
    barrier (CLK_LOCAL_MEM_FENCE);

  // index of this lane's strand
    myIdx = nextAvailStrand + myId;
    myNSteps = 0;
    barrier (0);

  // make a local copy of this lane's strand state
    @STRANDTY@ myStrand;
    StrandStatus_t sts;
    if (myIdx < myNStrands) {
        @STRAND@_CopyFromGlobal (&myStrand, &state[myIdx]);
        sts = status[myIdx];
    }
    if (myId == 0) {
        nextAvailStrand += BLK_SZ;
    }
    barrier (CLK_LOCAL_MEM_FENCE);

    while (nActive > 0) {
        if (myIdx < myNStrands) {
            if (sts == DIDEROT_ACTIVE) {
                sts = @STRAND@_Update (glob, &myStrand);
                ++myNSteps;
            }

#define GET_NEXT_STRAND                                                                 \
                do {                                                                    \
                    myMaxNSteps = max(myMaxNSteps, myNSteps);                           \
                    if ((myIdx = atomic_inc(&nextAvailStrand)) < maxAvailStrand) {      \
                        @STRAND@_CopyFromGlobal (&myStrand, &state[myIdx]);             \
                        sts = status[myIdx];                                            \
                        myNSteps = 0;                                                   \
                    }                                                                   \
                    else {                                                              \
                        sts = DIDEROT_IDLE;                                             \
                        atomic_inc (&nIdle);                                            \
                    }                                                                   \
                } while(0)
                    
            switch (sts) {
              case DIDEROT_ACTIVE:
                if (myNSteps >= stepLimit) {
                  // suspend this strand and get another
                    status[myIdx] = sts;
                    @STRAND@_CopyToGlobal (&state[myIdx], &myStrand);
                    GET_NEXT_STRAND;
                }
                break;
#ifdef DIDEROT_HAS_DIE
              case DIDEROT_DYING:
                myDied++;
                status[myIdx] = DIDEROT_DEAD;
              // get another strand to execute
                GET_NEXT_STRAND;
                break;
#endif
              case DIDEROT_STABILIZE:
                myStabilized++;
                status[myIdx] = DIDEROT_STABLE;
/* perhaps we can combine the stabilize and copy-to-global steps? */
                @STRAND@_Stabilize (glob, &myStrand);
                @STRAND@_CopyToGlobal (&state[myIdx], &myStrand);
              // get another strand to execute
                GET_NEXT_STRAND;
                break;
              default:
                break;
            }
        }
        barrier (CLK_LOCAL_MEM_FENCE);

        if (nIdle > 0) {
          // idle strands means that we need to get more strands from the global pool
            if (myId == 0) {
                if (gSched->nextStrand < myNStrands) {
                    // grab a new block of strands
                      nextAvailStrand = min (myNStrands, atomic_add (&(gSched->nextStrand), GRAB_SZ));
                      maxAvailStrand = min (myNStrands, nextAvailStrand + GRAB_SZ);
                    // the actual number of available strands
                      int nAvail = maxAvailStrand - nextAvailStrand;
                      if (nAvail < nIdle) {
                          nActive -= (nIdle - nActive);  // some lanes will become inactive
                      }
                      nIdle = 0;
                }
                else { // there are no more strands to schedule, so the idle lanes become inactive
                    nActive -= nIdle;
                    nIdle = 0;
                }
            }
            barrier (CLK_LOCAL_MEM_FENCE);
            if (sts == DIDEROT_IDLE) {
              // grab another strand to execute
                GET_NEXT_STRAND;
            }
        }

    } /* while */

    maxNSteps[myId] = myMaxNSteps;
    nStabilized[myId] = myStabilized;
#ifdef DIDEROT_HAS_DIE
    nDied[myId] = myDied;
#endif
    barrier (CLK_LOCAL_MEM_FENCE);

  // at this point either all strands in this workgroup have run to completion, so we compute
  // the scheduling info for this workgroup and then update the global info.
#if (BLK_SZ == 64)
    if (myId < 32) {
        maxNSteps[myId] = max(maxNSteps[myId], maxNSteps[myId+32]);
        nStabilized[myId] += nStabilized[myId+32];
#ifdef DIDEROT_HAS_DIE
        nDied[myId] += nDied[myId+32];
#endif
    }
#endif
    if (myId < 16) {
        maxNSteps[myId] = max(maxNSteps[myId], maxNSteps[myId+16]);
        nStabilized[myId] += nStabilized[myId+16];
#ifdef DIDEROT_HAS_DIE
        nDied[myId] += nDied[myId+16];
#endif
    }
    if (myId < 8) {
        maxNSteps[myId] = max(maxNSteps[myId], maxNSteps[myId+8]);
        nStabilized[myId] += nStabilized[myId+8];
#ifdef DIDEROT_HAS_DIE
        nDied[myId] += nDied[myId+8];
#endif
    }
    if (myId < 4) {
        maxNSteps[myId] = max(maxNSteps[myId], maxNSteps[myId+4]);
        nStabilized[myId] += nStabilized[myId+4];
#ifdef DIDEROT_HAS_DIE
        nDied[myId] += nDied[myId+4];
#endif
    }
    if (myId < 2) {
        maxNSteps[myId] = max(maxNSteps[myId], maxNSteps[myId+2]);
        nStabilized[myId] += nStabilized[myId+2];
#ifdef DIDEROT_HAS_DIE
        nDied[myId] += nDied[myId+2];
#endif
    }
    if (myId == 0) {
        maxNSteps[myId] = max(maxNSteps[myId], maxNSteps[myId+1]);
        nStabilized[myId] += nStabilized[myId+1];
#ifdef DIDEROT_HAS_DIE
        nDied[myId] += nDied[myId+1];
#endif
        atomic_add (&(gSched->nStable), nStabilized[0]);
    }
    /* FIXME: need to compute global max steps */

} /* UpdateKern */

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0