Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /trunk/src/compiler/cl-target/fragments/sched.in
ViewVC logotype

Annotation of /trunk/src/compiler/cl-target/fragments/sched.in

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1679 - (view) (download)

1 : jhr 1671 //! \brief Retrieve a block's worth (BLK_SZ) of strands for a StrandBlock_t.
2 :     //! \param sched global scheduler state
3 :     //! \param blocks global array of strand block indicies
4 :     //! \param lBlk pointer to global storage for the strand block
5 :     //! \return true if there were strands assigned to the strand block, otherwise false
6 :     inline bool GrabWork (
7 :     __global SchedState_t *sched,
8 :     __global int *blockIndxs,
9 :     __local StrandBlock_t *lBlk)
10 :     {
11 :     // Get a block of strands
12 :     int startStrandIdx = atom_add(&sched->nextStrand, BLK_SZ);
13 :     int idx = 0, count = 0;
14 :    
15 :     // For each work-item in the workgroup
16 :     for (int i = startStrandIdx, count = 0; i < sched->numStrands && count < BLK_SZ; i++,count++) {
17 :     // Increment the number of active strands and assign each work-item a strand to process
18 :     lBlk->nActive++;
19 :     blockIndxs[lBlk->blkIdx * BLK_SZ + idx] = i;
20 :     idx++;
21 :     }
22 :     return (idx != 0);
23 :     }
24 :    
25 :     //! \brief Get a block of strands from the scheduler's queue pool.
26 :     //! \param sched global scheduler state
27 :     //! \param blockIndxs the strand indicies for a strand block
28 :     //! \param queue global scheduler's queue
29 :     //! \param blocks global array of strand blocks
30 :     //! \param lBlk pointer to local storage for the strand block
31 :     inline void GetBlock (
32 :     __global SchedState_t *sched,
33 :     __global int *blockIndxs,
34 :     __global int *queue,
35 :     __global StrandBlock_t *blocks,
36 :     __local StrandBlock_t *lBlk)
37 :     {
38 :    
39 :     // Retrieve a block from the list
40 :     int blkIdx = atom_inc(&(sched->sId));
41 :    
42 :     // Check to see if the index retrieved is less than the
43 :     // number of blocks on the queue
44 :     if (blkIdx < sched->queueSize) {
45 :     // Copy over the global data for the block into local memory
46 :     lBlk->blkIdx = (blocks[queue[blkIdx]]).blkIdx;
47 :     lBlk->nActive = (blocks[queue[blkIdx]]).nActive;
48 :     lBlk->nDead = (blocks[queue[blkIdx]]).nDead;
49 :     lBlk->nStabilizing = (blocks[queue[blkIdx]]).nStabilizing;
50 :     lBlk->nDying = (blocks[queue[blkIdx]]).nDying;
51 :     mem_fence(CLK_LOCAL_MEM_FENCE);
52 :    
53 :     // If the block has no active strands then
54 :     if (lBlk->nActive == 0) {
55 :    
56 :     // Check to see if there is any strands left to processs.
57 :     // If there are no strands left to process than make this block inactive
58 :     if (!GrabWork(sched,blockIndxs,lBlk))
59 :     lBlk->nActive = -1;
60 :     else
61 :     blocks[lBlk->blkIdx].nActive = lBlk->nActive;
62 :     }
63 :     }
64 :     else // if there isn't any queue items then notify the the workgroup
65 :     lBlk->nActive = -1;
66 :     }
67 :    
68 :     //! \brief Copies the strand block back from __local space to __global space
69 :     //! \param gBlock global strand block
70 :     //! \param lBlock local strand block
71 :     inline void StrandBlock_Copy (__global StrandBlock_t * gBlock, __local StrandBlock_t * lBlock)
72 :     {
73 :     gBlock->nActive = lBlock->nActive;
74 :     gBlock->nDead = lBlock->nDead;
75 :     gBlock->nStabilizing = lBlock->nStabilizing;
76 :     gBlock->nDying = lBlock->nDying;
77 :     }
78 :    
79 :     //! \brief Get a block of strands from the scheduler's todo list.
80 :     //! \param sched global scheduler state
81 :     //! \param queue global scheduler's todo queue
82 :     //! \param blocks global array of strand blocks
83 :     //! \param lBlk pointer to local storage for the strand block
84 :     inline void GetBlockFromTodoList (
85 :     __global SchedState_t *sched,
86 :     __global int *todoList,
87 :     __global StrandBlock_t *blocks,
88 :     __local StrandBlock_t *lBlk)
89 :     {
90 :     // Retrieve a block from the todolist
91 :     int blkIdx = atom_inc(&(sched->sId));
92 :    
93 :     // Check to see if the index retrieved is less than the
94 :     // number of blocks on the todo queue
95 :     if (blkIdx < sched->todoSize) {
96 :     // Copy over the global data for the block into local memory
97 :     lBlk-> blkIdx = (blocks[todoList[blkIdx]]).blkIdx;
98 :     lBlk->nActive = (blocks[todoList[blkIdx]]).nActive;
99 :     lBlk->nDead = (blocks[todoList[blkIdx]]).nDead;
100 :     lBlk->nStabilizing = (blocks[todoList[blkIdx]]).nStabilizing;
101 :     lBlk->nDying = (blocks[todoList[blkIdx]]).nDying;
102 :     mem_fence(CLK_LOCAL_MEM_FENCE);
103 :     }
104 :     else // if there isn't any queue items then notify the the workgroup
105 :     lBlk->nActive = -1;
106 :    
107 :     }
108 :    
109 :     // A native parallel-prefix-scan algorithm
110 :     //! \param input the block indicies for a strand block
111 :     //! \param status global scheduler's status array
112 :     //! \param output the pre-stable or pre-die arrays
113 :     //! \param statusTy type of status for the output data
114 :     //! \param n the number of strand indicies
115 :     //! \param temp pointer to local storage for the scan algorithm
116 :     inline void scan (
117 :     __global int *input,
118 :     __global int *status,
119 :     __local int *output,
120 :     StrandStatus_t statusTy,
121 :     int n,
122 :     __local int *temp)
123 :     {
124 :     int thid = get_local_id(0);
125 :     int pout = 0, pin = 1;
126 :    
127 :     // load input into local memory
128 :     // exclusive scan: shift right by one and set first element to 0
129 :     if (thid == 0 || status[(*(input + thid - 1))] != (int)statusTy)
130 :     temp[thid] = 0;
131 :     else
132 :     temp[thid] = 1;
133 :    
134 :     barrier(CLK_LOCAL_MEM_FENCE);
135 :    
136 :     for (int offset = 1; offset < n; offset *= 2) {
137 :     pout = 1 - pout;
138 :     pin = 1 - pout;
139 :    
140 :     if(thid >= offset)
141 :     temp[pout * n + thid] = temp[pin * n + thid] + temp[pin * n + thid - offset];
142 :     else
143 :     temp[pout * n + thid] = temp[pin * n + thid];
144 :    
145 :     barrier(CLK_LOCAL_MEM_FENCE);
146 :     }
147 :     output[thid] = temp[pout* n + thid];
148 :     }
149 :    
150 :     // Scheduler Meta-Clearing Kenerl: Clears the queue sizes and strand id acculator
151 :     //! \param sched global scheduler state
152 :     __kernel void Diderot_SchedUpdateKernel (__global SchedState_t *sched)
153 :     {
154 :     sched->sId = 0;
155 : jhr 1679 if (sched->clearQueueSz == 1) {
156 : jhr 1671 sched->clearQueueSz = 0;
157 :     sched->queueSize = 0;
158 : jhr 1679 }
159 :     else {
160 : jhr 1671 sched->clearQueueSz = 1;
161 :     sched->todoSize = 0;
162 :     }
163 :     }
164 :    
165 :     // Compaction Kernel: compact strands and replicate stable state
166 :     //! \param status global strand status array
167 :     //! \param sched global scheduler state
168 :     //! \param blocks global array of strand blocks
169 :     //! \param blockIndxs the strand indicies for a strand block
170 :     //! \param queue global scheduler's queue
171 :     //! \param todo global scheduler's todo queue
172 :     //! \param preStable pointer to local storage for number of stable strands with lower index
173 :     //! \param preDead pointer to local storage for number of dead strands with lower index
174 :     //! \param prefixScanTemp pointer to local storage for prefix-scan algorithm
175 :     __kernel __attribute__((reqd_work_group_size(BLK_SZ, 1, 1)))
176 :     void Diderot_CompactionKernel (
177 :     __global int *status, // strand status array
178 :     __global SchedState_t *sched, // scheduler state
179 :     __global StrandBlock_t *blocks, // array of scheduler blocks
180 : jhr 1679 __global int *blockIndxs,
181 :     __global int *queue,
182 :     __global int *todoList,
183 :     __local StrandBlock_t *bp,
184 :     __local int *preStable,
185 :     __local int *preDead,
186 :     __local int *prefixScanTemp
187 :     )
188 : jhr 1671 {
189 :     int id = get_local_id(0);
190 :     int qIdx;
191 :    
192 :     do {
193 : jhr 1679 if (id == 0) {
194 : jhr 1671 bool done;
195 :     do {
196 :     GetBlockFromTodoList(sched,todoList,blocks,bp);
197 :     done = true;
198 : jhr 1679 if (bp->nActive > 0) {
199 : jhr 1671 bp->nActive = bp->nActive - (bp->nStabilizing + bp->nDying);
200 :     bp->nDead += bp->nDying;
201 :     if (bp->nActive == 0) {
202 :     //Check to see if this block can execute more strands for the
203 :     //next iteration
204 :     GrabWork(sched,blockIndxs,bp);
205 :     bp->nStabilizing = 0;
206 :     bp->nDying = 0;
207 :     StrandBlock_Copy(&blocks[bp->blkIdx],bp);
208 :     done = false;
209 :     }
210 :     }
211 : jhr 1679 } while (!done);
212 : jhr 1671 }
213 :     barrier (CLK_LOCAL_MEM_FENCE);
214 :    
215 :     if (bp->nActive > 0) {
216 : jhr 1679 int idx = blockIndxs[bp->blkIdx * BLK_SZ + id];
217 : jhr 1671
218 :     scan(blockIndxs + bp->blkIdx * BLK_SZ,status,preStable,DIDEROT_STABLE,BLK_SZ,prefixScanTemp);
219 :     barrier(CLK_LOCAL_MEM_FENCE);
220 :     scan(blockIndxs + bp->blkIdx * BLK_SZ,status,preDead,DIDEROT_DIE,BLK_SZ,prefixScanTemp);
221 :     barrier(CLK_LOCAL_MEM_FENCE);
222 :    
223 :     // compaction
224 :     // with these arrays, we can then compute the new index of each slot in
225 :     // parallel as follows
226 : jhr 1679 int newIdx;
227 : jhr 1671 switch (status[idx]) {
228 :     case DIDEROT_ACTIVE:
229 : jhr 1679 newIdx = id - preStable[id] - preDead[id];
230 : jhr 1671 break;
231 :     case DIDEROT_STABLE:
232 : jhr 1679 newIdx = bp->nActive + preStable[id];
233 : jhr 1671 break;
234 :     case DIDEROT_DIE:
235 : jhr 1679 newIdx = bp->nActive + preStable[id] + preDead[id];
236 : jhr 1671 break;
237 :     }
238 : jhr 1679 blockIndxs[bp->blkIdx * BLK_SZ + newIdx] = idx;
239 : jhr 1671
240 :     barrier (CLK_LOCAL_MEM_FENCE);
241 : jhr 1679 if (id == 0) {
242 :     // put bp on scheduling queue
243 :     qIdx = atom_inc(&(sched->queueSize));
244 :     bp->nStabilizing = 0;
245 :     bp->nDying = 0;
246 :     StrandBlock_Copy(&blocks[bp->blkIdx],bp);
247 :     queue[qIdx] = bp->blkIdx;
248 : jhr 1671 }
249 :     }
250 :     } while (bp->nActive > 0);
251 :    
252 :     }

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0