13 |
#include <assert.h> |
#include <assert.h> |
14 |
#include <Diderot/diderot.h> |
#include <Diderot/diderot.h> |
15 |
#include <pthread.h> |
#include <pthread.h> |
16 |
#include <semaphore.h> |
|
17 |
|
#ifdef HAVE_BUILTIN_ATOMIC_OPS |
18 |
|
STATIC_INLINE uint32_t AtomicInc (uint32_t *x) |
19 |
|
{ |
20 |
|
return __sync_add_and_fetch(x, 1); |
21 |
|
} |
22 |
|
STATIC_INLINE uint32_t AtomicDec (uint32_t *x) |
23 |
|
{ |
24 |
|
return __sync_sub_and_fetch(x, 1); |
25 |
|
} |
26 |
|
#else |
27 |
|
# error atomic operations not supported |
28 |
|
#endif |
29 |
|
|
30 |
// The number of strands a worker will take for processing at one time |
// The number of strands a worker will take for processing at one time |
31 |
#define BLOCK_SIZE 32 |
#define BLOCK_SIZE 32 |
36 |
int32_t *base; // nDims array of base indices |
int32_t *base; // nDims array of base indices |
37 |
uint32_t *size; // nDims array of iteration sizes |
uint32_t *size; // nDims array of iteration sizes |
38 |
uint32_t numStrands; // number of strands in the world |
uint32_t numStrands; // number of strands in the world |
39 |
sem_t numActive; // number of active strands in the world |
uint32_t numActive; // number of active strands in the world |
40 |
void **inState; |
void **inState; |
41 |
void **outState; |
void **outState; |
42 |
uint8_t *status; // array of strand status flags |
uint8_t *status; // array of strand status flags |
62 |
pthread_mutex_lock(&wrld->lock); |
pthread_mutex_lock(&wrld->lock); |
63 |
|
|
64 |
// If there is no more work to do this iteration, we wait |
// If there is no more work to do this iteration, we wait |
65 |
while(wrld->nextStrand == wrld->numStrands) |
while(wrld->nextStrand == wrld->numStrands) { |
|
{ |
|
66 |
wrld->nWorkers++; |
wrld->nWorkers++; |
67 |
if (wrld->nWorkers == wrld->numThreads) { |
if (wrld->nWorkers == wrld->numThreads) { |
68 |
pthread_cond_signal(&wrld->main); |
pthread_cond_signal(&wrld->main); |
137 |
int nWorkers = cpuInfo.numHWCores; |
int nWorkers = cpuInfo.numHWCores; |
138 |
pthread_t *workers = (pthread_t *) malloc (nWorkers * sizeof(pthread_t)); |
pthread_t *workers = (pthread_t *) malloc (nWorkers * sizeof(pthread_t)); |
139 |
|
|
140 |
|
printf ("initializing %d workers ...\n", nWorkers); |
141 |
wrld->numThreads = nWorkers; |
wrld->numThreads = nWorkers; |
142 |
for (int i = 0; i < nWorkers; i++) { |
for (int i = 0; i < nWorkers; i++) { |
143 |
pthread_create (&workers[i], NULL, worker_func, wrld); |
pthread_create (&workers[i], NULL, worker_func, wrld); |