Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml
ViewVC logotype

Annotation of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1286 - (view) (download)

1 : lamonts 1244 (* cl-target.sml
2 :     *
3 :     * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
4 :     * All rights reserved.
5 :     *)
6 :    
7 :     structure CLTarget : TARGET =
8 :     struct
9 :    
10 :     structure IL = TreeIL
11 :     structure V = IL.Var
12 :     structure Ty = IL.Ty
13 :     structure CL = CLang
14 :     structure RN = RuntimeNames
15 : jhr 1273 structure ToCL = TreeToCL
16 : lamonts 1244
(* Translation of TreeIL variables into C variable expressions *)
structure TrCVar =
  struct
    type env = CL.typed_var TreeIL.Var.Map.map

  (* return the C name bound to x in env; raises Fail when x has no binding *)
    fun lookup (env, x) = (case V.Map.find (env, x)
           of NONE => raise Fail(concat["TrCVar.lookup(_, ", V.name x, ")"])
            | SOME(CL.V(_, cName)) => cName
          (* end case *))

  (* translate a variable in an l-value context (i.e., the target of an assignment).
   * Globals are accessed through the globals pointer, locals are accessed directly,
   * and strand-state variables are rejected.
   *)
    fun lvalueVar (env, x) = let
        (* the lookup is delayed so that state variables fail with the strand-context message *)
          fun cName () = lookup (env, x)
          in
            case V.kind x
             of IL.VK_Local => CL.mkVar(cName ())
              | IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, cName ())
              | IL.VK_State _ => raise Fail "unexpected strand context"
            (* end case *)
          end

  (* translate a variable in an r-value context; same translation as for l-values *)
    fun rvalueVar (env, x) = lvalueVar (env, x)
  end
34 :    
35 :     structure ToC = TreeToCFn (TrCVar)
36 :    
37 :     type var = CL.typed_var
38 : lamonts 1244 type exp = CL.exp
39 :     type stm = CL.stm
40 :    
41 : jhr 1279 (* OpenCL specific types *)
42 :     val clProgramTy = CL.T_Named "cl_program"
43 :     val clKernelTy = CL.T_Named "cl_kernel"
44 :     val clCmdQueueTy = CL.T_Named "cl_command_queue"
45 :     val clContextTy = CL.T_Named "cl_context"
46 :     val clDeviceIdTy = CL.T_Named "cl_device_id"
47 :     val clPlatformIdTy = CL.T_Named "cl_platform_id"
48 :     val clMemoryTy = CL.T_Named "cl_mem"
49 :    
(* The target-side representation of a strand.  The ref-valued fields are filled in
 * incrementally while the strand is being translated.
 *)
50 : lamonts 1244 datatype strand = Strand of {
51 : jhr 1261 name : string, (* strand name; generated function names are derived from it (e.g., name ^ "_print") *)
52 :     tyName : string, (* name of the C struct type that holds the strand's state *)
53 :     state : var list ref, (* state variables; Var.state prepends, so the list is in reverse order *)
54 :     output : (Ty.ty * CL.var) option ref, (* the strand's output variable (only one for now) *)
55 : lamonts 1271 code : CL.decl list ref, (* NOTE(review): presumably the strand's generated declarations -- not used in the visible code *)
56 : jhr 1273 init_code: CL.decl ref (* NOTE(review): presumably the strand-initialization function -- not used in the visible code *)
57 : lamonts 1244 }
58 :    
(* The target-side representation of a whole program; created by Program.new *)
59 :     datatype program = Prog of {
60 : jhr 1278 name : string, (* stem of source file *)
61 : jhr 1261 double : bool, (* true for double-precision support *)
62 :     parallel : bool, (* true for multithreaded (or multi-GPU) target *)
63 :     debug : bool, (* true for debug support in executable *)
64 :     globals : CL.decl list ref, (* global variable declarations; Var.global prepends to this list *)
65 :     topDecls : CL.decl list ref, (* top-level function declarations; Program.inputs and Program.init prepend *)
66 :     strands : strand AtomTable.hash_table, (* the program's strands, in an atom-keyed hash table *)
67 :     initially : CL.stm list ref, (* code for the initially clause; set by Program.initially *)
68 : jhr 1273 numDims: int ref, (* number of iterators in the initially clause; set by Program.initially *)
69 :     imgGlobals: (string * int) list ref, (* (name, dimension) of each image-typed global; added by Var.global *)
70 :     prFn: CL.decl ref (* print function; initialized to a "No Print Function" comment *)
71 :     }
72 : lamonts 1244
(* Translation environments.  An environment pairs the program being generated with
 * the TreeIL-to-target variable bindings and the current translation context.
 *)
73 :     datatype env = ENV of {
74 : jhr 1261 info : env_info, (* program-wide information *)
75 :     vMap : var V.Map.map, (* maps TreeIL variables to target variables; extended by Env.bind *)
76 :     scope : scope (* current translation context; changed by Env.setScope *)
77 : lamonts 1244 }
78 :    
79 :     and env_info = INFO of {
80 : jhr 1261 prog : program
81 : lamonts 1244 }
82 :    
(* the translation context; Tr.block uses it to pick the translator and the state-saving behavior *)
83 :     and scope
84 :     = NoScope
85 :     | GlobalScope
86 :     | InitiallyScope
87 : jhr 1261 | StrandScope of TreeIL.var list (* strand initialization *)
88 :     | MethodScope of TreeIL.var list (* method body; vars are state variables *)
89 : lamonts 1244
90 : jhr 1273 (* the supported widths of vectors of reals on the target. *)
91 :     (* FIXME: for OpenCL 1.1, 3 is also valid *)
92 :     fun vectorWidths () = [2, 4, 8, 16]
93 : lamonts 1244
94 :     (* tests for whether various expression forms can appear inline *)
95 : jhr 1261 fun inlineCons n = (n < 2) (* vectors are inline, but not matrices *)
96 :     val inlineMatrixExp = false (* can matrix-valued expressions appear inline? *)
97 : lamonts 1244
(* TreeIL to target translations *)
structure Tr =
  struct
  (* translate a code fragment, returning the extended environment and the generated
   * statements.  This function is used for the initially clause, so it generates OpenCL.
   *)
    fun fragment (ENV{info, vMap, scope}, blk) = let
          val (vMap', stms) = ToCL.trFragment (vMap, blk)
          val env' = ENV{info = info, vMap = vMap', scope = scope}
          in
            (env', stms)
          end

  (* generate the assignments that save the argument expressions into the state
   * variables, followed by stm.  The cxt string only identifies the caller in the
   * diagnostic that is reported when the two lists differ in length.
   *)
    fun saveState cxt stateVars (env, args, stm) = let
          fun assign (x, e, stms) = ToCL.trAssign(env, x, e) @ stms
          in
            ListPair.foldrEq assign [stm] (stateVars, args)
          end
            handle ListPair.UnequalLengths => (
              print(concat["saveState ", cxt, ": length mismatch; ", Int.toString(List.length args), " args\n"]);
              raise Fail(concat["saveState ", cxt, ": length mismatch"]))

  (* translate a block; the scope selects both the translator (OpenCL vs. C) and
   * whether strand state is saved.
   *)
    fun block (ENV{vMap, scope, ...}, blk) = let
        (* used in the scopes that have no strand state to save *)
          fun noSave (_, _, stm) = [stm]
          in
            case scope
             of StrandScope stateVars => ToCL.trBlock (vMap, saveState "StrandScope" stateVars, blk)
              | MethodScope stateVars => ToCL.trBlock (vMap, saveState "MethodScope" stateVars, blk)
              | InitiallyScope => ToCL.trBlock (vMap, noSave, blk)
              | _ => ToC.trBlock (vMap, noSave, blk)
            (* end case *)
          end

  (* translate an expression *)
    fun exp (ENV{vMap = vm, ...}, e) = ToCL.trExp (vm, e)
  end
123 :    
(* variables *)
structure Var =
  struct
  (* the target name of a variable *)
    fun name (ToCL.V(_, x)) = x

  (* declare a global variable: its declaration is added to the program's globals
   * and, when it has an image type, its name and dimension are also recorded in
   * imgGlobals.
   *)
    fun global (Prog{globals, imgGlobals, ...}, name, ty) = let
          val clTy = ToCL.trType ty
          val decl = CL.D_Var([], clTy, name, NONE)
          in
            globals := decl :: !globals;
            (case ty
             of Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}) => imgGlobals := (name, dim) :: !imgGlobals
              | _ => ()
            (* end case *));
            ToCL.V(clTy, name)
          end

  (* the target variable for a function parameter *)
    fun param x = let
          val clTy = ToCL.trType(V.ty x)
          in
            ToCL.V(clTy, V.name x)
          end

  (* define a strand-state variable and add it to the front of the strand's state list *)
    fun state (Strand{state = stateVars, ...}, x) = let
          val sv = ToCL.V(ToCL.trType(V.ty x), V.name x)
          in
            stateVars := sv :: !stateVars;
            sv
          end
  end
146 :    
(* environments *)
structure Env =
  struct
  (* create a new environment for a program; it has no bindings and no scope *)
    fun new prog = let
          val info = INFO{prog = prog}
          in
            ENV{info = info, vMap = V.Map.empty, scope = NoScope}
          end

  (* define the current translation context *)
    fun setScope scope (ENV{info = i, vMap = vm, ...}) = ENV{info = i, vMap = vm, scope = scope}
    fun scopeGlobal env = setScope GlobalScope env
    fun scopeInitially env = setScope InitiallyScope env
    fun scopeStrand (env, svars) = setScope (StrandScope svars) env
    fun scopeMethod (env, svars) = setScope (MethodScope svars) env

  (* bind a TreeIL variable to a target variable *)
    fun bind (ENV{info, vMap, scope}, x, x') = let
          val vMap' = V.Map.insert(vMap, x, x')
          in
            ENV{info = info, vMap = vMap', scope = scope}
          end
  end
169 :    
170 :     (* programs *)
171 :     structure Program =
172 :     struct
(* create a new, empty program.  The runtime-name tables are first initialized for
 * the requested floating-point precision.
 *)
fun new {name, double, parallel, debug} = let
      val _ = RN.initTargetSpec double
      val _ = CNames.initTargetSpec double
      val strandTbl = AtomTable.mkTable (16, Fail "strand table")
    (* placeholders that stay in place until the real code is registered *)
      val noInitially = [CL.S_Comment["missing initially"]]
      val noPrintFn = CL.D_Comment(["No Print Function"])
      in
        Prog{
            name = name,
            double = double,
            parallel = parallel,
            debug = debug,
            globals = ref [],
            topDecls = ref [],
            strands = strandTbl,
            initially = ref noInitially,
            numDims = ref 0,
            imgGlobals = ref [],
            prFn = ref noPrintFn
          }
      end
(* extend stms with one assignment per variable declaration in globals; each
 * assignment copies the variable into the field of the same name that is reached
 * through the globals pointer.  Declarations other than D_Var are ignored.
 *)
fun globalIndirects (globals, stms) = let
      fun copyStm (CL.D_Var(_, _, gv, _)) =
            SOME(CL.mkAssign(CL.mkIndirect(CL.mkVar RN.globalsVarName, gv), CL.mkVar gv))
        | copyStm _ = NONE
      in
        stms @ List.mapPartial copyStm globals
      end
197 : jhr 1261
(* register the code that is used to register command-line options for input
 * variables; stm becomes the body of the RN.registerOpts function, which is added
 * to the program's top-level declarations.
 *)
fun inputs (Prog{topDecls, ...}, stm) = let
      val optsParam = CL.PARAM([], CL.T_Ptr(CL.T_Named RN.optionsTy), "opts")
      val registerFn = CL.D_Func([], CL.voidTy, RN.registerOpts, [optsParam], stm)
      in
        topDecls := registerFn :: !topDecls
      end
207 :    
(* register the global initialization part of a program.  Two functions are added to
 * the top-level declarations: RN.initGlobals, whose body is the given init code, and
 * RN.shutdown, which has an empty body.
 *)
fun init (Prog{topDecls, ...}, init) = let
    (* a parameter whose type is a pointer to the named type *)
      fun ptrParam (tyName, x) = CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), x)
      val initFn = CL.D_Func(
            [], CL.voidTy, RN.initGlobals,
            [ptrParam (RN.globalsTy, RN.globalsVarName)],
            init)
      val shutdownFn = CL.D_Func(
            [], CL.voidTy, RN.shutdown,
            [ptrParam (RN.worldTy, "wrld")],
            CL.S_Block[])
      in
        topDecls := shutdownFn :: initFn :: !topDecls
      end
221 : jhr 1261
(* create and register the initially code for a program.  The generated statements
 * are the iteration prefix, the declarations of the "base" and "size" arrays (one
 * entry per iterator) and of "numDims", and a loop that multiplies the entries of
 * "size" into numStrandsVar.  The number of iterators is recorded in numDims.
 *)
fun initially {
      prog = Prog{initially, numDims, ...},
      isArray : bool,
      iterPrefix : stm list,
      iters : (var * exp * exp) list,
      createPrefix : stm list,
      strand : Atom.atom,
      args : exp list
    } = let
      val nDims = List.length iters
    (* map over a list, passing each element's position (counting from 0) to f *)
      fun mapi f xs = let
            fun step (x, (i, acc)) = (i + 1, f (i, x) :: acc)
            val (_, revd) = List.foldl step (0, []) xs
            in
              List.rev revd
            end
    (* the lower bound of each iterator *)
      val baseInit = mapi (fn (i, (_, lo, _)) => (i, CL.I_Exp lo)) iters
    (* the number of values covered by an iterator: hi - lo + 1 *)
      fun extent (ToCL.V(ty, _), lo, hi) =
            CL.mkBinOp(CL.mkBinOp(hi, CL.#-, lo), CL.#+, CL.mkInt(1, ty))
      val sizeInit = mapi (fn (i, iter) => (i, CL.I_Exp(extent iter))) iters
      val numStrandsVar = "numStrandsVar"
      val baseDecl = CL.mkDecl(CL.T_Array(CL.int32, SOME nDims), "base", SOME(CL.I_Array baseInit))
      val sizeDecl = CL.mkDecl(CL.T_Array(CL.uint32, SOME nDims), "size", SOME(CL.I_Array sizeInit))
      val numDimsDecl =
            CL.mkDecl(CL.int32, "numDims", SOME(CL.I_Exp(CL.mkInt(IntInf.fromInt nDims, CL.int32))))
      val allocCode = iterPrefix @ [
              CL.mkComment["allocate initial block of strands"],
              baseDecl, sizeDecl, numDimsDecl
            ]
    (* for (int i = 0; i < numDims; i++) numStrandsVar *= size[i]; *)
      val numStrandsLoop = let
            val i = CL.mkVar "i"
            val accumulate = CL.mkExpStm(
                  CL.mkAssignOp(CL.mkVar numStrandsVar, CL.*=, CL.mkSubscript(CL.mkVar "size", i)))
            in
              CL.mkFor(
                [(CL.intTy, "i", CL.mkInt(0, CL.intTy))],
                CL.mkBinOp(i, CL.#<, CL.mkVar "numDims"),
                [CL.mkPostOp(i, CL.^++)],
                accumulate)
            end
      in
        numDims := nDims;
        initially := allocCode @ [numStrandsLoop]
      end
261 : lamonts 1256
262 :    
263 : lamonts 1244 (***** OUTPUT *****)
(* generate the static host function (named RN.strandInitSetup) that initializes
 * every strand in the initial grid.  The function takes the per-dimension sizes,
 * the grid width, and the strand array, and consists of a loop nest (one loop per
 * dimension) around a call to the strand's init function.
 *
 * Raises Fail when nDims is not 1, 2, or 3.
 *)
fun genStrandInit (Strand{name, tyName, ...}, nDims) = let
      val params = [
              CL.PARAM([], CL.T_Ptr(CL.uint32), "sizes" ),
              CL.PARAM([], CL.intTy, "width"),
              CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "strands")
            ]
    (* the loop variables, outermost first *)
      fun loopParams 3 = ["x", "y", "k"]
        | loopParams 2 = ["x", "y"]
        | loopParams 1 = ["x"]
        | loopParams _ = raise Fail "genStrandInit: missing size dim"
    (* the address of the strand at the given index: &strands[index] *)
      fun strandAddr index = CL.mkUnOp(CL.%&, CL.mkSubscript(CL.mkVar "strands", index))
    (* the innermost statement: the call that initializes one strand *)
      fun initCall () = if nDims = 1
            then CL.mkBlock [
              (* the subscript must be the loop variable x; the string literal "x"
               * used previously produced strands["x"] in the generated C code.
               *)
                CL.mkCall(RN.strandInit name, [strandAddr (CL.mkVar "x"), CL.mkVar "x"])
              ]
            else let
            (* NOTE(review): this branch is also taken when nDims = 3, where the "k"
             * loop variable affects neither the index nor the call's arguments --
             * confirm whether 3-D grids are meant to be supported here.
             *)
              val index = CL.mkBinOp(CL.mkBinOp(CL.mkVar "x", CL.#*, CL.mkVar "width"), CL.#+, CL.mkVar "y")
              in
                CL.mkBlock [
                    CL.mkCall(RN.strandInit name, [strandAddr index, CL.mkVar "x", CL.mkVar "y"])
                  ]
              end
    (* wrap the init call in one for-loop per loop variable; count is the index of
     * the loop's bound in the "sizes" array.
     *)
      fun mkLoopNest ([], _) = initCall ()
        | mkLoopNest (param::rest, count) = CL.mkFor(
            [(CL.intTy, param, CL.mkInt(0, CL.intTy))],
            CL.mkBinOp(CL.mkVar param, CL.#<, CL.mkSubscript(CL.mkVar "sizes", CL.mkInt(count, CL.intTy))),
            [CL.mkPostOp(CL.mkVar param, CL.^++)],
            mkLoopNest (rest, count + 1))
      val body = [mkLoopNest (loopParams nDims, 0)]
      in
        CL.D_Func(["static"], CL.voidTy, RN.strandInitSetup, params, CL.mkBlock body)
      end
302 :    
(* generate the static C function "<name>_print", which writes the output variable
 * of every strand to a FILE.  The function takes the output stream, the
 * per-dimension sizes, the grid width, and the strand array; it consists of a loop
 * nest (one loop per dimension) around a call to fprintf.
 *
 * Raises Fail when the strand has no output variable, when the output type is not
 * an integer vector or a tensor of order 0 or 1, or when nDims is not 1, 2, or 3.
 *)
fun genStrandPrint (Strand{name, tyName, output, ...}, nDims) = let
      val prFnName = concat[name, "_print"]
      val params = [
              CL.PARAM([], CL.T_Ptr(CL.T_Named "FILE"), "outS"),
              CL.PARAM([], CL.T_Ptr(CL.uint32), "sizes" ),
              CL.PARAM([], CL.intTy, "width"),
              CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "self")
            ]
    (* report a missing output variable explicitly instead of raising Bind *)
      val (ty, x) = (case !output
             of SOME outVar => outVar
              | NONE => raise Fail(concat["genStrandPrint: strand ", name, " has no output variable"])
            (* end case *))
    (* the expression that selects the output variable of the current strand *)
      val outState = if nDims = 1
              then CL.mkSelect(CL.mkSubscript(CL.mkVar "self", CL.mkVar "x"), x)
            else if nDims = 2
              then CL.mkSelect(
                CL.mkSubscript(CL.mkVar "self",
                  CL.mkBinOp(CL.mkBinOp(CL.mkVar "x", CL.#*, CL.mkVar "width"), CL.#+, CL.mkVar "y")),
                x)
              else CL.mkSelect(CL.mkVar "self", x)
    (* the format string and value arguments for fprintf *)
      val prArgs = (case ty
             of Ty.IVecTy 1 => [CL.mkStr(!RN.gIntFormat ^ "\n"), outState]
              | Ty.IVecTy d => let
                  val fmt = CL.mkStr(
                        String.concatWith " " (List.tabulate(d, fn _ => !RN.gIntFormat))
                          ^ "\n")
                  val args = List.tabulate (d, fn i => ToCL.vecIndex(outState, i))
                  in
                    fmt :: args
                  end
              | Ty.TensorTy[] => [CL.mkStr "%f\n", outState]
              | Ty.TensorTy[d] => let
                  val fmt = CL.mkStr(
                        String.concatWith " " (List.tabulate(d, fn _ => "%f"))
                          ^ "\n")
                  val args = List.tabulate (d, fn i => ToCL.vecIndex(outState, i))
                  in
                    fmt :: args
                  end
              | _ => raise Fail("genStrand: unsupported output type " ^ Ty.toString ty)
            (* end case *))
    (* the loop variables, outermost first.  An unsupported dimension count is
     * reported as such; the message previously blamed the output type.
     *)
      fun loopParams 3 = ["x", "y", "k"]
        | loopParams 2 = ["x", "y"]
        | loopParams 1 = ["x"]
        | loopParams _ = raise Fail(
            "genStrandPrint: unsupported number of dimensions " ^ Int.toString nDims)
    (* wrap the fprintf call in one for-loop per loop variable; count is the index
     * of the loop's bound in the "sizes" array.
     *)
      fun mkLoopNest ([], _) = CL.mkCall("fprintf", CL.mkVar "outS" :: prArgs)
        | mkLoopNest (param::rest, count) = CL.mkFor(
            [(CL.intTy, param, CL.mkInt(0, CL.intTy))],
            CL.mkBinOp(CL.mkVar param, CL.#<, CL.mkSubscript(CL.mkVar "sizes", CL.mkInt(count, CL.intTy))),
            [CL.mkPostOp(CL.mkVar param, CL.^++)],
            mkLoopNest (rest, count + 1))
      val body = [mkLoopNest (loopParams nDims, 0)]
      in
        CL.D_Func(["static"], CL.voidTy, prFnName, params, CL.mkBlock body)
      end
(* the type declaration for the strand's state struct.  The state list is reversed
 * while it is converted to (type, name) fields.
 *)
fun genStrandTyDef (Strand{tyName, state, ...}) = let
    (* consing during a left fold reverses the list *)
      fun addField (ToCL.V(ty, x), fields) = (ty, x) :: fields
      val fields = List.foldl addField [] (!state)
      in
        CL.D_StructDef(fields, tyName)
      end
382 :    
383 :    
(* generates the load kernel function: a verbatim C function, loadKernel, that reads
 * a whole file into a freshly allocated, NUL-terminated buffer and returns it (or
 * returns 0 when the file cannot be opened).
 *
 * The emitted code previously called fread twice, never wrote the terminating NUL
 * into the extra byte that it allocates, and never closed the file.
 *)
(* FIXME: this code might be part of the runtime system *)
fun genKernelLoader() =
      CL.D_Verbatim ( ["/* Loads the Kernel from a file */",
        "char * loadKernel (const char * filename) {",
        "struct stat statbuf;",
        "FILE *fh;",
        "char *source;",
        "fh = fopen(filename, \"r\");",
        "if (fh == 0)",
        " return 0;",
        "stat(filename, &statbuf);",
        "source = (char *) malloc(statbuf.st_size + 1);",
        "fread(source, statbuf.st_size, 1, fh);",
        "source[statbuf.st_size] = '\\0';",
        "fclose(fh);",
        "return source;",
        "}"])
(* generates the opencl buffers for the image data.  The result declares and creates
 * the buffer for the globals struct and then, for each (name, nDims) image global,
 * declares and creates one buffer for the image header and one for its data.
 *
 * The data-size expressions are built with CL constructors; they were previously
 * built by passing C source text (e.g. "size[0] * img->size[1]") as a field name.
 *
 * The count argument is not used in the generated code; it is kept so that
 * existing callers are unaffected.
 *)
fun getGlobalDataBuffers (globals, count, contextVar, errVar) = let
      val globalsBufName = concat[RN.globalsVarName, "_cl"]
    (* clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, hostPtr, &err) *)
      fun createBuffer (size, hostPtr) = CL.mkApply("clCreateBuffer", [
              CL.mkVar contextVar,
              CL.mkVar "CL_MEM_COPY_HOST_PTR",
              size,
              hostPtr,
              CL.mkUnOp(CL.%&, CL.mkVar errVar)
            ])
      fun sizeof tyName = CL.mkApply("sizeof", [CL.mkVar tyName])
    (* sizeof(float) * img->size[0] * ... ; one factor for 1-D images, two for 2-D
     * images, and three otherwise.
     *)
      fun dataSize (img, nDims) = let
            fun dim i = CL.mkSubscript(CL.mkIndirect(CL.mkVar img, "size"), CL.mkInt(i, CL.intTy))
            val dims = if nDims = 1 then [dim 0]
                  else if nDims = 2 then [dim 0, dim 1]
                  else [dim 0, dim 1, dim 2]
            in
              List.foldl (fn (d, acc) => CL.mkBinOp(acc, CL.#*, d)) (sizeof "float") dims
            end
    (* the declarations and buffer-creation assignments for one image global *)
      fun imageBuffers (img, nDims) = let
            val hdrBuf = RN.addBufferSuffix img
            val dataBuf = RN.addBufferSuffixData img
            in [
              CL.mkDecl(clMemoryTy, hdrBuf, NONE),
              CL.mkDecl(clMemoryTy, dataBuf, NONE),
              CL.mkAssign(CL.mkVar hdrBuf,
                createBuffer (sizeof (RN.imageTy nDims), CL.mkVar img)),
              CL.mkAssign(CL.mkVar dataBuf,
                createBuffer (dataSize (img, nDims), CL.mkIndirect(CL.mkVar img, "data")))
            ] end
      in
        CL.mkDecl(clMemoryTy, globalsBufName, NONE)
          :: CL.mkAssign(CL.mkVar globalsBufName,
               createBuffer (sizeof RN.globalsTy, CL.mkVar RN.globalsVarName))
          :: List.concat (List.map imageBuffers globals)
      end
443 :    
444 : jhr 1261
(* generates the kernel arguments for the image data.  The globals buffer is bound
 * to argument index count; each image global then takes the next two indices (its
 * header buffer followed by its data buffer).  Every clSetKernelArg result is or-ed
 * into errVar.
 *)
fun genGlobalArguments (globals, count, kernelVar, errVar) = let
    (* err |= clSetKernelArg(kernel, idx, sizeof(cl_mem), &bufName); *)
      fun setArg (idx, bufName) = CL.mkExpStm(
            CL.mkAssignOp(CL.mkVar errVar, CL.|=, CL.mkApply("clSetKernelArg", [
                CL.mkVar kernelVar,
                CL.mkInt(idx, CL.intTy),
                CL.mkApply("sizeof", [CL.mkVar "cl_mem"]),
                CL.mkUnOp(CL.%&, CL.mkVar bufName)
              ])))
      fun imageArgs ([], _) = []
        | imageArgs ((img, _)::rest, idx) =
            setArg (idx, RN.addBufferSuffix img)
              :: setArg (idx + 1, RN.addBufferSuffixData img)
              :: imageArgs (rest, idx + 2)
      in
        setArg (count, concat[RN.globalsVarName, "_cl"]) :: imageArgs (globals, count + 1)
      end
473 :    
(* generates the main function of host code: it allocates the globals struct, calls
 * the global-initialization and setup functions on it, and returns 0.
 *)
fun genHostMain () = let
      val params = [
              CL.PARAM([], CL.intTy, "argc"),
              CL.PARAM([], CL.charArrayPtr, "argv")
            ]
    (* <globalsTy> *<globals> = malloc(sizeof(<globalsTy>)); *)
      val allocGlobals = CL.mkApply("malloc", [CL.mkApply("sizeof", [CL.mkVar RN.globalsTy])])
      val globalsDecl = CL.mkDecl(
            CL.T_Ptr(CL.T_Named RN.globalsTy),
            RN.globalsVarName,
            SOME(CL.I_Exp allocGlobals))
      val body = CL.mkBlock [
              globalsDecl,
              CL.mkCall(RN.initGlobals, [CL.mkVar RN.globalsVarName]),
              CL.mkCall(RN.setupFName, [CL.mkVar RN.globalsVarName]),
              CL.mkReturn(SOME(CL.mkInt(0, CL.intTy)))
            ]
      in
        CL.D_Func([], CL.intTy, "main", params, body)
      end
491 : lamonts 1264
492 : jhr 1273 (* generates the host-side setup function *)
493 :     fun genHostSetupFunc (strand as Strand{name,tyName,...}, filename, nDims, initially, imgGlobals) = let
494 :     (* Declare OpenCL setup objects *)
495 :     val programVar= "program"
496 :     val kernelVar = "kernel"
497 :     val cmdVar = "queue"
498 :     val inStateVar = "selfin"
499 :     val outStateVar = "selfout"
500 :     val stateSizeVar= "state_mem_size"
501 :     val clInstateVar = "clSelfIn"
502 :     val clOutStateVar = "clSelfOut"
503 :     val clGlobals = "clGlobals"
504 :     val sourcesVar = "sources"
505 :     val contextVar = "context"
506 :     val errVar = "err"
507 :     val imgDataSizeVar = "image_dataSize"
508 :     val globalVar = "global_work_size"
509 :     val localVar = "local_work_size"
510 :     val clFNVar = "filename"
511 :     val numStrandsVar = "numStrandsVar"
512 :     val headerFNVar = "header"
513 :     val deviceVar = "device"
514 :     val platformsVar = "platforms"
515 :     val numPlatformsVar = "num_platforms"
516 :     val numDevicesVar = "num_devices"
517 : jhr 1278 val assertStm = CL.mkCall("assert",[CL.mkBinOp(CL.mkVar errVar, CL.#==, CL.mkVar "CL_SUCCESS")])
518 : jhr 1273 val params = [
519 :     CL.PARAM([],CL.T_Named("cl_device_id"), deviceVar)
520 :     ]
521 :     val declarations = [
522 : jhr 1279 CL.mkDecl(clProgramTy, programVar, NONE),
523 :     CL.mkDecl(clKernelTy, kernelVar, NONE),
524 :     CL.mkDecl(clCmdQueueTy, cmdVar, NONE),
525 :     CL.mkDecl(clContextTy, contextVar, NONE),
526 : jhr 1273 CL.mkDecl(CL.intTy, errVar, NONE),
527 : jhr 1278 CL.mkDecl(CL.intTy, numStrandsVar, SOME(CL.I_Exp(CL.mkInt(1,CL.intTy)))),
528 : jhr 1273 CL.mkDecl(CL.intTy, stateSizeVar, NONE),
529 :     CL.mkDecl(CL.intTy, "width", NONE),
530 :     CL.mkDecl(CL.intTy, imgDataSizeVar, NONE),
531 : jhr 1279 (*CL.mkDecl(clDeviceIdTy, deviceVar, NONE), *)
532 : jhr 1273 CL.mkDecl(CL.T_Ptr(CL.T_Named tyName), inStateVar,NONE),
533 : jhr 1279 CL.mkDecl(clMemoryTy,clInstateVar,NONE),
534 :     CL.mkDecl(clMemoryTy,clOutStateVar,NONE),
535 : jhr 1273 CL.mkDecl(CL.T_Ptr(CL.T_Named tyName), outStateVar,NONE),
536 : jhr 1278 CL.mkDecl(CL.charPtr, clFNVar,SOME(CL.I_Exp(CL.mkStr filename))),
537 : jhr 1273 (* FIXME: use Paths.diderotInclude *)
538 : jhr 1278 CL.mkDecl(CL.charPtr, headerFNVar,SOME(CL.I_Exp(CL.mkStr "../src/include/Diderot/cl-types.h"))),
539 : jhr 1273 CL.mkDecl(CL.T_Array(CL.charPtr,SOME(2)),sourcesVar,NONE),
540 :     CL.mkDecl(CL.T_Array(CL.T_Named "size_t",SOME(nDims)),globalVar,NONE),
541 :     CL.mkDecl(CL.T_Array(CL.T_Named "size_t",SOME(nDims)),localVar,NONE),
542 : jhr 1278 CL.mkDecl(CL.intTy,numDevicesVar,SOME(CL.I_Exp(CL.mkInt(~1,CL.intTy)))),
543 : jhr 1273 CL.mkDecl(CL.T_Array(CL.T_Named "cl_platform_id", SOME(1)), platformsVar, NONE),
544 : jhr 1278 CL.mkDecl(CL.intTy,"num_platforms",SOME(CL.I_Exp(CL.mkInt(~1,CL.intTy))))
545 : jhr 1273 ]
546 :     (* Setup Global Variables *)
547 :     val globalsDecl = CL.mkDecl(
548 :     CL.T_Ptr(CL.T_Named RN.globalsTy),
549 :     RN.globalsVarName,
550 : jhr 1278 SOME(CL.I_Exp(CL.mkApply("malloc", [CL.mkApply("sizeof",[CL.mkVar RN.globalsTy])]))))
551 :     val initGlobalsCall = CL.mkCall(RN.initGlobals,[CL.mkVar RN.globalsVarName])
552 : lamonts 1271
553 :     (* Retrieve the platforms
554 : jhr 1278 val platformStm = [CL.mkAssign(CL.mkVar errVar, CL.mkApply("clGetPlatformIDs",
555 :     [CL.mkInt(10,CL.intTy),
556 :     CL.mkVar platformsVar,
557 :     CL.mkUnOp(CL.%&,CL.mkVar numPlatformsVar)])),
558 : lamonts 1264 assertStm]
559 :    
560 : jhr 1278 val devicesStm = [CL.mkAssign(CL.mkVar errVar, CL.mkApply("clGetDeviceIDs",
561 :     [CL.mkSubscript(CL.mkVar platformsVar,CL.mkInt(0,CL.intTy)),
562 :     CL.mkVar "CL_DEVICE_TYPE_GPU",
563 :     CL.mkInt(1,CL.intTy),
564 :     CL.mkUnOp(CL.%&,CL.mkVar deviceVar),
565 :     CL.mkUnOp(CL.%&,CL.mkVar numDevicesVar)])),
566 : lamonts 1271 assertStm] *)
567 : lamonts 1264
568 :     (* Create Context *)
569 : jhr 1278 val contextStm = [CL.mkAssign(CL.mkVar contextVar, CL.mkApply("clCreateContext",
570 :     [CL.mkInt(0,CL.intTy),
571 :     CL.mkInt(1,CL.intTy),
572 :     CL.mkUnOp(CL.%&,CL.mkVar deviceVar),
573 :     CL.mkVar "NULL",
574 :     CL.mkVar "NULL",
575 :     CL.mkUnOp(CL.%&,CL.mkVar errVar)])),
576 : lamonts 1264 assertStm]
577 :    
578 :     (* Create Command Queue *)
579 : jhr 1278 val commandStm = [CL.mkAssign(CL.mkVar cmdVar, CL.mkApply("clCreateCommandQueue",
580 :     [CL.mkVar contextVar,
581 :     CL.mkVar deviceVar,
582 :     CL.mkInt(0,CL.intTy),
583 :     CL.mkUnOp(CL.%&,CL.mkVar errVar)])),
584 : lamonts 1264 assertStm]
585 :    
586 :    
587 :     (*Create Program/Build/Kernel with Source statement *)
588 : jhr 1278 val createProgStm = CL.mkAssign(CL.mkVar programVar, CL.mkApply("clCreateProgramWithSource",
589 :     [CL.mkVar contextVar,
590 :     CL.mkInt(2,CL.intTy),
591 :     CL.mkCast(CL.T_Ptr(CL.T_Named("const char *")),CL.mkUnOp(CL.%&,CL.mkVar sourcesVar)),
592 :     CL.mkVar "NULL",
593 :     CL.mkUnOp(CL.%&,CL.mkVar errVar)]))
594 : lamonts 1264
595 :     (* FIXME: Remove after testing purposes, Build Log for OpenCL*)
596 : jhr 1278 val buildLog = [CL.mkAssign(CL.mkVar errVar, CL.mkApply("clBuildProgram",
597 :     [CL.mkVar programVar,
598 :     CL.mkInt(0,CL.intTy),
599 :     CL.mkVar "NULL",
600 :     CL.mkVar "NULL",
601 :     CL.mkVar "NULL",
602 :     CL.mkVar "NULL"])),
603 : lamonts 1271 CL.mkDecl(CL.charPtr, "build", NONE),
604 :     CL.mkDecl(CL.T_Named("size_t"),"ret_val_size",NONE),
605 : jhr 1278 CL.mkAssign(CL.mkVar errVar, CL.mkApply("clGetProgramBuildInfo",
606 :     [CL.mkVar programVar,
607 :     CL.mkVar deviceVar,
608 :     CL.mkVar "CL_PROGRAM_BUILD_LOG",
609 :     CL.mkInt(0,CL.intTy),
610 :     CL.mkVar "NULL",
611 :     CL.mkUnOp(CL.%&,CL.mkVar "ret_val_size")])),
612 :     CL.mkAssign(CL.mkVar "build", CL.mkApply("malloc", [CL.mkVar "ret_val_size"])),
613 :     CL.mkAssign(CL.mkVar errVar, CL.mkApply("clGetProgramBuildInfo",
614 :     [CL.mkVar programVar,
615 :     CL.mkVar deviceVar,
616 :     CL.mkVar "CL_PROGRAM_BUILD_LOG",
617 :     CL.mkVar "ret_val_size",
618 :     CL.mkVar "build",
619 :     CL.mkVar "NULL"])),
620 :     CL.mkAssign(CL.mkSubscript(CL.mkVar "build",CL.mkVar "ret_val_size"),CL.mkVar ("'\\" ^ "0'")),
621 :     CL.mkCall("printf",[CL.mkStr ( "Build Log:" ^ "\n" ^ "%s" ^ "\n"), CL.mkVar "build"])]
622 : lamonts 1264
623 :    
624 :    
625 :    
626 : jhr 1278 val createKernel = CL.mkAssign(CL.mkVar kernelVar, CL.mkApply("clCreateKernel",
627 :     [CL.mkVar programVar,
628 :     CL.mkStr RN.kernelFuncName,
629 :     CL.mkUnOp(CL.%&,CL.mkVar errVar)]))
630 : lamonts 1264
631 :    
632 :     val create_build_stms = [createProgStm,assertStm] @ buildLog @ [assertStm,createKernel,assertStm]
633 :    
634 :    
635 :    
636 :     (* Create Memory Buffers for Strand States and Globals *)
637 : jhr 1278 val strandSize = CL.mkAssign(CL.mkVar stateSizeVar,CL.mkBinOp(CL.mkApply("sizeof",
638 :     [CL.mkVar tyName]), CL.#*,CL.mkVar numStrandsVar))
639 : lamonts 1271
640 : jhr 1278 val clStrandObjects = [CL.mkAssign(CL.mkVar clInstateVar, CL.mkApply("clCreateBuffer",
641 :     [CL.mkVar contextVar,
642 :     CL.mkVar "CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR",
643 :     CL.mkVar stateSizeVar,
644 :     CL.mkVar "NULL",
645 :     CL.mkUnOp(CL.%&,CL.mkVar errVar)])),
646 :     CL.mkAssign(CL.mkVar clOutStateVar, CL.mkApply("clCreateBuffer",
647 :     [CL.mkVar contextVar,
648 :     CL.mkVar "CL_MEM_READ_WRITE",
649 :     CL.mkVar stateSizeVar,
650 :     CL.mkVar "NULL",
651 :     CL.mkUnOp(CL.%&,CL.mkVar errVar)]))]
652 : lamonts 1271
653 :    
654 :     (* Setup up selfOut variable *)
655 : jhr 1278 val strandsArrays = [CL.mkAssign(CL.mkVar outStateVar, CL.mkApply("malloc", [CL.mkBinOp(CL.mkVar numStrandsVar,
656 :     CL.#*, CL.mkApply("sizeof",[CL.mkVar tyName]))])),
657 :     CL.mkAssign(CL.mkVar inStateVar, CL.mkApply("malloc", [CL.mkBinOp(CL.mkVar numStrandsVar,
658 :     CL.#*, CL.mkApply("sizeof",[CL.mkVar tyName]))]))]
659 : lamonts 1271
660 :    
661 :     (* Initialize Width Parameter *)
662 :     val widthDel = if nDims = 2 then
663 : jhr 1278 CL.mkAssign(CL.mkVar "width",CL.mkSubscript(CL.mkVar globalVar, CL.mkInt(1, CL.intTy)))
664 : lamonts 1271 else
665 : jhr 1278 CL.mkAssign(CL.mkVar "width",CL.mkInt(0,CL.intTy))
666 : lamonts 1271
667 :    
668 : jhr 1278 val strands_init = CL.mkCall(RN.strandInitSetup, [
669 :     CL.mkVar "size", CL.mkVar "width", CL.mkVar inStateVar
670 :     ])
671 : lamonts 1271
672 : lamonts 1264 val clGlobalBuffers = getGlobalDataBuffers(!imgGlobals,3,contextVar,errVar)
673 :    
674 :    
675 :     (* Load the Kernel and Header Files *)
676 : jhr 1278 val sourceStms = [CL.mkAssign(CL.mkSubscript(CL.mkVar sourcesVar,CL.mkInt(1,CL.intTy)),
677 :     CL.mkApply(RN.clLoaderFN, [CL.mkVar clFNVar])),
678 :     CL.mkAssign(CL.mkSubscript(CL.mkVar sourcesVar,CL.mkInt(0,CL.intTy)),
679 :     CL.mkApply(RN.clLoaderFN, [CL.mkVar headerFNVar]))]
680 : lamonts 1271
681 : jhr 1278 (* val sourceStms = [CL.mkAssign(CL.mkSubscript(CL.mkVar sourcesVar,CL.mkInt(1,CL.intTy)),
682 :     CL.mkApply(RN.clLoaderFN, [CL.mkVar clFNVar]))] *)
683 : lamonts 1271
684 : lamonts 1264
685 : jhr 1261 (* Created Enqueue Statements *)
686 :     (* FIXME: simplify this code by function abstraction *)
687 : lamonts 1264 val enqueueStm = if nDims = 1
688 : jhr 1278 then [CL.mkAssign(CL.mkVar errVar,
689 : lamonts 1264 CL.mkApply("clEnqueueNDRangeKernel",
690 : jhr 1278 [CL.mkVar cmdVar,
691 :     CL.mkVar kernelVar,
692 :     CL.mkInt(1,CL.intTy),
693 :     CL.mkVar "NULL",
694 :     CL.mkVar globalVar,
695 :     CL.mkVar localVar,
696 :     CL.mkInt(0,CL.intTy),
697 :     CL.mkVar "NULL",
698 :     CL.mkVar "NULL"])),CL.mkCall("clFinish",[CL.mkVar cmdVar])]
699 : lamonts 1264 else if nDims = 2 then
700 : jhr 1278 [CL.mkAssign(CL.mkVar errVar,
701 : lamonts 1264 CL.mkApply("clEnqueueNDRangeKernel",
702 : jhr 1278 [CL.mkVar cmdVar,
703 :     CL.mkVar kernelVar,
704 :     CL.mkInt(2,CL.intTy),
705 :     CL.mkVar "NULL",
706 :     CL.mkVar globalVar,
707 :     CL.mkVar localVar,
708 :     CL.mkInt(0,CL.intTy),
709 :     CL.mkVar "NULL",
710 :     CL.mkVar "NULL"])),CL.mkCall("clFinish",[CL.mkVar cmdVar])]
711 : lamonts 1264 else
712 : jhr 1278 [CL.mkAssign(CL.mkVar errVar,
713 : lamonts 1264 CL.mkApply("clEnqueueNDRangeKernel",
714 : jhr 1278 [CL.mkVar cmdVar,
715 :     CL.mkVar kernelVar,
716 :     CL.mkInt(3,CL.intTy),
717 :     CL.mkVar "NULL",
718 :     CL.mkVar globalVar,
719 :     CL.mkVar localVar,
720 :     CL.mkInt(0,CL.intTy),
721 :     CL.mkVar "NULL",
722 :     CL.mkVar "NULL"])),CL.mkCall("clFinish",[CL.mkVar cmdVar])]
723 : lamonts 1264
724 : lamonts 1271
725 : lamonts 1264
726 :     (* Setup Global and Local variables *)
727 :    
728 :     val globalAndlocalStms = if nDims = 1 then
729 : jhr 1278 [CL.mkAssign(CL.mkSubscript(CL.mkVar globalVar, CL.mkInt(0,CL.intTy)),
730 :     CL.mkSubscript(CL.mkVar "size", CL.mkInt(0,CL.intTy))),
731 :     CL.mkAssign(CL.mkSubscript(CL.mkVar localVar, CL.mkInt(0,CL.intTy)),
732 :     CL.mkVar "16")]
733 : lamonts 1264
734 :    
735 :     else if nDims = 2 then
736 : jhr 1278 [CL.mkAssign(CL.mkSubscript(CL.mkVar globalVar, CL.mkInt(0,CL.intTy)),
737 :     CL.mkSubscript(CL.mkVar "size", CL.mkInt(0,CL.intTy))),
738 :     CL.mkAssign(CL.mkSubscript(CL.mkVar globalVar, CL.mkInt(1,CL.intTy)),
739 :     CL.mkSubscript(CL.mkVar "size", CL.mkInt(1,CL.intTy))),
740 :     CL.mkAssign(CL.mkSubscript(CL.mkVar localVar, CL.mkInt(0,CL.intTy)),
741 :     CL.mkVar "16"),
742 :     CL.mkAssign(CL.mkSubscript(CL.mkVar localVar, CL.mkInt(1,CL.intTy)),
743 :     CL.mkVar "16")]
744 : lamonts 1264
745 :     else
746 : jhr 1278 [CL.mkAssign(CL.mkSubscript(CL.mkVar globalVar, CL.mkInt(0,CL.intTy)),
747 :     CL.mkSubscript(CL.mkVar "size", CL.mkInt(0,CL.intTy))),
748 :     CL.mkAssign(CL.mkSubscript(CL.mkVar globalVar, CL.mkInt(1,CL.intTy)),
749 :     CL.mkSubscript(CL.mkVar "size", CL.mkInt(1,CL.intTy))),
750 :     CL.mkAssign(CL.mkSubscript(CL.mkVar globalVar, CL.mkInt(2,CL.intTy)),
751 :     CL.mkSubscript(CL.mkVar "size", CL.mkInt(2,CL.intTy))),
752 :     CL.mkAssign(CL.mkSubscript(CL.mkVar localVar, CL.mkInt(0,CL.intTy)),
753 :     CL.mkVar "16"),
754 :     CL.mkAssign(CL.mkSubscript(CL.mkVar localVar, CL.mkInt(1,CL.intTy)),
755 :     CL.mkVar "16"),
756 :     CL.mkAssign(CL.mkSubscript(CL.mkVar localVar, CL.mkInt(2,CL.intTy)),
757 :     CL.mkVar "16")]
758 : lamonts 1264
759 : lamonts 1244
760 : lamonts 1264
761 :     (* Setup Kernel arguments *)
762 : jhr 1278 val kernelArguments = [CL.mkAssign(CL.mkVar errVar,CL.mkApply("clSetKernelArg",
763 :     [CL.mkVar kernelVar,
764 :     CL.mkInt(0,CL.intTy),
765 :     CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
766 :     CL.mkUnOp(CL.%&,CL.mkVar clInstateVar)])),
767 :     CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar, CL.|=,CL.mkApply("clSetKernelArg",
768 :     [CL.mkVar kernelVar,
769 :     CL.mkInt(1,CL.intTy),
770 :     CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
771 :     CL.mkUnOp(CL.%&,CL.mkVar clOutStateVar)]))),
772 :     CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar, CL.|=,CL.mkApply("clSetKernelArg",
773 :     [CL.mkVar kernelVar,
774 :     CL.mkInt(2,CL.intTy),
775 :     CL.mkApply("sizeof",[CL.mkVar "int"]),
776 :     CL.mkUnOp(CL.%&,CL.mkVar "width")])))]
777 : lamonts 1264
778 :     val clGlobalArguments = genGlobalArguments(!imgGlobals,3,kernelVar,errVar) @ [assertStm]
779 :    
780 :     (* Retrieve output *)
781 : jhr 1278 val outputStm = CL.mkAssign(CL.mkVar errVar,
782 : lamonts 1264 CL.mkApply("clEnqueueReadBuffer",
783 : jhr 1278 [CL.mkVar cmdVar,
784 :     CL.mkVar clOutStateVar,
785 :     CL.mkVar "CL_TRUE",
786 :     CL.mkInt(0,CL.intTy),
787 :     CL.mkVar stateSizeVar,
788 :     CL.mkVar outStateVar,
789 :     CL.mkInt(0,CL.intTy),
790 :     CL.mkVar "NULL",
791 :     CL.mkVar "NULL"]))
792 : lamonts 1264
793 :     (* Free all the objects *)
794 : jhr 1278 val freeStms = [CL.mkCall("clReleaseKernel",[CL.mkVar kernelVar]),
795 :     CL.mkCall("clReleaseProgram",[CL.mkVar programVar ]),
796 :     CL.mkCall("clReleaseCommandQueue",[CL.mkVar cmdVar]),
797 :     CL.mkCall("clReleaseContext",[CL.mkVar contextVar]),
798 :     CL.mkCall("clReleaseMemObject",[CL.mkVar clInstateVar]),
799 :     CL.mkCall("clReleaseMemObject",[CL.mkVar clOutStateVar])]
800 : lamonts 1264
801 :    
802 :     (*Setup Strand Print Function *)
803 :     val outputData = [CL.mkDecl(CL.T_Ptr(CL.T_Named("FILE")), "outS", SOME(CL.I_Exp(CL.mkApply("fopen",
804 : jhr 1278 [CL.mkStr "mip.txt",
805 :     CL.mkStr "w"])))),
806 : lamonts 1264 CL.mkCall(concat[name, "_print"],
807 : jhr 1278 [CL.mkVar "outS",
808 :     CL.mkVar "size",
809 :     CL.mkVar "width",
810 :     CL.mkVar outStateVar])]
811 : lamonts 1271
812 : lamonts 1264
813 :    
814 :     (* Body put all the statments together *)
815 : jhr 1273 val body = declarations @ [globalsDecl,initGlobalsCall] (*@ platformStm @ devicesStm *) @ contextStm @ commandStm @ !initially @ [strandSize] @
816 : lamonts 1271 strandsArrays @ globalAndlocalStms @ [widthDel,strands_init] @ clStrandObjects @ clGlobalBuffers @ sourceStms @ create_build_stms (*@
817 :     kernelArguments @ clGlobalArguments @ enqueueStm @ [outputStm] @ freeStms @ outputData *)
818 : lamonts 1264
819 :     in
820 :    
821 :     CL.D_Func([],CL.voidTy,RN.setupFName,params,CL.mkBlock(body))
822 :    
823 :     end
824 :     (* generate the data and global parameters *)
(* genKeneralGlobalParams imgs
 *
 * Build the extra kernel parameters contributed by the image globals.  Each
 * (name, tyname) pair in `imgs` yields three parameters, in this order:
 *   1. a pointer to the globals struct (RN.globalsTy), named RN.globalsVarName
 *   2. a pointer to the image type (RN.imageTy tyname), named RN.addBufferSuffix name
 *   3. a void pointer, named RN.addBufferSuffixData name
 * An empty list yields no parameters.
 *
 * NOTE(review): the globals-struct parameter is emitted once per image, so it
 * is absent when there are no images and repeated when there are several.
 * That looks unintended, but the host-side argument setup must agree with
 * whatever is generated here -- confirm before changing.
 *)
fun genKeneralGlobalParams imgs = let
      fun paramsFor (name, tyname) = [
              CL.PARAM([], CL.T_Ptr(CL.T_Named RN.globalsTy), RN.globalsVarName),
              CL.PARAM([], CL.T_Ptr(CL.T_Named(RN.imageTy tyname)), RN.addBufferSuffix name),
              CL.PARAM([], CL.T_Ptr CL.voidTy, RN.addBufferSuffixData name)
            ]
      in
        List.concat (List.map paramsFor imgs)
      end
832 :    
833 :     (* generate code for initializing kernel global data *)
(* initKernelGlobals (globals, imgGlobals)
 *
 * Produce the assignment statements that set up the globals inside the kernel.
 *   - For every CL.D_Var declaration in `globals` (other declaration forms are
 *     ignored) emit `name = <globals-var>->name`, where <globals-var> is
 *     RN.globalsVarName.
 *   - For every (name, _) in `imgGlobals` emit `name = <buffer var>` followed
 *     by `name->data = <data-buffer var>`, using the names produced by
 *     RN.addBufferSuffix and RN.addBufferSuffixData.
 * The statements for `globals` come first, followed by those for the images;
 * each group preserves the order of its input list.
 *)
fun initKernelGlobals (globals, imgGlobals) = let
    (* copy one field of the globals struct into the like-named variable *)
      fun copyGlobal (CL.D_Var(_, _, name, _)) =
            SOME(CL.mkAssign(CL.mkVar name, CL.mkIndirect(CL.mkVar RN.globalsVarName, name)))
        | copyGlobal _ = NONE
    (* bind an image variable to its buffer and attach its data buffer *)
      fun bindImage (name, _) = [
              CL.mkAssign(CL.mkVar name, CL.mkVar(RN.addBufferSuffix name)),
              CL.mkAssign(CL.mkIndirect(CL.mkVar name, "data"), CL.mkVar(RN.addBufferSuffixData name))
            ]
      in
        List.mapPartial copyGlobal globals @ List.concat (List.map bindImage imgGlobals)
      end
849 :    
850 :     (* generate the main kernel function for the .cl file *)
(* genKernelFun (strand, nDims, globals, imgGlobals)
 *
 * Build the `__kernel` function declaration (named RN.kernelFuncName) that is
 * written to the .cl file.
 *
 *   strand      -- the strand; only its `name` and `tyName` fields are used
 *   nDims       -- number of grid dimensions; 1 selects the 1-D layout, any
 *                  other value selects the 2-D layout
 *   globals     -- ref to the list of global declarations
 *   imgGlobals  -- ref to the list of (name, tyname) image globals
 *
 * The generated body consists of: the thread-id declarations, the global
 * initialization (see initKernelGlobals), the declarations of the local
 * strand_in/strand_out values, two assignments involving selfIn/selfOut, a
 * `status` variable, and a loop that calls the strand's update and stabilize
 * functions.
 *
 * Fixes relative to the previous version:
 *   - the 1-D subscript used CL.mkStr "x", i.e. a string literal index
 *     (selfIn["x"]); it now uses the variable x.
 *   - the loop condition was `status != S || status != D`, which is true for
 *     every value of status; it now uses a conjunction so the loop ends once
 *     status is either RN.kStabilize or RN.kDie.
 *     (assumes CLang provides the #&& operator next to #|| -- TODO confirm)
 *
 * NOTE(review): the two assignments store the still-uninitialized locals into
 * selfIn[..]/selfOut[..]; loading from the buffers into the locals looks like
 * the intent -- confirm.
 * NOTE(review): `width` is a scalar int parameter carrying the "__global"
 * attribute, and no third thread id is generated when nDims is 3 -- confirm
 * both against the OpenCL rules and the host setup code.
 *)
fun genKernelFun (Strand{name, tyName, ...}, nDims, globals, imgGlobals) = let
      val fName = RN.kernelFuncName
      val inState = "strand_in"
      val outState = "strand_out"
    (* small constructors shared by the pieces below *)
      fun intLit n = CL.mkInt(n, CL.intTy)
      fun addrOf x = CL.mkUnOp(CL.%&, CL.mkVar x)
      val params = [
              CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfIn"),
              CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfOut"),
              CL.PARAM(["__global"], CL.intTy, "width")
            ] @ genKeneralGlobalParams(!imgGlobals)
    (* thread ids: declared as 0 and then set from RN.getGlobalThreadId *)
      val declX = CL.mkDecl(CL.intTy, "x", SOME(CL.I_Exp(intLit 0)))
      val declY = CL.mkDecl(CL.intTy, "y", SOME(CL.I_Exp(intLit 0)))
      fun setId (x, dim) =
            CL.mkAssign(CL.mkVar x, CL.mkApply(RN.getGlobalThreadId, [intLit dim]))
      val thread_ids = if nDims = 1
            then [declX, setId("x", 0)]
            else [declX, declY, setId("x", 0), setId("y", 1)]
      val strandDecl = [
              CL.mkDecl(CL.T_Named tyName, inState, NONE),
              CL.mkDecl(CL.T_Named tyName, outState, NONE)
            ]
    (* index of this thread's strand: x in 1-D, x * width + y otherwise *)
      val index = if nDims = 1
            then CL.mkVar "x"
            else CL.mkBinOp(CL.mkBinOp(CL.mkVar "x", CL.#*, CL.mkVar "width"), CL.#+, CL.mkVar "y")
      val strandObjects = [
              CL.mkAssign(CL.mkSubscript(CL.mkVar "selfIn", index), CL.mkVar inState),
              CL.mkAssign(CL.mkSubscript(CL.mkVar "selfOut", index), CL.mkVar outState)
            ]
      val status = CL.mkDecl(CL.intTy, "status", SOME(CL.I_Exp(intLit 0)))
      val local_vars =
            thread_ids @ initKernelGlobals(!globals, !imgGlobals) @ strandDecl @ strandObjects @ [status]
    (* keep iterating while the strand has neither stabilized nor died *)
      val while_exp = CL.mkBinOp(
            CL.mkBinOp(CL.mkVar "status", CL.#!=, CL.mkVar RN.kStabilize),
            CL.#&&,
            CL.mkBinOp(CL.mkVar "status", CL.#!=, CL.mkVar RN.kDie))
      val strandArgs = [addrOf inState, addrOf outState]
      val while_body = [
              CL.mkAssign(CL.mkVar "status", CL.mkApply(RN.strandUpdate name, strandArgs)),
              CL.mkCall(RN.strandStabilize name, strandArgs)
            ]
      val whileBlock = [CL.mkWhile(while_exp, CL.mkBlock while_body)]
      val body = CL.mkBlock(local_vars @ whileBlock)
      in
        CL.D_Func(["__kernel"], CL.voidTy, fName, params, body)
      end
898 :     (* generate a global structure from the globals *)
(* genGlobalStruct globals
 *
 * Build the struct definition named RN.globalsTy whose fields are the
 * (type, name) pairs of the CL.D_Var declarations in `globals`, in list
 * order.  Declarations of any other form are skipped.
 *)
fun genGlobalStruct globals = let
      fun field (CL.D_Var(_, ty, x, _)) = SOME(ty, x)
        | field _ = NONE
      in
        CL.D_StructDef(List.mapPartial field globals, RN.globalsTy)
      end
906 :    
907 : lamonts 1244 (* generate the table of strand descriptors *)
(* genStrandTable (ppStrm, strands)
 *
 * Print two declarations to `ppStrm` using PrintAsC.output:
 *   1. an int32 variable RN.numStrands initialized to the number of strands
 *   2. an array RN.strands of pointers to RN.strandDescTy, one entry per
 *      strand, where entry i is the address of the variable named
 *      RN.strandDesc <name of strand i>
 *)
fun genStrandTable (ppStrm, strands) = let
      val nStrands = List.length strands
    (* initializer holding the address of a strand's descriptor *)
      fun descAddr (Strand{name, ...}) = CL.I_Exp(CL.mkUnOp(CL.%&, CL.mkVar(RN.strandDesc name)))
    (* pair each initializer with its position, counting from 0 *)
      val (_, revInits) = List.foldl
            (fn (s, (i, acc)) => (i+1, (i, descAddr s) :: acc))
              (0, []) strands
      val countDcl = CL.D_Var(
            [], CL.int32, RN.numStrands,
            SOME(CL.I_Exp(CL.mkInt(IntInf.fromInt nStrands, CL.int32))))
      val tableDcl = CL.D_Var(
            [], CL.T_Array(CL.T_Ptr(CL.T_Named RN.strandDescTy), SOME nStrands),
            RN.strands,
            SOME(CL.I_Array(List.rev revInits)))
      in
        PrintAsC.output(ppStrm, countDcl);
        PrintAsC.output(ppStrm, tableDcl)
      end
922 : lamonts 1244
923 : lamonts 1264
(* genSrc (baseName, prog)
 *
 * Write the two generated source files for a program:
 *   <baseName>.cl  -- precision/target defines, the globals, the globals
 *                     struct, the strand type, the strand methods and the
 *                     kernel function
 *   <baseName>.c   -- the host side: defines, globals, globals struct, strand
 *                     type, strand init code, the strand init/print functions,
 *                     the top-level declarations and the host setup function
 *
 * The program must contain exactly one strand.  Previously this was enforced
 * by a non-exhaustive `val [strand ...]` binding, which raised Bind only after
 * both output files had been created; the check is now made up front and
 * raises Fail with a descriptive message.
 *
 * The output files are also closed when code generation raises an exception,
 * so the file descriptors are not leaked; the exception is then re-raised.
 *)
fun genSrc (baseName, Prog{double, globals, topDecls, strands, initially, imgGlobals, numDims, ...}) = let
      val strand = (case AtomTable.listItems strands
             of [s] => s
              | ss => raise Fail(concat[
                    "CLTarget.genSrc: expected exactly one strand, but found ",
                    Int.toString(List.length ss)
                  ])
            (* end case *))
      val Strand{code, init_code, ...} = strand
      val clFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "cl"}
      val cFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "c"}
      val precisionDef = if double
            then "#define DIDEROT_DOUBLE_PRECISION"
            else "#define DIDEROT_SINGLE_PRECISION"
      val clOutS = TextIO.openOut clFileName
    (* if the second open fails, do not leave the first stream open *)
      val cOutS = TextIO.openOut cFileName
            handle ex => (TextIO.closeOut clOutS; raise ex)
      fun closeFiles () = (TextIO.closeOut cOutS; TextIO.closeOut clOutS)
      fun emit () = let
(* FIXME: need to use PrintAsC and PrintAsCL *)
          (* NOTE(review): clppStrm is created with PrintAsC.new but is written
           * and closed through PrintAsCL -- kept exactly as before.
           *)
            val clppStrm = PrintAsC.new clOutS
            val cppStrm = PrintAsC.new cOutS
            fun cppDecl dcl = PrintAsC.output(cppStrm, dcl)
            fun clppDecl dcl = PrintAsCL.output(clppStrm, dcl)
            in
            (* Generate the OpenCl file *)
              clppDecl (CL.D_Verbatim([
                  precisionDef,
                  "#define DIDEROT_TARGET_CL",
                  "#include \"Diderot/cl-types.h\""
                ]));
              List.app clppDecl (List.rev (!globals));
              clppDecl (genGlobalStruct (!globals));
              clppDecl (genStrandTyDef strand);
              List.app clppDecl (!code);
              clppDecl (genKernelFun (strand, !numDims, globals, imgGlobals));
            (* Generate the Host file .c *)
              cppDecl (CL.D_Verbatim([
                  precisionDef,
                  "#define DIDEROT_TARGET_CL",
                  "#include \"Diderot/diderot.h\""
                ]));
              List.app cppDecl (List.rev (!globals));
              cppDecl (genGlobalStruct (!globals));
              cppDecl (genStrandTyDef strand);
              cppDecl (!init_code);
              cppDecl (genStrandInit(strand, !numDims));
              cppDecl (genStrandPrint(strand, !numDims));
              (* cppDecl (genKernelLoader());*)
              List.app cppDecl (List.rev (!topDecls));
              cppDecl (genHostSetupFunc (strand, clFileName, !numDims, initially, imgGlobals));
              PrintAsC.close cppStrm;
              PrintAsCL.close clppStrm
            end
      in
        (emit () handle ex => (closeFiles (); raise ex));
        closeFiles ()
      end
972 :    
973 : lamonts 1244 (* output the code to a file. The string is the basename of the file, the extension
974 :     * is provided by the target.
975 :     *)
(* generate (basename, prog)
 *
 * Generate the sources for `prog` (see genSrc) and then run the C compiler
 * and the linker on `basename` via RunCC.
 *
 * Compiler flags, in order: the base flags, the debug or no-debug flags
 * (chosen by `debug`), the pthread flags (only when `parallel`), and the
 * include paths for Diderot and Teem.
 *
 * Linker options, in order: the runtime library for the CL target, the OpenCL
 * libraries, the Teem link flags, the base extra libraries, and the pthread
 * libraries (only when `parallel`).
 *)
fun generate (basename, prog as Prog{double, parallel, debug, ...}) = let
    (* a flag that is only included for parallel builds *)
      fun whenParallel flag = if parallel then [flag] else []
    (* generate the C compiler flags *)
      val optFlag = if debug then #debug Paths.cflags else #ndebug Paths.cflags
      val cflags = List.concat [
              [#base Paths.cflags, optFlag],
              whenParallel (#pthread Paths.cflags),
              ["-I" ^ Paths.diderotInclude, "-I" ^ Paths.teemInclude]
            ]
    (* generate the loader flags *)
      val extraLibs = List.concat [
              [#cl Paths.extraLibs],
              Paths.teemLinkFlags,
              [#base Paths.extraLibs],
              whenParallel (#pthread Paths.extraLibs)
            ]
      val rtLib = TargetUtil.runtimeName {
              target = TargetUtil.TARGET_CL,
              parallel = parallel, double = double, debug = debug
            }
      val ldOpts = rtLib :: extraLibs
      in
        genSrc (basename, prog);
        RunCC.compile (basename, cflags);
        RunCC.link (basename, ldOpts)
      end
1000 : lamonts 1244
1001 : jhr 1273 end
1002 : lamonts 1264
1003 : lamonts 1244 (* strands *)
structure Strand =
  struct

  (* define (prog, strandId)
   *
   * Create a fresh strand record named after `strandId`, with empty state and
   * code lists, no output variable and a placeholder comment declaration as
   * its init code.  The record is inserted into the program's strand table
   * under `strandId` and returned.
   *)
    fun define (Prog{strands, ...}, strandId) = let
          val strandName = Atom.toString strandId
          val newStrand = Strand{
                  name = strandName,
                  tyName = RN.strandTy strandName,
                  state = ref [],
                  output = ref NONE,
                  code = ref [],
                  init_code = ref (CL.D_Comment(["no init code"]))
                }
          in
            AtomTable.insert strands (strandId, newStrand);
            newStrand
          end

  (* return the strand with the given name *)
    fun lookup (Prog{strands, ...}, strandId) = AtomTable.lookup strands strandId

  (* register the strand-state initialization code.  The variables are the
   * strand parameters.  The resulting function is named RN.strandInit <name>,
   * returns void, takes a "selfOut" pointer to the strand type followed by the
   * strand parameters, and replaces whatever init code was stored before.
   *)
    fun init (Strand{name, tyName, init_code, ...}, params, body) = let
          fun toParam (ToCL.V(ty, x)) = CL.PARAM([], ty, x)
          val selfParam = CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfOut")
          in
            init_code := CL.D_Func(
              [], CL.voidTy, RN.strandInit name,
              selfParam :: List.map toParam params,
              body)
          end

  (* register a strand method.  The generated function is named
   * <strand name>_<method name>, returns int32 and takes "selfIn" and
   * "selfOut" pointers to the strand type; it is added to the front of the
   * strand's code list.
   *)
    fun method (Strand{name, tyName, code, ...}, methName, body) = let
          val selfTy = CL.T_Ptr(CL.T_Named tyName)
          val methFn = CL.D_Func(
                [], CL.int32, concat[name, "_", methName],
                [CL.PARAM([], selfTy, "selfIn"), CL.PARAM([], selfTy, "selfOut")],
                body)
          in
            code := methFn :: !code
          end

  (* record the type and name of the strand's output variable *)
    fun output (Strand{output=outRef, ...}, ty, ToCL.V(_, x)) = outRef := SOME(ty, x)

  end
1052 :    
1053 :     end
1054 :    
(* the OpenCL back end: the code-generation functor applied to the CL target *)
structure CLBackEnd = CodeGenFn (CLTarget)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0