Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml
ViewVC logotype

Annotation of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1316 - (view) (download)

1 : jhr 1315 (* cl-target.sml
2 : lamonts 1244 *
3 :     * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
4 :     * All rights reserved.
5 :     *)
6 :    
7 :     structure CLTarget : TARGET =
8 :     struct
9 :    
10 :     structure IL = TreeIL
11 :     structure V = IL.Var
12 :     structure Ty = IL.Ty
13 :     structure CL = CLang
14 :     structure RN = RuntimeNames
15 : jhr 1273 structure ToCL = TreeToCL
16 : lamonts 1305 structure N = CNames
17 : lamonts 1244
18 : jhr 1315 (* C variable translation *)
structure TrCVar =
  struct
    type env = CL.typed_var TreeIL.Var.Map.map

  (* return the target-language name bound to the TreeIL variable x;
   * raises Fail if x has no binding in the environment.
   *)
    fun lookup (env, x) = (case V.Map.find (env, x)
           of NONE => raise Fail(concat["TrCVar.lookup(_, ", V.name x, ")"])
            | SOME(CL.V(_, cName)) => cName
          (* end case *))

    local
    (* shared translation of a variable reference.  Globals are accessed
     * through the globals pointer, state variables through the strand-state
     * pointer named by selfName, and locals are referenced directly.
     *)
      fun trVar selfName (env, x) = (case V.kind x
             of IL.VK_Local => CL.mkVar(lookup(env, x))
              | IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, lookup(env, x))
              | IL.VK_State _ => CL.mkIndirect(CL.mkVar selfName, lookup(env, x))
            (* end case *))
    in
  (* translate a variable that occurs in an l-value context (i.e., as the
   * target of an assignment); state variables are written via "selfOut".
   *)
    fun lvalueVar (env, x) = trVar "selfOut" (env, x)
  (* translate a variable that occurs in an r-value context; state variables
   * are read via "selfIn".
   *)
    fun rvalueVar (env, x) = trVar "selfIn" (env, x)
    end (* local *)
  end
39 :    
40 :     structure ToC = TreeToCFn (TrCVar)
41 :    
42 :     type var = CL.typed_var
43 : lamonts 1244 type exp = CL.exp
44 :     type stm = CL.stm
45 :    
46 : jhr 1313 (* OpenCL specific types *)
47 :     val clIntTy = CL.T_Named "cl_int"
48 : jhr 1279 val clProgramTy = CL.T_Named "cl_program"
49 :     val clKernelTy = CL.T_Named "cl_kernel"
50 :     val clCmdQueueTy = CL.T_Named "cl_command_queue"
51 :     val clContextTy = CL.T_Named "cl_context"
52 :     val clDeviceIdTy = CL.T_Named "cl_device_id"
53 :     val clPlatformIdTy = CL.T_Named "cl_platform_id"
54 :     val clMemoryTy = CL.T_Named "cl_mem"
55 : jhr 1313 val globPtrTy = CL.T_Ptr(CL.T_Named RN.globalsTy)
56 : jhr 1279
57 : jhr 1307 (* variable or field that is mirrored between host and GPU *)
58 :     type mirror_var = {
59 :     hostTy : CL.ty, (* variable type on Host (i.e., C type) *)
60 :     gpuTy : CL.ty, (* variable's type on GPU (i.e., OpenCL type) *)
61 :     var : CL.var (* variable name *)
62 :     }
63 :    
64 : lamonts 1244 datatype strand = Strand of {
65 : jhr 1261 name : string,
66 :     tyName : string,
67 : jhr 1307 state : mirror_var list ref,
68 : jhr 1261 output : (Ty.ty * CL.var) option ref, (* the strand's output variable (only one for now) *)
69 : lamonts 1271 code : CL.decl list ref,
70 : jhr 1273 init_code: CL.decl ref
71 : lamonts 1244 }
72 :    
73 :     datatype program = Prog of {
74 : jhr 1307 name : string, (* stem of source file *)
75 : jhr 1261 double : bool, (* true for double-precision support *)
76 :     parallel : bool, (* true for multithreaded (or multi-GPU) target *)
77 :     debug : bool, (* true for debug support in executable *)
78 : jhr 1307 globals : mirror_var list ref,
79 : jhr 1261 topDecls : CL.decl list ref,
80 :     strands : strand AtomTable.hash_table,
81 : lamonts 1305 initially : CL.decl ref,
82 : jhr 1273 numDims: int ref,
83 :     imgGlobals: (string * int) list ref,
84 :     prFn: CL.decl ref
85 :     }
86 : lamonts 1244
87 :     datatype env = ENV of {
88 : jhr 1261 info : env_info,
89 :     vMap : var V.Map.map,
90 :     scope : scope
91 : lamonts 1244 }
92 :    
93 :     and env_info = INFO of {
94 : jhr 1261 prog : program
95 : lamonts 1244 }
96 :    
97 :     and scope
98 :     = NoScope
99 :     | GlobalScope
100 :     | InitiallyScope
101 : jhr 1261 | StrandScope of TreeIL.var list (* strand initialization *)
102 :     | MethodScope of TreeIL.var list (* method body; vars are state variables *)
103 : lamonts 1244
104 : jhr 1273 (* the supported widths of vectors of reals on the target. *)
105 :     (* FIXME: for OpenCL 1.1, 3 is also valid *)
106 :     fun vectorWidths () = [2, 4, 8, 16]
107 : lamonts 1244
108 :     (* tests for whether various expression forms can appear inline *)
109 : jhr 1261 fun inlineCons n = (n < 2) (* vectors are inline, but not matrices *)
110 :     val inlineMatrixExp = false (* can matrix-valued expressions appear inline? *)
111 : lamonts 1244
112 :     (* TreeIL to target translations *)
structure Tr =
  struct
  (* translate a fragment of TreeIL code.  Fragments in the global and
   * initially scopes are translated by the C translator (ToC); every other
   * scope uses the OpenCL translator (ToCL).  The returned environment
   * carries the variable map produced by the translation.
   *)
    fun fragment (ENV{info, vMap, scope}, blk) = let
          fun isHostScope GlobalScope = true
            | isHostScope InitiallyScope = true
            | isHostScope _ = false
          val (vMap', stms) = if isHostScope scope
                then ToC.trFragment (vMap, blk)
                else ToCL.trFragment (vMap, blk)
          in
            (ENV{info=info, vMap=vMap', scope=scope}, stms)
          end

  (* translate a block of TreeIL code.  In strand and method scopes, the
   * state-saving callback expands into one assignment per state variable,
   * followed by the statement it was given.
   *)
    fun block (ENV{vMap, scope, ...}, blk) = let
        (* prepend the assignments of args to stateVars onto stm; cxt is only
         * used to label the diagnostic when the two lists differ in length.
         *)
          fun saveState cxt stateVars trAssign (env, args, stm) = (
                ListPair.foldrEq
                  (fn (sv, arg, rest) => trAssign(env, sv, arg) @ rest)
                    [stm]
                      (stateVars, args)
              ) handle ListPair.UnequalLengths => (
                print(concat["saveState ", cxt, ": length mismatch; ", Int.toString(List.length args), " args\n"]);
                raise Fail(concat["saveState ", cxt, ": length mismatch"]))
        (* callback for scopes that have no state to save *)
          fun passThrough (_, _, stm) = [stm]
          in
            case scope
(* NOTE: if we move strand initialization to the GPU, then we'll have to change the following code! *)
             of StrandScope stateVars =>
                  ToC.trBlock (vMap, saveState "StrandScope" stateVars ToC.trAssign, blk)
              | MethodScope stateVars =>
                  ToCL.trBlock (vMap, saveState "MethodScope" stateVars ToCL.trAssign, blk)
              | _ => ToC.trBlock (vMap, passThrough, blk)
            (* end case *)
          end

  (* translate an expression using the OpenCL translator *)
    fun exp (ENV{vMap, ...}, e) = ToCL.trExp(vMap, e)
  end
146 :    
147 :     (* variables *)
structure Var =
  struct
  (* the name of a target variable *)
    fun name (ToCL.V(_, name)) = name

  (* declare a global variable.  The variable is recorded with both its host
   * (C) and GPU (OpenCL) types; globals of image type are additionally
   * recorded, with their dimension, in the program's imgGlobals list.  The
   * result is the variable paired with its GPU type.
   *)
    fun global (Prog{globals, imgGlobals, ...}, name, ty) = let
          val hostTy = ToC.trType ty
          val gpuTy = ToCL.trType ty
          in
            globals := {hostTy = hostTy, gpuTy = gpuTy, var = name} :: !globals;
            case ty
             of Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}) =>
                  imgGlobals := (name, dim) :: !imgGlobals
              | _ => ()
            (* end case *);
            ToCL.V(gpuTy, name)
          end

  (* the target variable for a TreeIL parameter *)
    fun param x = ToCL.V(ToCL.trType(V.ty x), V.name x)

  (* declare a strand state variable; like global, it is recorded with both
   * its host and GPU types, and the result carries the GPU type.
   *)
    fun state (Strand{state, ...}, x) = let
          val ty = V.ty x
          val hostTy = ToC.trType ty
          val gpuTy = ToCL.trType ty
          val varName = V.name x
          in
            state := {hostTy = hostTy, gpuTy = gpuTy, var = varName} :: !state;
            ToCL.V(gpuTy, varName)
          end
  end
170 :    
171 :     (* environments *)
structure Env =
  struct
  (* create a new environment for a program: no variable bindings and no scope *)
    fun new prog = ENV{info = INFO{prog = prog}, vMap = V.Map.empty, scope = NoScope}

  (* define the current translation context; the bindings are left unchanged *)
    fun setScope newScope (ENV{info, vMap, ...}) =
          ENV{info = info, vMap = vMap, scope = newScope}
    fun scopeGlobal env = setScope GlobalScope env
    fun scopeInitially env = setScope InitiallyScope env
    fun scopeStrand (env, svars) = setScope (StrandScope svars) env
    fun scopeMethod (env, svars) = setScope (MethodScope svars) env

  (* bind a TreeIL variable to a target variable *)
    fun bind (ENV{info, vMap, scope}, x, x') = let
          val vMap' = V.Map.insert(vMap, x, x')
          in
            ENV{info = info, vMap = vMap', scope = scope}
          end
  end
193 :    
194 :     (* programs *)
195 :     structure Program =
196 :     struct
(* create a fresh program representation.  Both name modules (RN and CNames)
 * are first told whether double precision was requested; all of the
 * mutable fields then start out empty or as placeholder comments.
 *)
fun new {name, double, parallel, debug} = let
      val _ = RN.initTargetSpec double
      val _ = CNames.initTargetSpec double
      in
        Prog{
            name = name,
            double = double,
            parallel = parallel,
            debug = debug,
            globals = ref [],
            topDecls = ref [],
            strands = AtomTable.mkTable (16, Fail "strand table"),
            initially = ref(CL.D_Comment["missing initially"]),
            numDims = ref 0,
            imgGlobals = ref [],
            prFn = ref(CL.D_Comment["No Print Function"])
          }
      end
211 : lamonts 1244 (* register the global initialization part of a program *)
212 : jhr 1308 (* FIXME: unused code; can this be removed??
213 : jhr 1307 fun globalIndirects (globals,stms) = let
214 : lamonts 1305 fun getGlobals ({name,target as TargetUtil.TARGET_CL}::rest) =
215 : jhr 1307 CL.mkAssign(CL.mkIndirect(CL.mkVar RN.globalsVarName,name),CL.mkVar name)
216 :     ::getGlobals rest
217 : jhr 1281 | getGlobals [] = []
218 :     | getGlobals (_::rest) = getGlobals rest
219 : jhr 1273 in
220 : jhr 1281 stms @ getGlobals globals
221 : jhr 1273 end
222 : jhr 1308 *)
223 : jhr 1261 (* register the code that is used to register command-line options for input variables *)
(* register the code that is used to register command-line options for input
 * variables.  The statement stm becomes the body of a void function, named
 * RN.registerOpts, that takes a pointer to the options object as "opts".
 *)
fun inputs (Prog{topDecls, ...}, stm) = let
      val optsParam = CL.PARAM([], CL.T_Ptr(CL.T_Named RN.optionsTy), "opts")
      in
        topDecls := CL.D_Func([], CL.voidTy, RN.registerOpts, [optsParam], stm) :: !topDecls
      end
232 :    
233 : jhr 1286 (* register the global initialization part of a program *)
(* register the global initialization part of a program.  Three host
 * functions are added to the top-level declarations:
 *   - RN.initGlobalsHelper, whose body is the given initialization code,
 *   - RN.initGlobals, which allocates the globals struct and then calls the helper,
 *   - RN.shutdown, which has an empty body.
 * topDecls is emitted in reverse order by genSrc, so the helper is printed
 * before the function that calls it.
 *)
fun init (Prog{topDecls, ...}, init) = let
    (* a parameterless-or-not void host function *)
      fun voidFn (fName, params, body) = CL.D_Func([], CL.voidTy, fName, params, body)
    (* globals = malloc(sizeof(<globals type>)) *)
      val allocGlobals = CL.mkAssign(
            CL.E_Var RN.globalsVarName,
            CL.mkApply("malloc", [CL.mkApply("sizeof", [CL.mkVar RN.globalsTy])]))
      val callHelper = CL.mkCall(RN.initGlobalsHelper, [])
      val initFn = voidFn (RN.initGlobals, [], CL.mkBlock [allocGlobals, callHelper])
      val helperFn = voidFn (RN.initGlobalsHelper, [], init)
      val shutdownFn = voidFn (
            RN.shutdown,
            [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.worldTy), "wrld")],
            CL.S_Block[])
      in
        topDecls := shutdownFn :: initFn :: helperFn :: !topDecls
      end
253 : lamonts 1305 (* create and register the initially function for a program *)
(* create and register the initially function for a program.  The generated
 * host function runs the iteration prefix, declares the "base" and "size"
 * arrays (one entry per iterator), allocates the world via N.allocInitially,
 * and returns the world pointer.
 *
 * The loop nest that would initialize the individual strands is currently
 * disabled (see the commented-out code below), so the createPrefix and args
 * fields are not used.
 * NOTE(review): this function does not update the program's numDims field,
 * although genSrc reads it -- confirm where it is supposed to be set.
 *)
fun initially {
      prog = Prog{initially, ...},
      isArray : bool,
      iterPrefix : stm list,
      iters : (var * exp * exp) list,
      createPrefix : stm list,
      strand : Atom.atom,
      args : exp list
    } = let
      val name = Atom.toString strand
      val nDims = List.length iters
      val worldTy = CL.T_Ptr(CL.T_Named N.worldTy)
    (* the iterator positions 0 .. nDims-1 *)
      val indices = List.tabulate (nDims, fn i => i)
    (* base[i] is the lower bound of the i'th iterator *)
      val baseInit = ListPair.map
            (fn (i, (_, lo, _)) => (i, CL.I_Exp lo))
              (indices, iters)
    (* size[i] is (hi - lo) + 1 for the i'th iterator *)
      val sizeInit = ListPair.map
            (fn (i, (CL.V(ty, _), lo, hi)) =>
                (i, CL.I_Exp(CL.mkBinOp(CL.mkBinOp(hi, CL.#-, lo), CL.#+, CL.E_Int(1, ty)))))
              (indices, iters)
    (* code to allocate the world and initial strands *)
      val wrld = "wrld"
      val allocWorld = CL.E_Apply(N.allocInitially, [
              CL.mkVar "ProgramName",
              CL.mkUnOp(CL.%&, CL.E_Var(N.strandDesc name)),
              CL.E_Bool isArray,
              CL.E_Int(IntInf.fromInt nDims, CL.int32),
              CL.E_Var "base",
              CL.E_Var "size"
            ])
      val allocCode = [
              CL.mkComment["allocate initial block of strands"],
              CL.mkDecl(CL.T_Array(CL.int32, SOME nDims), "base", SOME(CL.I_Array baseInit)),
              CL.mkDecl(CL.T_Array(CL.uint32, SOME nDims), "size", SOME(CL.I_Array sizeInit)),
              CL.mkDecl(worldTy, wrld, SOME(CL.I_Exp allocWorld))
            ]
    (* create the loop nest for the initially iterations
      val indexVar = "ix"
      val strandTy = CL.T_Ptr(CL.T_Named(N.strandTy name))
      fun mkLoopNest [] = CL.mkBlock(createPrefix @ [
              CL.mkDecl(strandTy, "sp",
                SOME(CL.I_Exp(
                  CL.E_Cast(strandTy,
                  CL.E_Apply(N.inState, [CL.E_Var "wrld", CL.E_Var indexVar]))))),
              CL.mkCall(N.strandInit name,
                CL.E_Var RN.globalsVarName :: CL.E_Var "sp" :: args),
              CL.mkAssign(CL.E_Var indexVar, CL.mkBinOp(CL.E_Var indexVar, CL.#+, CL.E_Int(1, CL.uint32)))
            ])
        | mkLoopNest ((CL.V(ty, param), lo, hi)::iters) = let
            val body = mkLoopNest iters
            in
              CL.mkFor(
                [(ty, param, lo)],
                CL.mkBinOp(CL.E_Var param, CL.#<=, hi),
                [CL.mkPostOp(CL.E_Var param, CL.^++)],
                body)
            end
      val iterCode = [
              CL.mkComment["initially"],
              CL.mkDecl(CL.uint32, indexVar, SOME(CL.I_Exp(CL.E_Int(0, CL.uint32)))),
              mkLoopNest iters
            ] *)
      val returnWorld = CL.mkReturn(SOME(CL.E_Var "wrld"))
      val body = CL.mkBlock(iterPrefix @ allocCode @ [returnWorld])
      in
        initially := CL.D_Func([], worldTy, N.initially, [], body)
      end
327 : jhr 1281
328 : lamonts 1305 (***** OUTPUT *****)
(* generate the host-side print function for a strand.  The function is named
 * "<strand>_print", takes an output stream ("outS") and a pointer to the
 * strand state ("self"), and prints the strand's output variable with a
 * single fprintf call followed by a newline.
 *
 * Supported output types are integers and integer vectors (formatted with
 * !N.gIntFormat) and real scalars and vectors (formatted with "%f").
 *
 * Raises Fail if the strand has no registered output variable, or if the
 * output type is not one of the supported forms.
 *)
fun genStrandPrint (Strand{name, tyName, output, ...}) = let
      val prFnName = concat[name, "_print"]
      val params = [
              CL.PARAM([], CL.T_Ptr(CL.T_Named "FILE"), "outS"),
              CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "self")
            ]
    (* an explicit check gives a useful message; a refutable "val SOME ..."
     * binding would just raise Bind.
     *)
      val (ty, x) = (case !output
             of SOME out => out
              | NONE => raise Fail(concat[
                    "genStrandPrint: strand ", name, " has no output variable"
                  ])
            (* end case *))
      val outState = CL.mkIndirect(CL.mkVar "self", x)
    (* a format string consisting of d space-separated copies of spec and a newline *)
      fun vecFormat (d, spec) =
            CL.mkStr(String.concatWith " " (List.tabulate(d, fn _ => spec)) ^ "\n")
      val prArgs = (case ty
             of Ty.IVecTy 1 => [CL.E_Str(!N.gIntFormat ^ "\n"), outState]
              | Ty.IVecTy d =>
                  vecFormat (d, !N.gIntFormat)
                    :: List.tabulate (d, fn i => ToC.ivecIndex(outState, d, i))
              | Ty.TensorTy[] => [CL.mkStr "%f\n", outState]
              | Ty.TensorTy[d] =>
                  vecFormat (d, "%f")
                    :: List.tabulate (d, fn i => ToC.vecIndex(outState, d, i))
              | _ => raise Fail("genStrand: unsupported output type " ^ Ty.toString ty)
            (* end case *))
      in
        CL.D_Func(["static"], CL.voidTy, prFnName, params,
          CL.mkCall("fprintf", CL.mkVar "outS" :: prArgs))
      end
367 : lamonts 1305
(* the type declaration for the strand's state struct.  targetTy selects
 * which type (host or GPU) of each mirrored state variable is used.  The
 * state list is built by consing, so it is reversed here to restore the
 * order in which the variables were added.
 *)
fun genStrandTyDef (targetTy, Strand{tyName, state, ...}) = let
      fun addField (sv : mirror_var, fields) = (targetTy sv, #var sv) :: fields
      in
        CL.D_StructDef(List.foldl addField [] (!state), tyName)
      end
373 : lamonts 1305
374 : jhr 1308 (* generates the load kernel function *)
375 : jhr 1307
376 :     (* generates the opencl buffers for the image data *)
(* generates the OpenCL buffers for the globals struct and the image data.
 * The result is a list of host statements: first the declaration and
 * creation of the "<globals>_cl" buffer, then, for each (name, nDims) image
 * global, the declarations of its two buffers followed by the
 * clCreateBuffer calls for the image struct and for its data.
 * contextVar names the cl_context variable and errVar the error-code variable.
 *)
fun getGlobalDataBuffers (globals, contextVar, errVar) = let
      val globalsBuf = concat[RN.globalsVarName, "_cl"]
    (* clCreateBuffer (context, CL_MEM_COPY_HOST_PTR, size, hostPtr, &err) *)
      fun createBuffer (szExp, hostPtr) = CL.mkApply("clCreateBuffer", [
              CL.mkVar contextVar,
              CL.mkVar "CL_MEM_COPY_HOST_PTR",
              szExp,
              hostPtr,
              CL.mkUnOp(CL.%&, CL.mkVar errVar)
            ])
      fun sizeofNamed tyName = CL.mkApply("sizeof", [CL.mkVar tyName])
    (* the statements for one image global *)
      fun imageBuffers (var, nDims) = let
            val hostVar = CL.mkIndirect(CL.mkVar RN.globalsVarName, var)
            fun sizeExp i = CL.mkSubscript(CL.mkIndirect(hostVar, "size"), CL.mkInt i)
          (* sizeof(float) * size[0], further multiplied by size[1] and
           * size[2] when the image has more than one / two dimensions.
           *)
            val dataSize = List.foldl
                  (fn ((minDims, dimExp), sz) => if (nDims > minDims)
                      then CL.mkBinOp(sz, CL.#*, dimExp)
                      else sz)
                    (CL.mkBinOp(sizeofNamed "float", CL.#*, sizeExp 0))
                      [(1, sizeExp 1), (2, sizeExp 2)]
            val imgBuf = RN.addBufferSuffix var
            val dataBuf = RN.addBufferSuffixData var
            in [
              CL.mkDecl(clMemoryTy, imgBuf, NONE),
              CL.mkDecl(clMemoryTy, dataBuf, NONE),
              CL.mkAssign(CL.mkVar imgBuf,
                createBuffer (sizeofNamed (RN.imageTy nDims), hostVar)),
              CL.mkAssign(CL.mkVar dataBuf,
                createBuffer (dataSize, CL.mkIndirect(hostVar, "data")))
            ] end
      in
        CL.mkDecl(clMemoryTy, globalsBuf, NONE)
          :: CL.mkAssign(CL.mkVar globalsBuf,
               createBuffer (sizeofNamed RN.globalsTy, CL.mkVar RN.globalsVarName))
          :: List.concat (List.map imageBuffers globals)
      end
422 : lamonts 1264
423 : jhr 1309 (* generates the kernel arguments for the image data *)
(* generates the kernel arguments for the globals and the image data.  Each
 * generated statement has the form
 *     err |= clSetKernelArg (kernel, count++, sizeof(cl_mem), &buffer)
 * The first statement passes the "<globals>_cl" buffer; it is followed, for
 * each image global, by one statement for the image buffer and one for its
 * data buffer.
 *)
fun genGlobalArguments (globals, count, kernelVar, errVar) = let
      fun setMemArg bufName = CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar, CL.|=,
            CL.mkApply("clSetKernelArg", [
                CL.mkVar kernelVar,
                CL.mkPostOp(CL.E_Var count, CL.^++),
                CL.mkApply("sizeof", [CL.mkVar "cl_mem"]),
                CL.mkUnOp(CL.%&, CL.mkVar bufName)
              ])))
      fun imageArgs ((var, _), rest) =
            setMemArg (RN.addBufferSuffix var)
              :: setMemArg (RN.addBufferSuffixData var)
              :: rest
      in
        setMemArg (concat[RN.globalsVarName, "_cl"]) :: List.foldr imageArgs [] globals
      end
449 : lamonts 1264
450 : lamonts 1305 (* generates the globals buffers and arguments function *)
(* generates the host function (named RN.globalsSetupName) that creates the
 * OpenCL buffers for the globals and passes them to the kernel.  The
 * function takes the context, the kernel, and the index of the first kernel
 * argument to set ("argStart").  imgGlobals is the program's reference to
 * its list of image globals.
 *)
fun genGlobalBuffersArgs (imgGlobals) = let
      val errVar = "err"
      val params = [
              CL.PARAM([], CL.T_Named "cl_context", "context"),
              CL.PARAM([], CL.T_Named "cl_kernel", "kernel"),
              CL.PARAM([], CL.T_Named "int", "argStart")
            ]
    (* the error variable is declared first and initialized to zero *)
      val errDecl = CL.mkDecl(clIntTy, errVar, SOME(CL.I_Exp(CL.mkInt 0)))
      val bufferStms = getGlobalDataBuffers(!imgGlobals, "context", errVar)
      val argStms = genGlobalArguments(!imgGlobals, "argStart", "kernel", errVar)
      in
        CL.D_Func([], CL.voidTy, RN.globalsSetupName, params,
          CL.mkBlock(errDecl :: (bufferStms @ argStms)))
      end
468 : lamonts 1305
469 : jhr 1307 (* generate the data and global parameters *)
(* generate the kernel parameters for the globals and the image data.  The
 * argument is a list of (name, nDims) pairs describing the image globals.
 *
 * The result starts with exactly one pointer parameter for the globals
 * struct, followed by two parameters per image (the image struct and its
 * data).  This is the order in which genGlobalArguments sets the
 * corresponding kernel arguments on the host: it always passes the globals
 * buffer once, even when there are no images.  Emitting the globals
 * parameter once per image would produce duplicate parameter names for two
 * or more images and no globals parameter at all for zero images.
 *
 * NOTE(review): these pointer parameters carry no address-space qualifier,
 * whereas genKernelFun marks selfIn/selfOut "__global" -- confirm that this
 * is handled elsewhere.
 *)
fun genKeneralGlobalParams imgGlobals = let
      fun imageParams ((name, nDims), rest) =
            CL.PARAM([], CL.T_Ptr(CL.T_Named (RN.imageTy nDims)), RN.addBufferSuffix name) ::
            CL.PARAM([], CL.T_Ptr(CL.voidTy), RN.addBufferSuffixData name) ::
            rest
      in
        CL.PARAM([], CL.T_Ptr(CL.T_Named RN.globalsTy), RN.globalsVarName)
          :: List.foldr imageParams [] imgGlobals
      end
476 :    
477 :     (* generate code for initializing kernel global data *)
478 :     (* FIXME: should use List.map here *)
(* generate the kernel statements that patch the image pointers in the
 * globals struct.  For each (name, nDims) image global, two assignments are
 * produced: the globals field is set to the image buffer parameter, and the
 * image's "data" field is set to the data buffer parameter.
 *
 * The data field is addressed with nested CL.mkIndirect constructors (as in
 * getGlobalDataBuffers) rather than by splicing "->" into a field name.
 *)
fun initGlobalImages imgGlobals = let
      val globalsPtr = CL.E_Var RN.globalsVarName
      fun initImage (name, _) = let
            val imgField = CL.mkIndirect(globalsPtr, name)
            in [
              CL.mkAssign(imgField, CL.mkVar (RN.addBufferSuffix name)),
              CL.mkAssign(CL.mkIndirect(imgField, "data"), CL.mkVar (RN.addBufferSuffixData name))
            ] end
      in
        List.concat (List.map initImage imgGlobals)
      end
484 :    
485 : jhr 1307
486 :     (* generate the main kernel function for the .cl file *)
(* generate the main kernel function for the .cl file.  The kernel
 *   1. computes the thread indices ("x", and "y" unless nDims = 1),
 *   2. patches the image pointers in the globals struct,
 *   3. copies the strand's state from selfIn/selfOut into local variables,
 *   4. calls the strand's init function, and
 *   5. repeatedly calls the update and stabilize methods until the status
 *      is either RN.kStabilize or RN.kDie.
 *
 * strand is the (single) strand of the program, nDims the number of
 * dimensions of the strand grid, and imgGlobals the program's reference to
 * its list of image globals.  The globals argument is currently unused.
 *
 * NOTE(review): the init call always passes both "x" and "y", but "y" is
 * only declared when nDims <> 1 -- confirm what the 1-D case should pass.
 *)
fun genKernelFun (strand, nDims, globals, imgGlobals) = let
      val Strand{name, tyName, ...} = strand
      val fName = RN.kernelFuncName
      val inState = "strand_in"
      val outState = "strand_out"
      val params = [
              CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfIn"),
              CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfOut"),
              CL.PARAM(["__global"], CL.intTy, "width")
            ] @ genKeneralGlobalParams(!imgGlobals)
    (* "int v = 0;" *)
      fun declZero v = CL.mkDecl(CL.intTy, v, SOME(CL.I_Exp(CL.mkInt 0)))
    (* "v = <global thread id in dimension dim>;" *)
      fun setThreadId (v, dim) =
            CL.mkAssign(CL.mkVar v, CL.mkApply(RN.getGlobalThreadId, [CL.mkInt dim]))
      val thread_ids = if nDims = 1
            then [declZero "x", setThreadId ("x", 0)]
            else [
                declZero "x",
                declZero "y",
                setThreadId ("x", 0),
                setThreadId ("y", 1)
              ]
      val strandDecl = [
              CL.mkDecl(CL.T_Named tyName, inState, NONE),
              CL.mkDecl(CL.T_Named tyName, outState, NONE)
            ]
    (* the index of this thread's strand.  In the 1-D case this must be the
     * variable x; a string literal "x" is not a valid subscript.
     *)
      val index = if nDims = 1
            then CL.mkVar "x"
            else CL.mkBinOp(CL.mkBinOp(CL.mkVar "x", CL.#*, CL.mkVar "width"), CL.#+, CL.mkVar "y")
      val strandObjects = [
              CL.mkAssign(CL.mkVar inState, CL.mkSubscript(CL.mkVar "selfIn", index)),
              CL.mkAssign(CL.mkVar outState, CL.mkSubscript(CL.mkVar "selfOut", index))
            ]
      val status = CL.mkDecl(CL.intTy, "status", SOME(CL.I_Exp(CL.mkInt 0)))
      val strand_Init_Stm = CL.mkCall(RN.strandInit name, [
              CL.E_Var RN.globalsVarName,
              CL.mkUnOp(CL.%&, CL.E_Var inState),
              CL.E_Var "x",
              CL.E_Var "y"
            ])
      val local_vars =
            thread_ids @ initGlobalImages(!imgGlobals) @ strandDecl @ strandObjects
              @ [strand_Init_Stm, status]
    (* keep running while the strand has neither stabilized nor died.  The
     * two tests must be joined with "&&": with "||" the condition is always
     * true, since status cannot equal both constants at once.
     *)
      val while_exp = CL.mkBinOp(
            CL.mkBinOp(CL.mkVar "status", CL.#!=, CL.mkVar RN.kStabilize),
            CL.#&&,
            CL.mkBinOp(CL.mkVar "status", CL.#!=, CL.mkVar RN.kDie))
    (* the arguments shared by the update and stabilize calls *)
      val methodArgs = [
              CL.mkUnOp(CL.%&, CL.mkVar inState),
              CL.mkUnOp(CL.%&, CL.mkVar outState),
              CL.E_Var RN.globalsVarName
            ]
      val whileBody = CL.mkBlock [
              CL.mkAssign(CL.mkVar "status", CL.mkApply(RN.strandUpdate name, methodArgs)),
              CL.mkCall(RN.strandStabilize name, methodArgs)
            ]
      val whileBlock = [CL.mkWhile(while_exp, whileBody)]
      val body = CL.mkBlock(local_vars @ whileBlock)
      in
        CL.D_Func(["__kernel"], CL.voidTy, fName, params, body)
      end
541 :     (* generate a global structure from the globals *)
(* generate a global structure from the globals; targetTy selects the host
 * or GPU type of each mirrored variable.
 *)
fun genGlobalStruct (targetTy, globals) =
      CL.D_StructDef(
        List.map (fn (g : mirror_var) => (targetTy g, #var g)) globals,
        RN.globalsTy)

(* pass an uninitialized variable declaration for each global to declFn *)
fun genGlobals (declFn, targetTy, globals) =
      List.app
        (fn (g : mirror_var) => declFn (CL.D_Var([], targetTy g, #var g, NONE)))
          globals
552 : lamonts 1264
(* generate the strand's descriptor object: a variable of type
 * N.strandDescTy, named N.strandDesc name, whose initializer records the
 * strand's name, the size of its state struct, an update method (the
 * literal "0" cast to update_method_t), and its print function.
 *)
fun genStrandDesc (Strand{name, output, ...}) = let
    (* an initializer that casts the expression named f to the type named ty *)
      fun fnPtr (ty, f) = CL.I_Exp(CL.mkCast(CL.T_Named ty, CL.mkVar f))
    (* the output type is only needed by the disabled "outputSzb" field, but
     * the binding still requires that an output variable has been registered.
     *)
      val SOME(outTy, _) = !output
      val fields = [
              ("name", CL.I_Exp(CL.mkStr name)),
              ("stateSzb", CL.I_Exp(CL.mkSizeof(CL.T_Named(N.strandTy name)))),
(*
              ("outputSzb", CL.I_Exp(CL.mkSizeof(ToC.trTy outTy))),
*)
              ("update", fnPtr("update_method_t", "0")),
              ("print", fnPtr("print_method_t", name ^ "_print"))
            ]
      in
        CL.D_Var([], CL.T_Named N.strandDescTy, N.strandDesc name, SOME(CL.I_Struct fields))
      end
573 :    
574 :     (* generate the table of strand descriptors *)
(* generate the table of strand descriptors.  Two declarations are passed to
 * declFn: the number of strands (N.numStrands), and an array (N.strands)
 * holding the address of each strand's descriptor.
 *)
fun genStrandTable (declFn, strands) = let
      val nStrands = length strands
    (* the address of a strand's descriptor *)
      fun descAddr (Strand{name, ...}) =
            CL.I_Exp(CL.mkUnOp(CL.%&, CL.E_Var(N.strandDesc name)))
    (* pair each initializer with its array index *)
      val inits = ListPair.zip (
            List.tabulate (nStrands, fn i => i),
            List.map descAddr strands)
      val countDecl = CL.D_Var([], CL.int32, N.numStrands,
            SOME(CL.I_Exp(CL.E_Int(IntInf.fromInt nStrands, CL.int32))))
      val tableDecl = CL.D_Var([],
            CL.T_Array(CL.T_Ptr(CL.T_Named N.strandDescTy), SOME nStrands),
            N.strands,
            SOME(CL.I_Array inits))
      in
        declFn countDecl;
        declFn tableDecl
      end
588 :    
(* generate the source files for a program: "<baseName>.cl" holds the OpenCL
 * code (globals struct, strand state struct, strand init function, strand
 * methods, and the kernel) and "<baseName>.c" holds the host code (program
 * name, globals, strand state struct, print function, the registered
 * top-level declarations, the buffer/argument setup function, the strand
 * descriptors and table, and the initially function).
 *
 * This target supports exactly one strand; Fail is raised otherwise.  The
 * check is made before any file is opened, and the output streams are
 * closed if an exception is raised while the files are being written.
 *)
fun genSrc (baseName, prog) = let
      val Prog{
              name=progName, double, globals, topDecls, strands,
              initially, imgGlobals, numDims, ...
            } = prog
      val strands = AtomTable.listItems strands
      val strand = (case strands
             of [s] => s
              | _ => raise Fail(concat[
                    "genSrc: expected exactly one strand, but found ",
                    Int.toString(List.length strands)
                  ])
            (* end case *))
      val Strand{code, init_code, ...} = strand
      val clFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "cl"}
      val cFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "c"}
      val clOutS = TextIO.openOut clFileName
    (* do not leak the first stream if the second file cannot be opened *)
      val cOutS = TextIO.openOut cFileName
            handle ex => (TextIO.closeOut clOutS; raise ex)
      val clppStrm = PrintAsCL.new clOutS
      val cppStrm = PrintAsC.new cOutS
      fun cppDecl dcl = PrintAsC.output(cppStrm, dcl)
      fun clppDecl dcl = PrintAsCL.output(clppStrm, dcl)
    (* the preamble shared by both files; only the included header differs *)
      fun preamble includeLine = CL.D_Verbatim([
              if double
                then "#define DIDEROT_DOUBLE_PRECISION"
                else "#define DIDEROT_SINGLE_PRECISION",
              "#define DIDEROT_TARGET_CL",
              includeLine
            ])
      fun emit () = (
          (* Generate the OpenCl file *)
            clppDecl (preamble "#include \"Diderot/cl-diderot.h\"");
            clppDecl (genGlobalStruct (#gpuTy, !globals));
            clppDecl (genStrandTyDef(#gpuTy, strand));
            clppDecl (!init_code);
            List.app clppDecl (!code);
            clppDecl (genKernelFun (strand, !numDims, globals, imgGlobals));
          (* Generate the Host C file *)
            cppDecl (preamble "#include \"Diderot/diderot.h\"");
            cppDecl (CL.D_Var(["static"], CL.charPtr, "ProgramName",
              SOME(CL.I_Exp(CL.mkStr progName))));
            cppDecl (genGlobalStruct (#hostTy, !globals));
            cppDecl (CL.D_Var(["static"], CL.T_Ptr(CL.T_Named RN.globalsTy), RN.globalsVarName, NONE));
            cppDecl (genStrandTyDef (#hostTy, strand));
            cppDecl (genStrandPrint strand);
          (* topDecls is built by consing, so reverse it to get registration order *)
            List.app cppDecl (List.rev (!topDecls));
            cppDecl (genGlobalBuffersArgs imgGlobals);
            List.app (fn strand => cppDecl (genStrandDesc strand)) strands;
            genStrandTable (cppDecl, strands);
            cppDecl (!initially);
            PrintAsC.close cppStrm;
            PrintAsCL.close clppStrm)
      in
        (emit ()
          handle ex => (TextIO.closeOut cOutS; TextIO.closeOut clOutS; raise ex));
        TextIO.closeOut cOutS;
        TextIO.closeOut clOutS
      end
641 : lamonts 1264
642 : lamonts 1244 (* output the code to a file. The string is the basename of the file, the extension
643 :     * is provided by the target.
644 :     *)
(* output the code to a file.  The string is the basename of the file, the
 * extension is provided by the target.  After the sources are generated,
 * the host file is compiled and linked against the OpenCL runtime library.
 *)
fun generate (basename, prog as Prog{double, parallel, debug, ...}) = let
    (* generate the C compiler flags: base flag, debug/ndebug flag, the
     * pthread flag (parallel targets only), then the include paths.
     *)
      val includeFlags = ["-I" ^ Paths.diderotInclude, "-I" ^ Paths.teemInclude]
      val threadFlags = if parallel then [#pthread Paths.cflags] else []
      val debugFlag = if debug then #debug Paths.cflags else #ndebug Paths.cflags
      val cflags = #base Paths.cflags :: debugFlag :: (threadFlags @ includeFlags)
    (* generate the loader flags: runtime library, OpenCL library, Teem link
     * flags, base libraries, and the pthread library (parallel targets only).
     *)
      val threadLibs = if parallel then [#pthread Paths.extraLibs] else []
      val extraLibs = #cl Paths.extraLibs
            :: (Paths.teemLinkFlags @ (#base Paths.extraLibs :: threadLibs))
      val rtLib = TargetUtil.runtimeName {
              target = TargetUtil.TARGET_CL,
              parallel = parallel, double = double, debug = debug
            }
      in
        genSrc (basename, prog);
        RunCC.compile (basename, cflags);
        RunCC.link (basename, rtLib :: extraLibs)
      end
669 : lamonts 1244
670 : jhr 1273 end
671 : lamonts 1264
672 : lamonts 1244 (* strands *)
structure Strand =
  struct
  (* create the representation of a new strand and add it to the program's
   * strand table.  The strand starts with no state variables, no output
   * variable, no methods, and a placeholder comment for its init code.
   *)
    fun define (Prog{strands, ...}, strandId) = let
          val strandName = Atom.toString strandId
          val newStrand = Strand{
                  name = strandName,
                  tyName = RN.strandTy strandName,
                  state = ref [],
                  output = ref NONE,
                  code = ref [],
                  init_code = ref (CL.D_Comment(["no init code"]))
                }
          in
            AtomTable.insert strands (strandId, newStrand);
            newStrand
          end

  (* return the strand with the given name *)
    fun lookup (Prog{strands, ...}, strandId) = AtomTable.lookup strands strandId

  (* register the strand-state initialization code.  The variables are the
   * strand parameters; the generated function additionally takes the globals
   * pointer and the strand state ("selfOut") as its first two parameters.
   *)
    fun init (Strand{name, tyName, init_code, ...}, params, init) = let
          val strandParams = List.map (fn (ToCL.V(ty, x)) => CL.PARAM([], ty, x)) params
          val allParams =
                CL.PARAM([], globPtrTy, RN.globalsVarName) ::
                CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfOut") ::
                strandParams
          in
            init_code := CL.D_Func([], CL.voidTy, RN.strandInit name, allParams, init)
          end

  (* register a strand method.  The generated function is named
   * "<strand>_<method>", returns a 32-bit integer, and takes the input
   * state, the output state, and the globals pointer.
   *)
    fun method (Strand{name, tyName, code, ...}, methName, body) = let
          val statePtrTy = CL.T_Ptr(CL.T_Named tyName)
          val methParams = [
                  CL.PARAM([], statePtrTy, "selfIn"),
                  CL.PARAM([], statePtrTy, "selfOut"),
                  CL.PARAM([], globPtrTy, RN.globalsVarName)
                ]
          val methFn = CL.D_Func([], CL.int32, concat[name, "_", methName], methParams, body)
          in
            code := methFn :: !code
          end

  (* record the strand's output variable together with its TreeIL type *)
    fun output (Strand{output=outRef, ...}, ty, ToCL.V(_, x)) = outRef := SOME(ty, x)

  end
723 :    
724 :     end
725 :    
726 :     structure CLBackEnd = CodeGenFn(CLTarget)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0