Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml
ViewVC logotype

Annotation of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1357 - (view) (download)

1 : jhr 1315 (* cl-target.sml
2 : lamonts 1244 *
3 :     * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
4 :     * All rights reserved.
5 :     *)
6 :    
7 :     structure CLTarget : TARGET =
8 :     struct
9 :    
10 :     structure IL = TreeIL
11 :     structure V = IL.Var
12 :     structure Ty = IL.Ty
13 :     structure CL = CLang
14 :     structure RN = RuntimeNames
15 : jhr 1273 structure ToCL = TreeToCL
16 : lamonts 1305 structure N = CNames
17 : lamonts 1244
18 : jhr 1343 (* helper functions for specifying parameters in various address spaces *)
19 :     fun clParam (spc, ty, x) = CL.PARAM([spc], ty, x)
20 :     fun globalParam (ty, x) = CL.PARAM(["__global"], ty, x)
21 :     fun constantParam (ty, x) = CL.PARAM(["__constant"], ty, x)
22 :     fun localParam (ty, x) = CL.PARAM(["__local"], ty, x)
23 :     fun privateParam (ty, x) = CL.PARAM(["__private"], ty, x)
24 :    
25 : jhr 1315 (* C variable translation *)
26 :     structure TrCVar =
27 : lamonts 1305 struct
28 :     type env = CL.typed_var TreeIL.Var.Map.map
29 :     fun lookup (env, x) = (case V.Map.find (env, x)
30 :     of SOME(CL.V(_, x')) => x'
31 : jhr 1315 | NONE => raise Fail(concat["TrCVar.lookup(_, ", V.name x, ")"])
32 : lamonts 1305 (* end case *))
33 :     (* translate a variable that occurs in an l-value context (i.e., as the target of an assignment) *)
34 :     fun lvalueVar (env, x) = (case V.kind x
35 : jhr 1315 of IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, lookup(env, x))
36 : lamonts 1305 | IL.VK_State strand => CL.mkIndirect(CL.mkVar "selfOut", lookup(env, x))
37 :     | IL.VK_Local => CL.mkVar(lookup(env, x))
38 :     (* end case *))
39 :     (* translate a variable that occurs in an r-value context *)
40 :     fun rvalueVar (env, x) = (case V.kind x
41 : jhr 1315 of IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, lookup(env, x))
42 : lamonts 1305 | IL.VK_State strand => CL.mkIndirect(CL.mkVar "selfIn", lookup(env, x))
43 :     | IL.VK_Local => CL.mkVar(lookup(env, x))
44 :     (* end case *))
45 : jhr 1285 end
46 :    
47 :     structure ToC = TreeToCFn (TrCVar)
48 :    
49 :     type var = CL.typed_var
50 : lamonts 1244 type exp = CL.exp
51 :     type stm = CL.stm
52 :    
53 : jhr 1313 (* OpenCL specific types *)
54 :     val clIntTy = CL.T_Named "cl_int"
55 : jhr 1279 val clProgramTy = CL.T_Named "cl_program"
56 :     val clKernelTy = CL.T_Named "cl_kernel"
57 :     val clCmdQueueTy = CL.T_Named "cl_command_queue"
58 :     val clContextTy = CL.T_Named "cl_context"
59 :     val clDeviceIdTy = CL.T_Named "cl_device_id"
60 :     val clPlatformIdTy = CL.T_Named "cl_platform_id"
61 :     val clMemoryTy = CL.T_Named "cl_mem"
62 : jhr 1313 val globPtrTy = CL.T_Ptr(CL.T_Named RN.globalsTy)
63 : jhr 1279
64 : jhr 1307 (* variable or field that is mirrored between host and GPU *)
65 :     type mirror_var = {
66 :     hostTy : CL.ty, (* variable type on Host (i.e., C type) *)
67 :     gpuTy : CL.ty, (* variable's type on GPU (i.e., OpenCL type) *)
68 :     var : CL.var (* variable name *)
69 :     }
70 :    
71 : lamonts 1244 datatype strand = Strand of {
72 : jhr 1261 name : string,
73 :     tyName : string,
74 : jhr 1307 state : mirror_var list ref,
75 : jhr 1261 output : (Ty.ty * CL.var) option ref, (* the strand's output variable (only one for now) *)
76 : lamonts 1271 code : CL.decl list ref,
77 : jhr 1273 init_code: CL.decl ref
78 : lamonts 1244 }
79 :    
80 :     datatype program = Prog of {
81 : jhr 1307 name : string, (* stem of source file *)
82 : jhr 1261 double : bool, (* true for double-precision support *)
83 :     parallel : bool, (* true for multithreaded (or multi-GPU) target *)
84 :     debug : bool, (* true for debug support in executable *)
85 : jhr 1307 globals : mirror_var list ref,
86 : jhr 1261 topDecls : CL.decl list ref,
87 :     strands : strand AtomTable.hash_table,
88 : lamonts 1305 initially : CL.decl ref,
89 : jhr 1333 numDims: int ref, (* number of dimensions in initially iteration *)
90 : jhr 1273 imgGlobals: (string * int) list ref,
91 :     prFn: CL.decl ref
92 :     }
93 : lamonts 1244
94 :     datatype env = ENV of {
95 : jhr 1261 info : env_info,
96 :     vMap : var V.Map.map,
97 :     scope : scope
98 : lamonts 1244 }
99 :    
100 :     and env_info = INFO of {
101 : jhr 1261 prog : program
102 : lamonts 1244 }
103 :    
104 :     and scope
105 :     = NoScope
106 :     | GlobalScope
107 :     | InitiallyScope
108 : jhr 1261 | StrandScope of TreeIL.var list (* strand initialization *)
109 :     | MethodScope of TreeIL.var list (* method body; vars are state variables *)
110 : lamonts 1244
111 : jhr 1273 (* the supprted widths of vectors of reals on the target. *)
112 :     (* FIXME: for OpenCL 1.1, 3 is also valid *)
113 :     fun vectorWidths () = [2, 4, 8, 16]
114 : lamonts 1244
115 :     (* tests for whether various expression forms can appear inline *)
116 : jhr 1261 fun inlineCons n = (n < 2) (* vectors are inline, but not matrices *)
117 :     val inlineMatrixExp = false (* can matrix-valued expressions appear inline? *)
118 : lamonts 1244
119 :     (* TreeIL to target translations *)
120 :     structure Tr =
121 :     struct
122 : jhr 1261 fun fragment (ENV{info, vMap, scope}, blk) = let
123 : jhr 1308 val (vMap, stms) = (case scope
124 : jhr 1326 of GlobalScope => ToC.trFragment (vMap, blk)
125 : jhr 1321 (* NOTE: if we move strand initialization to the GPU, then we'll have to change the following code! *)
126 : jhr 1326 | InitiallyScope => ToC.trFragment (vMap, blk)
127 :     | _ => ToCL.trFragment (vMap, blk)
128 :     (* end case *))
129 : jhr 1261 in
130 :     (ENV{info=info, vMap=vMap, scope=scope}, stms)
131 :     end
132 : jhr 1315 fun block (ENV{vMap, scope, ...}, blk) = let
133 : jhr 1326 fun saveState cxt stateVars trAssign (env, args, stm) = (
134 :     ListPair.foldrEq
135 :     (fn (x, e, stms) => trAssign(env, x, e)@stms)
136 :     [stm]
137 :     (stateVars, args)
138 :     ) handle ListPair.UnequalLengths => (
139 :     print(concat["saveState ", cxt, ": length mismatch; ", Int.toString(List.length args), " args\n"]);
140 :     raise Fail(concat["saveState ", cxt, ": length mismatch"]))
141 :     in
142 :     case scope
143 : jhr 1315 (* NOTE: if we move strand initialization to the GPU, then we'll have to change the following code! *)
144 : jhr 1326 of StrandScope stateVars =>
145 : jhr 1353 ToCL.trBlock (vMap, saveState "StrandScope" stateVars ToCL.trAssign, blk)
146 : jhr 1326 | MethodScope stateVars =>
147 :     ToCL.trBlock (vMap, saveState "MethodScope" stateVars ToCL.trAssign, blk)
148 :     | InitiallyScope => ToCL.trBlock (vMap, fn (_, _, stm) => [stm], blk)
149 :     | _ => ToC.trBlock (vMap, fn (_, _, stm) => [stm], blk)
150 :     (* end case *)
151 :     end
152 : jhr 1273 fun exp (ENV{vMap, ...}, e) = ToCL.trExp(vMap, e)
153 : lamonts 1244 end
154 :    
155 :     (* variables *)
156 :     structure Var =
157 :     struct
158 : jhr 1273 fun name (ToCL.V(_, name)) = name
159 : jhr 1307 fun global (Prog{globals, imgGlobals, ...}, name, ty) = let
160 :     val x = {hostTy = ToC.trType ty, gpuTy = ToCL.trType ty, var = name}
161 :     fun isImgGlobal (Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}), name) =
162 :     imgGlobals := (name,dim) :: !imgGlobals
163 :     | isImgGlobal _ = ()
164 : jhr 1261 in
165 : jhr 1307 globals := x :: !globals;
166 :     isImgGlobal (ty, name);
167 :     ToCL.V(#gpuTy x, name)
168 : jhr 1261 end
169 : jhr 1273 fun param x = ToCL.V(ToCL.trType(V.ty x), V.name x)
170 : jhr 1261 fun state (Strand{state, ...}, x) = let
171 : jhr 1307 val ty = V.ty x
172 :     val x' = {hostTy = ToC.trType ty, gpuTy = ToCL.trType ty, var = V.name x}
173 : jhr 1261 in
174 :     state := x' :: !state;
175 : jhr 1307 ToCL.V(#gpuTy x', #var x')
176 : jhr 1261 end
177 : lamonts 1244 end
178 :    
179 :     (* environments *)
180 :     structure Env =
181 :     struct
182 :     (* create a new environment *)
183 : jhr 1261 fun new prog = ENV{
184 :     info=INFO{prog = prog},
185 :     vMap = V.Map.empty,
186 :     scope = NoScope
187 :     }
188 : lamonts 1244 (* define the current translation context *)
189 : jhr 1261 fun setScope scope (ENV{info, vMap, ...}) = ENV{info=info, vMap=vMap, scope=scope}
190 :     val scopeGlobal = setScope GlobalScope
191 :     val scopeInitially = setScope InitiallyScope
192 :     fun scopeStrand (env, svars) = setScope (StrandScope svars) env
193 :     fun scopeMethod (env, svars) = setScope (MethodScope svars) env
194 : lamonts 1244 (* bind a TreeIL varaiable to a target variable *)
195 : jhr 1261 fun bind (ENV{info, vMap, scope}, x, x') = ENV{
196 :     info = info,
197 :     vMap = V.Map.insert(vMap, x, x'),
198 :     scope = scope
199 :     }
200 : lamonts 1244 end
201 :    
202 :     (* programs *)
203 :     structure Program =
204 :     struct
205 : jhr 1278 fun new {name, double, parallel, debug} = (
206 : jhr 1261 RN.initTargetSpec double;
207 : jhr 1286 CNames.initTargetSpec double;
208 : jhr 1261 Prog{
209 : jhr 1307 name = name,
210 : jhr 1261 double = double, parallel = parallel, debug = debug,
211 : jhr 1331 globals = ref [],
212 : jhr 1261 topDecls = ref [],
213 :     strands = AtomTable.mkTable (16, Fail "strand table"),
214 : jhr 1307 initially = ref(CL.D_Comment["missing initially"]),
215 : jhr 1332 numDims = ref 0,
216 : jhr 1307 imgGlobals = ref[],
217 :     prFn = ref(CL.D_Comment(["No Print Function"]))
218 : jhr 1261 })
219 : jhr 1357
220 : jhr 1261 (* register the code that is used to register command-line options for input variables *)
221 :     fun inputs (Prog{topDecls, ...}, stm) = let
222 :     val inputsFn = CL.D_Func(
223 :     [], CL.voidTy, RN.registerOpts,
224 :     [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.optionsTy), "opts")],
225 :     stm)
226 :     in
227 :     topDecls := inputsFn :: !topDecls
228 :     end
229 :    
230 : jhr 1286 (* register the global initialization part of a program *)
231 : jhr 1307 fun init (Prog{topDecls, ...}, init) = let
232 : jhr 1331 val globalsDecl = CL.mkAssign(CL.E_Var RN.globalsVarName,
233 : lamonts 1316 CL.mkApply("malloc", [CL.mkApply("sizeof",[CL.mkVar RN.globalsTy])]))
234 : jhr 1331 val initFn = CL.D_Func(
235 : lamonts 1316 [], CL.voidTy, RN.initGlobals, [],
236 : jhr 1357 CL.mkBlock[globalsDecl, CL.mkCall(RN.initGlobalsHelper,[])])
237 :     val initHelperFn = CL.D_Func(
238 : lamonts 1316 [], CL.voidTy, RN.initGlobalsHelper, [],
239 : jhr 1286 init)
240 : jhr 1307 val shutdownFn = CL.D_Func(
241 :     [], CL.voidTy, RN.shutdown,
242 :     [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.worldTy), "wrld")],
243 :     CL.S_Block[])
244 :     in
245 : jhr 1357 topDecls := shutdownFn :: initFn :: initHelperFn :: !topDecls
246 : jhr 1307 end
247 : jhr 1357
248 : jhr 1333 (* create and register the initially function for a program *)
249 : jhr 1307 fun initially {
250 : jhr 1332 prog = Prog{name=progName, strands, initially, numDims, ...},
251 : jhr 1307 isArray : bool,
252 :     iterPrefix : stm list,
253 :     iters : (var * exp * exp) list,
254 :     createPrefix : stm list,
255 :     strand : Atom.atom,
256 :     args : exp list
257 :     } = let
258 :     val name = Atom.toString strand
259 :     val nDims = List.length iters
260 :     val worldTy = CL.T_Ptr(CL.T_Named N.worldTy)
261 :     fun mapi f xs = let
262 :     fun mapf (_, []) = []
263 :     | mapf (i, x::xs) = f(i, x) :: mapf(i+1, xs)
264 :     in
265 :     mapf (0, xs)
266 :     end
267 :     val baseInit = mapi (fn (i, (_, e, _)) => (i, CL.I_Exp e)) iters
268 :     val sizeInit = mapi
269 :     (fn (i, (CL.V(ty, _), lo, hi)) =>
270 :     (i, CL.I_Exp(CL.mkBinOp(CL.mkBinOp(hi, CL.#-, lo), CL.#+, CL.E_Int(1, ty))))
271 :     ) iters
272 :     (* code to allocate the world and initial strands *)
273 :     val wrld = "wrld"
274 :     val allocCode = [
275 :     CL.mkComment["allocate initial block of strands"],
276 :     CL.mkDecl(CL.T_Array(CL.int32, SOME nDims), "base", SOME(CL.I_Array baseInit)),
277 :     CL.mkDecl(CL.T_Array(CL.uint32, SOME nDims), "size", SOME(CL.I_Array sizeInit)),
278 :     CL.mkDecl(worldTy, wrld,
279 : lamonts 1341 SOME(CL.I_Exp(CL.E_Apply(RN.allocInitially, [
280 : jhr 1307 CL.mkVar "ProgramName",
281 :     CL.mkUnOp(CL.%&, CL.E_Var(N.strandDesc name)),
282 :     CL.E_Bool isArray,
283 :     CL.E_Int(IntInf.fromInt nDims, CL.int32),
284 :     CL.E_Var "base",
285 :     CL.E_Var "size"
286 :     ]))))
287 :     ]
288 : lamonts 1316 (* create the loop nest for the initially iterations
289 : jhr 1307 val indexVar = "ix"
290 :     val strandTy = CL.T_Ptr(CL.T_Named(N.strandTy name))
291 :     fun mkLoopNest [] = CL.mkBlock(createPrefix @ [
292 :     CL.mkDecl(strandTy, "sp",
293 :     SOME(CL.I_Exp(
294 :     CL.E_Cast(strandTy,
295 :     CL.E_Apply(N.inState, [CL.E_Var "wrld", CL.E_Var indexVar]))))),
296 : jhr 1315 CL.mkCall(N.strandInit name,
297 : jhr 1326 CL.E_Var RN.globalsVarName :: CL.E_Var "sp" :: args),
298 : jhr 1307 CL.mkAssign(CL.E_Var indexVar, CL.mkBinOp(CL.E_Var indexVar, CL.#+, CL.E_Int(1, CL.uint32)))
299 :     ])
300 :     | mkLoopNest ((CL.V(ty, param), lo, hi)::iters) = let
301 :     val body = mkLoopNest iters
302 :     in
303 :     CL.mkFor(
304 :     [(ty, param, lo)],
305 :     CL.mkBinOp(CL.E_Var param, CL.#<=, hi),
306 :     [CL.mkPostOp(CL.E_Var param, CL.^++)],
307 :     body)
308 :     end
309 :     val iterCode = [
310 :     CL.mkComment["initially"],
311 :     CL.mkDecl(CL.uint32, indexVar, SOME(CL.I_Exp(CL.E_Int(0, CL.uint32)))),
312 :     mkLoopNest iters
313 : lamonts 1316 ] *)
314 : jhr 1307 val body = CL.mkBlock(
315 :     iterPrefix @
316 :     allocCode @
317 :     [CL.mkReturn(SOME(CL.E_Var "wrld"))])
318 : lamonts 1316 val initFn = CL.D_Func([], worldTy, N.initially, [], body)
319 : jhr 1307 in
320 : jhr 1333 numDims := nDims;
321 : jhr 1307 initially := initFn
322 :     end
323 : jhr 1281
324 : lamonts 1305 (***** OUTPUT *****)
325 : jhr 1307 fun genStrandPrint (Strand{name, tyName, state, output, code,...}) = let
326 : jhr 1326 (* the print function *)
327 : jhr 1307 val prFnName = concat[name, "_print"]
328 :     val prFn = let
329 : jhr 1326 val params = [
330 :     CL.PARAM([], CL.T_Ptr(CL.T_Named "FILE"), "outS"),
331 : lamonts 1346 CL.PARAM([], CL.T_Ptr(CL.T_Num(RawTypes.RT_UInt8)),"status"),
332 :     CL.PARAM([], CL.intTy,"numStrands"),
333 : jhr 1326 CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "self")
334 :     ]
335 :     val SOME(ty, x) = !output
336 : lamonts 1346 val outState = CL.mkSelect(CL.mkSubscript(CL.mkVar "self",CL.E_Var "i"), x)
337 : jhr 1326 val prArgs = (case ty
338 :     of Ty.IVecTy 1 => [CL.E_Str(!N.gIntFormat ^ "\n"), outState]
339 :     | Ty.IVecTy d => let
340 :     val fmt = CL.mkStr(
341 :     String.concatWith " " (List.tabulate(d, fn _ => !N.gIntFormat))
342 :     ^ "\n")
343 :     val args = List.tabulate (d, fn i => ToC.ivecIndex(outState, d, i))
344 :     in
345 :     fmt :: args
346 :     end
347 :     | Ty.TensorTy[] => [CL.mkStr "%f\n", outState]
348 :     | Ty.TensorTy[d] => let
349 :     val fmt = CL.mkStr(
350 :     String.concatWith " " (List.tabulate(d, fn _ => "%f"))
351 :     ^ "\n")
352 :     val args = List.tabulate (d, fn i => ToC.vecIndex(outState, d, i))
353 :     in
354 :     fmt :: args
355 :     end
356 :     | _ => raise Fail("genStrand: unsupported output type " ^ Ty.toString ty)
357 :     (* end case *))
358 : lamonts 1346 val forBody = CL.mkIfThen(CL.mkBinOp(CL.mkSubscript(CL.E_Var "status",CL.E_Var "i"), CL.#==, CL.E_Var "DIDEROT_STABILIZE"),
359 :     CL.mkBlock([CL.mkCall("fprintf", CL.mkVar "outS" :: prArgs)]))
360 :     val body = CL.mkFor(
361 :     [(CL.intTy, "i", CL.mkInt 0)],
362 : lamonts 1347 CL.mkBinOp(CL.E_Var "i", CL.#<, CL.E_Var "numStrands"),
363 : lamonts 1346 [CL.mkPostOp(CL.E_Var "i", CL.^++)],
364 :     forBody)
365 : jhr 1326 in
366 :     CL.D_Func(["static"], CL.voidTy, prFnName, params,
367 : lamonts 1346 body)
368 : jhr 1326 end
369 : jhr 1307 in
370 : jhr 1326 prFn
371 : jhr 1307 end
372 : lamonts 1305
373 : jhr 1307 fun genStrandTyDef (targetTy, Strand{tyName, state,...}) =
374 : jhr 1261 (* the type declaration for the strand's state struct *)
375 :     CL.D_StructDef(
376 : jhr 1307 List.rev (List.map (fn x => (targetTy x, #var x)) (!state)),
377 :     tyName)
378 : lamonts 1351
379 :     fun genStrandCopy(Strand{tyName,name,state,...}) = let
380 :     val params = [
381 :     CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfIn"),
382 :     CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfOut")
383 :     ]
384 :     val assignStms = List.rev(List.map(fn x => CL.mkAssign(CL.mkIndirect(CL.E_Var "selfOut", #var x),
385 :     CL.mkIndirect(CL.E_Var "selfIn", #var x))) (!state))
386 :     in
387 :     CL.D_Func([""], CL.voidTy, RN.strandCopy name, params,CL.mkBlock(assignStms))
388 :     end
389 : lamonts 1305
390 : jhr 1308 (* generates the load kernel function *)
391 : jhr 1307
392 :     (* generates the opencl buffers for the image data *)
393 : jhr 1315 fun getGlobalDataBuffers (globals,contextVar,errVar) = let
394 : jhr 1344 val globalBuffErr = "error creating OpenCL global buffer"
395 :     fun errorFn msg = CL.mkIfThen(CL.mkBinOp(CL.E_Var errVar, CL.#!=, CL.E_Var "CL_SUCCESS"),
396 :     CL.mkBlock([CL.mkCall("fprintf",[CL.E_Var "stderr", CL.E_Str msg]),
397 :     CL.mkCall("exit",[CL.mkInt 1])]))
398 : jhr 1307 val globalBufferDecl = CL.mkDecl(clMemoryTy,concat[RN.globalsVarName,"_cl"],NONE)
399 :     val globalBuffer = CL.mkAssign(CL.mkVar(concat[RN.globalsVarName,"_cl"]),
400 :     CL.mkApply("clCreateBuffer", [
401 :     CL.mkVar contextVar,
402 : lamonts 1328 CL.mkVar "CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR",
403 : jhr 1307 CL.mkApply("sizeof",[CL.mkVar RN.globalsTy]),
404 :     CL.mkVar RN.globalsVarName,
405 :     CL.mkUnOp(CL.%&,CL.mkVar errVar)
406 :     ]))
407 : lamonts 1341 fun genDataBuffers ([],_,_,_) = []
408 :     | genDataBuffers ((var,nDims)::globals, contextVar, errVar,errFn) = let
409 : jhr 1326 val hostVar = CL.mkIndirect(CL.mkVar RN.globalsVarName, var)
410 : jhr 1313 (* FIXME: use CL constructors to build expressions (not strings) *)
411 : jhr 1326 fun sizeExp i = CL.mkSubscript(CL.mkIndirect(hostVar, "size"), CL.mkInt i)
412 :     val size = CL.mkBinOp(CL.mkApply("sizeof",[CL.mkVar "float"]), CL.#*, sizeExp 0)
413 :     val size = if (nDims > 1)
414 :     then CL.mkBinOp(size, CL.#*, sizeExp 1)
415 :     else size
416 :     val size = if (nDims > 2)
417 :     then CL.mkBinOp(size, CL.#*, sizeExp 2)
418 :     else size
419 :     in
420 :     CL.mkDecl(clMemoryTy, RN.addBufferSuffix var ,NONE)::
421 :     CL.mkDecl(clMemoryTy, RN.addBufferSuffixData var ,NONE)::
422 :     CL.mkAssign(CL.mkVar(RN.addBufferSuffix var),
423 :     CL.mkApply("clCreateBuffer", [
424 :     CL.mkVar contextVar,
425 :     CL.mkVar "CL_MEM_COPY_HOST_PTR",
426 :     CL.mkApply("sizeof",[CL.mkVar (RN.imageTy nDims)]),
427 :     hostVar,
428 :     CL.mkUnOp(CL.%&,CL.mkVar errVar)
429 : jhr 1344 ])) ::
430 :     errFn(concat["error in creating ",RN.addBufferSuffix var, " global buffer"]) ::
431 : jhr 1326 CL.mkAssign(CL.mkVar(RN.addBufferSuffixData var),
432 :     CL.mkApply("clCreateBuffer", [
433 :     CL.mkVar contextVar,
434 :     CL.mkVar "CL_MEM_COPY_HOST_PTR",
435 :     size,
436 :     CL.mkIndirect(hostVar, "data"),
437 :     CL.mkUnOp(CL.%&,CL.mkVar errVar)
438 : jhr 1344 ])) ::
439 :     errFn(concat["error in creating ",RN.addBufferSuffixData var, " global buffer"]) ::
440 :     genDataBuffers(globals,contextVar,errVar,errFn)
441 : jhr 1326 end
442 : jhr 1307 in
443 : jhr 1344 globalBufferDecl
444 :     :: globalBuffer
445 :     :: errorFn(globalBuffErr)
446 :     :: genDataBuffers(globals,contextVar,errVar,errorFn)
447 : jhr 1307 end
448 : lamonts 1264
449 : jhr 1309 (* generates the kernel arguments for the image data *)
450 : jhr 1313 fun genGlobalArguments (globals, count, kernelVar, errVar) = let
451 : jhr 1343 val globalArgErr = "error creating OpenCL global argument"
452 :     fun errorFn msg = CL.mkIfThen(CL.mkBinOp(CL.E_Var errVar, CL.#!=, CL.E_Var "CL_SUCCESS"),
453 : jhr 1344 CL.mkBlock([CL.mkCall("fprintf",[CL.E_Var "stderr", CL.E_Str msg]),
454 :     CL.mkCall("exit",[CL.mkInt 1])]))
455 : jhr 1326 val globalArgument = CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.|=,
456 :     CL.mkApply("clSetKernelArg",
457 :     [CL.mkVar kernelVar,
458 :     CL.mkPostOp(CL.E_Var count, CL.^++),
459 :     CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
460 :     CL.mkUnOp(CL.%&,CL.mkVar(concat[RN.globalsVarName,"_cl"]))])))
461 : lamonts 1341 fun genDataArguments ([],_,_,_,_) = []
462 :     | genDataArguments ((var,nDims)::globals,count,kernelVar,errVar,errFn) =
463 :     CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.$=,
464 : jhr 1326 CL.mkApply("clSetKernelArg",
465 :     [CL.mkVar kernelVar,
466 :     CL.mkPostOp(CL.E_Var count, CL.^++),
467 :     CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
468 : lamonts 1341 CL.mkUnOp(CL.%&,CL.mkVar(RN.addBufferSuffix var))]))) ::
469 : jhr 1344 errFn(concat["error in creating ",RN.addBufferSuffix var, " argument"]) ::
470 : lamonts 1341 CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.$=,
471 : jhr 1326 CL.mkApply("clSetKernelArg",
472 :     [CL.mkVar kernelVar,
473 :     CL.mkPostOp(CL.E_Var count, CL.^++),
474 :     CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
475 :     CL.mkUnOp(CL.%&,CL.mkVar(RN.addBufferSuffixData var))]))) ::
476 : jhr 1344 errFn(concat["error in creating ",RN.addBufferSuffixData var, " argument"]) ::
477 : lamonts 1341 genDataArguments (globals,count,kernelVar,errVar,errFn)
478 : jhr 1326 in
479 : lamonts 1341 [globalArgument,errorFn(globalArgErr)] @ genDataArguments(globals, count, kernelVar, errVar,errorFn)
480 : jhr 1326 end
481 : lamonts 1264
482 : lamonts 1305 (* generates the globals buffers and arguments function *)
483 : jhr 1307 fun genGlobalBuffersArgs (imgGlobals) = let
484 : jhr 1273 (* Delcare opencl setup objects *)
485 :     val errVar = "err"
486 :     val imgDataSizeVar = "image_dataSize"
487 : jhr 1315 val params = [
488 : lamonts 1305 CL.PARAM([],CL.T_Named("cl_context"), "context"),
489 : jhr 1307 CL.PARAM([],CL.T_Named("cl_kernel"), "kernel"),
490 : jhr 1344 CL.PARAM([],CL.T_Named("cl_command_queue"), "cmdQ"),
491 : jhr 1307 CL.PARAM([],CL.T_Named("int"), "argStart")
492 : jhr 1273 ]
493 : jhr 1313 val clGlobalBuffers = getGlobalDataBuffers(!imgGlobals, "context", errVar)
494 :     val clGlobalArguments = genGlobalArguments(!imgGlobals, "argStart", "kernel", errVar)
495 : jhr 1307 (* Body put all the statments together *)
496 : jhr 1314 val body = CL.mkDecl(clIntTy, errVar, SOME(CL.I_Exp(CL.mkInt 0)))
497 : jhr 1326 :: clGlobalBuffers @ clGlobalArguments
498 : jhr 1313 in
499 : jhr 1307 CL.D_Func([],CL.voidTy,RN.globalsSetupName,params,CL.mkBlock(body))
500 :     end
501 : lamonts 1305
502 : jhr 1307 (* generate the data and global parameters *)
503 :     fun genKeneralGlobalParams ((name,tyname)::rest) =
504 : jhr 1343 globalParam (CL.T_Ptr(CL.T_Named (RN.imageTy tyname)), RN.addBufferSuffix name) ::
505 :     globalParam (CL.T_Ptr(CL.voidTy), RN.addBufferSuffixData name) ::
506 : jhr 1307 genKeneralGlobalParams rest
507 :     | genKeneralGlobalParams [] = []
508 :    
509 :     (*generate code for intilizing kernel global data *)
510 :     (* FIXME: should use List.map here *)
511 : lamonts 1316 fun initGlobalImages ((name, tyname)::rest) =
512 : jhr 1357 CL.mkAssign(
513 :     CL.mkIndirect(CL.E_Var RN.globalsVarName, name),
514 :     CL.mkVar (RN.addBufferSuffix name)) ::
515 :     CL.mkAssign(
516 :     CL.mkIndirect(CL.mkIndirect(CL.E_Var RN.globalsVarName, name), "data"),
517 :     CL.mkVar (RN.addBufferSuffixData name)) ::
518 :     initGlobalImages rest
519 : lamonts 1316 | initGlobalImages [] = []
520 :    
521 : jhr 1331 (* generate the main kernel function for the .cl file *)
522 : jhr 1307 fun genKernelFun (strand, nDims, globals, imgGlobals) = let
523 :     val Strand{name, tyName, state, output, code,...} = strand
524 :     val fName = RN.kernelFuncName;
525 :     val inState = "strand_in"
526 : lamonts 1341 val outState = "strand_out"
527 : jhr 1345 val tempVar = "tmp"
528 : jhr 1307 val params = [
529 : lamonts 1346 CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfIn"),
530 :     CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfOut"),
531 :     CL.PARAM(["__global"], CL.T_Ptr(CL.T_Num(RawTypes.RT_UInt8)), "strandStatus"),
532 :     CL.PARAM(["__global"], CL.intTy, "width"),
533 :     CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named RN.globalsTy), RN.globalsVarName)
534 : jhr 1307 ] @ genKeneralGlobalParams(!imgGlobals)
535 :     val thread_ids = if nDims = 1
536 : jhr 1331 then [
537 :     CL.mkDecl(CL.intTy, "x",
538 :     SOME(CL.I_Exp(CL.mkApply(RN.getGlobalThreadId,[CL.mkInt 0]))))
539 :     ]
540 :     else if nDims = 2
541 :     then [
542 :     CL.mkDecl(CL.intTy, "x",
543 :     SOME(CL.I_Exp(CL.mkApply(RN.getGlobalThreadId,[CL.mkInt 0])))),
544 :     CL.mkDecl(CL.intTy, "y",
545 :     SOME(CL.I_Exp(CL.mkApply(RN.getGlobalThreadId,[CL.mkInt 1]))))
546 :     ]
547 :     else raise Fail "nDims > 2"
548 : jhr 1307 val strandDecl = [
549 : lamonts 1341 CL.mkDecl(CL.T_Ptr(CL.T_Named (concat["__global ",tyName])), inState, NONE),
550 :     CL.mkDecl(CL.T_Ptr(CL.T_Named (concat["__global ",tyName])), outState, NONE),
551 : jhr 1344 CL.mkDecl(CL.T_Ptr(CL.T_Named (concat["__global ",tyName])), tempVar, NONE)
552 : jhr 1331 ]
553 : lamonts 1351 val barrierCode = CL.mkCall(RN.strandCopy name, [CL.E_Var outState, CL.E_Var inState])
554 : lamonts 1346 val barrierStm = CL.mkCall("barrier",[CL.E_Var "CLK_LOCAL_MEM_FENCE"])
555 :     val index = if nDims = 1 then
556 :     CL.mkStr "x"
557 :     else
558 :     CL.mkBinOp(
559 : jhr 1332 CL.mkBinOp(CL.mkVar "x", CL.#*, CL.mkVar "width"), CL.#+, CL.mkVar "y")
560 : lamonts 1346
561 :     val strandObjects =
562 :     [ CL.mkAssign(CL.mkVar inState, CL.mkBinOp(CL.mkVar "selfIn",CL.#+,index)),
563 :     CL.mkAssign(CL.mkVar outState, CL.mkBinOp(CL.mkVar "selfOut",CL.#+,index))
564 :     ]
565 :    
566 :     val stabalizeStm = CL.mkAssign(CL.mkSubscript(CL.mkVar "strandStatus",index),
567 :     CL.E_Var "status")
568 :     val status = CL.mkDecl(CL.intTy, "status", SOME(CL.I_Exp(CL.mkSubscript(CL.mkVar "strandStatus",index))))
569 : jhr 1333 val strandInitStm = CL.mkCall(RN.strandInit name, [
570 :     CL.E_Var RN.globalsVarName,
571 : lamonts 1351 CL.E_Var outState,
572 : jhr 1333 CL.E_Var "x",
573 :     (* FIXME: if nDims = 1, then "y" is not defined! the arguments to this call should really come from
574 :     * the initially code!
575 :     *)
576 :     CL.E_Var "y"])
577 :     val local_vars = thread_ids
578 :     @ initGlobalImages(!imgGlobals)
579 :     @ strandDecl
580 :     @ strandObjects
581 :     @ [strandInitStm,status]
582 : lamonts 1351 val while_exp = CL.mkBinOp(CL.mkVar "status",CL.#==, CL.mkVar RN.kActive)
583 : lamonts 1346 val whileBody = CL.mkBlock ([barrierCode,barrierStm] @ [
584 : jhr 1307 CL.mkAssign(CL.mkVar "status",
585 :     CL.mkApply(RN.strandUpdate name,
586 : lamonts 1341 [CL.mkVar inState, CL.mkVar outState,CL.E_Var RN.globalsVarName]))] )
587 : jhr 1307 val whileBlock = [CL.mkWhile(while_exp, whileBody)]
588 : lamonts 1346 val body = CL.mkBlock(local_vars @ whileBlock @ [stabalizeStm])
589 : jhr 1307 in
590 :     CL.D_Func(["__kernel"], CL.voidTy, fName, params, body)
591 :     end
592 :     (* generate a global structure from the globals *)
593 :     fun genGlobalStruct (targetTy, globals) = let
594 :     val globs = List.map (fn (x : mirror_var) => (targetTy x, #var x)) globals
595 :     in
596 :     CL.D_StructDef(globs, RN.globalsTy)
597 :     end
598 : lamonts 1341
599 : jhr 1307 fun genGlobals (declFn, targetTy, globals) = let
600 :     fun doVar (x : mirror_var) = declFn (CL.D_Var([], targetTy x, #var x, NONE))
601 :     in
602 :     List.app doVar globals
603 :     end
604 : lamonts 1264
605 : jhr 1326 fun genStrandDesc (Strand{name, output, ...}) = let
606 :     (* the strand's descriptor object *)
607 :     val descI = let
608 :     fun fnPtr (ty, f) = CL.I_Exp(CL.mkCast(CL.T_Named ty, CL.mkVar f))
609 :     val SOME(outTy, _) = !output
610 :     in
611 :     CL.I_Struct[
612 :     ("name", CL.I_Exp(CL.mkStr name)),
613 :     ("stateSzb", CL.I_Exp(CL.mkSizeof(CL.T_Named(N.strandTy name)))),
614 : jhr 1315 (*
615 : jhr 1326 ("outputSzb", CL.I_Exp(CL.mkSizeof(ToC.trTy outTy))),
616 : jhr 1315 *)
617 : jhr 1326 ("update", fnPtr("update_method_t", "0")),
618 : lamonts 1346 ("print", fnPtr("print_method_cl_t", name ^ "_print"))
619 : jhr 1326 ]
620 :     end
621 :     val desc = CL.D_Var([], CL.T_Named N.strandDescTy, N.strandDesc name, SOME descI)
622 :     in
623 :     desc
624 :     end
625 : jhr 1315
626 :     (* generate the table of strand descriptors *)
627 : jhr 1326 fun genStrandTable (declFn, strands) = let
628 :     val nStrands = length strands
629 :     fun genInit (Strand{name, ...}) = CL.I_Exp(CL.mkUnOp(CL.%&, CL.E_Var(N.strandDesc name)))
630 :     fun genInits (_, []) = []
631 :     | genInits (i, s::ss) = (i, genInit s) :: genInits(i+1, ss)
632 :     in
633 :     declFn (CL.D_Var([], CL.int32, N.numStrands,
634 :     SOME(CL.I_Exp(CL.E_Int(IntInf.fromInt nStrands, CL.int32)))));
635 :     declFn (CL.D_Var([],
636 :     CL.T_Array(CL.T_Ptr(CL.T_Named N.strandDescTy), SOME nStrands),
637 :     N.strands,
638 :     SOME(CL.I_Array(genInits (0, strands)))))
639 :     end
640 : jhr 1315
641 : jhr 1308 fun genSrc (baseName, prog) = let
642 : jhr 1326 val Prog{name,double, globals, topDecls, strands, initially, imgGlobals, numDims, ...} = prog
643 : jhr 1307 val clFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "cl"}
644 :     val cFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "c"}
645 :     val clOutS = TextIO.openOut clFileName
646 :     val cOutS = TextIO.openOut cFileName
647 :     val clppStrm = PrintAsCL.new clOutS
648 :     val cppStrm = PrintAsC.new cOutS
649 : jhr 1321 val progName = name
650 : jhr 1307 fun cppDecl dcl = PrintAsC.output(cppStrm, dcl)
651 :     fun clppDecl dcl = PrintAsCL.output(clppStrm, dcl)
652 :     val strands = AtomTable.listItems strands
653 :     val [strand as Strand{name, tyName, code, init_code, ...}] = strands
654 :     in
655 : jhr 1273 (* Generate the OpenCl file *)
656 :     clppDecl (CL.D_Verbatim([
657 :     if double
658 :     then "#define DIDEROT_DOUBLE_PRECISION"
659 :     else "#define DIDEROT_SINGLE_PRECISION",
660 :     "#define DIDEROT_TARGET_CL",
661 : lamonts 1305 "#include \"Diderot/cl-diderot.h\""
662 : jhr 1273 ]));
663 : jhr 1307 clppDecl (genGlobalStruct (#gpuTy, !globals));
664 :     clppDecl (genStrandTyDef(#gpuTy, strand));
665 : jhr 1326 clppDecl (!init_code);
666 : lamonts 1351 clppDecl (genStrandCopy(strand));
667 : jhr 1273 List.app clppDecl (!code);
668 : lamonts 1328 clppDecl (genKernelFun (strand, !numDims, globals, imgGlobals));
669 :     (* Generate the Host C file *)
670 : jhr 1273 cppDecl (CL.D_Verbatim([
671 :     if double
672 :     then "#define DIDEROT_DOUBLE_PRECISION"
673 :     else "#define DIDEROT_SINGLE_PRECISION",
674 :     "#define DIDEROT_TARGET_CL",
675 :     "#include \"Diderot/diderot.h\""
676 :     ]));
677 : jhr 1326 cppDecl (CL.D_Var(["static"], CL.charPtr, "ProgramName",
678 :     SOME(CL.I_Exp(CL.mkStr progName))));
679 : jhr 1307 cppDecl (genGlobalStruct (#hostTy, !globals));
680 : jhr 1326 cppDecl (CL.D_Var(["static"], CL.T_Ptr(CL.T_Named RN.globalsTy), RN.globalsVarName, NONE));
681 : jhr 1308 cppDecl (genStrandTyDef (#hostTy, strand));
682 : jhr 1307 cppDecl (genStrandPrint strand);
683 : jhr 1273 List.app cppDecl (List.rev (!topDecls));
684 : jhr 1315 cppDecl (genGlobalBuffersArgs imgGlobals);
685 : jhr 1326 List.app (fn strand => cppDecl (genStrandDesc strand)) strands;
686 :     genStrandTable (cppDecl, strands);
687 : jhr 1307 cppDecl (!initially);
688 :     PrintAsC.close cppStrm;
689 :     PrintAsCL.close clppStrm;
690 :     TextIO.closeOut cOutS;
691 :     TextIO.closeOut clOutS
692 :     end
693 : lamonts 1264
694 : lamonts 1244 (* output the code to a file. The string is the basename of the file, the extension
695 :     * is provided by the target.
696 :     *)
697 : jhr 1307 fun generate (basename, prog as Prog{double, parallel, debug, ...}) = let
698 :     fun condCons (true, x, xs) = x::xs
699 :     | condCons (false, _, xs) = xs
700 :     (* generate the C compiler flags *)
701 :     val cflags = ["-I" ^ Paths.diderotInclude, "-I" ^ Paths.teemInclude]
702 :     val cflags = condCons (parallel, #pthread Paths.cflags, cflags)
703 :     val cflags = if debug
704 :     then #debug Paths.cflags :: cflags
705 :     else #ndebug Paths.cflags :: cflags
706 :     val cflags = #base Paths.cflags :: cflags
707 :     (* generate the loader flags *)
708 :     val extraLibs = condCons (parallel, #pthread Paths.extraLibs, [])
709 :     val extraLibs = Paths.teemLinkFlags @ #base Paths.extraLibs :: extraLibs
710 :     val extraLibs = #cl Paths.extraLibs :: extraLibs
711 :     val rtLib = TargetUtil.runtimeName {
712 :     target = TargetUtil.TARGET_CL,
713 :     parallel = parallel, double = double, debug = debug
714 :     }
715 :     val ldOpts = rtLib :: extraLibs
716 :     in
717 : lamonts 1341 genSrc (basename, prog);
718 : jhr 1307 RunCC.compile (basename, cflags);
719 :     RunCC.link (basename, ldOpts)
720 : jhr 1273 end
721 : lamonts 1244
722 : jhr 1273 end
723 : lamonts 1264
724 : lamonts 1244 (* strands *)
725 :     structure Strand =
726 :     struct
727 : jhr 1261 fun define (Prog{strands, ...}, strandId) = let
728 :     val name = Atom.toString strandId
729 :     val strand = Strand{
730 :     name = name,
731 :     tyName = RN.strandTy name,
732 :     state = ref [],
733 :     output = ref NONE,
734 : lamonts 1271 code = ref [],
735 : jhr 1273 init_code = ref (CL.D_Comment(["no init code"]))
736 : jhr 1261 }
737 :     in
738 :     AtomTable.insert strands (strandId, strand);
739 :     strand
740 :     end
741 : lamonts 1244
742 :     (* return the strand with the given name *)
743 : jhr 1261 fun lookup (Prog{strands, ...}, strandId) = AtomTable.lookup strands strandId
744 : lamonts 1244
745 :     (* register the strand-state initialization code. The variables are the strand
746 :     * parameters.
747 :     *)
748 : jhr 1308 fun init (Strand{name, tyName, code, init_code, ...}, params, init) = let
749 : jhr 1261 val fName = RN.strandInit name
750 :     val params =
751 : jhr 1343 globalParam (globPtrTy, RN.globalsVarName) ::
752 :     globalParam (CL.T_Ptr(CL.T_Named tyName), "selfOut") ::
753 : jhr 1273 List.map (fn (ToCL.V(ty, x)) => CL.PARAM([], ty, x)) params
754 : jhr 1261 val initFn = CL.D_Func([], CL.voidTy, fName, params, init)
755 :     in
756 : jhr 1273 init_code := initFn
757 : jhr 1261 end
758 : lamonts 1351
759 : lamonts 1244 (* register a strand method *)
760 : lamonts 1271 fun method (Strand{name, tyName, code,...}, methName, body) = let
761 : lamonts 1351 val fName = concat[name, "_", methName]
762 : jhr 1261 val params = [
763 : jhr 1343 globalParam (CL.T_Ptr(CL.T_Named tyName), "selfIn"),
764 :     globalParam (CL.T_Ptr(CL.T_Named tyName), "selfOut"),
765 :     globalParam (CL.T_Ptr(CL.T_Named (RN.globalsTy)), RN.globalsVarName)
766 : jhr 1261 ]
767 :     val methFn = CL.D_Func([], CL.int32, fName, params, body)
768 :     in
769 : jhr 1273 code := methFn :: !code
770 : jhr 1261 end
771 :    
772 : jhr 1273 fun output (Strand{output, ...}, ty, ToCL.V(_, x)) = output := SOME(ty, x)
773 : lamonts 1244
774 :     end
775 :    
776 :     end
777 :    
778 :     structure CLBackEnd = CodeGenFn(CLTarget)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0