1 : |
jhr |
1677 |
(* cuda-target.sml
|
2 : |
|
|
*
|
3 : |
|
|
* COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
|
4 : |
|
|
* All rights reserved.
|
5 : |
|
|
*)
|
6 : |
|
|
|
7 : |
|
|
structure CUDATarget : TARGET =
|
8 : |
|
|
struct
|
9 : |
|
|
|
10 : |
|
|
structure IL = TreeIL
|
11 : |
|
|
structure V = IL.Var
|
12 : |
|
|
structure Ty = IL.Ty
|
13 : |
|
|
structure CL = CLang
|
14 : |
|
|
structure RN = RuntimeNames
|
15 : |
|
|
structure ToCL = TreeToCUDA
|
16 : |
|
|
structure N = CNames
|
17 : |
|
|
structure P = Paths
|
18 : |
|
|
(*
|
19 : |
|
|
structure HF = CLHeaderFrag
|
20 : |
|
|
structure SF = CLSchedFrag
|
21 : |
|
|
*)
|
22 : |
|
|
|
23 : |
|
|
(* revmap f l == List.rev(List.map f l) *)
|
24 : |
|
|
fun revmap f = let
|
25 : |
|
|
fun rmap ([], l) = l
|
26 : |
|
|
| rmap (x::r, l) = rmap (r, f x :: l)
|
27 : |
|
|
in
|
28 : |
|
|
fn l => rmap (l, [])
|
29 : |
|
|
end
|
30 : |
|
|
|
31 : |
|
|
(* common arithmetic *)
|
32 : |
|
|
fun #+# (a, b) = CL.mkBinOp(a, CL.#+, b)
|
33 : |
|
|
fun #*# (a, b) = CL.mkBinOp(a, CL.#*, b)
|
34 : |
|
|
infix 5 #+#
|
35 : |
|
|
infix 6 #*#
|
36 : |
|
|
|
37 : |
|
|
(* translate TreeIL types to shadow types *)
|
38 : |
|
|
fun shadowTy ty = (case ty
|
39 : |
|
|
of Ty.BoolTy => CL.T_Named "cl_uint"
|
40 : |
|
|
| Ty.IntTy => CL.T_Named(RN.shadowIntTy ())
|
41 : |
|
|
| Ty.TensorTy[] => CL.T_Named(RN.shadowRealTy ())
|
42 : |
|
|
| Ty.TensorTy[n] => CL.T_Named(RN.shadowVecTy n)
|
43 : |
|
|
| Ty.TensorTy[n, m] => CL.T_Named(RN.shadowMatTy(n,m))
|
44 : |
|
|
| Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}) => CL.T_Named(RN.shadowImageTy dim)
|
45 : |
|
|
| _ => raise Fail(concat["TreeToC.trType(", Ty.toString ty, ")"])
|
46 : |
|
|
(* end case *))
|
47 : |
|
|
|
48 : |
|
|
(* FIXME: add comments that more clearly explain the difference between convertToShadow and
|
49 : |
|
|
* convertStrandToShadow
|
50 : |
|
|
*)
|
51 : |
|
|
(* translate TreeIL types to shadow types *)
|
52 : |
|
|
fun convertToShadow (ty, name) = (case ty
|
53 : |
|
|
of Ty.IntTy => CL.mkAssign(
|
54 : |
|
|
CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name),
|
55 : |
|
|
CL.mkIndirect(CL.mkVar RN.globalsVarName, name))
|
56 : |
|
|
| Ty.TensorTy[n]=> CL.mkCall(RN.convertToShadowVec n, [
|
57 : |
|
|
CL.mkUnOp(CL.%&, CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name)),
|
58 : |
|
|
CL.mkIndirect(CL.mkVar RN.globalsVarName, name)
|
59 : |
|
|
])
|
60 : |
|
|
| Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}) => CL.mkCall(RN.shadowImageFunc dim, [
|
61 : |
|
|
CL.mkVar "context",
|
62 : |
|
|
CL.mkUnOp(CL.%&, CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name)),
|
63 : |
|
|
CL.mkIndirect(CL.mkVar RN.globalsVarName, name)
|
64 : |
|
|
])
|
65 : |
|
|
| Ty.TensorTy[n, m] => CL.mkCall(RN.convertToShadowMat(m,n), [
|
66 : |
|
|
CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name),
|
67 : |
|
|
CL.mkIndirect(CL.mkVar RN.globalsVarName, name)
|
68 : |
|
|
])
|
69 : |
|
|
| _ => CL.mkAssign(
|
70 : |
|
|
CL.mkSelect(CL.mkVar RN.shadowGlaobalsName,name),
|
71 : |
|
|
CL.mkIndirect(CL.mkVar RN.globalsVarName, name))
|
72 : |
|
|
(* end case *))
|
73 : |
|
|
|
74 : |
|
|
(* generate code to convert strand TreeIL types to shadow types *)
|
75 : |
|
|
fun convertStrandToShadow (ty, name, selfIn, selfOut) = (case ty
|
76 : |
|
|
of Ty.IntTy => CL.mkAssign(
|
77 : |
|
|
CL.mkIndirect(CL.mkVar selfIn, name),
|
78 : |
|
|
CL.mkIndirect(CL.mkVar selfOut, name))
|
79 : |
|
|
| Ty.TensorTy[n]=> CL.mkCall(RN.convertToShadowVec n, [
|
80 : |
|
|
CL.mkUnOp(CL.%&, CL.mkIndirect(CL.mkVar selfIn, name)),
|
81 : |
|
|
CL.mkIndirect(CL.mkVar selfOut, name)
|
82 : |
|
|
])
|
83 : |
|
|
| Ty.TensorTy[n, m] => CL.mkCall(RN.convertToShadowMat(m,n), [
|
84 : |
|
|
CL.mkUnOp(CL.%&, CL.mkIndirect(CL.mkVar selfIn, name)),
|
85 : |
|
|
CL.mkIndirect(CL.mkVar selfOut, name)
|
86 : |
|
|
])
|
87 : |
|
|
| _ => CL.mkAssign(
|
88 : |
|
|
CL.mkIndirect(CL.mkVar selfIn, name),
|
89 : |
|
|
CL.mkIndirect(CL.mkVar selfOut, name))
|
90 : |
|
|
(* end case *))
|
91 : |
|
|
|
92 : |
|
|
(* helper functions for specifying parameters in various address spaces *)
|
93 : |
|
|
fun clParam (spc, ty, x) = CL.PARAM([spc], ty, x)
|
94 : |
|
|
fun globalParam (ty, x) = CL.PARAM(["__global"], ty, x)
|
95 : |
|
|
fun constantParam (ty, x) = CL.PARAM(["__constant"], ty, x)
|
96 : |
|
|
fun localParam (ty, x) = CL.PARAM(["__local"], ty, x)
|
97 : |
|
|
fun privateParam (ty, x) = CL.PARAM(["__private"], ty, x)
|
98 : |
|
|
|
99 : |
|
|
(* OpenCL global pointer type *)
|
100 : |
|
|
fun globalPtr ty = CL.T_Qual("__global", CL.T_Ptr ty)
|
101 : |
|
|
|
102 : |
|
|
(* lvalue/rvalue state variable *)
|
103 : |
|
|
fun lvalueSV name = CL.mkIndirect(CL.mkVar "selfOut", name)
|
104 : |
|
|
fun rvalueSV name = CL.mkIndirect(CL.mkVar "selfIn", name)
|
105 : |
|
|
|
106 : |
|
|
(* C variable translation *)
|
107 : |
|
|
structure TrCVar =
|
108 : |
|
|
struct
|
109 : |
|
|
type env = CL.typed_var TreeIL.Var.Map.map
|
110 : |
|
|
fun lookup (env, x) = (case V.Map.find (env, x)
|
111 : |
|
|
of SOME(CL.V(_, x')) => x'
|
112 : |
|
|
| NONE => raise Fail(concat["TrCVar.lookup(_, ", V.name x, ")"])
|
113 : |
|
|
(* end case *))
|
114 : |
|
|
(* translate a variable that occurs in an l-value context (i.e., as the target of an assignment) *)
|
115 : |
|
|
fun lvalueVar (env, x) = (case V.kind x
|
116 : |
|
|
of IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, lookup(env, x))
|
117 : |
|
|
| IL.VK_Local => CL.mkVar(lookup(env, x))
|
118 : |
|
|
(* end case *))
|
119 : |
|
|
(* translate a variable that occurs in an r-value context *)
|
120 : |
|
|
fun rvalueVar (env, x) = (case V.kind x
|
121 : |
|
|
of IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, lookup(env, x))
|
122 : |
|
|
| IL.VK_Local => CL.mkVar(lookup(env, x))
|
123 : |
|
|
(* end case *))
|
124 : |
|
|
(* translate a strand state variable that occurs in an l-value context *)
|
125 : |
|
|
fun lvalueStateVar (IL.SV{name, ...}) = lvalueSV name
|
126 : |
|
|
(* translate a strand state variable that occurs in an r-value context *)
|
127 : |
|
|
fun rvalueStateVar (IL.SV{name, ...}) = rvalueSV name
|
128 : |
|
|
end
|
129 : |
|
|
|
130 : |
|
|
structure ToC = TreeToCFn (TrCVar)
|
131 : |
|
|
|
132 : |
|
|
type var = CL.typed_var
|
133 : |
|
|
type exp = CL.exp
|
134 : |
|
|
type stm = CL.stm
|
135 : |
|
|
|
136 : |
|
|
(* OpenCL specific types *)
|
137 : |
|
|
val clIntTy = CL.T_Named "cl_int"
|
138 : |
|
|
val clProgramTy = CL.T_Named "cl_program"
|
139 : |
|
|
val clKernelTy = CL.T_Named "cl_kernel"
|
140 : |
|
|
val clCmdQueueTy = CL.T_Named "cl_command_queue"
|
141 : |
|
|
val clContextTy = CL.T_Named "cl_context"
|
142 : |
|
|
val clDeviceIdTy = CL.T_Named "cl_device_id"
|
143 : |
|
|
val clPlatformIdTy = CL.T_Named "cl_platform_id"
|
144 : |
|
|
val clMemoryTy = CL.T_Named "cl_mem"
|
145 : |
|
|
val globPtrTy = CL.T_Ptr(CL.T_Named RN.globalsTy)
|
146 : |
|
|
(* FIXME: what are these for? *)
|
147 : |
|
|
datatype shadow_env = STRAND_SHADOW | GLOBAL_SHADOW
|
148 : |
|
|
|
149 : |
|
|
(* variable or field that is mirrored between host and GPU *)
|
150 : |
|
|
type mirror_var = {
|
151 : |
|
|
(* FIXME: perhaps it would be cleaner to just track the TreeIL type of the variable? *)
|
152 : |
|
|
hostTy : CL.ty, (* variable type on Host (i.e., C type) *)
|
153 : |
|
|
shadowTy : CL.ty, (* host-side shadow type of GPU type *)
|
154 : |
|
|
gpuTy : CL.ty, (* variable's type on GPU (i.e., OpenCL type) *)
|
155 : |
|
|
hToS: stm, (* the statement that converts the variable to its *)
|
156 : |
|
|
(* shadow representation *)
|
157 : |
|
|
var : CL.var (* variable name *)
|
158 : |
|
|
}
|
159 : |
|
|
|
160 : |
|
|
datatype strand = Strand of {
|
161 : |
|
|
name : string,
|
162 : |
|
|
tyName : string,
|
163 : |
|
|
state : mirror_var list,
|
164 : |
|
|
output : (Ty.ty * CL.var), (* the strand's output variable (only one for now) *)
|
165 : |
|
|
code : CL.decl list ref,
|
166 : |
|
|
init_code: CL.decl ref
|
167 : |
|
|
}
|
168 : |
|
|
|
169 : |
|
|
datatype program = Prog of {
|
170 : |
|
|
name : string, (* stem of source file *)
|
171 : |
|
|
double : bool, (* true for double-precision support *)
|
172 : |
|
|
parallel : bool, (* true for multithreaded (or multi-GPU) target *)
|
173 : |
|
|
debug : bool, (* true for debug support in executable *)
|
174 : |
|
|
globals : mirror_var list ref,
|
175 : |
|
|
topDecls : CL.decl list ref,
|
176 : |
|
|
strands : strand AtomTable.hash_table,
|
177 : |
|
|
initially : CL.decl ref,
|
178 : |
|
|
numDims: int ref, (* number of dimensions in initially iteration *)
|
179 : |
|
|
imgGlobals: (string * int) list ref,
|
180 : |
|
|
prFn: CL.decl ref,
|
181 : |
|
|
outFn: CL.decl ref
|
182 : |
|
|
}
|
183 : |
|
|
|
184 : |
|
|
datatype env = ENV of {
|
185 : |
|
|
info : env_info,
|
186 : |
|
|
vMap : var V.Map.map,
|
187 : |
|
|
scope : scope
|
188 : |
|
|
}
|
189 : |
|
|
|
190 : |
|
|
and env_info = INFO of {
|
191 : |
|
|
prog : program
|
192 : |
|
|
}
|
193 : |
|
|
|
194 : |
|
|
and scope
|
195 : |
|
|
= NoScope
|
196 : |
|
|
| GlobalScope
|
197 : |
|
|
| InitiallyScope
|
198 : |
|
|
| StrandScope (* strand initialization *)
|
199 : |
|
|
| MethodScope of StrandUtil.method_name (* method body; vars are state variables *)
|
200 : |
|
|
|
201 : |
|
|
(* the supprted widths of vectors of reals on the target. *)
|
202 : |
|
|
(* FIXME: for OpenCL 1.1, 3 is also valid *)
|
203 : |
|
|
fun vectorWidths () = [2, 4, 8, 16]
|
204 : |
|
|
|
205 : |
|
|
(* we do not support printing on the OpenCL target *)
|
206 : |
|
|
val supportsPrinting = false
|
207 : |
|
|
|
208 : |
|
|
(* tests for whether various expression forms can appear inline *)
|
209 : |
|
|
fun inlineCons n = (n < 2) (* vectors are inline, but not matrices *)
|
210 : |
|
|
val inlineMatrixExp = false (* can matrix-valued expressions appear inline? *)
|
211 : |
|
|
|
212 : |
|
|
(* TreeIL to target translations *)
|
213 : |
|
|
structure Tr =
|
214 : |
|
|
struct
|
215 : |
|
|
fun fragment (ENV{info, vMap, scope}, blk) = let
|
216 : |
|
|
val (vMap, stms) = (case scope
|
217 : |
|
|
of GlobalScope => ToC.trFragment (vMap, blk)
|
218 : |
|
|
| InitiallyScope => ToC.trFragment (vMap, blk)
|
219 : |
|
|
| _ => ToCL.trFragment (vMap, blk)
|
220 : |
|
|
(* end case *))
|
221 : |
|
|
in
|
222 : |
|
|
(ENV{info=info, vMap=vMap, scope=scope}, stms)
|
223 : |
|
|
end
|
224 : |
|
|
fun block (ENV{vMap, scope, ...}, blk) = (case scope
|
225 : |
|
|
of StrandScope => ToC.trBlock (vMap, blk)
|
226 : |
|
|
| MethodScope name => ToCL.trBlock (vMap, blk)
|
227 : |
|
|
| InitiallyScope => ToCL.trBlock (vMap, blk)
|
228 : |
|
|
| _ => ToC.trBlock (vMap, blk)
|
229 : |
|
|
(* end case *))
|
230 : |
|
|
fun exp (ENV{vMap, ...}, e) = ToCL.trExp(vMap, e)
|
231 : |
|
|
end
|
232 : |
|
|
|
233 : |
|
|
(* variables *)
|
234 : |
|
|
structure Var =
|
235 : |
|
|
struct
|
236 : |
|
|
fun mirror (ty, name, shadowEnv) = {
|
237 : |
|
|
hostTy = ToC.trType ty,
|
238 : |
|
|
shadowTy = shadowTy ty,
|
239 : |
|
|
gpuTy = ToCL.trType ty,
|
240 : |
|
|
hToS = case shadowEnv
|
241 : |
|
|
of GLOBAL_SHADOW => convertToShadow (ty, name)
|
242 : |
|
|
| STRAND_SHADOW => convertStrandToShadow(ty, name, "selfIn", "selfOut")
|
243 : |
|
|
(* end case *),
|
244 : |
|
|
var = name
|
245 : |
|
|
}
|
246 : |
|
|
fun name (ToCL.V(_, name)) = name
|
247 : |
|
|
fun global (Prog{globals, imgGlobals, ...}, name, ty) = let
|
248 : |
|
|
val x = mirror (ty, name, GLOBAL_SHADOW)
|
249 : |
|
|
fun isImgGlobal (Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}), name) =
|
250 : |
|
|
imgGlobals := (name,dim) :: !imgGlobals
|
251 : |
|
|
| isImgGlobal _ = ()
|
252 : |
|
|
in
|
253 : |
|
|
globals := x :: !globals;
|
254 : |
|
|
isImgGlobal (ty, name);
|
255 : |
|
|
ToCL.V(#gpuTy x, name)
|
256 : |
|
|
end
|
257 : |
|
|
fun param x = ToCL.V(ToCL.trType(V.ty x), V.name x)
|
258 : |
|
|
end
|
259 : |
|
|
|
260 : |
|
|
(* environments *)
|
261 : |
|
|
structure Env =
|
262 : |
|
|
struct
|
263 : |
|
|
(* create a new environment *)
|
264 : |
|
|
fun new prog = ENV{
|
265 : |
|
|
info=INFO{prog = prog},
|
266 : |
|
|
vMap = V.Map.empty,
|
267 : |
|
|
scope = NoScope
|
268 : |
|
|
}
|
269 : |
|
|
(* define the current translation context *)
|
270 : |
|
|
fun setScope scope (ENV{info, vMap, ...}) = ENV{info=info, vMap=vMap, scope=scope}
|
271 : |
|
|
val scopeGlobal = setScope GlobalScope
|
272 : |
|
|
val scopeInitially = setScope InitiallyScope
|
273 : |
|
|
fun scopeStrand env = setScope StrandScope env
|
274 : |
|
|
fun scopeMethod (env, name) = setScope (MethodScope name) env
|
275 : |
|
|
(* bind a TreeIL varaiable to a target variable *)
|
276 : |
|
|
fun bind (ENV{info, vMap, scope}, x, x') = ENV{
|
277 : |
|
|
info = info,
|
278 : |
|
|
vMap = V.Map.insert(vMap, x, x'),
|
279 : |
|
|
scope = scope
|
280 : |
|
|
}
|
281 : |
|
|
end
|
282 : |
|
|
|
283 : |
|
|
(* programs *)
|
284 : |
|
|
structure Program =
|
285 : |
|
|
struct
|
286 : |
|
|
fun new {name, double, parallel, debug} = (
|
287 : |
|
|
RN.initTargetSpec double;
|
288 : |
|
|
CNames.initTargetSpec {double=double, long=false};
|
289 : |
|
|
Prog{
|
290 : |
|
|
name = name,
|
291 : |
|
|
double = double, parallel = parallel, debug = debug,
|
292 : |
|
|
globals = ref [],
|
293 : |
|
|
topDecls = ref [],
|
294 : |
|
|
strands = AtomTable.mkTable (16, Fail "strand table"),
|
295 : |
|
|
initially = ref(CL.D_Comment["missing initially"]),
|
296 : |
|
|
numDims = ref 0,
|
297 : |
|
|
imgGlobals = ref[],
|
298 : |
|
|
prFn = ref(CL.D_Comment(["No Print Function"])),
|
299 : |
|
|
outFn = ref(CL.D_Comment(["No Output Function"]))
|
300 : |
|
|
})
|
301 : |
|
|
|
302 : |
|
|
(* register the code that is used to register command-line options for input variables *)
|
303 : |
|
|
fun inputs (Prog{topDecls, ...}, stm) = let
|
304 : |
|
|
val inputsFn = CL.D_Func(
|
305 : |
|
|
[], CL.voidTy, RN.registerOpts,
|
306 : |
|
|
[CL.PARAM([], CL.T_Ptr(CL.T_Named RN.optionsTy), "opts")],
|
307 : |
|
|
stm)
|
308 : |
|
|
in
|
309 : |
|
|
topDecls := inputsFn :: !topDecls
|
310 : |
|
|
end
|
311 : |
|
|
|
312 : |
|
|
(* register the global initialization part of a program *)
|
313 : |
|
|
fun init (Prog{topDecls, ...}, init) = let
|
314 : |
|
|
val globalsDecl = CL.mkAssign(CL.mkVar RN.globalsVarName,
|
315 : |
|
|
CL.mkApply("malloc", [CL.mkSizeof(CL.T_Named RN.globalsTy)]))
|
316 : |
|
|
val initFn = CL.D_Func(
|
317 : |
|
|
[], CL.voidTy, RN.initGlobals, [],
|
318 : |
|
|
CL.mkBlock[
|
319 : |
|
|
globalsDecl,
|
320 : |
|
|
CL.mkCall(RN.initGlobalsHelper, [CL.mkVar RN.globalsVarName])
|
321 : |
|
|
])
|
322 : |
|
|
val initHelperFn = CL.D_Func(
|
323 : |
|
|
[], CL.voidTy, RN.initGlobalsHelper,
|
324 : |
|
|
[CL.PARAM([], globPtrTy, RN.globalsVarName)],
|
325 : |
|
|
init)
|
326 : |
|
|
val shutdownFn = CL.D_Func(
|
327 : |
|
|
[], CL.voidTy, RN.shutdown,
|
328 : |
|
|
[CL.PARAM([], CL.T_Ptr(CL.T_Named RN.worldTy), "wrld")],
|
329 : |
|
|
CL.S_Block[])
|
330 : |
|
|
in
|
331 : |
|
|
topDecls := shutdownFn :: initFn :: initHelperFn :: !topDecls
|
332 : |
|
|
end
|
333 : |
|
|
|
334 : |
|
|
(* create and register the initially function for a program *)
|
335 : |
|
|
fun initially {
|
336 : |
|
|
prog = Prog{name=progName, strands, initially,numDims, ...},
|
337 : |
|
|
isArray : bool,
|
338 : |
|
|
iterPrefix : stm list,
|
339 : |
|
|
iters : (var * exp * exp) list,
|
340 : |
|
|
createPrefix : stm list,
|
341 : |
|
|
strand : Atom.atom,
|
342 : |
|
|
args : exp list
|
343 : |
|
|
} = let
|
344 : |
|
|
val name = Atom.toString strand
|
345 : |
|
|
val nDims = List.length iters
|
346 : |
|
|
val worldTy = CL.T_Ptr(CL.T_Named N.worldTy)
|
347 : |
|
|
fun mapi f xs = let
|
348 : |
|
|
fun mapf (_, []) = []
|
349 : |
|
|
| mapf (i, x::xs) = f(i, x) :: mapf(i+1, xs)
|
350 : |
|
|
in
|
351 : |
|
|
mapf (0, xs)
|
352 : |
|
|
end
|
353 : |
|
|
val baseInit = mapi (fn (i, (_, e, _)) => (i, CL.I_Exp e)) iters
|
354 : |
|
|
val sizeInit = mapi
|
355 : |
|
|
(fn (i, (CL.V(ty, _), lo, hi)) =>
|
356 : |
|
|
(i, CL.I_Exp(CL.mkBinOp(hi, CL.#-, lo) #+# CL.mkIntTy(1, ty)))
|
357 : |
|
|
) iters
|
358 : |
|
|
(* code to allocate the world and initial strands *)
|
359 : |
|
|
val wrld = "wrld"
|
360 : |
|
|
val allocCode = [
|
361 : |
|
|
CL.mkComment["allocate initial block of strands"],
|
362 : |
|
|
CL.mkDecl(CL.T_Array(CL.int32, SOME nDims), "base", SOME(CL.I_Array baseInit)),
|
363 : |
|
|
CL.mkDecl(CL.T_Array(CL.uint32, SOME nDims), "size", SOME(CL.I_Array sizeInit)),
|
364 : |
|
|
CL.mkDecl(worldTy, wrld,
|
365 : |
|
|
SOME(CL.I_Exp(CL.mkApply(N.allocInitially, [
|
366 : |
|
|
CL.mkVar "ProgramName",
|
367 : |
|
|
CL.mkUnOp(CL.%&, CL.mkVar(N.strandDesc name)),
|
368 : |
|
|
CL.mkBool isArray,
|
369 : |
|
|
CL.mkIntTy(IntInf.fromInt nDims, CL.int32),
|
370 : |
|
|
CL.mkVar "base",
|
371 : |
|
|
CL.mkVar "size"
|
372 : |
|
|
]))))
|
373 : |
|
|
]
|
374 : |
|
|
(* create the loop nest for the initially iterations *)
|
375 : |
|
|
val indexVar = "ix"
|
376 : |
|
|
val strandTy = CL.T_Ptr(CL.T_Named(N.strandTy name))
|
377 : |
|
|
fun mkLoopNest [] = CL.mkBlock(createPrefix @ [
|
378 : |
|
|
CL.mkDecl(strandTy, "sp",
|
379 : |
|
|
SOME(CL.I_Exp(
|
380 : |
|
|
CL.mkCast(strandTy,
|
381 : |
|
|
CL.mkApply(N.inState, [CL.mkVar "wrld", CL.mkVar indexVar]))))),
|
382 : |
|
|
CL.mkCall(N.strandInit name, CL.mkVar "sp" :: args),
|
383 : |
|
|
CL.mkAssign(CL.mkVar indexVar, CL.mkVar indexVar #+# CL.mkIntTy(1, CL.uint32))
|
384 : |
|
|
])
|
385 : |
|
|
| mkLoopNest ((CL.V(ty, param), lo, hi)::iters) = let
|
386 : |
|
|
val body = mkLoopNest iters
|
387 : |
|
|
in
|
388 : |
|
|
CL.mkFor(
|
389 : |
|
|
[(ty, param, lo)],
|
390 : |
|
|
CL.mkBinOp(CL.mkVar param, CL.#<=, hi),
|
391 : |
|
|
[CL.mkPostOp(CL.mkVar param, CL.^++)],
|
392 : |
|
|
body)
|
393 : |
|
|
end
|
394 : |
|
|
val iterCode = [
|
395 : |
|
|
CL.mkComment["initially"],
|
396 : |
|
|
CL.mkDecl(CL.uint32, indexVar, SOME(CL.I_Exp(CL.mkIntTy(0, CL.uint32)))),
|
397 : |
|
|
mkLoopNest iters
|
398 : |
|
|
]
|
399 : |
|
|
val body = CL.mkBlock(
|
400 : |
|
|
iterPrefix @
|
401 : |
|
|
allocCode @
|
402 : |
|
|
iterCode @
|
403 : |
|
|
[CL.mkReturn(SOME(CL.mkVar "wrld"))])
|
404 : |
|
|
val initFn = CL.D_Func([], worldTy, N.initially, [], body)
|
405 : |
|
|
in
|
406 : |
|
|
numDims := nDims;
|
407 : |
|
|
initially := initFn
|
408 : |
|
|
end
|
409 : |
|
|
|
410 : |
|
|
|
411 : |
|
|
(***** OUTPUT *****)
|
412 : |
|
|
|
413 : |
|
|
fun genStrandPrint (Strand{name, tyName, state, output, code, ...}) = let
|
414 : |
|
|
(* the print function *)
|
415 : |
|
|
val prFnName = concat[name, "Print"]
|
416 : |
|
|
val prFn = let
|
417 : |
|
|
val params = [
|
418 : |
|
|
CL.PARAM([], CL.T_Ptr(CL.T_Named "FILE"), "outS"),
|
419 : |
|
|
CL.PARAM([], CL.T_Ptr(CL.T_Named(RN.strandShadowTy tyName)), "self")
|
420 : |
|
|
]
|
421 : |
|
|
val (ty, x) = output
|
422 : |
|
|
val outState = CL.mkIndirect(CL.mkVar "self", x)
|
423 : |
|
|
val prArgs = (case ty
|
424 : |
|
|
of Ty.IntTy => [CL.mkStr(!N.gIntFormat ^ "\n"), outState]
|
425 : |
|
|
| Ty.SeqTy(Ty.IntTy, d) => let
|
426 : |
|
|
fun sel i = CL.mkApply(
|
427 : |
|
|
"VSUB",
|
428 : |
|
|
[outState, CL.mkInt(IntInf.fromInt i)])
|
429 : |
|
|
val fmt = CL.mkStr(
|
430 : |
|
|
String.concatWith " " (List.tabulate(d, fn _ => !N.gIntFormat))
|
431 : |
|
|
^ "\n")
|
432 : |
|
|
val args = List.tabulate (d, sel)
|
433 : |
|
|
in
|
434 : |
|
|
fmt :: args
|
435 : |
|
|
end
|
436 : |
|
|
| Ty.TensorTy[] => [CL.mkStr "%f\n", outState]
|
437 : |
|
|
| Ty.TensorTy[d] => let
|
438 : |
|
|
fun sel i = CL.mkApply(
|
439 : |
|
|
"VSUB",
|
440 : |
|
|
[outState, CL.mkInt(IntInf.fromInt i)])
|
441 : |
|
|
val fmt = CL.mkStr(
|
442 : |
|
|
String.concatWith " " (List.tabulate(d, fn _ => "%f"))
|
443 : |
|
|
^ "\n")
|
444 : |
|
|
val args = List.tabulate (d, sel)
|
445 : |
|
|
in
|
446 : |
|
|
fmt :: args
|
447 : |
|
|
end
|
448 : |
|
|
| _ => raise Fail("genStrand: unsupported output type " ^ Ty.toString ty)
|
449 : |
|
|
(* end case *))
|
450 : |
|
|
in
|
451 : |
|
|
CL.D_Func(["static"], CL.voidTy, prFnName, params,
|
452 : |
|
|
CL.mkCall("fprintf", CL.mkVar "outS" :: prArgs))
|
453 : |
|
|
end
|
454 : |
|
|
in
|
455 : |
|
|
prFn
|
456 : |
|
|
end
|
457 : |
|
|
|
458 : |
|
|
fun genStrandTyDef(targetTy,Strand{state,...},tyName) =
|
459 : |
|
|
(case state
|
460 : |
|
|
of [] => CL.D_Comment(["No Strand Defintiion Included"])
|
461 : |
|
|
| _ => CL.D_StructDef(revmap (fn x => (targetTy x, #var x)) state,
|
462 : |
|
|
tyName)
|
463 : |
|
|
(* end case *))
|
464 : |
|
|
|
465 : |
|
|
|
466 : |
|
|
|
467 : |
|
|
(* generates the globals buffers and arguments function *)
|
468 : |
|
|
fun genConvertShadowTypes (Strand{name, tyName, state,...}) = let
|
469 : |
|
|
(* Delcare opencl setup objects *)
|
470 : |
|
|
val errVar = "err"
|
471 : |
|
|
val params = [
|
472 : |
|
|
CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfOut"),
|
473 : |
|
|
CL.PARAM([], CL.T_Ptr(CL.T_Named(RN.strandShadowTy tyName)), "selfIn")
|
474 : |
|
|
]
|
475 : |
|
|
val body = List.map (fn (x:mirror_var) => #hToS x) state
|
476 : |
|
|
in
|
477 : |
|
|
CL.D_Func([], CL.voidTy, RN.strandConvertName name, params, CL.mkBlock body)
|
478 : |
|
|
end
|
479 : |
|
|
|
480 : |
|
|
(* generates the opencl buffers for the image data *)
|
481 : |
|
|
fun getGlobalDataBuffers (globals, imgGlobals, contextVar, errVar) = let
|
482 : |
|
|
val globalBuffErr = "error creating OpenCL global buffer\n"
|
483 : |
|
|
fun errorFn msg = CL.mkIfThen(CL.mkBinOp(CL.mkVar errVar, CL.#!=, CL.mkVar "CL_SUCCESS"),
|
484 : |
|
|
CL.mkBlock([CL.mkCall("fprintf",[CL.mkVar "stderr", CL.mkStr msg]),
|
485 : |
|
|
CL.mkCall("exit",[CL.mkInt 1])]))
|
486 : |
|
|
val shadowTypeDecl =
|
487 : |
|
|
CL.mkDecl(CL.T_Named(RN.shadowGlobalsTy), RN.shadowGlaobalsName, NONE)
|
488 : |
|
|
val globalToShadowStms = List.map (fn (x:mirror_var) => #hToS x) globals
|
489 : |
|
|
val globalBufferDecl = CL.mkDecl(clMemoryTy,concat[RN.globalsVarName,"_cl"],NONE)
|
490 : |
|
|
val globalBuffer = CL.mkAssign(CL.mkVar(concat[RN.globalsVarName,"_cl"]),
|
491 : |
|
|
CL.mkApply("clCreateBuffer", [
|
492 : |
|
|
CL.mkVar contextVar,
|
493 : |
|
|
CL.mkBinOp(CL.mkVar "CL_MEM_READ_ONLY", CL.#|, CL.mkVar "CL_MEM_COPY_HOST_PTR"),
|
494 : |
|
|
CL.mkSizeof(CL.T_Named RN.shadowGlobalsTy),
|
495 : |
|
|
CL.mkUnOp(CL.%&,CL.mkVar RN.shadowGlaobalsName),
|
496 : |
|
|
CL.mkUnOp(CL.%&,CL.mkVar errVar)
|
497 : |
|
|
]))
|
498 : |
|
|
fun genDataBuffers ([],_,_,_) = []
|
499 : |
|
|
| genDataBuffers ((var,nDims)::globals, contextVar, errVar,errFn) = let
|
500 : |
|
|
val hostVar = CL.mkIndirect(CL.mkVar RN.globalsVarName, var)
|
501 : |
|
|
val size = CL.mkIndirect(hostVar, "dataSzb")
|
502 : |
|
|
in
|
503 : |
|
|
CL.mkDecl(clMemoryTy, RN.addBufferSuffixData var ,NONE) ::
|
504 : |
|
|
CL.mkAssign(CL.mkVar(RN.addBufferSuffixData var),
|
505 : |
|
|
CL.mkApply("clCreateBuffer", [
|
506 : |
|
|
CL.mkVar contextVar,
|
507 : |
|
|
CL.mkVar "CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR",
|
508 : |
|
|
size,
|
509 : |
|
|
CL.mkIndirect(hostVar, "data"),
|
510 : |
|
|
CL.mkUnOp(CL.%&,CL.mkVar errVar)
|
511 : |
|
|
])) ::
|
512 : |
|
|
errFn(concat["error in creating ",RN.addBufferSuffixData var, " global buffer\n"]) ::
|
513 : |
|
|
genDataBuffers(globals,contextVar,errVar,errFn)
|
514 : |
|
|
end
|
515 : |
|
|
in
|
516 : |
|
|
[shadowTypeDecl] @ globalToShadowStms
|
517 : |
|
|
@ [globalBufferDecl, globalBuffer,errorFn(globalBuffErr)]
|
518 : |
|
|
@ genDataBuffers(imgGlobals,contextVar,errVar,errorFn)
|
519 : |
|
|
end
|
520 : |
|
|
|
521 : |
|
|
(* generates the kernel arguments for the image data *)
|
522 : |
|
|
fun genGlobalArguments (globals, count, kernelVar, errVar) = let
|
523 : |
|
|
val globalArgErr = "error creating OpenCL global argument\n"
|
524 : |
|
|
fun errorFn msg = CL.mkIfThen(CL.mkBinOp(CL.mkVar errVar, CL.#!=, CL.mkVar "CL_SUCCESS"),
|
525 : |
|
|
CL.mkBlock([CL.mkCall("fprintf",[CL.mkVar "stderr", CL.mkStr msg]),
|
526 : |
|
|
CL.mkCall("exit",[CL.mkInt 1])]))
|
527 : |
|
|
val globalArgument = CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.&=,
|
528 : |
|
|
CL.mkApply("clSetKernelArg",
|
529 : |
|
|
[CL.mkVar kernelVar,
|
530 : |
|
|
CL.mkPostOp(CL.mkVar count, CL.^++),
|
531 : |
|
|
CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
|
532 : |
|
|
CL.mkUnOp(CL.%&,CL.mkVar(concat[RN.globalsVarName,"_cl"]))])))
|
533 : |
|
|
fun genDataArguments ([],_,_,_,_) = []
|
534 : |
|
|
| genDataArguments ((var,nDims)::globals,count,kernelVar,errVar,errFn) =
|
535 : |
|
|
CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.$=,
|
536 : |
|
|
CL.mkApply("clSetKernelArg",
|
537 : |
|
|
[CL.mkVar kernelVar,
|
538 : |
|
|
CL.mkPostOp(CL.mkVar count, CL.^++),
|
539 : |
|
|
CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
|
540 : |
|
|
CL.mkUnOp(CL.%&,CL.mkVar(RN.addBufferSuffixData var))]))) ::
|
541 : |
|
|
errFn(concat["error in creating ",RN.addBufferSuffixData var, " argument\n"]) ::
|
542 : |
|
|
genDataArguments (globals,count,kernelVar,errVar,errFn)
|
543 : |
|
|
in
|
544 : |
|
|
globalArgument :: errorFn globalArgErr ::
|
545 : |
|
|
genDataArguments(globals, count, kernelVar, errVar,errorFn)
|
546 : |
|
|
end
|
547 : |
|
|
|
548 : |
|
|
(* generates the globals buffers and arguments function *)
|
549 : |
|
|
fun genGlobalBuffersArgs (globals,imgGlobals) = let
|
550 : |
|
|
(* Delcare opencl setup objects *)
|
551 : |
|
|
val errVar = "err"
|
552 : |
|
|
val params = [
|
553 : |
|
|
CL.PARAM([],CL.T_Named("cl_context"), "context"),
|
554 : |
|
|
CL.PARAM([],CL.T_Named("cl_kernel"), "kernel"),
|
555 : |
|
|
CL.PARAM([],CL.T_Named("cl_command_queue"), "cmdQ"),
|
556 : |
|
|
CL.PARAM([],CL.T_Named("int"), "argStart")
|
557 : |
|
|
]
|
558 : |
|
|
val body = (case globals
|
559 : |
|
|
of [] => [CL.mkReturn(NONE)]
|
560 : |
|
|
| _ => let
|
561 : |
|
|
val clGlobalBuffers =
|
562 : |
|
|
getGlobalDataBuffers(globals, !imgGlobals, "context", errVar)
|
563 : |
|
|
val clGlobalArguments =
|
564 : |
|
|
genGlobalArguments(!imgGlobals, "argStart", "kernel", errVar)
|
565 : |
|
|
in
|
566 : |
|
|
(* Body: put all the statments together *)
|
567 : |
|
|
CL.mkDecl(clIntTy, errVar, SOME(CL.I_Exp(CL.mkInt 0)))
|
568 : |
|
|
:: clGlobalBuffers @ clGlobalArguments
|
569 : |
|
|
end
|
570 : |
|
|
(*end of case*))
|
571 : |
|
|
in
|
572 : |
|
|
CL.D_Func([],CL.voidTy,RN.globalsSetupName,params,CL.mkBlock(body))
|
573 : |
|
|
end
|
574 : |
|
|
|
575 : |
|
|
(* generate the global image meta-data and data parameters *)
|
576 : |
|
|
fun genKeneralGlobalParams ((name,tyname)::[],line) =
|
577 : |
|
|
concat[line, "__global void *", RN.addBufferSuffixData name]
|
578 : |
|
|
| genKeneralGlobalParams ([],line) = line
|
579 : |
|
|
| genKeneralGlobalParams ((name,tyname)::rest, line) =
|
580 : |
|
|
genKeneralGlobalParams(rest, concat[line, "__global void *", RN.addBufferSuffixData name, ",\n"])
|
581 : |
|
|
|
582 : |
|
|
fun genUpdateMethod (Strand{name, tyName, state,...}, globals, imgGlobals) = let
|
583 : |
|
|
val imageDataStms = List.map
|
584 : |
|
|
(fn (x,_) => concat[
|
585 : |
|
|
RN.globalImageDataName, ".", RN.imageDataName x, " = ",
|
586 : |
|
|
RN.addBufferSuffixData x, ";","\n"
|
587 : |
|
|
])
|
588 : |
|
|
(!imgGlobals)
|
589 : |
|
|
fun select ([], a, _) = a
|
590 : |
|
|
| select (_, _, b) = b
|
591 : |
|
|
val placeHolders = [
|
592 : |
|
|
(RN.place_holders, tyName),
|
593 : |
|
|
(RN.p_addDatPtr, select (!imgGlobals, "", ",")),
|
594 : |
|
|
(RN.p_addGlobals, select (!globals, "", ",")),
|
595 : |
|
|
(RN.p_globals, select (!globals, "", "__global Diderot_Globals_t *diderotGlobals")),
|
596 : |
|
|
(RN.p_globalVar, select (!globals, "0", RN.globalsVarName)),
|
597 : |
|
|
(RN.p_dataVar, select (!globals, "0", RN.globalImageDataName)),
|
598 : |
|
|
(RN.p_dataPtr, genKeneralGlobalParams (!imgGlobals, "")),
|
599 : |
|
|
(RN.p_dataAssign, select (!imgGlobals, "",
|
600 : |
|
|
String.concat("Diderot_data_ptr_t diderotDataPtrs;\n" :: imageDataStms)))
|
601 : |
|
|
]
|
602 : |
|
|
in
|
603 : |
|
|
CL.verbatim [CLUpdateFrag.text] placeHolders
|
604 : |
|
|
end
|
605 : |
|
|
|
606 : |
|
|
fun genStrandCopy(Strand{tyName,name,state,...}) = let
|
607 : |
|
|
val params = [
|
608 : |
|
|
CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfIn"),
|
609 : |
|
|
CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfOut")
|
610 : |
|
|
]
|
611 : |
|
|
val assignStms = List.rev(
|
612 : |
|
|
List.map
|
613 : |
|
|
(fn x => CL.mkAssign(lvalueSV(#var x), rvalueSV(#var x)))
|
614 : |
|
|
state)
|
615 : |
|
|
in
|
616 : |
|
|
CL.D_Func([""], CL.voidTy, RN.strandCopy, params,CL.mkBlock(assignStms))
|
617 : |
|
|
end
|
618 : |
|
|
|
619 : |
|
|
(* generate a global structure type definition from the list of globals *)
|
620 : |
|
|
fun genGlobalStruct (_, [], _) = CL.D_Comment(["No Global Definition"])
|
621 : |
|
|
| genGlobalStruct (targetTy, globals, tyName) = let
|
622 : |
|
|
val globs = List.map (fn (x : mirror_var) => (targetTy x, #var x)) globals
|
623 : |
|
|
in
|
624 : |
|
|
CL.D_StructDef(globs, tyName)
|
625 : |
|
|
end
|
626 : |
|
|
|
627 : |
|
|
(* generate a global structure type definition from the image data of the image globals *)
|
628 : |
|
|
fun genImageDataStruct ([], _) = CL.D_Comment(["No Image Data Ptrs Definition"])
|
629 : |
|
|
| genImageDataStruct (imgGlobals, tyName) = let
|
630 : |
|
|
val globs = List.map
|
631 : |
|
|
(fn (x, _) => (globalPtr CL.voidTy, RN.imageDataName x))
|
632 : |
|
|
imgGlobals
|
633 : |
|
|
in
|
634 : |
|
|
CL.D_StructDef(globs, tyName)
|
635 : |
|
|
end
|
636 : |
|
|
|
637 : |
|
|
fun genGlobals (declFn, targetTy, globals) = let
|
638 : |
|
|
fun doVar (x : mirror_var) = declFn (CL.D_Var([], targetTy x, #var x, NONE))
|
639 : |
|
|
in
|
640 : |
|
|
List.app doVar globals
|
641 : |
|
|
end
|
642 : |
|
|
|
643 : |
|
|
fun genOutputFun(Strand{name, output,tyName, state, code,...}) = let
|
644 : |
|
|
(* the output function *)
|
645 : |
|
|
val outFnName = concat[name, "_Output"]
|
646 : |
|
|
val outFun = let
|
647 : |
|
|
val params = [
|
648 : |
|
|
CL.PARAM([], CL.T_Ptr CL.voidTy, "outS"),
|
649 : |
|
|
CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "self")
|
650 : |
|
|
]
|
651 : |
|
|
(* the type and access expression for the strand's output variable *)
|
652 : |
|
|
val (outTy, outState) = (#1 output, CL.mkIndirect(CL.mkVar "self", #2 output))
|
653 : |
|
|
val outState = CL.mkUnOp(CL.%&, outState)
|
654 : |
|
|
in
|
655 : |
|
|
CL.D_Func(["static"], CL.voidTy, outFnName, params,
|
656 : |
|
|
CL.mkCall("memcpy", [CL.mkVar "outS", outState, CL.mkSizeof(shadowTy outTy)] ))
|
657 : |
|
|
end
|
658 : |
|
|
in
|
659 : |
|
|
outFun
|
660 : |
|
|
end
|
661 : |
|
|
|
662 : |
|
|
fun genStrandDesc (outFn,Strand{name, output,tyName, state, code,...}) = let
|
663 : |
|
|
(* the output function *)
|
664 : |
|
|
val outFnName = concat[name, "_Output"]
|
665 : |
|
|
(* the strand's descriptor object *)
|
666 : |
|
|
val descI = let
|
667 : |
|
|
fun fnPtr (ty, f) = CL.I_Exp(CL.mkCast(CL.T_Named ty, CL.mkVar f))
|
668 : |
|
|
val (outTy, _) = output
|
669 : |
|
|
in
|
670 : |
|
|
CL.I_Struct[
|
671 : |
|
|
("name", CL.I_Exp(CL.mkStr name)),
|
672 : |
|
|
("stateSzb", CL.I_Exp(CL.mkSizeof(CL.T_Named(RN.strandTy name)))),
|
673 : |
|
|
("shadowStrandSzb", CL.I_Exp(CL.mkSizeof(CL.T_Named(RN.strandShadowTy (RN.strandTy name))))),
|
674 : |
|
|
(* FIXME: we may need to add a shadowOutputSzb field too for OpenCL *)
|
675 : |
|
|
("outputSzb", CL.I_Exp(CL.mkSizeof(shadowTy outTy))),
|
676 : |
|
|
("nrrdType", CL.I_Exp(CL.mkInt (NrrdTypes.toNrrdType outTy))),
|
677 : |
|
|
("nrrdSzb", CL.I_Exp(CL.mkInt (NrrdTypes.toNrrdSize outTy))),
|
678 : |
|
|
("update", fnPtr("update_method_t", "0")),
|
679 : |
|
|
("strandCopy", fnPtr("convert_method_t", RN.strandConvertName name)),
|
680 : |
|
|
("print", fnPtr("print_method_t", RN.strandPrintName name)),
|
681 : |
|
|
("output", fnPtr("output_method_t", outFnName)) (* FIXME *)
|
682 : |
|
|
]
|
683 : |
|
|
end
|
684 : |
|
|
val desc = CL.D_Var([], CL.T_Named N.strandDescTy, N.strandDesc name, SOME descI)
|
685 : |
|
|
in
|
686 : |
|
|
desc
|
687 : |
|
|
end
|
688 : |
|
|
|
689 : |
|
|
(* generate the table of strand descriptors *)
|
690 : |
|
|
fun genStrandTable (declFn, strands) = let
|
691 : |
|
|
val nStrands = length strands
|
692 : |
|
|
fun genInit (Strand{name, ...}) = CL.I_Exp(CL.mkUnOp(CL.%&, CL.mkVar(N.strandDesc name)))
|
693 : |
|
|
fun genInits (_, []) = []
|
694 : |
|
|
| genInits (i, s::ss) = (i, genInit s) :: genInits(i+1, ss)
|
695 : |
|
|
in
|
696 : |
|
|
declFn (CL.D_Var([], CL.int32, N.numStrands,
|
697 : |
|
|
SOME(CL.I_Exp(CL.mkIntTy(IntInf.fromInt nStrands, CL.int32)))));
|
698 : |
|
|
declFn (CL.D_Var([],
|
699 : |
|
|
CL.T_Array(CL.T_Ptr(CL.T_Named N.strandDescTy), SOME nStrands),
|
700 : |
|
|
N.strands,
|
701 : |
|
|
SOME(CL.I_Array(genInits (0, strands)))))
|
702 : |
|
|
end
|
703 : |
|
|
|
704 : |
|
|
fun genSrc (baseName, prog) = let
|
705 : |
|
|
val Prog{
|
706 : |
|
|
name, double, globals, topDecls, strands, initially,
|
707 : |
|
|
imgGlobals, numDims,outFn, ...
|
708 : |
|
|
} = prog
|
709 : |
|
|
val clFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "cl"}
|
710 : |
|
|
val cFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "c"}
|
711 : |
|
|
val clOutS = TextIO.openOut clFileName
|
712 : |
|
|
val cOutS = TextIO.openOut cFileName
|
713 : |
|
|
val clppStrm = PrintAsCL.new clOutS
|
714 : |
|
|
val cppStrm = PrintAsC.new cOutS
|
715 : |
|
|
val progName = name
|
716 : |
|
|
fun cppDecl dcl = PrintAsC.output(cppStrm, dcl)
|
717 : |
|
|
fun clppDecl dcl = PrintAsCL.output(clppStrm, dcl)
|
718 : |
|
|
val strands = AtomTable.listItems strands
|
719 : |
|
|
val [strand as Strand{name, tyName, code, init_code, ...}] = strands
|
720 : |
|
|
in
|
721 : |
|
|
(* Generate the OpenCL file *)
|
722 : |
|
|
(* Retrieve the header information *)
|
723 : |
|
|
clppDecl (CL.verbatim [HF.text] [
|
724 : |
|
|
("OUTFILE", clFileName),
|
725 : |
|
|
("SRCFILE", OS.Path.joinBaseExt{base=baseName, ext=SOME "diderot"}),
|
726 : |
|
|
("PRECISION", if double then "DOUBLE" else "SINGLE")
|
727 : |
|
|
]);
|
728 : |
|
|
(* if there are no globals, then define a dummy type *)
|
729 : |
|
|
if List.null(!globals)
|
730 : |
|
|
then clppDecl (CL.D_Verbatim["typedef void ", RN.globalsTy, ";\n"])
|
731 : |
|
|
else ();
|
732 : |
|
|
(* if there are no images, then define a dummy type *)
|
733 : |
|
|
if List.null(!imgGlobals)
|
734 : |
|
|
then clppDecl (CL.D_Verbatim["typedef void * ", RN.imageDataType, ";\n"])
|
735 : |
|
|
else ();
|
736 : |
|
|
(* Retrieve the scheduler kernels and functions *)
|
737 : |
|
|
clppDecl (CL.D_Verbatim[SF.text]);
|
738 : |
|
|
clppDecl (CL.D_Verbatim[CLEigen2x2Frag.text]);
|
739 : |
|
|
clppDecl (CL.D_Verbatim[CLEigen3x3Frag.text]);
|
740 : |
|
|
clppDecl (genGlobalStruct (#gpuTy, !globals, RN.globalsTy));
|
741 : |
|
|
clppDecl (genImageDataStruct(!imgGlobals, RN.imageDataType));
|
742 : |
|
|
clppDecl (genStrandTyDef(#gpuTy, strand, tyName));
|
743 : |
|
|
List.app clppDecl (!code);
|
744 : |
|
|
clppDecl (genStrandCopy strand);
|
745 : |
|
|
clppDecl (genUpdateMethod(strand, globals, imgGlobals));
|
746 : |
|
|
(* Generate the Host C file *)
|
747 : |
|
|
cppDecl (CL.D_Verbatim[
|
748 : |
|
|
if double
|
749 : |
|
|
then "#define DIDEROT_DOUBLE_PRECISION\n"
|
750 : |
|
|
else "#define DIDEROT_SINGLE_PRECISION\n",
|
751 : |
|
|
"#define DIDEROT_INT\n",
|
752 : |
|
|
"#define DIDEROT_TARGET_CL\n",
|
753 : |
|
|
"#include \"Diderot/diderot.h\"\n"
|
754 : |
|
|
]);
|
755 : |
|
|
cppDecl (CL.D_Verbatim[
|
756 : |
|
|
(case !globals
|
757 : |
|
|
of [] => concat["typedef void ", RN.globalsTy,";\n"]
|
758 : |
|
|
| _ => ""
|
759 : |
|
|
(*end of case*))
|
760 : |
|
|
]);
|
761 : |
|
|
cppDecl (CL.D_Var(["static"], CL.charPtr, "ProgramName",
|
762 : |
|
|
SOME(CL.I_Exp(CL.mkStr progName))));
|
763 : |
|
|
cppDecl (genGlobalStruct (#hostTy, !globals, RN.globalsTy));
|
764 : |
|
|
cppDecl (genGlobalStruct (#shadowTy, !globals, RN.shadowGlobalsTy));
|
765 : |
|
|
(* FIXME: does this really need to be a global? *)
|
766 : |
|
|
cppDecl (CL.D_Var(["static"], globPtrTy, RN.globalsVarName, NONE));
|
767 : |
|
|
cppDecl (genStrandTyDef (#hostTy, strand, tyName));
|
768 : |
|
|
cppDecl (genStrandTyDef (#shadowTy, strand, RN.strandShadowTy tyName));
|
769 : |
|
|
cppDecl (genConvertShadowTypes strand);
|
770 : |
|
|
cppDecl (!init_code);
|
771 : |
|
|
cppDecl (genStrandPrint strand);
|
772 : |
|
|
cppDecl (genOutputFun strand);
|
773 : |
|
|
List.app cppDecl (List.rev (!topDecls));
|
774 : |
|
|
cppDecl (genGlobalBuffersArgs (!globals,imgGlobals));
|
775 : |
|
|
List.app (fn strand => cppDecl (genStrandDesc (outFn,strand))) strands;
|
776 : |
|
|
genStrandTable (cppDecl, strands);
|
777 : |
|
|
cppDecl (!initially);
|
778 : |
|
|
PrintAsC.close cppStrm;
|
779 : |
|
|
PrintAsCL.close clppStrm;
|
780 : |
|
|
TextIO.closeOut cOutS;
|
781 : |
|
|
TextIO.closeOut clOutS
|
782 : |
|
|
end
|
783 : |
|
|
|
784 : |
|
|
(* output the code to the filesystem. The string is the basename of the source file *)
|
785 : |
|
|
fun generate (basename, prog as Prog{double, parallel, debug, ...}) = let
|
786 : |
|
|
fun condCons (true, x, xs) = x::xs
|
787 : |
|
|
| condCons (false, _, xs) = xs
|
788 : |
|
|
(* generate the C compiler flags *)
|
789 : |
|
|
val cflags = ["-I" ^ Paths.diderotInclude, "-I" ^ Paths.teemInclude]
|
790 : |
|
|
val cflags = condCons (parallel, #pthread Paths.cflags, cflags)
|
791 : |
|
|
val cflags = if debug
|
792 : |
|
|
then #debug Paths.cflags :: cflags
|
793 : |
|
|
else #ndebug Paths.cflags :: cflags
|
794 : |
|
|
val cflags = #base Paths.cflags :: cflags
|
795 : |
|
|
(* generate the loader flags *)
|
796 : |
|
|
val extraLibs = condCons (parallel, #pthread Paths.extraLibs, [])
|
797 : |
|
|
val extraLibs = Paths.teemLinkFlags @ #base Paths.extraLibs :: extraLibs
|
798 : |
|
|
val extraLibs = #cl Paths.extraLibs :: extraLibs
|
799 : |
|
|
val rtLib = TargetUtil.runtimeName {
|
800 : |
|
|
target = TargetUtil.TARGET_CL,
|
801 : |
|
|
parallel = parallel, double = double, debug = debug
|
802 : |
|
|
}
|
803 : |
|
|
val ldOpts = rtLib :: extraLibs
|
804 : |
|
|
in
|
805 : |
|
|
genSrc (basename, prog);
|
806 : |
|
|
RunCC.compile (basename, cflags);
|
807 : |
|
|
RunCC.link (basename, ldOpts)
|
808 : |
|
|
end
|
809 : |
|
|
|
810 : |
|
|
end (* Program *)
|
811 : |
|
|
|
812 : |
|
|
(* strands *)
|
813 : |
|
|
structure Strand =
|
814 : |
|
|
struct
|
815 : |
|
|
|
816 : |
|
|
fun define (Prog{strands, ...}, strandId, state) = let
|
817 : |
|
|
val name = Atom.toString strandId
|
818 : |
|
|
(* the output state variable *)
|
819 : |
|
|
val outputVar = (case List.filter IL.StateVar.isOutput state
|
820 : |
|
|
of [] => raise Fail("no output specified for strand " ^ name)
|
821 : |
|
|
| [x] => (IL.StateVar.ty x, IL.StateVar.name x)
|
822 : |
|
|
| _ => raise Fail("multiple outputs in " ^ name)
|
823 : |
|
|
(* end case *))
|
824 : |
|
|
(* the state variables *)
|
825 : |
|
|
val state = let
|
826 : |
|
|
fun cvt x = Var.mirror (IL.StateVar.ty x, IL.StateVar.name x, STRAND_SHADOW)
|
827 : |
|
|
in
|
828 : |
|
|
List.map cvt state
|
829 : |
|
|
end
|
830 : |
|
|
val strand = Strand{
|
831 : |
|
|
name = name,
|
832 : |
|
|
tyName = RN.strandTy name,
|
833 : |
|
|
state = state,
|
834 : |
|
|
output = outputVar,
|
835 : |
|
|
code = ref [],
|
836 : |
|
|
init_code = ref (CL.D_Comment(["no init code"]))
|
837 : |
|
|
}
|
838 : |
|
|
in
|
839 : |
|
|
AtomTable.insert strands (strandId, strand);
|
840 : |
|
|
strand
|
841 : |
|
|
end
|
842 : |
|
|
|
843 : |
|
|
(* return the strand with the given name *)
|
844 : |
|
|
fun lookup (Prog{strands, ...}, strandId) = AtomTable.lookup strands strandId
|
845 : |
|
|
|
846 : |
|
|
(* register the strand-state initialization code. The variables are the strand
|
847 : |
|
|
* parameters.
|
848 : |
|
|
*)
|
849 : |
|
|
fun init (Strand{name, tyName, code, init_code, ...}, params, init) = let
|
850 : |
|
|
val fName = RN.strandInit name
|
851 : |
|
|
val params =
|
852 : |
|
|
clParam ("",CL.T_Ptr(CL.T_Named tyName), "selfOut") ::
|
853 : |
|
|
List.map (fn (ToCL.V(ty, x)) => CL.PARAM([], ty, x)) params
|
854 : |
|
|
val initFn = CL.D_Func([], CL.voidTy, fName, params, init)
|
855 : |
|
|
in
|
856 : |
|
|
init_code := initFn
|
857 : |
|
|
end
|
858 : |
|
|
|
859 : |
|
|
(* register a strand method *)
|
860 : |
|
|
fun method (Strand{name, tyName, code,...}, methName, body) = let
|
861 : |
|
|
val params = [
|
862 : |
|
|
globalParam (CL.T_Ptr(CL.T_Named tyName), "selfIn"),
|
863 : |
|
|
globalParam (CL.T_Ptr(CL.T_Named tyName), "selfOut"),
|
864 : |
|
|
globalParam (CL.T_Ptr(CL.T_Named (RN.globalsTy)), RN.globalsVarName),
|
865 : |
|
|
CL.PARAM([],CL.T_Named(RN.imageDataType),RN.globalImageDataName)
|
866 : |
|
|
]
|
867 : |
|
|
val (fName,resTy) = (case methName
|
868 : |
|
|
of StrandUtil.Update => (RN.strandUpdate,CL.T_Named "StrandStatus_t")
|
869 : |
|
|
| StrandUtil.Stabilize => (name ^ StrandUtil.nameToString methName, CL.voidTy)
|
870 : |
|
|
(* end case *))
|
871 : |
|
|
val methFn = CL.D_Func([], resTy, fName, params, body)
|
872 : |
|
|
in
|
873 : |
|
|
code := methFn :: !code
|
874 : |
|
|
end
|
875 : |
|
|
|
876 : |
|
|
end
|
877 : |
|
|
|
878 : |
|
|
end
|
879 : |
|
|
|
880 : |
|
|
structure CUDABackEnd = CodeGenFn(CUDATarget)
|