Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /branches/vis12-cl/src/compiler/cl-target/gen-output.sml
ViewVC logotype

Annotation of /branches/vis12-cl/src/compiler/cl-target/gen-output.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3185 - (view) (download)

1 : jhr 2648 (* gen-output.sml
2 :     *
3 :     * COPYRIGHT (c) 2014 The Diderot Project (http://diderot-language.cs.uchicago.edu)
4 :     * All rights reserved.
5 :     *
6 :     * Generate strand output functions for the OpenCL target. The output formats always have
7 :     * a single axis for the data elements followed by one, or more, axes for the output structure.
8 :     * There are two cases that we handle:
9 :     *
10 :     * grid, fixed-size elements:
11 :     * nrrd has object axis followed by grid axes
12 :     *
13 :     * collection, fixed-size elements
14 :     * nrrd has object axis followed by a single axis
15 :     *
16 :     * NOTE: the C target also supports dynamic-sized elements (i.e., dynamic sequences), but the
17 :     * OpenCL target does not support these yet.
18 :     *
19 :     * The object axis kind depends on the output type, but it will either be one of the tensor types
20 :     * that Teem knows about or else nrrdKindList. In any case, the data elements are written as a
21 :     * flat vector following the in-memory layout. The other axes in the file will have nrrdKindSpace
22 :     * as their kind.
23 :     *
24 :     * TODO: some of this code is common with c-target/gen-output.sml (e.g., writing outputs to
25 :     * files), so we should refactor it.
26 :     *
27 :     * TODO: for sequences of tensors (e.g., tensor[3]{2}), we should use a separate axis for the
28 :     * sequence dimension with kind nrrdKindList.
29 :     *)
30 :    
31 :     structure GenOutput : sig
32 :    
33 :     (* gen (props, nAxes) outputs
34 :     * returns code for getting the output/snapshot nrrds from the program state.
35 :     * The arguments are:
36 :     * props - the target information
37 :     * nAxes - the number of axes in the grid of strands (NONE for a collection)
38 :     * outputs - the list of output state variables paired with their TreeIL types
39 :     * The return value is a record {kernels, getFns}, where
40 :     * kernels - list of OpenCL kernels used to get output variables
41 :     * getFns - list of function declarations that implement the public
42 :     * output and snapshot queries.
43 :     *)
44 : jhr 2699 val gen : Properties.props * int option -> (TreeIL.Ty.ty * string) list -> CLang.decl list
45 : jhr 2648
46 : jhr 2762 val genKernels : Properties.props * CLang.ty * int option
47 :     -> (TreeIL.Ty.ty * string) list
48 :     -> (string * CLang.decl) list
49 : jhr 2699
50 : jhr 2648 end = struct
51 :    
52 :     structure IL = TreeIL
53 :     structure V = IL.Var
54 :     structure Ty = IL.Ty
55 :     structure CL = CLang
56 :     structure Nrrd = NrrdEnums
57 :     structure U = CLUtil
58 : jhr 3095 structure CN = CNames
59 : jhr 2648
60 :     fun mapi f l = let
61 :     fun mapf (i, [], l) = List.rev l
62 :     | mapf (i, x::xs, l) = mapf (i+1, xs, f(i, x)::l)
63 :     in
64 :     mapf (0, l, [])
65 :     end
66 :    
67 :     val nrrdPtrTy = CL.T_Ptr(CL.T_Named "Nrrd")
68 :     val sizeTy = CL.T_Named "size_t"
69 :     fun mkInt i = CL.mkInt(IntInf.fromInt i)
70 : jhr 2694
71 : jhr 2648 (* variables in the generated code *)
72 :     val wrldV = CL.mkVar "wrld"
73 :     val sizesV = CL.mkVar "sizes"
74 : jhr 2712 val nDataV = CL.mkVar "nData"
75 : jhr 2648
76 : jhr 2712 (* utility functions for initializing the sizes array *)
77 :     fun sizes i = CL.mkSubscript(sizesV, mkInt i)
78 :     fun setSizes (i, v) = CL.mkAssign(sizes i, v)
79 :    
80 : jhr 3137 local
81 :     fun get_group_id i = CL.mkApply("get_group_id", [CL.mkInt i])
82 :     fun get_num_groups i = CL.mkApply("get_num_groups", [CL.mkInt i])
83 :     fun get_local_id i = CL.mkApply("get_local_id", [CL.mkInt i])
84 :     in
85 :     (* the expression to compute the base strand index from the group and local ids.
86 :     * This calculation is based on a 2D arrangement, where the first dimension is
87 :     * equal to the number of CUs times the CU width, while the second dimension is
88 :     * equal to the number of workers/CU.
89 :     *
90 :     * int idx = BLK_SZ * (get_group_id(0) + get_num_groups(0)*get_group_id(1)) + get_local_id(0)
91 :     *)
92 :     val indexStm = CL.mkDeclInit(
93 :     CL.uint32, "idx",
94 :     CL.mkBinOp(
95 :     CL.mkBinOp(
96 :     CL.mkVar "BLK_SZ",
97 :     CL.#*,
98 :     CL.mkBinOp(
99 :     get_group_id 0,
100 :     CL.#+,
101 :     CL.mkBinOp(get_num_groups 0, CL.#*, get_group_id 1))),
102 :     CL.#+,
103 :     get_local_id 0))
104 :     (* the expression to compute the strand offset from the group and local ids.
105 :     *
106 :     * int offset = BLK_SZ * get_num_groups(0) * get_num_groups(1)
107 :     *)
108 :     val offsetStm = CL.mkDeclInit(
109 :     CL.uint32, "offset",
110 :     CL.mkBinOp(get_num_groups 0, CL.#*, CL.mkBinOp(get_num_groups 1, CL.#*, CL.mkVar "BLK_SZ")))
111 :     end (* local *)
112 :    
113 : jhr 2648 (* create a kernel for copying the given output state variable to the output
114 :     * buffer.
115 :     *)
116 :     (* NOTES: if the output is a grid, then we want to use the grid indices as a guide for processing
117 :     * the output. Otherwise, the order does not matter, but we do need to worry about synchronizing
118 :     * writes to the output buffer.
119 :     *)
120 : jhr 2762 fun mkCopyKernel tgt strandTy (ty : TreeIL.Ty.ty, name) = let
121 :     val (ty', nElems) = CLTyTranslate.toOutputType ty
122 :     val body = CL.mkBlock[
123 : jhr 3137 indexStm,
124 : jhr 2762 CL.mkDeclInit(CL.uint32, "offset",
125 :     CL.mkBinOp(CL.mkApply("get_num_groups", [CL.mkInt 0]), CL.#*, CL.mkVar "BLK_SZ")),
126 :     CL.S_Decl(["__global"], strandTy, "state", SOME(CL.I_Exp(CL.mkIndirect(CL.mkVar "sched", "state")))),
127 :     CL.mkWhile (CL.mkBinOp(CL.mkVar "idx", CL.#<, CL.mkVar "nStrands"),
128 :     CL.mkBlock(
129 :     CL.S_Decl(["__global"], CL.T_Ptr ty', "dst",
130 :     SOME(CL.I_Exp(CL.mkBinOp(CL.mkVar "outBuf", CL.#+,
131 : jhr 2770 CL.mkBinOp(CL.mkInt(IntInf.fromInt nElems), CL.#*, CL.mkVar "idx"))))) ::
132 : jhr 2762 CLTyTranslate.copyToOutput{
133 :     ty = ty,
134 : jhr 3119 dst = CL.mkVar "dst",
135 : jhr 3116 src = CL.mkSelect(CL.mkSubscript(CL.mkVar "state", CL.mkVar "idx"), "sv_" ^ name)
136 : jhr 2762 } @
137 :     [CL.mkAssign' (CL.mkVar "idx", CL.+=, CL.mkVar "offset")]))
138 :     ]
139 : jhr 3125 val kName = OCLNames.getOutputKern name
140 : jhr 2762 val kern = U.mkKernel(
141 :     kName,
142 : jhr 3095 [U.globalParam(CN.schedPtrTy tgt, "sched"), U.globalParam(CL.T_Ptr ty', "outBuf"), U.clParam(CL.uint32, "nStrands")],
143 : jhr 2762 body)
144 : jhr 2648 in
145 : jhr 2762 (kName, kern)
146 : jhr 2648 end
147 :    
148 : jhr 2694 (* create the body of an output function for fixed-size outputs. The structure of the
149 :     * function body is:
150 :     *
151 :     * declare and compute sizes array
152 : jhr 2712 * allocate GPU data object
153 :     * invoke kernel to copy data from strand state into data buffer
154 : jhr 2694 * allocate nrrd nData
155 : jhr 2712 * copy data from GPU to nrrd
156 : jhr 2694 *)
157 : jhr 2712 fun genFixedOutput (tgt, snapshot, nAxes, ty, name) = let
158 :     val (elemCTy, nrrdType, axisKind, nElems) = OutputUtil.infoOf (tgt, ty)
159 : jhr 2773 val (nAxes, domAxisKind) = (case nAxes
160 :     of NONE => (1, Nrrd.KindList)
161 :     | SOME n => (n, Nrrd.KindSpace)
162 : jhr 2712 (* end case *))
163 :     val nDataAxes = if (axisKind = Nrrd.KindScalar) then 0 else 1
164 : jhr 2773 val sizesDim = nAxes + nDataAxes
165 : jhr 2712 (* generate the sizes initialization code *)
166 :     val initSizes = let
167 :     val dimSizes = let
168 : jhr 2764 val dcl = CL.mkDecl(CL.T_Array(sizeTy, SOME sizesDim), "sizes", NONE)
169 : jhr 2712 in
170 :     if (axisKind = Nrrd.KindScalar)
171 :     then [dcl]
172 :     else [dcl, setSizes(0, mkInt nElems)]
173 :     end
174 :     in
175 : jhr 2773 if #isArray tgt
176 : jhr 2712 then dimSizes @
177 :     List.tabulate (nAxes, fn i =>
178 :     setSizes(i+nDataAxes, CL.mkSubscript(CL.mkIndirect(wrldV, "size"), mkInt(nAxes-i-1))))
179 : jhr 3183 else dimSizes (* raise Fail "output for collection is unimplemented" *)
180 : jhr 2712 end
181 : jhr 3185 (* code to call the output helper *)
182 :     val copyCode = if #isArray tgt
183 :     then CL.mkApply("OutputGridFixed", [
184 :     CL.mkVar "wrld", CL.mkInt(IntInf.fromInt sizesDim), CL.mkVar "sizes",
185 :     CL.mkVar(NrrdEnums.tyToEnum nrrdType),
186 :     CL.mkIndirect(CL.mkVar "wrld", OCLNames.getOutputKern name), CL.mkVar "nData"
187 :     ])
188 :     else CL.mkApply("OutputCollectionFixed", [
189 :     CL.mkVar "wrld", CL.mkSubscript(CL.mkVar "sizes", CL.mkInt 0),
190 :     CL.mkVar(NrrdEnums.tyToEnum nrrdType),
191 :     CL.mkIndirect(CL.mkVar "wrld", OCLNames.getOutputKern name), CL.mkVar "nData"
192 :     ])
193 : jhr 2712 (* the function body *)
194 :     val stms =
195 :     CL.mkComment["Compute sizes of nrrd file"] ::
196 :     initSizes @
197 : jhr 2770 [CL.mkReturn(SOME(CL.mkApply("OutputGridFixed", [
198 : jhr 2764 CL.mkVar "wrld", CL.mkInt(IntInf.fromInt sizesDim), CL.mkVar "sizes",
199 : jhr 2770 CL.mkVar(NrrdEnums.tyToEnum nrrdType),
200 : jhr 3125 CL.mkIndirect(CL.mkVar "wrld", OCLNames.getOutputKern name), CL.mkVar "nData"
201 : jhr 2764 ])))]
202 : jhr 2712 in
203 :     ([CL.PARAM([], nrrdPtrTy, "nData")], CL.mkBlock stms)
204 :     end
205 : jhr 2694
206 :     fun gen (tgt : Properties.props, nAxes) = let
207 :     fun getFn snapshot (ty, name) = let
208 :     val funcName = if snapshot
209 : jhr 3095 then CN.snapshotGet(tgt, name)
210 :     else CN.outputGet(tgt, name)
211 : jhr 2694 fun mkFunc (params, body) =
212 : jhr 3095 CL.D_Func([], CL.boolTy, funcName, CL.PARAM([], CN.worldPtrTy tgt, "wrld")::params, body)
213 : jhr 2694 in
214 :     case ty
215 :     of Ty.DynSeqTy ty' => raise Fail "dynamic sequences not supported for OpenCL"
216 :     | _ => mkFunc (genFixedOutput(tgt, snapshot, nAxes, ty, name))
217 :     (* end case *)
218 :     end
219 :     fun gen' outputs = let
220 :     val getFns = List.map (getFn false) outputs
221 :     val allFns = if (#exec tgt)
222 :     then getFns @ OutputUtil.genOutput(tgt, outputs)
223 :     else if (#snapshot tgt)
224 :     then List.map (getFn true) outputs @ getFns
225 :     else getFns
226 :     in
227 : jhr 2699 allFns
228 : jhr 2694 end
229 :     in
230 :     gen'
231 :     end
232 :    
233 : jhr 2762 (* TODO: we should provide a command-line option to batch output; i.e., to deal with all output
234 :     * variables in one kernel call.
235 :     *)
236 :     fun genKernels (tgt : Properties.props, strandTy, nAxes) = List.map (mkCopyKernel tgt strandTy)
237 : jhr 2694
238 : jhr 2648 end

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0