SCM Repository
Annotation of /branches/vis12-cl/src/compiler/cl-target/gen-output.sml
Parent Directory
|
Revision Log
Revision 2743 - (view) (download)
1 : | jhr | 2648 | (* gen-output.sml |
2 : | * | ||
3 : | * COPYRIGHT (c) 2014 The Diderot Project (http://diderot-language.cs.uchicago.edu) | ||
4 : | * All rights reserved. | ||
5 : | * | ||
6 : | * Generate strand output functions for the OpenCL target. The output formats always have | ||
7 : | * a single axis for the data elements followed by one, or more, axes for the output structure. | ||
8 : | * There are two cases that we handle: | ||
9 : | * | ||
10 : | * grid, fixed-size elements: | ||
11 : | * nrrd has object axis followed by grid axes | ||
12 : | * | ||
13 : | * collection, fixed-size elements | ||
14 : | * nrrd has object axis followed by a single axis | ||
15 : | * | ||
16 : | * NOTE: the C target also supports dynamic-sized elements (i.e., dynamic sequences), but the | ||
17 : | * OpenCL target does not support these yet. | ||
18 : | * | ||
19 : | * The object axis kind depends on the output type, but it will either be one of the tensor types | ||
20 : | * that Teem knows about or else nrrdKindList. In any case, the data elements are written as a | ||
21 : | * flat vector following the in-memory layout. The other axes in the file will have nrrdKindSpace | ||
22 : | * as their kind. | ||
23 : | * | ||
24 : | * TODO: some of this code is common with c-target/gen-output.sml (e.g., writing outputs to | ||
25 : | * files), so we should refactor it. | ||
26 : | * | ||
27 : | * TODO: for sequences of tensors (e.g., tensor[3]{2}), we should use a separate axis for the | ||
28 : | * sequence dimension with kind nrrdKindList. | ||
29 : | *) | ||
30 : | |||
31 : | structure GenOutput : sig | ||
32 : | |||
33 : | (* gen (props, nAxes) outputs | ||
34 : | * returns code for getting the output/snapshot nrrds from the program state. | ||
35 : | * The arguments are: | ||
36 : | * props - the target information | ||
37 : | * nAxes - the number of axes in the grid of strands (NONE for a collection) | ||
38 : | * outputs - the list of output state variables paired with their TreeIL types | ||
39 : | * The return value is the list of function declarations that implement the | ||
40 : | * public output queries, together with the snapshot queries or the declarations | ||
41 : | * produced by OutputUtil.genOutput, depending on the target properties. The OpenCL | ||
42 : | * kernels used to get the output variables are produced separately by genKernels. | ||
43 : | *) | ||
44 : | jhr | 2699 | val gen : Properties.props * int option -> (TreeIL.Ty.ty * string) list -> CLang.decl list |
45 : | jhr | 2648 | |
46 : | jhr | 2699 | val genKernels : Properties.props * int option -> (TreeIL.Ty.ty * string) list -> CLang.decl list |
47 : | |||
48 : | jhr | 2648 | end = struct |
49 : | |||
50 : | structure IL = TreeIL | ||
51 : | structure V = IL.Var | ||
52 : | structure Ty = IL.Ty | ||
53 : | structure CL = CLang | ||
54 : | jhr | 2743 | structure RN = RuntimeNames |
55 : | jhr | 2648 | structure Nrrd = NrrdEnums |
56 : | structure U = CLUtil | ||
57 : | |||
(* mapi f [x0, x1, ...] returns [f(0, x0), f(1, x1), ...].  The function f is applied
 * to the elements in left-to-right order.
 *)
fun mapi f l = let
    (* thread the current index and the reversed list of results through the fold *)
      fun step (x, (i, acc)) = (i+1, f(i, x) :: acc)
      val (_, revResults) = List.foldl step (0, []) l
      in
        List.rev revResults
      end
64 : | |||
(* C types used in the generated code *)
local
  (* pointer to the named type tyName *)
    fun ptrToNamed tyName = CL.T_Ptr(CL.T_Named tyName)
in
val sizeTy = CL.T_Named "size_t"
val nrrdPtrTy = ptrToNamed "Nrrd"
(* pointer to the type named by RuntimeNames.worldTy for the target *)
fun wrldPtr tgt = ptrToNamed (RN.worldTy tgt)
(* pointer to the type named by CLNames.globalsTy for the target *)
fun globPtr tgt = ptrToNamed (CLNames.globalsTy tgt)
end (* local *)

(* make an integer literal expression from an int *)
fun mkInt n = CL.mkInt(IntInf.fromInt n)
70 : | jhr | 2694 | |
(* variables in the generated code *)
val (wrldV, sizesV, nDataV) = (CL.mkVar "wrld", CL.mkVar "sizes", CL.mkVar "nData")

(* utility functions for initializing the sizes array:
 *      sizes i          -- the expression "sizes[i]"
 *      setSizes (i, v)  -- the assignment "sizes[i] = v"
 *)
fun sizes idx = CL.mkSubscript(sizesV, mkInt idx)
fun setSizes (idx, rhs) = CL.mkAssign(CL.mkSubscript(sizesV, mkInt idx), rhs)
79 : | |||
(* mkCopyKernel tgt (ty, name)
 * creates a kernel for copying the given output state variable to the output buffer.
 * The kernel is named name ^ "Kern" and takes two parameters built with U.globalParam:
 * "glob" (of type globPtr tgt) and "outBuf" (a pointer to the GPU translation of ty).
 *
 * NOTES: if the output is a grid, then we want to use the grid indices as a guide for
 * processing the output.  Otherwise, the order does not matter, but we do need to worry
 * about synchronizing writes to the output buffer.
 *)
fun mkCopyKernel tgt (ty : TreeIL.Ty.ty, name) = let
      val gpuTy = CLTyTranslate.toGPUType ty
    (* FIXME: the kernel body is not implemented yet; it is an empty block *)
      val kernBody = CL.mkBlock[]
      val kernName = name ^ "Kern"
      val kernParams = [
              U.globalParam(globPtr tgt, "glob"),
              U.globalParam(CL.T_Ptr gpuTy, "outBuf")
            ]
      in
        U.mkKernel(kernName, kernParams, kernBody)
      end
96 : | |||
(* genFixedOutput (tgt, snapshot, nAxes, ty, name)
 * creates the extra parameters and the body of an output function for fixed-size
 * outputs.  The result is a pair (params, body), where params is the single "nData"
 * parameter of type nrrdPtrTy.  The intended structure of the function body is:
 *
 *      declare and compute sizes array
 *      allocate GPU data object
 *      invoke kernel to copy data from strand state into data buffer
 *      allocate nrrd nData
 *      copy data from GPU to nrrd
 *
 * At present only the sizes computation and the nData allocation are generated; the
 * remaining steps appear as comments in the generated code (see the FIXME below).
 * Collections (nAxes = NONE) are not implemented and cause Fail to be raised.  The
 * snapshot and name arguments are currently unused.
 *)
fun genFixedOutput (tgt, snapshot, nAxes, ty, name) = let
      val (_, nrrdType, axisKind, nElems) = OutputUtil.infoOf (tgt, ty)
      val (isArray, nGridAxes) = (case nAxes
             of SOME n => (true, n)
              | NONE => (false, 1)
            (* end case *))
    (* scalar outputs do not get a data axis in front of the grid axes *)
      val isScalar = (axisKind = Nrrd.KindScalar)
      val nDataAxes = if isScalar then 0 else 1
      val nrrdDim = nGridAxes + nDataAxes
    (* generate the sizes initialization code *)
      val initSizes = let
            val sizesDcl = CL.mkDecl(CL.T_Array(sizeTy, SOME nrrdDim), "sizes", NONE)
          (* the size of the data axis (if any) is the number of elements *)
            val dataSizes = if isScalar then [] else [setSizes(0, mkInt nElems)]
          (* the i'th grid axis gets element nGridAxes-i-1 of the "size" field of wrld;
           * i.e., the grid sizes are copied in reverse order.
           *)
            fun gridSize i = setSizes (
                  i + nDataAxes,
                  CL.mkSubscript(CL.mkIndirect(wrldV, "size"), mkInt(nGridAxes - i - 1)))
            in
              if not isArray
                then raise Fail "output for collection is unimplemented"
                else sizesDcl :: dataSizes @ List.tabulate (nGridAxes, gridSize)
            end
    (* code to copy the data from the GPU *)
      val copyCode = [] (* FIXME *)
    (* the function body *)
      val stms = List.concat [
              [CL.mkComment["Compute sizes of nrrd file"]],
              initSizes,
              [
                CL.mkComment["Allocate GPU buffer"],
                CL.mkComment["Run copy kernel"],
                CL.mkComment["Allocate nData nrrd"],
                OutputUtil.maybeAlloc (nDataV, Nrrd.tyToEnum nrrdType, nrrdDim),
                CL.mkComment["copy data to output nrrd"]
              ],
              copyCode,
              [CL.mkReturn(SOME(CL.mkVar "false"))]
            ]
      in
        ([CL.PARAM([], nrrdPtrTy, "nData")], CL.mkBlock stms)
      end
145 : | jhr | 2694 | |
(* gen (tgt, nAxes) outputs
 * returns the function declarations for getting the output variables.  There is one
 * output query function per element of outputs.  If the exec property of tgt is set,
 * these are followed by the declarations from OutputUtil.genOutput; otherwise, if the
 * snapshot property is set, they are preceded by one snapshot query function per
 * output.  Raises Fail if an output has a dynamic-sequence type.
 *)
fun gen (tgt : Properties.props, nAxes) outputs = let
    (* generate the query function for one output variable; the snapshot flag selects
     * between the snapshot and output function names.
     *)
      fun getFn snapshot (ty, name) = let
            val funcName = (case snapshot
                   of true => RN.snapshotGet(tgt, name)
                    | false => RN.outputGet(tgt, name)
                  (* end case *))
            in
              case ty
               of Ty.DynSeqTy _ => raise Fail "dynamic sequences not supported for OpenCL"
                | _ => let
                    val (params, body) = genFixedOutput(tgt, snapshot, nAxes, ty, name)
                  (* every query function takes the world pointer as its first parameter *)
                    val allParams = CL.PARAM([], wrldPtr tgt, "wrld") :: params
                    in
                      CL.D_Func([], CL.boolTy, funcName, allParams, body)
                    end
              (* end case *)
            end
      val getFns = List.map (getFn false) outputs
      in
        case (#exec tgt, #snapshot tgt)
         of (true, _) => getFns @ OutputUtil.genOutput(tgt, outputs)
          | (false, true) => List.map (getFn true) outputs @ getFns
          | (false, false) => getFns
        (* end case *)
      end
172 : | |||
(* genKernels (tgt, nAxes) outputs
 * returns one copy kernel (see mkCopyKernel) per output variable, in list order.
 * The nAxes argument is currently unused.
 *)
fun genKernels (tgt : Properties.props, _ (* nAxes *)) outputs =
      List.map (mkCopyKernel tgt) outputs
174 : | jhr | 2694 | |
175 : | jhr | 2648 | end |
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |