SCM Repository
[diderot] / branches / cuda / src / compiler / cuda-target / tree-to-cuda.sml |
View of /branches/cuda/src/compiler/cuda-target/tree-to-cuda.sml
Parent Directory
|
Revision Log
Revision 1677 -
(download)
(annotate)
Fri Dec 16 22:17:33 2011 UTC (9 years, 2 months ago) by jhr
File size: 18178 byte(s)
Fri Dec 16 22:17:33 2011 UTC (9 years, 2 months ago) by jhr
File size: 18178 byte(s)
Working on CUDA target
(* tree-to-cuda.sml * * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu) * All rights reserved. * * Translate TreeIL to the CUDA version of CLang. *) structure TreeToCUDA : sig datatype var = V of (CLang.ty * CLang.var) type env = var TreeIL.Var.Map.map val trType : TreeIL.Ty.ty -> CLang.ty val trBlock : env * TreeIL.block -> CLang.stm val trFragment : env * TreeIL.block -> env * CLang.stm list val trAssign : env * TreeIL.var * TreeIL.exp -> CLang.stm list val trExp : env * TreeIL.exp -> CLang.exp (* vector indexing support. Arguments are: vector, index *) val vecIndex : CLang.exp * int -> CLang.exp end = struct structure CL = CLang structure RN = RuntimeNames structure IL = TreeIL structure Op = IL.Op structure Ty = IL.Ty structure V = IL.Var datatype var = datatype CLang.typed_var type env = var TreeIL.Var.Map.map fun lookup (env, x) = (case V.Map.find (env, x) of SOME(V(_, x')) => x' | NONE => raise Fail(concat["lookup(_, ", V.name x, ")"]) (* end case *)) (* integer literal expression *) fun intExp (i : int) = CL.mkInt(IntInf.fromInt i) (* the type of an image-data pointer. *) fun imageDataPtrTy rTy = CL.T_Qual("__global", CL.T_Ptr(CL.T_Num rTy)) (* translate TreeIL types to CLang types *) fun trType ty = (case ty of Ty.BoolTy => CLang.T_Named "uint" | Ty.StringTy => CL.charPtr | Ty.IntTy => !RN.gIntTy | Ty.TensorTy[] => !RN.gRealTy | Ty.TensorTy[n] => CL.T_Named(RN.vecTy n) | Ty.TensorTy[n, m] => CL.T_Named(RN.matTy(n,m)) | Ty.SeqTy(Ty.IntTy, n) => CL.T_Named(RN.ivecTy n) | Ty.SeqTy(Ty.TensorTy[] , n) => CL.T_Named(RN.vecTy n) | Ty.AddrTy(ImageInfo.ImgInfo{ty=(_, rTy), ...}) => imageDataPtrTy rTy | Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}) => CL.T_Named(RN.imageTy dim) | _ => raise Fail(concat["TreeToC.trType(", Ty.toString ty, ")"]) (* end case *)) (* generate new variables *) local val count = ref 0 fun freshName prefix = let val n = !count in count := n+1; concat[prefix, "_", Int.toString n] end in fun tmpVar ty = freshName "tmp" fun freshVar prefix = freshName prefix end (* local *) (* translate IL basis functions *) fun trApply (f, args) = CL.mkApply(ILBasis.toString f, args) (* vector indexing support. Arguments are: vector, index *) local val fields = Vector.fromList [ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "sa", "sb", "sc", "sd", "se", "sf" ] in fun vecIndex (v, ix) = CL.mkSelect(v, Vector.sub(fields, ix)) end (* translate a variable use *) fun trVar (env, x) = (case V.kind x of IL.VK_Global => CL.mkIndirect(CL.E_Var RN.globalsVarName, lookup(env, x)) | IL.VK_Local => CL.mkVar(lookup(env, x)) (* end case *)) (* translate a state-variable use *) fun trStateVar (IL.SV{name, ...}) = CL.mkIndirect(CL.mkVar "selfIn", name) fun castArgs ty = List.map (fn e => CL.mkCast(ty, e)) (* Translate a TreeIL operator application to a CLang expression *) fun trOp (rator, args) = (case (rator, args) of (Op.Add ty, [a, b]) => CL.mkBinOp(a, CL.#+, b) | (Op.Sub ty, [a, b]) => CL.mkBinOp(a, CL.#-, b) | (Op.Mul ty, [a, b]) => CL.mkBinOp(a, CL.#*, b) | (Op.Div ty, [a, b]) => CL.mkBinOp(a, CL.#/, b) | (Op.Neg ty, [a]) => CL.mkUnOp(CL.%-, a) | (Op.Abs(Ty.IntTy), args) => CL.mkApply("abs", args) | (Op.Abs(Ty.TensorTy[]), args) => CL.mkApply(RN.fabs, args) | (Op.Abs(Ty.TensorTy[_]), args) => CL.mkApply(RN.fabs, args) | (Op.Abs ty, [a]) => raise Fail(concat["Abs<", Ty.toString ty, ">"]) | (Op.LT ty, [a, b]) => CL.mkBinOp(a, CL.#<, b) | (Op.LTE ty, [a, b]) => CL.mkBinOp(a, CL.#<=, b) | (Op.EQ ty, [a, b]) => CL.mkBinOp(a, CL.#==, b) | (Op.NEQ ty, [a, b]) => CL.mkBinOp(a, CL.#!=, b) | (Op.GTE ty, [a, b]) => CL.mkBinOp(a, CL.#>=, b) | (Op.GT ty, [a, b]) => CL.mkBinOp(a, CL.#>, b) | (Op.Not, [a]) => CL.mkUnOp(CL.%!, a) | (Op.Max, args) => CL.mkApply(RN.max, castArgs (!RN.gRealTy) args) | (Op.Min, args) => CL.mkApply(RN.min, castArgs (!RN.gRealTy) args) | (Op.Clamp ty, [lo, hi, x]) => CL.mkApply(RN.clamp, [x, lo, hi]) | (Op.Lerp ty, args) => (case ty of Ty.TensorTy[] => CL.mkApply(RN.lerp, castArgs (!RN.gRealTy) args) | Ty.TensorTy[n] => CL.mkApply(RN.lerp, castArgs (CL.T_Named(RN.vecTy n)) args) | _ => raise Fail(concat[ "lerp<", Ty.toString ty, "> not supported" ]) (* end case *)) | (Op.Dot d, args) => CL.E_Apply(RN.dot, args) | (Op.MulVecMat(m, n), args) => if (1 < m) andalso (m < 4) andalso (m = n) then CL.E_Apply(RN.mulVecMat(m,n), args) else raise Fail "unsupported vector-matrix multiply" | (Op.MulMatVec(m, n), args) => if (1 < m) andalso (m < 4) andalso (m = n) then CL.E_Apply(RN.mulMatVec(m,n), args) else raise Fail "unsupported matrix-vector multiply" | (Op.MulMatMat(m, n, p), args) => if (1 < m) andalso (m < 4) andalso (m = n) andalso (n = p) then CL.E_Apply(RN.mulMatMat(m,n,p), args) else raise Fail "unsupported matrix-matrix multiply" | (Op.Cross, args) => CL.E_Apply(RN.cross, args) | (Op.Norm(Ty.TensorTy[n]), args) => CL.E_Apply(RN.length, args) | (Op.Norm(Ty.TensorTy[m,n]), args) => CL.E_Apply(RN.norm(m,n), args) | (Op.Normalize d, args) => CL.E_Apply(RN.normalize, args) | (Op.Scale(Ty.TensorTy[n]), [s, v]) => CL.mkBinOp(s, CL.#*, v) | (Op.PrincipleEvec ty, _) => raise Fail "PrincipleEvec unimplemented" | (Op.Select(Ty.TupleTy tys, i), [a]) => raise Fail "Select unimplemented" | (Op.Index(Ty.SeqTy(Ty.IntTy, n), i), [a]) => vecIndex (a, i) | (Op.Index(Ty.TensorTy[n], i), [a]) => vecIndex (a, i) | (Op.Subscript(Ty.SeqTy(Ty.IntTy, n)), [v, CL.E_Int(ix, _)]) => vecIndex (v, Int.fromLarge ix) | (Op.Subscript(Ty.SeqTy(Ty.IntTy, n)), [v, ix]) => let val unionTy = CL.T_Named(concat["union", Int.toString n, !RN.gIntSuffix, "_t"]) val vecExp = CL.mkSelect(CL.mkCast(unionTy, v), "i") in CL.mkSubscript(vecExp, ix) end | (Op.Subscript(Ty.TensorTy[n]), [v, CL.E_Int(ix, _)]) => vecIndex (v, Int.fromLarge ix) | (Op.Subscript(Ty.TensorTy[n]), [v, ix]) => let val unionTy = CL.T_Named(concat["union", Int.toString n, !RN.gRealSuffix, "_t"]) val vecExp = CL.mkSelect(CL.mkCast(unionTy, v), "r") in CL.mkSubscript(vecExp, ix) end | (Op.Subscript(Ty.TensorTy[_,_]), [m, ix, CL.E_Int(jx, _)]) => vecIndex(CL.mkSubscript(m, ix), Int.fromLarge jx) | (Op.Subscript(Ty.TensorTy[_,n]), [m, ix, jx]) => let val unionTy = CL.T_Named(concat["union", Int.toString n, !RN.gRealSuffix, "_t"]) val vecExp = CL.mkSelect(CL.mkCast(unionTy, CL.mkSubscript(m, ix)), "r") in CL.mkSubscript(vecExp, jx) end | (Op.Subscript ty, t::(ixs as _::_)) => raise Fail(concat["Subscript<", Ty.toString ty, "> unsupported"]) | (Op.Ceiling d, args) => CL.mkApply("ceil", args) | (Op.Floor d, args) => CL.mkApply("floor", args) | (Op.Round d, args) => CL.mkApply("round", args) | (Op.Trunc d, args) => CL.mkApply("trunc", args) | (Op.IntToReal, [a]) => CL.mkCast(!RN.gRealTy, a) | (Op.RealToInt 1, [a]) => CL.mkCast(!RN.gIntTy, a) | (Op.RealToInt d, args) => CL.mkApply(RN.vecftoi d, args) (* FIXME: need type info *) | (Op.ImageAddress(ImageInfo.ImgInfo{ty=(_,rTy), ...}), [a as CL.E_Indirect(_,field)]) => let val cTy = imageDataPtrTy rTy in CL.mkCast(cTy, CL.mkSelect(CL.mkVar RN.globalImageDataName, RN.imageDataName field)) end | (Op.LoadVoxels(info, 1), [a]) => let val realTy as CL.T_Num rTy = !RN.gRealTy val a = CL.E_UnOp(CL.%*, a) in if (rTy = ImageInfo.sampleTy info) then a else CL.E_Cast(realTy, a) end | (Op.LoadVoxels _, [a]) => raise Fail("impossible " ^ Op.toString rator) | (Op.PosToImgSpace(ImageInfo.ImgInfo{dim, ...}), [img, pos]) => CL.mkApply(RN.toImageSpace dim, [CL.mkUnOp(CL.%&,img), pos]) | (Op.TensorToWorldSpace(info, ty), [v, x]) => CL.mkApply(RN.toWorldSpace ty, [CL.mkUnOp(CL.%&,v), x]) | (Op.LoadImage info, [a]) => raise Fail("impossible " ^ Op.toString rator) | (Op.Inside(ImageInfo.ImgInfo{dim, ...}, s), [pos, img]) => CL.mkApply(RN.inside dim, [pos, CL.mkUnOp(CL.%&,img), intExp s]) | (Op.Input(ty, name, desc), []) => raise Fail("impossible " ^ Op.toString rator) | (Op.InputWithDefault(ty, name, desc), [a]) => raise Fail("impossible " ^ Op.toString rator) | _ => raise Fail(concat[ "unknown or incorrect operator ", Op.toString rator ]) (* end case *)) fun trExp (env, e) = (case e of IL.E_State x => trStateVar x | IL.E_Var x => trVar (env, x) | IL.E_Lit(Literal.Int n) => CL.mkIntTy(n, !RN.gIntTy) | IL.E_Lit(Literal.Bool b) => CL.mkBool b | IL.E_Lit(Literal.Float f) => CL.mkFlt(f, !RN.gRealTy) | IL.E_Lit(Literal.String s) => CL.mkStr s | IL.E_Op(rator, args) => trOp (rator, trExps(env, args)) | IL.E_Apply(f, args) => trApply(f, trExps(env, args)) | IL.E_Cons(Ty.TensorTy[n], args) => CL.mkApply(RN.mkVec n, trExps(env, args)) | IL.E_Cons(ty, _) => raise Fail(concat["E_Cons(", Ty.toString ty, ", _) in expression"]) (* end case *)) and trExps (env, exps) = List.map (fn exp => trExp(env, exp)) exps fun trLHSVar (env, lhs) = (case V.kind lhs of IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, lookup(env, lhs)) | IL.VK_Local => CL.mkVar(lookup(env, lhs)) (* end case *)) fun trLHSStateVar (IL.SV{name, ...}) = CL.mkIndirect(CL.mkVar "selfOut", name) fun trSet (env, lhs, rhs) = ( (* certain rhs forms, such as those that return a matrix, * require a function call instead of an assignment *) case rhs of IL.E_Op(Op.Add(Ty.TensorTy[m,n]), args) => [CL.mkCall(RN.addMat(m,n), lhs :: trExps(env, args))] | IL.E_Op(Op.Sub(Ty.TensorTy[m,n]), args) => [CL.mkCall(RN.subMat(m,n), lhs :: trExps(env, args))] | IL.E_Op(Op.Neg(Ty.TensorTy[m,n]), args) => [CL.mkCall(RN.scaleMat(m,n), lhs :: intExp ~1 :: trExps(env, args))] | IL.E_Op(Op.Scale(Ty.TensorTy[m,n]), args) => [CL.mkCall(RN.scaleMat(m,n), lhs :: trExps(env, args))] | IL.E_Op(Op.MulMatMat(m,n,p), args) => [CL.mkCall(RN.mulMatMat(m,n,p), lhs :: trExps(env, args))] | IL.E_Op(Op.Identity n, args) => [CL.mkCall(RN.identityMat n, [lhs])] | IL.E_Op(Op.Zero(Ty.TensorTy[m,n]), args) => [CL.mkCall(RN.zeroMat(m,n), [lhs])] | IL.E_Op(Op.TensorToWorldSpace(info, ty as Ty.TensorTy[_,_]), [img,src]) => [CL.mkCall(RN.toWorldSpace ty, lhs :: [CL.mkUnOp(CL.%&,trExp(env, img)),trExp(env, src)] )] | IL.E_Op(Op.LoadVoxels(info, n), [a]) => if (n > 1) then let val stride = ImageInfo.stride info val rTy = ImageInfo.sampleTy info val vp = freshVar "vp" val needsCast = (CL.T_Num rTy <> !RN.gRealTy) fun mkLoad i = let val e = CL.mkSubscript(CL.mkVar vp, intExp(i*stride)) in if needsCast then CL.mkCast(!RN.gRealTy, e) else e end in [ CL.mkDecl(imageDataPtrTy rTy, vp, SOME(CL.I_Exp(trExp(env, a)))), CL.mkAssign(lhs, CL.mkApply(RN.mkVec n, List.tabulate (n, mkLoad))) ] end else [CL.mkAssign(lhs, trExp(env, rhs))] | IL.E_Cons(Ty.TensorTy[n,m], args) => let (* matrices are represented as arrays of union<d><ty>_t vectors *) fun doRows (_, []) = [] | doRows (i, e::es) = CL.mkAssign(CL.mkSubscript(lhs, intExp i), e) :: doRows (i+1, es) in doRows (0, trExps(env, args)) end | IL.E_Var x => (case IL.Var.ty x of Ty.TensorTy[n,m] => [CL.mkCall(RN.copyMat(n,m), [lhs, trVar(env, x)])] | _ => [CL.mkAssign(lhs, trVar(env, x))] (* end case *)) | _ => [CL.mkAssign(lhs, trExp(env, rhs))] (* end case *)) fun trAssign (env, lhs, rhs) = trSet (env, trLHSVar (env, lhs), rhs) fun trLocals (env : env, locals) = List.foldl (fn (x, env) => V.Map.insert(env, x, V(trType(V.ty x), V.name x))) env locals (* generate code to check the status of runtime-system calls *) fun checkSts mkDecl = let val sts = freshVar "sts" in mkDecl sts @ [CL.mkIfThen( CL.mkBinOp(CL.mkVar "DIDEROT_OK", CL.#!=, CL.mkVar sts), CL.mkCall("exit", [intExp 1]))] end fun trStms (env, stms) = let fun trStmt (env, stm) = (case stm of IL.S_Comment text => [CL.mkComment text] | IL.S_Assign([x], exp) => trAssign (env, x, exp) | IL.S_IfThen(cond, thenBlk) => [CL.mkIfThen(trExp(env, cond), trBlk(env, thenBlk))] | IL.S_IfThenElse(cond, thenBlk, elseBlk) => [CL.mkIfThenElse(trExp(env, cond), trBlk(env, thenBlk), trBlk(env, elseBlk))] | IL.S_New _ => raise Fail "new not supported yet" (* FIXME *) | IL.S_Save([x], exp) => trSet (env, trLHSStateVar x, exp) (* FIXME: I think that S_LoadImage should never happen in OpenCL code [jhr] *) | IL.S_LoadImage(lhs, dim, name) => checkSts (fn sts => let val lhs = lookup(env, lhs) val name = trExp(env, name) val imgTy = CL.T_Named(RN.imageTy dim) val loadFn = RN.loadImage dim in [ CL.mkDecl( CL.T_Named RN.statusTy, sts, SOME(CL.I_Exp(CL.E_Apply(loadFn, [name, CL.mkUnOp(CL.%&, CL.E_Var lhs)])))) ] end) (* FIXME: I think that S_Input should never happen in OpenCL code [jhr] *) | IL.S_Input(lhs, name, desc, optDflt) => checkSts (fn sts => let val inputFn = RN.input(V.ty lhs) val lhs = lookup(env, lhs) val (initCode, hasDflt) = (case optDflt of SOME e => ([CL.mkAssign(CL.E_Var lhs, trExp(env, e))], true) | NONE => ([], false) (* end case *)) val code = [ CL.mkDecl( CL.T_Named RN.statusTy, sts, SOME(CL.I_Exp(CL.E_Apply(inputFn, [ CL.E_Str name, CL.mkUnOp(CL.%&, CL.mkIndirect(CL.mkVar (RN.globalsVarName), lhs)), CL.mkBool hasDflt ])))) ] in initCode @ code end) | IL.S_Exit args => [CL.mkReturn NONE] | IL.S_Active => [CL.mkReturn(SOME(CL.mkVar RN.kActive))] | IL.S_Stabilize => [CL.mkReturn(SOME(CL.mkVar RN.kStabilize))] | IL.S_Die => [CL.mkReturn(SOME(CL.mkVar RN.kDie))] (* end case *)) in List.foldr (fn (stm, stms) => trStmt(env, stm)@stms) [] stms end and trBlk (env, IL.Block{locals, body}) = let val env = trLocals (env, locals) val stms = trStms (env, body) fun mkDecl (x, stms) = (case V.Map.find (env, x) of SOME(V(ty, x')) => CL.mkDecl(ty, x', NONE) :: stms | NONE => raise Fail(concat["mkDecl(", V.name x, ", _)"]) (* end case *)) val stms = List.foldr mkDecl stms locals in CL.mkBlock stms end fun trFragment (env, IL.Block{locals, body}) = let val env = trLocals (env, locals) val stms = trStms (env, body) fun mkDecl (x, stms) = (case V.Map.find (env, x) of SOME(V(ty, x')) => CL.mkDecl(ty, x', NONE) :: stms | NONE => raise Fail(concat["mkDecl(", V.name x, ", _)"]) (* end case *)) val stms = List.foldr mkDecl stms locals in (env, stms) end val trBlock = trBlk end
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |