(* cuda-target.sml * * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu) * All rights reserved. *) structure CUDATarget : TARGET = struct structure IL = TreeIL structure V = IL.Var structure Ty = IL.Ty structure CL = CLang structure RN = RuntimeNames structure ToCL = TreeToCUDA structure N = CNames structure P = Paths (* structure HF = CLHeaderFrag structure SF = CLSchedFrag *) (* revmap f l == List.rev(List.map f l) *) fun revmap f = let fun rmap ([], l) = l | rmap (x::r, l) = rmap (r, f x :: l) in fn l => rmap (l, []) end (* common arithmetic *) fun #+# (a, b) = CL.mkBinOp(a, CL.#+, b) fun #*# (a, b) = CL.mkBinOp(a, CL.#*, b) infix 5 #+# infix 6 #*# (* translate TreeIL types to shadow types *) fun shadowTy ty = (case ty of Ty.BoolTy => CL.T_Named "cl_uint" | Ty.IntTy => CL.T_Named(RN.shadowIntTy ()) | Ty.TensorTy[] => CL.T_Named(RN.shadowRealTy ()) | Ty.TensorTy[n] => CL.T_Named(RN.shadowVecTy n) | Ty.TensorTy[n, m] => CL.T_Named(RN.shadowMatTy(n,m)) | Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}) => CL.T_Named(RN.shadowImageTy dim) | _ => raise Fail(concat["TreeToC.trType(", Ty.toString ty, ")"]) (* end case *)) (* FIXME: add comments that more clearly explain the difference between convertToShadow and * convertStrandToShadow *) (* translate TreeIL types to shadow types *) fun convertToShadow (ty, name) = (case ty of Ty.IntTy => CL.mkAssign( CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name), CL.mkIndirect(CL.mkVar RN.globalsVarName, name)) | Ty.TensorTy[n]=> CL.mkCall(RN.convertToShadowVec n, [ CL.mkUnOp(CL.%&, CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name)), CL.mkIndirect(CL.mkVar RN.globalsVarName, name) ]) | Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}) => CL.mkCall(RN.shadowImageFunc dim, [ CL.mkVar "context", CL.mkUnOp(CL.%&, CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name)), CL.mkIndirect(CL.mkVar RN.globalsVarName, name) ]) | Ty.TensorTy[n, m] => CL.mkCall(RN.convertToShadowMat(m,n), [ CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name), CL.mkIndirect(CL.mkVar RN.globalsVarName, name) ]) | _ => CL.mkAssign( CL.mkSelect(CL.mkVar RN.shadowGlaobalsName,name), CL.mkIndirect(CL.mkVar RN.globalsVarName, name)) (* end case *)) (* generate code to convert strand TreeIL types to shadow types *) fun convertStrandToShadow (ty, name, selfIn, selfOut) = (case ty of Ty.IntTy => CL.mkAssign( CL.mkIndirect(CL.mkVar selfIn, name), CL.mkIndirect(CL.mkVar selfOut, name)) | Ty.TensorTy[n]=> CL.mkCall(RN.convertToShadowVec n, [ CL.mkUnOp(CL.%&, CL.mkIndirect(CL.mkVar selfIn, name)), CL.mkIndirect(CL.mkVar selfOut, name) ]) | Ty.TensorTy[n, m] => CL.mkCall(RN.convertToShadowMat(m,n), [ CL.mkUnOp(CL.%&, CL.mkIndirect(CL.mkVar selfIn, name)), CL.mkIndirect(CL.mkVar selfOut, name) ]) | _ => CL.mkAssign( CL.mkIndirect(CL.mkVar selfIn, name), CL.mkIndirect(CL.mkVar selfOut, name)) (* end case *)) (* helper functions for specifying parameters in various address spaces *) fun clParam (spc, ty, x) = CL.PARAM([spc], ty, x) fun globalParam (ty, x) = CL.PARAM(["__global"], ty, x) fun constantParam (ty, x) = CL.PARAM(["__constant"], ty, x) fun localParam (ty, x) = CL.PARAM(["__local"], ty, x) fun privateParam (ty, x) = CL.PARAM(["__private"], ty, x) (* OpenCL global pointer type *) fun globalPtr ty = CL.T_Qual("__global", CL.T_Ptr ty) (* lvalue/rvalue state variable *) fun lvalueSV name = CL.mkIndirect(CL.mkVar "selfOut", name) fun rvalueSV name = CL.mkIndirect(CL.mkVar "selfIn", name) (* C variable translation *) structure TrCVar = struct type env = CL.typed_var TreeIL.Var.Map.map fun lookup (env, x) = (case V.Map.find (env, x) of SOME(CL.V(_, x')) => x' | NONE => raise Fail(concat["TrCVar.lookup(_, ", V.name x, ")"]) (* end case *)) (* translate a variable that occurs in an l-value context (i.e., as the target of an assignment) *) fun lvalueVar (env, x) = (case V.kind x of IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, lookup(env, x)) | IL.VK_Local => CL.mkVar(lookup(env, x)) (* end case *)) (* translate a variable that occurs in an r-value context *) fun rvalueVar (env, x) = (case V.kind x of IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, lookup(env, x)) | IL.VK_Local => CL.mkVar(lookup(env, x)) (* end case *)) (* translate a strand state variable that occurs in an l-value context *) fun lvalueStateVar (IL.SV{name, ...}) = lvalueSV name (* translate a strand state variable that occurs in an r-value context *) fun rvalueStateVar (IL.SV{name, ...}) = rvalueSV name end structure ToC = TreeToCFn (TrCVar) type var = CL.typed_var type exp = CL.exp type stm = CL.stm (* OpenCL specific types *) val clIntTy = CL.T_Named "cl_int" val clProgramTy = CL.T_Named "cl_program" val clKernelTy = CL.T_Named "cl_kernel" val clCmdQueueTy = CL.T_Named "cl_command_queue" val clContextTy = CL.T_Named "cl_context" val clDeviceIdTy = CL.T_Named "cl_device_id" val clPlatformIdTy = CL.T_Named "cl_platform_id" val clMemoryTy = CL.T_Named "cl_mem" val globPtrTy = CL.T_Ptr(CL.T_Named RN.globalsTy) (* FIXME: what are these for? *) datatype shadow_env = STRAND_SHADOW | GLOBAL_SHADOW (* variable or field that is mirrored between host and GPU *) type mirror_var = { (* FIXME: perhaps it would be cleaner to just track the TreeIL type of the variable? *) hostTy : CL.ty, (* variable type on Host (i.e., C type) *) shadowTy : CL.ty, (* host-side shadow type of GPU type *) gpuTy : CL.ty, (* variable's type on GPU (i.e., OpenCL type) *) hToS: stm, (* the statement that converts the variable to its *) (* shadow representation *) var : CL.var (* variable name *) } datatype strand = Strand of { name : string, tyName : string, state : mirror_var list, output : (Ty.ty * CL.var), (* the strand's output variable (only one for now) *) code : CL.decl list ref, init_code: CL.decl ref } datatype program = Prog of { name : string, (* stem of source file *) double : bool, (* true for double-precision support *) parallel : bool, (* true for multithreaded (or multi-GPU) target *) debug : bool, (* true for debug support in executable *) globals : mirror_var list ref, topDecls : CL.decl list ref, strands : strand AtomTable.hash_table, initially : CL.decl ref, numDims: int ref, (* number of dimensions in initially iteration *) imgGlobals: (string * int) list ref, prFn: CL.decl ref, outFn: CL.decl ref } datatype env = ENV of { info : env_info, vMap : var V.Map.map, scope : scope } and env_info = INFO of { prog : program } and scope = NoScope | GlobalScope | InitiallyScope | StrandScope (* strand initialization *) | MethodScope of StrandUtil.method_name (* method body; vars are state variables *) (* the supprted widths of vectors of reals on the target. *) (* FIXME: for OpenCL 1.1, 3 is also valid *) fun vectorWidths () = [2, 4, 8, 16] (* we do not support printing on the OpenCL target *) val supportsPrinting = false (* tests for whether various expression forms can appear inline *) fun inlineCons n = (n < 2) (* vectors are inline, but not matrices *) val inlineMatrixExp = false (* can matrix-valued expressions appear inline? *) (* TreeIL to target translations *) structure Tr = struct fun fragment (ENV{info, vMap, scope}, blk) = let val (vMap, stms) = (case scope of GlobalScope => ToC.trFragment (vMap, blk) | InitiallyScope => ToC.trFragment (vMap, blk) | _ => ToCL.trFragment (vMap, blk) (* end case *)) in (ENV{info=info, vMap=vMap, scope=scope}, stms) end fun block (ENV{vMap, scope, ...}, blk) = (case scope of StrandScope => ToC.trBlock (vMap, blk) | MethodScope name => ToCL.trBlock (vMap, blk) | InitiallyScope => ToCL.trBlock (vMap, blk) | _ => ToC.trBlock (vMap, blk) (* end case *)) fun exp (ENV{vMap, ...}, e) = ToCL.trExp(vMap, e) end (* variables *) structure Var = struct fun mirror (ty, name, shadowEnv) = { hostTy = ToC.trType ty, shadowTy = shadowTy ty, gpuTy = ToCL.trType ty, hToS = case shadowEnv of GLOBAL_SHADOW => convertToShadow (ty, name) | STRAND_SHADOW => convertStrandToShadow(ty, name, "selfIn", "selfOut") (* end case *), var = name } fun name (ToCL.V(_, name)) = name fun global (Prog{globals, imgGlobals, ...}, name, ty) = let val x = mirror (ty, name, GLOBAL_SHADOW) fun isImgGlobal (Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}), name) = imgGlobals := (name,dim) :: !imgGlobals | isImgGlobal _ = () in globals := x :: !globals; isImgGlobal (ty, name); ToCL.V(#gpuTy x, name) end fun param x = ToCL.V(ToCL.trType(V.ty x), V.name x) end (* environments *) structure Env = struct (* create a new environment *) fun new prog = ENV{ info=INFO{prog = prog}, vMap = V.Map.empty, scope = NoScope } (* define the current translation context *) fun setScope scope (ENV{info, vMap, ...}) = ENV{info=info, vMap=vMap, scope=scope} val scopeGlobal = setScope GlobalScope val scopeInitially = setScope InitiallyScope fun scopeStrand env = setScope StrandScope env fun scopeMethod (env, name) = setScope (MethodScope name) env (* bind a TreeIL varaiable to a target variable *) fun bind (ENV{info, vMap, scope}, x, x') = ENV{ info = info, vMap = V.Map.insert(vMap, x, x'), scope = scope } end (* programs *) structure Program = struct fun new {name, double, parallel, debug} = ( RN.initTargetSpec double; CNames.initTargetSpec {double=double, long=false}; Prog{ name = name, double = double, parallel = parallel, debug = debug, globals = ref [], topDecls = ref [], strands = AtomTable.mkTable (16, Fail "strand table"), initially = ref(CL.D_Comment["missing initially"]), numDims = ref 0, imgGlobals = ref[], prFn = ref(CL.D_Comment(["No Print Function"])), outFn = ref(CL.D_Comment(["No Output Function"])) }) (* register the code that is used to register command-line options for input variables *) fun inputs (Prog{topDecls, ...}, stm) = let val inputsFn = CL.D_Func( [], CL.voidTy, RN.registerOpts, [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.optionsTy), "opts")], stm) in topDecls := inputsFn :: !topDecls end (* register the global initialization part of a program *) fun init (Prog{topDecls, ...}, init) = let val globalsDecl = CL.mkAssign(CL.mkVar RN.globalsVarName, CL.mkApply("malloc", [CL.mkSizeof(CL.T_Named RN.globalsTy)])) val initFn = CL.D_Func( [], CL.voidTy, RN.initGlobals, [], CL.mkBlock[ globalsDecl, CL.mkCall(RN.initGlobalsHelper, [CL.mkVar RN.globalsVarName]) ]) val initHelperFn = CL.D_Func( [], CL.voidTy, RN.initGlobalsHelper, [CL.PARAM([], globPtrTy, RN.globalsVarName)], init) val shutdownFn = CL.D_Func( [], CL.voidTy, RN.shutdown, [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.worldTy), "wrld")], CL.S_Block[]) in topDecls := shutdownFn :: initFn :: initHelperFn :: !topDecls end (* create and register the initially function for a program *) fun initially { prog = Prog{name=progName, strands, initially,numDims, ...}, isArray : bool, iterPrefix : stm list, iters : (var * exp * exp) list, createPrefix : stm list, strand : Atom.atom, args : exp list } = let val name = Atom.toString strand val nDims = List.length iters val worldTy = CL.T_Ptr(CL.T_Named N.worldTy) fun mapi f xs = let fun mapf (_, []) = [] | mapf (i, x::xs) = f(i, x) :: mapf(i+1, xs) in mapf (0, xs) end val baseInit = mapi (fn (i, (_, e, _)) => (i, CL.I_Exp e)) iters val sizeInit = mapi (fn (i, (CL.V(ty, _), lo, hi)) => (i, CL.I_Exp(CL.mkBinOp(hi, CL.#-, lo) #+# CL.mkIntTy(1, ty))) ) iters (* code to allocate the world and initial strands *) val wrld = "wrld" val allocCode = [ CL.mkComment["allocate initial block of strands"], CL.mkDecl(CL.T_Array(CL.int32, SOME nDims), "base", SOME(CL.I_Array baseInit)), CL.mkDecl(CL.T_Array(CL.uint32, SOME nDims), "size", SOME(CL.I_Array sizeInit)), CL.mkDecl(worldTy, wrld, SOME(CL.I_Exp(CL.mkApply(N.allocInitially, [ CL.mkVar "ProgramName", CL.mkUnOp(CL.%&, CL.mkVar(N.strandDesc name)), CL.mkBool isArray, CL.mkIntTy(IntInf.fromInt nDims, CL.int32), CL.mkVar "base", CL.mkVar "size" ])))) ] (* create the loop nest for the initially iterations *) val indexVar = "ix" val strandTy = CL.T_Ptr(CL.T_Named(N.strandTy name)) fun mkLoopNest [] = CL.mkBlock(createPrefix @ [ CL.mkDecl(strandTy, "sp", SOME(CL.I_Exp( CL.mkCast(strandTy, CL.mkApply(N.inState, [CL.mkVar "wrld", CL.mkVar indexVar]))))), CL.mkCall(N.strandInit name, CL.mkVar "sp" :: args), CL.mkAssign(CL.mkVar indexVar, CL.mkVar indexVar #+# CL.mkIntTy(1, CL.uint32)) ]) | mkLoopNest ((CL.V(ty, param), lo, hi)::iters) = let val body = mkLoopNest iters in CL.mkFor( [(ty, param, lo)], CL.mkBinOp(CL.mkVar param, CL.#<=, hi), [CL.mkPostOp(CL.mkVar param, CL.^++)], body) end val iterCode = [ CL.mkComment["initially"], CL.mkDecl(CL.uint32, indexVar, SOME(CL.I_Exp(CL.mkIntTy(0, CL.uint32)))), mkLoopNest iters ] val body = CL.mkBlock( iterPrefix @ allocCode @ iterCode @ [CL.mkReturn(SOME(CL.mkVar "wrld"))]) val initFn = CL.D_Func([], worldTy, N.initially, [], body) in numDims := nDims; initially := initFn end (***** OUTPUT *****) fun genStrandPrint (Strand{name, tyName, state, output, code, ...}) = let (* the print function *) val prFnName = concat[name, "Print"] val prFn = let val params = [ CL.PARAM([], CL.T_Ptr(CL.T_Named "FILE"), "outS"), CL.PARAM([], CL.T_Ptr(CL.T_Named(RN.strandShadowTy tyName)), "self") ] val (ty, x) = output val outState = CL.mkIndirect(CL.mkVar "self", x) val prArgs = (case ty of Ty.IntTy => [CL.mkStr(!N.gIntFormat ^ "\n"), outState] | Ty.SeqTy(Ty.IntTy, d) => let fun sel i = CL.mkApply( "VSUB", [outState, CL.mkInt(IntInf.fromInt i)]) val fmt = CL.mkStr( String.concatWith " " (List.tabulate(d, fn _ => !N.gIntFormat)) ^ "\n") val args = List.tabulate (d, sel) in fmt :: args end | Ty.TensorTy[] => [CL.mkStr "%f\n", outState] | Ty.TensorTy[d] => let fun sel i = CL.mkApply( "VSUB", [outState, CL.mkInt(IntInf.fromInt i)]) val fmt = CL.mkStr( String.concatWith " " (List.tabulate(d, fn _ => "%f")) ^ "\n") val args = List.tabulate (d, sel) in fmt :: args end | _ => raise Fail("genStrand: unsupported output type " ^ Ty.toString ty) (* end case *)) in CL.D_Func(["static"], CL.voidTy, prFnName, params, CL.mkCall("fprintf", CL.mkVar "outS" :: prArgs)) end in prFn end fun genStrandTyDef(targetTy,Strand{state,...},tyName) = (case state of [] => CL.D_Comment(["No Strand Defintiion Included"]) | _ => CL.D_StructDef(revmap (fn x => (targetTy x, #var x)) state, tyName) (* end case *)) (* generates the globals buffers and arguments function *) fun genConvertShadowTypes (Strand{name, tyName, state,...}) = let (* Delcare opencl setup objects *) val errVar = "err" val params = [ CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfOut"), CL.PARAM([], CL.T_Ptr(CL.T_Named(RN.strandShadowTy tyName)), "selfIn") ] val body = List.map (fn (x:mirror_var) => #hToS x) state in CL.D_Func([], CL.voidTy, RN.strandConvertName name, params, CL.mkBlock body) end (* generates the opencl buffers for the image data *) fun getGlobalDataBuffers (globals, imgGlobals, contextVar, errVar) = let val globalBuffErr = "error creating OpenCL global buffer\n" fun errorFn msg = CL.mkIfThen(CL.mkBinOp(CL.mkVar errVar, CL.#!=, CL.mkVar "CL_SUCCESS"), CL.mkBlock([CL.mkCall("fprintf",[CL.mkVar "stderr", CL.mkStr msg]), CL.mkCall("exit",[CL.mkInt 1])])) val shadowTypeDecl = CL.mkDecl(CL.T_Named(RN.shadowGlobalsTy), RN.shadowGlaobalsName, NONE) val globalToShadowStms = List.map (fn (x:mirror_var) => #hToS x) globals val globalBufferDecl = CL.mkDecl(clMemoryTy,concat[RN.globalsVarName,"_cl"],NONE) val globalBuffer = CL.mkAssign(CL.mkVar(concat[RN.globalsVarName,"_cl"]), CL.mkApply("clCreateBuffer", [ CL.mkVar contextVar, CL.mkBinOp(CL.mkVar "CL_MEM_READ_ONLY", CL.#|, CL.mkVar "CL_MEM_COPY_HOST_PTR"), CL.mkSizeof(CL.T_Named RN.shadowGlobalsTy), CL.mkUnOp(CL.%&,CL.mkVar RN.shadowGlaobalsName), CL.mkUnOp(CL.%&,CL.mkVar errVar) ])) fun genDataBuffers ([],_,_,_) = [] | genDataBuffers ((var,nDims)::globals, contextVar, errVar,errFn) = let val hostVar = CL.mkIndirect(CL.mkVar RN.globalsVarName, var) val size = CL.mkIndirect(hostVar, "dataSzb") in CL.mkDecl(clMemoryTy, RN.addBufferSuffixData var ,NONE) :: CL.mkAssign(CL.mkVar(RN.addBufferSuffixData var), CL.mkApply("clCreateBuffer", [ CL.mkVar contextVar, CL.mkVar "CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR", size, CL.mkIndirect(hostVar, "data"), CL.mkUnOp(CL.%&,CL.mkVar errVar) ])) :: errFn(concat["error in creating ",RN.addBufferSuffixData var, " global buffer\n"]) :: genDataBuffers(globals,contextVar,errVar,errFn) end in [shadowTypeDecl] @ globalToShadowStms @ [globalBufferDecl, globalBuffer,errorFn(globalBuffErr)] @ genDataBuffers(imgGlobals,contextVar,errVar,errorFn) end (* generates the kernel arguments for the image data *) fun genGlobalArguments (globals, count, kernelVar, errVar) = let val globalArgErr = "error creating OpenCL global argument\n" fun errorFn msg = CL.mkIfThen(CL.mkBinOp(CL.mkVar errVar, CL.#!=, CL.mkVar "CL_SUCCESS"), CL.mkBlock([CL.mkCall("fprintf",[CL.mkVar "stderr", CL.mkStr msg]), CL.mkCall("exit",[CL.mkInt 1])])) val globalArgument = CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.&=, CL.mkApply("clSetKernelArg", [CL.mkVar kernelVar, CL.mkPostOp(CL.mkVar count, CL.^++), CL.mkApply("sizeof",[CL.mkVar "cl_mem"]), CL.mkUnOp(CL.%&,CL.mkVar(concat[RN.globalsVarName,"_cl"]))]))) fun genDataArguments ([],_,_,_,_) = [] | genDataArguments ((var,nDims)::globals,count,kernelVar,errVar,errFn) = CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.$=, CL.mkApply("clSetKernelArg", [CL.mkVar kernelVar, CL.mkPostOp(CL.mkVar count, CL.^++), CL.mkApply("sizeof",[CL.mkVar "cl_mem"]), CL.mkUnOp(CL.%&,CL.mkVar(RN.addBufferSuffixData var))]))) :: errFn(concat["error in creating ",RN.addBufferSuffixData var, " argument\n"]) :: genDataArguments (globals,count,kernelVar,errVar,errFn) in globalArgument :: errorFn globalArgErr :: genDataArguments(globals, count, kernelVar, errVar,errorFn) end (* generates the globals buffers and arguments function *) fun genGlobalBuffersArgs (globals,imgGlobals) = let (* Delcare opencl setup objects *) val errVar = "err" val params = [ CL.PARAM([],CL.T_Named("cl_context"), "context"), CL.PARAM([],CL.T_Named("cl_kernel"), "kernel"), CL.PARAM([],CL.T_Named("cl_command_queue"), "cmdQ"), CL.PARAM([],CL.T_Named("int"), "argStart") ] val body = (case globals of [] => [CL.mkReturn(NONE)] | _ => let val clGlobalBuffers = getGlobalDataBuffers(globals, !imgGlobals, "context", errVar) val clGlobalArguments = genGlobalArguments(!imgGlobals, "argStart", "kernel", errVar) in (* Body: put all the statments together *) CL.mkDecl(clIntTy, errVar, SOME(CL.I_Exp(CL.mkInt 0))) :: clGlobalBuffers @ clGlobalArguments end (*end of case*)) in CL.D_Func([],CL.voidTy,RN.globalsSetupName,params,CL.mkBlock(body)) end (* generate the global image meta-data and data parameters *) fun genKeneralGlobalParams ((name,tyname)::[],line) = concat[line, "__global void *", RN.addBufferSuffixData name] | genKeneralGlobalParams ([],line) = line | genKeneralGlobalParams ((name,tyname)::rest, line) = genKeneralGlobalParams(rest, concat[line, "__global void *", RN.addBufferSuffixData name, ",\n"]) fun genUpdateMethod (Strand{name, tyName, state,...}, globals, imgGlobals) = let val imageDataStms = List.map (fn (x,_) => concat[ RN.globalImageDataName, ".", RN.imageDataName x, " = ", RN.addBufferSuffixData x, ";","\n" ]) (!imgGlobals) fun select ([], a, _) = a | select (_, _, b) = b val placeHolders = [ (RN.place_holders, tyName), (RN.p_addDatPtr, select (!imgGlobals, "", ",")), (RN.p_addGlobals, select (!globals, "", ",")), (RN.p_globals, select (!globals, "", "__global Diderot_Globals_t *diderotGlobals")), (RN.p_globalVar, select (!globals, "0", RN.globalsVarName)), (RN.p_dataVar, select (!globals, "0", RN.globalImageDataName)), (RN.p_dataPtr, genKeneralGlobalParams (!imgGlobals, "")), (RN.p_dataAssign, select (!imgGlobals, "", String.concat("Diderot_data_ptr_t diderotDataPtrs;\n" :: imageDataStms))) ] in CL.verbatim [CLUpdateFrag.text] placeHolders end fun genStrandCopy(Strand{tyName,name,state,...}) = let val params = [ CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfIn"), CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfOut") ] val assignStms = List.rev( List.map (fn x => CL.mkAssign(lvalueSV(#var x), rvalueSV(#var x))) state) in CL.D_Func([""], CL.voidTy, RN.strandCopy, params,CL.mkBlock(assignStms)) end (* generate a global structure type definition from the list of globals *) fun genGlobalStruct (_, [], _) = CL.D_Comment(["No Global Definition"]) | genGlobalStruct (targetTy, globals, tyName) = let val globs = List.map (fn (x : mirror_var) => (targetTy x, #var x)) globals in CL.D_StructDef(globs, tyName) end (* generate a global structure type definition from the image data of the image globals *) fun genImageDataStruct ([], _) = CL.D_Comment(["No Image Data Ptrs Definition"]) | genImageDataStruct (imgGlobals, tyName) = let val globs = List.map (fn (x, _) => (globalPtr CL.voidTy, RN.imageDataName x)) imgGlobals in CL.D_StructDef(globs, tyName) end fun genGlobals (declFn, targetTy, globals) = let fun doVar (x : mirror_var) = declFn (CL.D_Var([], targetTy x, #var x, NONE)) in List.app doVar globals end fun genOutputFun(Strand{name, output,tyName, state, code,...}) = let (* the output function *) val outFnName = concat[name, "_Output"] val outFun = let val params = [ CL.PARAM([], CL.T_Ptr CL.voidTy, "outS"), CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "self") ] (* the type and access expression for the strand's output variable *) val (outTy, outState) = (#1 output, CL.mkIndirect(CL.mkVar "self", #2 output)) val outState = CL.mkUnOp(CL.%&, outState) in CL.D_Func(["static"], CL.voidTy, outFnName, params, CL.mkCall("memcpy", [CL.mkVar "outS", outState, CL.mkSizeof(shadowTy outTy)] )) end in outFun end fun genStrandDesc (outFn,Strand{name, output,tyName, state, code,...}) = let (* the output function *) val outFnName = concat[name, "_Output"] (* the strand's descriptor object *) val descI = let fun fnPtr (ty, f) = CL.I_Exp(CL.mkCast(CL.T_Named ty, CL.mkVar f)) val (outTy, _) = output in CL.I_Struct[ ("name", CL.I_Exp(CL.mkStr name)), ("stateSzb", CL.I_Exp(CL.mkSizeof(CL.T_Named(RN.strandTy name)))), ("shadowStrandSzb", CL.I_Exp(CL.mkSizeof(CL.T_Named(RN.strandShadowTy (RN.strandTy name))))), (* FIXME: we may need to add a shadowOutputSzb field too for OpenCL *) ("outputSzb", CL.I_Exp(CL.mkSizeof(shadowTy outTy))), ("nrrdType", CL.I_Exp(CL.mkInt (NrrdTypes.toNrrdType outTy))), ("nrrdSzb", CL.I_Exp(CL.mkInt (NrrdTypes.toNrrdSize outTy))), ("update", fnPtr("update_method_t", "0")), ("strandCopy", fnPtr("convert_method_t", RN.strandConvertName name)), ("print", fnPtr("print_method_t", RN.strandPrintName name)), ("output", fnPtr("output_method_t", outFnName)) (* FIXME *) ] end val desc = CL.D_Var([], CL.T_Named N.strandDescTy, N.strandDesc name, SOME descI) in desc end (* generate the table of strand descriptors *) fun genStrandTable (declFn, strands) = let val nStrands = length strands fun genInit (Strand{name, ...}) = CL.I_Exp(CL.mkUnOp(CL.%&, CL.mkVar(N.strandDesc name))) fun genInits (_, []) = [] | genInits (i, s::ss) = (i, genInit s) :: genInits(i+1, ss) in declFn (CL.D_Var([], CL.int32, N.numStrands, SOME(CL.I_Exp(CL.mkIntTy(IntInf.fromInt nStrands, CL.int32))))); declFn (CL.D_Var([], CL.T_Array(CL.T_Ptr(CL.T_Named N.strandDescTy), SOME nStrands), N.strands, SOME(CL.I_Array(genInits (0, strands))))) end fun genSrc (baseName, prog) = let val Prog{ name, double, globals, topDecls, strands, initially, imgGlobals, numDims,outFn, ... } = prog val clFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "cl"} val cFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "c"} val clOutS = TextIO.openOut clFileName val cOutS = TextIO.openOut cFileName val clppStrm = PrintAsCL.new clOutS val cppStrm = PrintAsC.new cOutS val progName = name fun cppDecl dcl = PrintAsC.output(cppStrm, dcl) fun clppDecl dcl = PrintAsCL.output(clppStrm, dcl) val strands = AtomTable.listItems strands val [strand as Strand{name, tyName, code, init_code, ...}] = strands in (* Generate the OpenCL file *) (* Retrieve the header information *) clppDecl (CL.verbatim [HF.text] [ ("OUTFILE", clFileName), ("SRCFILE", OS.Path.joinBaseExt{base=baseName, ext=SOME "diderot"}), ("PRECISION", if double then "DOUBLE" else "SINGLE") ]); (* if there are no globals, then define a dummy type *) if List.null(!globals) then clppDecl (CL.D_Verbatim["typedef void ", RN.globalsTy, ";\n"]) else (); (* if there are no images, then define a dummy type *) if List.null(!imgGlobals) then clppDecl (CL.D_Verbatim["typedef void * ", RN.imageDataType, ";\n"]) else (); (* Retrieve the scheduler kernels and functions *) clppDecl (CL.D_Verbatim[SF.text]); clppDecl (CL.D_Verbatim[CLEigen2x2Frag.text]); clppDecl (CL.D_Verbatim[CLEigen3x3Frag.text]); clppDecl (genGlobalStruct (#gpuTy, !globals, RN.globalsTy)); clppDecl (genImageDataStruct(!imgGlobals, RN.imageDataType)); clppDecl (genStrandTyDef(#gpuTy, strand, tyName)); List.app clppDecl (!code); clppDecl (genStrandCopy strand); clppDecl (genUpdateMethod(strand, globals, imgGlobals)); (* Generate the Host C file *) cppDecl (CL.D_Verbatim[ if double then "#define DIDEROT_DOUBLE_PRECISION\n" else "#define DIDEROT_SINGLE_PRECISION\n", "#define DIDEROT_INT\n", "#define DIDEROT_TARGET_CL\n", "#include \"Diderot/diderot.h\"\n" ]); cppDecl (CL.D_Verbatim[ (case !globals of [] => concat["typedef void ", RN.globalsTy,";\n"] | _ => "" (*end of case*)) ]); cppDecl (CL.D_Var(["static"], CL.charPtr, "ProgramName", SOME(CL.I_Exp(CL.mkStr progName)))); cppDecl (genGlobalStruct (#hostTy, !globals, RN.globalsTy)); cppDecl (genGlobalStruct (#shadowTy, !globals, RN.shadowGlobalsTy)); (* FIXME: does this really need to be a global? *) cppDecl (CL.D_Var(["static"], globPtrTy, RN.globalsVarName, NONE)); cppDecl (genStrandTyDef (#hostTy, strand, tyName)); cppDecl (genStrandTyDef (#shadowTy, strand, RN.strandShadowTy tyName)); cppDecl (genConvertShadowTypes strand); cppDecl (!init_code); cppDecl (genStrandPrint strand); cppDecl (genOutputFun strand); List.app cppDecl (List.rev (!topDecls)); cppDecl (genGlobalBuffersArgs (!globals,imgGlobals)); List.app (fn strand => cppDecl (genStrandDesc (outFn,strand))) strands; genStrandTable (cppDecl, strands); cppDecl (!initially); PrintAsC.close cppStrm; PrintAsCL.close clppStrm; TextIO.closeOut cOutS; TextIO.closeOut clOutS end (* output the code to the filesystem. The string is the basename of the source file *) fun generate (basename, prog as Prog{double, parallel, debug, ...}) = let fun condCons (true, x, xs) = x::xs | condCons (false, _, xs) = xs (* generate the C compiler flags *) val cflags = ["-I" ^ Paths.diderotInclude, "-I" ^ Paths.teemInclude] val cflags = condCons (parallel, #pthread Paths.cflags, cflags) val cflags = if debug then #debug Paths.cflags :: cflags else #ndebug Paths.cflags :: cflags val cflags = #base Paths.cflags :: cflags (* generate the loader flags *) val extraLibs = condCons (parallel, #pthread Paths.extraLibs, []) val extraLibs = Paths.teemLinkFlags @ #base Paths.extraLibs :: extraLibs val extraLibs = #cl Paths.extraLibs :: extraLibs val rtLib = TargetUtil.runtimeName { target = TargetUtil.TARGET_CL, parallel = parallel, double = double, debug = debug } val ldOpts = rtLib :: extraLibs in genSrc (basename, prog); RunCC.compile (basename, cflags); RunCC.link (basename, ldOpts) end end (* Program *) (* strands *) structure Strand = struct fun define (Prog{strands, ...}, strandId, state) = let val name = Atom.toString strandId (* the output state variable *) val outputVar = (case List.filter IL.StateVar.isOutput state of [] => raise Fail("no output specified for strand " ^ name) | [x] => (IL.StateVar.ty x, IL.StateVar.name x) | _ => raise Fail("multiple outputs in " ^ name) (* end case *)) (* the state variables *) val state = let fun cvt x = Var.mirror (IL.StateVar.ty x, IL.StateVar.name x, STRAND_SHADOW) in List.map cvt state end val strand = Strand{ name = name, tyName = RN.strandTy name, state = state, output = outputVar, code = ref [], init_code = ref (CL.D_Comment(["no init code"])) } in AtomTable.insert strands (strandId, strand); strand end (* return the strand with the given name *) fun lookup (Prog{strands, ...}, strandId) = AtomTable.lookup strands strandId (* register the strand-state initialization code. The variables are the strand * parameters. *) fun init (Strand{name, tyName, code, init_code, ...}, params, init) = let val fName = RN.strandInit name val params = clParam ("",CL.T_Ptr(CL.T_Named tyName), "selfOut") :: List.map (fn (ToCL.V(ty, x)) => CL.PARAM([], ty, x)) params val initFn = CL.D_Func([], CL.voidTy, fName, params, init) in init_code := initFn end (* register a strand method *) fun method (Strand{name, tyName, code,...}, methName, body) = let val params = [ globalParam (CL.T_Ptr(CL.T_Named tyName), "selfIn"), globalParam (CL.T_Ptr(CL.T_Named tyName), "selfOut"), globalParam (CL.T_Ptr(CL.T_Named (RN.globalsTy)), RN.globalsVarName), CL.PARAM([],CL.T_Named(RN.imageDataType),RN.globalImageDataName) ] val (fName,resTy) = (case methName of StrandUtil.Update => (RN.strandUpdate,CL.T_Named "StrandStatus_t") | StrandUtil.Stabilize => (name ^ StrandUtil.nameToString methName, CL.voidTy) (* end case *)) val methFn = CL.D_Func([], resTy, fName, params, body) in code := methFn :: !code end end end structure CUDABackEnd = CodeGenFn(CUDATarget)
Click to toggle
does not end with </html> tag
does not end with </body> tag
The output has ended thus: = methFn :: !code end end end structure CUDABackEnd = CodeGenFn(CUDATarget)