SCM Repository
View of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml
Parent Directory
|
Revision Log
Revision 1316 -
(download)
(annotate)
Sat Jun 11 22:45:44 2011 UTC (11 years ago) by lamonts
File size: 32022 byte(s)
Sat Jun 11 22:45:44 2011 UTC (11 years ago) by lamonts
File size: 32022 byte(s)
OpenCL runtime now compiling correctly and contains hookups
(* cl-target.sml * * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu) * All rights reserved. *) structure CLTarget : TARGET = struct structure IL = TreeIL structure V = IL.Var structure Ty = IL.Ty structure CL = CLang structure RN = RuntimeNames structure ToCL = TreeToCL structure N = CNames (* C variable translation *) structure TrCVar = struct type env = CL.typed_var TreeIL.Var.Map.map fun lookup (env, x) = (case V.Map.find (env, x) of SOME(CL.V(_, x')) => x' | NONE => raise Fail(concat["TrCVar.lookup(_, ", V.name x, ")"]) (* end case *)) (* translate a variable that occurs in an l-value context (i.e., as the target of an assignment) *) fun lvalueVar (env, x) = (case V.kind x of IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, lookup(env, x)) | IL.VK_State strand => CL.mkIndirect(CL.mkVar "selfOut", lookup(env, x)) | IL.VK_Local => CL.mkVar(lookup(env, x)) (* end case *)) (* translate a variable that occurs in an r-value context *) fun rvalueVar (env, x) = (case V.kind x of IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, lookup(env, x)) | IL.VK_State strand => CL.mkIndirect(CL.mkVar "selfIn", lookup(env, x)) | IL.VK_Local => CL.mkVar(lookup(env, x)) (* end case *)) end structure ToC = TreeToCFn (TrCVar) type var = CL.typed_var type exp = CL.exp type stm = CL.stm (* OpenCL specific types *) val clIntTy = CL.T_Named "cl_int" val clProgramTy = CL.T_Named "cl_program" val clKernelTy = CL.T_Named "cl_kernel" val clCmdQueueTy = CL.T_Named "cl_command_queue" val clContextTy = CL.T_Named "cl_context" val clDeviceIdTy = CL.T_Named "cl_device_id" val clPlatformIdTy = CL.T_Named "cl_platform_id" val clMemoryTy = CL.T_Named "cl_mem" val globPtrTy = CL.T_Ptr(CL.T_Named RN.globalsTy) (* variable or field that is mirrored between host and GPU *) type mirror_var = { hostTy : CL.ty, (* variable type on Host (i.e., C type) *) gpuTy : CL.ty, (* variable's type on GPU (i.e., OpenCL type) *) var : CL.var (* variable name *) } datatype strand = Strand of { name : string, tyName : string, state : mirror_var list ref, output : (Ty.ty * CL.var) option ref, (* the strand's output variable (only one for now) *) code : CL.decl list ref, init_code: CL.decl ref } datatype program = Prog of { name : string, (* stem of source file *) double : bool, (* true for double-precision support *) parallel : bool, (* true for multithreaded (or multi-GPU) target *) debug : bool, (* true for debug support in executable *) globals : mirror_var list ref, topDecls : CL.decl list ref, strands : strand AtomTable.hash_table, initially : CL.decl ref, numDims: int ref, imgGlobals: (string * int) list ref, prFn: CL.decl ref } datatype env = ENV of { info : env_info, vMap : var V.Map.map, scope : scope } and env_info = INFO of { prog : program } and scope = NoScope | GlobalScope | InitiallyScope | StrandScope of TreeIL.var list (* strand initialization *) | MethodScope of TreeIL.var list (* method body; vars are state variables *) (* the supprted widths of vectors of reals on the target. *) (* FIXME: for OpenCL 1.1, 3 is also valid *) fun vectorWidths () = [2, 4, 8, 16] (* tests for whether various expression forms can appear inline *) fun inlineCons n = (n < 2) (* vectors are inline, but not matrices *) val inlineMatrixExp = false (* can matrix-valued expressions appear inline? *) (* TreeIL to target translations *) structure Tr = struct fun fragment (ENV{info, vMap, scope}, blk) = let val (vMap, stms) = (case scope of GlobalScope => ToC.trFragment (vMap, blk) | InitiallyScope => ToC.trFragment (vMap, blk) | _ => ToCL.trFragment (vMap, blk) (* end case *)) in (ENV{info=info, vMap=vMap, scope=scope}, stms) end fun block (ENV{vMap, scope, ...}, blk) = let fun saveState cxt stateVars trAssign (env, args, stm) = ( ListPair.foldrEq (fn (x, e, stms) => trAssign(env, x, e)@stms) [stm] (stateVars, args) ) handle ListPair.UnequalLengths => ( print(concat["saveState ", cxt, ": length mismatch; ", Int.toString(List.length args), " args\n"]); raise Fail(concat["saveState ", cxt, ": length mismatch"])) in case scope (* NOTE: if we move strand initialization to the GPU, then we'll have to change the following code! *) of StrandScope stateVars => ToC.trBlock (vMap, saveState "StrandScope" stateVars ToC.trAssign, blk) | MethodScope stateVars => ToCL.trBlock (vMap, saveState "MethodScope" stateVars ToCL.trAssign, blk) | InitiallyScope => ToC.trBlock (vMap, fn (_, _, stm) => [stm], blk) | _ => ToC.trBlock (vMap, fn (_, _, stm) => [stm], blk) (* end case *) end fun exp (ENV{vMap, ...}, e) = ToCL.trExp(vMap, e) end (* variables *) structure Var = struct fun name (ToCL.V(_, name)) = name fun global (Prog{globals, imgGlobals, ...}, name, ty) = let val x = {hostTy = ToC.trType ty, gpuTy = ToCL.trType ty, var = name} fun isImgGlobal (Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}), name) = imgGlobals := (name,dim) :: !imgGlobals | isImgGlobal _ = () in globals := x :: !globals; isImgGlobal (ty, name); ToCL.V(#gpuTy x, name) end fun param x = ToCL.V(ToCL.trType(V.ty x), V.name x) fun state (Strand{state, ...}, x) = let val ty = V.ty x val x' = {hostTy = ToC.trType ty, gpuTy = ToCL.trType ty, var = V.name x} in state := x' :: !state; ToCL.V(#gpuTy x', #var x') end end (* environments *) structure Env = struct (* create a new environment *) fun new prog = ENV{ info=INFO{prog = prog}, vMap = V.Map.empty, scope = NoScope } (* define the current translation context *) fun setScope scope (ENV{info, vMap, ...}) = ENV{info=info, vMap=vMap, scope=scope} val scopeGlobal = setScope GlobalScope val scopeInitially = setScope InitiallyScope fun scopeStrand (env, svars) = setScope (StrandScope svars) env fun scopeMethod (env, svars) = setScope (MethodScope svars) env (* bind a TreeIL varaiable to a target variable *) fun bind (ENV{info, vMap, scope}, x, x') = ENV{ info = info, vMap = V.Map.insert(vMap, x, x'), scope = scope } end (* programs *) structure Program = struct fun new {name, double, parallel, debug} = ( RN.initTargetSpec double; CNames.initTargetSpec double; Prog{ name = name, double = double, parallel = parallel, debug = debug, globals = ref [], topDecls = ref [], strands = AtomTable.mkTable (16, Fail "strand table"), initially = ref(CL.D_Comment["missing initially"]), numDims = ref(0), imgGlobals = ref[], prFn = ref(CL.D_Comment(["No Print Function"])) }) (* register the global initialization part of a program *) (* FIXME: unused code; can this be removed?? fun globalIndirects (globals,stms) = let fun getGlobals ({name,target as TargetUtil.TARGET_CL}::rest) = CL.mkAssign(CL.mkIndirect(CL.mkVar RN.globalsVarName,name),CL.mkVar name) ::getGlobals rest | getGlobals [] = [] | getGlobals (_::rest) = getGlobals rest in stms @ getGlobals globals end *) (* register the code that is used to register command-line options for input variables *) fun inputs (Prog{topDecls, ...}, stm) = let val inputsFn = CL.D_Func( [], CL.voidTy, RN.registerOpts, [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.optionsTy), "opts")], stm) in topDecls := inputsFn :: !topDecls end (* register the global initialization part of a program *) fun init (Prog{topDecls, ...}, init) = let val globalsDecl = CL.mkAssign(CL.E_Var RN.globalsVarName, CL.mkApply("malloc", [CL.mkApply("sizeof",[CL.mkVar RN.globalsTy])])) val initGlobalsCall = CL.mkCall(RN.initGlobalsHelper,[]) val initFn = CL.D_Func( [], CL.voidTy, RN.initGlobals, [], CL.mkBlock([globalsDecl,initGlobalsCall])) val initFn_helper = CL.D_Func( [], CL.voidTy, RN.initGlobalsHelper, [], init) val shutdownFn = CL.D_Func( [], CL.voidTy, RN.shutdown, [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.worldTy), "wrld")], CL.S_Block[]) in topDecls := shutdownFn :: initFn :: initFn_helper :: !topDecls end (* create and register the initially function for a program *) fun initially { prog = Prog{name=progName, strands, initially, ...}, isArray : bool, iterPrefix : stm list, iters : (var * exp * exp) list, createPrefix : stm list, strand : Atom.atom, args : exp list } = let val name = Atom.toString strand val nDims = List.length iters val worldTy = CL.T_Ptr(CL.T_Named N.worldTy) fun mapi f xs = let fun mapf (_, []) = [] | mapf (i, x::xs) = f(i, x) :: mapf(i+1, xs) in mapf (0, xs) end val baseInit = mapi (fn (i, (_, e, _)) => (i, CL.I_Exp e)) iters val sizeInit = mapi (fn (i, (CL.V(ty, _), lo, hi)) => (i, CL.I_Exp(CL.mkBinOp(CL.mkBinOp(hi, CL.#-, lo), CL.#+, CL.E_Int(1, ty)))) ) iters (* code to allocate the world and initial strands *) val wrld = "wrld" val allocCode = [ CL.mkComment["allocate initial block of strands"], CL.mkDecl(CL.T_Array(CL.int32, SOME nDims), "base", SOME(CL.I_Array baseInit)), CL.mkDecl(CL.T_Array(CL.uint32, SOME nDims), "size", SOME(CL.I_Array sizeInit)), CL.mkDecl(worldTy, wrld, SOME(CL.I_Exp(CL.E_Apply(N.allocInitially, [ CL.mkVar "ProgramName", CL.mkUnOp(CL.%&, CL.E_Var(N.strandDesc name)), CL.E_Bool isArray, CL.E_Int(IntInf.fromInt nDims, CL.int32), CL.E_Var "base", CL.E_Var "size" ])))) ] (* create the loop nest for the initially iterations val indexVar = "ix" val strandTy = CL.T_Ptr(CL.T_Named(N.strandTy name)) fun mkLoopNest [] = CL.mkBlock(createPrefix @ [ CL.mkDecl(strandTy, "sp", SOME(CL.I_Exp( CL.E_Cast(strandTy, CL.E_Apply(N.inState, [CL.E_Var "wrld", CL.E_Var indexVar]))))), CL.mkCall(N.strandInit name, CL.E_Var RN.globalsVarName :: CL.E_Var "sp" :: args), CL.mkAssign(CL.E_Var indexVar, CL.mkBinOp(CL.E_Var indexVar, CL.#+, CL.E_Int(1, CL.uint32))) ]) | mkLoopNest ((CL.V(ty, param), lo, hi)::iters) = let val body = mkLoopNest iters in CL.mkFor( [(ty, param, lo)], CL.mkBinOp(CL.E_Var param, CL.#<=, hi), [CL.mkPostOp(CL.E_Var param, CL.^++)], body) end val iterCode = [ CL.mkComment["initially"], CL.mkDecl(CL.uint32, indexVar, SOME(CL.I_Exp(CL.E_Int(0, CL.uint32)))), mkLoopNest iters ] *) val body = CL.mkBlock( iterPrefix @ allocCode @ [CL.mkReturn(SOME(CL.E_Var "wrld"))]) val initFn = CL.D_Func([], worldTy, N.initially, [], body) in initially := initFn end (***** OUTPUT *****) fun genStrandPrint (Strand{name, tyName, state, output, code,...}) = let (* the print function *) val prFnName = concat[name, "_print"] val prFn = let val params = [ CL.PARAM([], CL.T_Ptr(CL.T_Named "FILE"), "outS"), CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "self") ] val SOME(ty, x) = !output val outState = CL.mkIndirect(CL.mkVar "self", x) val prArgs = (case ty of Ty.IVecTy 1 => [CL.E_Str(!N.gIntFormat ^ "\n"), outState] | Ty.IVecTy d => let val fmt = CL.mkStr( String.concatWith " " (List.tabulate(d, fn _ => !N.gIntFormat)) ^ "\n") val args = List.tabulate (d, fn i => ToC.ivecIndex(outState, d, i)) in fmt :: args end | Ty.TensorTy[] => [CL.mkStr "%f\n", outState] | Ty.TensorTy[d] => let val fmt = CL.mkStr( String.concatWith " " (List.tabulate(d, fn _ => "%f")) ^ "\n") val args = List.tabulate (d, fn i => ToC.vecIndex(outState, d, i)) in fmt :: args end | _ => raise Fail("genStrand: unsupported output type " ^ Ty.toString ty) (* end case *)) in CL.D_Func(["static"], CL.voidTy, prFnName, params, CL.mkCall("fprintf", CL.mkVar "outS" :: prArgs)) end in prFn end fun genStrandTyDef (targetTy, Strand{tyName, state,...}) = (* the type declaration for the strand's state struct *) CL.D_StructDef( List.rev (List.map (fn x => (targetTy x, #var x)) (!state)), tyName) (* generates the load kernel function *) (* generates the opencl buffers for the image data *) fun getGlobalDataBuffers (globals,contextVar,errVar) = let val globalBufferDecl = CL.mkDecl(clMemoryTy,concat[RN.globalsVarName,"_cl"],NONE) val globalBuffer = CL.mkAssign(CL.mkVar(concat[RN.globalsVarName,"_cl"]), CL.mkApply("clCreateBuffer", [ CL.mkVar contextVar, CL.mkVar "CL_MEM_COPY_HOST_PTR", CL.mkApply("sizeof",[CL.mkVar RN.globalsTy]), CL.mkVar RN.globalsVarName, CL.mkUnOp(CL.%&,CL.mkVar errVar) ])) fun genDataBuffers ([],_,_) = [] | genDataBuffers ((var,nDims)::globals, contextVar, errVar) = let val hostVar = CL.mkIndirect(CL.mkVar RN.globalsVarName, var) (* FIXME: use CL constructors to build expressions (not strings) *) fun sizeExp i = CL.mkSubscript(CL.mkIndirect(hostVar, "size"), CL.mkInt i) val size = CL.mkBinOp(CL.mkApply("sizeof",[CL.mkVar "float"]), CL.#*, sizeExp 0) val size = if (nDims > 1) then CL.mkBinOp(size, CL.#*, sizeExp 1) else size val size = if (nDims > 2) then CL.mkBinOp(size, CL.#*, sizeExp 2) else size in CL.mkDecl(clMemoryTy, RN.addBufferSuffix var ,NONE):: CL.mkDecl(clMemoryTy, RN.addBufferSuffixData var ,NONE):: CL.mkAssign(CL.mkVar(RN.addBufferSuffix var), CL.mkApply("clCreateBuffer", [ CL.mkVar contextVar, CL.mkVar "CL_MEM_COPY_HOST_PTR", CL.mkApply("sizeof",[CL.mkVar (RN.imageTy nDims)]), hostVar, CL.mkUnOp(CL.%&,CL.mkVar errVar) ])) :: CL.mkAssign(CL.mkVar(RN.addBufferSuffixData var), CL.mkApply("clCreateBuffer", [ CL.mkVar contextVar, CL.mkVar "CL_MEM_COPY_HOST_PTR", size, CL.mkIndirect(hostVar, "data"), CL.mkUnOp(CL.%&,CL.mkVar errVar) ])) :: genDataBuffers(globals,contextVar,errVar) end in globalBufferDecl :: globalBuffer :: genDataBuffers(globals,contextVar,errVar) end (* generates the kernel arguments for the image data *) fun genGlobalArguments (globals, count, kernelVar, errVar) = let val globalArgument = CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.|=, CL.mkApply("clSetKernelArg", [CL.mkVar kernelVar, CL.mkPostOp(CL.E_Var count, CL.^++), CL.mkApply("sizeof",[CL.mkVar "cl_mem"]), CL.mkUnOp(CL.%&,CL.mkVar(concat[RN.globalsVarName,"_cl"]))]))) fun genDataArguments ([],_,_,_) = [] | genDataArguments ((var,nDims)::globals,count,kernelVar,errVar) = CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.|=, CL.mkApply("clSetKernelArg", [CL.mkVar kernelVar, CL.mkPostOp(CL.E_Var count, CL.^++), CL.mkApply("sizeof",[CL.mkVar "cl_mem"]), CL.mkUnOp(CL.%&,CL.mkVar(RN.addBufferSuffix var))]))) :: CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.|=, CL.mkApply("clSetKernelArg", [CL.mkVar kernelVar, CL.mkPostOp(CL.E_Var count, CL.^++), CL.mkApply("sizeof",[CL.mkVar "cl_mem"]), CL.mkUnOp(CL.%&,CL.mkVar(RN.addBufferSuffixData var))]))) :: genDataArguments (globals,count,kernelVar,errVar) in globalArgument :: genDataArguments(globals, count, kernelVar, errVar) end (* generates the globals buffers and arguments function *) fun genGlobalBuffersArgs (imgGlobals) = let (* Delcare opencl setup objects *) val errVar = "err" val imgDataSizeVar = "image_dataSize" val params = [ CL.PARAM([],CL.T_Named("cl_context"), "context"), CL.PARAM([],CL.T_Named("cl_kernel"), "kernel"), CL.PARAM([],CL.T_Named("int"), "argStart") ] val clGlobalBuffers = getGlobalDataBuffers(!imgGlobals, "context", errVar) val clGlobalArguments = genGlobalArguments(!imgGlobals, "argStart", "kernel", errVar) (* Body put all the statments together *) val body = CL.mkDecl(clIntTy, errVar, SOME(CL.I_Exp(CL.mkInt 0))) :: clGlobalBuffers @ clGlobalArguments in CL.D_Func([],CL.voidTy,RN.globalsSetupName,params,CL.mkBlock(body)) end (* generate the data and global parameters *) fun genKeneralGlobalParams ((name,tyname)::rest) = CL.PARAM([], CL.T_Ptr(CL.T_Named RN.globalsTy), concat[RN.globalsVarName]) :: CL.PARAM([], CL.T_Ptr(CL.T_Named (RN.imageTy tyname)),RN.addBufferSuffix name) :: CL.PARAM([], CL.T_Ptr(CL.voidTy),RN.addBufferSuffixData name) :: genKeneralGlobalParams rest | genKeneralGlobalParams [] = [] (*generate code for intilizing kernel global data *) (* FIXME: should use List.map here *) fun initGlobalImages ((name, tyname)::rest) = CL.mkAssign(CL.mkIndirect(CL.E_Var RN.globalsVarName, name), CL.mkVar (RN.addBufferSuffix name)) :: CL.mkAssign(CL.mkIndirect(CL.E_Var RN.globalsVarName,concat[name,"->","data"]),CL.mkVar (RN.addBufferSuffixData name)) :: initGlobalImages rest | initGlobalImages [] = [] (* generate the main kernel function for the .cl file *) fun genKernelFun (strand, nDims, globals, imgGlobals) = let val Strand{name, tyName, state, output, code,...} = strand val fName = RN.kernelFuncName; val inState = "strand_in" val outState = "strand_out" val params = [ CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfIn"), CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfOut"), CL.PARAM(["__global"], CL.intTy, "width") ] @ genKeneralGlobalParams(!imgGlobals) val thread_ids = if nDims = 1 then [ CL.mkDecl(CL.intTy, "x", SOME(CL.I_Exp(CL.mkInt 0))), CL.mkAssign(CL.mkVar "x",CL.mkApply(RN.getGlobalThreadId,[CL.mkInt 0])) ] else [ CL.mkDecl(CL.intTy, "x", SOME(CL.I_Exp(CL.mkInt 0))), CL.mkDecl(CL.intTy, "y", SOME(CL.I_Exp(CL.mkInt 0))), CL.mkAssign(CL.mkVar "x", CL.mkApply(RN.getGlobalThreadId,[CL.mkInt 0])), CL.mkAssign(CL.mkVar "y",CL.mkApply(RN.getGlobalThreadId,[CL.mkInt 1])) ] val strandDecl = [ CL.mkDecl(CL.T_Named tyName, inState, NONE), CL.mkDecl(CL.T_Named tyName, outState,NONE)] val strandObjects = if nDims = 1 then [ CL.mkAssign( CL.mkVar inState, CL.mkSubscript(CL.mkVar "selfIn", CL.mkStr "x")), CL.mkAssign(CL.mkVar outState,CL.mkSubscript(CL.mkVar "selfOut", CL.mkStr "x")) ] else let val index = CL.mkBinOp(CL.mkBinOp(CL.mkVar "x",CL.#*,CL.mkVar "width"),CL.#+,CL.mkVar "y") in [ CL.mkAssign(CL.mkVar inState, CL.mkSubscript(CL.mkVar "selfIn",index)), CL.mkAssign(CL.mkVar outState,CL.mkSubscript(CL.mkVar "selfOut",index)) ] end val status = CL.mkDecl(CL.intTy, "status", SOME(CL.I_Exp(CL.mkInt 0))) val strand_Init_Stm = CL.mkCall(RN.strandInit name, [CL.E_Var RN.globalsVarName,CL.mkUnOp(CL.%&,CL.E_Var inState), CL.E_Var "x", CL.E_Var "y"]) val local_vars = thread_ids @ initGlobalImages(!imgGlobals) @ strandDecl @ strandObjects @ [strand_Init_Stm,status] val while_exp = CL.mkBinOp( CL.mkBinOp(CL.mkVar "status",CL.#!=, CL.mkVar RN.kStabilize), CL.#||, CL.mkBinOp(CL.mkVar "status", CL.#!=, CL.mkVar RN.kDie)) val whileBody = CL.mkBlock [ CL.mkAssign(CL.mkVar "status", CL.mkApply(RN.strandUpdate name, [CL.mkUnOp(CL.%&,CL.mkVar inState), CL.mkUnOp(CL.%&,CL.mkVar outState),CL.E_Var RN.globalsVarName])), CL.mkCall(RN.strandStabilize name, [CL.mkUnOp(CL.%&,CL.mkVar inState), CL.mkUnOp(CL.%&,CL.mkVar outState),CL.E_Var RN.globalsVarName]) ] val whileBlock = [CL.mkWhile(while_exp, whileBody)] val body = CL.mkBlock(local_vars @ whileBlock) in CL.D_Func(["__kernel"], CL.voidTy, fName, params, body) end (* generate a global structure from the globals *) fun genGlobalStruct (targetTy, globals) = let val globs = List.map (fn (x : mirror_var) => (targetTy x, #var x)) globals in CL.D_StructDef(globs, RN.globalsTy) end fun genGlobals (declFn, targetTy, globals) = let fun doVar (x : mirror_var) = declFn (CL.D_Var([], targetTy x, #var x, NONE)) in List.app doVar globals end fun genStrandDesc (Strand{name, output, ...}) = let (* the strand's descriptor object *) val descI = let fun fnPtr (ty, f) = CL.I_Exp(CL.mkCast(CL.T_Named ty, CL.mkVar f)) val SOME(outTy, _) = !output in CL.I_Struct[ ("name", CL.I_Exp(CL.mkStr name)), ("stateSzb", CL.I_Exp(CL.mkSizeof(CL.T_Named(N.strandTy name)))), (* ("outputSzb", CL.I_Exp(CL.mkSizeof(ToC.trTy outTy))), *) ("update", fnPtr("update_method_t", "0")), ("print", fnPtr("print_method_t", name ^ "_print")) ] end val desc = CL.D_Var([], CL.T_Named N.strandDescTy, N.strandDesc name, SOME descI) in desc end (* generate the table of strand descriptors *) fun genStrandTable (declFn, strands) = let val nStrands = length strands fun genInit (Strand{name, ...}) = CL.I_Exp(CL.mkUnOp(CL.%&, CL.E_Var(N.strandDesc name))) fun genInits (_, []) = [] | genInits (i, s::ss) = (i, genInit s) :: genInits(i+1, ss) in declFn (CL.D_Var([], CL.int32, N.numStrands, SOME(CL.I_Exp(CL.E_Int(IntInf.fromInt nStrands, CL.int32))))); declFn (CL.D_Var([], CL.T_Array(CL.T_Ptr(CL.T_Named N.strandDescTy), SOME nStrands), N.strands, SOME(CL.I_Array(genInits (0, strands))))) end fun genSrc (baseName, prog) = let val Prog{name,double, globals, topDecls, strands, initially, imgGlobals, numDims, ...} = prog val clFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "cl"} val cFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "c"} val clOutS = TextIO.openOut clFileName val cOutS = TextIO.openOut cFileName val clppStrm = PrintAsCL.new clOutS val cppStrm = PrintAsC.new cOutS val progName = name fun cppDecl dcl = PrintAsC.output(cppStrm, dcl) fun clppDecl dcl = PrintAsCL.output(clppStrm, dcl) val strands = AtomTable.listItems strands val [strand as Strand{name, tyName, code, init_code, ...}] = strands in (* Generate the OpenCl file *) clppDecl (CL.D_Verbatim([ if double then "#define DIDEROT_DOUBLE_PRECISION" else "#define DIDEROT_SINGLE_PRECISION", "#define DIDEROT_TARGET_CL", "#include \"Diderot/cl-diderot.h\"" ])); clppDecl (genGlobalStruct (#gpuTy, !globals)); clppDecl (genStrandTyDef(#gpuTy, strand)); clppDecl (!init_code); List.app clppDecl (!code); clppDecl (genKernelFun (strand, !numDims, globals, imgGlobals)); (* Generate the Host C file *) cppDecl (CL.D_Verbatim([ if double then "#define DIDEROT_DOUBLE_PRECISION" else "#define DIDEROT_SINGLE_PRECISION", "#define DIDEROT_TARGET_CL", "#include \"Diderot/diderot.h\"" ])); cppDecl (CL.D_Var(["static"], CL.charPtr, "ProgramName", SOME(CL.I_Exp(CL.mkStr progName)))); cppDecl (genGlobalStruct (#hostTy, !globals)); cppDecl (CL.D_Var(["static"], CL.T_Ptr(CL.T_Named RN.globalsTy), RN.globalsVarName, NONE)); cppDecl (genStrandTyDef (#hostTy, strand)); cppDecl (genStrandPrint strand); List.app cppDecl (List.rev (!topDecls)); cppDecl (genGlobalBuffersArgs imgGlobals); List.app (fn strand => cppDecl (genStrandDesc strand)) strands; genStrandTable (cppDecl, strands); cppDecl (!initially); PrintAsC.close cppStrm; PrintAsCL.close clppStrm; TextIO.closeOut cOutS; TextIO.closeOut clOutS end (* output the code to a file. The string is the basename of the file, the extension * is provided by the target. *) fun generate (basename, prog as Prog{double, parallel, debug, ...}) = let fun condCons (true, x, xs) = x::xs | condCons (false, _, xs) = xs (* generate the C compiler flags *) val cflags = ["-I" ^ Paths.diderotInclude, "-I" ^ Paths.teemInclude] val cflags = condCons (parallel, #pthread Paths.cflags, cflags) val cflags = if debug then #debug Paths.cflags :: cflags else #ndebug Paths.cflags :: cflags val cflags = #base Paths.cflags :: cflags (* generate the loader flags *) val extraLibs = condCons (parallel, #pthread Paths.extraLibs, []) val extraLibs = Paths.teemLinkFlags @ #base Paths.extraLibs :: extraLibs val extraLibs = #cl Paths.extraLibs :: extraLibs val rtLib = TargetUtil.runtimeName { target = TargetUtil.TARGET_CL, parallel = parallel, double = double, debug = debug } val ldOpts = rtLib :: extraLibs in genSrc (basename, prog); RunCC.compile (basename, cflags); RunCC.link (basename, ldOpts) end end (* strands *) structure Strand = struct fun define (Prog{strands, ...}, strandId) = let val name = Atom.toString strandId val strand = Strand{ name = name, tyName = RN.strandTy name, state = ref [], output = ref NONE, code = ref [], init_code = ref (CL.D_Comment(["no init code"])) } in AtomTable.insert strands (strandId, strand); strand end (* return the strand with the given name *) fun lookup (Prog{strands, ...}, strandId) = AtomTable.lookup strands strandId (* register the strand-state initialization code. The variables are the strand * parameters. *) fun init (Strand{name, tyName, code, init_code, ...}, params, init) = let val fName = RN.strandInit name val params = CL.PARAM([], globPtrTy, RN.globalsVarName) :: CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfOut") :: List.map (fn (ToCL.V(ty, x)) => CL.PARAM([], ty, x)) params val initFn = CL.D_Func([], CL.voidTy, fName, params, init) in init_code := initFn end (* register a strand method *) fun method (Strand{name, tyName, code,...}, methName, body) = let val fName = concat[name, "_", methName] val params = [ CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfIn"), CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfOut"), CL.PARAM([], CL.T_Ptr(CL.T_Named (RN.globalsTy)), RN.globalsVarName) ] val methFn = CL.D_Func([], CL.int32, fName, params, body) in code := methFn :: !code end fun output (Strand{output, ...}, ty, ToCL.V(_, x)) = output := SOME(ty, x) end end structure CLBackEnd = CodeGenFn(CLTarget)
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |