Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /branches/lamont_dev/src/compiler/cl-target/cl-target.sml
ViewVC logotype

View of /branches/lamont_dev/src/compiler/cl-target/cl-target.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2039 - (download) (annotate)
Wed Oct 17 16:10:37 2012 UTC (7 years ago) by lamonts
File size: 41489 byte(s)
Added Query Code to tree-to-cl
(* cl-target.sml
 *
 * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
 * All rights reserved.
 *)

structure CLTarget : TARGET =
  struct

    structure IL = TreeIL
    structure V = IL.Var
    structure Ty = IL.Ty
    structure CL = CLang
    structure RN = RuntimeNames
    structure ToCL = TreeToCL
    structure N = CNames
    structure P = Paths 
    structure HF = CLHeaderFrag
    structure SF = CLSchedFrag
    structure SF = CLSpatialFrag


  (* revmap f l == List.rev(List.map f l) *)
    fun revmap f = let
          fun rmap ([], l) = l
            | rmap (x::r, l) = rmap (r, f x :: l)
          in
            fn l => rmap (l, [])
          end

  (* common arithmetic *)
    fun #+# (a, b) = CL.mkBinOp(a, CL.#+, b)
    fun #*# (a, b) = CL.mkBinOp(a, CL.#*, b)
    infix 5 #+#
    infix 6 #*#

  (* translate TreeIL types to shadow types *)
    fun shadowTy ty = (case ty
           of Ty.BoolTy => CL.T_Named "cl_uint"
            | Ty.IntTy => CL.T_Named(RN.shadowIntTy ())
            | Ty.TensorTy[] => CL.T_Named(RN.shadowRealTy ())
            | Ty.TensorTy[n] => CL.T_Named(RN.shadowVecTy n)
            | Ty.TensorTy[n, m] => CL.T_Named(RN.shadowMatTy(n,m))
            | Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}) => CL.T_Named(RN.shadowImageTy dim)
            | _ => raise Fail(concat["TreeToC.trType(", Ty.toString ty, ")"])
          (* end case *))

(* FIXME: add comments that more clearly explain the difference between convertToShadow and
 * convertStrandToShadow
 *)
   (* translate TreeIL types to shadow types *)
    fun convertToShadow (ty, name) = (case ty
           of Ty.IntTy => CL.mkAssign(
                CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name),
                CL.mkIndirect(CL.mkVar RN.globalsVarName, name))
            | Ty.TensorTy[n]=> CL.mkCall(RN.convertToShadowVec n, [
                  CL.mkUnOp(CL.%&, CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name)), 
                  CL.mkIndirect(CL.mkVar RN.globalsVarName, name)
                ]) 
            | Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}) => CL.mkCall(RN.shadowImageFunc dim, [
                  CL.mkVar "context", 
                  CL.mkUnOp(CL.%&, CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name)), 
                  CL.mkIndirect(CL.mkVar RN.globalsVarName, name)
                ])
            | Ty.TensorTy[n, m] => CL.mkCall(RN.convertToShadowMat(m,n), [
                  CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name), 
                  CL.mkIndirect(CL.mkVar RN.globalsVarName, name)
                ]) 
            | _ => CL.mkAssign(
                CL.mkSelect(CL.mkVar RN.shadowGlaobalsName,name),
                CL.mkIndirect(CL.mkVar RN.globalsVarName, name))
          (* end case *))

   (* generate code to convert strand TreeIL types to shadow types *)
    fun convertStrandToShadow (ty, name, selfIn, selfOut) = (case ty
           of Ty.IntTy => CL.mkAssign(
                CL.mkIndirect(CL.mkVar selfIn, name),
                CL.mkIndirect(CL.mkVar selfOut, name))
            | Ty.TensorTy[n]=> CL.mkCall(RN.convertToShadowVec n, [
                  CL.mkUnOp(CL.%&, CL.mkIndirect(CL.mkVar selfIn, name)), 
                  CL.mkIndirect(CL.mkVar selfOut, name)
                ]) 
            | Ty.TensorTy[n, m] => CL.mkCall(RN.convertToShadowMat(m,n), [
                  CL.mkUnOp(CL.%&, CL.mkIndirect(CL.mkVar selfIn, name)), 
                  CL.mkIndirect(CL.mkVar selfOut, name)
                ]) 
            | _ => CL.mkAssign(
                CL.mkIndirect(CL.mkVar selfIn, name),
                CL.mkIndirect(CL.mkVar selfOut, name))
          (* end case *))

  (* helper functions for specifying parameters in various address spaces *)
    fun clParam (spc, ty, x) = CL.PARAM([spc], ty, x)
    fun globalParam (ty, x) = CL.PARAM(["__global"], ty, x)
    fun constantParam (ty, x) = CL.PARAM(["__constant"], ty, x)
    fun localParam (ty, x) = CL.PARAM(["__local"], ty, x)
    fun privateParam (ty, x) = CL.PARAM(["__private"], ty, x)

  (* OpenCL global pointer type *)
    fun globalPtr ty = CL.T_Qual("__global", CL.T_Ptr ty)

  (* lvalue/rvalue state variable *)
    fun lvalueSV name = CL.mkIndirect(CL.mkVar "selfOut", name)
    fun rvalueSV name = CL.mkIndirect(CL.mkVar "selfIn", name)

  (* C variable translation *)
    structure TrCVar =
      struct
        type env = CL.typed_var TreeIL.Var.Map.map
        fun lookup (env, x) = (case V.Map.find (env, x)
               of SOME(CL.V(_, x')) => x'
                | NONE => raise Fail(concat["TrCVar.lookup(_, ", V.name x, ")"])
              (* end case *))
      (* translate a variable that occurs in an l-value context (i.e., as the target of an assignment) *)
        fun lvalueVar (env, x) = (case V.kind x
               of IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, lookup(env, x))
                | IL.VK_Local => CL.mkVar(lookup(env, x))
              (* end case *))
      (* translate a variable that occurs in an r-value context *)
        fun rvalueVar (env, x) = (case V.kind x
               of IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, lookup(env, x))
                | IL.VK_Local => CL.mkVar(lookup(env, x))
              (* end case *))
      (* translate a strand state variable that occurs in an l-value context *)
	fun lvalueStateVar (IL.SV{name, ...}) = lvalueSV name
      (* translate a strand state variable that occurs in an r-value context *)
	fun rvalueStateVar (IL.SV{name, ...}) = rvalueSV name
      end

    structure ToC = TreeToCFn (TrCVar)

    type var = CL.typed_var
    type exp = CL.exp
    type stm = CL.stm

  (* OpenCL specific types *)
    val clIntTy = CL.T_Named "cl_int"
    val clProgramTy = CL.T_Named "cl_program" 
    val clKernelTy  = CL.T_Named "cl_kernel"
    val clCmdQueueTy = CL.T_Named "cl_command_queue" 
    val clContextTy = CL.T_Named "cl_context" 
    val clDeviceIdTy = CL.T_Named "cl_device_id"
    val clPlatformIdTy = CL.T_Named "cl_platform_id" 
    val clMemoryTy = CL.T_Named "cl_mem"
    val globPtrTy = CL.T_Ptr(CL.T_Named RN.globalsTy)
(* FIXME: what are these for? *)
    datatype shadow_env = STRAND_SHADOW | GLOBAL_SHADOW

  (* variable or field that is mirrored between host and GPU *)
    type mirror_var = {
(* FIXME: perhaps it would be cleaner to just track the TreeIL type of the variable? *)
            hostTy : CL.ty,             (* variable type on Host (i.e., C type) *)
            shadowTy : CL.ty,           (* host-side shadow type of GPU type *)
            gpuTy : CL.ty,              (* variable's type on GPU (i.e., OpenCL type) *)
            hToS: stm,                  (* the statement that converts the variable to its *)
                                        (* shadow representation *)
            var : CL.var                (* variable name *)
          }

    datatype strand = Strand of {
        name : string,
        tyName : string,
        state : mirror_var list,
        output : (Ty.ty * CL.var),	(* the strand's output variable (only one for now) *)
        code : CL.decl list ref,
        init_code: CL.decl ref
      }

    datatype program = Prog of {
        name : string,                  (* stem of source file *)
        double : bool,                  (* true for double-precision support *)
        parallel : bool,                (* true for multithreaded (or multi-GPU) target *)
        debug : bool,                   (* true for debug support in executable *)
        globals : mirror_var list ref, 
        topDecls : CL.decl list ref,
        strands : strand AtomTable.hash_table,
        initially :  CL.decl ref,
        numDims: int ref,               (* number of dimensions in initially iteration *)
        imgGlobals: (string * int) list ref,
        prFn: CL.decl ref,
        outFn: CL.decl ref 
      }

    datatype env = ENV of {
        info : env_info,
        vMap : var V.Map.map,
        scope : scope
      }

    and env_info = INFO of {
        prog : program
      }

    and scope
      = NoScope
      | GlobalScope
      | InitiallyScope
      | StrandScope			(* strand initialization *)
      | MethodScope of StrandUtil.method_name	(* method body; vars are state variables *)

  (* the supprted widths of vectors of reals on the target. *)
(* FIXME: for OpenCL 1.1, 3 is also valid *)
    fun vectorWidths () = [2, 4, 8, 16]

  (* we do not support printing on the OpenCL target *)
    val supportsPrinting = false

  (* tests for whether various expression forms can appear inline *)
    fun inlineCons n = (n < 2)          (* vectors are inline, but not matrices *)
    val inlineMatrixExp = false         (* can matrix-valued expressions appear inline? *)

  (* TreeIL to target translations *)
    structure Tr =
      struct
        fun fragment (ENV{info, vMap, scope}, blk) = let
              val (vMap, stms) = (case scope
                     of GlobalScope => ToC.trFragment (vMap, blk)
                      | InitiallyScope => ToC.trFragment (vMap, blk)
                      | _ => ToCL.trFragment (vMap, blk)
                    (* end case *))
              in
                (ENV{info=info, vMap=vMap, scope=scope}, stms)
              end
        fun block (ENV{vMap, scope, ...}, blk) = (case scope
	       of StrandScope => ToC.trBlock (vMap, blk)
		| MethodScope name => ToCL.trBlock (vMap, blk)
		| InitiallyScope => ToCL.trBlock (vMap, blk)
		| _ => ToC.trBlock (vMap, blk)
	      (* end case *))
        fun exp (ENV{vMap, ...}, e) = ToCL.trExp(vMap, e)
      end

  (* variables *)
    structure Var =
      struct
        fun mirror (ty, name, shadowEnv) = {
                hostTy = ToC.trType ty,
                shadowTy = shadowTy ty,
                gpuTy = ToCL.trType ty,
                hToS = case shadowEnv
		   of GLOBAL_SHADOW => convertToShadow (ty, name)
		    | STRAND_SHADOW => convertStrandToShadow(ty, name, "selfIn", "selfOut")
		  (* end case *),
                var = name
              }
        fun name (ToCL.V(_, name)) = name
        fun global (Prog{globals, imgGlobals, ...}, name, ty) = let
              val x = mirror (ty, name, GLOBAL_SHADOW)
              fun isImgGlobal (Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}), name) = 
                    imgGlobals  := (name,dim) :: !imgGlobals 
                | isImgGlobal _ =  () 
              in
                globals := x :: !globals;
                isImgGlobal (ty, name); 
                ToCL.V(#gpuTy x, name)
              end
        fun param x = ToCL.V(ToCL.trType(V.ty x), V.name x)
      end

  (* environments *)
    structure Env =
      struct
      (* create a new environment *)
        fun new prog = ENV{
                info=INFO{prog = prog},
                vMap = V.Map.empty,
                scope = NoScope
              }
      (* define the current translation context *)
        fun setScope scope (ENV{info, vMap, ...}) = ENV{info=info, vMap=vMap, scope=scope}
        val scopeGlobal = setScope GlobalScope
        val scopeInitially = setScope InitiallyScope
        fun scopeStrand env = setScope StrandScope env
        fun scopeMethod (env, name) = setScope (MethodScope name) env
      (* bind a TreeIL varaiable to a target variable *)
        fun bind (ENV{info, vMap, scope}, x, x') = ENV{
                info = info,
                vMap = V.Map.insert(vMap, x, x'),
                scope = scope
              }
      end

  (* programs *)
    structure Program =
      struct
        fun new {name, double, parallel, debug} = (
              RN.initTargetSpec double;
              CNames.initTargetSpec {double=double, long=false};
              Prog{
                  name = name,
                  double = double, parallel = parallel, debug = debug,
                  globals = ref [],
                  topDecls = ref [],
                  strands = AtomTable.mkTable (16, Fail "strand table"),
                  initially = ref(CL.D_Comment["missing initially"]),
                  numDims = ref 0, 
                  imgGlobals = ref[], 
                  prFn = ref(CL.D_Comment(["No Print Function"])), 
                  outFn = ref(CL.D_Comment(["No Output Function"]))
                })

      (* register the code that is used to register command-line options for input variables *)
        fun inputs (Prog{topDecls, ...}, stm) = let
              val inputsFn = CL.D_Func(
                    [], CL.voidTy, RN.registerOpts,
                    [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.optionsTy), "opts")],
                    stm)
              in
                topDecls := inputsFn :: !topDecls
              end
                
      (* register the global initialization part of a program *)
        fun init (Prog{topDecls, ...}, init) = let
              val globalsDecl = CL.mkAssign(CL.mkVar RN.globalsVarName,
                    CL.mkApply("malloc", [CL.mkSizeof(CL.T_Named RN.globalsTy)]))
              val initFn = CL.D_Func(
                    [], CL.voidTy, RN.initGlobals, [],
                    CL.mkBlock[
                        globalsDecl,
                        CL.mkCall(RN.initGlobalsHelper, [CL.mkVar RN.globalsVarName])
                      ])
              val initHelperFn = CL.D_Func(
                    [], CL.voidTy, RN.initGlobalsHelper,
                    [CL.PARAM([], globPtrTy, RN.globalsVarName)],
                    init)
              val shutdownFn = CL.D_Func(
                    [], CL.voidTy, RN.shutdown,
                    [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.worldTy), "wrld")],
                    CL.S_Block[])
              in
                topDecls := shutdownFn :: initFn :: initHelperFn :: !topDecls
              end

      (* create and register the initially function for a program *)
        fun initially {
              prog = Prog{name=progName, strands, initially,numDims, ...},
              isArray : bool,
              iterPrefix : stm list,
              iters : (var * exp * exp) list,
              createPrefix : stm list,
              strand : Atom.atom,
              args : exp list
            } = let
              val name = Atom.toString strand
              val nDims = List.length iters
              val worldTy = CL.T_Ptr(CL.T_Named N.worldTy)
              fun mapi f xs = let
                    fun mapf (_, []) = []
                      | mapf (i, x::xs) = f(i, x) :: mapf(i+1, xs)
                    in
                      mapf (0, xs)
                    end
              val baseInit = mapi (fn (i, (_, e, _)) => (i, CL.I_Exp e)) iters
              val sizeInit = mapi
                    (fn (i, (CL.V(ty, _), lo, hi)) =>
                        (i, CL.I_Exp(CL.mkBinOp(hi, CL.#-, lo) #+# CL.mkIntTy(1, ty)))
                    ) iters
            (* code to allocate the world and initial strands *)
              val wrld = "wrld"
              val allocCode = [
                      CL.mkComment["allocate initial block of strands"],
                      CL.mkDecl(CL.T_Array(CL.int32, SOME nDims), "base", SOME(CL.I_Array baseInit)),
                      CL.mkDecl(CL.T_Array(CL.uint32, SOME nDims), "size", SOME(CL.I_Array sizeInit)),
                      CL.mkDecl(worldTy, wrld,
                        SOME(CL.I_Exp(CL.mkApply(N.allocInitially, [
                            CL.mkVar "ProgramName",
                            CL.mkUnOp(CL.%&, CL.mkVar(N.strandDesc name)),
                            CL.mkBool isArray,
                            CL.mkIntTy(IntInf.fromInt nDims, CL.int32),
                            CL.mkVar "base",
                            CL.mkVar "size"
                          ]))))
                    ]
            (* create the loop nest for the initially iterations *)
              val indexVar = "ix"
              val strandTy = CL.T_Ptr(CL.T_Named(N.strandTy name))
              fun mkLoopNest [] = CL.mkBlock(createPrefix @ [
                      CL.mkDecl(strandTy, "sp",
                        SOME(CL.I_Exp(
                          CL.mkCast(strandTy,
                          CL.mkApply(N.inState, [CL.mkVar "wrld", CL.mkVar indexVar]))))),
                      CL.mkCall(N.strandInit name, CL.mkVar "sp" :: args),
                      CL.mkAssign(CL.mkVar indexVar, CL.mkVar indexVar #+# CL.mkIntTy(1, CL.uint32))
                    ])
                | mkLoopNest ((CL.V(ty, param), lo, hi)::iters) = let
                    val body = mkLoopNest iters
                    in
                      CL.mkFor(
                        [(ty, param, lo)],
                        CL.mkBinOp(CL.mkVar param, CL.#<=, hi),
                        [CL.mkPostOp(CL.mkVar param, CL.^++)],
                        body)
                    end
              val iterCode = [
                      CL.mkComment["initially"],
                      CL.mkDecl(CL.uint32, indexVar, SOME(CL.I_Exp(CL.mkIntTy(0, CL.uint32)))),
                      mkLoopNest iters
                    ]
              val body = CL.mkBlock(
                    iterPrefix @
                    allocCode @
                    iterCode @
                    [CL.mkReturn(SOME(CL.mkVar "wrld"))])
              val initFn = CL.D_Func([], worldTy, N.initially, [], body)
              in
                numDims := nDims;
                initially := initFn
              end


      (***** OUTPUT *****)

        fun genStrandPrint (Strand{name, tyName, state, output, code, ...}) = let
            (* the print function *)
              val prFnName = concat[name, "Print"]
              val prFn = let
                    val params = [
                          CL.PARAM([], CL.T_Ptr(CL.T_Named "FILE"), "outS"),
                          CL.PARAM([], CL.T_Ptr(CL.T_Named(RN.strandShadowTy tyName)), "self")
                        ]
                    val (ty, x) = output
                    val outState = CL.mkIndirect(CL.mkVar "self", x)
                    val prArgs = (case ty
                           of Ty.IntTy => [CL.mkStr(!N.gIntFormat ^ "\n"), outState]
                            | Ty.SeqTy(Ty.IntTy, d) => let
				fun sel i = CL.mkApply(
					  "VSUB",
					  [outState, CL.mkInt(IntInf.fromInt i)])
                                val fmt = CL.mkStr(
                                      String.concatWith " " (List.tabulate(d, fn _ => !N.gIntFormat))
                                      ^ "\n")
                                val args = List.tabulate (d, sel)
                                in
                                  fmt :: args
                                end
                            | Ty.TensorTy[] => [CL.mkStr "%f\n", outState]
                            | Ty.TensorTy[d] => let
				fun sel i = CL.mkApply(
					"VSUB",
					[outState, CL.mkInt(IntInf.fromInt i)])
                                val fmt = CL.mkStr(
                                      String.concatWith " " (List.tabulate(d, fn _ => "%f"))
                                      ^ "\n")
                                val args = List.tabulate (d, sel)
                                in
                                  fmt :: args
                                end
                            | _ => raise Fail("genStrand: unsupported output type " ^ Ty.toString ty)
                          (* end case *))
                    in
                      CL.D_Func(["static"], CL.voidTy, prFnName, params,
                        CL.mkCall("fprintf", CL.mkVar "outS" :: prArgs))
                    end
              in
                prFn
              end 
 
        fun genStrandTyDef(targetTy,Strand{state,...},tyName) =  
		(case state
		   of [] => CL.D_Comment(["No Strand Defintiion Included"])  
		    | _ => CL.D_StructDef(revmap (fn x => (targetTy x, #var x)) state,
                			tyName)
			 	 (* end case *))



     (* generates the globals buffers and arguments function *) 
        fun genConvertShadowTypes (Strand{name, tyName, state,...}) = let
            (* Delcare opencl setup objects *) 
              val errVar = "err"
              val params = [
                      CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfOut"), 
                      CL.PARAM([], CL.T_Ptr(CL.T_Named(RN.strandShadowTy tyName)), "selfIn")
                    ]
              val body = List.map (fn (x:mirror_var) => #hToS x) state
              in
                CL.D_Func([], CL.voidTy, RN.strandConvertName name, params, CL.mkBlock body)
              end

      (* generates the opencl buffers for the image data *) 
        fun getGlobalDataBuffers (globals, imgGlobals, contextVar, errVar) = let 
              val globalBuffErr = "error creating OpenCL global buffer\n"  
              fun errorFn msg = CL.mkIfThen(CL.mkBinOp(CL.mkVar errVar, CL.#!=, CL.mkVar "CL_SUCCESS"), 
                    CL.mkBlock([CL.mkCall("fprintf",[CL.mkVar "stderr", CL.mkStr msg]), 
                    CL.mkCall("exit",[CL.mkInt 1])]))
              val shadowTypeDecl =
		    CL.mkDecl(CL.T_Named(RN.shadowGlobalsTy), RN.shadowGlaobalsName, NONE)
              val globalToShadowStms = List.map (fn (x:mirror_var) => #hToS x) globals 
              val globalBufferDecl = CL.mkDecl(clMemoryTy,concat[RN.globalsVarName,"_cl"],NONE)
              val globalBuffer = CL.mkAssign(CL.mkVar(concat[RN.globalsVarName,"_cl"]),
                    CL.mkApply("clCreateBuffer", [
                        CL.mkVar contextVar,
                        CL.mkBinOp(CL.mkVar "CL_MEM_READ_ONLY", CL.#|, CL.mkVar "CL_MEM_COPY_HOST_PTR"),
                        CL.mkSizeof(CL.T_Named RN.shadowGlobalsTy),
                        CL.mkUnOp(CL.%&,CL.mkVar RN.shadowGlaobalsName),
                        CL.mkUnOp(CL.%&,CL.mkVar errVar)
                      ]))
              fun genDataBuffers ([],_,_,_) = [] 
                | genDataBuffers ((var,nDims)::globals, contextVar, errVar,errFn) = let
                    val hostVar = CL.mkIndirect(CL.mkVar RN.globalsVarName, var)
                    val size = CL.mkIndirect(hostVar, "dataSzb") 
                    in 
                      CL.mkDecl(clMemoryTy, RN.addBufferSuffixData var ,NONE) :: 
                      CL.mkAssign(CL.mkVar(RN.addBufferSuffixData var),
                        CL.mkApply("clCreateBuffer", [
                            CL.mkVar contextVar,
                            CL.mkVar "CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR",
                            size,
                            CL.mkIndirect(hostVar, "data"),
                            CL.mkUnOp(CL.%&,CL.mkVar errVar)
                          ])) ::
                        errFn(concat["error in creating ",RN.addBufferSuffixData var, " global buffer\n"]) ::
                        genDataBuffers(globals,contextVar,errVar,errFn) 
                    end
              in 
                [shadowTypeDecl] @ globalToShadowStms 
                @ [globalBufferDecl, globalBuffer,errorFn(globalBuffErr)]
                @ genDataBuffers(imgGlobals,contextVar,errVar,errorFn) 
              end 

      (* generates the kernel arguments for the image data *) 
        fun genGlobalArguments (globals, count, kernelVar, errVar) = let 
              val globalArgErr = "error creating OpenCL global argument\n"  
              fun errorFn msg = CL.mkIfThen(CL.mkBinOp(CL.mkVar errVar, CL.#!=, CL.mkVar "CL_SUCCESS"), 
                    CL.mkBlock([CL.mkCall("fprintf",[CL.mkVar "stderr", CL.mkStr msg]), 
                    CL.mkCall("exit",[CL.mkInt 1])]))
              val globalArgument = CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.&=,
                    CL.mkApply("clSetKernelArg",
                      [CL.mkVar kernelVar, 
                       CL.mkPostOp(CL.mkVar count, CL.^++),
                       CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
                       CL.mkUnOp(CL.%&,CL.mkVar(concat[RN.globalsVarName,"_cl"]))])))     
              fun genDataArguments ([],_,_,_,_) = [] 
                | genDataArguments ((var,nDims)::globals,count,kernelVar,errVar,errFn) =  
                    CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.$=,
                      CL.mkApply("clSetKernelArg",
                        [CL.mkVar kernelVar, 
                         CL.mkPostOp(CL.mkVar count, CL.^++),
                         CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
                         CL.mkUnOp(CL.%&,CL.mkVar(RN.addBufferSuffixData var))]))) ::
                         errFn(concat["error in creating ",RN.addBufferSuffixData var, " argument\n"]) ::
                    genDataArguments (globals,count,kernelVar,errVar,errFn) 
              in   
                globalArgument :: errorFn globalArgErr ::
                  genDataArguments(globals, count, kernelVar, errVar,errorFn) 
              end 

      (* generates the globals buffers and arguments function *) 
        fun genGlobalBuffersArgs (globals,imgGlobals) = let
            (* Delcare opencl setup objects *) 
              val errVar = "err"
              val params = [
                      CL.PARAM([],CL.T_Named("cl_context"), "context"),
                      CL.PARAM([],CL.T_Named("cl_kernel"), "kernel"),
                      CL.PARAM([],CL.T_Named("cl_command_queue"), "cmdQ"),
                      CL.PARAM([],CL.T_Named("int"), "argStart")
                    ]
              val body = (case globals 
		     of [] => [CL.mkReturn(NONE)] 
		      | _ => let 		
			    val clGlobalBuffers =
				  getGlobalDataBuffers(globals, !imgGlobals, "context", errVar) 
			    val clGlobalArguments =
				  genGlobalArguments(!imgGlobals, "argStart", "kernel", errVar)
                            in
                            (* Body: put all the statments together *) 
			      CL.mkDecl(clIntTy, errVar, SOME(CL.I_Exp(CL.mkInt 0)))
			      :: clGlobalBuffers @ clGlobalArguments 
                            end 
		    (*end of case*))
              in
                CL.D_Func([],CL.voidTy,RN.globalsSetupName,params,CL.mkBlock(body))
              end 

      (* generate the global image meta-data and data parameters *) 
        fun genKeneralGlobalParams ((name,tyname)::[],line) = 
	      concat[line, "__global void *", RN.addBufferSuffixData name]
          | genKeneralGlobalParams ([],line) = line  
          | genKeneralGlobalParams ((name,tyname)::rest, line) = 
	      genKeneralGlobalParams(rest, concat[line, "__global void *", RN.addBufferSuffixData name, ",\n"]) 

	fun genUpdateMethod (Strand{name, tyName, state,...}, globals, imgGlobals) = let
	      val imageDataStms = List.map
		      (fn (x,_) => concat[
			  RN.globalImageDataName, ".", RN.imageDataName x, " = ",
			  RN.addBufferSuffixData x, ";","\n"
			])
			(!imgGlobals)
	      fun select ([], a, _) = a
		| select (_, _, b) = b
	      val placeHolders = [
		      (RN.place_holders, tyName), 
		      (RN.p_addDatPtr, select (!imgGlobals, "", ",")),
		      (RN.p_addGlobals, select (!globals, "", ",")),
		      (RN.p_globals, select (!globals, "", "__global Diderot_Globals_t *diderotGlobals")),
		      (RN.p_globalVar, select (!globals, "0", RN.globalsVarName)),
		      (RN.p_dataVar, select (!globals, "0", RN.globalImageDataName)),
		      (RN.p_dataPtr, genKeneralGlobalParams (!imgGlobals, "")),
		      (RN.p_dataAssign,  select (!imgGlobals, "",
			String.concat("Diderot_data_ptr_t diderotDataPtrs;\n" :: imageDataStms)))
		    ]
	      in 
		CL.verbatim [CLUpdateFrag.text] placeHolders 
	      end 

	fun genStrandCopy(Strand{tyName,name,state,...}) = let 
	      val params = [
		      CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfIn"),
		      CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfOut")
		    ]
	      val assignStms = List.rev(
		    List.map
		      (fn x => CL.mkAssign(lvalueSV(#var x), rvalueSV(#var x)))
		        state)
	      in 
		CL.D_Func([""], CL.voidTy, RN.strandCopy, params,CL.mkBlock(assignStms)) 
	      end  

      (* generate a global structure type definition from the list of globals *) 
        fun genGlobalStruct (_, [], _) = CL.D_Comment(["No Global Definition"])
	  | genGlobalStruct (targetTy, globals, tyName) = let
              val globs = List.map (fn (x : mirror_var) => (targetTy x, #var x)) globals
              in
		CL.D_StructDef(globs, tyName)
              end
        
      (* generate a global structure type definition from the image data of the image globals *) 
        fun genImageDataStruct ([], _) = CL.D_Comment(["No Image Data Ptrs Definition"])
	  | genImageDataStruct (imgGlobals, tyName) = let
              val globs = List.map
		    (fn (x, _) => (globalPtr CL.voidTy, RN.imageDataName x))
		      imgGlobals
              in
		CL.D_StructDef(globs, tyName)
              end

        fun genGlobals (declFn, targetTy, globals) = let
              fun doVar (x : mirror_var) = declFn (CL.D_Var([], targetTy x, #var x, NONE))
              in
                List.app doVar globals
              end

        fun genOutputFun(Strand{name, output,tyName, state, code,...}) = let 
	    (* the output function *)
              val outFnName = concat[name, "_Output"]
              val outFun = let
                    val params = [
                          CL.PARAM([], CL.T_Ptr CL.voidTy, "outS"),
                          CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "self")
                        ]
            (* the type and access expression for the strand's output variable *)
              val (outTy, outState) = (#1 output, CL.mkIndirect(CL.mkVar "self", #2 output))
                    val outState = CL.mkUnOp(CL.%&, outState)
                    in
                      CL.D_Func(["static"], CL.voidTy, outFnName, params,
                        CL.mkCall("memcpy", [CL.mkVar "outS", outState, CL.mkSizeof(shadowTy outTy)] ))
                    end
            in 
              outFun 
            end 
    
        fun genStrandDesc (outFn,Strand{name, output,tyName, state, code,...}) = let
            (* the output function *)
              val outFnName = concat[name, "_Output"]
            (* the strand's descriptor object *)
              val descI = let
                    fun fnPtr (ty, f) = CL.I_Exp(CL.mkCast(CL.T_Named ty, CL.mkVar f))
                    val (outTy, _) = output
                    in
                      CL.I_Struct[
                          ("name", CL.I_Exp(CL.mkStr name)),
                          ("stateSzb", CL.I_Exp(CL.mkSizeof(CL.T_Named(RN.strandTy name)))),
                          ("shadowStrandSzb", CL.I_Exp(CL.mkSizeof(CL.T_Named(RN.strandShadowTy (RN.strandTy name))))),
(* FIXME: we may need to add a shadowOutputSzb field too for OpenCL *)
                          ("outputSzb", CL.I_Exp(CL.mkSizeof(shadowTy outTy))),
                          ("nrrdType", CL.I_Exp(CL.mkInt (NrrdTypes.toNrrdType outTy))), 
			              ("nrrdSzb", CL.I_Exp(CL.mkInt (NrrdTypes.toNrrdSize outTy))),
                          ("update", fnPtr("update_method_t", "0")),
                          ("strandCopy",  fnPtr("convert_method_t", RN.strandConvertName name)),
                          ("print", fnPtr("print_method_t", RN.strandPrintName name)),
                          ("output", fnPtr("output_method_t", outFnName)) (* FIXME *)
                        ]
                    end
              val desc = CL.D_Var([], CL.T_Named N.strandDescTy, N.strandDesc name, SOME descI)
              in
                desc
              end

      (* generate the table of strand descriptors *)
        fun genStrandTable (declFn, strands) = let
              val nStrands = length strands
              fun genInit (Strand{name, ...}) = CL.I_Exp(CL.mkUnOp(CL.%&, CL.mkVar(N.strandDesc name)))
              fun genInits (_, []) = []
                | genInits (i, s::ss) = (i, genInit s) :: genInits(i+1, ss)
              in
                declFn (CL.D_Var([], CL.int32, N.numStrands,
                  SOME(CL.I_Exp(CL.mkIntTy(IntInf.fromInt nStrands, CL.int32)))));
                declFn (CL.D_Var([],
                  CL.T_Array(CL.T_Ptr(CL.T_Named N.strandDescTy), SOME nStrands),
                  N.strands,
                  SOME(CL.I_Array(genInits (0, strands)))))
              end

        fun genSrc (baseName, prog) = let
              val Prog{
		      name, double, globals, topDecls, strands, initially,
		      imgGlobals, numDims,outFn, ...
		    } = prog
              val clFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "cl"}
              val cFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "c"}
              val clOutS = TextIO.openOut clFileName
              val cOutS = TextIO.openOut cFileName
              val clppStrm = PrintAsCL.new clOutS
              val cppStrm = PrintAsC.new cOutS
              val progName = name
              fun cppDecl dcl = PrintAsC.output(cppStrm, dcl) 
              fun clppDecl dcl = PrintAsCL.output(clppStrm, dcl)
              val strands = AtomTable.listItems strands
              val [strand as Strand{name, tyName, code, init_code, ...}] = strands
              in
              (* Generate the OpenCL file *)
              (* Retrieve the header information *) 
                clppDecl (CL.verbatim [HF.text] [
		    ("OUTFILE", clFileName),
		    ("SRCFILE", OS.Path.joinBaseExt{base=baseName, ext=SOME "diderot"}),
		    ("PRECISION", if double then "DOUBLE" else "SINGLE")
		  ]);
	      (* if there are no globals, then define a dummy type *)
		if List.null(!globals)
		  then clppDecl (CL.D_Verbatim["typedef void ", RN.globalsTy, ";\n"])
		  else ();
	      (* if there are no images, then define a dummy type *)
		if List.null(!imgGlobals)
		  then clppDecl (CL.D_Verbatim["typedef void * ", RN.imageDataType, ";\n"])
		  else ();
	      (* Retrieve the scheduler kernels and functions *) 
                clppDecl (CL.D_Verbatim[SF.text]); 
		clppDecl (CL.D_Verbatim[CLEigen2x2Frag.text]); 
		clppDecl (CL.D_Verbatim[CLEigen3x3Frag.text]); 
                clppDecl (genGlobalStruct (#gpuTy, !globals, RN.globalsTy)); 
                clppDecl (genImageDataStruct(!imgGlobals, RN.imageDataType)); 
                clppDecl (genStrandTyDef(#gpuTy, strand, tyName));  
                List.app clppDecl (!code); 
		clppDecl (genStrandCopy strand); 
                clppDecl (genUpdateMethod(strand, globals, imgGlobals)); 
              (* Generate the Host C file *)
                cppDecl (CL.D_Verbatim[
		    if double
		      then "#define DIDEROT_DOUBLE_PRECISION\n"
		      else "#define DIDEROT_SINGLE_PRECISION\n",
		    "#define DIDEROT_INT\n",
		    "#define DIDEROT_TARGET_CL\n",
		    "#include \"Diderot/diderot.h\"\n"
		  ]); 
                cppDecl (CL.D_Verbatim[
		  (case !globals 
                     of [] => concat["typedef void ", RN.globalsTy,";\n"] 
                      |  _ => ""
		     (*end of case*))
                  ]);
                cppDecl (CL.D_Var(["static"], CL.charPtr, "ProgramName",
                  SOME(CL.I_Exp(CL.mkStr progName))));
                cppDecl (genGlobalStruct (#hostTy, !globals, RN.globalsTy));
                cppDecl (genGlobalStruct (#shadowTy, !globals, RN.shadowGlobalsTy));
(* FIXME: does this really need to be a global? *)
                cppDecl (CL.D_Var(["static"], globPtrTy, RN.globalsVarName, NONE));
                cppDecl (genStrandTyDef (#hostTy, strand, tyName));
                cppDecl (genStrandTyDef (#shadowTy, strand, RN.strandShadowTy tyName));
                cppDecl (genConvertShadowTypes strand); 
                cppDecl  (!init_code);
                cppDecl (genStrandPrint strand); 
                cppDecl (genOutputFun strand); 
                List.app cppDecl (List.rev (!topDecls));
                cppDecl (genGlobalBuffersArgs (!globals,imgGlobals));
                List.app (fn strand => cppDecl (genStrandDesc (outFn,strand))) strands;
                genStrandTable (cppDecl, strands);
                cppDecl (!initially);    
                PrintAsC.close cppStrm;
                PrintAsCL.close clppStrm; 
                TextIO.closeOut cOutS;
                TextIO.closeOut clOutS
              end

      (* output the code to the filesystem.  The string is the basename of the source file *)
        fun generate (basename, prog as Prog{double, parallel, debug, ...}) = let
              fun condCons (true, x, xs) = x::xs
                | condCons (false, _, xs) = xs
            (* generate the C compiler flags *)
              val cflags = ["-I" ^ Paths.diderotInclude, "-I" ^ Paths.teemInclude]
              val cflags = condCons (parallel, #pthread Paths.cflags, cflags)
              val cflags = if debug
                    then #debug Paths.cflags :: cflags
                    else #ndebug Paths.cflags :: cflags
              val cflags = #base Paths.cflags :: cflags
            (* generate the loader flags *)
              val extraLibs = condCons (parallel, #pthread Paths.extraLibs, [])
              val extraLibs = Paths.teemLinkFlags @  #base Paths.extraLibs :: extraLibs
                   val extraLibs =  #cl Paths.extraLibs :: extraLibs
              val rtLib = TargetUtil.runtimeName {
                      target = TargetUtil.TARGET_CL,
                      parallel = parallel, double = double, debug = debug
                    }
              val ldOpts = rtLib :: extraLibs
              in
                genSrc (basename, prog); 
                RunCC.compile (basename, cflags);
                RunCC.link (basename, ldOpts)
              end

      end (* Program *)

  (* strands *)
    structure Strand =
      struct

        fun define (Prog{strands, ...}, strandId, state) = let
              val name = Atom.toString strandId
            (* the output state variable *)
              val outputVar = (case List.filter IL.StateVar.isOutput state
                     of [] => raise Fail("no output specified for strand " ^ name)
                      | [x] => (IL.StateVar.ty x, IL.StateVar.name x)
                      | _ => raise Fail("multiple outputs in " ^ name)
                    (* end case *))
            (* the state variables *)
              val state = let
                    fun cvt x = Var.mirror (IL.StateVar.ty x, IL.StateVar.name x, STRAND_SHADOW)
                    in
                      List.map cvt state
                    end
              val strand = Strand{
                      name = name,
                      tyName = RN.strandTy name,
                      state = state,
                      output = outputVar,
                      code = ref [],
                      init_code = ref (CL.D_Comment(["no init code"]))
                    }
              in
                AtomTable.insert strands (strandId, strand);
                strand
              end

      (* return the strand with the given name *)
        fun lookup (Prog{strands, ...}, strandId) = AtomTable.lookup strands strandId

      (* register the strand-state initialization code.  The variables are the strand
       * parameters.
       *)
        fun init (Strand{name, tyName, code, init_code, ...}, params, init) = let
              val fName = RN.strandInit name
              val params =
                    clParam ("",CL.T_Ptr(CL.T_Named tyName), "selfOut") ::
                      List.map (fn (ToCL.V(ty, x)) => CL.PARAM([], ty, x)) params
              val initFn = CL.D_Func([], CL.voidTy, fName, params, init)
              in
                init_code := initFn 
              end
        
      (* register a strand method *)
        fun method (Strand{name, tyName, code,...}, methName, body) = let
              val params = [
                      globalParam (CL.T_Ptr(CL.T_Named tyName), "selfIn"),
                      globalParam (CL.T_Ptr(CL.T_Named tyName), "selfOut"),
                      globalParam (CL.T_Ptr(CL.T_Named (RN.globalsTy)), RN.globalsVarName), 
                      CL.PARAM([],CL.T_Named(RN.imageDataType),RN.globalImageDataName)
                    ]
             val updateParams = [
                      globalParam (CL.T_Ptr(CL.T_Named tyName), "selfIn"),
                      globalParam (CL.T_Ptr(CL.T_Named tyName), "selfOut"),
                      globalParam (CL.T_Ptr(CL.T_Named tyName), "stateIn"), 
                      globalParam (CL.T_Ptr(CL.intTy), RN.gridName), 
                      globalParam (CL.T_Ptr(CL.intTy), RN.gridCounterName), 
                      globalParam (CL.T_Ptr(CL.T_Named RN.gridTy),RN.gridCxtName), 
                      globalParam (CL.T_Ptr(CL.T_Named RN.spatialInfoTy),RN.spatialInfoName), 
                      globalParam (CL.T_Ptr(CL.T_Named tyName), RN.spatialStrandsName), 
                      CL.PARAM([],CL.intTy,"idx"), 
                      globalParam (CL.T_Ptr(CL.intTy), RN.spatialBufferName), 
                      globalParam (CL.T_Ptr(CL.T_Named (RN.globalsTy)), RN.globalsVarName), 
                      CL.PARAM([],CL.T_Named(RN.imageDataType),RN.globalImageDataName)
                    ]

              val (fName,resTy,params') = (case methName
                     of StrandUtil.Update => (RN.strandUpdate,CL.T_Named "StrandStatus_t",updateParams)
                      | StrandUtil.Stabilize => (name ^ StrandUtil.nameToString methName, CL.voidTy,params)
                    (* end case *))
              val methFn = CL.D_Func([], resTy, fName, params', body)
              in
                code := methFn :: !code
              end

      end

  end

structure CLBackEnd = CodeGenFn(CLTarget)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0