Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /branches/vis12/src/compiler/cl-target/cl-target.sml
ViewVC logotype

View of /branches/vis12/src/compiler/cl-target/cl-target.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2435 - (download) (annotate)
Sat Sep 21 14:36:39 2013 UTC (6 years ago) by jhr
File size: 40129 byte(s)
  Modify compiler to dynamically determine the intall location of the Diderot
  libraries and include files.  This feature will allow the diderot compiler
  to be installed in places other than where it is built.
(* cl-target.sml
 *
 * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
 * All rights reserved.
 *)

structure CLTarget : TARGET =
  struct

    structure IL = TreeIL
    structure V = IL.Var
    structure Ty = IL.Ty
    structure CL = CLang
    structure RN = RuntimeNames
    structure N = CNames
    structure ToC = TreeToC
    structure ToCL = TreeToCL
    structure HF = CLHeaderFrag
    structure SF = CLSchedFrag

  (* revmap f l == List.rev(List.map f l) *)
    fun revmap f = let
          fun rmap ([], l) = l
            | rmap (x::r, l) = rmap (r, f x :: l)
          in
            fn l => rmap (l, [])
          end

  (* common arithmetic *)
    fun #+# (a, b) = CL.mkBinOp(a, CL.#+, b)
    fun #*# (a, b) = CL.mkBinOp(a, CL.#*, b)
    infix 5 #+#
    infix 6 #*#

  (* translate TreeIL types to shadow types *)
    fun shadowTy ty = (case ty
           of Ty.BoolTy => CL.T_Named "cl_uint"
            | Ty.IntTy => CL.T_Named(RN.shadowIntTy ())
            | Ty.TensorTy[] => CL.T_Named(RN.shadowRealTy ())
            | Ty.TensorTy[n] => CL.T_Named(RN.shadowVecTy n)
            | Ty.TensorTy[n, m] => CL.T_Named(RN.shadowMatTy(n,m))
            | Ty.ImageTy info => CL.T_Named(RN.shadowImageTy(ImageInfo.dim info))
            | _ => raise Fail(concat["TreeToC.trType(", Ty.toString ty, ")"])
          (* end case *))

(* FIXME: add comments that more clearly explain the difference between convertToShadow and
 * convertStrandToShadow
 *)
   (* translate TreeIL types to shadow types *)
    fun convertToShadow (ty, name) = (case ty
           of Ty.IntTy => CL.mkAssign(
                CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name),
                CL.mkIndirect(CL.mkVar RN.globalsVarName, name))
            | Ty.TensorTy[n]=> CL.mkCall(RN.convertToShadowVec n, [
                  CL.mkUnOp(CL.%&, CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name)), 
                  CL.mkIndirect(CL.mkVar RN.globalsVarName, name)
                ]) 
            | Ty.ImageTy info => CL.mkCall(RN.shadowImageFunc(ImageInfo.dim info), [
                  CL.mkVar "context", 
                  CL.mkUnOp(CL.%&, CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name)), 
                  CL.mkIndirect(CL.mkVar RN.globalsVarName, name)
                ])
            | Ty.TensorTy[n, m] => CL.mkCall(RN.convertToShadowMat(m,n), [
                  CL.mkSelect(CL.mkVar RN.shadowGlaobalsName, name), 
                  CL.mkIndirect(CL.mkVar RN.globalsVarName, name)
                ]) 
            | _ => CL.mkAssign(
                CL.mkSelect(CL.mkVar RN.shadowGlaobalsName,name),
                CL.mkIndirect(CL.mkVar RN.globalsVarName, name))
          (* end case *))

   (* generate code to convert strand TreeIL types to shadow types *)
    fun convertStrandToShadow (ty, name, selfIn, selfOut) = (case ty
           of Ty.IntTy => CL.mkAssign(
                CL.mkIndirect(CL.mkVar selfIn, name),
                CL.mkIndirect(CL.mkVar selfOut, name))
            | Ty.TensorTy[n]=> CL.mkCall(RN.convertToShadowVec n, [
                  CL.mkUnOp(CL.%&, CL.mkIndirect(CL.mkVar selfIn, name)), 
                  CL.mkIndirect(CL.mkVar selfOut, name)
                ]) 
            | Ty.TensorTy[n, m] => CL.mkCall(RN.convertToShadowMat(m,n), [
                  CL.mkUnOp(CL.%&, CL.mkIndirect(CL.mkVar selfIn, name)), 
                  CL.mkIndirect(CL.mkVar selfOut, name)
                ]) 
            | _ => CL.mkAssign(
                CL.mkIndirect(CL.mkVar selfIn, name),
                CL.mkIndirect(CL.mkVar selfOut, name))
          (* end case *))

  (* helper functions for specifying parameters in various address spaces *)
    fun clParam spc (ty, x) = CL.PARAM([spc], ty, x)
    val globalParam = clParam "__global"
    val constantParam = clParam "__constant"
    val localParam = clParam "__local"
    val privateParam = clParam "__private"

  (* OpenCL global pointer type *)
    fun globalPtr ty = CL.T_Qual("__global", CL.T_Ptr ty)

  (* lvalue/rvalue state variable *)
    fun lvalueSV name = CL.mkIndirect(CL.mkVar "selfOut", name)
    fun rvalueSV name = CL.mkIndirect(CL.mkVar "selfIn", name)

    type var = CL.typed_var
    type exp = CL.exp
    type stm = CL.stm

  (* OpenCL specific types *)
    val clIntTy = CL.T_Named "cl_int"
    val clProgramTy = CL.T_Named "cl_program" 
    val clKernelTy  = CL.T_Named "cl_kernel"
    val clCmdQueueTy = CL.T_Named "cl_command_queue" 
    val clContextTy = CL.T_Named "cl_context" 
    val clDeviceIdTy = CL.T_Named "cl_device_id"
    val clPlatformIdTy = CL.T_Named "cl_platform_id" 
    val clMemoryTy = CL.T_Named "cl_mem"
    val globPtrTy = CL.T_Ptr(CL.T_Named RN.globalsTy)
(* FIXME: what are these for? *)
    datatype shadow_env = STRAND_SHADOW | GLOBAL_SHADOW

  (* variable or field that is mirrored between host and GPU *)
    type mirror_var = {
(* FIXME: perhaps it would be cleaner to just track the TreeIL type of the variable? *)
            hostTy : CL.ty,             (* variable type on Host (i.e., C type) *)
            shadowTy : CL.ty,           (* host-side shadow type of GPU type *)
            gpuTy : CL.ty,              (* variable's type on GPU (i.e., OpenCL type) *)
            hToS: stm,                  (* the statement that converts the variable to its *)
                                        (* shadow representation *)
            var : CL.var                (* variable name *)
          }

    datatype strand = Strand of {
        name : string,
        tyName : string,
        state : mirror_var list,
        output : (Ty.ty * CL.var),	(* the strand's output variable (only one for now) *)
        code : CL.decl list ref,
        init_code: CL.decl ref
      }

    datatype program = Prog of {
	props : Properties.props,
        globals : mirror_var list ref, 
        topDecls : CL.decl list ref,
        strands : strand AtomTable.hash_table,
        initially :  CL.decl ref,
        numDims: int ref,               (* number of dimensions in initially iteration *)
        imgGlobals: (string * int) list ref,
        prFn: CL.decl ref,
        outFn: CL.decl ref 
      }

    datatype env = ENV of {
        info : env_info,
        vMap : var V.Map.map,
        scope : scope
      }

    and env_info = INFO of {
        prog : program
      }

    and scope
      = NoScope
      | GlobalScope
      | InitiallyScope
      | StrandScope			(* strand initialization *)
      | MethodScope of StrandUtil.method_name	(* method body; vars are state variables *)

  (* the supprted widths of vectors of reals on the target. *)
(* FIXME: for OpenCL 1.1, 3 is also valid *)
    fun vectorWidths () = [2, 4, 8, 16]

  (* we do not support printing on the OpenCL target *)
    val supportsPrinting = false

  (* tests for whether various expression forms can appear inline *)
    fun inlineCons n = (n < 2)          (* vectors are inline, but not matrices *)
    val inlineMatrixExp = false         (* can matrix-valued expressions appear inline? *)

  (* TreeIL to target translations *)
    structure Tr =
      struct
        fun fragment (ENV{info, vMap, scope}, blk) = let
              val (vMap, stms) = (case scope
                     of GlobalScope => ToC.trFragment (vMap, blk)
                      | InitiallyScope => ToC.trFragment (vMap, blk)
                      | _ => ToCL.trFragment (vMap, blk)
                    (* end case *))
              in
                (ENV{info=info, vMap=vMap, scope=scope}, stms)
              end
        fun block (ENV{vMap, scope, ...}, blk) = (case scope
	       of StrandScope => ToC.trBlock (vMap, blk)
		| MethodScope name => ToCL.trBlock (vMap, blk)
		| InitiallyScope => ToCL.trBlock (vMap, blk)
		| _ => ToC.trBlock (vMap, blk)
	      (* end case *))
        fun free (ENV{vMap, ...}, blk) = ToC.trFree (vMap, blk)
        fun exp (ENV{vMap, ...}, e) = ToCL.trExp(vMap, e)
      end

  (* variables *)
    structure Var =
      struct
        fun mirror (ty, name, shadowEnv) = {
                hostTy = ToC.trType ty,
                shadowTy = shadowTy ty,
                gpuTy = ToCL.trType ty,
                hToS = case shadowEnv
		   of GLOBAL_SHADOW => convertToShadow (ty, name)
		    | STRAND_SHADOW => convertStrandToShadow(ty, name, "selfIn", "selfOut")
		  (* end case *),
                var = name
              }
        fun name (ToCL.V(_, name)) = name
        fun global (Prog{globals, imgGlobals, ...}, name, ty) = let
              val x = mirror (ty, name, GLOBAL_SHADOW)
              fun isImgGlobal (Ty.ImageTy info, name) = 
                    imgGlobals  := (name, ImageInfo.dim info) :: !imgGlobals 
                | isImgGlobal _ =  () 
              in
                globals := x :: !globals;
                isImgGlobal (ty, name); 
                ToCL.V(#gpuTy x, name)
              end
        fun param x = ToCL.V(ToCL.trType(V.ty x), V.name x)
      end

  (* environments *)
    structure Env =
      struct
      (* create a new environment *)
        fun new prog = ENV{
                info=INFO{prog = prog},
                vMap = V.Map.empty,
                scope = NoScope
              }
      (* bind a TreeIL varaiable to a target variable *)
        fun bind (ENV{info, vMap, scope}, x, x') = ENV{
                info = info,
                vMap = V.Map.insert(vMap, x, x'),
                scope = scope
              }
      (* define the current translation context *)
	fun setScope (scope, glob, selfIn, selfOut) (ENV{info, vMap, ...}) = ENV{
		info = info,
		vMap = List.foldl
		    (fn ((x, x'), vm) => V.Map.insert(vm, x, x'))
		      vMap [
			  (PseudoVars.global, CL.V(CL.voidTy, glob)),
			  (PseudoVars.selfIn, CL.V(CL.voidTy, selfIn)),
			  (PseudoVars.selfOut, CL.V(CL.voidTy, selfOut))
			],
		scope = scope
	      }
      (* define the current translation context *)
        val scopeGlobal = setScope (GlobalScope, RN.globalsVarName, "_bogus_", "_bogus_")
        val scopeInitially =
	      setScope (InitiallyScope, RN.globalsVarName, "_bogus_", "_bogus_")
        val scopeStrand = setScope (StrandScope, RN.globalsVarName, "selfIn", "selfOut")
        fun scopeMethod (env, name) =
	      setScope (MethodScope name, RN.globalsVarName, "selfIn", "selfOut") env
      end

  (* strands *)
    structure Strand =
      struct

        fun define (Prog{strands, ...}, strandId, state) = let
              val name = Atom.toString strandId
            (* the output state variable *)
              val outputVar = (case List.filter IL.StateVar.isOutput state
                     of [] => raise Fail("no output specified for strand " ^ name)
                      | [x] => (IL.StateVar.ty x, IL.StateVar.name x)
                      | _ => raise Fail("multiple outputs in " ^ name)
                    (* end case *))
            (* the state variables *)
              val state = let
                    fun cvt x = Var.mirror (IL.StateVar.ty x, IL.StateVar.name x, STRAND_SHADOW)
                    in
                      List.map cvt state
                    end
              val strand = Strand{
                      name = name,
                      tyName = RN.strandTy name,
                      state = state,
                      output = outputVar,
                      code = ref [],
                      init_code = ref (CL.D_Comment(["no init code"]))
                    }
              in
                AtomTable.insert strands (strandId, strand);
                strand
              end

      (* return the strand with the given name *)
        fun lookup (Prog{strands, ...}, strandId) = AtomTable.lookup strands strandId

      (* register the strand-state initialization code.  The variables are the strand
       * parameters.
       *)
        fun init (Strand{name, tyName, code, init_code, ...}, params, init) = let
              val fName = RN.strandInit name
              val params =
                    clParam "" (CL.T_Ptr(CL.T_Named tyName), "selfOut") ::
                      List.map (fn (ToCL.V(ty, x)) => CL.PARAM([], ty, x)) params
              val initFn = CL.D_Func([], CL.voidTy, fName, params, init)
              in
                init_code := initFn 
              end
        
      (* register a strand method *)
        fun method (Strand{name, tyName, code,...}, methName, body) = let
              val params = [
                      globalParam (CL.T_Ptr(CL.T_Named tyName), "selfIn"),
                      globalParam (CL.T_Ptr(CL.T_Named tyName), "selfOut"),
                      globalParam (CL.T_Ptr(CL.T_Named (RN.globalsTy)), RN.globalsVarName), 
                      clParam "" (CL.T_Named(RN.imageDataType), RN.globalImageDataName)
                    ]
              val (fName,resTy) = (case methName
                     of StrandUtil.Update => (RN.strandUpdate,CL.T_Named "StrandStatus_t")
                      | StrandUtil.Stabilize => (name ^ StrandUtil.nameToString methName, CL.voidTy)
                    (* end case *))
              val methFn = CL.D_Func([], resTy, fName, params, body)
              in
                code := methFn :: !code
              end

      end (* Strand *)

  (* programs *)
    structure Program =
      struct
        fun new (tgt : TargetUtil.target_desc, props) = (
              RN.initTargetSpec (#double tgt);
              CNames.initTargetSpec {double = #double tgt, long = false};
              Prog{
                  props = Properties.mkProps (tgt, props),
                  globals = ref [],
                  topDecls = ref [],
                  strands = AtomTable.mkTable (16, Fail "strand table"),
                  initially = ref(CL.D_Comment["missing initially"]),
                  numDims = ref 0, 
                  imgGlobals = ref[], 
                  prFn = ref(CL.D_Comment(["No Print Function"])), 
                  outFn = ref(CL.D_Comment(["No Output Function"]))
                })

      (* register the code that is used to register command-line options for input variables *)
        fun inputs (Prog{topDecls, ...}, env, blk) = let
              val inputsFn = CL.D_Func(
                    [], CL.voidTy, RN.registerOpts,
                    [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.optionsTy), "opts")],
                    Tr.block (env, blk))
              in
                topDecls := inputsFn :: !topDecls
              end
                
      (* register the global initialization part of a program *)
        fun init (Prog{topDecls, ...}, init) = let
              val globalsDecl = CL.mkAssign(CL.mkVar RN.globalsVarName,
                    CL.mkApply("malloc", [CL.mkSizeof(CL.T_Named RN.globalsTy)]))
              val initFn = CL.D_Func(
                    [], CL.voidTy, RN.initGlobals, [],
                    CL.mkBlock[
                        globalsDecl,
                        CL.mkCall(RN.initGlobalsHelper, [CL.mkVar RN.globalsVarName])
                      ])
              val initHelperFn = CL.D_Func(
                    [], CL.voidTy, RN.initGlobalsHelper,
                    [CL.PARAM([], globPtrTy, RN.globalsVarName)],
                    init)
              val shutdownFn = CL.D_Func(
                    [], CL.voidTy, RN.shutdown,
                    [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.worldTy), "wrld")],
                    CL.S_Block[])
              in
                topDecls := shutdownFn :: initFn :: initHelperFn :: !topDecls
              end
      (* register the global destruction part of a program *)
        fun free (Prog{props, topDecls, ...}, free) = let
              val worldTy = CL.T_Ptr(CL.T_Named(N.worldTy props))
              val globTy = CL.T_Ptr(CL.T_Named(N.globalTy props))
              val free = CL.mkBlock(
                    CL.mkDeclInit(globTy, "glob", CL.mkIndirect(CL.mkVar "wrld", "globals")) ::
                    CL.unBlock free @ [CL.mkReturn(SOME(CL.mkVar "false"))])
              val freeFn = CL.D_Func(
                    ["static"], CL.boolTy, N.freeGlobals,
                    [CL.PARAM([], worldTy, "wrld")],
                    free)
              in
                topDecls := freeFn :: !topDecls
              end

      (* create and register the initially function for a program *)
        fun initially {
              prog = Prog{props, strands, initially, numDims, ...},
              isArray : bool,
              iterPrefix : stm list,
              iters : (var * exp * exp) list,
              createPrefix : stm list,
              strand : Atom.atom,
              args : exp list
            } = let
              val name = Atom.toString strand
              val nDims = List.length iters
              val worldTy = CL.T_Ptr(CL.T_Named(N.worldTy props))
              fun mapi f xs = let
                    fun mapf (_, []) = []
                      | mapf (i, x::xs) = f(i, x) :: mapf(i+1, xs)
                    in
                      mapf (0, xs)
                    end
              val baseInit = mapi (fn (i, (_, e, _)) => (i, CL.I_Exp e)) iters
              val sizeInit = mapi
                    (fn (i, (CL.V(ty, _), lo, hi)) =>
                        (i, CL.I_Exp(CL.mkBinOp(hi, CL.#-, lo) #+# CL.mkIntTy(1, ty)))
                    ) iters
            (* code to allocate the world and initial strands *)
              val wrld = "wrld"
              val allocCode = [
                      CL.mkComment["allocate initial block of strands"],
                      CL.mkDecl(CL.T_Array(CL.int32, SOME nDims), "base", SOME(CL.I_Array baseInit)),
                      CL.mkDecl(CL.T_Array(CL.uint32, SOME nDims), "size", SOME(CL.I_Array sizeInit)),
                      CL.mkDecl(worldTy, wrld,
                        SOME(CL.I_Exp(CL.mkApply(N.allocInitially, [
                            CL.mkVar "ProgramName",
                            CL.mkUnOp(CL.%&, CL.mkVar(N.strandDesc name)),
                            CL.mkBool isArray,
                            CL.mkIntTy(IntInf.fromInt nDims, CL.int32),
                            CL.mkVar "base",
                            CL.mkVar "size"
                          ]))))
                    ]
            (* create the loop nest for the initially iterations *)
              val indexVar = "ix"
              val strandTy = CL.T_Ptr(CL.T_Named(N.strandTy name))
              fun mkLoopNest [] = CL.mkBlock(createPrefix @ [
                      CL.mkDecl(strandTy, "sp",
                        SOME(CL.I_Exp(
                          CL.mkCast(strandTy,
                          CL.mkApply(N.inState, [CL.mkVar "wrld", CL.mkVar indexVar]))))),
                      CL.mkCall(N.strandInit name, CL.mkVar "sp" :: args),
                      CL.mkAssign(CL.mkVar indexVar, CL.mkVar indexVar #+# CL.mkIntTy(1, CL.uint32))
                    ])
                | mkLoopNest ((CL.V(ty, param), lo, hi)::iters) = let
                    val body = mkLoopNest iters
                    in
                      CL.mkFor(
                        [(ty, param, lo)],
                        CL.mkBinOp(CL.mkVar param, CL.#<=, hi),
                        [CL.mkPostOp(CL.mkVar param, CL.^++)],
                        body)
                    end
              val iterCode = [
                      CL.mkComment["initially"],
                      CL.mkDecl(CL.uint32, indexVar, SOME(CL.I_Exp(CL.mkIntTy(0, CL.uint32)))),
                      mkLoopNest iters
                    ]
              val body = CL.mkBlock(
                    iterPrefix @
                    allocCode @
                    iterCode @
                    [CL.mkReturn(SOME(CL.mkVar "wrld"))])
              val initFn = CL.D_Func([], worldTy, N.initially props, [], body)
              in
                numDims := nDims;
                initially := initFn
              end


      (***** OUTPUT *****)

        fun genStrandPrint (Strand{name, tyName, state, output, code, ...}) = let
            (* the print function *)
              val prFnName = concat[name, "Print"]
              val prFn = let
                    val params = [
                          CL.PARAM([], CL.T_Ptr(CL.T_Named "FILE"), "outS"),
                          CL.PARAM([], CL.T_Ptr(CL.T_Named(RN.strandShadowTy tyName)), "self")
                        ]
                    val (ty, x) = output
                    val outState = CL.mkIndirect(CL.mkVar "self", x)
                    val prArgs = (case ty
                           of Ty.IntTy => [CL.mkStr(!N.gIntFormat ^ "\n"), outState]
                            | Ty.SeqTy(Ty.IntTy, d) => let
				fun sel i = CL.mkApply(
					  "VSUB",
					  [outState, CL.mkInt(IntInf.fromInt i)])
                                val fmt = CL.mkStr(
                                      String.concatWith " " (List.tabulate(d, fn _ => !N.gIntFormat))
                                      ^ "\n")
                                val args = List.tabulate (d, sel)
                                in
                                  fmt :: args
                                end
                            | Ty.TensorTy[] => [CL.mkStr "%f\n", outState]
                            | Ty.TensorTy[d] => let
				fun sel i = CL.mkApply(
					"VSUB",
					[outState, CL.mkInt(IntInf.fromInt i)])
                                val fmt = CL.mkStr(
                                      String.concatWith " " (List.tabulate(d, fn _ => "%f"))
                                      ^ "\n")
                                val args = List.tabulate (d, sel)
                                in
                                  fmt :: args
                                end
                            | _ => raise Fail("genStrand: unsupported output type " ^ Ty.toString ty)
                          (* end case *))
                    in
                      CL.D_Func(["static"], CL.voidTy, prFnName, params,
                        CL.mkCall("fprintf", CL.mkVar "outS" :: prArgs))
                    end
              in
                prFn
              end 
 
        fun genStrandTyDef (targetTy,Strand{state,...}, tyName) = (case state
	       of [] => CL.D_Comment(["No Strand Defintiion Included"])  
		| _ => CL.D_StructDef(
		    NONE,
		    revmap (fn x => (targetTy x, #var x)) state,
		    SOME tyName)
	     (* end case *))

     (* generates the globals buffers and arguments function *) 
        fun genConvertShadowTypes (Strand{name, tyName, state,...}) = let
            (* Declare opencl setup objects *) 
              val errVar = "err"
              val params = [
                      CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfOut"), 
                      CL.PARAM([], CL.T_Ptr(CL.T_Named(RN.strandShadowTy tyName)), "selfIn")
                    ]
              val body = List.map (fn (x:mirror_var) => #hToS x) state
              in
                CL.D_Func([], CL.voidTy, RN.strandConvertName name, params, CL.mkBlock body)
              end

      (* generates the opencl buffers for the image data *) 
        fun getGlobalDataBuffers (globals, imgGlobals, contextVar, errVar) = let 
              val globalBuffErr = "error creating OpenCL global buffer\n"  
              fun errorFn msg = CL.mkIfThen(CL.mkBinOp(CL.mkVar errVar, CL.#!=, CL.mkVar "CL_SUCCESS"), 
                    CL.mkBlock([CL.mkCall("fprintf",[CL.mkVar "stderr", CL.mkStr msg]), 
                    CL.mkCall("exit",[CL.mkInt 1])]))
              val shadowTypeDecl =
		    CL.mkDecl(CL.T_Named(RN.shadowGlobalsTy), RN.shadowGlaobalsName, NONE)
              val globalToShadowStms = List.map (fn (x:mirror_var) => #hToS x) globals 
              val globalBufferDecl = CL.mkDecl(clMemoryTy,concat[RN.globalsVarName,"_cl"],NONE)
              val globalBuffer = CL.mkAssign(CL.mkVar(concat[RN.globalsVarName,"_cl"]),
                    CL.mkApply("clCreateBuffer", [
                        CL.mkVar contextVar,
                        CL.mkBinOp(CL.mkVar "CL_MEM_READ_ONLY", CL.#|, CL.mkVar "CL_MEM_COPY_HOST_PTR"),
                        CL.mkSizeof(CL.T_Named RN.shadowGlobalsTy),
                        CL.mkUnOp(CL.%&,CL.mkVar RN.shadowGlaobalsName),
                        CL.mkUnOp(CL.%&,CL.mkVar errVar)
                      ]))
              fun genDataBuffers ([],_,_,_) = [] 
                | genDataBuffers ((var,nDims)::globals, contextVar, errVar,errFn) = let
                    val hostVar = CL.mkIndirect(CL.mkVar RN.globalsVarName, var)
                    val size = CL.mkIndirect(hostVar, "dataSzb") 
                    in 
                      CL.mkDecl(clMemoryTy, RN.addBufferSuffixData var ,NONE) :: 
                      CL.mkAssign(CL.mkVar(RN.addBufferSuffixData var),
                        CL.mkApply("clCreateBuffer", [
                            CL.mkVar contextVar,
                            CL.mkVar "CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR",
                            size,
                            CL.mkIndirect(hostVar, "data"),
                            CL.mkUnOp(CL.%&,CL.mkVar errVar)
                          ])) ::
                        errFn(concat["error in creating ",RN.addBufferSuffixData var, " global buffer\n"]) ::
                        genDataBuffers(globals,contextVar,errVar,errFn) 
                    end
              in 
                [shadowTypeDecl] @ globalToShadowStms 
                @ [globalBufferDecl, globalBuffer,errorFn(globalBuffErr)]
                @ genDataBuffers(imgGlobals,contextVar,errVar,errorFn) 
              end 

      (* generates the kernel arguments for the image data *) 
        fun genGlobalArguments (globals, count, kernelVar, errVar) = let 
              val globalArgErr = "error creating OpenCL global argument\n"  
              fun errorFn msg = CL.mkIfThen(CL.mkBinOp(CL.mkVar errVar, CL.#!=, CL.mkVar "CL_SUCCESS"), 
                    CL.mkBlock([CL.mkCall("fprintf",[CL.mkVar "stderr", CL.mkStr msg]), 
                    CL.mkCall("exit",[CL.mkInt 1])]))
              val globalArgument = CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.&=,
                    CL.mkApply("clSetKernelArg",
                      [CL.mkVar kernelVar, 
                       CL.mkPostOp(CL.mkVar count, CL.^++),
                       CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
                       CL.mkUnOp(CL.%&,CL.mkVar(concat[RN.globalsVarName,"_cl"]))])))     
              fun genDataArguments ([],_,_,_,_) = [] 
                | genDataArguments ((var,nDims)::globals,count,kernelVar,errVar,errFn) =  
                    CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.$=,
                      CL.mkApply("clSetKernelArg",
                        [CL.mkVar kernelVar, 
                         CL.mkPostOp(CL.mkVar count, CL.^++),
                         CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
                         CL.mkUnOp(CL.%&,CL.mkVar(RN.addBufferSuffixData var))]))) ::
                         errFn(concat["error in creating ",RN.addBufferSuffixData var, " argument\n"]) ::
                    genDataArguments (globals,count,kernelVar,errVar,errFn) 
              in   
                globalArgument :: errorFn globalArgErr ::
                  genDataArguments(globals, count, kernelVar, errVar,errorFn) 
              end 

      (* generates the globals buffers and arguments function *) 
        fun genGlobalBuffersArgs (globals,imgGlobals) = let
            (* Delcare opencl setup objects *) 
              val errVar = "err"
              val params = [
                      CL.PARAM([],CL.T_Named("cl_context"), "context"),
                      CL.PARAM([],CL.T_Named("cl_kernel"), "kernel"),
                      CL.PARAM([],CL.T_Named("cl_command_queue"), "cmdQ"),
                      CL.PARAM([],CL.T_Named("int"), "argStart")
                    ]
              val body = (case globals 
		     of [] => [CL.mkReturn(NONE)] 
		      | _ => let 		
			    val clGlobalBuffers =
				  getGlobalDataBuffers(globals, !imgGlobals, "context", errVar) 
			    val clGlobalArguments =
				  genGlobalArguments(!imgGlobals, "argStart", "kernel", errVar)
                            in
                            (* Body: put all the statments together *) 
			      CL.mkDecl(clIntTy, errVar, SOME(CL.I_Exp(CL.mkInt 0)))
			      :: clGlobalBuffers @ clGlobalArguments 
                            end 
		    (*end of case*))
              in
                CL.D_Func([],CL.voidTy,RN.globalsSetupName,params,CL.mkBlock(body))
              end 

      (* generate the global image meta-data and data parameters *) 
        fun genKeneralGlobalParams ((name,tyname)::[],line) = 
	      concat[line, "__global void *", RN.addBufferSuffixData name]
          | genKeneralGlobalParams ([],line) = line  
          | genKeneralGlobalParams ((name,tyname)::rest, line) = 
	      genKeneralGlobalParams(rest, concat[line, "__global void *", RN.addBufferSuffixData name, ",\n"]) 

	fun genUpdateMethod (Strand{name, tyName, state,...}, globals, imgGlobals) = let
	      val imageDataStms = List.map
		      (fn (x,_) => concat[
			  RN.globalImageDataName, ".", RN.imageDataName x, " = ",
			  RN.addBufferSuffixData x, ";","\n"
			])
			(!imgGlobals)
	      fun select ([], a, _) = a
		| select (_, _, b) = b
	      val placeHolders = [
		      (RN.place_holders, tyName), 
		      (RN.p_addDatPtr, select (!imgGlobals, "", ",")),
		      (RN.p_addGlobals, select (!globals, "", ",")),
		      (RN.p_globals, select (!globals, "", "__global Diderot_Globals_t *diderotGlobals")),
		      (RN.p_globalVar, select (!globals, "0", RN.globalsVarName)),
		      (RN.p_dataVar, select (!globals, "0", RN.globalImageDataName)),
		      (RN.p_dataPtr, genKeneralGlobalParams (!imgGlobals, "")),
		      (RN.p_dataAssign,  select (!imgGlobals, "",
			String.concat("Diderot_data_ptr_t diderotDataPtrs;\n" :: imageDataStms)))
		    ]
	      in 
		CL.verbatimDcl [CLUpdateFrag.text] placeHolders 
	      end 

	fun genStrandCopy(Strand{tyName,name,state,...}) = let 
	      val params = [
		      CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfIn"),
		      CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfOut")
		    ]
	      val assignStms = List.rev(
		    List.map
		      (fn x => CL.mkAssign(lvalueSV(#var x), rvalueSV(#var x)))
		        state)
	      in 
		CL.D_Func([""], CL.voidTy, RN.strandCopy, params,CL.mkBlock(assignStms)) 
	      end  

      (* generate a global structure type definition from the list of globals *) 
        fun genGlobalStruct (_, [], _) = CL.D_Comment(["No Global Definition"])
	  | genGlobalStruct (targetTy, globals, tyName) = let
              val globs = List.map (fn (x : mirror_var) => (targetTy x, #var x)) globals
              in
		CL.D_StructDef(NONE, globs, SOME tyName)
              end
        
      (* generate a global structure type definition from the image data of the image globals *) 
        fun genImageDataStruct ([], _) = CL.D_Comment(["No Image Data Ptrs Definition"])
	  | genImageDataStruct (imgGlobals, tyName) = let
              val globs = List.map
		    (fn (x, _) => (globalPtr CL.voidTy, RN.imageDataName x))
		      imgGlobals
              in
		CL.D_StructDef(NONE, globs, SOME tyName)
              end

        fun genGlobals (declFn, targetTy, globals) = let
              fun doVar (x : mirror_var) = declFn (CL.D_Var([], targetTy x, #var x, NONE))
              in
                List.app doVar globals
              end

        fun genOutputFun(Strand{name, output,tyName, state, code,...}) = let 
	    (* the output function *)
              val outFnName = concat[name, "_Output"]
              val outFun = let
                    val params = [
                          CL.PARAM([], CL.T_Ptr CL.voidTy, "outS"),
                          CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "self")
                        ]
            (* the type and access expression for the strand's output variable *)
              val (outTy, outState) = (#1 output, CL.mkIndirect(CL.mkVar "self", #2 output))
                    val outState = CL.mkUnOp(CL.%&, outState)
                    in
                      CL.D_Func(["static"], CL.voidTy, outFnName, params,
                        CL.mkCall("memcpy", [CL.mkVar "outS", outState, CL.mkSizeof(shadowTy outTy)] ))
                    end
            in 
              outFun 
            end 
    
        fun genStrandDesc (outFn,Strand{name, output,tyName, state, code,...}) = let
            (* the output function *)
              val outFnName = concat[name, "_Output"]
            (* the strand's descriptor object *)
              val descI = let
                    fun fnPtr (ty, f) = CL.I_Exp(CL.mkCast(CL.T_Named ty, CL.mkVar f))
                    val (outTy, _) = output
                    in
                      CL.I_Struct[
                          ("name", CL.I_Exp(CL.mkStr name)),
                          ("stateSzb", CL.I_Exp(CL.mkSizeof(CL.T_Named(RN.strandTy name)))),
                          ("shadowStrandSzb", CL.I_Exp(CL.mkSizeof(CL.T_Named(RN.strandShadowTy (RN.strandTy name))))),
(* FIXME: we may need to add a shadowOutputSzb field too for OpenCL *)
                          ("outputSzb", CL.I_Exp(CL.mkSizeof(shadowTy outTy))),
(* DEPRECATED
                          ("nrrdType", CL.I_Exp(CL.mkInt (NrrdTypes.toNrrdType outTy))), 
			  ("nrrdSzb", CL.I_Exp(CL.mkInt (NrrdTypes.toNrrdSize outTy))),
*)
                          ("update", fnPtr("update_method_t", "0")),
                          ("strandCopy",  fnPtr("convert_method_t", RN.strandConvertName name)),
                          ("print", fnPtr("print_method_t", RN.strandPrintName name)),
                          ("output", fnPtr("output_method_t", outFnName)) (* FIXME *)
                        ]
                    end
              val desc = CL.D_Var([], CL.T_Named N.strandDescTy, N.strandDesc name, SOME descI)
              in
                desc
              end

      (* generate the table of strand descriptors *)
        fun genStrandTable (declFn, strands) = let
              val nStrands = length strands
              fun genInit (Strand{name, ...}) = CL.I_Exp(CL.mkUnOp(CL.%&, CL.mkVar(N.strandDesc name)))
              fun genInits (_, []) = []
                | genInits (i, s::ss) = (i, genInit s) :: genInits(i+1, ss)
              in
                declFn (CL.D_Var([], CL.int32, N.numStrands,
                  SOME(CL.I_Exp(CL.mkIntTy(IntInf.fromInt nStrands, CL.int32)))));
                declFn (CL.D_Var([],
                  CL.T_Array(CL.T_Ptr(CL.T_Named N.strandDescTy), SOME nStrands),
                  N.strands,
                  SOME(CL.I_Array(genInits (0, strands)))))
              end

        fun genSrc (baseName, prog) = let
              val Prog{
		      props,
		      globals, topDecls, strands, initially,
		      imgGlobals, numDims,outFn, ...
		    } = prog
              val clFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "cl"}
              val cFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "c"}
              val clOutS = TextIO.openOut clFileName
              val cOutS = TextIO.openOut cFileName
              val clppStrm = PrintAsCL.new clOutS
              val cppStrm = PrintAsC.new cOutS
              fun cppDecl dcl = PrintAsC.output(cppStrm, dcl) 
              fun clppDecl dcl = PrintAsCL.output(clppStrm, dcl)
              val strands = AtomTable.listItems strands
              val [strand as Strand{name, tyName, code, init_code, ...}] = strands
              in
              (* Generate the OpenCL file *)
              (* Retrieve the header information *) 
                clppDecl (CL.verbatimDcl [HF.text] [
		    ("OUTFILE", clFileName),
		    ("SRCFILE", OS.Path.joinBaseExt{base=baseName, ext=SOME "diderot"}),
		    ("DIDEROT_FLOAT_PRECISION", Properties.floatPrecisionDef props),
		    ("DIDEROT_INT_PRECISION", Properties.intPrecisionDef props)
		  ]);
	      (* if there are no globals, then define a dummy type *)
		if List.null(!globals)
		  then clppDecl (CL.D_Verbatim["typedef void ", RN.globalsTy, ";\n"])
		  else ();
	      (* if there are no images, then define a dummy type *)
		if List.null(!imgGlobals)
		  then clppDecl (CL.D_Verbatim["typedef void * ", RN.imageDataType, ";\n"])
		  else ();
	      (* Retrieve the scheduler kernels and functions *) 
                clppDecl (CL.D_Verbatim[SF.text]); 
		clppDecl (CL.D_Verbatim[CLEigen2x2Frag.text]); 
		clppDecl (CL.D_Verbatim[CLEigen3x3Frag.text]); 
                clppDecl (genGlobalStruct (#gpuTy, !globals, RN.globalsTy)); 
                clppDecl (genImageDataStruct(!imgGlobals, RN.imageDataType)); 
                clppDecl (genStrandTyDef(#gpuTy, strand, tyName));  
                List.app clppDecl (!code); 
		clppDecl (genStrandCopy strand); 
                clppDecl (genUpdateMethod(strand, globals, imgGlobals)); 
              (* Generate the Host C file *)
                cppDecl (CL.D_Verbatim[
		    concat["#define " ^ Properties.floatPrecisionDef props, "\n"],
		    concat["#define " ^ Properties.intPrecisionDef props, "\n"],
		    concat["#define " ^ Properties.targetDef props, "\n"],
		    "#include \"Diderot/diderot.h\"\n"
		  ]);
		case !globals
		 of [] => cppDecl (CL.D_Verbatim[concat["typedef void ", RN.globalsTy, ";\n"] ])
		  | _ => ()
		(* end case *);
                cppDecl (CL.D_Var(["static"], CL.charPtr, "ProgramName",
                  SOME(CL.I_Exp(CL.mkStr(#srcFile props)))));
                cppDecl (genGlobalStruct (#hostTy, !globals, RN.globalsTy));
                cppDecl (genGlobalStruct (#shadowTy, !globals, RN.shadowGlobalsTy));
(* FIXME: does this really need to be a global? *)
                cppDecl (CL.D_Var(["static"], globPtrTy, RN.globalsVarName, NONE));
                cppDecl (genStrandTyDef (#hostTy, strand, tyName));
                cppDecl (genStrandTyDef (#shadowTy, strand, RN.strandShadowTy tyName));
                cppDecl (genConvertShadowTypes strand); 
                cppDecl  (!init_code);
                cppDecl (genStrandPrint strand); 
                cppDecl (genOutputFun strand); 
                List.app cppDecl (List.rev (!topDecls));
                cppDecl (genGlobalBuffersArgs (!globals,imgGlobals));
                List.app (fn strand => cppDecl (genStrandDesc (outFn,strand))) strands;
                genStrandTable (cppDecl, strands);
                cppDecl (!initially);    
                PrintAsC.close cppStrm;
                PrintAsCL.close clppStrm; 
                TextIO.closeOut cOutS;
                TextIO.closeOut clOutS
              end

      (* output the code to the filesystem.  The string is the basename of the source file *)
        fun generate (prog as Prog{props, ...}) = let
	      val {outDir, outBase, exec, double, parallel, debug, ...} = props
              fun condCons (true, x, xs) = x::xs
                | condCons (false, _, xs) = xs
            (* generate the C compiler flags *)
              val cflags = ["-I" ^ Paths.diderotInclude(), "-I" ^ Paths.teemInclude()]
              val cflags = condCons (#cl Paths.cflags <> "", #cl Paths.cflags, cflags)
              val cflags = condCons (parallel, #pthread Paths.cflags, cflags)
              val cflags = if debug
                    then #debug Paths.cflags :: cflags
                    else #ndebug Paths.cflags :: cflags
              val cflags = #base Paths.cflags :: cflags
            (* generate the loader flags *)
              val extraLibs = condCons (parallel, #pthread Paths.extraLibs, [])
              val extraLibs = Paths.teemLinkFlags() @  #base Paths.extraLibs :: extraLibs
                   val extraLibs =  #cl Paths.extraLibs :: extraLibs
              val rtLib = Properties.runtimeName props
              val ldOpts = rtLib :: extraLibs
	      val basename = OS.Path.joinDirFile{dir=outDir, file=outBase}
              in
                genSrc (basename, prog); 
                RunCC.compile (basename, cflags);
                RunCC.linkExec (basename, ldOpts)
              end

      end (* Program *)

  end

structure CLBackEnd = CodeGenFn(CLTarget)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0