Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml
ViewVC logotype

View of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1425 - (download) (annotate)
Fri Jul 1 18:05:07 2011 UTC (8 years, 2 months ago) by lamonts
File size: 40373 byte(s)
Added support for 1D and 3D access for OpenCL
(* cl-target.sml
 *
 * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
 * All rights reserved.
 *)

structure CLTarget : TARGET =
  struct

    structure IL = TreeIL
    structure V = IL.Var
    structure Ty = IL.Ty
    structure CL = CLang
    structure RN = RuntimeNames
    structure ToCL = TreeToCL
    structure N = CNames

  (* translate TreeIL types to shadow types *)
    fun shadowTy ty = (case ty
           of Ty.BoolTy => CL.T_Named "cl_bool"
            | Ty.StringTy => raise Fail "unexpected string type"
            | Ty.IVecTy 1 => CL.T_Named(RN.shadowIntTy ())
            | Ty.IVecTy n => raise Fail "unexpected int vector type"
            | Ty.TensorTy[] => CL.T_Named(RN.shadowRealTy ())
            | Ty.TensorTy[n] => CL.T_Named(RN.shadowVecTy n)
            | Ty.TensorTy[n, m] => CL.T_Named(RN.shadowMatTy(n,m))
            | Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}) => CL.T_Named(RN.shadowImageTy dim)
            | _ => raise Fail(concat["TreeToC.trType(", Ty.toString ty, ")"])
          (* end case *))
          
   (* translate TreeIL types to shadow types *)
    fun convertToShadow (ty,name) = (case ty
           of Ty.IVecTy 1 => CL.mkAssign(CL.mkSelect(CL.mkVar(RN.shadowGlaobalsName),name),
                                CL.mkIndirect(CL.mkVar(RN.globalsVarName), name))
            | Ty.TensorTy[n]=> CL.mkCall(RN.convertToShadowVec n,[
                         CL.mkSelect(CL.mkVar(RN.shadowGlaobalsName),name), 
                          CL.mkIndirect(CL.mkVar(RN.globalsVarName), name)]) 
            | Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}) =>  CL.mkCall(RN.shadowImageFunc dim, [
                         CL.mkVar "context", 
                         CL.mkUnOp(CL.%&,CL.mkSelect(CL.mkVar(RN.shadowGlaobalsName),name)), 
                         CL.mkIndirect(CL.mkVar(RN.globalsVarName),name)
                         ])
            | Ty.TensorTy[n, m] => CL.mkCall(RN.convertToShadowMat(m,n),[
                         CL.mkSelect(CL.mkVar(RN.shadowGlaobalsName),name), 
                          CL.mkIndirect(CL.mkVar(RN.globalsVarName), name)]) 
            | _ => CL.mkAssign(CL.mkSelect(CL.mkVar(RN.shadowGlaobalsName),name),
                                CL.mkIndirect(CL.mkVar(RN.globalsVarName), name))
           (*end case *))

  (* helper functions for specifying parameters in various address spaces *)
    fun clParam (spc, ty, x) = CL.PARAM([spc], ty, x)
    fun globalParam (ty, x) = CL.PARAM(["__global"], ty, x)
    fun constantParam (ty, x) = CL.PARAM(["__constant"], ty, x)
    fun localParam (ty, x) = CL.PARAM(["__local"], ty, x)
    fun privateParam (ty, x) = CL.PARAM(["__private"], ty, x)

  (* OpenCL global pointer type *)
    fun globalPtr ty = CL.T_Qual("__global", CL.T_Ptr ty)

  (* C variable translation *)
    structure TrCVar =
      struct
        type env = CL.typed_var TreeIL.Var.Map.map
        fun lookup (env, x) = (case V.Map.find (env, x)
               of SOME(CL.V(_, x')) => x'
                | NONE => raise Fail(concat["TrCVar.lookup(_, ", V.name x, ")"])
              (* end case *))
      (* translate a variable that occurs in an l-value context (i.e., as the target of an assignment) *)
        fun lvalueVar (env, x) = (case V.kind x
               of IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, lookup(env, x))
                | IL.VK_State strand => CL.mkIndirect(CL.mkVar "selfOut", lookup(env, x))
                | IL.VK_Local => CL.mkVar(lookup(env, x))
              (* end case *))
      (* translate a variable that occurs in an r-value context *)
        fun rvalueVar (env, x) = (case V.kind x
               of IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, lookup(env, x))
                | IL.VK_State strand => CL.mkIndirect(CL.mkVar "selfIn", lookup(env, x))
                | IL.VK_Local => CL.mkVar(lookup(env, x))
              (* end case *))
      end

    structure ToC = TreeToCFn (TrCVar)

    type var = CL.typed_var
    type exp = CL.exp
    type stm = CL.stm

  (* OpenCL specific types *)
    val clIntTy = CL.T_Named "cl_int"
    val clProgramTy = CL.T_Named "cl_program" 
    val clKernelTy  = CL.T_Named "cl_kernel"
    val clCmdQueueTy = CL.T_Named "cl_command_queue" 
    val clContextTy = CL.T_Named "cl_context" 
    val clDeviceIdTy = CL.T_Named "cl_device_id"
    val clPlatformIdTy = CL.T_Named "cl_platform_id" 
    val clMemoryTy = CL.T_Named "cl_mem"
    val globPtrTy = CL.T_Ptr(CL.T_Named RN.globalsTy)

  (* variable or field that is mirrored between host and GPU *)
    type mirror_var = {
            hostTy : CL.ty,             (* variable type on Host (i.e., C type) *)
            shadowTy : CL.ty,           (* host-side shadow type of GPU type *)
            gpuTy : CL.ty,              (* variable's type on GPU (i.e., OpenCL type) *)
            hToS: stm,					(*the statement that converts the variable to its shadow representation *) 
            var : CL.var                (* variable name *)
          }

    datatype strand = Strand of {
        name : string,
        tyName : string,
        state : mirror_var list ref,
        output : (Ty.ty * CL.var) option ref, (* the strand's output variable (only one for now) *)
        code : CL.decl list ref,
        init_code: CL.decl ref
      }

    datatype program = Prog of {
        name : string,                  (* stem of source file *)
        double : bool,                  (* true for double-precision support *)
        parallel : bool,                (* true for multithreaded (or multi-GPU) target *)
        debug : bool,                   (* true for debug support in executable *)
        globals : mirror_var list ref, 
        topDecls : CL.decl list ref,
        strands : strand AtomTable.hash_table,
        initially :  CL.decl ref,
        numDims: int ref,               (* number of dimensions in initially iteration *)
        imgGlobals: (string * int) list ref,
        prFn: CL.decl ref
      }

    datatype env = ENV of {
        info : env_info,
        vMap : var V.Map.map,
        scope : scope
      }

    and env_info = INFO of {
        prog : program
      }

    and scope
      = NoScope
      | GlobalScope
      | InitiallyScope
      | StrandScope of TreeIL.var list  (* strand initialization *)
      | MethodScope of TreeIL.var list  (* method body; vars are state variables *)

  (* the supprted widths of vectors of reals on the target. *)
(* FIXME: for OpenCL 1.1, 3 is also valid *)
    fun vectorWidths () = [2, 4, 8, 16]

  (* tests for whether various expression forms can appear inline *)
    fun inlineCons n = (n < 2)          (* vectors are inline, but not matrices *)
    val inlineMatrixExp = false         (* can matrix-valued expressions appear inline? *)

  (* TreeIL to target translations *)
    structure Tr =
      struct
        fun fragment (ENV{info, vMap, scope}, blk) = let
              val (vMap, stms) = (case scope
                     of GlobalScope => ToC.trFragment (vMap, blk)
(* NOTE: if we move strand initialization to the GPU, then we'll have to change the following code! *)
                      | InitiallyScope => ToC.trFragment (vMap, blk)
                      | _ => ToCL.trFragment (vMap, blk)
                    (* end case *))
              in
                (ENV{info=info, vMap=vMap, scope=scope}, stms)
              end
        fun block (ENV{vMap, scope, ...}, blk) = let
              fun saveState cxt stateVars trAssign (env, args, stm) = (
                    ListPair.foldrEq
                      (fn (x, e, stms) => trAssign(env, x, e)@stms)
                        [stm]
                          (stateVars, args)
                    ) handle ListPair.UnequalLengths => (
                      print(concat["saveState ", cxt, ": length mismatch; ", Int.toString(List.length args), " args\n"]);
                      raise Fail(concat["saveState ", cxt, ": length mismatch"]))
              in
                case scope
(* NOTE: if we move strand initialization to the GPU, then we'll have to change the following code! *)
                 of StrandScope stateVars =>
                      ToCL.trBlock (vMap, saveState "StrandScope" stateVars ToCL.trAssign, blk)
                  | MethodScope stateVars =>
                      ToCL.trBlock (vMap, saveState "MethodScope" stateVars ToCL.trAssign, blk)
                  | InitiallyScope => ToCL.trBlock (vMap, fn (_, _, stm) => [stm], blk)
                  | _ => ToC.trBlock (vMap, fn (_, _, stm) => [stm], blk)
                (* end case *)
              end
        fun exp (ENV{vMap, ...}, e) = ToCL.trExp(vMap, e)
      end

  (* variables *)
    structure Var =
      struct
        fun mirror (ty, name) = {
                hostTy = ToC.trType ty,
                shadowTy = shadowTy ty,
                gpuTy = ToCL.trType ty,
                hToS = convertToShadow(ty,name), 
                var = name
              }
        fun name (ToCL.V(_, name)) = name
        fun global (Prog{globals, imgGlobals, ...}, name, ty) = let
              val x = mirror (ty, name)
              fun isImgGlobal (Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}), name) = 
                    imgGlobals  := (name,dim) :: !imgGlobals 
                | isImgGlobal _ =  () 
              in
                globals := x :: !globals;
                isImgGlobal (ty, name); 
                ToCL.V(#gpuTy x, name)
              end
        fun param x = ToCL.V(ToCL.trType(V.ty x), V.name x)
        fun state (Strand{state, ...}, x) = let
              val ty = V.ty x
              val x' = mirror (ty, V.name x)
              in
                state := x' :: !state;
                ToCL.V(#gpuTy x', #var x')
              end
      end

  (* environments *)
    structure Env =
      struct
      (* create a new environment *)
        fun new prog = ENV{
                info=INFO{prog = prog},
                vMap = V.Map.empty,
                scope = NoScope
              }
      (* define the current translation context *)
        fun setScope scope (ENV{info, vMap, ...}) = ENV{info=info, vMap=vMap, scope=scope}
        val scopeGlobal = setScope GlobalScope
        val scopeInitially = setScope InitiallyScope
        fun scopeStrand (env, svars) = setScope (StrandScope svars) env
        fun scopeMethod (env, svars) = setScope (MethodScope svars) env
      (* bind a TreeIL varaiable to a target variable *)
        fun bind (ENV{info, vMap, scope}, x, x') = ENV{
                info = info,
                vMap = V.Map.insert(vMap, x, x'),
                scope = scope
              }
      end

  (* programs *)
    structure Program =
      struct
        fun new {name, double, parallel, debug} = (
              RN.initTargetSpec double;
              CNames.initTargetSpec double;
              Prog{
                  name = name,
                  double = double, parallel = parallel, debug = debug,
                  globals = ref [],
                  topDecls = ref [],
                  strands = AtomTable.mkTable (16, Fail "strand table"),
                  initially = ref(CL.D_Comment["missing initially"]),
                  numDims = ref 0, 
                  imgGlobals = ref[], 
                  prFn = ref(CL.D_Comment(["No Print Function"])) 
                })

      (* register the code that is used to register command-line options for input variables *)
        fun inputs (Prog{topDecls, ...}, stm) = let
              val inputsFn = CL.D_Func(
                    [], CL.voidTy, RN.registerOpts,
                    [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.optionsTy), "opts")],
                    stm)
              in
                topDecls := inputsFn :: !topDecls
              end
                
      (* register the global initialization part of a program *)
        fun init (Prog{topDecls, ...}, init) = let
              val globalsDecl = CL.mkAssign(CL.E_Var RN.globalsVarName,
                    CL.mkApply("malloc", [CL.mkSizeof(CL.T_Named RN.globalsTy)]))
              val initFn = CL.D_Func(
                    [], CL.voidTy, RN.initGlobals, [],
                    CL.mkBlock[
            globalsDecl,
            CL.mkCall(RN.initGlobalsHelper, [CL.mkVar RN.globalsVarName])
              ])
              val initHelperFn = CL.D_Func(
                    [], CL.voidTy, RN.initGlobalsHelper,
            [CL.PARAM([], globPtrTy, RN.globalsVarName)],
                    init)
              val shutdownFn = CL.D_Func(
                    [], CL.voidTy, RN.shutdown,
                    [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.worldTy), "wrld")],
                    CL.S_Block[])
              in
                topDecls := shutdownFn :: initFn :: initHelperFn :: !topDecls
              end

      (* create and register the initially function for a program *)      
        fun initially {
              prog = Prog{name=progName, strands, initially, numDims, ...},
              isArray : bool,
              iterPrefix : stm list,
              iters : (var * exp * exp) list,
              createPrefix : stm list,
              strand : Atom.atom,
              args : exp list
            } = let
              val name = Atom.toString strand
              val nDims = List.length iters
              val worldTy = CL.T_Ptr(CL.T_Named N.worldTy)
              fun mapi f xs = let
                    fun mapf (_, []) = []
                      | mapf (i, x::xs) = f(i, x) :: mapf(i+1, xs)
                    in
                      mapf (0, xs)
                    end
              val baseInit = mapi (fn (i, (_, e, _)) => (i, CL.I_Exp e)) iters
              val sizeInit = mapi
                    (fn (i, (CL.V(ty, _), lo, hi)) =>
                        (i, CL.I_Exp(CL.mkBinOp(CL.mkBinOp(hi, CL.#-, lo), CL.#+, CL.E_Int(1, ty))))
                    ) iters
            (* code to allocate the world and initial strands *)
              val wrld = "wrld"
              val allocCode = [
                      CL.mkComment["allocate initial block of strands"],
                      CL.mkDecl(CL.T_Array(CL.int32, SOME nDims), "base", SOME(CL.I_Array baseInit)),
                      CL.mkDecl(CL.T_Array(CL.uint32, SOME nDims), "size", SOME(CL.I_Array sizeInit)),
                      CL.mkDecl(worldTy, wrld,
                        SOME(CL.I_Exp(CL.E_Apply(RN.allocInitially, [
                            CL.mkVar "ProgramName",
                            CL.mkUnOp(CL.%&, CL.E_Var(N.strandDesc name)),
                            CL.E_Bool isArray,
                            CL.E_Int(IntInf.fromInt nDims, CL.int32),
                            CL.E_Var "base",
                            CL.E_Var "size"
                          ]))))
                    ]
            (* create the loop nest for the initially iterations
              val indexVar = "ix"
              val strandTy = CL.T_Ptr(CL.T_Named(N.strandTy name))
              fun mkLoopNest [] = CL.mkBlock(createPrefix @ [
                      CL.mkDecl(strandTy, "sp",
                        SOME(CL.I_Exp(
                          CL.E_Cast(strandTy,
                          CL.E_Apply(N.inState, [CL.E_Var "wrld", CL.E_Var indexVar]))))),
                      CL.mkCall(N.strandInit name,
                        CL.E_Var RN.globalsVarName :: CL.E_Var "sp" :: args),
                      CL.mkAssign(CL.E_Var indexVar, CL.mkBinOp(CL.E_Var indexVar, CL.#+, CL.E_Int(1, CL.uint32)))
                    ])
                | mkLoopNest ((CL.V(ty, param), lo, hi)::iters) = let
                    val body = mkLoopNest iters
                    in
                      CL.mkFor(
                        [(ty, param, lo)],
                        CL.mkBinOp(CL.E_Var param, CL.#<=, hi),
                        [CL.mkPostOp(CL.E_Var param, CL.^++)],
                        body)
                    end
              val iterCode = [
                      CL.mkComment["initially"],
                      CL.mkDecl(CL.uint32, indexVar, SOME(CL.I_Exp(CL.E_Int(0, CL.uint32)))),
                      mkLoopNest iters
                    ] *)
              val body = CL.mkBlock(
                    iterPrefix @
                    allocCode @
                    [CL.mkReturn(SOME(CL.E_Var "wrld"))])
              val initFn = CL.D_Func([], worldTy, N.initially, [], body)
              in
                numDims := nDims;
                initially := initFn
              end

      (***** OUTPUT *****)
        fun genStrandPrint (Strand{name, tyName, state, output, code,...}) = let
            (* the print function *)
              val prFnName = concat[name, "Print"]
              val prFn = let
                    val params = [
                            CL.PARAM([], CL.T_Ptr(CL.T_Named "FILE"), "outS"),
                            CL.PARAM([], CL.T_Ptr(CL.T_Num(RawTypes.RT_UInt8)),"status"),
                            CL.PARAM([], CL.intTy,"numStrands"),
                            CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "self")
                          ]
                    val SOME(ty, x) = !output
                    val outState = CL.mkSelect(CL.mkSubscript(CL.mkVar "self", CL.E_Var "i"), x)
                    val prArgs = (case ty
                           of Ty.IVecTy 1 => [CL.E_Str(!N.gIntFormat ^ "\n"), outState]
                            | Ty.IVecTy d => let
                                val fmt = CL.mkStr(
                                      String.concatWith " " (List.tabulate(d, fn _ => !N.gIntFormat))
                                      ^ "\n")
                                val args = List.tabulate (d, fn i => ToC.ivecIndex(outState, d, i))
                                in
                                  fmt :: args
                                end
                            | Ty.TensorTy[] => [CL.mkStr "%f\n", outState]
                            | Ty.TensorTy[d] => let
                                val fmt = CL.mkStr(
                                      String.concatWith " " (List.tabulate(d, fn _ => "%f"))
                                      ^ "\n")
                                val args = List.tabulate (d, fn i => ToC.vecIndex(outState, d, i))
                                in
                                  fmt :: args
                                end
                            | _ => raise Fail("genStrand: unsupported output type " ^ Ty.toString ty)
                          (* end case *))
                    val forBody = CL.mkIfThen(
                          CL.mkBinOp(CL.mkSubscript(CL.E_Var "status",CL.E_Var "i"), CL.#==, CL.E_Var "DIDEROT_STABILIZE"),
                          CL.mkBlock([CL.mkCall("fprintf", CL.mkVar "outS" :: prArgs)]))
                    val body =  CL.mkFor(
                        [(CL.intTy, "i", CL.mkInt 0)],
                        CL.mkBinOp(CL.E_Var "i", CL.#<, CL.E_Var "numStrands"),
                        [CL.mkPostOp(CL.E_Var "i", CL.^++)],
                        forBody)
                    in
                      CL.D_Func(["static"], CL.voidTy, prFnName, params, body)
                    end
              in
                prFn
              end 

        fun genStrandTyDef (targetTy, Strand{tyName, state,...}) = 
            (* the type declaration for the strand's state struct *)
              CL.D_StructDef(
                List.rev (List.map (fn x => (targetTy x, #var x)) (!state)),
                tyName)
        
      (* generates the opencl buffers for the image data *) 
        fun getGlobalDataBuffers (globals, imgGlobals, contextVar, errVar) = let 
              val globalBuffErr = "error creating OpenCL global buffer"  
              fun errorFn msg = CL.mkIfThen(CL.mkBinOp(CL.E_Var errVar, CL.#!=, CL.E_Var "CL_SUCCESS"), 
                    CL.mkBlock([CL.mkCall("fprintf",[CL.E_Var "stderr", CL.E_Str msg]), 
                    CL.mkCall("exit",[CL.mkInt 1])]))
              val shadowTypeDecl =
		    CL.mkDecl(CL.T_Named(RN.shadowGlobalsTy), RN.shadowGlaobalsName, NONE)
              val globalToShadowStms = List.map (fn (x:mirror_var) => #hToS x ) globals 
              val globalBufferDecl = CL.mkDecl(clMemoryTy,concat[RN.globalsVarName,"_cl"],NONE)
              val globalBuffer = CL.mkAssign(CL.mkVar(concat[RN.globalsVarName,"_cl"]),
                    CL.mkApply("clCreateBuffer", [
                        CL.mkVar contextVar,
                        CL.mkBinOp(CL.mkVar "CL_MEM_READ_ONLY", CL.#|, CL.mkVar "CL_MEM_COPY_HOST_PTR"),
                        CL.mkSizeof(CL.T_Named RN.shadowGlobalsTy),
                        CL.mkUnOp(CL.%&,CL.mkVar RN.shadowGlaobalsName),
                        CL.mkUnOp(CL.%&,CL.mkVar errVar)
                      ]))
              fun genDataBuffers ([],_,_,_) = [] 
                | genDataBuffers ((var,nDims)::globals, contextVar, errVar,errFn) = let
                    val hostVar = CL.mkIndirect(CL.mkVar RN.globalsVarName, var)
                    val size = CL.mkIndirect(hostVar, "dataSzb") 
                    in 
                      CL.mkDecl(clMemoryTy, RN.addBufferSuffixData var ,NONE) :: 
                      CL.mkAssign(CL.mkVar(RN.addBufferSuffixData var),
                        CL.mkApply("clCreateBuffer", [
                            CL.mkVar contextVar,
                            CL.mkVar "CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR",
                            size,
                            CL.mkIndirect(hostVar, "data"),
                            CL.mkUnOp(CL.%&,CL.mkVar errVar)
                          ])) ::
                        errFn(concat["error in creating ",RN.addBufferSuffixData var, " global buffer"]) ::
                        genDataBuffers(globals,contextVar,errVar,errFn) 
                    end
              in 
                [shadowTypeDecl] @ globalToShadowStms 
                @ [globalBufferDecl, globalBuffer,errorFn(globalBuffErr)]
                @ genDataBuffers(imgGlobals,contextVar,errVar,errorFn) 
              end 

      (* generates the kernel arguments for the image data *) 
        fun genGlobalArguments (globals, count, kernelVar, errVar) = let 
              val globalArgErr = "error creating OpenCL global argument"  
              fun errorFn msg = CL.mkIfThen(CL.mkBinOp(CL.E_Var errVar, CL.#!=, CL.E_Var "CL_SUCCESS"), 
                    CL.mkBlock([CL.mkCall("fprintf",[CL.E_Var "stderr", CL.E_Str msg]), 
                    CL.mkCall("exit",[CL.mkInt 1])]))
              val globalArgument = CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.&=,
                    CL.mkApply("clSetKernelArg",
                      [CL.mkVar kernelVar, 
                       CL.mkPostOp(CL.E_Var count, CL.^++),
                       CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
                       CL.mkUnOp(CL.%&,CL.mkVar(concat[RN.globalsVarName,"_cl"]))])))     
              fun genDataArguments ([],_,_,_,_) = [] 
                | genDataArguments ((var,nDims)::globals,count,kernelVar,errVar,errFn) =  
                    CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.$=,
                      CL.mkApply("clSetKernelArg",
                        [CL.mkVar kernelVar, 
                         CL.mkPostOp(CL.E_Var count, CL.^++),
                         CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
                         CL.mkUnOp(CL.%&,CL.mkVar(RN.addBufferSuffixData var))]))) ::
                         errFn(concat["error in creating ",RN.addBufferSuffixData var, " argument"]) ::
                    genDataArguments (globals,count,kernelVar,errVar,errFn) 
              in   
               [globalArgument,errorFn(globalArgErr)] @ genDataArguments(globals, count, kernelVar, errVar,errorFn) 
              end 

      (* generates the globals buffers and arguments function *) 
        fun genGlobalBuffersArgs (globals,imgGlobals) = let
            (* Delcare opencl setup objects *) 
              val errVar = "err"
              val imgDataSizeVar = "image_dataSize"
              val params = [
                      CL.PARAM([],CL.T_Named("cl_context"), "context"),
                      CL.PARAM([],CL.T_Named("cl_kernel"), "kernel"),
                      CL.PARAM([],CL.T_Named("cl_command_queue"), "cmdQ"),
                      CL.PARAM([],CL.T_Named("int"), "argStart")
                    ]
              val clGlobalBuffers = getGlobalDataBuffers(globals,!imgGlobals, "context", errVar) 
              val clGlobalArguments = genGlobalArguments(!imgGlobals, "argStart", "kernel", errVar)
            (* Body put all the statments together *) 
              val body = CL.mkDecl(clIntTy, errVar, SOME(CL.I_Exp(CL.mkInt 0)))
                    :: clGlobalBuffers @ clGlobalArguments 
              in
                CL.D_Func([],CL.voidTy,RN.globalsSetupName,params,CL.mkBlock(body))
              end 

      (* generate the data and global parameters *) 
        fun genKeneralGlobalParams ((name,tyname)::rest) = 
              globalParam (CL.T_Ptr(CL.voidTy), RN.addBufferSuffixData name) ::
              genKeneralGlobalParams rest
          | genKeneralGlobalParams [] = []
        
      (* generate the main kernel function for the .cl file *) 
        fun genKernelFun (strand, nDims, globals, imgGlobals) = let
              val Strand{name, tyName, state, output, code,...} = strand
              val fName = RN.kernelFuncName; 
              val inState = "strand_in" 
              val outState = "strand_out"
              val tempVar = "tmp"  
              val sizeParams = if nDims = 1 then
                        []                
                    else if nDims = 2 then 
                        [CL.PARAM([], CL.intTy, "width")]
                    else 
                        [CL.PARAM([], CL.intTy, "width"),CL.PARAM([], CL.intTy, "height")] 
              val params = [
                      globalParam(CL.T_Ptr(CL.T_Named tyName), "selfIn"),
                      globalParam(CL.T_Ptr(CL.T_Named tyName), "selfOut"),
                      globalParam(CL.T_Ptr(CL.T_Num(RawTypes.RT_UInt8)), "strandStatus")] @
                      sizeParams @
                     [ globalParam(globPtrTy, RN.globalsVarName)
                    ] @ genKeneralGlobalParams(!imgGlobals) 
              val thread_ids = if nDims = 1 
                      then [
                          CL.mkDecl(CL.intTy, "x",
                            SOME(CL.I_Exp(CL.mkApply(RN.getGlobalThreadId,[CL.mkInt 0]))))
                        ]
                    else if nDims = 2
                      then [
                          CL.mkDecl(CL.intTy, "x",
                            SOME(CL.I_Exp(CL.mkApply(RN.getGlobalThreadId,[CL.mkInt 1])))), 
                          CL.mkDecl(CL.intTy, "y",
                            SOME(CL.I_Exp(CL.mkApply(RN.getGlobalThreadId,[CL.mkInt 0]))))
                        ]
                    else 
                       [
                          CL.mkDecl(CL.intTy, "x",
                            SOME(CL.I_Exp(CL.mkApply(RN.getGlobalThreadId,[CL.mkInt 1])))), 
                          CL.mkDecl(CL.intTy, "y",
                            SOME(CL.I_Exp(CL.mkApply(RN.getGlobalThreadId,[CL.mkInt 0])))), 
                          CL.mkDecl(CL.intTy, "z",
                            SOME(CL.I_Exp(CL.mkApply(RN.getGlobalThreadId,[CL.mkInt 2]))))
                       ]
                    
                    
              val strandDecl = [
                      CL.mkAttrDecl(["__global"], CL.T_Ptr(CL.T_Named tyName), inState, NONE), 
                      CL.mkAttrDecl(["__global"], CL.T_Ptr(CL.T_Named tyName), outState, NONE),
                      CL.mkAttrDecl(["__global"], CL.T_Ptr(CL.T_Named tyName), tempVar, NONE)
                    ]
              val imageDataDecl = CL.mkDecl(CL.T_Named(RN.imageDataType),RN.globalImageDataName,NONE)
              val imageDataStms = List.map (fn (x,_) => 
                  CL.mkAssign(CL.mkSelect(CL.mkVar(RN.globalImageDataName),RN.imageDataName x), 
                              CL.mkVar(RN.addBufferSuffixData x))) (!imgGlobals)
              val barrierCode = CL.mkIfThen(CL.mkBinOp(CL.E_Var "status",CL.#==,CL.E_Var "DIDEROT_ACTIVE"), 
                                 CL.mkBlock ([CL.mkAssign(CL.E_Var tempVar, CL.E_Var inState), 
                                 CL.mkAssign(CL.E_Var inState, CL.E_Var outState), 
                                 CL.mkAssign(CL.E_Var outState, CL.E_Var tempVar)])) 
              val barrierStm = CL.mkCall("barrier",[CL.E_Var "CLK_LOCAL_MEM_FENCE"])
              val index = if nDims = 1 then
                        CL.mkVar "x"                
                    else if nDims = 2 then 
                        CL.mkBinOp(
                            CL.mkBinOp(CL.mkVar "y", CL.#*, CL.mkVar "width"), CL.#+, CL.mkVar "x")
                    else 
                       CL.mkBinOp(CL.mkBinOp(CL.mkBinOp(
                            CL.mkBinOp(CL.mkVar "z", CL.#*, CL.mkVar "width"),CL.#*, CL.mkVar "height"), CL.#+,
                            CL.mkBinOp(CL.mkVar "y",CL.#*,CL.mkVar "height")),CL.#+,CL.mkVar "x") 
               
              val args = if nDims = 1 then
                        [CL.mkVar "x"]                
                    else if nDims = 2 then 
                        [CL.mkVar "x", CL.mkVar "y"]
                    else 
                        [CL.mkVar "x", CL.mkVar "y", CL.mkVar "z"] 
            
  
              val strandObjects = 
                     [ CL.mkAssign(CL.mkVar inState,  CL.mkBinOp(CL.mkVar "selfIn",CL.#+,index)), 
                       CL.mkAssign(CL.mkVar outState, CL.mkBinOp(CL.mkVar "selfOut",CL.#+,index))
                     ]                  

                val stabalizeStm = CL.mkAssign(CL.mkSubscript(CL.mkVar "strandStatus",index), 
                                                                        CL.E_Var "status")         
              val status = CL.mkDecl(CL.intTy, "status", SOME(CL.I_Exp(CL.mkSubscript(CL.mkVar "strandStatus",index))))
              val strandInitStm = CL.mkCall(RN.strandInit name, [
                      CL.mkVar RN.globalsVarName,
                      CL.mkVar inState] @ args)
              val local_vars = thread_ids
                    @ [imageDataDecl]
                    @ imageDataStms
                    @ strandDecl
                    @ strandObjects
                    @ [strandInitStm,status]
              val while_exp = CL.mkBinOp(CL.mkVar "status",CL.#==, CL.mkVar RN.kActive)
              val whileBody = CL.mkBlock ([
                      CL.mkAssign(CL.mkVar "status",
                        CL.mkApply(RN.strandUpdate name,
                          [CL.mkVar inState, 
                           CL.mkVar outState,
                           CL.mkVar RN.globalsVarName, 
                           CL.mkVar RN.globalImageDataName]))] @ [barrierCode,barrierStm] )  
              val whileBlock = [CL.mkWhile(while_exp, whileBody)]         
              val body = CL.mkBlock(local_vars @ whileBlock @ [stabalizeStm])
              in 
                CL.D_Func(["__kernel"], CL.voidTy, fName, params, body)
              end

      (* generate a global structure type definition from the list of globals *) 
        fun genGlobalStruct (targetTy, globals, tyName) = let
              val globs = List.map (fn (x : mirror_var) => (targetTy x, #var x)) globals
              in
                CL.D_StructDef(globs, tyName) 
              end
        
      (* generate a global structure type definition from the image data of the image globals *) 
        fun genImageDataStruct (imgGlobals, tyName) = let
              val globs = List.map
		    (fn (x, _) => (globalPtr CL.voidTy, RN.imageDataName x))
		      imgGlobals
              in
                CL.D_StructDef(globs, tyName) 
              end

        fun genGlobals (declFn, targetTy, globals) = let
              fun doVar (x : mirror_var) = declFn (CL.D_Var([], targetTy x, #var x, NONE))
              in
                List.app doVar globals
              end

        fun genStrandDesc (Strand{name, output, ...}) = let
            (* the strand's descriptor object *)
              val descI = let
                    fun fnPtr (ty, f) = CL.I_Exp(CL.mkCast(CL.T_Named ty, CL.mkVar f))
                    val SOME(outTy, _) = !output
                    in
                      CL.I_Struct[
                          ("name", CL.I_Exp(CL.mkStr name)),
                          ("stateSzb", CL.I_Exp(CL.mkSizeof(CL.T_Named(N.strandTy name)))),
(*
                          ("outputSzb", CL.I_Exp(CL.mkSizeof(ToC.trTy outTy))),
*)
                          ("update", fnPtr("update_method_t", "0")),
                          ("print", fnPtr("print_method_t", name ^ "Print"))
                        ]
                    end
              val desc = CL.D_Var([], CL.T_Named N.strandDescTy, N.strandDesc name, SOME descI)
              in
                desc
              end

      (* generate the table of strand descriptors *)
        fun genStrandTable (declFn, strands) = let
              val nStrands = length strands
              fun genInit (Strand{name, ...}) = CL.I_Exp(CL.mkUnOp(CL.%&, CL.E_Var(N.strandDesc name)))
              fun genInits (_, []) = []
                | genInits (i, s::ss) = (i, genInit s) :: genInits(i+1, ss)
              in
                declFn (CL.D_Var([], CL.int32, N.numStrands,
                  SOME(CL.I_Exp(CL.E_Int(IntInf.fromInt nStrands, CL.int32)))));
                declFn (CL.D_Var([],
                  CL.T_Array(CL.T_Ptr(CL.T_Named N.strandDescTy), SOME nStrands),
                  N.strands,
                  SOME(CL.I_Array(genInits (0, strands)))))
              end

        fun genSrc (baseName, prog) = let
              val Prog{name,double, globals, topDecls, strands, initially, imgGlobals, numDims, ...} = prog
              val clFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "cl"}
              val cFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "c"}
              val clOutS = TextIO.openOut clFileName
              val cOutS = TextIO.openOut cFileName
              val clppStrm = PrintAsCL.new clOutS
              val cppStrm = PrintAsC.new cOutS
              val progName = name
              fun cppDecl dcl = PrintAsC.output(cppStrm, dcl) 
              fun clppDecl dcl = PrintAsCL.output(clppStrm, dcl)
              val strands = AtomTable.listItems strands
              val [strand as Strand{name, tyName, code, init_code, ...}] = strands
              in
              (* Generate the OpenCl file *)
                clppDecl (CL.D_Verbatim([
                    if double
                      then "#define DIDEROT_DOUBLE_PRECISION"
                      else "#define DIDEROT_SINGLE_PRECISION",
                    "#define DIDEROT_TARGET_CL",
                    "#include \"Diderot/cl-diderot.h\""
                  ])); 
                clppDecl (genGlobalStruct (#gpuTy, !globals, RN.globalsTy)); 
                clppDecl (genImageDataStruct(!imgGlobals,RN.imageDataType)); 
                clppDecl (genStrandTyDef(#gpuTy, strand)); 
                clppDecl  (!init_code);
                List.app clppDecl (!code); 
                clppDecl (genKernelFun (strand, !numDims, globals, imgGlobals)); 
              (* Generate the Host C file *)
                cppDecl (CL.D_Verbatim([
                    if double
                      then "#define DIDEROT_DOUBLE_PRECISION"
                      else "#define DIDEROT_SINGLE_PRECISION",
                    "#define DIDEROT_TARGET_CL",
                    "#include \"Diderot/diderot.h\""
                  ])); 
                cppDecl (CL.D_Var(["static"], CL.charPtr, "ProgramName",
                  SOME(CL.I_Exp(CL.mkStr progName))));
                cppDecl (genGlobalStruct (#hostTy, !globals, RN.globalsTy));
                cppDecl (genGlobalStruct (#shadowTy, !globals, RN.shadowGlobalsTy));
(* FIXME: does this really need to be a global? *)
                cppDecl (CL.D_Var(["static"], globPtrTy, RN.globalsVarName, NONE));
                cppDecl (genStrandTyDef (#hostTy, strand));
                cppDecl (genStrandPrint strand); 
                List.app cppDecl (List.rev (!topDecls));
                cppDecl (genGlobalBuffersArgs (!globals,imgGlobals));
                List.app (fn strand => cppDecl (genStrandDesc strand)) strands;
                genStrandTable (cppDecl, strands);
                cppDecl (!initially);    
                PrintAsC.close cppStrm;
                PrintAsCL.close clppStrm; 
                TextIO.closeOut cOutS;
                TextIO.closeOut clOutS
              end

      (* output the code to the filesystem.  The string is the basename of the source file *)
        fun generate (basename, prog as Prog{double, parallel, debug, ...}) = let
              fun condCons (true, x, xs) = x::xs
                | condCons (false, _, xs) = xs
            (* generate the C compiler flags *)
              val cflags = ["-I" ^ Paths.diderotInclude, "-I" ^ Paths.teemInclude]
              val cflags = condCons (parallel, #pthread Paths.cflags, cflags)
              val cflags = if debug
                    then #debug Paths.cflags :: cflags
                    else #ndebug Paths.cflags :: cflags
              val cflags = #base Paths.cflags :: cflags
            (* generate the loader flags *)
              val extraLibs = condCons (parallel, #pthread Paths.extraLibs, [])
              val extraLibs = Paths.teemLinkFlags @  #base Paths.extraLibs :: extraLibs
                   val extraLibs =  #cl Paths.extraLibs :: extraLibs
              val rtLib = TargetUtil.runtimeName {
                      target = TargetUtil.TARGET_CL,
                      parallel = parallel, double = double, debug = debug
                    }
              val ldOpts = rtLib :: extraLibs
              in
                genSrc (basename, prog); 
                RunCC.compile (basename, cflags);
                RunCC.link (basename, ldOpts)
              end

      end

  (* strands *)
    structure Strand =
      struct
        fun define (Prog{strands, ...}, strandId) = let
              val name = Atom.toString strandId
              val strand = Strand{
                      name = name,
                      tyName = RN.strandTy name,
                      state = ref [],
                      output = ref NONE,
                      code = ref [],
                      init_code = ref (CL.D_Comment(["no init code"]))
                    }
              in
                AtomTable.insert strands (strandId, strand);
                strand
              end

      (* return the strand with the given name *)
        fun lookup (Prog{strands, ...}, strandId) = AtomTable.lookup strands strandId

      (* register the strand-state initialization code.  The variables are the strand
       * parameters.
       *)
        fun init (Strand{name, tyName, code, init_code, ...}, params, init) = let
              val fName = RN.strandInit name
              val params =
                    globalParam (globPtrTy, RN.globalsVarName) ::
                    globalParam (CL.T_Ptr(CL.T_Named tyName), "selfOut") ::
                      List.map (fn (ToCL.V(ty, x)) => CL.PARAM([], ty, x)) params
              val initFn = CL.D_Func([], CL.voidTy, fName, params, init)
              in
                init_code := initFn 
              end
        
      (* register a strand method *)
        fun method (Strand{name, tyName, code,...}, methName, body) = let
              val fName = concat[name, "_", methName]
              val params = [
                      globalParam (CL.T_Ptr(CL.T_Named tyName), "selfIn"),
                      globalParam (CL.T_Ptr(CL.T_Named tyName), "selfOut"),
                      globalParam (CL.T_Ptr(CL.T_Named (RN.globalsTy)), RN.globalsVarName), 
                      CL.PARAM([],CL.T_Named(RN.imageDataType),RN.globalImageDataName)
                    ]
              val methFn = CL.D_Func([], CL.int32, fName, params, body)
              in
                code := methFn :: !code
              end
                
        fun output (Strand{output, ...}, ty, ToCL.V(_, x)) = output := SOME(ty, x)

      end

  end

structure CLBackEnd = CodeGenFn(CLTarget)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0