Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml
ViewVC logotype

View of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1261 - (download) (annotate)
Sat May 28 23:16:30 2011 UTC (8 years, 4 months ago) by jhr
File size: 49884 byte(s)
  Working on compiler support for command-line options
(* cl-target.sml
 *
 * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
 * All rights reserved.
 *)

structure CLTarget : TARGET =
  struct

    structure IL = TreeIL
    structure V = IL.Var
    structure Ty = IL.Ty
    structure CL = CLang
    structure RN = RuntimeNames
    structure ToC = TreeToCL

    type var = ToC.var
    type exp = CL.exp
    type stm = CL.stm

    (* information collected for a strand while its code is being generated *)
    datatype strand = Strand of {
        name : string,                  (* strand name; prefix of the generated print function's name *)
        tyName : string,                (* name of the struct type that holds the strand's state *)
        state : var list ref,           (* state variables; Var.state adds new ones at the front *)
        output : (Ty.ty * CL.var) option ref,   (* the strand's output variable (only one for now) *)
        code : CL.decl list ref         (* declarations for the strand; reversed by genStrand when emitted *)
      }

    (* the mutable state of a program being generated for the OpenCL target *)
    datatype program = Prog of {
        double : bool,                  (* true for double-precision support *)
        parallel : bool,                (* true for multithreaded (or multi-GPU) target *)
        debug : bool,                   (* true for debug support in executable *)
        globals : CL.decl list ref,     (* global declarations; Var.global adds new ones at the front *)
        topDecls : CL.decl list ref,    (* top-level functions; new ones are added at the front *)
        strands : strand AtomTable.hash_table,  (* NOTE(review): presumably keyed by strand name -- confirm *)
        initially : CL.stm list ref,    (* statements installed by Program.initially *)
        numDims: int ref,               (* number of iterators; set by Program.initially *)
        imgGlobals: (string * int) list ref,    (* (name, dimension) of each image-typed global *)
        oneDim: CL.exp ref,             (* oneDim, twoDim and thirdDim hold iterator upper bounds *)
        twoDim: CL.exp ref,             (* that are recorded by Program.initially; they start out *)
        thirdDim: CL.exp ref            (* as a placeholder string expression *)
    }

    (* a translation environment: program information, variable bindings, and the current scope *)
    datatype env = ENV of {
        info : env_info,                (* information about the program being translated *)
        vMap : var V.Map.map,           (* maps TreeIL variables to target variables *)
        scope : scope                   (* the current translation context *)
      }

    and env_info = INFO of {
        prog : program
      }

    and scope
      = NoScope                         (* the scope of a freshly created environment *)
      | GlobalScope
      | InitiallyScope
      | StrandScope of TreeIL.var list  (* strand initialization *)
      | MethodScope of TreeIL.var list  (* method body; vars are state variables *)

  (* the supported widths of vectors of reals on the target.  For the GNU vector extensions,
   * the supported sizes are powers of two, but float2 is broken.
   * NOTE: we should also consider the AVX vector hardware, which has 256-bit registers.
   *)
    fun vectorWidths () = (case !RuntimeNames.doublePrecision
           of true => [2, 4, 8]
            | false => [4, 8]
          (* end case *))

  (* tests for whether various expression forms can appear inline *)
    fun inlineCons n = (n <= 1)         (* vectors are inline, but not matrices *)
    val inlineMatrixExp = false         (* matrix-valued expressions may not appear inline *)

  (* TreeIL to target translations *)
    structure Tr =
      struct
      (* translate a code fragment; the result is the environment with the variable
       * map returned by ToC.trFragment together with the generated statements.
       *)
        fun fragment (ENV{info, vMap, scope}, blk) = let
              val (vMap', stms) = ToC.trFragment (vMap, blk)
              val env' = ENV{info=info, vMap=vMap', scope=scope}
              in
                (env', stms)
              end
      (* build the statements that assign each argument to the corresponding state
       * variable, followed by stm.  The cxt string only labels the error message that
       * is reported when the two lists have different lengths.
       *)
        fun saveState cxt stateVars (env, args, stm) = let
              fun assign (x, e, rest) = ToC.trAssign(env, x, e) @ rest
              in
                ListPair.foldrEq assign [stm] (stateVars, args)
              end
                handle ListPair.UnequalLengths => (
                  print(concat["saveState ", cxt, ": length mismatch; ", Int.toString(List.length args), " args\n"]);
                  raise Fail(concat["saveState ", cxt, ": length mismatch"]))
      (* translate a block; in strand and method scopes the state variables are saved
       * by saveState, while in every other scope the statement is passed through.
       *)
        fun block (ENV{vMap, scope, ...}, blk) = let
              val saver = (case scope
                     of StrandScope stateVars => saveState "StrandScope" stateVars
                      | MethodScope stateVars => saveState "MethodScope" stateVars
                      | _ => (fn (_, _, stm) => [stm])
                    (* end case *))
              in
                ToC.trBlock (vMap, saver, blk)
              end
      (* translate an expression using the environment's variable map *)
        fun exp (ENV{vMap, ...}, e) = ToC.trExp(vMap, e)
      end

  (* variables *)
    structure Var =
      struct
      (* the name of a target variable *)
        fun name (ToC.V(_, x)) = x
      (* declare a global variable of the program.  A global of image type is also
       * recorded, with its dimension, in the program's imgGlobals list.
       *)
        fun global (Prog{globals, imgGlobals, ...}, name, ty) = let
              val cTy = ToC.trType ty
              in
                globals := CL.D_Var([], cTy, name, NONE) :: !globals;
                (case ty
                 of Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}) => imgGlobals := (name, dim) :: !imgGlobals
                  | _ => ()
                (* end case *));
                ToC.V(cTy, name)
              end
      (* the target variable for a parameter *)
        fun param x = ToC.V(ToC.trType(V.ty x), V.name x)
      (* create the target variable for a state variable and add it to the front of
       * the strand's state list.
       *)
        fun state (Strand{state, ...}, x) = let
              val stateVar = ToC.V(ToC.trType(V.ty x), V.name x)
              in
                state := stateVar :: !state;
                stateVar
              end
      end

  (* environments *)
    structure Env =
      struct
      (* create a new environment: no variable bindings and no scope *)
        fun new prog = ENV{info = INFO{prog = prog}, vMap = V.Map.empty, scope = NoScope}
      (* define the current translation context *)
        fun setScope scope (ENV{info, vMap, ...}) = ENV{info=info, vMap=vMap, scope=scope}
        fun scopeGlobal env = setScope GlobalScope env
        fun scopeInitially env = setScope InitiallyScope env
        fun scopeStrand (env, svars) = setScope (StrandScope svars) env
        fun scopeMethod (env, svars) = setScope (MethodScope svars) env
      (* bind a TreeIL variable to a target variable *)
        fun bind (ENV{info, vMap, scope}, x, x') = let
              val vMap' = V.Map.insert(vMap, x, x')
              in
                ENV{info = info, vMap = vMap', scope = scope}
              end
      end

  (* programs *)
    structure Program =
      struct
        (* create the initial state of a program.  The target spec is initialized first;
         * the globals start with the precision define and the OpenCL types include.
         *)
        fun new {double, parallel, debug} = let
              val _ = RN.initTargetSpec double
              val precisionDef = if double
                    then "#define DIDEROT_DOUBLE_PRECISION"
                    else "#define DIDEROT_SINGLE_PRECISION"
              val preamble = CL.D_Verbatim[precisionDef, "#include \"Diderot/opencl_types.h\""]
            (* each dimension gets its own ref holding a placeholder expression *)
              fun unsetDim () = ref(CL.E_Str "did not initalize dim")
              in
                Prog{
                    double = double, parallel = parallel, debug = debug,
                    globals = ref [preamble],
                    topDecls = ref [],
                    strands = AtomTable.mkTable (16, Fail "strand table"),
                    initially = ref [CL.S_Comment["missing initially"]],
                    numDims = ref 0,
                    imgGlobals = ref [],
                    oneDim = unsetDim (),
                    twoDim = unsetDim (),
                    thirdDim = unsetDim ()
                  }
              end
      (* append, to the given statements, assignments that copy each global variable into the globals struct *)
        fun globalIndirects (globals, stms) = let
            (* only variable declarations produce an assignment; other declarations are skipped *)
              fun copyGlobal (CL.D_Var(_, _, x, _)) =
                    SOME(CL.mkAssign(CL.mkIndirect(CL.E_Var RN.globalsVarName, x), CL.E_Var x))
                | copyGlobal _ = NONE
              in
                stms @ List.mapPartial copyGlobal globals
              end

      (* register the code that is used to register command-line options for input variables *)
        fun inputs (Prog{topDecls, ...}, stm) = let
            (* the function takes a single "opts" parameter that points to the options *)
              val optsParam = CL.PARAM([], CL.T_Ptr(CL.T_Named RN.optionsTy), "opts")
              in
                topDecls := CL.D_Func([], CL.voidTy, RN.registerOpts, [optsParam], stm) :: !topDecls
              end
                
        (* register the function that initializes the globals.  When the initialization
         * code is a block, the statements that copy the globals into the globals struct
         * are appended to it; any other statement is used as the body unchanged.
         *)
        fun init (Prog{globals, topDecls, ...}, init) = let
              val globalsParam = CL.PARAM([], CL.T_Ptr(CL.T_Named RN.globalsTy), RN.globalsVarName)
              val body = (case init
                     of CL.S_Block stms => CL.S_Block(globalIndirects(!globals, stms))
                      | _ => init
                    (* end case *))
              in
                topDecls := CL.D_Func([], CL.voidTy, RN.initGlobals, [globalsParam], body) :: !topDecls
              end
            
      (* create and register the initially function for a program *)
        fun initially {
              prog = Prog{strands, initially, numDims, oneDim, twoDim, thirdDim, ...},
              isArray : bool,
              iterPrefix : stm list,
              iters : (var * exp * exp) list,
              createPrefix : stm list,
              strand : Atom.atom,
              args : exp list
            } = let
              val nDims = List.length iters
            (* map f over the list, passing each element's index (from 0) along with it *)
              fun mapi f xs = let
                    fun step (x, (i, acc)) = (i+1, f(i, x) :: acc)
                    val (_, revYs) = List.foldl step (0, []) xs
                    in
                      List.rev revYs
                    end
            (* base[i] is the lower bound of iterator i *)
              val baseInit = mapi (fn (i, (_, lo, _)) => (i, CL.I_Exp lo)) iters
            (* size[i] is (hi - lo) + 1 for iterator i *)
              val sizeInit = mapi
                    (fn (i, (ToC.V(ty, _), lo, hi)) => let
                        val extent = CL.mkBinOp(CL.mkBinOp(hi, CL.#-, lo), CL.#+, CL.E_Int(1, ty))
                        in
                          (i, CL.I_Exp extent)
                        end)
                      iters
              val numStrandsVar = "numStrandsVar"
              val allocCode = iterPrefix @ [
                      CL.mkComment["allocate initial block of strands"],
                      CL.mkDecl(CL.T_Array(CL.int32, SOME nDims), "base", SOME(CL.I_Array baseInit)),
                      CL.mkDecl(CL.T_Array(CL.uint32, SOME nDims), "size", SOME(CL.I_Array sizeInit)),
                      CL.mkDecl(CL.int32, "numDims", SOME(CL.I_Exp(CL.E_Int(IntInf.fromInt nDims, CL.int32))))
                    ]
            (* record iterator upper bounds in the program's dimension refs.  The counter
             * starts at the number of iterators and selects the ref: 3 sets oneDim, 2 sets
             * twoDim, and 1 sets thirdDim; any other count stops the recording.
             *)
              fun recordDims ([], _) = ()
                | recordDims ((ToC.V _, _, hi)::rest, 3) = (oneDim := hi; recordDims (rest, 2))
                | recordDims ((ToC.V _, _, hi)::rest, 2) = (twoDim := hi; recordDims (rest, 1))
                | recordDims ((ToC.V _, _, hi)::rest, 1) = (thirdDim := hi; recordDims (rest, 0))
                | recordDims ((ToC.V _, _, _)::_, _) = ()
            (* a loop over the dimensions that multiplies numStrandsVar by size[i] *)
              val idx = CL.E_Var "i"
              val loopBody = CL.mkExpStm(
                    CL.mkAssignOp(CL.E_Var numStrandsVar, CL.*=, CL.mkSubscript(CL.E_Var "size", idx)))
              val numStrandsLoop = CL.mkFor(
                    [(CL.intTy, "i", CL.E_Int(0, CL.intTy))],
                    CL.mkBinOp(idx, CL.#<, CL.E_Var "numDims"),
                    [CL.mkPostOp(idx, CL.^++)],
                    loopBody)
              in
                numDims := nDims;
                initially := allocCode @ [numStrandsLoop];
                recordDims (iters, nDims)
              end

      (***** OUTPUT *****)
    (* Generate the declarations for a strand: the strand's code followed by a static
     * print function named <name>_print that writes the strand's output variable to
     * a FILE using fprintf.
     * Raises Fail if the strand has no output variable or if the output type is not
     * an integer vector, a scalar, or a vector of reals.
     *)
    fun genStrand (Strand{name, tyName, output, code, ...}) = let
            (* the print function *)
              val prFnName = concat[name, "_print"]
              val prFn = let
                    val params = [
                          CL.PARAM([], CL.T_Ptr(CL.T_Named "FILE"), "outS"),
                          CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "self")
                        ]
                  (* report a missing output variable with a message instead of a Bind exception *)
                    val (ty, x) = (case !output
                           of SOME out => out
                            | NONE => raise Fail(concat[
                                  "genStrand: strand ", name, " has no output variable"
                                ])
                          (* end case *))
                    val outState = CL.mkIndirect(CL.mkVar "self", x)
                  (* the format string followed by one argument per component *)
                    val prArgs = (case ty
                           of Ty.IVecTy 1 => [CL.E_Str(!RN.gIntFormat ^ "\n"), outState]
                            | Ty.IVecTy d => let
                                val fmt = CL.E_Str(
                                      String.concatWith " " (List.tabulate(d, fn _ => !RN.gIntFormat))
                                      ^ "\n")
                                val args = List.tabulate (d, fn i => ToC.ivecIndex(outState, d, i))
                                in
                                  fmt :: args
                                end
                            | Ty.TensorTy[] => [CL.E_Str "%f\n", outState]
                            | Ty.TensorTy[d] => let
                                val fmt = CL.E_Str(
                                      String.concatWith " " (List.tabulate(d, fn _ => "%f"))
                                      ^ "\n")
                                val args = List.tabulate (d, fn i => ToC.vecIndex(outState, d, i))
                                in
                                  fmt :: args
                                end
                            | _ => raise Fail("genStrand: unsupported output type " ^ Ty.toString ty)
                          (* end case *))
                    in
                      CL.D_Func(["static"], CL.voidTy, prFnName, params,
                        CL.mkCall("fprintf", CL.mkVar "outS" :: prArgs))
                    end
              in
              (* the print function is consed onto the stored list before the whole list is reversed *)
                List.rev (prFn :: !code)
              end
        (* the type declaration for the strand's state struct.  The fields are the state
         * variables in the reverse of the order in which they are stored in the list.
         *)
        fun genStrandTyDef (Strand{tyName, state, ...}) = let
              fun addField (ToC.V(ty, x), fields) = (ty, x) :: fields
              in
                CL.D_StructDef(List.foldl addField [] (!state), tyName)
              end
        
        
        (* generates the load kernel function *)
(* FIXME: this code might be part of the runtime system *)
        (* The verbatim C source of loadKernel, which reads a whole file into a freshly
         * malloc'd buffer and returns it (or 0 when the file cannot be opened).
         * The emitted code reads the file once, NUL-terminates the buffer (which is
         * allocated with one extra byte for that purpose), and closes the file.
         *)
        fun genKernelLoader() =
              CL.D_Verbatim [
                  "/* Loads the Kernel from a file */",
                  "char * loadKernel (const char * filename) {",
                  "struct stat statbuf;",
                  "FILE *fh;",
                  "char *source;",
                  "fh = fopen(filename, \"r\");",
                  "if (fh == 0)",
                  "   return 0;",
                  "stat(filename, &statbuf);",
                  "source = (char *) malloc(statbuf.st_size + 1);",
                  "fread(source, statbuf.st_size, 1, fh);",
                  "source[statbuf.st_size] = '\\0';",
                  "fclose(fh);",
                  "return source;",
                  "}"
                ]
        (* generates the opencl buffers for the image data *) 
        (* Build the host statements that declare and create the OpenCL buffers: first
         * the buffer for the globals struct, then, for each (name, dimension) image
         * global, one buffer for the image struct and one for its data.
         *)
        fun getGlobalDataBuffers (globals, count, contextVar, errVar) = let
              val memFlags = CL.E_Var "CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR"
              fun sizeof tyName = CL.mkApply("sizeof", [CL.E_Var tyName])
            (* a clCreateBuffer call for the given size and host pointer *)
              fun createBuffer (size, hostPtr) = CL.mkApply("clCreateBuffer", [
                      CL.E_Var contextVar,
                      memFlags,
                      size,
                      hostPtr,
                      CL.E_UnOp(CL.%&, CL.E_Var errVar)
                    ])
              val globalsBuf = concat[RN.globalsVarName, "_cl"]
              val globalBufferDecl = CL.mkDecl(CL.clMemoryTy, globalsBuf, NONE)
              val globalBuffer = CL.mkAssign(
                    CL.E_Var globalsBuf,
                    createBuffer (sizeof RN.globalsTy, CL.E_Var RN.globalsVarName))
              fun genDataBuffers ([], _) = []
                | genDataBuffers ((img, nDims)::rest, count) = let
(* FIXME: use CL constructors to  build expressions (not strings) *)
                  (* the text after "img->" that multiplies together the image's size fields *)
                    val extent = (case nDims
                           of 1 => "size[0]"
                            | 2 => concat["size[0]", " * ", img, "->size[1]"]
                            | _ => concat["size[0]", " * ", img, "->size[1] * ", img, "->size[2]"]
                          (* end case *))
                    val dataSize = CL.mkBinOp(sizeof "float", CL.#*, CL.mkIndirect(CL.E_Var img, extent))
                    val imgBuf = RN.addBufferSuffix img
                    val dataBuf = RN.addBufferSuffixData img
                    in
                      CL.mkDecl(CL.clMemoryTy, imgBuf, NONE) ::
                      CL.mkDecl(CL.clMemoryTy, dataBuf, NONE) ::
                      CL.mkAssign(CL.E_Var imgBuf,
                        createBuffer (sizeof (RN.imageTy nDims), CL.E_Var img)) ::
                      CL.mkAssign(CL.E_Var dataBuf,
                        createBuffer (dataSize, CL.mkIndirect(CL.E_Var img, "data"))) ::
                      genDataBuffers (rest, count + 2)
                    end
              in
                globalBufferDecl :: globalBuffer :: genDataBuffers (globals, count + 2)
              end
        
        (* generates the kernel arguments for the image data *) 
        (* Build the clSetKernelArg statements for the global data.  The globals buffer
         * is bound to argument index count; each (name, dimension) image global then
         * takes the next two indices, one for its image-struct buffer and one for its
         * data buffer.
         * The image buffers are named with RN.addBufferSuffix and RN.addBufferSuffixData,
         * which are the names under which getGlobalDataBuffers declares them.
         *)
        fun genGlobalArguments (globals, count, kernelVar, errVar) = let
            (* err = clSetKernelArg(kernel, idx, sizeof(cl_mem), &bufName) *)
              fun setArg (idx, bufName) = CL.mkAssign(CL.E_Var errVar, CL.mkApply("clSetKernelArg", [
                      CL.E_Var kernelVar,
                      CL.E_Int(idx, CL.intTy),
                      CL.mkApply("sizeof", [CL.E_Var "cl_mem"]),
                      CL.E_UnOp(CL.%&, CL.E_Var bufName)
                    ]))
              fun genDataArguments ([], _) = []
                | genDataArguments ((img, _)::rest, idx) =
                    setArg (idx, RN.addBufferSuffix img) ::
                    setArg (idx + 1, RN.addBufferSuffixData img) ::
                    genDataArguments (rest, idx + 2)
              in
                setArg (count, concat[RN.globalsVarName, "_cl"]) :: genDataArguments (globals, count + 1)
              end
        (* generates the main function of host code *) 
        (* The host program's main function: it allocates the globals struct, calls the
         * globals-initialization function and then the setup function on it, and
         * returns 0.
         *)
        fun genHostMain () = let
              val globalsExp = CL.E_Var RN.globalsVarName
              val allocGlobals = CL.mkApply("malloc", [CL.mkApply("sizeof", [CL.E_Var RN.globalsTy])])
              val stms = [
                      CL.mkDecl(CL.T_Ptr(CL.T_Named RN.globalsTy), RN.globalsVarName,
                        SOME(CL.I_Exp allocGlobals)),
                      CL.mkCall(RN.initGlobals, [globalsExp]),
                      CL.mkCall(RN.setupFName, [globalsExp]),
                      CL.mkReturn(SOME(CL.E_Int(0, CL.intTy)))
                    ]
              val params = [
                      CL.PARAM([], CL.intTy, "argc"),
                      CL.PARAM([], CL.charArrayPtr, "argv")
                    ]
              in
                CL.D_Func([], CL.intTy, "main", params, CL.mkBlock stms)
              end
        (* generates the host-side setup function *) 
        fun genHostSetupFunc(strand as Strand{name,tyName,...}, filename, nDims, initially, imgGlobals, oneDim, twoDim, thirdDim) = let
              (* Declare opencl setup objects *) 
                val programVar= "program" 
                val kernelVar = "kernel"
                val cmdVar = "queue" 
                val inStateVar = "selfin" 
                val outStateVar = "selfout" 
                val stateSizeVar= "state_mem_size" 
                val clInstateVar = "clSelfIn"
                val clOutStateVar = "clSelfOut" 
                val clGlobals = "clGlobals" 
                val sourcesVar = "sources" 
                val contextVar = "context" 
                val errVar = "err"
                val imgDataSizeVar = "image_dataSize"
                val globalVar = "global_work_size"
                val localVar = "local_work_size" 
                val clFNVar = "filename"
                val numStrandsVar = "numStrandsVar" 
                val headerFNVar = "header"  
                val deviceVar = "device" 
                val platformsVar = "platforms" 
                val numPlatformsVar = "num_platforms" 
                val numDevicesVar = "num_devices"
                val assertStm = CL.mkCall("assert",[CL.mkBinOp(CL.E_Var errVar, CL.#==, CL.E_Var "CL_SUCCESS")])
                val params = [ 
                         CL.PARAM([],CL.T_Ptr(CL.T_Named RN.globalsTy), RN.globalsVarName)
                         ]
                val declarations = [CL.mkDecl(CL.clProgramTy, programVar, NONE),
                          CL.mkDecl(CL.clKernelTy, kernelVar, NONE),
                          CL.mkDecl(CL.clCmdQueueTy, cmdVar, NONE),
                          CL.mkDecl(CL.clContextTy, contextVar, NONE),
                          CL.mkDecl(CL.intTy, errVar, NONE),
                          CL.mkDecl(CL.intTy, numStrandsVar, NONE), 
                          CL.mkDecl(CL.intTy, numPlatformsVar, NONE), 
                          CL.mkDecl(CL.intTy, stateSizeVar, NONE), 
                          CL.mkDecl(CL.intTy, imgDataSizeVar, NONE), 
                          CL.mkDecl(CL.clDeviceIdTy, deviceVar, NONE), 
                          CL.mkDecl(CL.T_Ptr(CL.T_Named tyName), inStateVar,NONE), 
                          CL.mkDecl(CL.clMemoryTy,clInstateVar,NONE),
                          CL.mkDecl(CL.clMemoryTy,clOutStateVar,NONE), 
                          CL.mkDecl(CL.T_Ptr(CL.T_Named tyName), outStateVar,NONE),
                          CL.mkDecl(CL.charPtr, clFNVar,SOME(CL.I_Exp(CL.E_Str filename))),
                          CL.mkDecl(CL.charPtr, headerFNVar,SOME(CL.I_Exp(CL.E_Str "Diderot/opencl_types.h"))), 
                          CL.mkDecl(CL.T_Array(CL.charPtr,SOME(2)),sourcesVar,NONE), 
                          CL.mkDecl(CL.T_Array(CL.T_Named "size_t",SOME(nDims)),globalVar,NONE),
                          CL.mkDecl(CL.T_Array(CL.T_Named "size_t",SOME(nDims)),localVar,NONE),
                          CL.mkDecl(CL.intTy,numDevicesVar,SOME(CL.I_Exp(CL.E_Int(~1,CL.intTy)))), 
                          CL.mkDecl(CL.T_Array(CL.clDeviceIdTy, SOME(1)), platformsVar, NONE), 
                          CL.mkDecl(CL.intTy,"num_platforms",SOME(CL.I_Exp(CL.E_Int(~1,CL.intTy))))] 
                
                (* Retrieve the platforms *) 
                val platformStm = [CL.mkAssign(CL.E_Var errVar, CL.mkApply("clGetPlatformIDs",
                                                  [CL.E_Int(10,CL.intTy), 
                                                   CL.E_UnOp(CL.%&,CL.E_Var platformsVar), 
                                                   CL.E_UnOp(CL.%&,CL.E_Var numDevicesVar)])),
                                                   assertStm]
                                                   
                val devicesStm = [CL.mkAssign(CL.E_Var errVar, CL.mkApply("clGetDeviceIDs",
                                                  [CL.mkSubscript(CL.E_Var platformsVar,CL.E_Int(0,CL.intTy)),
                                                   CL.E_Var "CL_DEVICE_TYPE_GPU",
                                                   CL.E_Int(1,CL.intTy), 
                                                   CL.E_UnOp(CL.%&,CL.E_Var deviceVar), 
                                                   CL.E_UnOp(CL.%&,CL.E_Var numDevicesVar)])),
                                                   assertStm] 
                
                (* Create Context *) 
                val contextStm = [CL.mkAssign(CL.E_Var contextVar, CL.mkApply("clCreateContext",
                                                  [CL.E_Int(0,CL.intTy), 
                                                  CL.E_Int(1,CL.intTy),
                                                  CL.E_UnOp(CL.%&,CL.E_Var deviceVar),
                                                  CL.E_Var "NULL",
                                                  CL.E_Var "NULL",
                                                  CL.E_UnOp(CL.%&,CL.E_Var errVar)])),
                                                  assertStm]
                
                (* Create Command Queue *) 
                val commandStm = [CL.mkAssign(CL.E_Var cmdVar, CL.mkApply("clCreateCommandQueue",
                                                  [CL.E_Var contextVar, 
                                                  CL.E_Var deviceVar,
                                                  CL.E_Int(0,CL.intTy),
                                                  CL.E_UnOp(CL.%&,CL.E_Var errVar)])),
                                                  assertStm]
                
                (* Create Memory Buffers for Strand States and Globals *) 
                val strandSize = CL.mkAssign(CL.E_Var stateSizeVar,CL.mkBinOp(CL.mkApply("sizeof",
                                                                        [CL.E_Var tyName]), CL.#*,CL.E_Var numStrandsVar))
                val strandObjects = [CL.mkAssign(CL.E_Var inStateVar, CL.mkApply("malloc", 
                                                                                [CL.E_Var stateSizeVar])),
                                                        CL.mkAssign(CL.E_Var outStateVar, CL.mkApply("malloc", 
                                                                                [CL.E_Var stateSizeVar]))]      
                                                                                
                val clStrandObjects = [CL.mkAssign(CL.E_Var clInstateVar, CL.mkApply("clCreateBuffer",
                                                                [CL.E_Var contextVar,
                                                                CL.E_Var "CL_MEM_READ_WRITE",
                                                                CL.E_Var stateSizeVar,
                                                                CL.E_Var "NULL",
                                                                CL.E_UnOp(CL.%&,CL.E_Var errVar)])),
                                                         CL.mkAssign(CL.E_Var clOutStateVar, CL.mkApply("clCreateBuffer",
                                                                [CL.E_Var contextVar,
                                                                CL.E_Var "CL_MEM_READ_WRITE",
                                                                CL.E_Var stateSizeVar,
                                                                CL.E_Var "NULL",
                                                                CL.E_UnOp(CL.%&,CL.E_Var errVar)]))]
                                                                
            val clGlobalBuffers = getGlobalDataBuffers(!imgGlobals,3,contextVar,errVar) 
            
            
                (* Load the Kernel and Header Files *) 
                val sourceStms = [CL.mkAssign(CL.mkSubscript(CL.E_Var sourcesVar,CL.E_Int(0,CL.intTy)),
                                                                          CL.mkApply(RN.clLoaderFN, [CL.E_Var clFNVar])),
                                                  CL.mkAssign(CL.mkSubscript(CL.E_Var sourcesVar,CL.E_Int(1,CL.intTy)),
                                                                          CL.mkApply(RN.clLoaderFN, [CL.E_Var headerFNVar]))]
                                                                          
                (* Created Enqueue Statements *)
(* FIXME: simplify this code by function abstraction *)
                val enqueueStm = if nDims = 1 
                        then [CL.mkAssign(CL.E_Var errVar, 
                                                          CL.mkApply("clEnqueueNDRangeKernel", 
                                                                                                [CL.E_Var cmdVar,
                                                                                                 CL.E_Var kernelVar,
                                                                                                 CL.E_Int(1,CL.intTy), 
                                                                                                 CL.E_Var "NULL",
                                                                                                 CL.E_Var globalVar,
                                                                                                 CL.E_Var localVar,
                                                                                                 CL.E_Int(0,CL.intTy),
                                                                                                 CL.E_Var "NULL",
                                                                                                 CL.E_Var "NULL"])),CL.mkCall("clFinish",[CL.E_Var cmdVar])]
                        else if nDims = 2  then 
                         [CL.mkAssign(CL.E_Var errVar, 
                                                        CL.mkApply("clEnqueueNDRangeKernel", 
                                                                                                [CL.E_Var cmdVar,
                                                                                                 CL.E_Var kernelVar,
                                                                                                 CL.E_Int(2,CL.intTy), 
                                                                                                 CL.E_Var "NULL",
                                                                                                 CL.E_Var globalVar,
                                                                                                 CL.E_Var localVar,
                                                                                                 CL.E_Int(0,CL.intTy),
                                                                                                 CL.E_Var "NULL",
                                                                                                 CL.E_Var "NULL"])),CL.mkCall("clFinish",[CL.E_Var cmdVar])] 
                        else 
                          [CL.mkAssign(CL.E_Var errVar, 
                                                        CL.mkApply("clEnqueueNDRangeKernel", 
                                                                                                [CL.E_Var cmdVar,
                                                                                                 CL.E_Var kernelVar,
                                                                                                 CL.E_Int(3,CL.intTy), 
                                                                                                 CL.E_Var "NULL",
                                                                                                 CL.E_Var globalVar,
                                                                                                 CL.E_Var localVar,
                                                                                                 CL.E_Int(0,CL.intTy),
                                                                                                 CL.E_Var "NULL",
                                                                                                 CL.E_Var "NULL"])),CL.mkCall("clFinish",[CL.E_Var cmdVar])] 
                
            (* Setup up selfOut variable *) 
              val selfOutStm = CL.mkAssign(CL.E_Var outStateVar, CL.mkApply("malloc", [CL.mkBinOp(CL.E_Var numStrandsVar,
                                                                        CL.#*, CL.mkApply("sizeof",[CL.E_Var tyName]))])) 
            (* Setup Global and Local variables *)      
              val globalAndlocalStms = if nDims = 1
                    then [CL.mkAssign(CL.mkSubscript(CL.E_Var globalVar, CL.E_Int(0,CL.intTy)),
                                                                   CL.mkSubscript(CL.E_Var "size", CL.E_Int(0,CL.intTy))), 
                         CL.mkAssign(CL.mkSubscript(CL.E_Var localVar, CL.E_Int(0,CL.intTy)),
                                                                  CL.E_Var "16")]
                
                
                   else if nDims = 2 then 
                        [CL.mkAssign(CL.mkSubscript(CL.E_Var globalVar, CL.E_Int(0,CL.intTy)),
                                                                   CL.mkSubscript(CL.E_Var "size", CL.E_Int(0,CL.intTy))), 
                        CL.mkAssign(CL.mkSubscript(CL.E_Var globalVar, CL.E_Int(1,CL.intTy)),
                                                                   CL.mkSubscript(CL.E_Var "sizes", CL.E_Int(1,CL.intTy))),
                        CL.mkAssign(CL.mkSubscript(CL.E_Var localVar, CL.E_Int(0,CL.intTy)),
                                                                  CL.E_Var "16"),
                        CL.mkAssign(CL.mkSubscript(CL.E_Var localVar, CL.E_Int(1,CL.intTy)),
                                                                  CL.E_Var "16")]
                                                                  
                   else 
                        [CL.mkAssign(CL.mkSubscript(CL.E_Var globalVar, CL.E_Int(0,CL.intTy)),
                                                                   CL.mkSubscript(CL.E_Var "size", CL.E_Int(0,CL.intTy))), 
                        CL.mkAssign(CL.mkSubscript(CL.E_Var globalVar, CL.E_Int(1,CL.intTy)),
                                                                   CL.mkSubscript(CL.E_Var "size", CL.E_Int(1,CL.intTy))),
                        CL.mkAssign(CL.mkSubscript(CL.E_Var globalVar, CL.E_Int(2,CL.intTy)),
                                                                   CL.mkSubscript(CL.E_Var "size", CL.E_Int(2,CL.intTy))),
                        CL.mkAssign(CL.mkSubscript(CL.E_Var localVar, CL.E_Int(0,CL.intTy)),
                                                                  CL.E_Var "16"),
                        CL.mkAssign(CL.mkSubscript(CL.E_Var localVar, CL.E_Int(1,CL.intTy)),
                                                                  CL.E_Var "16"),
                        CL.mkAssign(CL.mkSubscript(CL.E_Var localVar, CL.E_Int(2,CL.intTy)),
                                                                  CL.E_Var "16")]
                

                
                (* Setup Kernel arguments *) 
              val kernelArguments = [CL.mkAssign(CL.E_Var errVar,CL.mkApply("clSetKernelArg",
                                                                [CL.E_Var kernelVar, 
                                                                 CL.E_Int(0,CL.intTy),
                                                                 CL.mkApply("sizeof",[CL.E_Var "cl_mem"]),
                                                                 CL.E_UnOp(CL.%&,CL.E_Var clInstateVar)])), 
                                                            CL.mkExpStm(CL.mkAssignOp(CL.E_Var errVar, CL.|=,CL.mkApply("clSetKernelArg",
                                                                [CL.E_Var kernelVar, 
                                                                 CL.E_Int(1,CL.intTy),
                                                                 CL.mkApply("sizeof",[CL.E_Var "cl_mem"]),
                                                                 CL.E_UnOp(CL.%&,CL.E_Var clOutStateVar)]))), 
                                                                  CL.mkExpStm(CL.mkAssignOp(CL.E_Var errVar, CL.|=,CL.mkApply("clSetKernelArg",
                                                                [CL.E_Var kernelVar, 
                                                                 CL.E_Int(2,CL.intTy),
                                                                 CL.mkApply("sizeof",[CL.E_Var "int"]),
                                                                 CL.E_UnOp(CL.%&,CL.E_Var "width")])))] 
                                                                 
              val clGlobalArguments = genGlobalArguments(!imgGlobals,3,kernelVar,errVar) 
                
            (* Retrieve output *)
              val outputStm = CL.mkAssign(CL.E_Var errVar, CL.mkApply("clEnqueueReadBuffer", 
                                        [CL.E_Var cmdVar,
                                         CL.E_Var clOutStateVar, 
                                         CL.E_Var "CL_TRUE",
                                         CL.E_Int(0,CL.intTy), 
                                         CL.E_Var stateSizeVar,
                                         CL.E_Var outStateVar,
                                         CL.E_Int(0,CL.intTy),
                                         CL.E_Var "NULL",
                                         CL.E_Var "NULL"]))
            (* Free all the objects *) 
              val freeStms = [CL.mkCall("clReleaseKernel",[CL.E_Var kernelVar]),
                                                CL.mkCall("clReleaseProgram",[CL.E_Var programVar ]),
                                                CL.mkCall("clReleaseCommandQueue",[CL.E_Var cmdVar]),
                                                CL.mkCall("clReleaseContext",[CL.E_Var contextVar]),
                                                CL.mkCall("clReleaseMemObject",[CL.E_Var clInstateVar]),
                                                CL.mkCall("clReleaseMemObject",[CL.E_Var clOutStateVar])]
            (* Body put all the statments together *) 
              val body =  declarations @ platformStm @ devicesStm @ contextStm @ commandStm @ !initially @ [strandSize] @
                                   clStrandObjects @ clGlobalBuffers @ sourceStms  @ [selfOutStm] @ globalAndlocalStms @
                                   kernelArguments @ clGlobalArguments @ enqueueStm @  [outputStm] @ freeStms
                
              in 
                CL.D_Func([],CL.voidTy,RN.setupFName,params,CL.mkBlock(body))
              end 
                
                
        (* generate the main kernel function for the .cl file.
         *
         * Arguments: the strand and the number of grid dimensions (nDims).
         * The result is a "__kernel void" function declaration, named
         * RN.kernelFuncName, whose body
         *   1. declares the thread-id variables ("x" when nDims = 1, "x" and "y"
         *      otherwise) and sets them from RN.getGlobalThreadId,
         *   2. declares the locals "strand_in" and "strand_out",
         *   3. emits one assignment per state array ("selfIn"/"selfOut"), indexed
         *      by the strand's index,
         *   4. calls the strand's init function and then loops on its update and
         *      stabilize functions until the status equals RN.kStabilize.
         *
         * Only two thread ids are generated when nDims is not 1, so a third
         * dimension is not reflected in the index.
         *
         * NOTE(review): the statements in step 3 store the freshly declared locals
         * into the global arrays (selfIn[i] = strand_in); confirm that this copy
         * direction is the intended one.
         *)
        fun genKernelFun (Strand{name, tyName, ...}, nDims) = let
              val fName = RN.kernelFuncName
              val inState = "strand_in"
              val outState = "strand_out"
              val params = [
                      CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfIn"),
                      CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfOut"),
                      CL.PARAM(["__global"], CL.intTy, "width")
                    ]
              val zero = CL.E_Int(0, CL.intTy)
            (* declare an int variable that is initialized to 0 *)
              fun declId x = CL.mkDecl(CL.intTy, x, SOME(CL.I_Exp zero))
            (* set variable x to the global thread id of dimension dim *)
              fun setId (x, dim) = CL.mkAssign(
                    CL.E_Var x,
                    CL.mkApply(RN.getGlobalThreadId, [CL.E_Int(dim, CL.intTy)]))
              val threadIds = if nDims = 1
                    then [declId "x", setId ("x", 0)]
                    else [declId "x", declId "y", setId ("x", 0), setId ("y", 1)]
              val strandDecl = [
                      CL.mkDecl(CL.T_Named tyName, inState, NONE),
                      CL.mkDecl(CL.T_Named tyName, outState, NONE)
                    ]
            (* index of this thread's strand in the state arrays.  The 1D case must
             * use the variable "x"; a string literal here would produce the invalid
             * subscript selfIn["x"] in the generated kernel.
             *)
              val index = if nDims = 1
                    then CL.E_Var "x"
                    else CL.mkBinOp(
                      CL.mkBinOp(CL.E_Var "y", CL.#*, CL.E_Var "width"),
                      CL.#+,
                      CL.E_Var "x")
              val strandObjects = [
                      CL.mkAssign(CL.mkSubscript(CL.E_Var "selfIn", index), CL.E_Var inState),
                      CL.mkAssign(CL.mkSubscript(CL.E_Var "selfOut", index), CL.E_Var outState)
                    ]
              val status = CL.mkDecl(CL.intTy, "status", SOME(CL.I_Exp zero))
              val strandInitCall = CL.mkCall(RN.strandInit name, [CL.E_Var inState])
              val prologue = threadIds @ strandDecl @ strandObjects @ [status, strandInitCall]
            (* loop while the status is different from RN.kStabilize *)
              val loopCond = CL.mkBinOp(CL.E_Var "status", CL.#!=, CL.E_Var RN.kStabilize)
              val loopBody = [
                      CL.mkAssign(
                        CL.E_Var "status",
                        CL.mkApply(RN.strandUpdate name, [CL.E_Var inState, CL.E_Var outState])),
                      CL.mkCall(RN.strandStabilize name, [CL.E_Var inState, CL.E_Var outState]),
                      CL.mkIfThen(
                        CL.mkBinOp(CL.E_Var "status", CL.#==, CL.E_Var RN.kStabilize),
                        CL.mkBreak)
                    ]
              val loop = CL.mkWhile(loopCond, CL.mkBlock loopBody)
              val body = CL.mkBlock(prologue @ [loop])
              in
                CL.D_Func(["__kernel"], CL.voidTy, fName, params, body)
              end
        (* build the struct definition (named RN.globalsTy) whose fields are the
         * variable declarations found in the list of global declarations; the
         * fields appear in the same order as the declarations.
         *)
        fun genGlobalStruct (globals) = let
            (* a variable declaration contributes its type and name; any other
             * kind of declaration contributes nothing.
             *)
              fun fieldOf (CL.D_Var(_, ty, x, _)) = SOME(ty, x)
                | fieldOf _ = NONE
              val fields = List.mapPartial fieldOf globals
              in
                CL.D_StructDef(fields, RN.globalsTy)
              end
      (* print the strand-descriptor table to ppStrm: first the variable
       * RN.numStrands, which holds the number of strands, and then the array
       * RN.strands, whose i'th element is the address of the i'th strand's
       * descriptor.
       *)
        fun genStrandTable (ppStrm, strands) = let
              val count = length strands
            (* initializer that takes the address of a strand's descriptor *)
              fun descAddr (Strand{name, ...}) =
                    CL.I_Exp(CL.mkUnOp(CL.%&, CL.E_Var(RN.strandDesc name)))
            (* pair each initializer with its position in the list *)
              val (_, revInits) = List.foldl
                    (fn (s, (i, acc)) => (i+1, (i, descAddr s) :: acc))
                      (0, []) strands
              val inits = List.rev revInits
              fun emit dcl = PrintAsC.output(ppStrm, dcl)
              val countDcl = CL.D_Var(
                    [], CL.int32, RN.numStrands,
                    SOME(CL.I_Exp(CL.E_Int(IntInf.fromInt count, CL.int32))))
              val tableDcl = CL.D_Var(
                    [],
                    CL.T_Array(CL.T_Ptr(CL.T_Named RN.strandDescTy), SOME count),
                    RN.strands,
                    SOME(CL.I_Array inits))
              in
                emit countDcl;
                emit tableDcl
              end

        (* generate the two source files for a program: the host file
         * (baseName ^ ".c") and the OpenCL file (baseName ^ ".cl").
         *
         * Only the first strand in the program's strand table is used.  If the
         * table is empty, the Empty exception (from hd) is raised before any
         * output file is created.
         *)
        fun genSrc (baseName, Prog{globals, topDecls, strands, initially,imgGlobals,numDims,oneDim,twoDim,thirdDim,...}) = let
            (* select the strand before opening any file, so that a failure here
             * does not leave open (and empty) output files behind.
             *)
              val strands = AtomTable.listItems strands
              val singleStrand as Strand{code, ...} = hd strands
              val clFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "cl"}
              val cFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "c"}
              val clOutS = TextIO.openOut clFileName
              val cOutS = TextIO.openOut cFileName
              val clppStrm = PrintAsC.new clOutS
              val cppStrm = PrintAsC.new cOutS
              fun cppDecl dcl = PrintAsC.output(cppStrm, dcl)
              fun clppDecl dcl = PrintAsC.output(clppStrm, dcl)
              in
              (* Generate the Host file .c *)
                cppDecl (CL.D_Verbatim([
                    "#include <OpenCL/OpenCL.h>",
                  (* the header name must be quoted to be a valid #include directive *)
                    "#include \"Diderot/diderot.h\""
                  ]));
                List.app cppDecl (List.rev (!globals));
                cppDecl (genGlobalStruct (!globals));
                cppDecl (genStrandTyDef singleStrand);
                cppDecl (genKernelLoader());
                List.app cppDecl (List.rev (!topDecls));
                cppDecl (genHostSetupFunc (
                    singleStrand, clFileName, !numDims, initially,
                    imgGlobals, oneDim, twoDim, thirdDim));
                cppDecl (genHostMain());

              (* Generate the OpenCL file *)
                clppDecl (genGlobalStruct (!globals));
                clppDecl (genStrandTyDef singleStrand);
                List.app clppDecl (!code);
                clppDecl (genKernelFun (singleStrand,!numDims));

                (*List.app (fn strand => List.app ppDecl (genStrand strand)) strands;
                 genStrandTable (ppStrm, strands);
                ppDecl (!initially);*)

              (* close the pretty printers before their underlying output streams *)
                PrintAsC.close cppStrm;
                PrintAsC.close clppStrm;
                TextIO.closeOut cOutS;
                TextIO.closeOut clOutS
              end

      (* output the code to a file.  The string is the basename of the file, the
       * extension is provided by the target.
       *
       * The compiler flags (cflags) and loader options (ldOpts) are computed
       * here, but this function does not use them; it only calls genSrc.
       *)
        fun generate (basename, prog as Prog{double, parallel, debug, ...}) = let
            (* add x to the front of xs only when the flag is set *)
              fun consIf (flag, x, xs) = if flag then x :: xs else xs
            (* generate the C compiler flags *)
              val includeFlags = ["-I" ^ Paths.diderotInclude, "-I" ^ Paths.teemInclude]
              val threadFlags = consIf (parallel, #pthread Paths.cflags, includeFlags)
              val debugFlag = if debug
                    then #debug Paths.cflags
                    else #ndebug Paths.cflags
              val cflags = #base Paths.cflags :: debugFlag :: threadFlags
            (* generate the loader flags *)
              val threadLibs = consIf (parallel, #pthread Paths.extraLibs, [])
              val extraLibs = Paths.teemLinkFlags @ (#base Paths.extraLibs :: threadLibs)
              val rtLib = TargetUtil.runtimeName {
                      target = TargetUtil.TARGET_CL,
                      parallel = parallel, double = double, debug = debug
                    }
              val ldOpts = rtLib :: extraLibs
              in
                genSrc (basename, prog)
              end
                              
                (*RunCC.compile (basename, cflags);
                RunCC.link (basename, ldOpts)*)

      end

  (* strands: operations for defining strands and for registering their code *)
    structure Strand =
      struct

      (* create a strand for the given identifier, with empty state, no output
       * variable and no code; the strand is added to the program's strand
       * table and returned.
       *)
        fun define (Prog{strands, ...}, strandId) = let
              val strandName = Atom.toString strandId
              val newStrand = Strand{
                      name = strandName,
                      tyName = RN.strandTy strandName,
                      state = ref [],
                      output = ref NONE,
                      code = ref []
                    }
              val () = AtomTable.insert strands (strandId, newStrand)
              in
                newStrand
              end

      (* find the strand with the given identifier in the program's strand table *)
        fun lookup (Prog{strands = tbl, ...}, strandId) = AtomTable.lookup tbl strandId

      (* register the strand-state initialization function.  Its first parameter
       * is a pointer to the strand ("selfOut"); the remaining parameters are
       * the strand parameters.
       *)
        fun init (Strand{name, tyName, code, ...}, params, body) = let
              fun toParam (ToC.V(ty, x)) = CL.PARAM([], ty, x)
              val selfParam = CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfOut")
              val initFn = CL.D_Func(
                    [], CL.voidTy, RN.strandInit name,
                    selfParam :: List.map toParam params,
                    body)
              in
                code := initFn :: !code
              end

      (* register a strand method.  The function is named <strand>_<method>,
       * takes the "selfIn" and "selfOut" strand pointers, and returns an int32.
       *)
        fun method (Strand{name, tyName, code, ...}, methName, body) = let
              val selfTy = CL.T_Ptr(CL.T_Named tyName)
              fun selfParam x = CL.PARAM([], selfTy, x)
              val methFn = CL.D_Func(
                    [], CL.int32, concat[name, "_", methName],
                    [selfParam "selfIn", selfParam "selfOut"],
                    body)
              in
                code := methFn :: !code
              end

      (* record the type and name of the strand's output variable *)
        fun output (Strand{output = outRef, ...}, ty, ToC.V(_, x)) = outRef := SOME(ty, x)

      end

  end

(* the OpenCL back end: the CodeGenFn functor applied to the CLTarget structure *)
structure CLBackEnd = CodeGenFn(CLTarget)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0