Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml
ViewVC logotype

View of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1308 - (download) (annotate)
Sat Jun 11 14:21:07 2011 UTC (8 years, 4 months ago) by jhr
File size: 31875 byte(s)
  More fixes for OpenCL
(* c-target.sml
 *
 * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
 * All rights reserved.
 *)

structure CLTarget : TARGET =
  struct

    structure IL = TreeIL
    structure V = IL.Var
    structure Ty = IL.Ty
    structure CL = CLang
    structure RN = RuntimeNames
    structure ToCL = TreeToCL
    structure N = CNames

  (* variable translation *)
    structure TrVar =
      struct
        type env = CL.typed_var TreeIL.Var.Map.map
        fun lookup (env, x) = (case V.Map.find (env, x)
               of SOME(CL.V(_, x')) => x'
                | NONE => raise Fail(concat["lookup(_, ", V.name x, ")"])
              (* end case *))
      (* translate a variable that occurs in an l-value context (i.e., as the target of an assignment) *)
        fun lvalueVar (env, x) = (case V.kind x
               of IL.VK_Global => CL.mkVar(lookup(env, x))
                | IL.VK_State strand => CL.mkIndirect(CL.mkVar "selfOut", lookup(env, x))
                | IL.VK_Local => CL.mkVar(lookup(env, x))
              (* end case *))
      (* translate a variable that occurs in an r-value context *)
        fun rvalueVar (env, x) = (case V.kind x
               of IL.VK_Global => CL.mkVar(lookup(env, x))
                | IL.VK_State strand => CL.mkIndirect(CL.mkVar "selfIn", lookup(env, x))
                | IL.VK_Local => CL.mkVar(lookup(env, x))
              (* end case *))
      end       
    
        structure ToC = TreeToCFn (TrVar)
  
  (* C variable translation *)
    structure TrCVar =
      struct
        type env = CL.typed_var TreeIL.Var.Map.map
        fun lookup (env, x) = (case V.Map.find (env, x)
               of SOME(CL.V(_, x')) => x'
                | NONE => raise Fail(concat["TrCVar.lookup(_, ", V.name x, ")"])
              (* end case *))
      (* translate a variable that occurs in an l-value context (i.e., as the target of an assignment) *)
        fun lvalueVar (env, x) = (case V.kind x
               of IL.VK_Global => CL.mkIndirect(CL.mkVar RN.globalsVarName, lookup(env, x))
                | IL.VK_State strand => raise Fail "unexpected strand context"
                | IL.VK_Local => CL.mkVar(lookup(env, x))
              (* end case *))
      (* translate a variable that occurs in an r-value context *)
        val rvalueVar = lvalueVar
      end

    structure ToC = TreeToCFn (TrCVar)

    type var = CL.typed_var
    type exp = CL.exp
    type stm = CL.stm

  (* OpenCL specific types *) 
    val clProgramTy = CL.T_Named "cl_program" 
    val clKernelTy  = CL.T_Named "cl_kernel"
    val clCmdQueueTy = CL.T_Named "cl_command_queue" 
    val clContextTy = CL.T_Named "cl_context" 
    val clDeviceIdTy = CL.T_Named "cl_device_id"
    val clPlatformIdTy = CL.T_Named "cl_platform_id" 
    val clMemoryTy = CL.T_Named "cl_mem"

  (* variable or field that is mirrored between host and GPU *)
    type mirror_var = {
            hostTy : CL.ty,             (* variable type on Host (i.e., C type) *)
            gpuTy : CL.ty,              (* variable's type on GPU (i.e., OpenCL type) *)
            var : CL.var                (* variable name *)
          }

    datatype strand = Strand of {
        name : string,
        tyName : string,
        state : mirror_var list ref,
        output : (Ty.ty * CL.var) option ref,   (* the strand's output variable (only one for now) *)
        code : CL.decl list ref,
        init_code: CL.decl ref
      }

    datatype program = Prog of {
        name : string,                  (* stem of source file *)
        double : bool,                  (* true for double-precision support *)
        parallel : bool,                (* true for multithreaded (or multi-GPU) target *)
        debug : bool,                   (* true for debug support in executable *)
        globals : mirror_var list ref, 
        topDecls : CL.decl list ref,
        strands : strand AtomTable.hash_table,
        initially :  CL.decl ref,
        numDims: int ref, 
        imgGlobals: (string * int) list ref,
        prFn: CL.decl ref
      }

    datatype env = ENV of {
        info : env_info,
        vMap : var V.Map.map,
        scope : scope
      }

    and env_info = INFO of {
        prog : program
      }

    and scope
      = NoScope
      | GlobalScope
      | InitiallyScope
      | StrandScope of TreeIL.var list  (* strand initialization *)
      | MethodScope of TreeIL.var list  (* method body; vars are state variables *)

  (* the supprted widths of vectors of reals on the target. *)
(* FIXME: for OpenCL 1.1, 3 is also valid *)
    fun vectorWidths () = [2, 4, 8, 16]

  (* tests for whether various expression forms can appear inline *)
    fun inlineCons n = (n < 2)          (* vectors are inline, but not matrices *)
    val inlineMatrixExp = false         (* can matrix-valued expressions appear inline? *)

  (* TreeIL to target translations *)
    structure Tr =
      struct
        fun fragment (ENV{info, vMap, scope}, blk) = let
              val (vMap, stms) = (case scope
		     of GlobalScope => ToC.trFragment (vMap, blk)
		      | _ => ToCL.trFragment (vMap, blk)
		    (* end case *))
              in
                (ENV{info=info, vMap=vMap, scope=scope}, stms)
              end
        fun saveState cxt stateVars (env, args, stm) = (
              ListPair.foldrEq
                (fn (x, e, stms) => ToCL.trAssign(env, x, e)@stms)
                  [stm]
                    (stateVars, args)
              ) handle ListPair.UnequalLengths => (
                print(concat["saveState ", cxt, ": length mismatch; ", Int.toString(List.length args), " args\n"]);
                raise Fail(concat["saveState ", cxt, ": length mismatch"]))
        fun block (ENV{vMap, scope, ...}, blk) = (case scope
               of StrandScope stateVars => ToCL.trBlock (vMap, saveState "StrandScope" stateVars, blk)
                | MethodScope stateVars => ToCL.trBlock (vMap, saveState "MethodScope" stateVars, blk)
                | InitiallyScope => ToCL.trBlock (vMap, fn (_, _, stm) => [stm], blk)
                | _ => ToC.trBlock (vMap, fn (_, _, stm) => [stm], blk)
              (* end case *))
        fun exp (ENV{vMap, ...}, e) = ToCL.trExp(vMap, e)
      end

  (* variables *)
    structure Var =
      struct
        fun name (ToCL.V(_, name)) = name
        fun global (Prog{globals, imgGlobals, ...}, name, ty) = let
              val x = {hostTy = ToC.trType ty, gpuTy = ToCL.trType ty, var = name}
              fun isImgGlobal (Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}), name) = 
                    imgGlobals  := (name,dim) :: !imgGlobals 
                | isImgGlobal _ =  () 
              in
                globals := x :: !globals;
                isImgGlobal (ty, name); 
                ToCL.V(#gpuTy x, name)
              end
        fun param x = ToCL.V(ToCL.trType(V.ty x), V.name x)
        fun state (Strand{state, ...}, x) = let
              val ty = V.ty x
              val x' = {hostTy = ToC.trType ty, gpuTy = ToCL.trType ty, var = V.name x}
              in
                state := x' :: !state;
                ToCL.V(#gpuTy x', #var x')
              end
      end

  (* environments *)
    structure Env =
      struct
      (* create a new environment *)
        fun new prog = ENV{
                info=INFO{prog = prog},
                vMap = V.Map.empty,
                scope = NoScope
              }
      (* define the current translation context *)
        fun setScope scope (ENV{info, vMap, ...}) = ENV{info=info, vMap=vMap, scope=scope}
        val scopeGlobal = setScope GlobalScope
        val scopeInitially = setScope InitiallyScope
        fun scopeStrand (env, svars) = setScope (StrandScope svars) env
        fun scopeMethod (env, svars) = setScope (MethodScope svars) env
      (* bind a TreeIL varaiable to a target variable *)
        fun bind (ENV{info, vMap, scope}, x, x') = ENV{
                info = info,
                vMap = V.Map.insert(vMap, x, x'),
                scope = scope
              }
      end

  (* programs *)
    structure Program =
      struct
        fun new {name, double, parallel, debug} = (
              RN.initTargetSpec double;
              CNames.initTargetSpec double;
              Prog{
                  name = name,
                  double = double, parallel = parallel, debug = debug,
                  globals = ref [],
                  topDecls = ref [],
                  strands = AtomTable.mkTable (16, Fail "strand table"),
                  initially = ref(CL.D_Comment["missing initially"]),
                  numDims = ref(0), 
                  imgGlobals = ref[], 
                  prFn = ref(CL.D_Comment(["No Print Function"])) 
                })
      (* register the global initialization part of a program *)
(* FIXME: unused code; can this be removed??
          fun globalIndirects (globals,stms) = let
                fun getGlobals ({name,target as TargetUtil.TARGET_CL}::rest) =
                      CL.mkAssign(CL.mkIndirect(CL.mkVar RN.globalsVarName,name),CL.mkVar name)
                        ::getGlobals rest
                  | getGlobals [] = [] 
                  | getGlobals (_::rest) = getGlobals rest
                in 
                  stms @ getGlobals globals
                end 
*)
      (* register the code that is used to register command-line options for input variables *)
        fun inputs (Prog{topDecls, ...}, stm) = let
              val inputsFn = CL.D_Func(
                    [], CL.voidTy, RN.registerOpts,
                    [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.optionsTy), "opts")],
                    stm)
              in
                topDecls := inputsFn :: !topDecls
              end
                
      (* register the global initialization part of a program *)
        fun init (Prog{topDecls, ...}, init) = let
              val globPtrTy = CL.T_Ptr(CL.T_Named RN.globalsTy)
              val initFn = CL.D_Func(
                    [], CL.voidTy, RN.initGlobals, [CL.PARAM([], globPtrTy, RN.globalsVarName)],
                    init)
              val shutdownFn = CL.D_Func(
                    [], CL.voidTy, RN.shutdown,
                    [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.worldTy), "wrld")],
                    CL.S_Block[])
              in
                topDecls := shutdownFn :: initFn :: !topDecls
              end
         (* create and register the initially function for a program *)      
        fun initially {
              prog = Prog{name=progName, strands, initially, ...},
              isArray : bool,
              iterPrefix : stm list,
              iters : (var * exp * exp) list,
              createPrefix : stm list,
              strand : Atom.atom,
              args : exp list
            } = let
              val name = Atom.toString strand
              val nDims = List.length iters
              val worldTy = CL.T_Ptr(CL.T_Named N.worldTy)
              fun mapi f xs = let
                    fun mapf (_, []) = []
                      | mapf (i, x::xs) = f(i, x) :: mapf(i+1, xs)
                    in
                      mapf (0, xs)
                    end
              val baseInit = mapi (fn (i, (_, e, _)) => (i, CL.I_Exp e)) iters
              val sizeInit = mapi
                    (fn (i, (CL.V(ty, _), lo, hi)) =>
                        (i, CL.I_Exp(CL.mkBinOp(CL.mkBinOp(hi, CL.#-, lo), CL.#+, CL.E_Int(1, ty))))
                    ) iters
            (* code to allocate the world and initial strands *)
              val wrld = "wrld"
              val allocCode = [
                      CL.mkComment["allocate initial block of strands"],
                      CL.mkDecl(CL.T_Array(CL.int32, SOME nDims), "base", SOME(CL.I_Array baseInit)),
                      CL.mkDecl(CL.T_Array(CL.uint32, SOME nDims), "size", SOME(CL.I_Array sizeInit)),
                      CL.mkDecl(worldTy, wrld,
                        SOME(CL.I_Exp(CL.E_Apply(N.allocInitially, [
                            CL.mkVar "ProgramName",
                            CL.mkUnOp(CL.%&, CL.E_Var(N.strandDesc name)),
                            CL.E_Bool isArray,
                            CL.E_Int(IntInf.fromInt nDims, CL.int32),
                            CL.E_Var "base",
                            CL.E_Var "size"
                          ]))))
                    ]
            (* create the loop nest for the initially iterations *)
              val indexVar = "ix"
              val strandTy = CL.T_Ptr(CL.T_Named(N.strandTy name))
              fun mkLoopNest [] = CL.mkBlock(createPrefix @ [
                      CL.mkDecl(strandTy, "sp",
                        SOME(CL.I_Exp(
                          CL.E_Cast(strandTy,
                          CL.E_Apply(N.inState, [CL.E_Var "wrld", CL.E_Var indexVar]))))),
                      CL.mkCall(N.strandInit name, CL.E_Var "sp" :: args),
                      CL.mkAssign(CL.E_Var indexVar, CL.mkBinOp(CL.E_Var indexVar, CL.#+, CL.E_Int(1, CL.uint32)))
                    ])
                | mkLoopNest ((CL.V(ty, param), lo, hi)::iters) = let
                    val body = mkLoopNest iters
                    in
                      CL.mkFor(
                        [(ty, param, lo)],
                        CL.mkBinOp(CL.E_Var param, CL.#<=, hi),
                        [CL.mkPostOp(CL.E_Var param, CL.^++)],
                        body)
                    end
              val iterCode = [
                      CL.mkComment["initially"],
                      CL.mkDecl(CL.uint32, indexVar, SOME(CL.I_Exp(CL.E_Int(0, CL.uint32)))),
                      mkLoopNest iters
                    ]
              val body = CL.mkBlock(
                    iterPrefix @
                    allocCode @
                    iterCode @
                    [CL.mkReturn(SOME(CL.E_Var "wrld"))])
              val initFn = CL.D_Func([], worldTy, N.initially, [], body)
              in
                initially := initFn
              end

      (***** OUTPUT *****)
        fun genStrandPrint (Strand{name, tyName, state, output, code,...}) = let
	    (* the print function *)
              val prFnName = concat[name, "_print"]
              val prFn = let
		    val params = [
			    CL.PARAM([], CL.T_Ptr(CL.T_Named "FILE"), "outS"),
			    CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "self")
			  ]
		    val SOME(ty, x) = !output
		    val outState = CL.mkIndirect(CL.mkVar "self", x)
		    val prArgs = (case ty
			   of Ty.IVecTy 1 => [CL.E_Str(!N.gIntFormat ^ "\n"), outState]
			    | Ty.IVecTy d => let
				val fmt = CL.E_Str(
				      String.concatWith " " (List.tabulate(d, fn _ => !N.gIntFormat))
				      ^ "\n")
				val args = List.tabulate (d, fn i => ToC.ivecIndex(outState, d, i))
				in
				  fmt :: args
				end
			    | Ty.TensorTy[] => [CL.E_Str "%f\n", outState]
			    | Ty.TensorTy[d] => let
				val fmt = CL.E_Str(
				      String.concatWith " " (List.tabulate(d, fn _ => "%f"))
				      ^ "\n")
				val args = List.tabulate (d, fn i => ToC.vecIndex(outState, d, i))
				in
				  fmt :: args
				end
			    | _ => raise Fail("genStrand: unsupported output type " ^ Ty.toString ty)
			  (* end case *))
		    in
		      CL.D_Func(["static"], CL.voidTy, prFnName, params,
			CL.mkCall("fprintf", CL.mkVar "outS" :: prArgs))
		    end
              in
		prFn
              end 

        fun genStrandTyDef (targetTy, Strand{tyName, state,...}) = 
            (* the type declaration for the strand's state struct *)
              CL.D_StructDef(
                List.rev (List.map (fn x => (targetTy x, #var x)) (!state)),
                tyName)

      (* generates the load kernel function *)

      (* generates the opencl buffers for the image data *) 
        fun getGlobalDataBuffers(globals,contextVar,errVar) = let 
              val globalBufferDecl =  CL.mkDecl(clMemoryTy,concat[RN.globalsVarName,"_cl"],NONE)
              val globalBuffer = CL.mkAssign(CL.mkVar(concat[RN.globalsVarName,"_cl"]),
                    CL.mkApply("clCreateBuffer", [
                        CL.mkVar contextVar,
                        CL.mkVar "CL_MEM_COPY_HOST_PTR",
                        CL.mkApply("sizeof",[CL.mkVar RN.globalsTy]),
                        CL.mkVar RN.globalsVarName,
                        CL.mkUnOp(CL.%&,CL.mkVar errVar)
                      ]))
	      fun genDataBuffers([],_,_) = [] 
		| genDataBuffers((var,nDims)::globals,contextVar,errVar) = let
(* FIXME: use CL constructors to  build expressions (not strings) *)
		    val size = if nDims = 1
			    then CL.mkBinOp(CL.mkApply("sizeof",[CL.mkVar "float"]), CL.#*, 
			      CL.mkIndirect(CL.mkVar var, "size[0]"))
			  else if nDims = 2
			    then CL.mkBinOp(CL.mkApply("sizeof",[CL.mkVar "float"]), CL.#*, 
			      CL.mkIndirect(CL.mkVar var, concat["size[0]", " * ", var, "->size[1]"])) 
			    else CL.mkBinOp(CL.mkApply("sizeof",[CL.mkVar "float"]), CL.#*, 
			      CL.mkIndirect(CL.mkVar var,concat["size[0]", " * ", var, "->size[1] * ", var, "->size[2]"]))                                                                                        
		    in 
		      CL.mkDecl(clMemoryTy, RN.addBufferSuffix var ,NONE)::
		      CL.mkDecl(clMemoryTy, RN.addBufferSuffixData var ,NONE)::
		      CL.mkAssign(CL.mkVar(RN.addBufferSuffix var), CL.mkApply("clCreateBuffer",
			[CL.mkVar contextVar,
			CL.mkVar "CL_MEM_COPY_HOST_PTR",
			CL.mkApply("sizeof",[CL.mkVar (RN.imageTy nDims)]),
			CL.mkVar var,
			CL.mkUnOp(CL.%&,CL.mkVar errVar)])) :: 
		      CL.mkAssign(CL.mkVar(RN.addBufferSuffixData var), CL.mkApply("clCreateBuffer",
			[CL.mkVar contextVar,
			CL.mkVar "CL_MEM_COPY_HOST_PTR",
			size,
			CL.mkIndirect(CL.mkVar var,"data"),
			CL.mkUnOp(CL.%&,CL.mkVar errVar)])):: genDataBuffers(globals,contextVar,errVar) 
		    end
              in 
		globalBufferDecl :: globalBuffer :: genDataBuffers(globals,contextVar,errVar) 
              end 

        
(* generates the kernel arguments for the image data *) 
        fun genGlobalArguments(globals,count,kernelVar,errVar) = let 
        val globalArgument = CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.|=,CL.mkApply("clSetKernelArg",
                                                                [CL.mkVar kernelVar, 
                                                                 CL.mkPostOp(CL.E_Var count, CL.^++),
                                                                 CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
                                                                 CL.mkUnOp(CL.%&,CL.mkVar(concat[RN.globalsVarName,"_cl"]))])))
        
        fun genDataArguments([],_,_,_) = [] 
          | genDataArguments((var,nDims)::globals,count,kernelVar,errVar) = 
        
                CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.|=, CL.mkApply("clSetKernelArg",
                                [CL.mkVar kernelVar, 
                                 CL.mkPostOp(CL.E_Var count, CL.^++),
                                 CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
                                 CL.mkUnOp(CL.%&,CL.mkVar(RN.addBufferSuffix var))])))::
          
                        CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.|=,CL.mkApply("clSetKernelArg",
                                [CL.mkVar kernelVar, 
                                 CL.mkPostOp(CL.E_Var count, CL.^++),
                                 CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
                                 CL.mkUnOp(CL.%&,CL.mkVar(RN.addBufferSuffixData var))]))):: genDataArguments (globals,count,kernelVar,errVar) 
                 
        in
        
                [globalArgument] @ genDataArguments(globals,count,kernelVar,errVar) 
        
        end 

      (* generates the globals buffers and arguments function *) 
        fun genGlobalBuffersArgs (imgGlobals) = let
            (* Delcare opencl setup objects *) 
              val errVar = "err"
              val imgDataSizeVar = "image_dataSize"
              val params = [ 
                      CL.PARAM([],CL.T_Named("cl_context"), "context"),
                      CL.PARAM([],CL.T_Named("cl_kernel"), "kernel"),
                      CL.PARAM([],CL.T_Named("int"), "argStart")
                    ]
              val clGlobalBuffers = getGlobalDataBuffers(!imgGlobals, "context", "err") 
              val clGlobalArguments = genGlobalArguments(!imgGlobals, "argStart", "kernel", "err")
            (* Body put all the statments together *) 
              val body =  clGlobalBuffers @ clGlobalArguments 
              in 
                CL.D_Func([],CL.voidTy,RN.globalsSetupName,params,CL.mkBlock(body))
              end 

      (* generate the data and global parameters *) 
        fun genKeneralGlobalParams ((name,tyname)::rest) = 
              CL.PARAM([], CL.T_Ptr(CL.T_Named RN.globalsTy), concat[RN.globalsVarName]) ::
              CL.PARAM([], CL.T_Ptr(CL.T_Named (RN.imageTy tyname)),RN.addBufferSuffix name) :: 
              CL.PARAM([], CL.T_Ptr(CL.voidTy),RN.addBufferSuffixData name) ::
              genKeneralGlobalParams rest
          | genKeneralGlobalParams [] = []
        
      (*generate code for intilizing kernel global data *) 
        fun initKernelGlobals (globals, imgGlobals) = let
(* FIXME: should use List.map here *)
              fun initGlobalStruct ({hostTy, gpuTy, var}::rest) =  
                    CL.mkAssign(CL.mkVar var, CL.mkIndirect(CL.mkVar RN.globalsVarName, var)) :: 
                    initGlobalStruct rest
                | initGlobalStruct [] = [] 
              fun initGlobalImages ((name, tyname)::rest) = 
                    CL.mkAssign(CL.mkVar name, CL.mkVar (RN.addBufferSuffix name)) :: 
                    CL.mkAssign(CL.mkIndirect(CL.mkVar name,"data"),CL.mkVar (RN.addBufferSuffixData name)) ::
                    initGlobalImages rest
                  | initGlobalImages [] = [] 
              in 
                initGlobalStruct globals @ initGlobalImages(imgGlobals) 
              end 
        
        (* generate the main kernel function for the .cl file *) 
        fun genKernelFun (strand, nDims, globals, imgGlobals) = let
              val Strand{name, tyName, state, output, code,...} = strand
              val fName = RN.kernelFuncName; 
              val inState = "strand_in" 
              val outState = "strand_out" 
              val params = [
                      CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfIn"),
                      CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfOut"),
                      CL.PARAM(["__global"], CL.intTy, "width")
                    ] @ genKeneralGlobalParams(!imgGlobals) 
              val thread_ids = if nDims = 1 
                    then [
                        CL.mkDecl(CL.intTy, "x", SOME(CL.I_Exp(CL.mkInt(0, CL.intTy)))), 
                        CL.mkAssign(CL.mkVar "x",CL.mkApply(RN.getGlobalThreadId,[CL.mkInt(0,CL.intTy)]))
                      ]
                    else [
                        CL.mkDecl(CL.intTy, "x", SOME(CL.I_Exp(CL.mkInt(0, CL.intTy)))), 
                        CL.mkDecl(CL.intTy, "y", SOME(CL.I_Exp(CL.mkInt(0, CL.intTy)))),
                        CL.mkAssign(CL.mkVar "x",  CL.mkApply(RN.getGlobalThreadId,[CL.mkInt(0,CL.intTy)])),
                        CL.mkAssign(CL.mkVar "y",CL.mkApply(RN.getGlobalThreadId,[CL.mkInt(1,CL.intTy)]))
                      ] 
              val strandDecl = [
                    CL.mkDecl(CL.T_Named tyName, inState, NONE), 
                    CL.mkDecl(CL.T_Named tyName, outState,NONE)]
              val strandObjects  = if nDims = 1 
                        then [
                            CL.mkAssign( CL.mkVar inState, CL.mkSubscript(CL.mkVar "selfIn", CL.mkStr "x")),
                            CL.mkAssign(CL.mkVar outState,CL.mkSubscript(CL.mkVar "selfOut", CL.mkStr "x"))
                          ]
                        else let 
                                val index = CL.mkBinOp(CL.mkBinOp(CL.mkVar "x",CL.#*,CL.mkVar "width"),CL.#+,CL.mkVar "y")
                                in 
                                        [CL.mkAssign(CL.mkVar inState, CL.mkSubscript(CL.mkVar "selfIn",index)), 
                                         CL.mkAssign(CL.mkVar outState,CL.mkSubscript(CL.mkVar "selfOut",index))] 
                                end                     
              val status = CL.mkDecl(CL.intTy, "status", SOME(CL.I_Exp(CL.mkInt(0, CL.intTy))))
              val local_vars =  thread_ids @ initKernelGlobals(!globals,!imgGlobals)  @ strandDecl @ strandObjects @ [status]
              val while_exp = CL.mkBinOp(CL.mkBinOp(CL.mkVar "status",CL.#!=, CL.mkVar RN.kStabilize),CL.#||,CL.mkBinOp(CL.mkVar "status", CL.#!=, CL.mkVar RN.kDie))
              val whileBody = CL.mkBlock [
                      CL.mkAssign(CL.mkVar "status",
                        CL.mkApply(RN.strandUpdate name,
                          [CL.mkUnOp(CL.%&,CL.mkVar inState), CL.mkUnOp(CL.%&,CL.mkVar outState)])),
                      CL.mkCall(RN.strandStabilize name,
                        [CL.mkUnOp(CL.%&,CL.mkVar inState), CL.mkUnOp(CL.%&,CL.mkVar outState)])
                    ]     
              val whileBlock = [CL.mkWhile(while_exp, whileBody)]         
              val body = CL.mkBlock(local_vars  @ whileBlock)
              in 
                CL.D_Func(["__kernel"], CL.voidTy, fName, params, body)
              end
      (* generate a global structure from the globals *) 
        fun genGlobalStruct (targetTy, globals) = let
              val globs = List.map (fn (x : mirror_var) => (targetTy x, #var x)) globals
              in
                CL.D_StructDef(globs, RN.globalsTy) 
              end
        fun genGlobals (declFn, targetTy, globals) = let
              fun doVar (x : mirror_var) = declFn (CL.D_Var([], targetTy x, #var x, NONE))
              in
                List.app doVar globals
              end

        fun genSrc (baseName, prog) = let
	      val Prog{double, globals, topDecls, strands, initially, imgGlobals, numDims, ...} = prog
              val clFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "cl"}
              val cFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "c"}
              val clOutS = TextIO.openOut clFileName
              val cOutS = TextIO.openOut cFileName
(* FIXME: need to use PrintAsC and PrintAsCL *)
              val clppStrm = PrintAsCL.new clOutS
              val cppStrm = PrintAsC.new cOutS
              fun cppDecl dcl = PrintAsC.output(cppStrm, dcl) 
              fun clppDecl dcl = PrintAsCL.output(clppStrm, dcl)
              val strands = AtomTable.listItems strands
              val [strand as Strand{name, tyName, code, init_code, ...}] = strands
              in
              (* Generate the OpenCl file *)
                clppDecl (CL.D_Verbatim([
                    if double
                      then "#define DIDEROT_DOUBLE_PRECISION"
                      else "#define DIDEROT_SINGLE_PRECISION",
                    "#define DIDEROT_TARGET_CL",
                    "#include \"Diderot/cl-diderot.h\""
                  ])); 
                genGlobals (clppDecl, #gpuTy, !globals); 
                clppDecl (genGlobalStruct (#gpuTy, !globals)); 
                clppDecl (genStrandTyDef(#gpuTy, strand)); 
                List.app clppDecl (!code); 
                clppDecl (genKernelFun (strand, !numDims, globals, imgGlobals));

              (* Generate the Host C file *) 
                cppDecl (CL.D_Verbatim([
                    if double
                      then "#define DIDEROT_DOUBLE_PRECISION"
                      else "#define DIDEROT_SINGLE_PRECISION",
                    "#define DIDEROT_TARGET_CL",
                    "#include \"Diderot/diderot.h\""
                  ])); 
                genGlobals (cppDecl, #hostTy, !globals); 
                cppDecl (genGlobalStruct (#hostTy, !globals));
                cppDecl (genStrandTyDef (#hostTy, strand));
                cppDecl  (!init_code);
                cppDecl (genStrandPrint strand); 
                List.app cppDecl (List.rev (!topDecls));
                cppDecl (genGlobalBuffersArgs (imgGlobals));    
                cppDecl (!initially);    
                PrintAsC.close cppStrm;
                PrintAsCL.close clppStrm; 
                TextIO.closeOut cOutS;
                TextIO.closeOut clOutS
              end

      (* output the code to a file.  The string is the basename of the file, the extension
       * is provided by the target.
       *)
        fun generate (basename, prog as Prog{double, parallel, debug, ...}) = let
              fun condCons (true, x, xs) = x::xs
                | condCons (false, _, xs) = xs
            (* generate the C compiler flags *)
              val cflags = ["-I" ^ Paths.diderotInclude, "-I" ^ Paths.teemInclude]
              val cflags = condCons (parallel, #pthread Paths.cflags, cflags)
              val cflags = if debug
                    then #debug Paths.cflags :: cflags
                    else #ndebug Paths.cflags :: cflags
              val cflags = #base Paths.cflags :: cflags
            (* generate the loader flags *)
              val extraLibs = condCons (parallel, #pthread Paths.extraLibs, [])
              val extraLibs = Paths.teemLinkFlags @  #base Paths.extraLibs :: extraLibs
                   val extraLibs =  #cl Paths.extraLibs :: extraLibs
              val rtLib = TargetUtil.runtimeName {
                      target = TargetUtil.TARGET_CL,
                      parallel = parallel, double = double, debug = debug
                    }
              val ldOpts = rtLib :: extraLibs
              in
                genSrc (basename, prog);
                RunCC.compile (basename, cflags);
                RunCC.link (basename, ldOpts)
              end

      end

  (* strands *)
    structure Strand =
      struct
        fun define (Prog{strands, ...}, strandId) = let
              val name = Atom.toString strandId
              val strand = Strand{
                      name = name,
                      tyName = RN.strandTy name,
                      state = ref [],
                      output = ref NONE,
                      code = ref [],
                      init_code = ref (CL.D_Comment(["no init code"]))
                    }
              in
                AtomTable.insert strands (strandId, strand);
                strand
              end

      (* return the strand with the given name *)
        fun lookup (Prog{strands, ...}, strandId) = AtomTable.lookup strands strandId

      (* register the strand-state initialization code.  The variables are the strand
       * parameters.
       *)
        fun init (Strand{name, tyName, code, init_code, ...}, params, init) = let
              val fName = RN.strandInit name
              val params =
                    CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfOut") ::
                      List.map (fn (ToCL.V(ty, x)) => CL.PARAM([], ty, x)) params
              val initFn = CL.D_Func([], CL.voidTy, fName, params, init)
              in
                init_code := initFn 
              end

      (* register a strand method *)
        fun method (Strand{name, tyName, code,...}, methName, body) = let
              val fName = concat[name, "_", methName]
              val params = [
                      CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfIn"),
                      CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfOut")
                    ]
              val methFn = CL.D_Func([], CL.int32, fName, params, body)
              in
                code := methFn :: !code
              end
                
        fun output (Strand{output, ...}, ty, ToCL.V(_, x)) = output := SOME(ty, x)

      end

  end

structure CLBackEnd = CodeGenFn(CLTarget)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0