Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /branches/vis12-cl/src/compiler/cl-target/cl-target.sml
ViewVC logotype

View of /branches/vis12-cl/src/compiler/cl-target/cl-target.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3088 - (download) (annotate)
Tue Mar 17 18:27:27 2015 UTC (4 years, 5 months ago) by jhr
File size: 35584 byte(s)
  working on vis12-cl reorg
(* cl-target.sml
 *
 * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
 * All rights reserved.
 *
 * Kernels:
 *      <prefix>_GetSizes
 *      <prefix>_InitGlobals
 *      <prefix>_SetInput_<name>        for each input <name>
 *      <prefix>_GetOutput_<name>       for each output <name>
 *)

structure CLTarget : TARGET =
  struct

    structure IL = TreeIL
    structure V = IL.Var
    structure Ty = IL.Ty
    structure CL = CLang
    structure CLN = CLNames
    structure RN = RuntimeNames
    structure ToCL = TreeToCL
    structure SU = StrandUtil
    structure CLU = CLUtil

    type props = Properties.props

    type var = CL.typed_var
    type exp = CL.exp
    type stm = CL.stm

    datatype strand = Strand of {
        prog : program,
        name : string,
        tyName : string,
        state : var list,
        outputs : (Ty.ty * CL.var) list, (* the strand's output variables *)
        code : CL.decl list ref
      }

    and program = Prog of {
        props : props,                  (* info about target *)
        inputs : GenInputs.input_desc list ref,
        globals : global_var list ref,
        globInit : CL.stm list ref,     (* global initialization code (OpenCL); this code gets *)
                                        (* combined with the setInitDim code. *)
        kernels : string list ref,      (* a list of the OpenCL kernels used by this program *)
        topCDecls : CL.decl list ref,   (* top-level decls for the C file *)
        topOCLDecls : CL.decl list ref, (* top-level decls for the OpenCL file *)
        strands : strand AtomTable.hash_table,
        nAxes : int option ref,         (* number of axes in initial grid (NONE means collection) *)
        initDim : int ref,              (* iteration nesting depth for initially *)
        setInitDim : CL.stm list ref    (* OpenCL code to initialize the initial strand grid/collection size *)
      }

    withtype global_var = {cTy : CL.ty, oclTy : CL.ty, name : string}

    datatype env = ENV of {
        info : env_info,
        vMap : var V.Map.map,
        scope : scope
      }

    and env_info = INFO of {
        prog : program
      }

    and scope
      = NoScope
      | GlobalScope
      | InitiallyScope
      | StrandScope                             (* strand initialization *)
      | MethodScope of StrandUtil.method_name   (* method body; vars are state variables *)

  (* the supprted widths of vectors of reals on the target; we are assuming OpenCL 1.1 or later *)
    fun vectorWidths () = [2, 3, 4, 8, 16]

  (* we do not support printing on the OpenCL target *)
    val supportsPrinting = false

  (* tests for whether various expression forms can appear inline *)
    fun inlineCons n = (n < 2)          (* vectors are inline, but not matrices *)
    val inlineMatrixExp = false         (* can matrix-valued expressions appear inline? *)

  (* OpenCL global pointer type *)
    fun globalPtrTy props = CL.T_Ptr(CL.T_Named(CLN.globalsTy props))

  (* TreeIL to target translations *)
    structure Tr =
      struct
        fun fragment (ENV{info, vMap, scope}, blk) = let
              val (vMap, stms) = ToCL.trFragment (vMap, blk)
              in
                (ENV{info=info, vMap=vMap, scope=scope}, stms)
              end
        fun block (ENV{vMap, scope, ...}, blk) = ToCL.trBlock (vMap, blk)
        fun exp (ENV{vMap, ...}, e) = ToCL.trExp(vMap, e)
      end

  (* variables *)
    structure Var =
      struct
        fun name (ToCL.V(_, name)) = name
        fun global (Prog{globals, ...}, gv) = let
              val ty = IL.GlobalVar.ty gv
              val gv' = {
                      cTy = CLTyTranslate.toCPUType ty,
                      oclTy = CLTyTranslate.toGPUType ty,
                      name = "gv_" ^ IL.GlobalVar.name gv
                    }
              in
                globals := gv' :: !globals;
                CL.V(#oclTy gv', #name gv')
              end
        fun param x = ToCL.V(CLTyTranslate.toGPUType(V.ty x), V.name x)
      end

  (* environments *)
    structure Env =
      struct
      (* create a new environment *)
        fun new prog = ENV{
                info=INFO{prog = prog},
                vMap = V.Map.empty,
                scope = NoScope
              }
      (* bind a TreeIL varaiable to a target variable *)
        fun bind (ENV{info, vMap, scope}, x, x') = ENV{
                info = info,
                vMap = V.Map.insert(vMap, x, x'),
                scope = scope
              }
      (* define the current translation context *)
        fun setScope (scope, glob, selfIn, selfOut) (ENV{info, vMap, ...}) = ENV{
                info = info,
                vMap = List.foldl
                    (fn ((x, x'), vm) => V.Map.insert(vm, x, x'))
                      vMap [
                          (PseudoVars.global, CL.V(CL.voidTy, glob)),
                          (PseudoVars.selfIn, CL.V(CL.voidTy, selfIn)),
                          (PseudoVars.selfOut, CL.V(CL.voidTy, selfOut))
                        ],
                scope = scope
              }
      (* define the current translation context *)
        val scopeGlobal = setScope (GlobalScope, "glob", "_bogus_", "_bogus_")
        val scopeInitially = setScope (InitiallyScope, "glob", "_bogus_", "_bogus_")
        fun scopeStrand (env as ENV{info=INFO{prog=Prog{props, ...}}, ...}) =
              if Properties.dualState props
                then setScope (StrandScope, "glob", "selfIn", "selfOut") env
                else setScope (StrandScope, "glob", "self", "self") env
        fun scopeMethod (env, name) =
              setScope (MethodScope name, "glob", "selfIn", "selfOut") env
        fun scopeMethod (env as ENV{info=INFO{prog=Prog{props, ...}}, ...}, name) =
              if Properties.dualState props
                then setScope (MethodScope name, "glob", "selfIn", "selfOut") env
                else setScope (MethodScope name, "glob", "self", "self") env
      end

  (* strands *)
    structure Strand =
      struct
        fun define (prog as Prog{strands, kernels, topOCLDecls, ...}, strandId, state) = let
              val name = Atom.toString strandId
            (* the output state variable *)
              val outputVars = let
                    fun cvtOut x = if IL.StateVar.isOutput x
                          then SOME(IL.StateVar.ty x, IL.StateVar.name x)
                          else NONE
                    val outputVars = List.mapPartial cvtOut state
                    in
                      outputVars
                    end
            (* the state variables *)
              val state = let
                    fun cvt x = CL.V(CLTyTranslate.toGPUType(IL.StateVar.ty x), IL.StateVar.name x)
                    in
                      List.map cvt state
                    end
              val strand = Strand{
                      prog = prog,
                      name = name,
                      tyName = CLN.strandTy name,
                      state = state,
                      outputs = outputVars,
                      code = ref []
                    }
              in
                AtomTable.insert strands (strandId, strand);
                strand
              end

      (* return the strand with the given name *)
        fun lookup (Prog{strands, ...}, strandId) = AtomTable.lookup strands strandId

      (* register the strand-state initialization code.  The variables are the strand
       * parameters.
       *)
        fun init (Strand{prog=Prog{props, ...}, name, tyName, code, ...}, params, init) = let
              val globTy = globalPtrTy props
              val fName = CLN.strandInit name
              val selfParam = if Properties.dualState props
                    then "selfOut"
                    else "self"
              val params =
                    CLU.globalParam(globTy, "glob") ::
                    CLU.globalParam(CL.T_Ptr(CL.T_Named tyName), selfParam) ::
                      List.map (fn (CL.V(ty, x)) => CLU.clParam(ty, x)) params
              val initFn = CL.D_Func([], CL.voidTy, fName, params, init)
              in
                code := initFn :: !code
              end

      (* register a strand method *)
        fun method (Strand{prog=Prog{props, ...}, name, tyName, code, ...}, methName, body) = let
              val globTy = globalPtrTy props
              val fName = concat[name, "_", StrandUtil.nameToString methName]
              val stateParams = if Properties.dualState props
                    then [
                        CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfIn"),
                        CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfOut")
                      ]
                    else [CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "self")]
              val params =
                    CLU.globalParam (globTy, "glob") ::
                    stateParams
              val resTy = (case methName
                     of StrandUtil.Update => CL.T_Named "StrandStatus_t"
                      | StrandUtil.Stabilize => CL.voidTy
                    (* end case *))
              val methFn = CL.D_Func([], resTy, fName, params, body)
              in
                code := methFn :: !code
              end

      end

  (* programs *)
    structure Program =
      struct
        fun new (tgt : TargetUtil.target_desc, props : StrandUtil.program_prop list) = let
              val props = Properties.mkProps (tgt, props)
              val kernels = [
                      CLN.getSizesKern,
                      CLN.initGlobalsKern,
                      CLN.initiallyKern,
                      CLN.updateKern
                    ]
              val kernels = if #snapshot props
                    then kernels @ [CLN.snapshotKern]
                    else kernels
              val kernels = if Properties.noBSP props
                    then kernels
                    else kernels @ [
                        CLN.schedKern,
                        CLN.compactKern
                      ]
              in
                RN.initTargetSpec tgt;
                Prog{
                    props = props,
                    inputs = ref [],
                    globals = ref [],
                    globInit = ref [],
                    kernels = ref kernels,
                    topCDecls = ref [],
                    topOCLDecls = ref [],
                    strands = AtomTable.mkTable (16, Fail "strand table"),
                    nAxes = ref(SOME ~1),
                    initDim = ref ~1,
                    setInitDim = ref[]
                  }
              end
      (* gather the inputs *)
        fun inputs (Prog{inputs, ...}, env, blk) = inputs := GenInputs.gatherInputs blk
      (* register the global initialization part of a program; the C-side code is responsible
       * for allocating GPU memory and handling the inputs.  The GPU-side code is included
       * in the globals initialization kernel (see genGlobInitKern)
       *)
        fun init (Prog{props, topCDecls, globInit, ...}, init) = if #hasGlobals props
              then let
                val worldTy = RN.worldPtrTy props
                val wrldV = CL.mkVar "wrld"
              (* the body of the global initializtion code *)
                val initStms = [CL.mkReturn(SOME(CL.mkVar "false"))]
              (* for libraries, we need to make sure that the inputs are initialized *)
                val initStms = if not(#exec props) andalso #hasInputs props
                      then CL.mkIfThen(
                        CL.mkApply(RN.checkDefined props, [wrldV]),
                        CL.mkReturn(SOME(CL.mkBool true))) :: initStms
                      else initStms
                val initFn = CL.D_Func(
                      ["static"], CL.boolTy, RN.initGlobals,
                      [CL.PARAM([], worldTy, "wrld")],
                      CL.mkBlock initStms)
                in
                  topCDecls := initFn :: !topCDecls;
		  globInit := CL.unBlock init
                end
              else ()
      (* register the global destruction part of a program *)
(* TODO: factor out common code from c-target.sml *)
      (* for the OpenCL target, we need to free the GPU-side resources and the CPU-side inputs *)
        fun free (Prog{props, topCDecls, ...}, env, globals) = let
              val worldTy = RN.worldPtrTy props
              val globTy = CL.T_Ptr(CL.T_Named(CLN.globalsTy props))
              fun global x = CL.mkIndirect(CL.mkVar "glob", "gv_" ^ IL.GlobalVar.name x)
              fun freeGlob (x, stms) = (case IL.GlobalVar.ty x
                     of Ty.DynSeqTy _ => CL.mkCall("Diderot_DynSeqFree", [global x]) :: stms
                      | Ty.ImageTy info => CL.mkCall(RN.freeImage(ImageInfo.dim info), [
                            CL.mkCast(World.prefixTy, CL.mkVar "wrld"), global x
                          ]) :: stms
                      | _ => stms
                    (* end case *))
              val body = CL.mkBlock(
                    CL.mkDeclInit(globTy, "glob", CL.mkIndirect(CL.mkVar "wrld", "globals")) ::
                    List.foldr freeGlob [CL.mkReturn(SOME(CL.mkVar "false"))] globals)
              val freeFn = CL.D_Func(
                    ["static"], CL.boolTy, RN.freeGlobals,
                    [CL.PARAM([], worldTy, "wrld")],
                    body)
              in
                topCDecls := freeFn :: !topCDecls
              end
      (* create and register the initially function for a program *)
        fun initially {
              prog = Prog{props, strands, topOCLDecls, nAxes, initDim, setInitDim, ...},
              iterPrefix : stm list,
              iters : (var * exp * exp) list,
              createPrefix : stm list,
              strand : Atom.atom,
              args : exp list
            } = let
              val name = Atom.toString strand
              val nDims = List.length iters
              val globTy = globalPtrTy props
            (* create the code to set the initially dimensions in the globals structure; this code
             * will be included in the InitGlobals kernel.
             *)
              val () = let
                    fun lhs (fld, i) = CL.mkSubscript(CL.mkIndirect(CL.mkVar "glob", fld), CL.mkInt i)
                    fun gen ([], _) = []
                      | gen ((_, lo, hi) :: r, i) =
                          CL.mkAssign(lhs("initiallyLo", i), lo) ::
                          CL.mkAssign(lhs("initiallyHi", i), hi) :: gen (r, i+1)
                    in
                      setInitDim := gen (iters, 0)
                    end
            (* create the loop nest for the initially iterations *)
              fun loopBnd hiOrLo i =
                    CL.mkSubscript(CL.mkIndirect(CL.mkVar "glob", hiOrLo), CL.mkInt i)
              val indexVar = "ix"
              val strandTy = CL.T_Ptr(CL.T_Named(CLN.strandTy name))
              fun statePtr inout = CL.mkAddrOf(CL.mkSubscript(CL.mkVar inout, CL.mkVar indexVar))
              fun mkLoopNest (_, []) = if Properties.dualState props
                    then CL.mkBlock(createPrefix @ [
                        CL.mkCall(CLN.strandInit name, CL.mkVar "glob" :: statePtr "inState" :: args),
                        CL.mkCall("memcpy", [
                            statePtr "outState", statePtr "inState",
                            CL.mkSizeof(CL.T_Named(CLN.strandTy name))
                          ]),
                        CL.S_Exp(CL.mkPostOp(CL.mkVar indexVar, CL.^++))
                      ])
                    else CL.mkBlock(createPrefix @ [
                        CL.mkCall(CLN.strandInit name, CL.mkVar "glob" :: statePtr "state" :: args),
                        CL.S_Exp(CL.mkPostOp(CL.mkVar indexVar, CL.^++))
                      ])
                | mkLoopNest (i, (CL.V(ty, param), _, _)::iters) = let
                    val body = mkLoopNest(i+1, iters)
                    in
                      CL.mkFor(
                        [(ty, param, loopBnd "initiallyLo" i)],
                        CL.mkBinOp(CL.mkVar param, CL.#<=, loopBnd "initiallyHi" i),
                        [CL.mkPostOp(CL.mkVar param, CL.^++)],
                        body)
                    end
              val iterCode = [
                      CL.mkComment["initially"],
                      CL.mkDecl(CL.uint32, indexVar, SOME(CL.I_Exp(CL.mkInt 0))),
                      mkLoopNest (0, iters)
                    ]
              val body = CL.mkBlock(iterPrefix @ iterCode)
              val initKern = CLUtil.mkTaskKernel(
                    CLN.initiallyKern,
                    [ CLU.globalParam(globTy, "glob"),
                      CLU.globalParam(CL.T_Ptr(CL.T_Named(CLN.strandTy (Atom.toString strand))), "state"),
                      CLU.globalParam(CL.T_Ptr CL.uint8, "status")],
                    body)
              in
                nAxes := (if #isArray props then SOME nDims else NONE);
                initDim := nDims;
                topOCLDecls := initKern :: !topOCLDecls
              end

      (***** OUTPUT *****)

      (* create the target-specific substitution list *)
        fun mkSubs (props : props, Strand{name, tyName, ...}) = [
                ("CFILE",       OS.Path.joinBaseExt{base= #outBase props, ext= SOME "c"}),
                ("HDRFILE",     OS.Path.joinBaseExt{base= #outBase props, ext= SOME "h"}),
                ("CLFILE",      OS.Path.joinBaseExt{base= #outBase props, ext= SOME "cl"}),
                ("PREFIX",      #namespace props),
                ("SRCFILE",     #srcFile props),
                ("PROG_NAME",   #outBase props),
                ("STRAND",      name),
                ("STRANDTY",    tyName),
                ("DIDEROT_FLOAT_PRECISION", Properties.floatPrecisionDef props),
                ("DIDEROT_INT_PRECISION", Properties.intPrecisionDef props),
                ("DIDEROT_TARGET", Properties.targetDef props)
              ]

        fun condCons (true, x, xs) = x::xs
          | condCons (false, _, xs) = xs

        fun verbFrag (props : props, parFrag, seqFrag, subs) =
              CL.verbatimDcl [if (#parallel props) then parFrag else seqFrag] subs

        fun compile (props : props, basename) = let
            (* generate the C compiler flags *)
              val cflags = ["-I" ^ Paths.diderotInclude(), "-I" ^ Paths.teemInclude()]
              val cflags = condCons (#parallel props, #pthread Paths.cflags, cflags)
              val cflags = if #debug props
                    then #debug Paths.cflags :: cflags
                    else #ndebug Paths.cflags :: cflags
              val cflags = #base Paths.cflags :: cflags
              in
                RunCC.compile (basename, cflags)
              end

        fun ldFlags (props : props) = if #exec props
              then let
                val extraLibs = [#cl Paths.extraLibs]
                val extraLibs = Paths.teemLinkFlags() @ #base Paths.extraLibs :: extraLibs
                val rtLib = Properties.runtimeName props
                in
                  rtLib :: extraLibs
                end
              else [Properties.runtimeName props]

        fun genStrandStruct (Strand{prog=Prog{props, ...}, name, tyName, state, ...}) = let
            (* the type declaration for the strand's state struct *)
              val selfTyDef = CL.D_StructDef(
                      NONE,
                      List.rev (List.map (fn CL.V(ty, x) => (ty, x)) state),
                      SOME(CLN.strandTy name))
              in
                selfTyDef
              end

      (* Generate an OpenCL function to copy the state of a strand *)
        fun genStrandCopy (Strand{name, tyName, state, ...}, mkName, dstAddrSp, srcAddrSp) = let
              val ptrTy = CL.T_Ptr(CL.T_Named tyName)
              fun copy (CL.V(_, x)) = CL.mkAssign(
                    CL.mkIndirect(CL.mkVar "dst", x),
                    CL.mkIndirect(CL.mkVar "src", x))
              in
                CL.D_Func(
                  ["inline"], CL.voidTy, mkName name,
                  [CL.PARAM([dstAddrSp], ptrTy, "src"), CL.PARAM([srcAddrSp], ptrTy, "dst")],
                  CL.mkBlock(List.map copy state))
              end

      (* generate the C and OpenCL code to support the outputs.
       * NOTE: this function must be called _before_ genBindKernels
       *)
        fun registerOutputKernels (Strand{prog, tyName, outputs, ...}) = let
              val Prog{props, kernels, topCDecls, topOCLDecls, nAxes, ...} = prog
            (* generate the output kernels *)
              val (kNames, kDecls) =
                    ListPair.unzip
                      (GenOutput.genKernels (props, CL.T_Ptr(CL.T_Named tyName), !nAxes) outputs)
(* TODO: generate C API *)
              in
                kernels := !kernels @ kNames;
                topOCLDecls := !topOCLDecls @ kDecls
              end

      (* generate an OpenCL struct definition for the globals.  In addition to the user-defined
       * globals, we add variabes to hold the base and size values for the initial grid/collection
       * of strands.
       *)
        fun genGlobalStruct (props, iterDim, globals) = let
              fun genField (projTy : global_var -> CL.ty) gv = (projTy gv, #name gv)
              val intArrTy = CL.T_Array(CL.intTy, SOME iterDim)
              val fields = (intArrTy, "initiallyLo") ::
                    (intArrTy, "initiallyHi") ::
                    List.map (genField #oclTy) globals
              in
                CL.D_StructDef(NONE, fields, SOME(CLN.globalsTy props))
              end

      (* generate the global initialization kernel *)
        fun genGlobInitKern (Prog{props, globInit, setInitDim, ...}) = let
              val globTy = globalPtrTy props
              in
                CLU.mkTaskKernel (CLN.initGlobalsKern,
                  [CLU.globalParam(globTy, "glob")],
                  CL.mkBlock(
                    !globInit @
                    CL.S_Comment["compute size of initial strand grid"] ::
                    !setInitDim))
              end

      (* generate the C function for binding the kernels used by the program *)
        fun genBindKernels (props, kernels) = let
              val worldTy = RN.worldPtrTy props
	      val wrldV = CL.mkVar "wrld"
	      val stsV = CL.mkVar "sts"
              fun bindKern (k, stms) =
                    CL.mkAssign(
                      CL.mkIndirect(CL.mkVar "wrld", k),
                      CL.mkApply("clCreateKernel", [
                          CL.mkIndirect(wrldV, "prog"), CL.mkStr k, CL.mkAddrOf(CL.mkVar "sts")
                        ])) ::
                    CL.mkIfThen(
		      CL.mkApply("CheckOCLStatus", [wrldV, stsV, CL.mkStr("binding "^k)]),
                      CL.mkReturn(SOME(CL.mkVar "true"))) ::
                    stms
              val stms = List.foldr bindKern [CL.mkReturn(SOME(CL.mkVar "false"))] kernels
              val stms = CL.mkDecl(CL.T_Named "cl_int", "sts", NONE) :: stms
              val body = CL.mkBlock stms
              in
                CL.D_Func(["static"], CL.boolTy, "BindKernels", [CL.PARAM([], worldTy, "wrld")], body)
              end

      (* generate the C struct declaration for the world representation *)
        fun genWorldStruct (Prog{props, globals, kernels, ...}) = let
(*
              val extras = if Properties.dualState props
                    then [
                        (CL.T_Ptr(CL.T_Ptr(CL.T_Named tyName)), "inState"),
                        (CL.T_Ptr(CL.T_Ptr(CL.T_Named tyName)), "outState")
                      ]
                    else [
                        (CL.T_Ptr(CL.T_Named tyName),           "state")
                      ]
              val extras = if null(!globals)
                    then extras
                    else (CL.T_Ptr(CL.T_Named(CLN.globalsTy)), "globals") :: extras
              val extras = (CL.T_Ptr CL.uint8, "status") :: extras
*)
              val extras = [
                      (CL.uint32, "nWorkers"),
                      (CL.T_Ptr(CL.T_Named "Diderot_DeviceInfo_t"), "device"),
                      (CL.T_Named "cl_int", "gpuDevId"),
                      (CL.T_Ptr(CL.T_Named "Diderot_OCLInfo_t"), "oclInfo")
                    ]
              val extras = if #exec props
                    then extras
                    else (CL.T_Named(RN.definedInpTy props), "definedInp") :: extras
              val extras = (CL.T_Named(#namespace props ^ "Sizes_t"), "oclSizes") :: extras
            (* OpenCL execution context *)
              val extras =
                    (CL.T_Named "cl_context", "context") ::
                    (CL.T_Named "cl_command_queue", "cmdQ") ::
                    (CL.T_Named "cl_program", "prog") :: extras
            (* add fields for GPU memory objects *)
              val extras =
                    (CL.T_Named "cl_mem", "schedBuf") ::
                    (CL.T_Named "cl_mem", "globalsBuf") ::
                    (CL.T_Named "cl_mem", "stateBuf") ::
                    (CL.T_Named "cl_mem", "statusBuf") :: extras
            (* add a field for each kernel *)
              val extras = let
                    fun kField (k, extras) = (CL.T_Named "cl_kernel", k) :: extras
                    in
                      List.foldl kField extras (!kernels)
                    end
              in
                World.genStruct (props, List.rev extras)
              end

        fun genIntially (Prog{nAxes, initDim, ...}, substitutions) = CL.verbatimDcl [CInitiallyFrag.text]
              (("INIT_DIMS", Int.toString(! initDim)) ::
              ("IS_ARRAY", Bool.toString(isSome(! nAxes))) :: substitutions)

        fun genRun (Prog{props, ...}, substututions) =
              if Properties.noBSP props
                then CL.verbatimDcl [CRunNoBSPFrag.text] substututions
                else CL.verbatimDcl [CRunFrag.text] substututions

        type output = {file : string, outS : TextIO.outstream, ppStrm : TextIOPP.stream}

      (* open pretty printing streams for both the C and OpenCL output files *)
        fun openOut baseName = let
              fun openOut (mkPP, ext) = let
                    val fileName = OS.Path.joinBaseExt{base=baseName, ext=SOME ext}
                    val outS = TextIO.openOut fileName
                    in {
                      file = fileName,
                      outS = outS,
                      ppStrm = mkPP outS
                    } end
              in
                {cOut = openOut (PrintAsC.new, "c"), oclOut = openOut (PrintAsCL.new, "cl")}
              end

        fun closeOut {file, outS, ppStrm} = (
              TextIOPP.closeStream ppStrm;
              TextIO.closeOut outS)

      (* generate the OpenCL source code. *)
        fun outputCLSrc (out : output, prog, strand, substitutions) = let
              val Prog{props, nAxes, initDim, globals, topOCLDecls, ...} = prog
              val Strand{outputs, code=strandCode, ...} = strand
              fun ppDecl dcl = PrintAsCL.output(#ppStrm out, dcl)
              in
              (* generate output kernels *)
                registerOutputKernels strand;
              (* Retrieve the header information *)
                ppDecl (CL.verbatimDcl [CLHeadFrag.text] substitutions);
(* FIXME: check to see if we really need the DUAL_STATE define for OpenCL *)
                if Properties.dualState props
                  then ppDecl (CL.D_Verbatim ["#define DIDEROT_DUAL_STATE\n"])
                  else ();
                if #hasDie props
                  then ppDecl (CL.D_Verbatim ["#define DIDEROT_HAS_DIE\n"])
                  else ();
                if Properties.noBSP props
                  then ppDecl (CL.D_Verbatim ["#define DIDEROT_NO_BSP\n"])
                  else ();
              (* type definitions *)
                ppDecl (genGlobalStruct (props, !initDim, !globals));
                ppDecl (genStrandStruct strand);
(* FIXME: should only include eigen code fragments if they are being used! *)
(* comment out for debugging
                ppDecl (CL.D_Verbatim[CLEigen2x2Frag.text]);
*)
(* FIXME: should only include eigen code fragments if they are being used! *)
(* comment out for debugging
                ppDecl (CL.D_Verbatim[CLEigen3x3Frag.text]);
*)
                ppDecl (genStrandCopy (strand, CLN.copyToGlobal, "__global", "__private"));
                ppDecl (genStrandCopy (strand, CLN.copyFromGlobal, "__private", "__global"));
                ppDecl (genGlobInitKern prog);
                List.app ppDecl (!strandCode);
              (* Specialize the scheduler kernels and functions *)
                if Properties.noBSP props
                  then ppDecl (CL.verbatimDcl [CLSchedNoBSPFrag.text, CLUpdateNoBSPFrag.text] substitutions)
                else if Properties.noComm props
                  then ppDecl (CL.verbatimDcl [CLSchedFrag.text, CLUpdateNoComFrag.text] substitutions)
                  else ppDecl (CL.verbatimDcl [CLSchedFrag.text, CLUpdateFrag.text] substitutions);
                List.app ppDecl (!topOCLDecls);
              (* kernel for computing sizes of runtime data structures *)
                ppDecl (CL.verbatimDcl [CLSizesFrag.text] substitutions)
              end

      (* output common C source that is common to both the library and standalone targets *)
        fun outputCSrc (out : output, prog, strand, substitutions) = let
              val Prog{props, nAxes, inputs, kernels, ...} = prog
              fun ppDecl dcl = PrintAsC.output(#ppStrm out, dcl)
              in
                if Properties.dualState props
                  then ppDecl (CL.D_Verbatim ["#define DIDEROT_DUAL_STATE\n"])
                  else ();
              (* generate the host-side type definitions *)
                ppDecl (CL.verbatimDcl [CSizesFrag.text] substitutions);
                List.app ppDecl (GenInputs.genDefinedInpStruct (props, !inputs));
                ppDecl (genWorldStruct prog);
                List.app ppDecl (GenInputs.genInputsStruct (props, !inputs));
              (* support code for OpenCL *)
                ppDecl (CL.verbatimDcl [
                    CCheckCLStatusFrag.text
                  ] substitutions);
              (* kernel binding *)
                ppDecl (genBindKernels (props, !kernels))
              end

        fun outputLibSrc (out : output, prog, strand, substitutions) = let
              val Prog{props, inputs, topCDecls, nAxes, ...} = prog
              val Strand{outputs, ...} = strand
              val outputs = GenOutput.gen (props, !nAxes) outputs
              fun ppDecl dcl = PrintAsC.output(#ppStrm out, dcl)
              in
                ppDecl (CL.verbatimDcl [LibHdrFrag.text] substitutions);
                outputCSrc (out, prog, strand, substitutions);
                List.app ppDecl (GenInputs.genInputFuns(props, !inputs));
                List.app ppDecl (!topCDecls);
              (* helper functions for output *)
                if Option.isSome(!nAxes)
                  then ppDecl (CL.verbatimDcl [COutputGridFixedFrag.text] substitutions)
(* FIXME
                  else ppDecl (CL.verbatimDcl [COutputCollectionFixedFrag.text] substitutions);
*)
                  else ();
                List.app ppDecl outputs;
                ppDecl (CL.verbatimDcl [CBodyFrag.text] substitutions);
                ppDecl (CL.verbatimDcl [CInitFrag.text] substitutions);
                ppDecl (genIntially (prog, substitutions));
                ppDecl (genRun (prog, substitutions));
                ppDecl (CL.verbatimDcl [CShutdownFrag.text] substitutions)
              end

        fun generateLib (prog as Prog{props, inputs, strands, ...}) = let
              val {outDir, outBase, exec, double, parallel, debug, ...} = props
              val [strand] = AtomTable.listItems strands
              val basename = OS.Path.joinDirFile{dir=outDir, file=outBase}
              val [Strand{state, outputs, ...}] = AtomTable.listItems strands
              val {cOut, oclOut} = openOut basename
              val substitutions = mkSubs(props, strand)
              in
              (* generate the library .h file *)
                GenLibraryInterface.gen {
                    props = props,
                    rt = SOME CLibInterfaceCLFrag.text,
                    inputs = !inputs,
                    outputs = outputs
                  };
              (* generate source code *)
                outputCLSrc (oclOut, prog, strand, substitutions);
                outputLibSrc (cOut, prog, strand, substitutions);
              (* close the output streams *)
                closeOut cOut;
                closeOut oclOut;
              (* compile and link *)
                compile (props, basename);
                RunCC.linkLib (basename, ldFlags props)
              end

        fun outputExecSrc (out : output, prog, strand, substitutions) = let
              val Prog{props, inputs, topCDecls, strands, nAxes, ...} = prog
              val Strand{outputs, ...} = strand
              val outputs = GenOutput.gen (props, !nAxes) outputs
              fun ppDecl dcl = PrintAsC.output(#ppStrm out, dcl)
              in
                ppDecl (CL.verbatimDcl [ExecHdrFrag.text] substitutions);
                outputCSrc (out, prog, strand, substitutions);
                List.app ppDecl (GenInputs.genExecInputFuns (props, !inputs));
                List.app ppDecl (!topCDecls);
              (* helper functions for output *)
                if Option.isSome(!nAxes)
                  then ppDecl (CL.verbatimDcl [COutputGridFixedFrag.text] substitutions)
(* FIXME
                  else ppDecl (CL.verbatimDcl [COutputCollectionFixedFrag.text] substitutions);
*)
                  else ();
                List.app ppDecl outputs;
                ppDecl (CL.verbatimDcl [CInitFrag.text] substitutions);
                ppDecl (genIntially (prog, substitutions));
                ppDecl (genRun (prog, substitutions));
                ppDecl (CL.verbatimDcl [CShutdownFrag.text] substitutions);
                ppDecl (CL.verbatimDcl [CMainFrag.text] substitutions)
              end

      (* output the code to a file.  The string is the basename of the file, the extension
       * is provided by the target.
       *)
        fun generateExec (prog as Prog{props, strands, ...}) = let
              val {outDir, outBase, exec, double, parallel, debug, ...} = props
              val [strand] = AtomTable.listItems strands
              val basename = OS.Path.joinDirFile{dir=outDir, file=outBase}
              val {cOut, oclOut} = openOut basename
              val substitutions = mkSubs(props, strand)
              in
                outputCLSrc (oclOut, prog, strand, substitutions);
                outputExecSrc (cOut, prog, strand, substitutions);
              (* close the output streams *)
                closeOut cOut;
                closeOut oclOut;
              (* compile and link *)
                compile (props, basename);
                RunCC.linkExec (basename, ldFlags props)
              end

        fun generate (prog as Prog{props, globals, topCDecls, topOCLDecls, ...}) = (
              globals := List.rev (!globals);
              topCDecls := List.rev (!topCDecls);
              topOCLDecls := List.rev (!topOCLDecls);
              if #exec props
                then generateExec prog
                else generateLib prog)

      end

  end

structure CLBackEnd = CodeGenFn(CLTarget)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0