Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml
ViewVC logotype

View of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1281 - (download) (annotate)
Mon Jun 6 18:21:50 2011 UTC (8 years, 1 month ago) by jhr
File size: 44022 byte(s)
  fixing indentation
(* c-target.sml
 *
 * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
 * All rights reserved.
 *)

structure CLTarget : TARGET =
  struct

    structure IL = TreeIL
    structure V = IL.Var
    structure Ty = IL.Ty
    structure CL = CLang
    structure RN = RuntimeNames
    structure ToCL = TreeToCL

    type var = ToCL.var
    type exp = CL.exp
    type stm = CL.stm

  (* OpenCL specific types *) 
    val clProgramTy = CL.T_Named "cl_program" 
    val clKernelTy  = CL.T_Named "cl_kernel"
    val clCmdQueueTy = CL.T_Named "cl_command_queue" 
    val clContextTy = CL.T_Named "cl_context" 
    val clDeviceIdTy = CL.T_Named "cl_device_id"
    val clPlatformIdTy = CL.T_Named "cl_platform_id" 
    val clMemoryTy = CL.T_Named "cl_mem"

    datatype strand = Strand of {
        name : string,
        tyName : string,
        state : var list ref,
        output : (Ty.ty * CL.var) option ref,   (* the strand's output variable (only one for now) *)
        code : CL.decl list ref,
        init_code: CL.decl ref
      }

    datatype program = Prog of {
	name : string,			(* stem of source file *)
        double : bool,                  (* true for double-precision support *)
        parallel : bool,                (* true for multithreaded (or multi-GPU) target *)
        debug : bool,                   (* true for debug support in executable *)
        globals : CL.decl list ref,
        topDecls : CL.decl list ref,
        strands : strand AtomTable.hash_table,
        initially : CL.stm list ref,
        numDims: int ref, 
        imgGlobals: (string * int) list ref,
        prFn: CL.decl ref
      }

    datatype env = ENV of {
        info : env_info,
        vMap : var V.Map.map,
        scope : scope
      }

    and env_info = INFO of {
        prog : program
      }

    and scope
      = NoScope
      | GlobalScope
      | InitiallyScope
      | StrandScope of TreeIL.var list  (* strand initialization *)
      | MethodScope of TreeIL.var list  (* method body; vars are state variables *)

  (* the supprted widths of vectors of reals on the target. *)
(* FIXME: for OpenCL 1.1, 3 is also valid *)
    fun vectorWidths () = [2, 4, 8, 16]

  (* tests for whether various expression forms can appear inline *)
    fun inlineCons n = (n < 2)          (* vectors are inline, but not matrices *)
    val inlineMatrixExp = false         (* can matrix-valued expressions appear inline? *)

  (* TreeIL to target translations *)
    structure Tr =
      struct
        fun fragment (ENV{info, vMap, scope}, blk) = let
              val (vMap, stms) = ToCL.trFragment (vMap, blk)
              in
                (ENV{info=info, vMap=vMap, scope=scope}, stms)
              end
        fun saveState cxt stateVars (env, args, stm) = (
              ListPair.foldrEq
                (fn (x, e, stms) => ToCL.trAssign(env, x, e)@stms)
                  [stm]
                    (stateVars, args)
              ) handle ListPair.UnequalLengths => (
                print(concat["saveState ", cxt, ": length mismatch; ", Int.toString(List.length args), " args\n"]);
                raise Fail(concat["saveState ", cxt, ": length mismatch"]))
        fun block (ENV{vMap, scope, ...}, blk) = (case scope
               of StrandScope stateVars => ToCL.trBlock (vMap, saveState "StrandScope" stateVars, blk)
                | MethodScope stateVars => ToCL.trBlock (vMap, saveState "MethodScope" stateVars, blk)
                | _ => ToCL.trBlock (vMap, fn (_, _, stm) => [stm], blk)
              (* end case *))
        fun exp (ENV{vMap, ...}, e) = ToCL.trExp(vMap, e)
      end

  (* variables *)
    structure Var =
      struct
        fun name (ToCL.V(_, name)) = name
        fun global (Prog{globals, imgGlobals, ...}, name, ty) = let
              val ty' = ToCL.trType ty
              fun isImgGlobal (imgGlobals, Ty.ImageTy(ImageInfo.ImgInfo{dim, ...}), name) =  imgGlobals  := (name,dim):: !imgGlobals 
                | isImgGlobal (imgGlobals, _, _) =  () 
              in
                globals := CL.D_Var([], ty', name, NONE) :: !globals;
                isImgGlobal(imgGlobals,ty,name); 
             ToCL.V(ty', name)
              end
        fun param x = ToCL.V(ToCL.trType(V.ty x), V.name x)
        fun state (Strand{state, ...}, x) = let
              val ty' = ToCL.trType(V.ty x)
              val x' = ToCL.V(ty', V.name x)
              in
                state := x' :: !state;
                x'
              end
      end

  (* environments *)
    structure Env =
      struct
      (* create a new environment *)
        fun new prog = ENV{
                info=INFO{prog = prog},
                vMap = V.Map.empty,
                scope = NoScope
              }
      (* define the current translation context *)
        fun setScope scope (ENV{info, vMap, ...}) = ENV{info=info, vMap=vMap, scope=scope}
        val scopeGlobal = setScope GlobalScope
        val scopeInitially = setScope InitiallyScope
        fun scopeStrand (env, svars) = setScope (StrandScope svars) env
        fun scopeMethod (env, svars) = setScope (MethodScope svars) env
      (* bind a TreeIL varaiable to a target variable *)
        fun bind (ENV{info, vMap, scope}, x, x') = ENV{
                info = info,
                vMap = V.Map.insert(vMap, x, x'),
                scope = scope
              }
      end

  (* programs *)
    structure Program =
      struct
        fun new {name, double, parallel, debug} = (
              RN.initTargetSpec double;
              Prog{
		  name = name,
                  double = double, parallel = parallel, debug = debug,
                  globals = ref [],
                  topDecls = ref [],
                  strands = AtomTable.mkTable (16, Fail "strand table"),
                  initially = ref([CL.S_Comment["missing initially"]]),
		 		  numDims = ref(0), 
		  		  imgGlobals = ref[], 
		  		  prFn = ref(CL.D_Comment(["No Print Function"])) 
                })
      (* register the global initialization part of a program *)
   	  fun globalIndirects (globals,stms) = let
                fun getGlobals (CL.D_Var(_,_,globalVar,_)::rest) =
		      CL.mkAssign(CL.mkIndirect(CL.mkVar RN.globalsVarName,globalVar),CL.mkVar globalVar)
		        ::getGlobals rest
                  | getGlobals [] = [] 
                  | getGlobals (_::rest) = getGlobals rest
                in 
                  stms @ getGlobals globals
                end 

      (* register the code that is used to register command-line options for input variables *)
        fun inputs (Prog{topDecls, ...}, stm) = let
              val inputsFn = CL.D_Func(
                    [], CL.voidTy, RN.registerOpts,
                    [CL.PARAM([], CL.T_Ptr(CL.T_Named RN.optionsTy), "opts")],
                    stm)
              in
                topDecls := inputsFn :: !topDecls
              end
                
        fun init (Prog{globals, topDecls,...}, CL.S_Block(init)) = let
              val params = [
                      CL.PARAM([], CL.T_Ptr(CL.T_Named RN.globalsTy), RN.globalsVarName)
                    ]
              val body = CL.S_Block(globalIndirects(!globals,init)) 
              val initFn = CL.D_Func([], CL.voidTy, RN.initGlobals, params, body)
              in
                topDecls := initFn :: !topDecls
              end
          | init (Prog{globals,topDecls,...}, init) = let
              val params = [
                      CL.PARAM([], CL.T_Ptr(CL.T_Named RN.globalsTy), RN.globalsVarName)
                    ]
              val initFn = CL.D_Func([], CL.voidTy, RN.initGlobals, params, init)
              in
                topDecls := initFn :: !topDecls
              end
            
      (* create and register the initially function for a program *)
        fun initially {
              prog = Prog{strands, initially, numDims,...},
              isArray : bool,
              iterPrefix : stm list,
              iters : (var * exp * exp) list,
              createPrefix : stm list,
              strand : Atom.atom,
              args : exp list
            } = let
              val name = Atom.toString strand
              val nDims = List.length iters
              fun mapi f xs = let
                    fun mapf (_, []) = []
                      | mapf (i, x::xs) = f(i, x) :: mapf(i+1, xs)
                    in
                      mapf (0, xs)
                    end
              val baseInit = mapi (fn (i, (_, e, _)) => (i, CL.I_Exp e)) iters
              val sizeInit = mapi
                    (fn (i, (ToCL.V(ty, _), lo, hi)) =>
                        (i, CL.I_Exp(CL.mkBinOp(CL.mkBinOp(hi, CL.#-, lo), CL.#+, CL.mkInt(1, ty))))
                    ) iters
                  val numStrandsVar = "numStrandsVar" 
              val allocCode = iterPrefix @ [
                      CL.mkComment["allocate initial block of strands"],
                      CL.mkDecl(CL.T_Array(CL.int32, SOME nDims), "base", SOME(CL.I_Array baseInit)),
                      CL.mkDecl(CL.T_Array(CL.uint32, SOME nDims), "size", SOME(CL.I_Array sizeInit)),
                      CL.mkDecl(CL.int32,"numDims",SOME(CL.I_Exp(CL.mkInt(IntInf.fromInt nDims, CL.int32))))
                    ]               
              val numStrandsLoopBody =
		    CL.mkExpStm(CL.mkAssignOp(CL.mkVar numStrandsVar, CL.*=,CL.mkSubscript(CL.mkVar "size",CL.mkVar "i")))
              val numStrandsLoop =  CL.mkFor([(CL.intTy, "i", CL.mkInt(0,CL.intTy))], 
                    CL.mkBinOp(CL.mkVar "i", CL.#<, CL.mkVar "numDims"), 
                    [CL.mkPostOp(CL.mkVar "i", CL.^++)], numStrandsLoopBody)
              in
                numDims := nDims; 
                initially := allocCode @ [numStrandsLoop]	      	      
              end


      (***** OUTPUT *****)
        fun genStrandInit(Strand{name,tyName,state,output,code,...},nDims) = let 
	      val params = [
		      CL.PARAM([], CL.T_Ptr(CL.uint32), "sizes" ), 
		      CL.PARAM([], CL.intTy, "width"),
		      CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "strands")
		    ]
	      val body = let   
		    fun loopParams 3 = ["x", "y", "k"] 
		      | loopParams 2 = ["x", "y"] 
		      | loopParams 1 = ["x"] 
		      | loopParams _ = raise Fail "genStrandInit: missing size dim"
		    fun mkLoopNest ([], _, nDims) = if nDims = 1
			  then CL.mkBlock [
			      CL.mkCall(RN.strandInit name, [
				CL.mkUnOp(CL.%&,CL.mkSubscript(CL.mkVar "strands",CL.mkStr "x")),
						CL.mkVar "x"])
			    ]
			  else let 
			    val index = CL.mkBinOp(CL.mkBinOp(CL.mkVar "x",CL.#*,CL.mkVar "width"),CL.#+,CL.mkVar "y")
			    in 
			      CL.mkBlock([CL.mkCall(RN.strandInit name, [CL.mkUnOp(CL.%&,CL.mkSubscript(CL.mkVar "strands",index)),
			      CL.mkVar "x", CL.mkVar"y"])])
			    end 		  		
		      | mkLoopNest (param::rest,count,nDims) = let 
			  val body = mkLoopNest (rest, count + 1,nDims)
			  in 
			    CL.mkFor(
				[(CL.intTy, param, CL.mkInt(0,CL.intTy))],
				CL.mkBinOp(CL.mkVar param, CL.#<, CL.mkSubscript(CL.mkVar "sizes",CL.mkInt(count,CL.intTy))),
				[CL.mkPostOp(CL.mkVar param, CL.^++)],
				body)
			  end 
		    in 
		      [mkLoopNest ((loopParams nDims),0,nDims)]
		    end
		in
		  CL.D_Func(["static"], CL.voidTy, RN.strandInitSetup, params,CL.mkBlock(body))
		end

	fun genStrandPrint (Strand{name, tyName, state, output, code,...},nDims) = let
	    (* the print function *)
	      val prFnName = concat[name, "_print"]
	      val prFn = let
		    val params = [
			  CL.PARAM([], CL.T_Ptr(CL.T_Named "FILE"), "outS"),
			  CL.PARAM([], CL.T_Ptr(CL.uint32), "sizes" ), 
			  CL.PARAM([], CL.intTy, "width"),
			  CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "self")
			]
				
		   val SOME(ty, x) = !output
		   val outState = if nDims = 1 then
		    	  CL.mkSelect(CL.mkSubscript(CL.mkVar "self",CL.mkVar "x"), x) 
		    	else if nDims = 2 then 
		    		CL.mkSelect(CL.mkSubscript(CL.mkVar "self",
		    		   CL.mkBinOp(CL.mkBinOp(CL.mkVar "x",CL.#*,CL.mkVar "width"),CL.#+,CL.mkVar "y")), x)
		    		 
		    	else CL.mkSelect(CL.mkVar "self",x) 
		    		
		    val prArgs = (case ty
			   of Ty.IVecTy 1 => [CL.mkStr(!RN.gIntFormat ^ "\n"), outState]
			    | Ty.IVecTy d => let
				val fmt = CL.mkStr(
				      String.concatWith " " (List.tabulate(d, fn _ => !RN.gIntFormat))
				      ^ "\n")
				val args = List.tabulate (d, fn i => ToCL.vecIndex(outState, i))
				in
				  fmt :: args
				end
			    | Ty.TensorTy[] => [CL.mkStr "%f\n", outState]
			    | Ty.TensorTy[d] => let
				val fmt = CL.mkStr(
				      String.concatWith " " (List.tabulate(d, fn _ => "%f"))
				      ^ "\n")
				val args = List.tabulate (d, fn i => ToCL.vecIndex(outState, i))
				in
				  fmt :: args
				end
			    | _ => raise Fail("genStrand: unsupported output type " ^ Ty.toString ty)
			  (* end case *))
			  
			  val body = let 
			  
			    fun loopParams (3) = 
			    	 "x"::"y"::"k"::[] 
			      | loopParams (2) = 
			      	 "x"::"y"::[] 
			      | loopParams (1) = 
			      	 "x"::[] 
			      | loopParams (_) = 
			      	raise Fail("genStrandPrint: unsupported output type " ^ Ty.toString ty)
			  
			   fun mkLoopNest ([],_) = 
						CL.mkCall("fprintf", CL.mkVar "outS" :: prArgs)		
				| mkLoopNest (param::rest,count) = let 
					val body = mkLoopNest (rest, count + 1)
				   in 
				   		CL.mkFor(
							[(CL.intTy, param, CL.mkInt(0,CL.intTy))],
						CL.mkBinOp(CL.mkVar param, CL.#<, CL.mkSubscript(CL.mkVar "sizes",CL.mkInt(count,CL.intTy))),
						[CL.mkPostOp(CL.mkVar param, CL.^++)],
						body)
				   end 
		    	in 
		    		[mkLoopNest ((loopParams nDims),0)]
		    	end
			  
		    in
		      CL.D_Func(["static"], CL.voidTy, prFnName, params,CL.mkBlock(body))
		    end
	      in
				 prFn
	      end 
        fun genStrandTyDef (Strand{tyName, state,...}) = 
            (* the type declaration for the strand's state struct *)
              CL.D_StructDef(
                      List.rev (List.map (fn ToCL.V(ty, x) => (ty, x)) (!state)),
                      tyName)
        
        
   	(* generates the load kernel function *)
(* FIXME: this code might be part of the runtime system *)
	fun genKernelLoader() =  
		CL.D_Verbatim ( ["/* Loads the Kernel from a file */", 
						"char * loadKernel (const char * filename) {",
						"struct stat statbuf;",
						"FILE *fh;",
						"char *source;",
						"fh = fopen(filename, \"r\");",
						"if (fh == 0)",
						"   return 0;",
						"stat(filename, &statbuf);",
						"source = (char *) malloc(statbuf.st_size + 1);",
						"fread(source, statbuf.st_size, 1, fh);",
						"fread(source, statbuf.st_size, 1, fh);",
						"return source;",
						"}"]) 
(* generates the opencl buffers for the image data *) 
	fun getGlobalDataBuffers(globals,count,contextVar,errVar) = let 
		val globalBufferDecl =  CL.mkDecl(clMemoryTy,concat[RN.globalsVarName,"_cl"],NONE)
		val globalBuffer = CL.mkAssign(CL.mkVar(concat[RN.globalsVarName,"_cl"]), CL.mkApply("clCreateBuffer",
								[CL.mkVar contextVar,
							 	CL.mkVar "CL_MEM_COPY_HOST_PTR",
							 	CL.mkApply("sizeof",[CL.mkVar RN.globalsTy]),
							 	CL.mkVar RN.globalsVarName,
							 	CL.mkUnOp(CL.%&,CL.mkVar errVar)]))
        
	fun genDataBuffers([],_,_,_) = [] 
	  | genDataBuffers((var,nDims)::globals,count,contextVar,errVar) = let
	(* FIXME: use CL constructors to  build expressions (not strings) *)
                  val size = if nDims = 1 then 
	  	   			CL.mkBinOp(CL.mkApply("sizeof",[CL.mkVar "float"]), CL.#*, 
	  	   			 CL.mkIndirect(CL.mkVar var, "size[0]"))
	  	   			else if nDims = 2 then 
	  	   			CL.mkBinOp(CL.mkApply("sizeof",[CL.mkVar "float"]), CL.#*, 
	  	   			  CL.mkIndirect(CL.mkVar var, concat["size[0]", " * ", var, "->size[1]"])) 
	  	   			else 
	  	   			 CL.mkBinOp(CL.mkApply("sizeof",[CL.mkVar "float"]), CL.#*, 
	  	   			  CL.mkIndirect(CL.mkVar var,concat["size[0]", " * ", var, "->size[1] * ", var, "->size[2]"])) 
	  	   													
	  	 in 
	  	   CL.mkDecl(clMemoryTy, RN.addBufferSuffix var ,NONE)::
	  	   CL.mkDecl(clMemoryTy, RN.addBufferSuffixData var ,NONE)::
	  	   CL.mkAssign(CL.mkVar(RN.addBufferSuffix var), CL.mkApply("clCreateBuffer",
								[CL.mkVar contextVar,
							 	CL.mkVar "CL_MEM_COPY_HOST_PTR",
							 	CL.mkApply("sizeof",[CL.mkVar (RN.imageTy nDims)]),
							 	CL.mkVar var,
							 	CL.mkUnOp(CL.%&,CL.mkVar errVar)])) :: 
			CL.mkAssign(CL.mkVar(RN.addBufferSuffixData var), CL.mkApply("clCreateBuffer",
								[CL.mkVar contextVar,
							 	 CL.mkVar "CL_MEM_COPY_HOST_PTR",
							 	size,
							 	CL.mkIndirect(CL.mkVar var,"data"),
							 	CL.mkUnOp(CL.%&,CL.mkVar errVar)])):: genDataBuffers(globals,count + 2,contextVar,errVar) 
		end
	in 
		[globalBufferDecl] @ [globalBuffer] @ genDataBuffers(globals,count + 2,contextVar,errVar) 
	end 

        
(* generates the kernel arguments for the image data *) 
	fun genGlobalArguments(globals,count,kernelVar,errVar) = let 
	val globalArgument = CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.|=,CL.mkApply("clSetKernelArg",
							 	[CL.mkVar kernelVar, 
							 	 CL.mkInt(count,CL.intTy),
							 	 CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
							 	 CL.mkUnOp(CL.%&,CL.mkVar(concat[RN.globalsVarName,"_cl"]))])))
	
	fun genDataArguments([],_,_,_) = [] 
	  | genDataArguments((var,nDims)::globals,count,kernelVar,errVar) = 
	
		CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.|=, CL.mkApply("clSetKernelArg",
							 	[CL.mkVar kernelVar, 
							 	 CL.mkInt(count,CL.intTy),
							 	 CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
							 	 CL.mkUnOp(CL.%&,CL.mkVar(RN.addBufferSuffix var))])))::
	  
			CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar,CL.|=,CL.mkApply("clSetKernelArg",
							 	[CL.mkVar kernelVar, 
							 	 CL.mkInt((count + 1),CL.intTy),
							 	 CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
							 	 CL.mkUnOp(CL.%&,CL.mkVar(RN.addBufferSuffixData var))]))):: genDataArguments (globals, count + 2,kernelVar,errVar) 
		 
	in
	
		[globalArgument] @ genDataArguments(globals,count + 1,kernelVar,errVar) 
	
	end 

	(* generates the main function of host code *) 
	fun genHostMain() = let 
              val setupCall = [CL.mkCall(RN.setupFName,[CL.mkVar RN.globalsVarName])]
              val globalsDecl = CL.mkDecl(
                    CL.T_Ptr(CL.T_Named RN.globalsTy),
                    RN.globalsVarName,
                    SOME(CL.I_Exp(CL.mkApply("malloc", [CL.mkApply("sizeof",[CL.mkVar RN.globalsTy])]))))
              val initGlobalsCall = CL.mkCall(RN.initGlobals,[CL.mkVar RN.globalsVarName])
              val returnStm = [CL.mkReturn(SOME(CL.mkInt(0,CL.intTy)))]
              val params = [ 
                     CL.PARAM([],CL.intTy, "argc"),
                     CL.PARAM([],CL.charArrayPtr,"argv")
                   ]
              val body = CL.mkBlock([globalsDecl] @ [initGlobalsCall]  @ setupCall @ returnStm)
              in 
                CL.D_Func([],CL.intTy,"main",params,body) 
              end 

      (* generates the host-side setup function *) 
	fun genHostSetupFunc (strand as Strand{name,tyName,...}, filename, nDims, initially, imgGlobals) = let
            (* Delcare opencl setup objects *) 
              val programVar= "program" 
              val kernelVar = "kernel"
              val cmdVar = "queue" 
              val inStateVar = "selfin" 
              val outStateVar = "selfout" 
              val stateSizeVar= "state_mem_size" 
              val clInstateVar = "clSelfIn"
              val clOutStateVar = "clSelfOut" 
              val clGlobals = "clGlobals" 
              val sourcesVar = "sources" 
              val contextVar = "context" 
              val errVar = "err"
              val imgDataSizeVar = "image_dataSize"
              val globalVar = "global_work_size"
              val localVar = "local_work_size" 
              val clFNVar = "filename"
              val numStrandsVar = "numStrandsVar" 
              val headerFNVar = "header"  
              val deviceVar = "device" 
              val platformsVar = "platforms" 
              val numPlatformsVar = "num_platforms" 
              val numDevicesVar = "num_devices"
              val assertStm = CL.mkCall("assert",[CL.mkBinOp(CL.mkVar errVar, CL.#==, CL.mkVar "CL_SUCCESS")])
              val params = [ 
                      CL.PARAM([],CL.T_Named("cl_device_id"), deviceVar)
                    ]
              val declarations = [
                    CL.mkDecl(clProgramTy, programVar, NONE),
                    CL.mkDecl(clKernelTy, kernelVar, NONE),
                    CL.mkDecl(clCmdQueueTy, cmdVar, NONE),
                    CL.mkDecl(clContextTy, contextVar, NONE),
                    CL.mkDecl(CL.intTy, errVar, NONE),
                    CL.mkDecl(CL.intTy, numStrandsVar, SOME(CL.I_Exp(CL.mkInt(1,CL.intTy)))), 
                    CL.mkDecl(CL.intTy, stateSizeVar, NONE), 
                    CL.mkDecl(CL.intTy, "width", NONE), 
                    CL.mkDecl(CL.intTy, imgDataSizeVar, NONE), 
                    (*CL.mkDecl(clDeviceIdTy, deviceVar, NONE), *)
                    CL.mkDecl(CL.T_Ptr(CL.T_Named tyName), inStateVar,NONE), 
                    CL.mkDecl(clMemoryTy,clInstateVar,NONE),
                    CL.mkDecl(clMemoryTy,clOutStateVar,NONE), 
                    CL.mkDecl(CL.T_Ptr(CL.T_Named tyName), outStateVar,NONE),
                    CL.mkDecl(CL.charPtr, clFNVar,SOME(CL.I_Exp(CL.mkStr filename))),
(* FIXME:  use Paths.diderotInclude *)
                    CL.mkDecl(CL.charPtr, headerFNVar,SOME(CL.I_Exp(CL.mkStr "../src/include/Diderot/cl-types.h"))), 
                    CL.mkDecl(CL.T_Array(CL.charPtr,SOME(2)),sourcesVar,NONE), 
                    CL.mkDecl(CL.T_Array(CL.T_Named "size_t",SOME(nDims)),globalVar,NONE),
                    CL.mkDecl(CL.T_Array(CL.T_Named "size_t",SOME(nDims)),localVar,NONE),
                    CL.mkDecl(CL.intTy,numDevicesVar,SOME(CL.I_Exp(CL.mkInt(~1,CL.intTy)))), 
                    CL.mkDecl(CL.T_Array(CL.T_Named "cl_platform_id", SOME(1)), platformsVar, NONE), 
                    CL.mkDecl(CL.intTy,"num_platforms",SOME(CL.I_Exp(CL.mkInt(~1,CL.intTy))))
                ]
            (* Setup Global Variables *) 
              val globalsDecl = CL.mkDecl(
                    CL.T_Ptr(CL.T_Named RN.globalsTy),
                    RN.globalsVarName,
                    SOME(CL.I_Exp(CL.mkApply("malloc", [CL.mkApply("sizeof",[CL.mkVar RN.globalsTy])]))))
              val initGlobalsCall = CL.mkCall(RN.initGlobals,[CL.mkVar RN.globalsVarName])	

		(* Retrieve the platforms 
		val platformStm = [CL.mkAssign(CL.mkVar errVar, CL.mkApply("clGetPlatformIDs",
						  [CL.mkInt(10,CL.intTy), 
						   CL.mkVar platformsVar, 
						   CL.mkUnOp(CL.%&,CL.mkVar numPlatformsVar)])),
						   assertStm]
						   
		val devicesStm = [CL.mkAssign(CL.mkVar errVar, CL.mkApply("clGetDeviceIDs",
						  [CL.mkSubscript(CL.mkVar platformsVar,CL.mkInt(0,CL.intTy)),
						   CL.mkVar "CL_DEVICE_TYPE_GPU",
						   CL.mkInt(1,CL.intTy), 
						   CL.mkUnOp(CL.%&,CL.mkVar deviceVar), 
						   CL.mkUnOp(CL.%&,CL.mkVar numDevicesVar)])),
						   assertStm] *) 
		
		(* Create Context *) 
		val contextStm = [CL.mkAssign(CL.mkVar contextVar, CL.mkApply("clCreateContext",
						  [CL.mkInt(0,CL.intTy), 
						  CL.mkInt(1,CL.intTy),
						  CL.mkUnOp(CL.%&,CL.mkVar deviceVar),
						  CL.mkVar "NULL",
						  CL.mkVar "NULL",
						  CL.mkUnOp(CL.%&,CL.mkVar errVar)])),
						  assertStm]
		
		(* Create Command Queue *) 
		val commandStm = [CL.mkAssign(CL.mkVar cmdVar, CL.mkApply("clCreateCommandQueue",
						  [CL.mkVar contextVar, 
						  CL.mkVar deviceVar,
						  CL.mkInt(0,CL.intTy),
						  CL.mkUnOp(CL.%&,CL.mkVar errVar)])),
						  assertStm]
		

		(*Create Program/Build/Kernel with Source statement *) 
		val createProgStm = CL.mkAssign(CL.mkVar programVar, CL.mkApply("clCreateProgramWithSource", 
														[CL.mkVar contextVar, 
														 CL.mkInt(2,CL.intTy), 
														 CL.mkCast(CL.T_Ptr(CL.T_Named("const char *")),CL.mkUnOp(CL.%&,CL.mkVar sourcesVar)),
														 CL.mkVar "NULL",
														 CL.mkUnOp(CL.%&,CL.mkVar errVar)]))

		(* FIXME: Remove after testing purposes, Build Log for OpenCL*) 
		val buildLog = [CL.mkAssign(CL.mkVar errVar, CL.mkApply("clBuildProgram", 
														[CL.mkVar programVar, 
														 CL.mkInt(0,CL.intTy),
														 CL.mkVar "NULL",
														 CL.mkVar "NULL",
														 CL.mkVar "NULL",
														 CL.mkVar "NULL"])), 
					  CL.mkDecl(CL.charPtr, "build", NONE),
					  CL.mkDecl(CL.T_Named("size_t"),"ret_val_size",NONE), 
					   CL.mkAssign(CL.mkVar errVar, CL.mkApply("clGetProgramBuildInfo", 
														[CL.mkVar programVar, 
														CL.mkVar deviceVar,
														 CL.mkVar "CL_PROGRAM_BUILD_LOG",
														 CL.mkInt(0,CL.intTy),
														 CL.mkVar "NULL",
														 CL.mkUnOp(CL.%&,CL.mkVar "ret_val_size")])), 
					  CL.mkAssign(CL.mkVar "build", CL.mkApply("malloc", [CL.mkVar "ret_val_size"])), 
  						CL.mkAssign(CL.mkVar errVar, CL.mkApply("clGetProgramBuildInfo", 
														[CL.mkVar programVar, 
														CL.mkVar deviceVar,
														 CL.mkVar "CL_PROGRAM_BUILD_LOG",
														 CL.mkVar "ret_val_size",
														 CL.mkVar "build",
														 CL.mkVar "NULL"])),
						CL.mkAssign(CL.mkSubscript(CL.mkVar "build",CL.mkVar "ret_val_size"),CL.mkVar ("'\\" ^ "0'")), 
						CL.mkCall("printf",[CL.mkStr ( "Build Log:" ^ "\n" ^ "%s" ^ "\n"), CL.mkVar "build"])] 

	


		val createKernel = CL.mkAssign(CL.mkVar kernelVar, CL.mkApply("clCreateKernel", 
														[CL.mkVar programVar, 
														 CL.mkStr RN.kernelFuncName, 
														 CL.mkUnOp(CL.%&,CL.mkVar errVar)]))
														 
														 
		val create_build_stms = [createProgStm,assertStm] @ buildLog @ [assertStm,createKernel,assertStm] 
		
	
		
		(* Create Memory Buffers for Strand States and Globals *) 
		val strandSize = CL.mkAssign(CL.mkVar stateSizeVar,CL.mkBinOp(CL.mkApply("sizeof",
									[CL.mkVar tyName]), CL.#*,CL.mkVar numStrandsVar))
				
		val clStrandObjects = [CL.mkAssign(CL.mkVar clInstateVar, CL.mkApply("clCreateBuffer",
								[CL.mkVar contextVar,
							 	CL.mkVar "CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR",
							 	CL.mkVar stateSizeVar,
							 	CL.mkVar "NULL",
							 	CL.mkUnOp(CL.%&,CL.mkVar errVar)])),
							 CL.mkAssign(CL.mkVar clOutStateVar, CL.mkApply("clCreateBuffer",
								[CL.mkVar contextVar,
							 	CL.mkVar "CL_MEM_READ_WRITE",
							 	CL.mkVar stateSizeVar,
								CL.mkVar "NULL",
							 	CL.mkUnOp(CL.%&,CL.mkVar errVar)]))]
	

		(* Setup up selfOut variable *) 
		val strandsArrays = [CL.mkAssign(CL.mkVar outStateVar, CL.mkApply("malloc", [CL.mkBinOp(CL.mkVar numStrandsVar,
									CL.#*, CL.mkApply("sizeof",[CL.mkVar tyName]))])),
								CL.mkAssign(CL.mkVar inStateVar, CL.mkApply("malloc", [CL.mkBinOp(CL.mkVar numStrandsVar,
									CL.#*, CL.mkApply("sizeof",[CL.mkVar tyName]))]))]

			
		(* Initialize Width Parameter *) 
		val widthDel = if nDims = 2 then 
			  CL.mkAssign(CL.mkVar "width",CL.mkSubscript(CL.mkVar globalVar, CL.mkInt(1, CL.intTy)))
		   else
		   	  CL.mkAssign(CL.mkVar "width",CL.mkInt(0,CL.intTy)) 


		val strands_init = CL.mkCall(RN.strandInitSetup, [
			CL.mkVar "size", CL.mkVar "width", CL.mkVar inStateVar
		      ])
	
	    val clGlobalBuffers = getGlobalDataBuffers(!imgGlobals,3,contextVar,errVar) 
	    
	    
		(* Load the Kernel and Header Files *) 
		val sourceStms = [CL.mkAssign(CL.mkSubscript(CL.mkVar sourcesVar,CL.mkInt(1,CL.intTy)),
									  CL.mkApply(RN.clLoaderFN, [CL.mkVar clFNVar])), 
	   CL.mkAssign(CL.mkSubscript(CL.mkVar sourcesVar,CL.mkInt(0,CL.intTy)),
									  CL.mkApply(RN.clLoaderFN, [CL.mkVar headerFNVar]))] 

		(* val sourceStms = [CL.mkAssign(CL.mkSubscript(CL.mkVar sourcesVar,CL.mkInt(1,CL.intTy)),
									  CL.mkApply(RN.clLoaderFN, [CL.mkVar clFNVar]))] *)

	                      
                (* Created Enqueue Statements *)
(* FIXME: simplify this code by function abstraction *)
 	val enqueueStm = if nDims = 1 
			then [CL.mkAssign(CL.mkVar errVar, 
							  CL.mkApply("clEnqueueNDRangeKernel", 
									 			[CL.mkVar cmdVar,
									 			 CL.mkVar kernelVar,
									 			 CL.mkInt(1,CL.intTy), 
									 			 CL.mkVar "NULL",
									 			 CL.mkVar globalVar,
									 			 CL.mkVar localVar,
									 			 CL.mkInt(0,CL.intTy),
									 			 CL.mkVar "NULL",
									 			 CL.mkVar "NULL"])),CL.mkCall("clFinish",[CL.mkVar cmdVar])]
			else if nDims = 2  then 
			 [CL.mkAssign(CL.mkVar errVar, 
							CL.mkApply("clEnqueueNDRangeKernel", 
									 			[CL.mkVar cmdVar,
									 			 CL.mkVar kernelVar,
									 			 CL.mkInt(2,CL.intTy), 
									 			 CL.mkVar "NULL",
									 			 CL.mkVar globalVar,
									 			 CL.mkVar localVar,
									 			 CL.mkInt(0,CL.intTy),
									 			 CL.mkVar "NULL",
									 			 CL.mkVar "NULL"])),CL.mkCall("clFinish",[CL.mkVar cmdVar])] 
			else 
			  [CL.mkAssign(CL.mkVar errVar, 
							CL.mkApply("clEnqueueNDRangeKernel", 
									 			[CL.mkVar cmdVar,
									 			 CL.mkVar kernelVar,
									 			 CL.mkInt(3,CL.intTy), 
									 			 CL.mkVar "NULL",
									 			 CL.mkVar globalVar,
									 			 CL.mkVar localVar,
									 			 CL.mkInt(0,CL.intTy),
									 			 CL.mkVar "NULL",
									 			 CL.mkVar "NULL"])),CL.mkCall("clFinish",[CL.mkVar cmdVar])] 
		

			
		(* Setup Global and Local variables *) 
		
		val globalAndlocalStms = if nDims = 1 then 
			[CL.mkAssign(CL.mkSubscript(CL.mkVar globalVar, CL.mkInt(0,CL.intTy)),
								   CL.mkSubscript(CL.mkVar "size", CL.mkInt(0,CL.intTy))), 
			 CL.mkAssign(CL.mkSubscript(CL.mkVar localVar, CL.mkInt(0,CL.intTy)),
								  CL.mkVar "16")]
		
		
		else if nDims = 2 then 
			[CL.mkAssign(CL.mkSubscript(CL.mkVar globalVar, CL.mkInt(0,CL.intTy)),
								   CL.mkSubscript(CL.mkVar "size", CL.mkInt(0,CL.intTy))), 
			CL.mkAssign(CL.mkSubscript(CL.mkVar globalVar, CL.mkInt(1,CL.intTy)),
								   CL.mkSubscript(CL.mkVar "size", CL.mkInt(1,CL.intTy))),
			CL.mkAssign(CL.mkSubscript(CL.mkVar localVar, CL.mkInt(0,CL.intTy)),
								  CL.mkVar "16"),
			CL.mkAssign(CL.mkSubscript(CL.mkVar localVar, CL.mkInt(1,CL.intTy)),
								  CL.mkVar "16")]
								  
		else 
			[CL.mkAssign(CL.mkSubscript(CL.mkVar globalVar, CL.mkInt(0,CL.intTy)),
								   CL.mkSubscript(CL.mkVar "size", CL.mkInt(0,CL.intTy))), 
			CL.mkAssign(CL.mkSubscript(CL.mkVar globalVar, CL.mkInt(1,CL.intTy)),
								   CL.mkSubscript(CL.mkVar "size", CL.mkInt(1,CL.intTy))),
			CL.mkAssign(CL.mkSubscript(CL.mkVar globalVar, CL.mkInt(2,CL.intTy)),
								   CL.mkSubscript(CL.mkVar "size", CL.mkInt(2,CL.intTy))),
			CL.mkAssign(CL.mkSubscript(CL.mkVar localVar, CL.mkInt(0,CL.intTy)),
								  CL.mkVar "16"),
			CL.mkAssign(CL.mkSubscript(CL.mkVar localVar, CL.mkInt(1,CL.intTy)),
								  CL.mkVar "16"),
			CL.mkAssign(CL.mkSubscript(CL.mkVar localVar, CL.mkInt(2,CL.intTy)),
								  CL.mkVar "16")]
		

		
		(* Setup Kernel arguments *) 
		val kernelArguments = [CL.mkAssign(CL.mkVar errVar,CL.mkApply("clSetKernelArg",
							 	[CL.mkVar kernelVar, 
							 	 CL.mkInt(0,CL.intTy),
							 	 CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
							 	 CL.mkUnOp(CL.%&,CL.mkVar clInstateVar)])), 
							    CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar, CL.|=,CL.mkApply("clSetKernelArg",
							 	[CL.mkVar kernelVar, 
							 	 CL.mkInt(1,CL.intTy),
							 	 CL.mkApply("sizeof",[CL.mkVar "cl_mem"]),
							 	 CL.mkUnOp(CL.%&,CL.mkVar clOutStateVar)]))), 
							 	  CL.mkExpStm(CL.mkAssignOp(CL.mkVar errVar, CL.|=,CL.mkApply("clSetKernelArg",
							 	[CL.mkVar kernelVar, 
							 	 CL.mkInt(2,CL.intTy),
							 	 CL.mkApply("sizeof",[CL.mkVar "int"]),
							 	 CL.mkUnOp(CL.%&,CL.mkVar "width")])))] 
							 	 
	   val clGlobalArguments = genGlobalArguments(!imgGlobals,3,kernelVar,errVar) @ [assertStm]
		
		(* Retrieve output *)
		val outputStm = CL.mkAssign(CL.mkVar errVar, 
							CL.mkApply("clEnqueueReadBuffer", 
									 			[CL.mkVar cmdVar,
									 			 CL.mkVar clOutStateVar, 
									 			 CL.mkVar "CL_TRUE",
									 			 CL.mkInt(0,CL.intTy), 
									 			 CL.mkVar stateSizeVar,
									 			 CL.mkVar outStateVar,
									 			 CL.mkInt(0,CL.intTy),
									 			 CL.mkVar "NULL",
									 			 CL.mkVar "NULL"]))
		
		(* Free all the objects *) 
		val freeStms = [CL.mkCall("clReleaseKernel",[CL.mkVar kernelVar]),
						CL.mkCall("clReleaseProgram",[CL.mkVar programVar ]),
						CL.mkCall("clReleaseCommandQueue",[CL.mkVar cmdVar]),
						CL.mkCall("clReleaseContext",[CL.mkVar contextVar]),
						CL.mkCall("clReleaseMemObject",[CL.mkVar clInstateVar]),
						CL.mkCall("clReleaseMemObject",[CL.mkVar clOutStateVar])]
		
		
		(*Setup Strand Print Function *) 
		val outputData = [CL.mkDecl(CL.T_Ptr(CL.T_Named("FILE")), "outS", SOME(CL.I_Exp(CL.mkApply("fopen",
						[CL.mkStr "mip.txt",
			 			CL.mkStr "w"])))),  
						CL.mkCall(concat[name, "_print"], 
									[CL.mkVar "outS", 
									 CL.mkVar "size", 
									 CL.mkVar "width", 
									 CL.mkVar outStateVar])]

      
		
		(* Body put all the statments together *) 
		val body =  declarations @ [globalsDecl,initGlobalsCall] (*@ platformStm @ devicesStm *) @ contextStm @ commandStm @ !initially @ [strandSize] @
				   strandsArrays @ globalAndlocalStms @ [widthDel,strands_init]  @ clStrandObjects @ clGlobalBuffers @ sourceStms  @ create_build_stms  (*@
				   kernelArguments @ clGlobalArguments @ enqueueStm @  [outputStm] @ freeStms @ outputData *)
		
		in 
		
	 	CL.D_Func([],CL.voidTy,RN.setupFName,params,CL.mkBlock(body))
		
		end 
(* generate the data and global parameters *) 
	fun genKeneralGlobalParams ((name,tyname)::rest) = 
		CL.PARAM([], CL.T_Ptr(CL.T_Named RN.globalsTy), concat[RN.globalsVarName]) ::
		CL.PARAM([], CL.T_Ptr(CL.T_Named (RN.imageTy tyname)),RN.addBufferSuffix name) :: 
		CL.PARAM([], CL.T_Ptr(CL.voidTy),RN.addBufferSuffixData name) ::
		genKeneralGlobalParams(rest) 
		
	  | genKeneralGlobalParams ([]) = []
	
	(*generate code for intilizing kernel global data *) 
	fun initKernelGlobals (globals,imgGlobals) = let 
		fun initGlobalStruct (CL.D_Var(_, _ , name, _)::rest) =  
				CL.mkAssign(CL.mkVar name, CL.mkIndirect(CL.mkVar RN.globalsVarName, name)) :: 
				initGlobalStruct(rest) 
		  | initGlobalStruct ( _::rest) = initGlobalStruct(rest) 
		  | initGlobalStruct([]) = [] 
		
		fun initGlobalImages((name,tyname)::rest) = 
				CL.mkAssign(CL.mkVar name, CL.mkVar (RN.addBufferSuffix name)) :: 
				CL.mkAssign(CL.mkIndirect(CL.mkVar name,"data"),CL.mkVar (RN.addBufferSuffixData name)) ::
				initGlobalImages(rest)
		  | initGlobalImages([]) = [] 
		in 
		  initGlobalStruct(globals) @ initGlobalImages(imgGlobals) 
		end 
	
	(* generate the main kernel function for the .cl file *) 
	fun genKernelFun(Strand{name, tyName, state, output, code,...},nDims,globals,imgGlobals) = let
		 val fName = RN.kernelFuncName; 
		 val inState = "strand_in" 
		 val outState = "strand_out" 
	     val params = [
		      CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfIn"),
		      CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfOut"),
		      CL.PARAM(["__global"], CL.intTy, "width")
		    ] @ genKeneralGlobalParams(!imgGlobals) 
		  val thread_ids = if nDims = 1 
		  	then [CL.mkDecl(CL.intTy, "x", SOME(CL.I_Exp(CL.mkInt(0, CL.intTy)))), 
		  		  CL.mkAssign(CL.mkVar "x",CL.mkApply(RN.getGlobalThreadId,[CL.mkInt(0,CL.intTy)]))]
		  	else 
		  		[CL.mkDecl(CL.intTy, "x", SOME(CL.I_Exp(CL.mkInt(0, CL.intTy)))), 
		  		 CL.mkDecl(CL.intTy, "y", SOME(CL.I_Exp(CL.mkInt(0, CL.intTy)))),
		  		  CL.mkAssign(CL.mkVar "x",  CL.mkApply(RN.getGlobalThreadId,[CL.mkInt(0,CL.intTy)])),
		  		  CL.mkAssign(CL.mkVar "y",CL.mkApply(RN.getGlobalThreadId,[CL.mkInt(1,CL.intTy)]))] 
		  
		  val strandDecl = [CL.mkDecl(CL.T_Named tyName, inState, NONE), 
		  					CL.mkDecl(CL.T_Named tyName, outState,NONE)]
		  val strandObjects  = if nDims = 1 
		  	then [CL.mkAssign(CL.mkSubscript(CL.mkVar "selfIn",CL.mkStr "x"),
		  							 CL.mkVar inState),
		  		  CL.mkAssign(CL.mkSubscript(CL.mkVar "selfOut",CL.mkStr "x"),
		  							 CL.mkVar outState)]
		  	else let 
		  		val index = CL.mkBinOp(CL.mkBinOp(CL.mkVar "x",CL.#*,CL.mkVar "width"),CL.#+,CL.mkVar "y")
		  		in 
		  			[CL.mkAssign(CL.mkSubscript(CL.mkVar "selfIn",index),
		  							CL.mkVar inState), 
		  			 CL.mkAssign(CL.mkSubscript(CL.mkVar "selfOut",index),
		  							CL.mkVar outState)] 
		  		end 
		  		
		  		
		  val status = CL.mkDecl(CL.intTy, "status", SOME(CL.I_Exp(CL.mkInt(0, CL.intTy))))
		  val local_vars =  thread_ids @ initKernelGlobals(!globals,!imgGlobals)  @ strandDecl @ strandObjects @ [status]
		  val while_exp = CL.mkBinOp(CL.mkBinOp(CL.mkVar "status",CL.#!=, CL.mkVar RN.kStabilize),CL.#||,CL.mkBinOp(CL.mkVar "status", CL.#!=, CL.mkVar RN.kDie))
		  val while_body = [CL.mkAssign(CL.mkVar "status", CL.mkApply(RN.strandUpdate name,[ CL.mkUnOp(CL.%&,CL.mkVar inState), CL.mkUnOp(CL.%&,CL.mkVar outState)])),
		  					CL.mkCall(RN.strandStabilize name,[ CL.mkUnOp(CL.%&,CL.mkVar inState),  CL.mkUnOp(CL.%&,CL.mkVar outState)])]
		  
		  val whileBlock = [CL.mkWhile(while_exp,CL.mkBlock while_body)]
		  
		  val body = CL.mkBlock(local_vars  @ whileBlock)
		in 
		   CL.D_Func(["__kernel"], CL.voidTy, fName, params, body)
		end
	(* generate a global structure from the globals *) 
	fun genGlobalStruct(globals) = let
		 fun getGlobals(CL.D_Var(_,ty,globalVar,_)::rest) = (ty,globalVar)::getGlobals(rest) 
		   | getGlobals([]) = [] 
		   | getGlobals(_::rest) = getGlobals(rest) 
		 in 
			CL.D_StructDef(getGlobals(globals),RN.globalsTy) 
		  end

      (* generate the table of strand descriptors *)
        fun genStrandTable (ppStrm, strands) = let
              val nStrands = length strands
              fun genInit (Strand{name, ...}) = CL.I_Exp(CL.mkUnOp(CL.%&, CL.mkVar(RN.strandDesc name)))
              fun genInits (_, []) = []
                | genInits (i, s::ss) = (i, genInit s) :: genInits(i+1, ss)
              fun ppDecl dcl = PrintAsC.output(ppStrm, dcl)
              in
                ppDecl (CL.D_Var([], CL.int32, RN.numStrands,
                  SOME(CL.I_Exp(CL.mkInt(IntInf.fromInt nStrands, CL.int32)))));
                ppDecl (CL.D_Var([],
                  CL.T_Array(CL.T_Ptr(CL.T_Named RN.strandDescTy), SOME nStrands),
                  RN.strands,
                  SOME(CL.I_Array(genInits (0, strands)))))
              end

  
	fun genSrc (baseName, Prog{double,globals, topDecls, strands, initially,imgGlobals,numDims,...}) = let
	      val clFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "cl"}
	      val cFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "c"}
	      val clOutS = TextIO.openOut clFileName
	      val cOutS = TextIO.openOut cFileName
(* FIXME: need to use PrintAsC and PrintAsCL *)
	      val clppStrm = PrintAsC.new clOutS
	      val cppStrm = PrintAsC.new cOutS
	      fun cppDecl dcl = PrintAsC.output(cppStrm, dcl) 
	      fun clppDecl dcl = PrintAsC.output(clppStrm, dcl)
	      val strands = AtomTable.listItems strands
	      val [strand as Strand{name, tyName, code,init_code, ...}] = strands
	      in
              (* Generate the OpenCl file *)
                clppDecl (CL.D_Verbatim([
                    if double
                      then "#define DIDEROT_DOUBLE_PRECISION"
                      else "#define DIDEROT_SINGLE_PRECISION",
                    "#define DIDEROT_TARGET_CL",
                    "#include \"Diderot/cl-types.h\""
                  ])); 
                List.app clppDecl (List.rev (!globals)); 
                clppDecl (genGlobalStruct (!globals)); 
                clppDecl (genStrandTyDef strand); 
                List.app clppDecl (!code); 
                clppDecl (genKernelFun (strand,!numDims,globals,imgGlobals));
              (* Generate the Host file .c *) 
                cppDecl (CL.D_Verbatim([
                    if double
                      then "#define DIDEROT_DOUBLE_PRECISION"
                      else "#define DIDEROT_SINGLE_PRECISION",
                    "#define DIDEROT_TARGET_CL",
                    "#include \"Diderot/diderot.h\""
                  ])); 
                List.app cppDecl (List.rev (!globals));
                cppDecl (genGlobalStruct (!globals));
                cppDecl (genStrandTyDef strand);
		cppDecl  (!init_code);
		cppDecl (genStrandInit(strand,!numDims)); 
		cppDecl (genStrandPrint(strand,!numDims)); 
                (* cppDecl (genKernelLoader());*) 
                List.app cppDecl (List.rev (!topDecls));
                cppDecl (genHostSetupFunc (strand, clFileName, !numDims, initially, imgGlobals));	    
		PrintAsC.close cppStrm;
		PrintAsC.close clppStrm; 
		TextIO.closeOut cOutS;
		TextIO.closeOut clOutS
	      end

      (* output the code to a file.  The string is the basename of the file, the extension
       * is provided by the target.
       *)
	fun generate (basename, prog as Prog{double, parallel, debug, ...}) = let
	      fun condCons (true, x, xs) = x::xs
		| condCons (false, _, xs) = xs
	    (* generate the C compiler flags *)
	      val cflags = ["-I" ^ Paths.diderotInclude, "-I" ^ Paths.teemInclude]
	      val cflags = condCons (parallel, #pthread Paths.cflags, cflags)
	      val cflags = if debug
		    then #debug Paths.cflags :: cflags
		    else #ndebug Paths.cflags :: cflags
	      val cflags = #base Paths.cflags :: cflags
	    (* generate the loader flags *)
	      val extraLibs = condCons (parallel, #pthread Paths.extraLibs, [])
	      val extraLibs = Paths.teemLinkFlags @  #base Paths.extraLibs :: extraLibs
		   val extraLibs =  #cl Paths.extraLibs :: extraLibs
	      val rtLib = TargetUtil.runtimeName {
		      target = TargetUtil.TARGET_CL,
		      parallel = parallel, double = double, debug = debug
		    }
	      val ldOpts = rtLib :: extraLibs
	      in
		genSrc (basename, prog);
		RunCC.compile (basename, cflags);
		RunCC.link (basename, ldOpts)
              end

      end

  (* strands *)
    structure Strand =
      struct
        fun define (Prog{strands, ...}, strandId) = let
              val name = Atom.toString strandId
              val strand = Strand{
                      name = name,
                      tyName = RN.strandTy name,
                      state = ref [],
                      output = ref NONE,
                      code = ref [],
                      init_code = ref (CL.D_Comment(["no init code"]))
                    }
              in
                AtomTable.insert strands (strandId, strand);
                strand
              end

      (* return the strand with the given name *)
        fun lookup (Prog{strands, ...}, strandId) = AtomTable.lookup strands strandId

      (* register the strand-state initialization code.  The variables are the strand
       * parameters.
       *)
        fun init (Strand{name, tyName, code,init_code, ...}, params, init) = let
              val fName = RN.strandInit name
              val params =
                    CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfOut") ::
                      List.map (fn (ToCL.V(ty, x)) => CL.PARAM([], ty, x)) params
              val initFn = CL.D_Func([], CL.voidTy, fName, params, init)
              in
                init_code := initFn 
              end

      (* register a strand method *)
        fun method (Strand{name, tyName, code,...}, methName, body) = let
              val fName = concat[name, "_", methName]
              val params = [
                      CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfIn"),
                      CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "selfOut")
                    ]
              val methFn = CL.D_Func([], CL.int32, fName, params, body)
              in
                code := methFn :: !code
              end
                
        fun output (Strand{output, ...}, ty, ToCL.V(_, x)) = output := SOME(ty, x)

      end

  end

structure CLBackEnd = CodeGenFn(CLTarget)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0