Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] View of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml
ViewVC logotype

View of /branches/pure-cfg/src/compiler/cl-target/cl-target.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1244 - (download) (annotate)
Wed May 18 19:30:16 2011 UTC (8 years, 4 months ago) by lamonts
File size: 26554 byte(s)
Added the code for generating OpenCL
(* cl-target.sml
 *
 * COPYRIGHT (c) 2011 The Diderot Project (http://diderot-language.cs.uchicago.edu)
 * All rights reserved.
 *)

structure CLTarget : TARGET =
  struct

    structure IL = TreeIL
    structure V = IL.Var
    structure Ty = IL.Ty
    structure CL = CLang
    structure RN = RuntimeNames
    structure ToC = TreeToCL

    type var = ToC.var
    type exp = CL.exp
    type stm = CL.stm

    (* the translation-time record for a strand; created by Strand.define and filled in
     * incrementally through its ref fields.
     *)
    datatype strand = Strand of {
	name : string,			(* the strand's name (string form of its atom) *)
	tyName : string,		(* name of the generated state-struct type (RN.strandTy name) *)
	state : var list ref,		(* state variables; Var.state prepends, so newest is first *)
	output : (Ty.ty * CL.var) option ref,	(* the strand's output variable (only one for now) *)
	code : CL.decl list ref		(* generated functions; prepended, so newest is first *)
      }

    (* the translation-time record for a whole program; created by Program.new *)
    datatype program = Prog of {
	double : bool,			(* true for double-precision support *)
	parallel : bool,		(* true for multithreaded (or multi-GPU) target *)
	debug : bool,			(* true for debug support in executable *)
	globals : CL.decl list ref,	(* global declarations; Var.global prepends to this list *)
	topDecls : CL.decl list ref,	(* top-level declarations; Program.init prepends to this list *)
	strands : strand AtomTable.hash_table,	(* the program's strands, keyed by strand name *)
	initially : CL.decl ref,	(* the initially function; set by Program.initially *)
	numDims: int ref		(* number of initially iterators; set by Program.initially *)
    }

    (* the translation environment that is threaded through code generation *)
    datatype env = ENV of {
	info : env_info,		(* per-program information *)
	vMap : var V.Map.map,		(* maps TreeIL variables to target variables *)
	scope : scope			(* the current translation context *)
      }

    (* information that is fixed for the whole program *)
    and env_info = INFO of {
	prog : program
      }

    (* the kind of code being translated; Tr.block uses it to decide whether the
     * state variables must be saved.
     *)
    and scope
      = NoScope
      | GlobalScope
      | InitiallyScope
      | StrandScope of TreeIL.var list	(* strand initialization *)
      | MethodScope of TreeIL.var list	(* method body; vars are state variables *)

  (* The supported widths of vectors of reals on the target.  For the GNU vector
   * extensions the supported sizes are powers of two, but float2 is broken, so width 2
   * is only offered when double precision is selected.
   * NOTE: we should also consider the AVX vector hardware, which has 256-bit registers.
   *)
    fun vectorWidths () = (case !RuntimeNames.doublePrecision
	   of true => [2, 4, 8]
	    | false => [4, 8]
	  (* end case *))

  (* tests for whether various expression forms can appear inline *)
  (* inlineCons n is true exactly when n < 2; n is presumably the order (number of
   * dimensions) of the constructed tensor -- TODO confirm against the caller.
   *)
    fun inlineCons n = (n < 2)		(* vectors are inline, but not matrices *)
    val inlineMatrixExp = false		(* can matrix-valued expressions appear inline? *)

  (* TreeIL to target translations *)
    structure Tr =
      struct
      (* translate a code fragment; the result environment carries the variable map
       * returned by ToC.trFragment.
       *)
	fun fragment (ENV{info, vMap, scope}, blk) = (case ToC.trFragment (vMap, blk)
	       of (vMap', stms) => (ENV{info=info, vMap=vMap', scope=scope}, stms)
	      (* end case *))
      (* build the statements that assign args to the state variables, followed by stm.
       * cxt only labels the diagnostic that is printed (and raised as Fail) when the two
       * lists differ in length.
       *)
	fun saveState cxt stateVars (env, args, stm) = let
	      fun assign (x, e, rest) = ToC.trAssign(env, x, e) @ rest
	      in
		ListPair.foldrEq assign [stm] (stateVars, args)
	      end
		handle ListPair.UnequalLengths => (
		  print(concat["saveState ", cxt, ": length mismatch; ", Int.toString(List.length args), " args\n"]);
		  raise Fail(concat["saveState ", cxt, ": length mismatch"]))
      (* translate a block; in strand-init and method scopes the state variables are
       * saved, in every other scope the final statement is passed through unchanged.
       *)
	fun block (ENV{vMap, scope, ...}, blk) = let
	      val saver = (case scope
		     of StrandScope stateVars => saveState "StrandScope" stateVars
		      | MethodScope stateVars => saveState "MethodScope" stateVars
		      | _ => (fn (_, _, stm) => [stm])
		    (* end case *))
	      in
		ToC.trBlock (vMap, saver, blk)
	      end
      (* translate an expression using the environment's variable map *)
	fun exp (ENV{vMap, ...}, e) = ToC.trExp(vMap, e)
      end

  (* variables *)
    structure Var =
      struct
      (* the target-level name of a variable *)
	fun name (ToC.V(_, x)) = x
      (* declare a global variable of the given TreeIL type; the declaration is pushed
       * onto the front of the program's globals list.
       *)
	fun global (Prog{globals, ...}, name, ty) = let
	      val cTy = ToC.trType ty
	      val dcl = CL.D_Var([], cTy, name, NONE)
	      in
		globals := dcl :: !globals;
		ToC.V(cTy, name)
	      end
      (* the target variable for a TreeIL parameter *)
	fun param x = let
	      val cTy = ToC.trType(V.ty x)
	      in
		ToC.V(cTy, V.name x)
	      end
      (* the target variable for a strand state variable; it is also pushed onto the
       * front of the strand's state list.
       *)
	fun state (Strand{state=stateVars, ...}, x) = let
	      val x' = ToC.V(ToC.trType(V.ty x), V.name x)
	      in
		stateVars := x' :: !stateVars;
		x'
	      end
      end

  (* environments *)
    structure Env =
      struct
      (* create a new environment: no variable bindings and no scope *)
	fun new prog = ENV{info = INFO{prog = prog}, vMap = V.Map.empty, scope = NoScope}
      (* define the current translation context *)
	fun setScope scope (ENV{info, vMap, ...}) = ENV{info=info, vMap=vMap, scope=scope}
	fun scopeGlobal env = setScope GlobalScope env
	fun scopeInitially env = setScope InitiallyScope env
	fun scopeStrand (env, svars) = setScope (StrandScope svars) env
	fun scopeMethod (env, svars) = setScope (MethodScope svars) env
      (* bind a TreeIL variable to a target variable *)
	fun bind (ENV{info, vMap, scope}, x, x') = let
	      val vMap' = V.Map.insert(vMap, x, x')
	      in
		ENV{info = info, vMap = vMap', scope = scope}
	      end
      end

  (* programs *)
    structure Program =
      struct
	(* create a new program record.  The target spec is initialized first; the globals
	 * list starts with a verbatim prelude that selects the precision and includes the
	 * OpenCL type header.
	 *)
	fun new {double, parallel, debug} = let
	      val _ = RN.initTargetSpec double
	      val precisionDef = if double
		    then "#define DIDEROT_DOUBLE_PRECISION"
		    else "#define DIDEROT_SINGLE_PRECISION"
	      val prelude = CL.D_Verbatim[precisionDef, "#include \"Diderot/opencl_types.h\""]
	      in
		Prog{
		    double = double, parallel = parallel, debug = debug,
		    globals = ref [prelude],
		    topDecls = ref [],
		    strands = AtomTable.mkTable (16, Fail "strand table"),
		    initially = ref(CL.D_Comment["missing initially"]),
		    numDims = ref(0)
		  }
	      end
      (* register the global initialization part of a program: the body is wrapped in a
       * void function that takes a pointer to the globals struct, and that function is
       * pushed onto the front of the top-level declarations.
       *)
	fun init (Prog{topDecls, ...}, init) = let
	      val globalsParam = CL.PARAM([], CL.T_Ptr(CL.T_Named RN.globalsTy), RN.globalsVarName)
	      in
		topDecls := CL.D_Func([], CL.voidTy, RN.initGlobals, [globalsParam], init) :: !topDecls
	      end
      (* create and register the initially function for a program.  The generated function
       * (named RN.initially, returning a pointer to the world) consists of iterPrefix,
       * code that allocates the initial block of strands, a loop nest with one "for" loop
       * per element of iters that initializes each strand, and a return of the world.
       * As side effects, the function is stored in the program's initially ref and the
       * number of iterators is stored in numDims.
       *)
	fun initially {
	      prog = Prog{strands, initially,numDims,...},
	      isArray : bool,
	      iterPrefix : stm list,
	      iters : (var * exp * exp) list,
	      createPrefix : stm list,
	      strand : Atom.atom,
	      args : exp list
	    } = let
	      val name = Atom.toString strand
	      val nDims = List.length iters
	      val worldTy = CL.T_Ptr(CL.T_Named RN.worldTy)
	    (* map with the element's zero-based index *)
	      fun mapi f xs = let
		    fun mapf (_, []) = []
		      | mapf (i, x::xs) = f(i, x) :: mapf(i+1, xs)
		    in
		      mapf (0, xs)
		    end
	    (* base[i] is the lower bound of the i'th iterator *)
	      val baseInit = mapi (fn (i, (_, e, _)) => (i, CL.I_Exp e)) iters
	    (* size[i] is hi - lo + 1, since the bounds are inclusive *)
	      val sizeInit = mapi
		    (fn (i, (ToC.V(ty, _), lo, hi)) =>
			(i, CL.I_Exp(CL.mkBinOp(CL.mkBinOp(hi, CL.#-, lo), CL.#+, CL.E_Int(1, ty))))
		    ) iters
	      val allocCode = [
		      CL.mkComment["allocate initial block of strands"],
		      CL.mkDecl(CL.T_Array(CL.int32, SOME nDims), "base", SOME(CL.I_Array baseInit)),
		      CL.mkDecl(CL.T_Array(CL.uint32, SOME nDims), "size", SOME(CL.I_Array sizeInit)),
		      CL.mkDecl(worldTy, "wrld",
			SOME(CL.I_Exp(CL.E_Apply(RN.allocInitially, [
			    CL.mkUnOp(CL.%&, CL.E_Var(RN.strandDesc name)),
			    CL.E_Bool isArray,
			    CL.E_Int(IntInf.fromInt nDims, CL.int32),
			    CL.E_Var "base",
			    CL.E_Var "size"
			  ]))))
		    ]
	    (* create the loop nest for the initially iterations *)
	      val indexVar = "ix"
	      val strandTy = CL.T_Ptr(CL.T_Named(RN.strandTy name))
	    (* innermost body: createPrefix, fetch the strand at index ix, initialize it with
	     * args, and advance ix.
	     *)
	      fun mkLoopNest [] = CL.mkBlock(createPrefix @ [
		      CL.mkDecl(strandTy, "sp",
			SOME(CL.I_Exp(
			  CL.E_Cast(strandTy,
			  CL.E_Apply(RN.inState, [CL.E_Var "wrld", CL.E_Var indexVar]))))),
		      CL.mkCall(RN.strandInit name, CL.E_Var "sp" :: args),
		      CL.mkAssign(CL.E_Var indexVar, CL.mkBinOp(CL.E_Var indexVar, CL.#+, CL.E_Int(1, CL.uint32)))
		    ])
	    (* one "for (ty param = lo; param <= hi; param++)" loop per iterator; the first
	     * iterator in the list becomes the outermost loop.
	     *)
		| mkLoopNest ((ToC.V(ty, param), lo, hi)::iters) = let
		    val body = mkLoopNest iters
		    in
		      CL.mkFor(
			[(ty, param, lo)],
			CL.mkBinOp(CL.E_Var param, CL.#<=, hi),
			[CL.mkPostOp(CL.E_Var param, CL.^++)],
			body)
		    end
	      val iterCode = [
		      CL.mkComment["initially"],
		      CL.mkDecl(CL.uint32, indexVar, SOME(CL.I_Exp(CL.E_Int(0, CL.uint32)))),
		      mkLoopNest iters
		    ]
	      val body = CL.mkBlock(iterPrefix @ allocCode @ iterCode @ [CL.mkReturn(SOME(CL.E_Var "wrld"))])
	      val initFn = CL.D_Func([], worldTy, RN.initially, [], body)
	      in
	      	      numDims := nDims; 
		initially := initFn
	      end

      (***** OUTPUT *****)
    (* generate the declarations for a strand: the functions registered in the strand's
     * code list (in registration order) followed by a static print function
     * "<name>_print" that writes the strand's output variable to a FILE.
     * Raises Fail if no output variable has been registered for the strand or if the
     * output type is not an integer vector, a scalar, or a vector of reals.
     *)
    fun genStrand (Strand{name, tyName, output, code, ...}) = let
	    (* the print function *)
	      val prFnName = concat[name, "_print"]
	      val prFn = let
		    val params = [
			  CL.PARAM([], CL.T_Ptr(CL.T_Named "FILE"), "outS"),
			  CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), "self")
			]
		  (* an explicit case replaces a non-exhaustive "val SOME(ty, x) = !output"
		   * binding, which raised an anonymous Bind exception when Strand.output
		   * had not been called.
		   *)
		    val (ty, x) = (case !output
			   of SOME out => out
			    | NONE => raise Fail(concat[
				  "genStrand: strand ", name, " has no output variable"
				])
			  (* end case *))
		    val outState = CL.mkIndirect(CL.mkVar "self", x)
		  (* the format string followed by one argument per component *)
		    val prArgs = (case ty
			   of Ty.IVecTy 1 => [CL.E_Str(!RN.gIntFormat ^ "\n"), outState]
			    | Ty.IVecTy d => let
				val fmt = CL.E_Str(
				      String.concatWith " " (List.tabulate(d, fn _ => !RN.gIntFormat))
				      ^ "\n")
				val args = List.tabulate (d, fn i => ToC.ivecIndex(outState, d, i))
				in
				  fmt :: args
				end
			    | Ty.TensorTy[] => [CL.E_Str "%f\n", outState]
			    | Ty.TensorTy[d] => let
				val fmt = CL.E_Str(
				      String.concatWith " " (List.tabulate(d, fn _ => "%f"))
				      ^ "\n")
				val args = List.tabulate (d, fn i => ToC.vecIndex(outState, d, i))
				in
				  fmt :: args
				end
			    | _ => raise Fail("genStrand: unsupported output type " ^ Ty.toString ty)
			  (* end case *))
		    in
		      CL.D_Func(["static"], CL.voidTy, prFnName, params,
			CL.mkCall("fprintf", CL.mkVar "outS" :: prArgs))
		    end
	      in
		(* code is stored newest-first, so reverse after consing on the print function *)
		List.rev (prFn :: !code)
	      end
	(* the type declaration for the strand's state struct.  The state list is stored
	 * newest-first, so the cons-fold below yields the fields in registration order.
	 *)
	fun genStrandTyDef (Strand{tyName, state, ...}) = let
	      fun addField (ToC.V(ty, x), fields) = (ty, x) :: fields
	      val fields = List.foldl addField [] (!state)
	      in
		CL.D_StructDef(fields, tyName)
	      end
	
	
	(* generates the load kernel function: verbatim C for "loadKernel", which reads a
	 * whole file into a freshly malloc'd, NUL-terminated buffer and returns it (0 when
	 * the file cannot be opened, stat'ed, or the buffer cannot be allocated).
	 * Earlier versions of the emitted C called fread twice, never terminated the
	 * buffer, and never closed the file.
	 *)
	fun genKernelLoader() =
	      CL.D_Verbatim [
		  "/* Loads the Kernel from a file */",
		  "char * loadKernel (const char * filename) {",
		  "struct stat statbuf;",
		  "FILE *fh;",
		  "char *source;",
		  "size_t nread;",
		  "fh = fopen(filename, \"r\");",
		  "if (fh == 0)",
		  "   return 0;",
		  "if (stat(filename, &statbuf) != 0) {",
		  "   fclose(fh);",
		  "   return 0;",
		  "}",
		  "source = (char *) malloc(statbuf.st_size + 1);",
		  "if (source == 0) {",
		  "   fclose(fh);",
		  "   return 0;",
		  "}",
		  "nread = fread(source, 1, statbuf.st_size, fh);",
		  "source[nread] = '\\0';",
		  "fclose(fh);",
		  "return source;",
		  "}"
		]
						
	(* generates the main function of host code.  The generated main mallocs the globals
	 * struct, calls the globals initializer and then the setup function on it, and
	 * returns 0.
	 *)
	fun genHostMain() = let
	      val globalsVar = CL.E_Var RN.globalsVarName
	      val allocGlobals = CL.mkApply("malloc", [CL.mkApply("sizeof", [CL.E_Var RN.globalsTy])])
	      val stms = [
		      CL.mkDecl(CL.T_Ptr(CL.T_Named RN.globalsTy), RN.globalsVarName,
			SOME(CL.I_Exp allocGlobals)),
		      CL.mkCall(RN.initGlobals, [globalsVar]),
		      CL.mkCall(RN.setupFName, [globalsVar]),
		      CL.mkReturn(SOME(CL.E_Int(0, CL.intTy)))
		    ]
	      val mainParams = [
		      CL.PARAM([], CL.intTy, "argc"),
		      CL.PARAM([], CL.charArrayPtr, "argv")
		    ]
	      in
		CL.D_Func([], CL.intTy, "main", mainParams, CL.mkBlock stms)
	      end
	(* generates the host-side setup function.  The generated C function takes a pointer
	 * to the globals struct and, in order: queries the platform and a GPU device,
	 * creates a context and command queue, allocates host and device buffers for the
	 * strand states and a device buffer for the globals, loads the kernel and header
	 * sources, sets the work sizes and kernel arguments, enqueues the kernel, reads the
	 * output state back, and releases everything.
	 *
	 * Fixes relative to the earlier version:
	 *   - "num_platforms" was declared twice in the generated function;
	 *   - clGetPlatformIDs asked for 10 entries into a 1-element array of the wrong
	 *     element type and stored the count in num_devices;
	 *   - the host state buffers were never malloc'd (the statements were built but
	 *     left out of the body), so the read-back wrote through an uninitialized pointer;
	 *   - index 1 of the nDims-element work-size arrays was written even when nDims = 1;
	 *   - kernel argument 2 (the globals) was bound to the output-state buffer;
	 *   - several OpenCL error codes were computed but never checked.
	 *
	 * NOTE(review): the generated code is still incomplete -- nothing assigns
	 * numStrandsVar, nothing builds "program"/"kernel" from the loaded sources, and
	 * kernel argument 3 ("width") is never set.  These need values that are not
	 * available in this function; confirm the intended source before relying on it.
	 *)
	fun genHostSetupFunc (strand as Strand{name, tyName, ...}, filename, nDims) = let
	    (* names of the C variables used by the generated function *)
	      val programVar = "program"
	      val kernelVar = "kernel"
	      val cmdVar = "queue"
	      val inStateVar = "selfin"
	      val outStateVar = "selfout"
	      val stateSizeVar = "state_mem_size"
	      val clInstateVar = "clSelfIn"
	      val clOutStateVar = "clSelfOut"
	      val clGlobals = "clGlobals"
	      val sourcesVar = "sources"
	      val contextVar = "context"
	      val errVar = "err"
	      val globalVar = "global_work_size"
	      val localVar = "local_work_size"
	      val clFNVar = "filename"
	      val numStrandsVar = "numStrandsVar"
	      val headerFNVar = "header"
	      val deviceVar = "device"
	      val platformsVar = "platforms"
	      val numPlatformsVar = "num_platforms"
	      val numDevicesVar = "num_devices"
	    (* small expression helpers *)
	      val null = CL.E_Var "NULL"
	      fun mkInt n = CL.E_Int(IntInf.fromInt n, CL.intTy)
	      fun addrOf x = CL.E_UnOp(CL.%&, CL.E_Var x)
	      fun sizeOf tyName = CL.mkApply("sizeof", [CL.E_Var tyName])
	    (* assert(err == CL_SUCCESS) *)
	      val assertStm = CL.mkCall("assert", [CL.mkBinOp(CL.E_Var errVar, CL.#==, CL.E_Var "CL_SUCCESS")])
	      val params = [
		      CL.PARAM([], CL.T_Ptr(CL.T_Named RN.globalsTy), RN.globalsVarName)
		    ]
	    (* declare the OpenCL setup objects; each variable is declared exactly once *)
	      val declarations = [
		      CL.mkDecl(CL.clProgramTy, programVar, NONE),
		      CL.mkDecl(CL.clKernelTy, kernelVar, NONE),
		      CL.mkDecl(CL.clCmdQueueTy, cmdVar, NONE),
		      CL.mkDecl(CL.clContextTy, contextVar, NONE),
		      CL.mkDecl(CL.intTy, errVar, NONE),
		      CL.mkDecl(CL.intTy, numStrandsVar, NONE),
		      CL.mkDecl(CL.intTy, stateSizeVar, NONE),
		      CL.mkDecl(CL.clDeviceIdTy, deviceVar, NONE),
		      CL.mkDecl(CL.T_Ptr(CL.T_Named tyName), inStateVar, NONE),
		      CL.mkDecl(CL.clMemoryTy, clInstateVar, NONE),
		      CL.mkDecl(CL.clMemoryTy, clOutStateVar, NONE),
		      CL.mkDecl(CL.clMemoryTy, clGlobals, NONE),
		      CL.mkDecl(CL.T_Ptr(CL.T_Named tyName), outStateVar, NONE),
		      CL.mkDecl(CL.charPtr, clFNVar, SOME(CL.I_Exp(CL.E_Str filename))),
		      CL.mkDecl(CL.charPtr, headerFNVar, SOME(CL.I_Exp(CL.E_Str "Diderot/opencl_types.h"))),
		      CL.mkDecl(CL.T_Array(CL.charPtr, SOME 2), sourcesVar, NONE),
		      CL.mkDecl(CL.T_Array(CL.T_Named "size_t", SOME nDims), globalVar, NONE),
		      CL.mkDecl(CL.T_Array(CL.T_Named "size_t", SOME nDims), localVar, NONE),
		      CL.mkDecl(CL.intTy, numDevicesVar, SOME(CL.I_Exp(CL.E_Int(~1, CL.intTy)))),
		      CL.mkDecl(CL.T_Array(CL.T_Named "cl_platform_id", SOME 1), platformsVar, NONE),
		      CL.mkDecl(CL.intTy, numPlatformsVar, SOME(CL.I_Exp(CL.E_Int(~1, CL.intTy))))
		    ]
	    (* retrieve the platform: one entry, matching the one-element platforms array *)
	      val platformStm = [
		      CL.mkAssign(CL.E_Var errVar, CL.mkApply("clGetPlatformIDs", [
			  CL.E_Int(1, CL.intTy),
			  CL.E_Var platformsVar,
			  addrOf numPlatformsVar
			])),
		      assertStm
		    ]
	    (* retrieve one GPU device on the first platform *)
	      val devicesStm = [
		      CL.mkAssign(CL.E_Var errVar, CL.mkApply("clGetDeviceIDs", [
			  CL.mkSubscript(CL.E_Var platformsVar, CL.E_Int(0, CL.intTy)),
			  CL.E_Var "CL_DEVICE_TYPE_GPU",
			  CL.E_Int(1, CL.intTy),
			  addrOf deviceVar,
			  addrOf numDevicesVar
			])),
		      assertStm
		    ]
	    (* create the context *)
	      val contextStm = [
		      CL.mkAssign(CL.E_Var contextVar, CL.mkApply("clCreateContext", [
			  CL.E_Int(0, CL.intTy),
			  CL.E_Int(1, CL.intTy),
			  addrOf deviceVar,
			  null,
			  null,
			  addrOf errVar
			])),
		      assertStm
		    ]
	    (* create the command queue *)
	      val commandStm = [
		      CL.mkAssign(CL.E_Var cmdVar, CL.mkApply("clCreateCommandQueue", [
			  CL.E_Var contextVar,
			  CL.E_Var deviceVar,
			  CL.E_Int(0, CL.intTy),
			  addrOf errVar
			])),
		      assertStm
		    ]
	    (* size in bytes of all strand states, and the host-side state buffers *)
	      val strandSize = CL.mkAssign(CL.E_Var stateSizeVar,
		    CL.mkBinOp(sizeOf tyName, CL.#*, CL.E_Var numStrandsVar))
	      val strandObjects = [
		      CL.mkAssign(CL.E_Var inStateVar, CL.mkApply("malloc", [CL.E_Var stateSizeVar])),
		      CL.mkAssign(CL.E_Var outStateVar, CL.mkApply("malloc", [CL.E_Var stateSizeVar]))
		    ]
	    (* device-side buffers for the strand states *)
	      fun stateBuffer clVar = [
		      CL.mkAssign(CL.E_Var clVar, CL.mkApply("clCreateBuffer", [
			  CL.E_Var contextVar,
			  CL.E_Var "CL_MEM_READ_WRITE",
			  CL.E_Var stateSizeVar,
			  null,
			  addrOf errVar
			])),
		      assertStm
		    ]
	      val clStrandObjects = stateBuffer clInstateVar @ stateBuffer clOutStateVar
	    (* device-side copy of the globals struct, initialized from the host pointer *)
	      val clGlobalsObject = [
		      CL.mkAssign(CL.E_Var clGlobals, CL.mkApply("clCreateBuffer", [
			  CL.E_Var contextVar,
			  CL.E_Var "CL_MEM_COPY_HOST_PTR",
			  sizeOf RN.globalsTy,
			  CL.E_Var RN.globalsVarName,
			  addrOf errVar
			])),
		      assertStm
		    ]
	    (* load the kernel and header files *)
	      val sourceStms = [
		      CL.mkAssign(CL.mkSubscript(CL.E_Var sourcesVar, CL.E_Int(0, CL.intTy)),
			CL.mkApply(RN.clLoaderFN, [CL.E_Var clFNVar])),
		      CL.mkAssign(CL.mkSubscript(CL.E_Var sourcesVar, CL.E_Int(1, CL.intTy)),
			CL.mkApply(RN.clLoaderFN, [CL.E_Var headerFNVar]))
		    ]
	    (* set every one of the nDims entries of the work-size arrays (and no more) *)
	      val globalAndlocalStms = let
		    fun setSize (arr, value) i =
			  CL.mkAssign(CL.mkSubscript(CL.E_Var arr, mkInt i), value)
		    in
		      List.tabulate (nDims, setSize (globalVar, CL.E_Var numStrandsVar))
		      @ List.tabulate (nDims, setSize (localVar, CL.E_Int(16, CL.intTy)))
		    end
	    (* bind the kernel's buffer arguments: in-state, out-state, globals *)
	      fun setArg (i, clVar) = CL.mkApply("clSetKernelArg", [
		      CL.E_Var kernelVar,
		      mkInt i,
		      sizeOf "cl_mem",
		      addrOf clVar
		    ])
	      val kernelArguments = [
		      CL.mkAssign(CL.E_Var errVar, setArg (0, clInstateVar)),
		      CL.mkExpStm(CL.mkAssignOp(CL.E_Var errVar, CL.|=, setArg (1, clOutStateVar))),
		      CL.mkExpStm(CL.mkAssignOp(CL.E_Var errVar, CL.|=, setArg (2, clGlobals))),
		      assertStm
		    ]
	    (* enqueue the kernel over an nDims-dimensional range and wait for it *)
	      val enqueueStm = [
		      CL.mkAssign(CL.E_Var errVar, CL.mkApply("clEnqueueNDRangeKernel", [
			  CL.E_Var cmdVar,
			  CL.E_Var kernelVar,
			  mkInt nDims,
			  null,
			  CL.E_Var globalVar,
			  CL.E_Var localVar,
			  CL.E_Int(0, CL.intTy),
			  null,
			  null
			])),
		      assertStm,
		      CL.mkCall("clFinish", [CL.E_Var cmdVar])
		    ]
	    (* retrieve the output (blocking read into the host out-state buffer) *)
	      val outputStm = [
		      CL.mkAssign(CL.E_Var errVar, CL.mkApply("clEnqueueReadBuffer", [
			  CL.E_Var cmdVar,
			  CL.E_Var clOutStateVar,
			  CL.E_Var "CL_TRUE",
			  CL.E_Int(0, CL.intTy),
			  CL.E_Var stateSizeVar,
			  CL.E_Var outStateVar,
			  CL.E_Int(0, CL.intTy),
			  null,
			  null
			])),
		      assertStm
		    ]
	    (* free all the objects *)
	      val freeStms = [
		      CL.mkCall("clReleaseKernel", [CL.E_Var kernelVar]),
		      CL.mkCall("clReleaseProgram", [CL.E_Var programVar]),
		      CL.mkCall("clReleaseCommandQueue", [CL.E_Var cmdVar]),
		      CL.mkCall("clReleaseMemObject", [CL.E_Var clInstateVar]),
		      CL.mkCall("clReleaseMemObject", [CL.E_Var clOutStateVar]),
		      CL.mkCall("clReleaseMemObject", [CL.E_Var clGlobals]),
		      CL.mkCall("clReleaseContext", [CL.E_Var contextVar]),
		      CL.mkCall("free", [CL.E_Var inStateVar]),
		      CL.mkCall("free", [CL.E_Var outStateVar])
		    ]
	    (* put all the statements together *)
	      val body = List.concat [
		      declarations, platformStm, devicesStm, contextStm, commandStm,
		      [strandSize], strandObjects, clStrandObjects, clGlobalsObject,
		      sourceStms, globalAndlocalStms, kernelArguments, enqueueStm,
		      outputStm, freeStms
		    ]
	      in
		CL.D_Func([], CL.voidTy, RN.setupFName, params, CL.mkBlock body)
	      end
		
		
	(* generate the main kernel function for the .cl file.  The kernel takes the in- and
	 * out-state arrays, the globals, and a row width; it computes the strand's index
	 * from the global thread id(s), runs the strand's init function, and then loops
	 * calling update (and stabilize) until update reports RN.kStabilize.
	 *
	 * Fixes relative to the earlier version:
	 *   - the 1D case subscripted the state arrays with the string literal "x"
	 *     (CL.E_Str) instead of the variable x;
	 *   - the scalar "width" parameter carried a __global qualifier, which OpenCL C
	 *     only permits on pointer arguments;
	 *   - the strand functions take pointers to the state struct (see Strand.init and
	 *     Strand.method), but were passed the local structs by value.
	 *
	 * NOTE(review): the two assignments in strandObjects store the still-uninitialized
	 * locals INTO selfIn/selfOut; a load in the other direction, or a store after the
	 * loop, may have been intended -- confirm.  Also, stabilize is called on every
	 * iteration, before the status test, and the init call omits the strand's
	 * creation arguments; both look unfinished -- confirm.
	 *)
	fun genKernelFun (Strand{name, tyName, ...}, nDims) = let
	      val fName = RN.kernelFuncName
	      val inState = "strand_in"
	      val outState = "strand_out"
	      fun addrOf x = CL.mkUnOp(CL.%&, CL.E_Var x)
	      val params = [
		      CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfIn"),
		      CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named tyName), "selfOut"),
		      CL.PARAM(["__global"], CL.T_Ptr(CL.T_Named RN.globalsTy), "cl_globals"),
		      CL.PARAM([], CL.intTy, "width")
		    ]
	    (* "int v = 0;" and "v = <global id in dimension dim>;" *)
	      fun declId v = CL.mkDecl(CL.intTy, v, SOME(CL.I_Exp(CL.E_Int(0, CL.intTy))))
	      fun readId (v, dim) = CL.mkAssign(CL.E_Var v,
		    CL.mkApply(RN.getGlobalThreadId, [CL.E_Int(dim, CL.intTy)]))
	      val thread_ids = if nDims = 1
		    then [declId "x", readId ("x", 0)]
		    else [declId "x", declId "y", readId ("x", 0), readId ("y", 1)]
	      val strandDecl = [
		      CL.mkDecl(CL.T_Named tyName, inState, NONE),
		      CL.mkDecl(CL.T_Named tyName, outState, NONE)
		    ]
	    (* the strand's position in the state arrays: x in 1D, y * width + x otherwise *)
	      val index = if nDims = 1
		    then CL.E_Var "x"
		    else CL.mkBinOp(CL.mkBinOp(CL.E_Var "y", CL.#*, CL.E_Var "width"), CL.#+, CL.E_Var "x")
	      val strandObjects = [
		      CL.mkAssign(CL.mkSubscript(CL.E_Var "selfIn", index), CL.E_Var inState),
		      CL.mkAssign(CL.mkSubscript(CL.E_Var "selfOut", index), CL.E_Var outState)
		    ]
	      val status = CL.mkDecl(CL.intTy, "status", SOME(CL.I_Exp(CL.E_Int(0, CL.intTy))))
	      val strand_init_function = CL.mkCall(RN.strandInit name, [addrOf inState])
	      val local_vars = thread_ids @ strandDecl @ strandObjects @ [status, strand_init_function]
	    (* while (status != stabilize) { status = update(..); stabilize(..); if (..) break; } *)
	      val while_exp = CL.mkBinOp(CL.E_Var "status", CL.#!=, CL.E_Var RN.kStabilize)
	      val while_body = [
		      CL.mkAssign(CL.E_Var "status",
			CL.mkApply(RN.strandUpdate name, [addrOf inState, addrOf outState])),
		      CL.mkCall(RN.strandStabilize name, [addrOf inState, addrOf outState]),
		      CL.mkIfThen(CL.mkBinOp(CL.E_Var "status", CL.#==, CL.E_Var RN.kStabilize), CL.mkBreak)
		    ]
	      val whileBlock = [CL.mkWhile(while_exp, CL.mkBlock while_body)]
	      val body = CL.mkBlock(local_vars @ whileBlock)
	      in
		CL.D_Func(["__kernel"], CL.voidTy, fName, params, body)
	      end
	(* generate a global structure from the globals: every variable declaration in the
	 * list contributes one (type, name) field, in list order; all other kinds of
	 * declaration are skipped.
	 *)
	fun genGlobalStruct(globals) = let
	      fun field (CL.D_Var(_, ty, x, _)) = SOME(ty, x)
		| field _ = NONE
	      in
		CL.D_StructDef(List.mapPartial field globals, RN.globalsTy)
	      end
      (* generate the table of strand descriptors: first a variable holding the number of
       * strands, then an array holding the address of each strand's descriptor.
       *)
	fun genStrandTable (ppStrm, strands) = let
	      val nStrands = List.length strands
	      fun descAddr (Strand{name, ...}) = CL.I_Exp(CL.mkUnOp(CL.%&, CL.E_Var(RN.strandDesc name)))
	    (* pair each initializer with its array index, starting from zero *)
	      fun indexed () = ListPair.zip (List.tabulate (nStrands, fn i => i), List.map descAddr strands)
	      val countInit = CL.I_Exp(CL.E_Int(IntInf.fromInt nStrands, CL.int32))
	      val tableTy = CL.T_Array(CL.T_Ptr(CL.T_Named RN.strandDescTy), SOME nStrands)
	      in
		PrintAsC.output (ppStrm, CL.D_Var([], CL.int32, RN.numStrands, SOME countInit));
		PrintAsC.output (ppStrm, CL.D_Var([], tableTy, RN.strands, SOME(CL.I_Array(indexed ()))))
	      end

	(* write the generated sources for a program: "<baseName>.c" holds the host code and
	 * "<baseName>.cl" holds the OpenCL code.  Only the first strand in the table is
	 * emitted.  Raises Fail if the program has no strands.
	 *
	 * Fixes relative to the earlier version:
	 *   - the host file's include of Diderot/diderot.h was emitted without quotes,
	 *     which is not valid C;
	 *   - the host file uses stdio, malloc, stat, and assert in the code generated by
	 *     this structure, so the matching standard headers are now included;
	 *   - the strand is selected before the files are opened, so an empty strand table
	 *     no longer raises Empty with both output files left open;
	 *   - the output files are closed if emitting a declaration raises;
	 *   - the strand's code list is stored newest-first, so it is reversed before
	 *     printing (as is already done for topDecls and in genStrand).
	 *)
	fun genSrc (baseName, Prog{globals, topDecls, strands, initially,numDims,...}) = let
	      val clFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "cl"}
	      val cFileName = OS.Path.joinBaseExt{base=baseName, ext=SOME "c"}
	      val single_strand as Strand{code, ...} = (case AtomTable.listItems strands
		     of s::_ => s
		      | [] => raise Fail "genSrc: program has no strands"
		    (* end case *))
	      val clOutS = TextIO.openOut clFileName
	      val cOutS = TextIO.openOut cFileName
	      val clppStrm = PrintAsC.new clOutS
	      val cppStrm = PrintAsC.new cOutS
	      fun cppDecl dcl = PrintAsC.output(cppStrm, dcl)
	      fun clppDecl dcl = PrintAsC.output(clppStrm, dcl)
	      fun emit () = (
		  (* Generate the Host file *)
		    cppDecl (CL.D_Verbatim[
			"#include <stdio.h>",
			"#include <stdlib.h>",
			"#include <assert.h>",
			"#include <sys/stat.h>",
			"#include <OpenCL/OpenCL.h>",
			"#include \"Diderot/diderot.h\""
		      ]);
		    cppDecl (genGlobalStruct (!globals));
		    cppDecl (genStrandTyDef single_strand);
		    cppDecl (genKernelLoader());
		    List.app cppDecl (List.rev (!topDecls));
		    cppDecl (genHostSetupFunc (single_strand, clFileName, !numDims));
		    cppDecl (genHostMain());
		  (* Generate the OpenCl file *)
		    clppDecl (genGlobalStruct (!globals));
		    clppDecl (genStrandTyDef single_strand);
		    List.app clppDecl (List.rev (!code));
		    clppDecl (genKernelFun (single_strand, !numDims)))
	      in
		(emit () handle ex => (TextIO.closeOut cOutS; TextIO.closeOut clOutS; raise ex));
		(*List.app (fn strand => List.app ppDecl (genStrand strand)) strands;
		 genStrandTable (ppStrm, strands); 
		ppDecl (!initially);*) 
		PrintAsC.close cppStrm;
		PrintAsC.close clppStrm;
		TextIO.closeOut cOutS;
		TextIO.closeOut clOutS
	      end

      (* output the code to a file.  The string is the basename of the file, the extension
       * is provided by the target.  The compiler and loader flags are computed here but
       * only the sources are generated; the compile and link steps are disabled.
       *)
	fun generate (basename, prog as Prog{double, parallel, debug, ...}) = let
	      fun consIf (flag, x, xs) = if flag then x::xs else xs
	    (* generate the C compiler flags *)
	      val includes = ["-I" ^ Paths.diderotInclude, "-I" ^ Paths.teemInclude]
	      val dbgFlag = if debug then #debug Paths.cflags else #ndebug Paths.cflags
	      val cflags = #base Paths.cflags :: dbgFlag :: consIf (parallel, #pthread Paths.cflags, includes)
	    (* generate the loader flags *)
	      val threadLibs = consIf (parallel, #pthread Paths.extraLibs, [])
	      val extraLibs = Paths.teemLinkFlags @ (#base Paths.extraLibs :: threadLibs)
	      val rtLib = TargetUtil.runtimeName {
		      target = TargetUtil.TARGET_CL,
		      parallel = parallel, double = double, debug = debug
		    }
	      val ldOpts = rtLib :: extraLibs
	      in
		genSrc (basename, prog)
	      end
			      
		(*RunCC.compile (basename, cflags);
		RunCC.link (basename, ldOpts)*)


      end

  (* strands *)
    structure Strand =
      struct
      (* create a fresh strand record (no state, no output, no code) and enter it in the
       * program's strand table under its name.
       *)
	fun define (Prog{strands, ...}, strandId) = let
	      val strandName = Atom.toString strandId
	      val strand = Strand{
		      name = strandName,
		      tyName = RN.strandTy strandName,
		      state = ref [],
		      output = ref NONE,
		      code = ref []
		    }
	      val () = AtomTable.insert strands (strandId, strand)
	      in
		strand
	      end

      (* return the strand with the given name *)
	fun lookup (Prog{strands, ...}, strandId) = AtomTable.lookup strands strandId

      (* a parameter that points to the strand's state struct *)
	fun selfParam (tyName, x) = CL.PARAM([], CL.T_Ptr(CL.T_Named tyName), x)

      (* register the strand-state initialization code.  The variables are the strand
       * parameters; they follow the "selfOut" state pointer in the generated function.
       *)
	fun init (Strand{name, tyName, code, ...}, params, init) = let
	      fun toParam (ToC.V(ty, x)) = CL.PARAM([], ty, x)
	      val allParams = selfParam (tyName, "selfOut") :: List.map toParam params
	      in
		code := CL.D_Func([], CL.voidTy, RN.strandInit name, allParams, init) :: !code
	      end

      (* register a strand method; the generated function is named "<strand>_<method>",
       * takes the in- and out-state pointers, and returns an int32.
       *)
	fun method (Strand{name, tyName, code, ...}, methName, body) = let
	      val fName = concat[name, "_", methName]
	      val stateParams = [selfParam (tyName, "selfIn"), selfParam (tyName, "selfOut")]
	      in
		code := CL.D_Func([], CL.int32, fName, stateParams, body) :: !code
	      end

      (* record the strand's output variable and its TreeIL type *)
	fun output (Strand{output=outRef, ...}, ty, ToC.V(_, x)) = outRef := SOME(ty, x)

      end

  end

structure CLBackEnd = CodeGenFn(CLTarget)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0