Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] View of /sml/trunk/src/MLRISC/x86/mltree/x86.sml
ViewVC logotype

View of /sml/trunk/src/MLRISC/x86/mltree/x86.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 984 - (download) (annotate)
Wed Nov 21 19:00:08 2001 UTC (17 years, 9 months ago) by george
File size: 76787 byte(s)
  Implemented a complete redesign of MLRISC pseudo-ops. Now there
  ought never to be any question of incompatibilities with
  pseudo-op syntax expected by host assemblers.

  For now, only modules supporting GAS syntax are implemented
  but more should follow, such as MASM, and vendor assembler
  syntax, e.g. IBM as, Sun as, etc.
(*
 *
 * COPYRIGHT (c) 1998 Bell Laboratories.
 * 
 * This is a revised version that takes into account
 * the extended x86 instruction set, and has better handling of
 * non-standard types.  I've factored out the integer/floating point 
 * comparison code, added optimizations for conditional moves. 
 * The latter generates SETcc and CMOVcc (Pentium Pro only) instructions. 
 * To avoid problems, I have tried to incorporate as much of 
 * Lal's original magic incantations as possible.
 *
 * Some changes:
 *
 *  1.  REMU/REMS/REMT are now supported 
 *  2.  COND is supported by generating SETcc and/or CMOVcc; this
 *      may require at least a Pentium II to work.
 *  3.  Division by a constant has been optimized.   Division by
 *      a power of 2 generates SHRL or SARL.
 *  4.  Better addressing mode selection has been implemented.  This should
 *      improve array indexing on SML/NJ.
 *  5.  Generate testl/testb instead of andl whenever appropriate.  This
 *      is recommended by the Intel Optimization Guide and seems to improve
 *      boxity tests on SML/NJ.
 *
 * More changes for floating point: 
 *  A new mode is implemented which generates pseudo 3-address instructions
 * for floating point.  These instructions are register allocated the
 * normal way, with the virtual registers mapped onto a set of pseudo
 * %fp registers.  These registers are then mapped onto the %st registers
 * with a new postprocessing phase.
 *
 * -- Allen
 *)
(* Compile-time configuration for the functor below. *)
local
   (* when true, pseudo memory registers are rewritten into explicit
    * memory operands, making a separate RewritePseudo phase unnecessary *)
   val rewriteMemReg = true (* should we rewrite memRegs *)
   (* when true, the fast floating-point mode (pseudo 3-address fp
    * instructions, see the header comment) may be turned on at runtime
    * via the fast_floating_point flag *)
   val enableFastFPMode = true (* set this to false to disable the mode *)
in

functor X86
  (structure X86Instr : X86INSTR
   structure MLTreeUtils : MLTREE_UTILS
			where T = X86Instr.T
   structure ExtensionComp : MLTREE_EXTENSION_COMP
     			where I = X86Instr and T = X86Instr.T
   structure MLTreeStream : MLTREE_STREAM
			where T = ExtensionComp.T
    datatype arch = Pentium | PentiumPro | PentiumII | PentiumIII
    val arch : arch ref
    val cvti2f : 
         {ty: X86Instr.T.ty, 
          src: X86Instr.operand, 
             (* source operand, guaranteed to be non-memory! *)
          an: Annotations.annotations ref (* cluster annotations *)
         } -> 
         {instrs : X86Instr.instruction list,(* the instructions *)
          tempMem: X86Instr.operand,         (* temporary for CVTI2F *)
          cleanup: X86Instr.instruction list (* cleanup code *)
         }
    (* When the following flag is set, we allocate floating point registers
     * directly on the floating point stack
     *)
    val fast_floating_point : bool ref 
  ) : sig include MLTREECOMP 
          val rewriteMemReg : bool
      end = 
struct
  (* Abbreviations for the argument structures and their commonly used
   * substructures. *)
  structure I = X86Instr
  structure T = I.T
  structure TS = ExtensionComp.TS
  structure C = I.C
  structure Shuffle = Shuffle(I) (* parallel-copy sequencing *)
  structure W32 = Word32
  structure A = MLRiscAnnotations
  structure CFG = ExtensionComp.CFG
  structure CB = CellsBasis

  (* the instruction-level and MLTREE-level stream types used here *)
  type instrStream = (I.instruction,C.cellset,CFG.cfg) TS.stream
  type mltreeStream = (T.stm,T.mlrisc list,CFG.cfg) TS.stream

  (* tag distinguishing floating point from integer values; presumably
   * consumed by code later in the file — not visible in this chunk *)
  datatype kind = REAL | INTEGER
 
  (* Generic MLTREE lowering utilities, instantiated for a target whose
   * only natural width is 32 bits and which prescribes no canonical
   * sign/zero extension representation (rep = NEITHER). *)
  structure Gen = MLTreeGen
     (structure T = T
      val intTy = 32
      val naturalWidths = [32]
      datatype rep = SE | ZE | NEITHER
      val rep = NEITHER
     )

  (* report an error from this module, tagged with the module name *)
  fun error msg = MLRiscErrorMsg.error("X86",msg)

  (* Should we perform automatic MemReg translation?  
   * If this is on, we can avoid doing RewritePseudo phase entirely.
   * (re-exported from the configuration switch above)
   *)
  val rewriteMemReg = rewriteMemReg

  (* The memory-register range below is hardcoded. *)

  (* true iff register r's number falls in the pseudo memory-register
   * range [8, 32) *)
  fun inMemRegRange r =
      let val n = CB.registerNum r
      in  n >= 8 andalso n < 32 end

  (* is r an integer pseudo memory register?  Subject to the
   * rewriteMemReg configuration switch. *)
  fun isMemReg r = rewriteMemReg andalso inMemRegRange r

  (* In fast floating-point mode only the fp registers in the memreg
   * range live in memory; otherwise every fp register is treated as a
   * memory register. *)
  fun isFMemReg r =
      if enableFastFPMode andalso !fast_floating_point
      then inMemRegRange r
      else true

  (* does any register in the list fall in the memreg range? *)
  val isAnyFMemReg = List.exists inMemRegRange

  (* the bottom (%st(0)) and top (%st(7)) floating point stack cells *)
  val ST0 = C.ST 0
  val ST7 = C.ST 7
  (* the MLTREE integer constant one *)
  val one = T.I.int_1

  (* Opcode dictionaries for the 8-, 16- and 32-bit variants of the
   * integer instructions; presumably consumed by width-parametric code
   * later in the file (not visible in this chunk). *)
  val opcodes8 = {INC=I.INCB,DEC=I.DECB,ADD=I.ADDB,SUB=I.SUBB,
                  NOT=I.NOTB,NEG=I.NEGB,
                  SHL=I.SHLB,SHR=I.SHRB,SAR=I.SARB,
                  OR=I.ORB,AND=I.ANDB,XOR=I.XORB}
  val opcodes16 = {INC=I.INCW,DEC=I.DECW,ADD=I.ADDW,SUB=I.SUBW,
                   NOT=I.NOTW,NEG=I.NEGW,
                   SHL=I.SHLW,SHR=I.SHRW,SAR=I.SARW,
                   OR=I.ORW,AND=I.ANDW,XOR=I.XORW}
  val opcodes32 = {INC=I.INCL,DEC=I.DECL,ADD=I.ADDL,SUB=I.SUBL,
                   NOT=I.NOTL,NEG=I.NEGL,
                   SHL=I.SHLL,SHR=I.SHRL,SAR=I.SARL,
                   OR=I.ORL,AND=I.ANDL,XOR=I.XORL}

  (* 
   * The code generator 
   *)
  fun selectInstructions 
       (instrStream as
        TS.S.STREAM{emit,defineLabel,entryLabel,pseudoOp,annotation,getAnnotations,
                 beginCluster,endCluster,exitBlock,comment,...}) =
  let exception EA

      (* label where a trap is generated -- one per cluster; the cached
       * JCC instruction is paired with its target label *)
      val trapLabel = ref (NONE: (I.instruction * Label.label) option)

      (* flag floating point generation *)
      val floatingPointUsed = ref false

      (* effective address of an integer register: memregs become
       * memory operands, everything else a direct register operand *)
      fun IntReg r = if isMemReg r then I.MemReg r else I.Direct r
      (* likewise for floating point registers *)
      and RealReg r = if isFMemReg r then I.FDirect r else I.FPR r

      (* Emit the overflow trap: a JCC on the overflow condition that
       * jumps to the cluster's trap label.  The jump instruction and
       * its label are created lazily and cached in trapLabel, so each
       * cluster gets exactly one trap label. *)
      fun trap() =
          (case !trapLabel of
             SOME(jmp, _) => emit jmp
           | NONE =>
             let val label = Label.label "trap" ()
                 val jmp   = I.JCC{cond=I.O,
                                   opnd=I.ImmedLabel(T.LABEL label)}
             in  trapLabel := SOME(jmp, label);
                 emit jmp
             end)

      (* fresh virtual integer / floating point registers *)
      val newReg  = C.newReg
      val newFreg = C.newFreg

      (* map a bit width onto the instruction set's fp precision tag *)
      fun fsize 32 = I.FP32
        | fsize 64 = I.FP64
        | fsize 80 = I.FP80
        | fsize _  = error "fsize"

      (* wrap instruction i in ANNOTATION nodes, one per annotation,
       * consuming the list left to right (innermost first) *)
      fun mark'(i, an) =
          List.foldl (fn (a, i) => I.ANNOTATION{i=i, a=a}) i an

      (* annotate an instruction and emit it *)
      fun mark(i, an) = emit(mark'(i, an))

      (* emit a whole list of instructions *)
      val emits = List.app emit

      (* emit parallel copies for integers 
       * Translates parallel copies that involve memregs into 
       * individual copies.
       *)
      fun copy([], [], an) = ()
        | copy(dst, src, an) = 
          let (* a memreg-to-memreg move must go through a temporary
               * register, since x86 has no memory-to-memory move *)
              fun mvInstr{dst as I.MemReg rd, src as I.MemReg rs} = 
                  if CB.sameColor(rd,rs) then [] else
                  let val tmpR = I.Direct(newReg())
                  in  [I.MOVE{mvOp=I.MOVL, src=src, dst=tmpR},
                       I.MOVE{mvOp=I.MOVL, src=tmpR, dst=dst}]
                  end
                | mvInstr{dst=I.Direct rd, src=I.Direct rs} = 
                    if CB.sameColor(rd,rs) then [] 
                    else [I.COPY{dst=[rd], src=[rs], tmp=NONE}]
                | mvInstr{dst, src} = [I.MOVE{mvOp=I.MOVL, src=src, dst=dst}]
          in
             emits (Shuffle.shuffle{mvInstr=mvInstr, ea=IntReg}
               {tmp=SOME(I.Direct(newReg())),
                dst=dst, src=src})
          end
 
      (* conversions *)
      val itow = Word.fromInt
      val wtoi = Word.toInt
      (* interpret an MLTREE integer literal as a 32-bit int *)
      fun toInt32 i = T.I.toInt32(32, i)
      val w32toi32 = Word32.toLargeIntX 
      val i32tow32 = Word32.fromLargeInt

      (* One day, this is going to bite us when precision(LargeInt)>32 *)
      fun wToInt32 w = Int32.fromLarge(Word32.toLargeIntX w)

      (* some useful registers *)
      val eax = I.Direct(C.eax)
      val ecx = I.Direct(C.ecx)
      val edx = I.Direct(C.edx)

      (* an immediate operand denoting the address of label lab *)
      fun immedLabel lab = I.ImmedLabel(T.LABEL lab)
 
      (* Is the expression zero? *)
      fun isZero(T.LI z) = T.I.isZero z 
        | isZero(T.MARK(e,a)) = isZero e
        | isZero _ = false
       (* Does the expression set the zero bit? 
        * WARNING: we assume these things are not optimized out!
        *)
      fun setZeroBit(T.ANDB _)     = true
        | setZeroBit(T.ORB _)      = true
        | setZeroBit(T.XORB _)     = true
        | setZeroBit(T.SRA _)      = true
        | setZeroBit(T.SRL _)      = true
        | setZeroBit(T.SLL _)      = true
        | setZeroBit(T.SUB _)      = true
        | setZeroBit(T.ADDT _)     = true
        | setZeroBit(T.SUBT _)     = true
        | setZeroBit(T.MARK(e, _)) = setZeroBit e
        | setZeroBit _             = false

      (* As setZeroBit, but additionally accepts 32-bit ADD; callers of
       * this variant must then emit ADD as addl rather than leal (leal
       * does not update the flags — see the inline note below). *)
      fun setZeroBit2(T.ANDB _)     = true
        | setZeroBit2(T.ORB _)      = true
        | setZeroBit2(T.XORB _)     = true
        | setZeroBit2(T.SRA _)      = true
        | setZeroBit2(T.SRL _)      = true
        | setZeroBit2(T.SLL _)      = true
        | setZeroBit2(T.ADD(32, _, _)) = true (* can't use leal! *)
        | setZeroBit2(T.SUB _)      = true
        | setZeroBit2(T.ADDT _)     = true
        | setZeroBit2(T.SUBT _)     = true
        | setZeroBit2(T.MARK(e, _)) = setZeroBit2 e
        | setZeroBit2 _             = false

      (* emit parallel copies for floating point 
       * Normal version.
       *)
      fun fcopy'(fty, [], [], _) = ()
        | fcopy'(fty, dst as [_], src as [_], an) = 
            mark(I.FCOPY{dst=dst,src=src,tmp=NONE}, an)
          (* multi-register copies get an fp temporary (presumably so a
           * later phase can break cyclic copies — cf. Shuffle) *)
        | fcopy'(fty, dst, src, an) = 
            mark(I.FCOPY{dst=dst,src=src,tmp=SOME(I.FDirect(newFreg()))}, an)

      (* emit parallel copies for floating point.
       * Fast version.
       * Translates parallel copies that involve memregs into 
       * individual copies.
       *)
       
      fun fcopy''(fty, [], [], _) = ()
        | fcopy''(fty, dst, src, an) = 
          (* NOTE: the first branch is currently always taken
           * ("true orelse ..."); the FCOPY fallback below is dead code,
           * kept for when the shuffle-based translation is restricted
           * to copies that actually involve memregs. *)
          if true orelse isAnyFMemReg dst orelse isAnyFMemReg src then
          let val fsize = fsize fty
              fun mvInstr{dst, src} = [I.FMOVE{fsize=fsize, src=src, dst=dst}]
          in
              emits (Shuffle.shuffle{mvInstr=mvInstr, ea=RealReg}
                {tmp=case dst of
                       [_] => NONE
                        (* bug fix: the shuffle temporary is a floating
                         * point pseudo register, so it must come from
                         * newFreg() — newReg() would wrap an *integer*
                         * virtual register in I.FPR (cf. the FCOPY case
                         * below and fcopy', which both use newFreg()) *)
                     |  _  => SOME(I.FPR(newFreg())),
                 dst=dst, src=src})
          end
          else
            mark(I.FCOPY{dst=dst,src=src,tmp=
                         case dst of
                           [_] => NONE
                         | _   => SOME(I.FPR(newFreg()))}, an)
 
      (* dispatch to the fast or the normal floating point copy,
       * depending on the compile-time and runtime fp-mode flags *)
      fun fcopy x = if enableFastFPMode andalso !fast_floating_point 
                    then fcopy'' x else fcopy' x

      (* Translate an MLTREE relational operator to the x86 condition
       * code; unsigned comparisons map to B/BE/AE/A. *)
      fun cond cc =
          (case cc of
             T.LT => I.LT | T.LTU => I.B
           | T.LE => I.LE | T.LEU => I.BE
           | T.EQ => I.EQ | T.NE  => I.NE
           | T.GE => I.GE | T.GEU => I.AE
           | T.GT => I.GT | T.GTU => I.A)

      (* zero a location by xoring it with itself; note that unlike a
       * move of an immediate 0 this clobbers the condition codes *)
      fun zero dst = emit(I.BINARY{binOp=I.XORL, src=dst, dst=dst})

      (* Move and annotate *) 
      fun move'(src as I.Direct s, dst as I.Direct d, an) =
          if CB.sameColor(s,d) then ()
          else mark(I.COPY{dst=[d], src=[s], tmp=NONE}, an)
          (* loading 0 into a register is done with xor; NB: this also
           * clobbers the condition codes, unlike a plain movl *)
        | move'(I.Immed 0, dst as I.Direct d, an) = 
            mark(I.BINARY{binOp=I.XORL, src=dst, dst=dst}, an)
        | move'(src, dst, an) = mark(I.MOVE{mvOp=I.MOVL, src=src, dst=dst}, an)

      (* Move only! *)  
      fun move(src, dst) = move'(src, dst, [])

      (* the read-only memory region annotation *)
      val readonly = I.Region.readonly

      (* 
       * Compute an effective address.  
       *)
      fun address(ea, mem) = let 
          (* Keep building a bigger and bigger effective address expressions 
           * The input is a list of trees
           * b -- base register (option)
           * i -- index register (option)
           * s -- scale, as log2 of the multiplier
           * d -- immed displacement (an I.Immed or I.ImmedLabel operand)
           *)
          fun doEA([], b, i, s, d) = makeAddressingMode(b, i, s, d)
            | doEA(t::trees, b, i, s, d) =
              (case t of 
                 T.LI n   => doEAImmed(trees, toInt32 n, b, i, s, d)
               | T.CONST _ => doEALabel(trees, t, b, i, s, d)
               | T.LABEL _ => doEALabel(trees, t, b, i, s, d)
               | T.LABEXP le => doEALabel(trees, le, b, i, s, d)
                 (* fold the non-memreg summand in first *)
               | T.ADD(32, t1, t2 as T.REG(_,r)) => 
                    if isMemReg r then doEA(t2::t1::trees, b, i, s, d)
                    else doEA(t1::t2::trees, b, i, s, d)
               | T.ADD(32, t1, t2) => doEA(t1::t2::trees, b, i, s, d)
               | T.SUB(32, t1, T.LI n) => 
                    doEA(t1::T.LI(T.I.NEG(32,n))::trees, b, i, s, d)
                 (* a left shift by 1..3 becomes a scaled index *)
               | T.SLL(32, t1, T.LI n) => let
                    val n = T.I.toInt(32, n)
                 in 
                   case n
                   of 0 => displace(trees, t1, b, i, s, d)
                    | 1 => indexed(trees, t1, t, 1, b, i, s, d)
                    | 2 => indexed(trees, t1, t, 2, b, i, s, d)
                    | 3 => indexed(trees, t1, t, 3, b, i, s, d)
                    | _ => displace(trees, t, b, i, s, d)
                 end
               | t => displace(trees, t, b, i, s, d)
              ) 

          (* Add an immed constant *)
          and doEAImmed(trees, 0, b, i, s, d) = doEA(trees, b, i, s, d)
            | doEAImmed(trees, n, b, i, s, I.Immed m) = 
                 doEA(trees, b, i, s, I.Immed(n+m))
            | doEAImmed(trees, n, b, i, s, I.ImmedLabel le) = 
                 doEA(trees, b, i, s, 
                      I.ImmedLabel(T.ADD(32,le,T.LI(T.I.fromInt32(32, n)))))
            | doEAImmed(trees, n, b, i, s, _) = error "doEAImmed"

          (* Add a label expression *)
          and doEALabel(trees, le, b, i, s, I.Immed 0) = 
                 doEA(trees, b, i, s, I.ImmedLabel le)
            | doEALabel(trees, le, b, i, s, I.Immed m) = 
                 doEA(trees, b, i, s, 
                      I.ImmedLabel(T.ADD(32,le,T.LI(T.I.fromInt32(32, m))))
                      handle Overflow => error "doEALabel: constant too large")
            | doEALabel(trees, le, b, i, s, I.ImmedLabel le') = 
                 doEA(trees, b, i, s, I.ImmedLabel(T.ADD(32,le,le')))
            | doEALabel(trees, le, b, i, s, _) = error "doEALabel"

          (* assemble the accumulated components into an operand *)
          and makeAddressingMode(NONE, NONE, _, disp) = disp
            | makeAddressingMode(SOME base, NONE, _, disp) = 
                I.Displace{base=base, disp=disp, mem=mem}
            | makeAddressingMode(base, SOME index, scale, disp) = 
                I.Indexed{base=base, index=index, scale=scale,
                          disp=disp, mem=mem}

          (* generate code for tree and ensure that it is not in %esp
           * (%esp cannot serve as an index register) *)
          and exprNotEsp tree =
              let val r = expr tree
              in  if CB.sameColor(r, C.esp) then 
                     let val tmp = newReg()
                     in  move(I.Direct r, I.Direct tmp); tmp end
                  else r
              end

          (* Add a base register *)
          and displace(trees, t, NONE, i, s, d) =  (* no base yet *)
               doEA(trees, SOME(expr t), i, s, d)
            | displace(trees, t, b as SOME base, NONE, _, d) = (* no index *)
              (* make t the index, but make sure that it is not %esp! *)
              let val i = expr t
              in  if CB.sameColor(i, C.esp) then
                    (* swap base and index *)
                    (* NOTE(review): this branch is reached only when
                     * both the base and the would-be index are colored
                     * %esp; the "swap" still leaves an %esp-colored
                     * index — confirm against later revisions *)
                    if CB.sameColor(base, C.esp) then
                       doEA(trees, SOME i, b, 0, d)
                    else  (* base and index = %esp! *)
                       let val index = newReg()
                       in  move(I.Direct i, I.Direct index);
                           doEA(trees, b, SOME index, 0, d)
                       end
                  else
                    doEA(trees, b, SOME i, 0, d)
              end
            | displace(trees, t, SOME base, i, s, d) = (* base and index *) 
              let val b = expr(T.ADD(32,T.REG(32,base),t))
              in  doEA(trees, SOME b, i, s, d) end

          (* Add an indexed register.  t is the index expression, t0 the
           * whole (shifted) tree, scale the log2 multiplier. *)
          and indexed(trees, t, t0, scale, b, NONE, _, d) = (* no index yet *)
               doEA(trees, b, SOME(exprNotEsp t), scale, d)
            | indexed(trees, _, t0, _, NONE, i, s, d) = (* no base *)
               doEA(trees, SOME(expr t0), i, s, d)
            | indexed(trees, _, t0, _, SOME base, i, s, d) = (*base and index*)
               let val b = expr(T.ADD(32, t0, T.REG(32, base)))
               in  doEA(trees, SOME b, i, s, d) end
                  
      in  case doEA([ea], NONE, NONE, 0, I.Immed 0) of 
            I.Immed _ => raise EA
          | I.ImmedLabel le => I.LabelEA le
          | ea => ea
      end (* address *)

          (* reduce an expression into an operand *)
      and operand(T.LI i) = I.Immed(toInt32(i)) 
        | operand(x as (T.CONST _ | T.LABEL _)) = I.ImmedLabel x
        | operand(T.LABEXP le) = I.ImmedLabel le
        | operand(T.REG(_,r)) = IntReg r
        | operand(T.LOAD(32,ea,mem)) = address(ea, mem)
        | operand(t) = I.Direct(expr t)

          (* evaluate the operand into a fresh register operand *)
      and moveToReg(opnd) =
          let val dst = I.Direct(newReg())
          in  move(opnd, dst); dst
          end

          (* reduce an operand to the register holding its value *)
      and reduceOpnd(I.Direct r) = r
        | reduceOpnd opnd =
          let val dst = newReg()
          in  move(opnd, I.Direct dst); dst
          end

      (* ensure that the operand is either an immed or register *)
      and immedOrReg(opnd as I.Displace _) = moveToReg opnd
        | immedOrReg(opnd as I.Indexed _)  = moveToReg opnd
        | immedOrReg(opnd as I.MemReg _)   = moveToReg opnd
        | immedOrReg(opnd as I.LabelEA _)  = moveToReg opnd
        | immedOrReg opnd  = opnd

      and isImmediate(I.Immed _) = true
        | isImmediate(I.ImmedLabel _) = true
        | isImmediate _ = false

          (* ensure the operand is a register or memory reference *)
      and regOrMem opnd = if isImmediate opnd then moveToReg opnd else opnd
 
          (* is this operand a memory reference? *)
      and isMemOpnd opnd = 
          (case opnd of 
            I.Displace _ => true
          | I.Indexed _  => true 
          | I.MemReg _   => true 
          | I.LabelEA _  => true 
          | I.FDirect f  => true
          | _            => false
          )
      
         (* 
          * Compute an integer expression and put the result in 
          * the destination register rd.  
          *)
      and doExpr(exp, rd : CB.cell, an) = 
          let val rdOpnd = IntReg rd

              fun equalRd(I.Direct r) = CB.sameColor(r, rd)
                | equalRd(I.MemReg r) = CB.sameColor(r, rd)
                | equalRd _ = false

                 (* Emit a binary operator.  If the destination is
                  * a memReg, do something smarter.
                  *)
              fun genBinary(binOp, opnd1, opnd2) =
                  (* go through a temporary when the destination is a
                   * memreg and an operand is in memory (there is no
                   * mem-to-mem form), or when opnd2 is the destination
                   * itself (opnd1's move would clobber it) *)
                  if isMemReg rd andalso
                     (isMemOpnd opnd1 orelse isMemOpnd opnd2) orelse
                     equalRd(opnd2)
                  then
                  let val tmpR = newReg()
                      val tmp  = I.Direct tmpR
                  in  move(opnd1, tmp);
                      mark(I.BINARY{binOp=binOp, src=opnd2, dst=tmp}, an);
                      move(tmp, rdOpnd)
                  end 
                  else
                     (move(opnd1, rdOpnd);
                      mark(I.BINARY{binOp=binOp, src=opnd2, dst=rdOpnd}, an)
                     )

                 (* Generate a binary operator; it may commute *)
              fun binaryComm(binOp, e1, e2) = 
              let (* put the cheaper (immediate/register) operand second *)
                  val (opnd1, opnd2) = 
                      case (operand e1, operand e2) of
                        (x as I.Immed _, y)      => (y, x)
                      | (x as I.ImmedLabel _, y) => (y, x)
                      | (x, y as I.Direct _)     => (y, x)
                      | (x, y)                   => (x, y)
              in  genBinary(binOp, opnd1, opnd2)
              end
        
                 (* Generate a binary operator; non-commutative *)
              fun binary(binOp, e1, e2) =
                  genBinary(binOp, operand e1, operand e2)
        
                 (* Generate a unary operator *)
              fun unary(unOp, e) = 
              let val opnd = operand e
              in  if isMemReg rd andalso isMemOpnd opnd then
                     let val tmp = I.Direct(newReg())
                     in  move(opnd, tmp); move(tmp, rdOpnd)
                     end 
                  else move(opnd, rdOpnd);
                  mark(I.UNARY{unOp=unOp, opnd=rdOpnd}, an)
              end
        
                 (* Generate shifts; the shift 
                  * amount must be a constant or in %ecx *)
              fun shift(opcode, e1, e2) =
              let val (opnd1, opnd2) = (operand e1, operand e2)
              in  case opnd2 of 
                    I.Immed _ => genBinary(opcode, opnd1, opnd2)
                  | _ => 
                    (* variable shift amount: it has to live in %ecx *)
                    if equalRd(opnd2) then 
                    let val tmpR = newReg()
                        val tmp  = I.Direct tmpR
                    in  move(opnd1, tmp);
                        move(opnd2, ecx);
                        mark(I.BINARY{binOp=opcode, src=ecx, dst=tmp},an);
                        move(tmp, rdOpnd)
                    end
                    else
                        (move(opnd1, rdOpnd);
                         move(opnd2, ecx);
                         mark(I.BINARY{binOp=opcode, src=ecx, dst=rdOpnd},an)
                        )
              end
        
                  (* Division or remainder: dividend must be in the
                   * %edx:%eax pair; resultReg selects quotient (%eax)
                   * or remainder (%edx) *)
              fun divrem(signed, overflow, e1, e2, resultReg) =
              let val (opnd1, opnd2) = (operand e1, operand e2)
                  val _ = move(opnd1, eax)
                  (* sign-extend with CDQ, or zero %edx, before dividing *)
                  val oper = if signed then (emit(I.CDQ); I.IDIVL1)
                             else (zero edx; I.DIVL1)
              in  mark(I.MULTDIV{multDivOp=oper, src=regOrMem opnd2},an);
                  move(resultReg, rdOpnd);
                  if overflow then trap() else ()
              end
       
                  (* Optimize the special case for division *) 
              fun divide(signed, overflow, e1, e2 as T.LI n') = let
                  val n = toInt32 n'
                  val w = T.I.toWord32(32, n')
                  fun isPowerOf2 w = W32.andb((w - 0w1), w) = 0w0 
                  fun log2 n =  (* n must be > 0!!! *)
                      let fun loop(0w1,pow) = pow
                            | loop(w,pow) = loop(W32.>>(w, 0w1),pow+1)
                      in loop(n,0) end
              in  if n > 1 andalso isPowerOf2 w then 
                     let val pow = T.LI(T.I.fromInt(32,log2 w))
                     in  if signed then 
                         (* signed; simulate round towards zero *)
                         let val label = Label.anon()
                             val reg1  = expr e1
                             val opnd1 = I.Direct reg1
                         in  (* add (n-1) to negative dividends before
                              * the arithmetic right shift *)
                             if setZeroBit e1 then ()
                             else emit(I.CMPL{lsrc=opnd1, rsrc=I.Immed 0});
                             emit(I.JCC{cond=I.GE, opnd=immedLabel label});
                             emit(if n = 2 then
                                     I.UNARY{unOp=I.INCL, opnd=opnd1}
                                  else
                                     I.BINARY{binOp=I.ADDL, 
                                              src=I.Immed(n - 1),
                                              dst=opnd1});
                             defineLabel label;
                             shift(I.SARL, T.REG(32, reg1), pow)
                         end
                         else (* unsigned *)
                            shift(I.SHRL, e1, pow)
                     end
                  else
                       (* note the only way we can overflow is if
                        * n = 0 or n = -1
                        *)
                     divrem(signed, overflow andalso (n = ~1 orelse n = 0), 
                            e1, e2, eax)
              end
                | divide(signed, overflow, e1, e2) = 
                    divrem(signed, overflow, e1, e2, eax)

                  (* remainder: take the result from %edx *)
              fun rem(signed, overflow, e1, e2) = 
                    divrem(signed, overflow, e1, e2, edx)

                  (* Makes sure the destination must be a register *)
              fun dstMustBeReg f = 
                  if isMemReg rd then
                  let val tmpR = newReg()
                      val tmp  = I.Direct(tmpR)
                  in  f(tmpR, tmp); move(tmp, rdOpnd) end
                  else f(rd, rdOpnd)

                  (* unsigned integer multiplication *)
              fun uMultiply(e1, e2) = 
                  (* note e2 can never be (I.Direct edx) *)
                  (move(operand e1, eax);
                   mark(I.MULTDIV{multDivOp=I.MULL1, 
                                  src=regOrMem(operand e2)},an);
                   move(eax, rdOpnd)
                  )
        
                  (* signed integer multiplication: 
                   * The only forms that are allowed that also sets the 
                   * OF and CF flags are:
                   *
                   *          (dst)  (src1)  (src2)
                   *      imul r32, r32/m32, imm8
                   *          (dst)  (src) 
                   *      imul r32, imm8
                   *      imul r32, imm32
                   *      imul r32, r32/m32
                   * Note: destination must be a register!
                   *)
              fun multiply(e1, e2) = 
              dstMustBeReg(fn (rd, rdOpnd) =>
              let fun doit(i1 as I.Immed _, i2 as I.Immed _) =
                      (move(i1, rdOpnd);
                       mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=i2},an))
                    | doit(rm, i2 as I.Immed _) = doit(i2, rm)
                    | doit(imm as I.Immed(i), rm) =
                           mark(I.MUL3{dst=rd, src1=rm, src2=i},an)
                    | doit(r1 as I.Direct _, r2 as I.Direct _) =
                      (move(r1, rdOpnd);
                       mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=r2},an))
                    | doit(r1 as I.Direct _, rm) =
                      (move(r1, rdOpnd);
                       mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=rm},an))
                    | doit(rm, r as I.Direct _) = doit(r, rm)
                      (* both operands in memory: guard against rm2
                       * being the destination itself *)
                    | doit(rm1, rm2) =
                       if equalRd rm2 then
                       let val tmpR = newReg()
                           val tmp  = I.Direct tmpR
                       in move(rm1, tmp);
                          mark(I.BINARY{binOp=I.IMULL, dst=tmp, src=rm2},an);
                          move(tmp, rdOpnd)
                       end
                       else
                         (move(rm1, rdOpnd);
                          mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=rm2},an)
                         )
                  val (opnd1, opnd2) = (operand e1, operand e2)
              in  doit(opnd1, opnd2)
              end
              )

                 (* Emit a load instruction; makes sure that the destination
                  * is a register 
                  *)
              fun genLoad(mvOp, ea, mem) = 
                  dstMustBeReg(fn (_, dst) =>
                     mark(I.MOVE{mvOp=mvOp, src=address(ea, mem), dst=dst},an))
    
                 (* Generate a zero extended loads (8/16 bit),
                  * sign extended loads (8/16 bit), and a plain 32-bit load *)
              fun load8(ea, mem) = genLoad(I.MOVZBL, ea, mem)
              fun load16(ea, mem) = genLoad(I.MOVZWL, ea, mem)
              fun load8s(ea, mem) = genLoad(I.MOVSBL, ea, mem)
              fun load16s(ea, mem) = genLoad(I.MOVSWL, ea, mem)
              fun load32(ea, mem) = genLoad(I.MOVL, ea, mem)
        
                 (* Generate a sign extended loads *)
        
                 (* Generate setcc instruction:
                  *  semantics:  MV(rd, COND(_, T.CMP(ty, cc, t1, t2), yes, no))
                  * Bug, if eax is either t1 or t2 then problem will occur!!!
                  * Note that we have to use eax as the destination of the
                  * setcc because it only works on the registers
                  * %al, %bl, %cl, %dl and %[abcd]h.  The last four registers
                  * are inaccessible in 32 bit mode.
                  *)
              fun setcc(ty, cc, t1, t2, yes, no) = 
              let (* normalize so that the "yes" constant is the larger *)
                  val (cc, yes, no) = 
                         if yes > no then (cc, yes, no)
                         else (T.Basis.negateCond cc, no, yes)
              in  (* Clear the destination first.
                   * This this because stupid SETcc 
                   * only writes to the low order
                   * byte.  That's Intel architecture, folks.
                   *)
                  case (yes, no, cc) of
                    (1, 0, T.LT) =>
                     (* t1 < t2 iff the sign bit of t1-t2 is set *)
                     let val tmp = I.Direct(expr(T.SUB(32,t1,t2)))
                     in  move(tmp, rdOpnd);
                         emit(I.BINARY{binOp=I.SHRL,src=I.Immed 31,dst=rdOpnd})
                     end
                  | (1, 0, T.GT) =>
                     let val tmp = I.Direct(expr(T.SUB(32,t1,t2)))
                     in  emit(I.UNARY{unOp=I.NOTL,opnd=tmp});
                         move(tmp, rdOpnd);
                         emit(I.BINARY{binOp=I.SHRL,src=I.Immed 31,dst=rdOpnd})
                     end
                  | (1, 0, _) => (* normal case *)
                    let val cc = cmp(true, ty, cc, t1, t2, []) 
                    in  mark(I.SET{cond=cond cc, opnd=eax}, an);
                        emit(I.BINARY{binOp=I.ANDL,src=I.Immed 255, dst=eax});
                        move(eax, rdOpnd)
                    end
                  | (C1, C2, _)  => 
                    (* general case; 
                     * from the Intel optimization guide p3-5 
                     *)
                    let val _  = zero eax;
                        val cc = cmp(true, ty, cc, t1, t2, []) 
                    in  case C1-C2 of
                          (* differences expressible with LEA scaling *)
                          D as (1 | 2 | 3 | 4 | 5 | 8 | 9) =>
                          let val (base,scale) = 
                                  case D of
                                    1 => (NONE, 0)
                                  | 2 => (NONE, 1)
                                  | 3 => (SOME C.eax, 1)
                                  | 4 => (NONE, 2)
                                  | 5 => (SOME C.eax, 2)
                                  | 8 => (NONE, 3)
                                  | 9 => (SOME C.eax, 3)
                              val addr = I.Indexed{base=base,
                                                   index=C.eax,
                                                   scale=scale,
                                                   disp=I.Immed C2,
                                                   mem=readonly}
                              val tmpR = newReg()
                              val tmp  = I.Direct tmpR
                          in  emit(I.SET{cond=cond cc, opnd=eax}); 
                              mark(I.LEA{r32=tmpR, addr=addr}, an);
                              move(tmp, rdOpnd)
                          end
                        | D =>
                           (* mask trick: set !cc; decrement to get an
                            * all-ones/all-zeros mask; and with D; then
                            * add the C2 offset via lea if nonzero *)
                           (emit(I.SET{cond=cond(T.Basis.negateCond cc), 
                                       opnd=eax}); 
                            emit(I.UNARY{unOp=I.DECL, opnd=eax});
                            emit(I.BINARY{binOp=I.ANDL,
                                          src=I.Immed D, dst=eax});
                            if C2 = 0 then 
                               move(eax, rdOpnd)
                            else
                               let val tmpR = newReg()
                                   val tmp  = I.Direct tmpR
                               in  mark(I.LEA{addr=
                                         I.Displace{
                                             base=C.eax,
                                             disp=I.Immed C2,
                                             mem=readonly},
                                             r32=tmpR}, an);
                                    move(tmp, rdOpnd)
                                end
                           )
                    end
              end (* setcc *)
    
                  (* Generate cmovcc instruction.
                   * on Pentium Pro and Pentium II only
                   *)
              fun cmovcc(ty, cc, t1, t2, yes, no) = 
              let fun genCmov(dstR, _) = 
                  let val _ = doExpr(no, dstR, []) (* evaluate false branch into dst first *)
                      val cc = cmp(true, ty, cc, t1, t2, [])  (* compare; operands may be reordered *)
                  in  mark(I.CMOV{cond=cond cc, src=operand yes, dst=dstR}, an) 
                  end
              in  dstMustBeReg genCmov (* CMOV requires a register destination *)
              end
        
              (* catch-all: simplify exp with the generic expansion and retry *)
              fun unknownExp exp = doExpr(Gen.compileRexp exp, rd, an) 

                  (* Add n to rd *)
              fun addN n =
              let val n = operand n
                  (* a memReg destination cannot also take a memory source;
                   * force n into an immediate or register in that case *)
                  val src = if isMemReg rd then immedOrReg n else n
              in  mark(I.BINARY{binOp=I.ADDL, src=src, dst=rdOpnd}, an) end

                  (* Generate addition *)
              fun addition(e1, e2) =
                  (* if either operand is already rd, a single in-place ADDL
                   * (addN) suffices; otherwise fall through to addition2 *)
                  case e1 of
                    T.REG(_,rs) => if CB.sameColor(rs,rd) then addN e2 
                                   else addition1(e1,e2)
                  | _ => addition1(e1,e2)
              and addition1(e1, e2) =
                  case e2 of
                    T.REG(_,rs) => if CB.sameColor(rs,rd) then addN e1 
                                   else addition2(e1,e2)
                  | _ => addition2(e1,e2) 
              and addition2(e1,e2) =     
                (* try LEA on the whole enclosing expression `exp` (not on
                 * (e1,e2)); address() raises EA when exp is not expressible
                 * as an x86 addressing mode, in which case a plain ADDL is
                 * generated *)
                (dstMustBeReg(fn (dstR, _) => 
                    mark(I.LEA{r32=dstR, addr=address(exp, readonly)}, an))
                handle EA => binaryComm(I.ADDL, e1, e2))


          in  case exp of
               T.REG(_,rs) => 
                   if isMemReg rs andalso isMemReg rd then 
                      let val tmp = I.Direct(newReg())
                      in  move'(I.MemReg rs, tmp, an);
                          move'(tmp, rdOpnd, [])
                      end
                   else move'(IntReg rs, rdOpnd, an)
	     | T.LI z => let
		 val n = toInt32 z
	       in 
		 if n=0 then 
		   (* As per Fermin's request, special optimization for rd := 0. 
		    * Currently we don't bother with the size.
		    *)
		   if isMemReg rd then move'(I.Immed 0, rdOpnd, an)
		   else mark(I.BINARY{binOp=I.XORL, src=rdOpnd, dst=rdOpnd}, an)
		 else
		   move'(I.Immed(n), rdOpnd, an)
	       end
             | (T.CONST _ | T.LABEL _) => 
                 move'(I.ImmedLabel exp, rdOpnd, an)
             | T.LABEXP le => move'(I.ImmedLabel le, rdOpnd, an)

               (* 32-bit addition *)
	     | T.ADD(32, e1, e2 as T.LI n) => let
	         val n = toInt32 n
               in 
		 case n 
		 of 1  => unary(I.INCL, e1)
	          | ~1 => unary(I.DECL, e1)
		  | _ => addition(e1, e2)
	       end
	     | T.ADD(32, e1 as T.LI n, e2) => let
	         val n = toInt32 n
	       in
		 case n 
		 of  1 => unary(I.INCL, e2)
	          | ~1 => unary(I.DECL, e2)
		  | _ => addition(e1, e2)
	       end
             | T.ADD(32, e1, e2) => addition(e1, e2)

               (* 32-bit addition but set the flag!
                * This is a stupid hack for now.  
                *)
	     | T.ADD(0, e, e1 as T.LI n) => let
	         val n = T.I.toInt(32, n)
               in
		 if n=1 then unary(I.INCL, e)
		 else if n = ~1 then unary(I.DECL, e)
 		      else binaryComm(I.ADDL, e, e1)
               end
	     | T.ADD(0, e1 as T.LI n, e) => let
	         val n = T.I.toInt(32, n)
	       in
		 if n=1 then unary(I.INCL, e)
		 else if n = ~1 then unary(I.DECL, e)
		      else binaryComm(I.ADDL, e1, e)
               end
	     | T.ADD(0, e1, e2) => binaryComm(I.ADDL, e1, e2)
	         
               (* 32-bit subtraction *)
	     | T.SUB(32, e1, e2 as T.LI n) => let
	         val n = toInt32 n
	       in
		 case n
		 of 0 => doExpr(e1, rd, an)
	          | 1 => unary(I.DECL, e1)
		  | ~1 => unary(I.INCL, e1)
		  | _ => binary(I.SUBL, e1, e2)
               end
	     | T.SUB(32, e1 as T.LI n, e2) => 
	         if T.I.isZero n then unary(I.NEGL, e2)
		 else binary(I.SUBL, e1, e2)
             | T.SUB(32, e1, e2) => binary(I.SUBL, e1, e2)

             | T.MULU(32, x, y) => uMultiply(x, y)
             | T.DIVU(32, x, y) => divide(false, false, x, y)
             | T.REMU(32, x, y) => rem(false, false, x, y)

             | T.MULS(32, x, y) => multiply(x, y)
             | T.DIVS(32, x, y) => divide(true, false, x, y)
             | T.REMS(32, x, y) => rem(true, false, x, y)

             | T.ADDT(32, x, y) => (binaryComm(I.ADDL, x, y); trap())
             | T.SUBT(32, x, y) => (binary(I.SUBL, x, y); trap())
             | T.MULT(32, x, y) => (multiply(x, y); trap())
             | T.DIVT(32, x, y) => divide(true, true, x, y)
             | T.REMT(32, x, y) => rem(true, true, x, y)

             | T.ANDB(32, x, y) => binaryComm(I.ANDL, x, y)
             | T.ORB(32, x, y)  => binaryComm(I.ORL, x, y)
             | T.XORB(32, x, y) => binaryComm(I.XORL, x, y)
             | T.NOTB(32, x)    => unary(I.NOTL, x)

             | T.SRA(32, x, y)  => shift(I.SARL, x, y)
             | T.SRL(32, x, y)  => shift(I.SHRL, x, y)
             | T.SLL(32, x, y)  => shift(I.SHLL, x, y)

             | T.LOAD(8, ea, mem) => load8(ea, mem)
             | T.LOAD(16, ea, mem) => load16(ea, mem)
             | T.LOAD(32, ea, mem) => load32(ea, mem)

             | T.SX(32,8,T.LOAD(8,ea,mem)) => load8s(ea, mem)
             | T.SX(32,16,T.LOAD(16,ea,mem)) => load16s(ea, mem)
             | T.ZX(32,8,T.LOAD(8,ea,mem)) => load8(ea, mem)
             | T.ZX(32,16,T.LOAD(16,ea,mem)) => load16(ea, mem)

             | T.COND(32, T.CMP(ty, cc, t1, t2), T.LI yes, T.LI no) => 
                 setcc(ty, cc, t1, t2, toInt32 yes, toInt32 no)
             | T.COND(32, T.CMP(ty, cc, t1, t2), yes, no) => 
                (case !arch of (* PentiumPro and higher has CMOVcc *)
                   Pentium => unknownExp exp
                 | _ => cmovcc(ty, cc, t1, t2, yes, no)
                )
             | T.LET(s,e) => (doStmt s; doExpr(e, rd, an))
             | T.MARK(e, A.MARKREG f) => (f rd; doExpr(e, rd, an))
             | T.MARK(e, a) => doExpr(e, rd, a::an)
             | T.PRED(e,c) => doExpr(e, rd, A.CTRLUSE c::an)
             | T.REXT e => 
                 ExtensionComp.compileRext (reducer()) {e=e, rd=rd, an=an} 
               (* simplify and try again *)
             | exp => unknownExp exp
          end (* doExpr *)

          (* generate an expression and return its result register 
           * If rewritePseudo is on, the result is guaranteed to be in a 
           * non memReg register
           *)
      and expr exp =
          (* a bare register that is not a memReg needs no code at all;
           * anything else is evaluated into a fresh register *)
          (case exp of
             T.REG(_, r) => if isMemReg r then genExpr exp else r
           | _ => genExpr exp)

      and genExpr exp =
          (* evaluate exp into a brand-new register and return it *)
          let val dst = newReg()
          in  doExpr(exp, dst, []); dst
          end

         (* Compare an expression with zero.
          * On the x86, TEST is superior to AND for doing the same thing,
          * since it doesn't need to write out the result in a register.
          *)
     (* Compare e against zero when e already sets the zero flag as a side
      * effect (the caller, cmp, checks setZeroBit2 first).  For EQ/NE over
      * an ANDB we can emit TEST and never materialize the AND result;
      * other widths evaluate e into a scratch register.  Returns the cc
      * actually used.  Note the sequencing: `; cc` terminates the
      * parenthesized sequence, so cc is returned after the case runs.
      *)
     and cmpWithZero(cc as (T.EQ | T.NE), e as T.ANDB(ty, a, b), an) = 
            (case ty of
               8  => test(I.TESTB, a, b, an)
             | 16 => test(I.TESTW, a, b, an)
             | 32 => test(I.TESTL, a, b, an)
             | _  => doExpr(e, newReg(), an); 
             cc)  
        | cmpWithZero(cc, e, an) = 
          let val e = 
                case e of (* hack to disable the lea optimization XXX *)
                  T.ADD(_, a, b) => T.ADD(0, a, b)
                | e => e
          in  doExpr(e, newReg(), an); cc end

          (* Emit a test.
           *   The available modes are
           *      r/m, r
           *      r/m, imm
           * On selecting the right instruction: TESTL/TESTW/TESTB.   
           * When anding an operand with a constant
           * that fits within 8 (or 16) bits, it is possible to use TESTB,
           * (or TESTW) instead of TESTL.   Because x86 is little endian, 
           * this works for memory operands too.  However, with TESTB, it is
           * not possible to use registers other than 
           * AL, CL, BL, DL, and AH, CH, BH, DH.  So, the best way is to
           * perform register allocation first, and if the operand registers
           * are one of EAX, ECX, EBX, or EDX, replace the TESTL instruction 
           * by TESTB.
           *)
      and test(testopcode, a, b, an) = 
          let (* the cc returned here is discarded; TEST is only used for
               * the symmetric EQ/NE cases, so reordering is always safe *)
              val (_, opnd1, opnd2) = commuteComparison(T.EQ, true, a, b)
              (* translate r, r/m => r/m, r *)
              val (opnd1, opnd2) = 
                   if isMemOpnd opnd2 then (opnd2, opnd1) else (opnd1, opnd2)
          in  mark(testopcode{lsrc=opnd1, rsrc=opnd2}, an)
          end

          (* %eflags <- src *)
      and moveToEflags src =
          (* no-op when src already denotes the eflags cell; otherwise the
           * value is routed through %eax and transferred with LAHF.
           * NOTE(review): LAHF loads flags INTO AH per Intel docs — the
           * LAHF/SAHF pairing here vs. moveFromEflags looks inverted;
           * preserved exactly as in the original. *)
          if not (CB.sameColor(src, C.eflags)) then
             (move(I.Direct src, eax); emit(I.LAHF))
          else ()

          (* dst <- %eflags *) 
      and moveFromEflags dst =
          (* symmetric to moveToEflags: transfer via SAHF, then move %eax
           * into dst *)
          if not (CB.sameColor(dst, C.eflags)) then
             (emit(I.SAHF); move(eax, I.Direct dst))
          else ()

         (* generate a condition code expression 
          * The zero is for setting the condition code!  
          * I have no idea why this is used.
          *)
      and doCCexpr(T.CMP(ty, cc, t1, t2), rd, an) = 
          (* perform the comparison without reordering operands (swapable =
           * false), then capture %eflags into rd *)
          (cmp(false, ty, cc, t1, t2, an); 
           moveFromEflags rd
          ) 
        | doCCexpr(T.CC(cond,rs), rd, an) = 
          (* moving to/from the dedicated eflags cell needs the special
           * transfer sequence; otherwise a plain register move suffices *)
          if CB.sameColor(rs,C.eflags) orelse CB.sameColor(rd,C.eflags) then
             (moveToEflags rs; moveFromEflags rd)
          else
             move'(I.Direct rs, I.Direct rd, an)
        | doCCexpr(T.CCMARK(e,A.MARKREG f),rd,an) = (f rd; doCCexpr(e,rd,an))
        | doCCexpr(T.CCMARK(e,a), rd, an) = doCCexpr(e,rd,a::an)
        | doCCexpr(T.CCEXT e, cd, an) = 
           ExtensionComp.compileCCext (reducer()) {e=e, ccd=cd, an=an} 
        | doCCexpr _ = error "doCCexpr"

     (* stand-alone cc-valued expressions are not supported; condition
      * codes are produced via doCCexpr instead *)
     and ccExpr e = error "ccExpr"

          (* generate a comparison and sets the condition code;
           * return the actual cc used.  If the flag swapable is true,
           * we can also reorder the operands. 
           *)
      and cmp(swapable, ty, cc, t1, t2, an) = 
               (* == and <> can always be reordered *)
          let val swapable = swapable orelse cc = T.EQ orelse cc = T.NE
          in (* Sometimes the comparison is not necessary because
              * the bits are already set! 
              *)
             if isZero t1 andalso setZeroBit2 t2 then 
                 if swapable then
                    (* swap operands and flip the condition accordingly *)
                    cmpWithZero(T.Basis.swapCond cc, t2, an)
                 else (* can't reorder the comparison! *)
                    genCmp(ty, false, cc, t1, t2, an)
             else if isZero t2 andalso setZeroBit2 t1 then 
                cmpWithZero(cc, t1, an)
             else genCmp(ty, swapable, cc, t1, t2, an) 
          end

          (* Give a and b which are the operands to a comparison (or test)
           * Return the appropriate condition code and operands.
           *   The available modes are:
           *        r/m, imm
           *        r/m, r
           *        r,   r/m
           *)
      and commuteComparison(cc, swapable, a, b) = 
          let val (opnd1, opnd2) = (operand a, operand b)
          in  (* Try to fold in the operands whenever possible *)
              case (isImmediate opnd1, isImmediate opnd2) of
                (* imm,imm is not a legal mode: load the first into a reg *)
                (true, true) => (cc, moveToReg opnd1, opnd2)
              | (true, false) => 
                   (* prefer r/m,imm: swap operands (adjusting cc) when
                    * permitted, else force the immediate into a register *)
                   if swapable then (T.Basis.swapCond cc, opnd2, opnd1)
                   else (cc, moveToReg opnd1, opnd2)
              | (false, true) => (cc, opnd1, opnd2)
              | (false, false) => 
                 (* neither immediate: at least one side must be a register *)
                 (case (opnd1, opnd2) of
                    (_, I.Direct _) => (cc, opnd1, opnd2)
                  | (I.Direct _, _) => (cc, opnd1, opnd2)
                  | (_, _)          => (cc, moveToReg opnd1, opnd2)
                 )
          end 
 
          (* generate a real comparison; return the real cc used *)
      and genCmp(ty, swapable, cc, a, b, an) =
          (* legalize the operand pair, emit CMPL, and return the condition
           * that was actually used (it may have been swapped) *)
          let val (cc', lsrc, rsrc) = commuteComparison(cc, swapable, a, b)
          in  mark(I.CMPL{lsrc=lsrc, rsrc=rsrc}, an);
              cc'
          end

          (* generate code for jumps *)
      and jmp(target, labs, an) =
          (* a direct label jump records exactly that label as its target
           * set; label expressions and computed jumps keep the caller's
           * target list *)
          (case target of
             T.LABEL lab => mark(I.JMP(I.ImmedLabel target, [lab]), an)
           | T.LABEXP le => mark(I.JMP(I.ImmedLabel le, labs), an)
           | ea          => mark(I.JMP(operand ea, labs), an))

       (* convert mlrisc to cellset:
        *)
       and cellset mlrisc =
           (* fold an mlrisc register list into a cellset; anything that is
            * not a plain GPR/FPR/CC reference is silently ignored *)
           let val addCCReg = CB.CellSet.add 
               fun collect([], acc) = acc
                 | collect(m::rest, acc) =
                   let val acc =
                           case m of
                             T.GPR(T.REG(_,r))  => C.addReg(r, acc)
                           | T.FPR(T.FREG(_,f)) => C.addFreg(f, acc)
                           | T.CCR(T.CC(_,cc))  => addCCReg(cc, acc)
                           | T.CCR(T.FCC(_,cc)) => addCCReg(cc, acc)
                           | _                  => acc
                   in  collect(rest, acc) end
           in  collect(mlrisc, C.empty) end

          (* generate code for calls *)
      and call(ea, flow, def, use, mem, cutsTo, an, pops) = 
      let (* collect registers carrying return values, as recorded by
           * RETURN_ARG annotations on the call; `flow` is unused here *)
          fun return(set, []) = set
            | return(set, a::an) =
              case #peek A.RETURN_ARG a of 
                SOME r => return(CB.CellSet.add(r, set), an)
              | NONE => return(set, an)
      in
          mark(I.CALL{opnd=operand ea,defs=cellset(def),uses=cellset(use),
                      return=return(C.empty,an),cutsTo=cutsTo,mem=mem,
                      pops=pops},an)
      end

          (* generate code for integer stores; first move data to %eax 
           * This is mainly because we can't allocate to registers like
           * ah, dl, dx etc.
           *)
      and genStore(mvOp, ea, d, mem, an) =
          let (* funnel register data through %eax: byte moves cannot use
               * arbitrary registers (only AL/BL/CL/DL and their high
               * halves), and the allocator does not handle those *)
              val src = 
                 case immedOrReg(operand d) of
                     src as I.Direct r =>
                       if CB.sameColor(r,C.eax) 
                       then src else (move(src, eax); eax)
                   | src => src
          in  mark(I.MOVE{mvOp=mvOp, src=src, dst=address(ea,mem)},an)
          end
 
          (* generate code for 8-bit integer stores *)
          (* movb has to use %eax as source. Stupid x86! *)
      and store8(ea, d, mem, an) = genStore(I.MOVB, ea, d, mem, an)
          (* 16-bit stores can use any register source directly *)
      and store16(ea, d, mem, an) = 
        mark(I.MOVE{mvOp=I.MOVW, src=immedOrReg(operand d), dst=address(ea, mem)}, an)
          (* 32-bit stores are ordinary moves *)
      and store32(ea, d, mem, an) = 
            move'(immedOrReg(operand d), address(ea, mem), an)

          (* generate code for branching *)
      and branch(T.CMP(ty, cc, t1, t2), lab, an) =
           (* allow reordering of operands *)
           let val cc = cmp(true, ty, cc, t1, t2, []) 
           in  mark(I.JCC{cond=cond cc, opnd=immedLabel lab}, an) end
        | branch(T.FCMP(fty, fcc, t1, t2), lab, an) = 
           fbranch(fty, fcc, t1, t2, lab, an)
        | branch(ccexp, lab, an) =
           (* general cc expression: evaluate it into %eflags, then branch
            * on the condition it denotes *)
           (doCCexpr(ccexp, C.eflags, []);
            mark(I.JCC{cond=cond(Gen.condOf ccexp), opnd=immedLabel lab}, an)
           )

          (* generate code for floating point compare and branch *)
      and fbranch(fty, fcc, t1, t2, lab, an) = 
          let (* operand order may be ignored for plain registers, loads,
               * and marks thereof *)
              fun ignoreOrder (T.FREG _) = true
                | ignoreOrder (T.FLOAD _) = true
                | ignoreOrder (T.FMARK(e,_)) = ignoreOrder e
                | ignoreOrder _ = false

              fun compare'() = (* Sethi-Ullman style *)
                  (* push both operands on the x87 stack (with an FXCH when
                   * the order matters), then compare-and-pop-twice *)
                  (if ignoreOrder t1 orelse ignoreOrder t2 then 
                        (reduceFexp(fty, t2, []); reduceFexp(fty, t1, []))
                   else (reduceFexp(fty, t1, []); reduceFexp(fty, t2, []); 
                         emit(I.FXCH{opnd=C.ST(1)}));
                   emit(I.FUCOMPP);
                   fcc
                  )

              fun compare''() = 
                      (* direct style *)
                      (* Try to make lsrc the memory operand *)
                  let val lsrc = foperand(fty, t1)
                      val rsrc = foperand(fty, t2)
                      val fsize = fsize fty
                      fun cmp(lsrc, rsrc, fcc) =
                          (emit(I.FCMP{fsize=fsize,lsrc=lsrc,rsrc=rsrc}); fcc)
                  in  case (lsrc, rsrc) of
                         (I.FPR _, I.FPR _) => cmp(lsrc, rsrc, fcc)
                       | (I.FPR _, mem) => cmp(mem,lsrc,T.Basis.swapFcond fcc)
                       | (mem, I.FPR _) => cmp(lsrc, rsrc, fcc)
                       | (lsrc, rsrc) => (* can't be both memory! *)
                         let val ftmpR = newFreg()
                             val ftmp  = I.FPR ftmpR
                         in  emit(I.FMOVE{fsize=fsize,src=rsrc,dst=ftmp});
                             cmp(lsrc, ftmp, fcc)
                         end
                  end

              fun compare() = 
                  if enableFastFPMode andalso !fast_floating_point 
                  then compare''() else compare'()

              (* After FNSTSW (emitted below) the x87 status word is in
               * %ax; the masks below select its condition bits
               * C0=0x100, C2=0x400, C3=0x4000 (C1=0x200) — see the Intel
               * manual on FUCOM/FNSTSW for the bit encodings per fcc.
               *)
              fun andil i = emit(I.BINARY{binOp=I.ANDL,src=I.Immed(i),dst=eax})
              fun testil i = emit(I.TESTL{lsrc=eax,rsrc=I.Immed(i)})
              fun xoril i = emit(I.BINARY{binOp=I.XORL,src=I.Immed(i),dst=eax})
              fun cmpil i = emit(I.CMPL{rsrc=I.Immed(i), lsrc=eax})
              fun j(cc, lab) = mark(I.JCC{cond=cc, opnd=immedLabel lab},an)
              fun sahf() = emit(I.SAHF)
              fun branch(fcc) =
                  case fcc
                  of T.==   => (andil 0x4400; xoril 0x4000; j(I.EQ, lab))
                   | T.?<>  => (andil 0x4400; xoril 0x4000; j(I.NE, lab))
                   | T.?    => (sahf(); j(I.P,lab))
                   | T.<=>  => (sahf(); j(I.NP,lab))
                   | T.>    => (testil 0x4500;  j(I.EQ,lab))
                   | T.?<=  => (testil 0x4500;  j(I.NE,lab))
                   | T.>=   => (testil 0x500; j(I.EQ,lab))
                   | T.?<   => (testil 0x500; j(I.NE,lab))
                   | T.<    => (andil 0x4500; cmpil 0x100; j(I.EQ,lab))
                   | T.?>=  => (andil 0x4500; cmpil 0x100; j(I.NE,lab))
                   | T.<=   => (andil 0x4100; cmpil 0x100; j(I.EQ,lab);
                                cmpil 0x4000; j(I.EQ,lab))
                   | T.?>   => (sahf(); j(I.P,lab); testil 0x4100; j(I.EQ,lab))
                   | T.<>   => (testil 0x4400; j(I.EQ,lab))
                   | T.?=   => (testil 0x4400; j(I.NE,lab))
                   | _      => error "fbranch"
                 (*esac*)
              val fcc = compare() 
          in  emit I.FNSTSW;   
              branch(fcc)
          end

      (*========================================================
       * Floating point code generation starts here.
       * Some generic fp routines first.
       *========================================================*)

       (* Can this tree be folded into the src operand of a floating point
        * operations?
        *)
      and foldableFexp e =
          (* true for operands an x87 binary op can consume directly:
           * registers, loads, and 16/32-bit integer conversions;
           * CVTF2F and FMARK wrappers are looked through *)
          (case e of
             T.FREG _                  => true
           | T.FLOAD _                 => true
           | T.CVTI2F(_, (16 | 32), _) => true
           | T.CVTF2F(_, _, t)         => foldableFexp t
           | T.FMARK(t, _)             => foldableFexp t
           | _                         => false)

        (* Move integer e of size ty into a memory location.
         * Returns a quadruple: 
         *  (INTEGER, return ty, effective address of memory location, cleanup code) 
         *) 
      and convertIntToFloat(ty, e) = 
          let val opnd = operand e 
          in  (* a 16/32-bit memory operand can be FILDed in place *)
              if isMemOpnd opnd andalso (ty = 16 orelse ty = 32)
              then (INTEGER, ty, opnd, [])
              else 
                (* otherwise spill the value to a temporary memory slot via
                 * cvti2f, and hand back its cleanup code for later *)
                let val {instrs, tempMem, cleanup} = 
                        cvti2f{ty=ty, src=opnd, an=getAnnotations()}
                in  emits instrs;
                    (INTEGER, 32, tempMem, cleanup)
                end
          end
 
      (*========================================================
       * Sethi-Ullman based floating point code generation as 
       * implemented by Lal 
       *========================================================*)

      and fld(sz, opnd) =
          (* push a real of the given precision onto the x87 stack *)
          (case sz of
             32 => I.FLDS opnd
           | 64 => I.FLDL opnd
           | 80 => I.FLDT opnd
           | _  => error "fld")

      and fild(sz, opnd) =
          (* push an integer of the given width onto the x87 stack *)
          (case sz of
             16 => I.FILD opnd
           | 32 => I.FILDL opnd
           | 64 => I.FILDLL opnd
           | _  => error "fild")

      and fxld(kind, ty, opnd) =
          (* dispatch on the operand class: INTEGER -> FILD, REAL -> FLD *)
          (case kind of
             INTEGER => fild(ty, opnd)
           | REAL    => fld(ty, opnd))

      and fstp(sz, opnd) =
          (* store %ST(0) with the given precision and pop the stack *)
          (case sz of
             32 => I.FSTPS opnd
           | 64 => I.FSTPL opnd
           | 80 => I.FSTPT opnd
           | _  => error "fstp")

          (* generate code for floating point stores *)
      and fstore'(fty, ea, d, mem, an) = 
          (* push the value onto the x87 stack (directly for a register,
           * via Sethi-Ullman evaluation otherwise), then store-and-pop *)
          (case d of
             T.FREG(fty, fs) => emit(fld(fty, I.FDirect fs))
           | _ => reduceFexp(fty, d, []);
           mark(fstp(fty, address(ea, mem)), an)
          )

          (* generate code for floating point loads *)
      and fload'(fty, ea, mem, fd, an) = 
            let val ea = address(ea, mem)
            in  mark(fld(fty, ea), an); 
                (* result is on top of the stack; store-and-pop into fd
                 * unless fd is %ST(0) itself *)
                if CB.sameColor(fd,ST0) then () 
                else emit(fstp(fty, I.FDirect fd))
            end

          (* evaluate e with the Sethi-Ullman scheme; result in %ST(0) *)
      and fexpr' e = (reduceFexp(64, e, []); C.ST(0))
          
          (* generate floating point expression and put the result in fd *)
      and doFexpr'(fty, T.FREG(_, fs), fd, an) = 
            (* register-to-register: nothing to do when same color *)
            (if CB.sameColor(fs,fd) then () 
             else mark(I.FCOPY{dst=[fd], src=[fs], tmp=NONE}, an)
            )
        | doFexpr'(_, T.FLOAD(fty, ea, mem), fd, an) = 
            fload'(fty, ea, mem, fd, an)
        | doFexpr'(fty, T.FEXT fexp, fd, an) = 
            (* extension expressions deliver their result in %ST(0) *)
            (ExtensionComp.compileFext (reducer()) {e=fexp, fd=fd, an=an};
             if CB.sameColor(fd,ST0) then () else emit(fstp(fty, I.FDirect fd))
            )
        | doFexpr'(fty, e, fd, an) =
            (* general case: Sethi-Ullman evaluation into %ST(0), then
             * store-and-pop into fd unless fd is %ST(0) *)
            (reduceFexp(fty, e, []);
             if CB.sameColor(fd,ST0) then ()
             else mark(fstp(fty, I.FDirect fd), an)
            )

          (* 
           * Generate floating point expression using Sethi-Ullman's scheme:
           * This function evaluates a floating point expression, 
           * and put result in %ST(0).
           *)
      and reduceFexp(fty, fexp, an)  = 
          let val ST = I.ST(C.ST 0)
              val ST1 = I.ST(C.ST 1)
              (* integer->float conversions may require post-processing;
               * such instructions are collected here and emitted after
               * code generation completes *)
              val cleanupCode = ref [] : I.instruction list ref

              (* SU tree: the leading int is the Sethi-Ullman number
               * (estimate of fp-stack depth needed by the subtree);
               * foldable leaves are labelled 0 *)
              datatype su_tree = 
                LEAF of int * T.fexp * ans
              | BINARY of int * T.fty * fbinop * su_tree * su_tree * ans
              | UNARY of int * T.fty * I.funOp * su_tree * ans
              and fbinop = FADD | FSUB | FMUL | FDIV
                         | FIADD | FISUB | FIMUL | FIDIV
              withtype ans = Annotations.annotations
 
              fun label(LEAF(n, _, _)) = n
                | label(BINARY(n, _, _, _, _, _)) = n
                | label(UNARY(n, _, _, _, _)) = n

              fun annotate(LEAF(n, x, an), a)  = LEAF(n,x,a::an)
                | annotate(BINARY(n,t,b,x,y,an), a) = BINARY(n,t,b,x,y,a::an)
                | annotate(UNARY(n,t,u,x,an), a) = UNARY(n,t,u,x,a::an)

              (* Generate expression tree with sethi-ullman numbers *)
              fun su(e as T.FREG _)       = LEAF(1, e, [])
                | su(e as T.FLOAD _)      = LEAF(1, e, [])
                | su(e as T.CVTI2F _)     = LEAF(1, e, [])
                | su(T.CVTF2F(_, _, t))   = su t
                | su(T.FMARK(t, a))       = annotate(su t, a)
                | su(T.FABS(fty, t))      = suUnary(fty, I.FABS, t)
                | su(T.FNEG(fty, t))      = suUnary(fty, I.FCHS, t)
                | su(T.FSQRT(fty, t))     = suUnary(fty, I.FSQRT, t)
                | su(T.FADD(fty, t1, t2)) = suComBinary(fty,FADD,FIADD,t1,t2)
                | su(T.FMUL(fty, t1, t2)) = suComBinary(fty,FMUL,FIMUL,t1,t2)
                | su(T.FSUB(fty, t1, t2)) = suBinary(fty,FSUB,FISUB,t1,t2)
                | su(T.FDIV(fty, t1, t2)) = suBinary(fty,FDIV,FIDIV,t1,t2)
                | su _ = error "su"
         
              (* Try to fold the memory operand or integer conversion;
               * the boolean result flags an integer operand, which selects
               * the FIxxx opcode family in suBinary *) 
              and suFold(e as T.FREG _) = (LEAF(0, e, []), false)
                | suFold(e as T.FLOAD _) = (LEAF(0, e, []), false)
                | suFold(e as T.CVTI2F(_,(16 | 32),_)) = (LEAF(0, e, []), true)
                | suFold(T.CVTF2F(_, _, t)) = suFold t
                | suFold(T.FMARK(t, a)) = 
                  let val (t, integer) = suFold t 
                  in  (annotate(t, a), integer) end
                | suFold e = (su e, false)

              (* Form unary tree *)
              and suUnary(fty, funary, t) = 
                  let val t = su t
                  in  UNARY(label t, fty, funary, t, [])
                  end

              (* Form binary tree; standard Sethi-Ullman labelling *)
              and suBinary(fty, binop, ibinop, t1, t2) =
                  let val t1 = su t1
                      val (t2, integer) = suFold t2
                      val n1 = label t1
                      val n2 = label t2
                      val n  = if n1=n2 then n1+1 else Int.max(n1,n2)
                      val myOp = if integer then ibinop else binop
                  in  BINARY(n, fty, myOp, t1, t2, []) 
                  end

              (* Try to fold in the operand if possible. 
               * This only applies to commutative operations.
               *)
              and suComBinary(fty, binop, ibinop, t1, t2) =
                  let val (t1, t2) = if foldableFexp t2 
                                     then (t1, t2) else (t2, t1)
                  in  suBinary(fty, binop, ibinop, t1, t2) end

              (* same fp register (same type, unannotated)?  lets gencode
               * recognize operands that are already on the stack top *)
              and sameTree(LEAF(_, T.FREG(t1,f1), []), 
                           LEAF(_, T.FREG(t2,f2), [])) = 
                        t1 = t2 andalso CB.sameColor(f1,f2)
                | sameTree _ = false

              (* Traverse tree and generate code *)
              (* Traverse the SU-labelled tree and emit code, leaving the
               * result in %ST(0).
               *  - a LEAF is loaded directly (FLD/FILD via fxld);
               *  - a BINARY whose right child is a foldable leaf (label 0)
               *    folds that leaf in as the source operand;
               *  - otherwise the subtree with the larger label is evaluated
               *    first, and when both labels exceed the stack depth (7)
               *    one intermediate result is spilled to a temporary.
               *)
              fun gencode(LEAF(_, t, an)) = mark(fxld(leafEA t), an)
                | gencode(BINARY(_, _, binop, x, t2 as LEAF(0, y, a1), a2)) = 
                  let val _          = gencode x
                      val (_, fty, src) = leafEA y
                      fun gen(code) = mark(code, a1 @ a2)
                      (* choose the 32/64-bit opcode: a memory operand fixes
                       * the operand size; a register operand uses 64-bit *)
                      fun binary(oper32, oper64) =
                          if sameTree(x, t2) then 
                             gen(I.FBINARY{binOp=oper64, src=ST, dst=ST})
                          else
                             let val oper = 
                                   if isMemOpnd src then
                                      case fty of
                                        32 => oper32
                                      | 64 => oper64
                                      | _  => error "gencode: BINARY"
                                   else oper64
                             in gen(I.FBINARY{binOp=oper, src=src, dst=ST}) end
                      (* integer operand: choose 16- vs 32-bit opcode *)
                      fun ibinary(oper16, oper32) =
                          let val oper = case fty of
                                           16 => oper16 
                                         | 32 => oper32 
                                         | _  => error "gencode: IBINARY"
                          in  gen(I.FIBINARY{binOp=oper, src=src}) end
                  in  case binop of
                        FADD => binary(I.FADDS, I.FADDL) 
                        (* bug fix: was I.FDIVS — single-precision memory
                         * subtraction emitted a divide *)
                      | FSUB => binary(I.FSUBS, I.FSUBL) 
                      | FMUL => binary(I.FMULS, I.FMULL) 
                      | FDIV => binary(I.FDIVS, I.FDIVL) 
                      | FIADD => ibinary(I.FIADDS, I.FIADDL) 
                        (* bug fix: was I.FIDIVS — 16-bit integer
                         * subtraction emitted a divide *)
                      | FISUB => ibinary(I.FISUBS, I.FISUBL) 
                      | FIMUL => ibinary(I.FIMULS, I.FIMULL) 
                      | FIDIV => ibinary(I.FIDIVS, I.FIDIVL) 
                  end  
                | gencode(BINARY(_, fty, binop, t1, t2, an)) = 
                  let fun doit(t1, t2, oper, operP, operRP) = 
                      let (* oper[P] =>  ST(1) := ST oper ST(1); [pop] 
                           * operR[P] => ST(1) := ST(1) oper ST; [pop]
                           *)
                           val n1 = label t1
                           val n2 = label t2
                      in if n1 < n2 andalso n1 <= 7 then 
                           (gencode t2;
                            gencode t1;
                            mark(I.FBINARY{binOp=operP, src=ST, dst=ST1}, an))
                         else if n2 <= n1 andalso n2 <= 7 then
                           (gencode t1;
                            gencode t2;
                            mark(I.FBINARY{binOp=operRP, src=ST, dst=ST1}, an))
                         else 
                         let (* both labels > 7: spill t2's result *)
                             val fs = I.FDirect(newFreg())
                         in  gencode t2;
                             emit(fstp(fty, fs));
                             gencode t1;
                             mark(I.FBINARY{binOp=oper, src=fs, dst=ST}, an)
                         end
                     end
                  in case binop of 
                       FADD => doit(t1,t2,I.FADDL,I.FADDP,I.FADDP)
                     | FMUL => doit(t1,t2,I.FMULL,I.FMULP,I.FMULP)
                     | FSUB => doit(t1,t2,I.FSUBL,I.FSUBP,I.FSUBRP)
                     | FDIV => doit(t1,t2,I.FDIVL,I.FDIVP,I.FDIVRP)
                     | _ => error "gencode.BINARY"
                  end
                | gencode(UNARY(_, _, unaryOp, su, an)) = 
                   (gencode(su); mark(I.FUNARY(unaryOp),an))

              (* Generate code for a leaf.
               * Returns the operand class (INTEGER/REAL), its type, and
               * an effective address.
               *) 
              and leafEA(T.FREG(fty, f)) = (REAL, fty, I.FDirect f)
                | leafEA(T.FLOAD(fty, ea, mem)) = (REAL, fty, address(ea, mem))
                | leafEA(T.CVTI2F(_, 32, t)) = int2real(32, t)
                | leafEA(T.CVTI2F(_, 16, t)) = int2real(16, t)
                | leafEA(T.CVTI2F(_, 8, t))  = int2real(8, t)
                | leafEA _ = error "leafEA"

              (* place an integer in memory so it can be FILDed; any
               * cleanup instructions are deferred until after gencode *)
              and int2real(ty, e) = 
                  let val (_, ty, ea, cleanup) = convertIntToFloat(ty, e)
                  in  cleanupCode := !cleanupCode @ cleanup;
                      (INTEGER, ty, ea)
                  end

         in  gencode(su fexp);
             emits(!cleanupCode)
          end (*reduceFexp*)

       (*========================================================
        * This section generates 3-address style floating 
        * point code.  
        *========================================================*)

      and isize bits =
          (* integer operand width (used by FILOAD below) *)
          (case bits of
             16 => I.I16
           | 32 => I.I32
           | _  => error "isize")

      and fstore''(fty, ea, d, mem, an) = 
          (floatingPointUsed := true;
           mark(I.FMOVE{fsize=fsize fty, dst=address(ea,mem), 
                        src=foperand(fty, d)},
                an)
          )

      and fload''(fty, ea, mem, d, an) = 
          (floatingPointUsed := true;
           mark(I.FMOVE{fsize=fsize fty, src=address(ea,mem), 
                        dst=RealReg d}, an)
          )

      and fiload''(ity, ea, d, an) = 
          (floatingPointUsed := true;
           mark(I.FILOAD{isize=isize ity, ea=ea, dst=RealReg d}, an)
          )

      and fexpr''(e as T.FREG(_,f)) = 
          if isFMemReg f then transFexpr e else f
        | fexpr'' e = transFexpr e

      (* Evaluate e (at 64-bit precision) into a fresh floating
       * register and return that register. *)
      and transFexpr e =
          let val dst = newFreg()
              val _   = doFexpr''(64, e, dst, [])
          in  dst
          end

         (* 
          * Process a floating point operand.  Put operand in register 
          * when possible.  The operand should match the given fty.
          *)
      and foperand(fty, exp) =
          (case exp of
             (* register operand: usable directly only when the
              * precision matches *)
             T.FREG(fty', f) =>
               if fty = fty' then RealReg f else I.FPR(fexpr'' exp)
             (* float-to-float conversion is a nop on the x86 *)
           | T.CVTF2F(_, _, e) => foperand(fty, e)
             (* fold the memory operand when the precision matches *)
           | T.FLOAD(fty', ea, mem) =>
               if fty = fty' then address(ea, mem) else I.FPR(fexpr'' exp)
             (* otherwise evaluate into a register *)
           | _ => I.FPR(fexpr'' exp))

         (* 
          * Process a floating point operand. 
          * Try to fold in a memory operand or conversion from an integer.
          *)
      (* Returns (kind, ty, opnd, cleanup): kind is REAL or INTEGER,
       * ty is the operand's precision, opnd is the effective operand,
       * and cleanup is a list of instructions to be emitted after the
       * operand has been consumed. *)
      and fioperand(T.FREG(fty,f)) = (REAL, fty, RealReg f, [])
        | fioperand(T.FLOAD(fty, ea, mem)) = 
             (REAL, fty, address(ea, mem), [])
        | fioperand(T.CVTF2F(_, _, e)) = fioperand(e) (* nop on the x86 *)
        | fioperand(T.CVTI2F(_, ty, e)) = convertIntToFloat(ty, e)
          (* NOTE(review): the FMARK annotation is discarded here *)
        | fioperand(T.FMARK(e,an)) = fioperand(e) (* XXX *)
        | fioperand(e) = (REAL, 64, I.FPR(fexpr'' e), [])

          (* Generate a binary operator.  Since the real binary operators
           * do not take memory as a destination, we also ensure that this
           * does not happen.
           *)
      (* Generate a floating point binary operation in fast-fp mode.
       * binOp/ibinOp are the opcodes for real/integer right operands;
       * binOpR/ibinOpR are their reversed forms, used when the two
       * sources are swapped below so that a memory operand always
       * ends up in rsrc.  The result is written to register fd. *)
      and fbinop(targetFty, 
                 binOp, binOpR, ibinOp, ibinOpR, lsrc, rsrc, fd, an) = 
              (* Put the mem operand in rsrc *)
          let val _ = floatingPointUsed := true;
              (* would this expression be addressed as memory? *)
              fun isMemOpnd(T.FREG(_, f)) = isFMemReg f
                | isMemOpnd(T.FLOAD _) = true
                | isMemOpnd(T.CVTI2F(_, (16 | 32), _)) = true
                | isMemOpnd(T.CVTF2F(_, _, t)) = isMemOpnd t
                | isMemOpnd(T.FMARK(t, _)) = isMemOpnd t
                | isMemOpnd _ = false
              (* swap sources (and switch to the reversed opcodes)
               * when the left source is a memory operand *)
              val (binOp, ibinOp, lsrc, rsrc) = 
                  if isMemOpnd lsrc then (binOpR, ibinOpR, rsrc, lsrc)
                  else (binOp, ibinOp, lsrc, rsrc)
              val lsrc = foperand(targetFty, lsrc)
              val (kind, fty, rsrc, code) = fioperand(rsrc)
              (* If the target precision is not 64 bits, compute into a
               * fresh fp register first and then FMOVE the result (at
               * targetFty precision) into fd. *)
              fun dstMustBeFreg f =
                  if targetFty <> 64 then
                  let val tmpR = newFreg() 
                      val tmp  = I.FPR tmpR
                  in  mark(f tmp, an); 
                      emit(I.FMOVE{fsize=fsize targetFty, 
                                   src=tmp, dst=RealReg fd})
                  end
                  else mark(f(RealReg fd), an)
          in  case kind of
                REAL => 
                  dstMustBeFreg(fn dst => 
                                   I.FBINOP{fsize=fsize fty, binOp=binOp, 
                                            lsrc=lsrc, rsrc=rsrc, dst=dst}) 
              | INTEGER => 
                  (dstMustBeFreg(fn dst =>
                                    I.FIBINOP{isize=isize fty, binOp=ibinOp, 
                                              lsrc=lsrc, rsrc=rsrc, dst=dst});
                   (* cleanup code from the integer conversion, if any *)
                   emits code
                  )
          end
 
      (* Generate a floating point unary operation unOp on src,
       * writing the result (of precision fty) into register fd. *)
      and funop(fty, unOp, src, fd, an) =
          let val opnd  = foperand(fty, src)
              val instr = I.FUNOP{fsize=fsize fty, unOp=unOp,
                                  src=opnd, dst=RealReg fd}
          in  mark(instr, an)
          end

      (* Fast-fp mode: generate code to evaluate floating expression e,
       * of precision fty, into destination register fd, with
       * annotation list an. *)
      and doFexpr''(fty, e, fd, an) = 
          case e of
            (* register move; skip it when source and destination
             * are already the same register *)
            T.FREG(_,fs) => if CB.sameColor(fs,fd) then () 
                            else fcopy''(fty, [fd], [fs], an)
            (* Stupid x86 does everything as 80-bits internally. *)

            (* Binary operators *)
          | T.FADD(_, a, b) => fbinop(fty, 
                                      I.FADDL, I.FADDL, I.FIADDL, I.FIADDL, 
                                      a, b, fd, an)
          | T.FSUB(_, a, b) => fbinop(fty,
                                      I.FSUBL, I.FSUBRL, I.FISUBL, I.FISUBRL,
                                      a, b, fd, an)
          | T.FMUL(_, a, b) => fbinop(fty,
                                      I.FMULL, I.FMULL, I.FIMULL, I.FIMULL,
                                      a, b, fd, an)
          | T.FDIV(_, a, b) => fbinop(fty,
                                      I.FDIVL, I.FDIVRL, I.FIDIVL, I.FIDIVRL,
                                      a, b, fd, an)

            (* Unary operators *)
          | T.FNEG(_, a) => funop(fty, I.FCHS, a, fd, an)
          | T.FABS(_, a) => funop(fty, I.FABS, a, fd, an)
          | T.FSQRT(_, a) => funop(fty, I.FSQRT, a, fd, an)

            (* Load *)
          | T.FLOAD(fty,ea,mem) => fload''(fty, ea, mem, fd, an)

            (* Type conversions *)
            (* float-to-float conversion is a nop here *)
          | T.CVTF2F(_, _, e) => doFexpr''(fty, e, fd, an)
            (* int-to-float: load the integer directly as a float,
             * then emit any cleanup code from the conversion *)
          | T.CVTI2F(_, ty, e) => 
            let val (_, ty, ea, cleanup) = convertIntToFloat(ty, e)
            in  fiload''(ty, ea, fd, an); 
                emits cleanup
            end

            (* apply the register-annotation function to fd *)
          | T.FMARK(e,A.MARKREG f) => (f fd; doFexpr''(fty, e, fd, an))
          | T.FMARK(e, a) => doFexpr''(fty, e, fd, a::an)
            (* record the control dependence as an annotation *)
          | T.FPRED(e, c) => doFexpr''(fty, e, fd, A.CTRLUSE c::an)
          | T.FEXT fexp =>
             ExtensionComp.compileFext (reducer()) {e=fexp, fd=fd, an=an}
          | _ => error("doFexpr''")

       (*========================================================
        * Tie the two styles of fp code generation together
        *========================================================*)
      (* Dispatch a floating point store to the fast (3-address) or
       * classic code generator depending on the current mode. *)
      and fstore(fty, ea, d, mem, an) =
          let val fast = enableFastFPMode andalso !fast_floating_point
          in  if fast then fstore''(fty, ea, d, mem, an)
              else fstore'(fty, ea, d, mem, an)
          end
      (* Dispatch a floating point load to the fast (3-address) or
       * classic code generator depending on the current mode. *)
      and fload(fty, ea, d, mem, an) =
          let val fast = enableFastFPMode andalso !fast_floating_point
          in  if fast then fload''(fty, ea, d, mem, an)
              else fload'(fty, ea, d, mem, an)
          end
      (* Reduce a floating point expression with whichever fp code
       * generator is currently enabled. *)
      and fexpr e =
          let val fast = enableFastFPMode andalso !fast_floating_point
          in  if fast then fexpr'' e else fexpr' e
          end
      (* Evaluate a floating point expression into fd with whichever
       * fp code generator is currently enabled. *)
      and doFexpr(fty, e, fd, an) =
          let val fast = enableFastFPMode andalso !fast_floating_point
          in  if fast then doFexpr''(fty, e, fd, an)
              else doFexpr'(fty, e, fd, an)
          end

      (*================================================================
       * Optimizations for x := x op y 
       * Special optimizations: 
       * Generate a binary operator; the result must be in memory.
       * The source must not be in memory.
       *================================================================*)
      (* Emit a binary op whose destination is the memory location
       * (dst, mem); the source is an immediate or a register. *)
      and binaryMem(binOp, src, dst, mem, an) =
          let val srcOpnd = immedOrReg(operand src)
              val dstOpnd = address(dst, mem)
          in  mark(I.BINARY{binOp=binOp, src=srcOpnd, dst=dstOpnd}, an)
          end
      (* Emit a unary op directly on the memory location (opnd, mem). *)
      and unaryMem(unOp, opnd, mem, an) =
          let val ea = address(opnd, mem)
          in  mark(I.UNARY{unOp=unOp, opnd=ea}, an)
          end

      (* Is this expression the integer literal one? *)
      and isOne e =
          (case e of
             T.LI n => n = one
           | _ => false)

      (* 
       * Perform optimizations based on recognizing 
       *    x := x op y    or
       *    x := y op x 
       * first.
       *)
      (* Store d of precision ty to address ea in region mem, first
       * trying to recognize read-modify-write patterns of the form
       *    mem[ea] := mem[ea] op x   or   mem[ea] := x op mem[ea]
       * so that a single memory-destination instruction (from the
       * opcode record) can be emitted instead of load/op/store.
       * doStore is the fallback that emits a plain store. *)
      and store(ty, ea, d, mem, an, 
                {INC,DEC,ADD,SUB,NOT,NEG,SHL,SHR,SAR,OR,AND,XOR},
                doStore
               ) = 
          let fun default() = doStore(ea, d, mem, an)
              (* non-commutative case with the load as left operand;
               * use the unary opcode (e.g. DEC) when x is one.
               * Both t and t' must match the store's precision. *)
              fun binary1(t, t', unary, binary, ea', x) =  
                  if t = ty andalso t' = ty then
                     if MLTreeUtils.eqRexp(ea, ea') then
                        if isOne x then unaryMem(unary, ea, mem, an)
                        else binaryMem(binary, x, ea, mem, an)
                      else default()
                  else default()
              (* mem[ea] := unOp mem[ea] *)
              fun unary(t,unOp, ea') = 
                  if t = ty andalso MLTreeUtils.eqRexp(ea, ea') then
                     unaryMem(unOp, ea, mem, an)
                  else default() 
              (* non-commutative case (shifts) with the load as the
               * left operand; no unary special case *)
              fun binary(t,t',binOp,ea',x) =
                  if t = ty andalso t' = ty andalso
                     MLTreeUtils.eqRexp(ea, ea') then
                      binaryMem(binOp, x, ea, mem, an)
                  else default()

              (* commutative operator: recognize a load of ea on either
               * side, and use the unary opcode (e.g. INC) when the
               * other operand is one *)
              fun binaryCom1(t,unOp,binOp,x,y) = 
              if t = ty then
              let fun again() =
                    case y of
                      T.LOAD(ty',ea',_) =>
                        if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then
                           if isOne x then unaryMem(unOp, ea, mem, an)
                           else binaryMem(binOp,x,ea,mem,an)
                        else default()
                    | _ => default()
              in  case x of 
                    T.LOAD(ty',ea',_) =>
                      if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then
                         if isOne y then unaryMem(unOp, ea, mem, an)
                         else binaryMem(binOp,y,ea,mem,an)
                      else again()
                  | _ => again()
              end 
              else default()

              (* commutative operator without a unary special case *)
              fun binaryCom(t,binOp,x,y) = 
              if t = ty then
              let fun again() =
                    case y of
                      T.LOAD(ty',ea',_) =>
                        if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then
                           binaryMem(binOp,x,ea,mem,an)
                        else default()
                    | _ => default()
              in  case x of 
                    T.LOAD(ty',ea',_) =>
                      if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then
                         binaryMem(binOp,y,ea,mem,an)
                      else again()
                  | _ => again()
              end 
              else default()

          in  case d of
                T.ADD(t,x,y) => binaryCom1(t,INC,ADD,x,y)
              | T.SUB(t,T.LOAD(t',ea',_),x) => binary1(t,t',DEC,SUB,ea',x)
              | T.ORB(t,x,y) => binaryCom(t,OR,x,y)
              | T.ANDB(t,x,y) => binaryCom(t,AND,x,y)
              | T.XORB(t,x,y) => binaryCom(t,XOR,x,y)
              | T.SLL(t,T.LOAD(t',ea',_),x) => binary(t,t',SHL,ea',x)
              | T.SRL(t,T.LOAD(t',ea',_),x) => binary(t,t',SHR,ea',x)
              | T.SRA(t,T.LOAD(t',ea',_),x) => binary(t,t',SAR,ea',x)
              | T.NEG(t,T.LOAD(t',ea',_)) => unary(t,NEG,ea')
              | T.NOTB(t,T.LOAD(t',ea',_)) => unary(t,NOT,ea')
              | _ => default()
          end (* store *)
 
          (* generate code for a statement *)
      (* Generate code for one MLTREE statement, with annotations an. *)
      and stmt(T.MV(_, rd, e), an) = doExpr(e, rd, an)
        | stmt(T.FMV(fty, fd, e), an) = doFexpr(fty, e, fd, an) 
        | stmt(T.CCMV(ccd, e), an) = doCCexpr(e, ccd, an) 
        | stmt(T.COPY(_, dst, src), an) = copy(dst, src, an)
        | stmt(T.FCOPY(fty, dst, src), an) = fcopy(fty, dst, src, an)
        | stmt(T.JMP(e, labs), an) = jmp(e, labs, an)
        | stmt(T.CALL{funct, targets, defs, uses, region, pops, ...}, an) = 
             call(funct,targets,defs,uses,region,[],an, pops)
          (* a call wrapped in FLOW_TO carries the labels it may cut to *)
        | stmt(T.FLOW_TO(T.CALL{funct, targets, defs, uses, region, pops, ...},
                         cutTo), an) = 
             call(funct,targets,defs,uses,region,cutTo,an, pops)
        | stmt(T.RET _, an) = mark(I.RET NONE, an)
          (* integer stores go through the x := x op y optimizer above,
           * with the opcode/store table for the given precision *)
        | stmt(T.STORE(8, ea, d, mem), an)  = 
             store(8, ea, d, mem, an, opcodes8, store8)
        | stmt(T.STORE(16, ea, d, mem), an) = 
             store(16, ea, d, mem, an, opcodes16, store16)
        | stmt(T.STORE(32, ea, d, mem), an) = 
             store(32, ea, d, mem, an, opcodes32, store32)

        | stmt(T.FSTORE(fty, ea, d, mem), an) = fstore(fty, ea, d, mem, an)
        | stmt(T.BCC(cc, lab), an) = branch(cc, lab, an)
        | stmt(T.DEFINE l, _) = defineLabel l
        | stmt(T.ANNOTATION(s, a), an) = stmt(s, a::an)
        | stmt(T.EXT s, an) =
             ExtensionComp.compileSext (reducer()) {stm=s, an=an} 
          (* anything else: expand into simpler statements and retry *)
        | stmt(s, _) = doStmts(Gen.compileStm s)

      (* Compile a statement (or a list of them) with no annotations. *)
      and doStmt s = stmt(s, [])
      and doStmts ss = List.app doStmt ss

      (* Reset per-cluster state and open a new cluster.
       * Note: memRegsUsed must be cleared by the client;
       * i.e. "if rewriteMemReg then memRegsUsed := 0w0 else ()". *)
      and beginCluster' _ =
          (trapLabel := NONE;
           floatingPointUsed := false;
           beginCluster 0
          )
      (* Close the current cluster: emit the pending overflow-trap
       * label (with an INTO) if one was requested, and, if floating
       * point was used, allocate one extra fp register in case no
       * explicit register was ever allocated. *)
      and endCluster' a =
          ((case !trapLabel of
              SOME(_, lab) => (defineLabel lab; emit(I.INTO))
            | NONE => ());
           if !floatingPointUsed then ignore(newFreg()) else ();
           endCluster a
          )

      (* The reducer record handed to the extension compilers
       * (compileFext/compileSext above), giving them access to this
       * module's expression reducers and instruction stream. *)
      and reducer() = 
          TS.REDUCER{reduceRexp    = expr,
                    reduceFexp    = fexpr,
                    reduceCCexp   = ccExpr,
                    reduceStm     = stmt,
                    operand       = operand,
                    reduceOperand = reduceOpnd,
                    addressOf     = fn e => address(e, I.Region.memory), (*XXX*)
                    emit          = mark,
                    instrStream   = instrStream, 
                    mltreeStream  = self() 
                   }

      (* The MLTREE stream interface returned to the client; each
       * statement emitted through it is compiled by doStmt. *)
      and self() =
          TS.S.STREAM
          {  beginCluster   = beginCluster',
             endCluster     = endCluster',
             emit           = doStmt,
             pseudoOp       = pseudoOp,
             defineLabel    = defineLabel,
             entryLabel     = entryLabel,
             comment        = comment,
             annotation     = annotation,
             getAnnotations = getAnnotations,
             exitBlock      = fn mlrisc => exitBlock(cellset mlrisc)
          }

  in  self()
  end 

end (* functor *)

end (* local *)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0