Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /sml/trunk/src/MLRISC/x86/mltree/x86.sml
ViewVC logotype

Annotation of /sml/trunk/src/MLRISC/x86/mltree/x86.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1185 - (view) (download)

1 : jhr 1117 (* x86.sml
2 : monnier 247 *
3 :     * COPYRIGHT (c) 1998 Bell Laboratories.
4 : george 545 *
5 :     * This is a revised version that takes into account of
6 :     * the extended x86 instruction set, and has better handling of
7 :     * non-standard types. I've factored out the integer/floating point
8 :     * comparison code, added optimizations for conditional moves.
9 :     * The latter generates SETcc and CMOVcc (Pentium Pro only) instructions.
10 :     * To avoid problems, I have tried to incorporate as much of
11 :     * Lal's original magic incantations as possible.
12 : monnier 247 *
13 : george 545 * Some changes:
14 :     *
15 : blume 1183 * 1. REMU/REMS are now supported
16 : george 545 * 2. COND is supported by generating SETcc and/or CMOVcc; this
17 :     * may require at least a Pentium II to work.
18 :     * 3. Division by a constant has been optimized. Division by
19 :     * a power of 2 generates SHRL or SARL.
20 :     * 4. Better addressing mode selection has been implemented. This should
21 :     * improve array indexing on SML/NJ.
22 :     * 5. Generate testl/testb instead of andl whenever appropriate. This
23 :     * is recommended by the Intel Optimization Guide and seems to improve
24 :     * boxity tests on SML/NJ.
25 : leunga 731 *
26 :     * More changes for floating point:
27 :     * A new mode is implemented which generates pseudo 3-address instructions
28 :     * for floating point. These instructions are register allocated the
29 :     * normal way, with the virtual registers mapped onto a set of pseudo
30 :     * %fp registers. These registers are then mapped onto the %st registers
31 :     * with a new postprocessing phase.
32 :     *
33 : george 545 * -- Allen
34 : monnier 247 *)
35 : george 545 local
36 :     val rewriteMemReg = true (* should we rewrite memRegs *)
37 : leunga 731 val enableFastFPMode = true (* set this to false to disable the mode *)
38 : george 545 in
39 :    
40 : monnier 247 functor X86
41 :     (structure X86Instr : X86INSTR
42 : leunga 797 structure MLTreeUtils : MLTREE_UTILS
43 : george 933 where T = X86Instr.T
44 : george 555 structure ExtensionComp : MLTREE_EXTENSION_COMP
45 : george 933 where I = X86Instr and T = X86Instr.T
46 : george 984 structure MLTreeStream : MLTREE_STREAM
47 :     where T = ExtensionComp.T
48 : george 545 datatype arch = Pentium | PentiumPro | PentiumII | PentiumIII
49 :     val arch : arch ref
50 : leunga 593 val cvti2f :
51 : leunga 815 {ty: X86Instr.T.ty,
52 :     src: X86Instr.operand,
53 :     (* source operand, guaranteed to be non-memory! *)
54 :     an: Annotations.annotations ref (* cluster annotations *)
55 :     } ->
56 : leunga 593 {instrs : X86Instr.instruction list,(* the instructions *)
57 :     tempMem: X86Instr.operand, (* temporary for CVTI2F *)
58 :     cleanup: X86Instr.instruction list (* cleanup code *)
59 :     }
60 : leunga 731 (* When the following flag is set, we allocate floating point registers
61 :     * directly on the floating point stack
62 :     *)
63 :     val fast_floating_point : bool ref
64 : george 545 ) : sig include MLTREECOMP
65 :     val rewriteMemReg : bool
66 :     end =
67 : monnier 247 struct
68 : leunga 775 structure I = X86Instr
69 :     structure T = I.T
70 : george 984 structure TS = ExtensionComp.TS
71 : george 545 structure C = I.C
72 :     structure Shuffle = Shuffle(I)
73 : monnier 247 structure W32 = Word32
74 : george 545 structure A = MLRiscAnnotations
75 : george 909 structure CFG = ExtensionComp.CFG
76 : george 889 structure CB = CellsBasis
77 : monnier 247
78 : george 984 type instrStream = (I.instruction,C.cellset,CFG.cfg) TS.stream
79 :     type mltreeStream = (T.stm,T.mlrisc list,CFG.cfg) TS.stream
80 : leunga 565
81 :     datatype kind = REAL | INTEGER
82 : george 545
83 :     structure Gen = MLTreeGen
84 :     (structure T = T
85 : jhr 1117 structure Cells = C
86 : george 545 val intTy = 32
87 :     val naturalWidths = [32]
88 :     datatype rep = SE | ZE | NEITHER
89 :     val rep = NEITHER
90 :     )
91 :    
92 : monnier 411 fun error msg = MLRiscErrorMsg.error("X86",msg)
93 : monnier 247
94 : george 545 (* Should we perform automatic MemReg translation?
95 :     * If this is on, we can avoid doing RewritePseudo phase entirely.
96 :     *)
97 :     val rewriteMemReg = rewriteMemReg
98 : leunga 731
99 :     (* The following hardcoded *)
100 : leunga 744 fun isMemReg r = rewriteMemReg andalso
101 : george 889 let val r = CB.registerNum r
102 : leunga 744 in r >= 8 andalso r < 32
103 :     end
104 : leunga 731 fun isFMemReg r = if enableFastFPMode andalso !fast_floating_point
105 : george 889 then let val r = CB.registerNum r
106 : leunga 744 in r >= 8 andalso r < 32 end
107 : leunga 731 else true
108 : leunga 744 val isAnyFMemReg = List.exists (fn r =>
109 : george 889 let val r = CB.registerNum r
110 : leunga 744 in r >= 8 andalso r < 32 end
111 :     )
112 : monnier 247
113 : george 555 val ST0 = C.ST 0
114 :     val ST7 = C.ST 7
115 : leunga 797 val one = T.I.int_1
116 : george 555
117 : leunga 797 val opcodes8 = {INC=I.INCB,DEC=I.DECB,ADD=I.ADDB,SUB=I.SUBB,
118 :     NOT=I.NOTB,NEG=I.NEGB,
119 :     SHL=I.SHLB,SHR=I.SHRB,SAR=I.SARB,
120 :     OR=I.ORB,AND=I.ANDB,XOR=I.XORB}
121 :     val opcodes16 = {INC=I.INCW,DEC=I.DECW,ADD=I.ADDW,SUB=I.SUBW,
122 :     NOT=I.NOTW,NEG=I.NEGW,
123 :     SHL=I.SHLW,SHR=I.SHRW,SAR=I.SARW,
124 :     OR=I.ORW,AND=I.ANDW,XOR=I.XORW}
125 :     val opcodes32 = {INC=I.INCL,DEC=I.DECL,ADD=I.ADDL,SUB=I.SUBL,
126 :     NOT=I.NOTL,NEG=I.NEGL,
127 :     SHL=I.SHLL,SHR=I.SHRL,SAR=I.SARL,
128 :     OR=I.ORL,AND=I.ANDL,XOR=I.XORL}
129 :    
130 : george 545 (*
131 :     * The code generator
132 :     *)
133 : monnier 411 fun selectInstructions
134 : george 545 (instrStream as
135 : george 1003 TS.S.STREAM{emit=emitInstruction,defineLabel,entryLabel,pseudoOp,
136 :     annotation,getAnnotations,beginCluster,endCluster,exitBlock,comment,...}) =
137 :     let
138 :     val emit = emitInstruction o I.INSTR
139 :     exception EA
140 : monnier 411
141 : george 545 (* label where a trap is generated -- one per cluster *)
142 :     val trapLabel = ref (NONE: (I.instruction * Label.label) option)
143 : monnier 247
144 : leunga 731 (* flag floating point generation *)
145 :     val floatingPointUsed = ref false
146 :    
147 : george 545 (* effective address of an integer register *)
148 : leunga 731 fun IntReg r = if isMemReg r then I.MemReg r else I.Direct r
149 :     and RealReg r = if isFMemReg r then I.FDirect r else I.FPR r
150 : monnier 411
151 : george 545 (* Add an overflow trap *)
152 :     fun trap() =
153 : george 1136 let
154 :     val jmp =
155 : george 545 case !trapLabel of
156 : george 909 NONE => let val label = Label.label "trap" ()
157 : george 1136 val jmp =
158 :     I.ANNOTATION{i=I.jcc{cond=I.O,
159 :     opnd=I.ImmedLabel(T.LABEL label)},
160 :     a=MLRiscAnnotations.BRANCHPROB (Probability.unlikely)}
161 : george 545 in trapLabel := SOME(jmp, label); jmp end
162 :     | SOME(jmp, _) => jmp
163 : george 1003 in emitInstruction jmp end
164 : monnier 411
165 : george 545 val newReg = C.newReg
166 :     val newFreg = C.newFreg
167 : monnier 247
168 : leunga 731 fun fsize 32 = I.FP32
169 :     | fsize 64 = I.FP64
170 :     | fsize 80 = I.FP80
171 :     | fsize _ = error "fsize"
172 :    
173 : george 545 (* mark an expression with a list of annotations *)
174 : george 1009 fun mark'(i,[]) = emitInstruction(i)
175 : george 545 | mark'(i,a::an) = mark'(I.ANNOTATION{i=i,a=a},an)
176 : monnier 247
177 : george 545 (* annotate an expression and emit it *)
178 : george 1009 fun mark(i,an) = mark'(I.INSTR i,an)
179 : monnier 247
180 : george 1003 val emits = app emitInstruction
181 : leunga 731
182 : george 545 (* emit parallel copies for integers
183 :     * Translates parallel copies that involve memregs into
184 :     * individual copies.
185 :     *)
186 :     fun copy([], [], an) = ()
187 :     | copy(dst, src, an) =
188 :     let fun mvInstr{dst as I.MemReg rd, src as I.MemReg rs} =
189 : george 889 if CB.sameColor(rd,rs) then [] else
190 : george 545 let val tmpR = I.Direct(newReg())
191 : george 1003 in [I.move{mvOp=I.MOVL, src=src, dst=tmpR},
192 :     I.move{mvOp=I.MOVL, src=tmpR, dst=dst}]
193 : george 545 end
194 :     | mvInstr{dst=I.Direct rd, src=I.Direct rs} =
195 : george 889 if CB.sameColor(rd,rs) then []
196 : george 1009 else [I.COPY{k=CB.GP, sz=32, dst=[rd], src=[rs], tmp=NONE}]
197 : george 1003 | mvInstr{dst, src} = [I.move{mvOp=I.MOVL, src=src, dst=dst}]
198 : george 545 in
199 : leunga 731 emits (Shuffle.shuffle{mvInstr=mvInstr, ea=IntReg}
200 : leunga 744 {tmp=SOME(I.Direct(newReg())),
201 : george 545 dst=dst, src=src})
202 :     end
203 :    
204 :     (* conversions *)
205 :     val itow = Word.fromInt
206 :     val wtoi = Word.toInt
207 : george 761 fun toInt32 i = T.I.toInt32(32, i)
208 : george 545 val w32toi32 = Word32.toLargeIntX
209 :     val i32tow32 = Word32.fromLargeInt
210 : monnier 247
211 : george 545 (* One day, this is going to bite us when precision(LargeInt)>32 *)
212 :     fun wToInt32 w = Int32.fromLarge(Word32.toLargeIntX w)
213 : monnier 247
214 : george 545 (* some useful registers *)
215 :     val eax = I.Direct(C.eax)
216 :     val ecx = I.Direct(C.ecx)
217 :     val edx = I.Direct(C.edx)
218 : monnier 247
219 : leunga 775 fun immedLabel lab = I.ImmedLabel(T.LABEL lab)
220 : george 545
221 :     (* Is the expression zero? *)
222 : george 761 fun isZero(T.LI z) = T.I.isZero z
223 : george 545 | isZero(T.MARK(e,a)) = isZero e
224 :     | isZero _ = false
225 :     (* Does the expression set the zero bit?
226 :     * WARNING: we assume these things are not optimized out!
227 :     *)
228 :     fun setZeroBit(T.ANDB _) = true
229 :     | setZeroBit(T.ORB _) = true
230 :     | setZeroBit(T.XORB _) = true
231 :     | setZeroBit(T.SRA _) = true
232 :     | setZeroBit(T.SRL _) = true
233 :     | setZeroBit(T.SLL _) = true
234 : leunga 695 | setZeroBit(T.SUB _) = true
235 :     | setZeroBit(T.ADDT _) = true
236 :     | setZeroBit(T.SUBT _) = true
237 : george 545 | setZeroBit(T.MARK(e, _)) = setZeroBit e
238 :     | setZeroBit _ = false
239 : monnier 247
240 : leunga 695 fun setZeroBit2(T.ANDB _) = true
241 :     | setZeroBit2(T.ORB _) = true
242 :     | setZeroBit2(T.XORB _) = true
243 :     | setZeroBit2(T.SRA _) = true
244 :     | setZeroBit2(T.SRL _) = true
245 :     | setZeroBit2(T.SLL _) = true
246 :     | setZeroBit2(T.ADD(32, _, _)) = true (* can't use leal! *)
247 :     | setZeroBit2(T.SUB _) = true
248 :     | setZeroBit2(T.ADDT _) = true
249 :     | setZeroBit2(T.SUBT _) = true
250 :     | setZeroBit2(T.MARK(e, _)) = setZeroBit2 e
251 :     | setZeroBit2 _ = false
252 :    
253 : leunga 731 (* emit parallel copies for floating point
254 :     * Normal version.
255 :     *)
256 :     fun fcopy'(fty, [], [], _) = ()
257 :     | fcopy'(fty, dst as [_], src as [_], an) =
258 : george 1009 mark'(I.COPY{k=CB.FP, sz=fty, dst=dst,src=src,tmp=NONE}, an)
259 : leunga 731 | fcopy'(fty, dst, src, an) =
260 : george 1009 mark'(I.COPY{k=CB.FP, sz=fty, dst=dst,src=src,tmp=SOME(I.FDirect(newFreg()))}, an)
261 : monnier 247
262 : leunga 731 (* emit parallel copies for floating point.
263 :     * Fast version.
264 :     * Translates parallel copies that involve memregs into
265 :     * individual copies.
266 :     *)
267 :    
268 :     fun fcopy''(fty, [], [], _) = ()
269 :     | fcopy''(fty, dst, src, an) =
270 :     if true orelse isAnyFMemReg dst orelse isAnyFMemReg src then
271 :     let val fsize = fsize fty
272 : george 1003 fun mvInstr{dst, src} = [I.fmove{fsize=fsize, src=src, dst=dst}]
273 : leunga 731 in
274 :     emits (Shuffle.shuffle{mvInstr=mvInstr, ea=RealReg}
275 : leunga 744 {tmp=case dst of
276 : leunga 731 [_] => NONE
277 :     | _ => SOME(I.FPR(newReg())),
278 :     dst=dst, src=src})
279 :     end
280 :     else
281 : george 1009 mark'(I.COPY{k=CB.FP, sz=fty, dst=dst,
282 :     src=src,tmp=
283 : leunga 731 case dst of
284 :     [_] => NONE
285 :     | _ => SOME(I.FPR(newFreg()))}, an)
286 :    
287 :     fun fcopy x = if enableFastFPMode andalso !fast_floating_point
288 :     then fcopy'' x else fcopy' x
289 :    
290 : george 545 (* Translates MLTREE condition code to x86 condition code *)
291 :     fun cond T.LT = I.LT | cond T.LTU = I.B
292 :     | cond T.LE = I.LE | cond T.LEU = I.BE
293 :     | cond T.EQ = I.EQ | cond T.NE = I.NE
294 :     | cond T.GE = I.GE | cond T.GEU = I.AE
295 :     | cond T.GT = I.GT | cond T.GTU = I.A
296 : jhr 1119 | cond cc = error(concat["cond(", T.Basis.condToString cc, ")"])
297 : monnier 247
298 : leunga 815 fun zero dst = emit(I.BINARY{binOp=I.XORL, src=dst, dst=dst})
299 :    
300 : george 545 (* Move and annotate *)
301 :     fun move'(src as I.Direct s, dst as I.Direct d, an) =
302 : george 889 if CB.sameColor(s,d) then ()
303 : george 1009 else mark'(I.COPY{k=CB.GP, sz=32, dst=[d], src=[s], tmp=NONE}, an)
304 : leunga 815 | move'(I.Immed 0, dst as I.Direct d, an) =
305 :     mark(I.BINARY{binOp=I.XORL, src=dst, dst=dst}, an)
306 : george 545 | move'(src, dst, an) = mark(I.MOVE{mvOp=I.MOVL, src=src, dst=dst}, an)
307 : monnier 247
308 : george 545 (* Move only! *)
309 :     fun move(src, dst) = move'(src, dst, [])
310 : monnier 247
311 : george 545 val readonly = I.Region.readonly
312 : monnier 247
313 : george 545 (*
314 : george 761 * Compute an effective address.
315 : george 545 *)
316 : george 761 fun address(ea, mem) = let
317 : george 545 (* Keep building a bigger and bigger effective address expressions
318 :     * The input is a list of trees
319 :     * b -- base
320 :     * i -- index
321 :     * s -- scale
322 :     * d -- immed displacement
323 :     *)
324 :     fun doEA([], b, i, s, d) = makeAddressingMode(b, i, s, d)
325 :     | doEA(t::trees, b, i, s, d) =
326 :     (case t of
327 : george 761 T.LI n => doEAImmed(trees, toInt32 n, b, i, s, d)
328 : leunga 775 | T.CONST _ => doEALabel(trees, t, b, i, s, d)
329 :     | T.LABEL _ => doEALabel(trees, t, b, i, s, d)
330 :     | T.LABEXP le => doEALabel(trees, le, b, i, s, d)
331 : george 545 | T.ADD(32, t1, t2 as T.REG(_,r)) =>
332 :     if isMemReg r then doEA(t2::t1::trees, b, i, s, d)
333 :     else doEA(t1::t2::trees, b, i, s, d)
334 :     | T.ADD(32, t1, t2) => doEA(t1::t2::trees, b, i, s, d)
335 :     | T.SUB(32, t1, T.LI n) =>
336 : george 761 doEA(t1::T.LI(T.I.NEG(32,n))::trees, b, i, s, d)
337 :     | T.SLL(32, t1, T.LI n) => let
338 :     val n = T.I.toInt(32, n)
339 :     in
340 :     case n
341 :     of 0 => displace(trees, t1, b, i, s, d)
342 :     | 1 => indexed(trees, t1, t, 1, b, i, s, d)
343 :     | 2 => indexed(trees, t1, t, 2, b, i, s, d)
344 :     | 3 => indexed(trees, t1, t, 3, b, i, s, d)
345 :     | _ => displace(trees, t, b, i, s, d)
346 :     end
347 : george 545 | t => displace(trees, t, b, i, s, d)
348 :     )
349 : monnier 247
350 : george 545 (* Add an immed constant *)
351 :     and doEAImmed(trees, 0, b, i, s, d) = doEA(trees, b, i, s, d)
352 :     | doEAImmed(trees, n, b, i, s, I.Immed m) =
353 : george 761 doEA(trees, b, i, s, I.Immed(n+m))
354 : george 545 | doEAImmed(trees, n, b, i, s, I.ImmedLabel le) =
355 : leunga 775 doEA(trees, b, i, s,
356 :     I.ImmedLabel(T.ADD(32,le,T.LI(T.I.fromInt32(32, n)))))
357 : george 545 | doEAImmed(trees, n, b, i, s, _) = error "doEAImmed"
358 : monnier 247
359 : george 545 (* Add a label expression *)
360 :     and doEALabel(trees, le, b, i, s, I.Immed 0) =
361 :     doEA(trees, b, i, s, I.ImmedLabel le)
362 :     | doEALabel(trees, le, b, i, s, I.Immed m) =
363 :     doEA(trees, b, i, s,
364 : leunga 775 I.ImmedLabel(T.ADD(32,le,T.LI(T.I.fromInt32(32, m))))
365 : george 545 handle Overflow => error "doEALabel: constant too large")
366 :     | doEALabel(trees, le, b, i, s, I.ImmedLabel le') =
367 : leunga 775 doEA(trees, b, i, s, I.ImmedLabel(T.ADD(32,le,le')))
368 : george 545 | doEALabel(trees, le, b, i, s, _) = error "doEALabel"
369 : monnier 247
370 : george 545 and makeAddressingMode(NONE, NONE, _, disp) = disp
371 :     | makeAddressingMode(SOME base, NONE, _, disp) =
372 :     I.Displace{base=base, disp=disp, mem=mem}
373 :     | makeAddressingMode(base, SOME index, scale, disp) =
374 : george 761 I.Indexed{base=base, index=index, scale=scale,
375 : george 545 disp=disp, mem=mem}
376 : monnier 247
377 : george 545 (* generate code for tree and ensure that it is not in %esp *)
378 :     and exprNotEsp tree =
379 :     let val r = expr tree
380 : george 889 in if CB.sameColor(r, C.esp) then
381 : george 545 let val tmp = newReg()
382 :     in move(I.Direct r, I.Direct tmp); tmp end
383 :     else r
384 :     end
385 : monnier 247
386 : george 545 (* Add a base register *)
387 :     and displace(trees, t, NONE, i, s, d) = (* no base yet *)
388 :     doEA(trees, SOME(expr t), i, s, d)
389 :     | displace(trees, t, b as SOME base, NONE, _, d) = (* no index *)
390 :     (* make t the index, but make sure that it is not %esp! *)
391 :     let val i = expr t
392 : george 889 in if CB.sameColor(i, C.esp) then
393 : george 545 (* swap base and index *)
394 : george 889 if CB.sameColor(base, C.esp) then
395 : george 545 doEA(trees, SOME i, b, 0, d)
396 :     else (* base and index = %esp! *)
397 :     let val index = newReg()
398 :     in move(I.Direct i, I.Direct index);
399 :     doEA(trees, b, SOME index, 0, d)
400 :     end
401 :     else
402 :     doEA(trees, b, SOME i, 0, d)
403 :     end
404 :     | displace(trees, t, SOME base, i, s, d) = (* base and index *)
405 :     let val b = expr(T.ADD(32,T.REG(32,base),t))
406 :     in doEA(trees, SOME b, i, s, d) end
407 : monnier 247
408 : george 545 (* Add an indexed register *)
409 :     and indexed(trees, t, t0, scale, b, NONE, _, d) = (* no index yet *)
410 :     doEA(trees, b, SOME(exprNotEsp t), scale, d)
411 :     | indexed(trees, _, t0, _, NONE, i, s, d) = (* no base *)
412 :     doEA(trees, SOME(expr t0), i, s, d)
413 :     | indexed(trees, _, t0, _, SOME base, i, s, d) = (*base and index*)
414 :     let val b = expr(T.ADD(32, t0, T.REG(32, base)))
415 :     in doEA(trees, SOME b, i, s, d) end
416 :    
417 :     in case doEA([ea], NONE, NONE, 0, I.Immed 0) of
418 :     I.Immed _ => raise EA
419 :     | I.ImmedLabel le => I.LabelEA le
420 :     | ea => ea
421 :     end (* address *)
422 : monnier 247
423 : george 545 (* reduce an expression into an operand *)
424 : george 761 and operand(T.LI i) = I.Immed(toInt32(i))
425 : leunga 775 | operand(x as (T.CONST _ | T.LABEL _)) = I.ImmedLabel x
426 :     | operand(T.LABEXP le) = I.ImmedLabel le
427 : george 545 | operand(T.REG(_,r)) = IntReg r
428 :     | operand(T.LOAD(32,ea,mem)) = address(ea, mem)
429 :     | operand(t) = I.Direct(expr t)
430 : monnier 247
431 : george 545 and moveToReg(opnd) =
432 :     let val dst = I.Direct(newReg())
433 :     in move(opnd, dst); dst
434 :     end
435 : monnier 247
436 : george 545 and reduceOpnd(I.Direct r) = r
437 :     | reduceOpnd opnd =
438 :     let val dst = newReg()
439 :     in move(opnd, I.Direct dst); dst
440 :     end
441 : monnier 247
442 : george 545 (* ensure that the operand is either an immed or register *)
443 :     and immedOrReg(opnd as I.Displace _) = moveToReg opnd
444 :     | immedOrReg(opnd as I.Indexed _) = moveToReg opnd
445 :     | immedOrReg(opnd as I.MemReg _) = moveToReg opnd
446 :     | immedOrReg(opnd as I.LabelEA _) = moveToReg opnd
447 :     | immedOrReg opnd = opnd
448 : monnier 247
449 : george 545 and isImmediate(I.Immed _) = true
450 :     | isImmediate(I.ImmedLabel _) = true
451 :     | isImmediate _ = false
452 : monnier 247
453 : george 545 and regOrMem opnd = if isImmediate opnd then moveToReg opnd else opnd
454 :    
455 :     and isMemOpnd opnd =
456 :     (case opnd of
457 :     I.Displace _ => true
458 :     | I.Indexed _ => true
459 :     | I.MemReg _ => true
460 :     | I.LabelEA _ => true
461 : george 555 | I.FDirect f => true
462 : george 545 | _ => false
463 :     )
464 :    
465 :     (*
466 :     * Compute an integer expression and put the result in
467 :     * the destination register rd.
468 :     *)
469 : george 889 and doExpr(exp, rd : CB.cell, an) =
470 : george 545 let val rdOpnd = IntReg rd
471 : monnier 247
472 : george 889 fun equalRd(I.Direct r) = CB.sameColor(r, rd)
473 :     | equalRd(I.MemReg r) = CB.sameColor(r, rd)
474 : george 545 | equalRd _ = false
475 : monnier 247
476 : george 545 (* Emit a binary operator. If the destination is
477 :     * a memReg, do something smarter.
478 :     *)
479 :     fun genBinary(binOp, opnd1, opnd2) =
480 :     if isMemReg rd andalso
481 :     (isMemOpnd opnd1 orelse isMemOpnd opnd2) orelse
482 :     equalRd(opnd2)
483 :     then
484 :     let val tmpR = newReg()
485 :     val tmp = I.Direct tmpR
486 :     in move(opnd1, tmp);
487 :     mark(I.BINARY{binOp=binOp, src=opnd2, dst=tmp}, an);
488 :     move(tmp, rdOpnd)
489 :     end
490 :     else
491 :     (move(opnd1, rdOpnd);
492 :     mark(I.BINARY{binOp=binOp, src=opnd2, dst=rdOpnd}, an)
493 :     )
494 : monnier 247
495 : george 545 (* Generate a binary operator; it may commute *)
496 :     fun binaryComm(binOp, e1, e2) =
497 :     let val (opnd1, opnd2) =
498 :     case (operand e1, operand e2) of
499 :     (x as I.Immed _, y) => (y, x)
500 :     | (x as I.ImmedLabel _, y) => (y, x)
501 :     | (x, y as I.Direct _) => (y, x)
502 :     | (x, y) => (x, y)
503 :     in genBinary(binOp, opnd1, opnd2)
504 :     end
505 :    
506 :     (* Generate a binary operator; non-commutative *)
507 :     fun binary(binOp, e1, e2) =
508 :     genBinary(binOp, operand e1, operand e2)
509 :    
510 :     (* Generate a unary operator *)
511 :     fun unary(unOp, e) =
512 :     let val opnd = operand e
513 :     in if isMemReg rd andalso isMemOpnd opnd then
514 :     let val tmp = I.Direct(newReg())
515 :     in move(opnd, tmp); move(tmp, rdOpnd)
516 :     end
517 :     else move(opnd, rdOpnd);
518 :     mark(I.UNARY{unOp=unOp, opnd=rdOpnd}, an)
519 :     end
520 :    
521 :     (* Generate shifts; the shift
522 :     * amount must be a constant or in %ecx *)
523 :     fun shift(opcode, e1, e2) =
524 :     let val (opnd1, opnd2) = (operand e1, operand e2)
525 :     in case opnd2 of
526 :     I.Immed _ => genBinary(opcode, opnd1, opnd2)
527 :     | _ =>
528 :     if equalRd(opnd2) then
529 :     let val tmpR = newReg()
530 :     val tmp = I.Direct tmpR
531 :     in move(opnd1, tmp);
532 :     move(opnd2, ecx);
533 :     mark(I.BINARY{binOp=opcode, src=ecx, dst=tmp},an);
534 :     move(tmp, rdOpnd)
535 :     end
536 :     else
537 :     (move(opnd1, rdOpnd);
538 :     move(opnd2, ecx);
539 :     mark(I.BINARY{binOp=opcode, src=ecx, dst=rdOpnd},an)
540 :     )
541 :     end
542 :    
543 : blume 1185 (* Division or remainder: divisor must be in %edx:%eax pair *)
544 : george 545 fun divrem(signed, overflow, e1, e2, resultReg) =
545 :     let val (opnd1, opnd2) = (operand e1, operand e2)
546 :     val _ = move(opnd1, eax)
547 : leunga 815 val oper = if signed then (emit(I.CDQ); I.IDIVL1)
548 :     else (zero edx; I.DIVL1)
549 : george 545 in mark(I.MULTDIV{multDivOp=oper, src=regOrMem opnd2},an);
550 :     move(resultReg, rdOpnd);
551 :     if overflow then trap() else ()
552 :     end
553 : monnier 247
554 : blume 1185 (* division with rounding towards negative infinity *)
555 :     fun divinf0 (overflow, e1, e2) = let
556 :     val o1 = operand e1
557 :     val o2 = operand e2
558 :     val l = Label.anon ()
559 :     in
560 :     move (o1, eax);
561 :     emit I.CDQ;
562 :     mark (I.MULTDIV { multDivOp = I.IDIVL1, src = regOrMem o2 },
563 :     an);
564 :     if overflow then trap() else ();
565 :     app emit [I.CMPL { lsrc = edx, rsrc = I.Immed 0 },
566 :     I.JCC { cond = I.EQ, opnd = immedLabel l },
567 :     I.BINARY { binOp = I.XORL,
568 :     src = regOrMem o2,
569 :     dst = edx },
570 :     I.JCC { cond = I.GE, opnd = immedLabel l },
571 :     I.UNARY { unOp = I.DECL, opnd = eax }];
572 :     defineLabel l;
573 :     move (eax, rdOpnd)
574 :     end
575 :    
576 :     (* analyze for power-of-two-ness *)
577 :     fun analyze i' = let
578 :     val i = toInt32 i'
579 :     in
580 :     let val (isneg, a, w) =
581 :     if i >= 0 then (false, i, T.I.toWord32 (32, i'))
582 :     else (true, ~i, T.I.toWord32 (32, T.I.NEG (32, i')))
583 :     fun log2 (0w1, p) = p
584 :     | log2 (w, p) = log2 (W32.>> (w, 0w1), p + 1)
585 :     in
586 :     if w > 0w1 andalso W32.andb (w - 0w1, w) = 0w0 then
587 :     (i, SOME (isneg, a,
588 :     T.LI (T.I.fromInt32 (32, log2 (w, 0)))))
589 :     else (i, NONE)
590 :     end handle _ => (i, NONE)
591 :     end
592 :    
593 :     (* Division by a power of two when rounding to neginf is the
594 :     * same as an arithmetic right shift. *)
595 :     fun divinf (overflow, e1, e2 as T.LI n') =
596 :     (case analyze n' of
597 :     (_, NONE) => divinf0 (overflow, e1, e2)
598 :     | (_, SOME (false, _, p)) =>
599 :     shift (I.SARL, T.REG (32, expr e1), p)
600 :     | (_, SOME (true, _, p)) => let
601 :     val reg = expr e1
602 :     in
603 :     emit(I.UNARY { unOp = I.NEGL, opnd = I.Direct reg });
604 :     shift (I.SARL, T.REG (32, reg), p)
605 :     end)
606 :     | divinf (overflow, e1, e2) = divinf0 (overflow, e1, e2)
607 :    
608 :     fun reminf0 (e1, e2) = let
609 :     val o1 = operand e1
610 :     val o2 = operand e2
611 :     val l = Label.anon ()
612 :     in
613 :     move (o1, eax);
614 :     emit I.CDQ;
615 :     mark (I.MULTDIV { multDivOp = I.IDIVL1, src = regOrMem o2 },
616 :     an);
617 :     app emit [I.CMPL { lsrc = edx, rsrc = I.Immed 0 },
618 :     I.JCC { cond = I.EQ, opnd = immedLabel l }];
619 :     move (edx, eax);
620 :     app emit [I.BINARY { binOp = I.XORL,
621 :     src = regOrMem o2, dst = eax },
622 :     I.JCC { cond = I.GE, opnd = immedLabel l },
623 :     I.BINARY { binOp = I.ADDL,
624 :     src = regOrMem o2, dst = edx }];
625 :     defineLabel l;
626 :     move (edx, rdOpnd)
627 :     end
628 :    
629 :     (* n mod (power-of-2) corresponds to a bitmask (AND).
630 :     * If the power is negative, then we must first negate
631 :     * the argument and then again negate the result. *)
632 :     fun reminf (e1, e2 as T.LI n') =
633 :     (case analyze n' of
634 :     (_, NONE) => reminf0 (e1, e2)
635 :     | (_, SOME (false, a, _)) =>
636 :     binaryComm (I.ANDL, e1,
637 :     T.LI (T.I.fromInt32 (32, a - 1)))
638 :     | (_, SOME (true, a, _)) => let
639 :     val r1 = expr e1
640 :     val o1 = I.Direct r1
641 :     in
642 :     emit (I.UNARY { unOp = I.NEGL, opnd = o1 });
643 :     emit (I.BINARY { binOp = I.ANDL,
644 :     src = I.Immed (a - 1),
645 :     dst = o1 });
646 :     unary (I.NEGL, T.REG (32, r1))
647 :     end)
648 :     | reminf (e1, e2) = reminf0 (e1, e2)
649 :    
650 :     (* Optimize the special case for division *)
651 :     fun divide (signed, overflow, e1, e2 as T.LI n') =
652 :     (case analyze n' of
653 :     (n, SOME (isneg, a, p)) =>
654 :     if signed then
655 :     let val label = Label.anon ()
656 :     val reg1 = expr e1
657 :     val opnd1 = I.Direct reg1
658 :     in
659 :     if isneg then
660 :     emit (I.UNARY { unOp = I.NEGL,
661 :     opnd = opnd1 })
662 :     else if setZeroBit e1 then ()
663 :     else emit (I.CMPL { lsrc = opnd1,
664 :     rsrc = I.Immed 0 });
665 :     emit (I.JCC { cond = I.GE,
666 :     opnd = immedLabel label });
667 :     emit (if a = 2 then
668 :     I.UNARY { unOp = I.INCL,
669 :     opnd = opnd1 }
670 :     else
671 :     I.BINARY { binOp = I.ADDL,
672 :     src = I.Immed (a - 1),
673 :     dst = opnd1 });
674 :     defineLabel label;
675 :     shift (I.SARL, T.REG (32, reg1), p)
676 :     end
677 :     else shift (I.SHRL, e1, p)
678 :     | (n, NONE) =>
679 :     divrem(signed, overflow andalso (n = ~1 orelse n = 0),
680 :     e1, e2, eax))
681 :     | divide (signed, overflow, e1, e2) =
682 :     divrem (signed, overflow, e1, e2, eax)
683 :    
684 : blume 1183 (* rem never causes overflow *)
685 : blume 1185 fun rem (signed, e1, e2 as T.LI n') =
686 :     (case analyze n' of
687 :     (n, SOME (isneg, a, _)) =>
688 :     if signed then
689 :     (* The following logic should work uniformely
690 :     * for both isneg and not isneg. It only uses
691 :     * the absolute value (a) of the divisor.
692 :     * Here is the formula:
693 :     * let p be a power of two and a = abs(p):
694 :     *
695 :     * x % p = x - ((x < 0 ? x + a - 1 : x) & (-a))
696 :     *
697 :     * (That's what GCC seems to do.)
698 :     *)
699 :     let val r1 = expr e1
700 :     val o1 = I.Direct r1
701 :     val rt = newReg ()
702 :     val tmp = I.Direct rt
703 :     val l = Label.anon ()
704 :     in
705 :     move (o1, tmp);
706 :     if setZeroBit e1 then ()
707 :     else emit (I.CMPL { lsrc = o1,
708 :     rsrc = I.Immed 0 });
709 :     emit (I.JCC { cond = I.GE,
710 :     opnd = immedLabel l });
711 :     emit (I.BINARY { binOp = I.ADDL,
712 :     src = I.Immed (a - 1),
713 :     dst = tmp });
714 :     defineLabel l;
715 :     emit (I.BINARY { binOp = I.ANDL,
716 :     src = I.Immed (~a),
717 :     dst = tmp });
718 :     binary (I.SUBL, T.REG (32, rt), T.REG (32, r1))
719 :     end
720 :     else
721 :     if isneg then
722 :     (* this is really strange... *)
723 :     divrem (false, false, e1, e2, edx)
724 :     else
725 :     binaryComm (I.ANDL, e1,
726 :     T.LI (T.I.fromInt32 (32, n - 1)))
727 :     | (_, NONE) => divrem (signed, false, e1, e2, edx))
728 :     | rem(signed, e1, e2) =
729 :     divrem(signed, false, e1, e2, edx)
730 : leunga 815
731 :     (* Makes sure the destination must be a register *)
732 :     fun dstMustBeReg f =
733 :     if isMemReg rd then
734 :     let val tmpR = newReg()
735 :     val tmp = I.Direct(tmpR)
736 :     in f(tmpR, tmp); move(tmp, rdOpnd) end
737 :     else f(rd, rdOpnd)
738 :    
739 : george 545 (* unsigned integer multiplication *)
740 : blume 1185 fun uMultiply0 (e1, e2) =
741 : george 545 (* note e2 can never be (I.Direct edx) *)
742 :     (move(operand e1, eax);
743 : leunga 815 mark(I.MULTDIV{multDivOp=I.MULL1,
744 : george 545 src=regOrMem(operand e2)},an);
745 :     move(eax, rdOpnd)
746 :     )
747 :    
748 : blume 1185 fun uMultiply (e1, e2 as T.LI n') =
749 :     (case analyze n' of
750 :     (_, SOME (false, _, p)) => shift (I.SHLL, e1, p)
751 :     | _ => uMultiply0 (e1, e2))
752 :     | uMultiply (e1 as T.LI _, e2) = uMultiply (e2, e1)
753 :     | uMultiply (e1, e2) = uMultiply0 (e1, e2)
754 :    
755 : george 545 (* signed integer multiplication:
756 :     * The only forms that are allowed that also sets the
757 :     * OF and CF flags are:
758 :     *
759 : leunga 815 * (dst) (src1) (src2)
760 : george 545 * imul r32, r32/m32, imm8
761 : leunga 815 * (dst) (src)
762 : george 545 * imul r32, imm8
763 :     * imul r32, imm32
764 : leunga 815 * imul r32, r32/m32
765 :     * Note: destination must be a register!
766 : george 545 *)
767 : blume 1185 fun multiply (e1, e2) =
768 : leunga 815 dstMustBeReg(fn (rd, rdOpnd) =>
769 :     let fun doit(i1 as I.Immed _, i2 as I.Immed _) =
770 :     (move(i1, rdOpnd);
771 :     mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=i2},an))
772 :     | doit(rm, i2 as I.Immed _) = doit(i2, rm)
773 :     | doit(imm as I.Immed(i), rm) =
774 :     mark(I.MUL3{dst=rd, src1=rm, src2=i},an)
775 :     | doit(r1 as I.Direct _, r2 as I.Direct _) =
776 :     (move(r1, rdOpnd);
777 :     mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=r2},an))
778 :     | doit(r1 as I.Direct _, rm) =
779 :     (move(r1, rdOpnd);
780 :     mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=rm},an))
781 :     | doit(rm, r as I.Direct _) = doit(r, rm)
782 :     | doit(rm1, rm2) =
783 : george 545 if equalRd rm2 then
784 :     let val tmpR = newReg()
785 :     val tmp = I.Direct tmpR
786 :     in move(rm1, tmp);
787 : leunga 815 mark(I.BINARY{binOp=I.IMULL, dst=tmp, src=rm2},an);
788 :     move(tmp, rdOpnd)
789 : george 545 end
790 :     else
791 : leunga 815 (move(rm1, rdOpnd);
792 :     mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=rm2},an)
793 : george 545 )
794 :     val (opnd1, opnd2) = (operand e1, operand e2)
795 : leunga 815 in doit(opnd1, opnd2)
796 : george 545 end
797 : leunga 815 )
798 : monnier 247
799 : blume 1185 fun multiply_notrap (e1, e2 as T.LI n') =
800 :     (case analyze n' of
801 :     (_, SOME (isneg, _, p)) => let
802 :     val r1 = expr e1
803 :     val o1 = I.Direct r1
804 :     in
805 :     if isneg then
806 :     emit (I.UNARY { unOp = I.NEGL, opnd = o1 })
807 :     else ();
808 :     shift (I.SHLL, T.REG (32, r1), p)
809 :     end
810 :     | _ => multiply (e1, e2))
811 :     | multiply_notrap (e1 as T.LI _, e2) = multiply_notrap (e2, e1)
812 :     | multiply_notrap (e1, e2) = multiply (e1, e2)
813 :    
814 : george 545 (* Emit a load instruction; makes sure that the destination
815 :     * is a register
816 :     *)
817 :     fun genLoad(mvOp, ea, mem) =
818 :     dstMustBeReg(fn (_, dst) =>
819 :     mark(I.MOVE{mvOp=mvOp, src=address(ea, mem), dst=dst},an))
820 :    
821 :     (* Generate a zero extended loads *)
822 :     fun load8(ea, mem) = genLoad(I.MOVZBL, ea, mem)
823 :     fun load16(ea, mem) = genLoad(I.MOVZWL, ea, mem)
824 :     fun load8s(ea, mem) = genLoad(I.MOVSBL, ea, mem)
825 :     fun load16s(ea, mem) = genLoad(I.MOVSWL, ea, mem)
826 :     fun load32(ea, mem) = genLoad(I.MOVL, ea, mem)
827 :    
828 :     (* Generate a sign extended loads *)
829 :    
830 :     (* Generate setcc instruction:
831 :     * semantics: MV(rd, COND(_, T.CMP(ty, cc, t1, t2), yes, no))
832 : leunga 583 * Bug, if eax is either t1 or t2 then problem will occur!!!
833 :     * Note that we have to use eax as the destination of the
834 :     * setcc because it only works on the registers
835 :     * %al, %bl, %cl, %dl and %[abcd]h. The last four registers
836 :     * are inaccessible in 32 bit mode.
837 : george 545 *)
838 :     fun setcc(ty, cc, t1, t2, yes, no) =
839 : leunga 583 let val (cc, yes, no) =
840 :     if yes > no then (cc, yes, no)
841 :     else (T.Basis.negateCond cc, no, yes)
842 : george 545 in (* Clear the destination first.
843 :     * This this because stupid SETcc
844 :     * only writes to the low order
845 :     * byte. That's Intel architecture, folks.
846 :     *)
847 : leunga 695 case (yes, no, cc) of
848 :     (1, 0, T.LT) =>
849 :     let val tmp = I.Direct(expr(T.SUB(32,t1,t2)))
850 :     in move(tmp, rdOpnd);
851 :     emit(I.BINARY{binOp=I.SHRL,src=I.Immed 31,dst=rdOpnd})
852 :     end
853 :     | (1, 0, T.GT) =>
854 :     let val tmp = I.Direct(expr(T.SUB(32,t1,t2)))
855 :     in emit(I.UNARY{unOp=I.NOTL,opnd=tmp});
856 :     move(tmp, rdOpnd);
857 :     emit(I.BINARY{binOp=I.SHRL,src=I.Immed 31,dst=rdOpnd})
858 :     end
859 :     | (1, 0, _) => (* normal case *)
860 : george 545 let val cc = cmp(true, ty, cc, t1, t2, [])
861 : leunga 583 in mark(I.SET{cond=cond cc, opnd=eax}, an);
862 : leunga 695 emit(I.BINARY{binOp=I.ANDL,src=I.Immed 255, dst=eax});
863 : leunga 583 move(eax, rdOpnd)
864 :     end
865 : leunga 695 | (C1, C2, _) =>
866 : george 545 (* general case;
867 : leunga 583 * from the Intel optimization guide p3-5
868 :     *)
869 : leunga 695 let val _ = zero eax;
870 :     val cc = cmp(true, ty, cc, t1, t2, [])
871 : leunga 583 in case C1-C2 of
872 :     D as (1 | 2 | 3 | 4 | 5 | 8 | 9) =>
873 :     let val (base,scale) =
874 :     case D of
875 :     1 => (NONE, 0)
876 :     | 2 => (NONE, 1)
877 :     | 3 => (SOME C.eax, 1)
878 :     | 4 => (NONE, 2)
879 :     | 5 => (SOME C.eax, 2)
880 :     | 8 => (NONE, 3)
881 :     | 9 => (SOME C.eax, 3)
882 :     val addr = I.Indexed{base=base,
883 :     index=C.eax,
884 :     scale=scale,
885 :     disp=I.Immed C2,
886 : george 545 mem=readonly}
887 : leunga 583 val tmpR = newReg()
888 :     val tmp = I.Direct tmpR
889 :     in emit(I.SET{cond=cond cc, opnd=eax});
890 :     mark(I.LEA{r32=tmpR, addr=addr}, an);
891 :     move(tmp, rdOpnd)
892 :     end
893 :     | D =>
894 :     (emit(I.SET{cond=cond(T.Basis.negateCond cc),
895 :     opnd=eax});
896 :     emit(I.UNARY{unOp=I.DECL, opnd=eax});
897 :     emit(I.BINARY{binOp=I.ANDL,
898 :     src=I.Immed D, dst=eax});
899 :     if C2 = 0 then
900 :     move(eax, rdOpnd)
901 :     else
902 :     let val tmpR = newReg()
903 :     val tmp = I.Direct tmpR
904 :     in mark(I.LEA{addr=
905 :     I.Displace{
906 :     base=C.eax,
907 :     disp=I.Immed C2,
908 :     mem=readonly},
909 :     r32=tmpR}, an);
910 :     move(tmp, rdOpnd)
911 :     end
912 :     )
913 :     end
914 : george 545 end (* setcc *)
915 :    
916 :     (* Generate cmovcc instruction.
917 :     * on Pentium Pro and Pentium II only
918 :     *)
919 :     fun cmovcc(ty, cc, t1, t2, yes, no) =
920 :     let fun genCmov(dstR, _) =
921 :     let val _ = doExpr(no, dstR, []) (* false branch *)
922 :     val cc = cmp(true, ty, cc, t1, t2, []) (* compare *)
923 : leunga 1127 in mark(I.CMOV{cond=cond cc, src=regOrMem(operand yes),
924 :     dst=dstR}, an)
925 : george 545 end
926 :     in dstMustBeReg genCmov
927 :     end
928 :    
929 :     fun unknownExp exp = doExpr(Gen.compileRexp exp, rd, an)
930 : monnier 247
931 : leunga 606 (* Add n to rd *)
932 :     fun addN n =
933 :     let val n = operand n
934 :     val src = if isMemReg rd then immedOrReg n else n
935 :     in mark(I.BINARY{binOp=I.ADDL, src=src, dst=rdOpnd}, an) end
936 :    
937 : george 545 (* Generate addition *)
938 :     fun addition(e1, e2) =
939 : leunga 606 case e1 of
940 : george 889 T.REG(_,rs) => if CB.sameColor(rs,rd) then addN e2
941 : leunga 744 else addition1(e1,e2)
942 : leunga 606 | _ => addition1(e1,e2)
943 :     and addition1(e1, e2) =
944 :     case e2 of
945 : george 889 T.REG(_,rs) => if CB.sameColor(rs,rd) then addN e1
946 : leunga 744 else addition2(e1,e2)
947 : leunga 606 | _ => addition2(e1,e2)
948 :     and addition2(e1,e2) =
949 : george 545 (dstMustBeReg(fn (dstR, _) =>
950 :     mark(I.LEA{r32=dstR, addr=address(exp, readonly)}, an))
951 :     handle EA => binaryComm(I.ADDL, e1, e2))
952 : monnier 247
953 :    
954 : george 545 in case exp of
955 :     T.REG(_,rs) =>
956 :     if isMemReg rs andalso isMemReg rd then
957 :     let val tmp = I.Direct(newReg())
958 : leunga 731 in move'(I.MemReg rs, tmp, an);
959 : george 545 move'(tmp, rdOpnd, [])
960 :     end
961 :     else move'(IntReg rs, rdOpnd, an)
962 : george 761 | T.LI z => let
963 :     val n = toInt32 z
964 :     in
965 :     if n=0 then
966 :     (* As per Fermin's request, special optimization for rd := 0.
967 :     * Currently we don't bother with the size.
968 :     *)
969 :     if isMemReg rd then move'(I.Immed 0, rdOpnd, an)
970 :     else mark(I.BINARY{binOp=I.XORL, src=rdOpnd, dst=rdOpnd}, an)
971 :     else
972 :     move'(I.Immed(n), rdOpnd, an)
973 :     end
974 : leunga 775 | (T.CONST _ | T.LABEL _) =>
975 :     move'(I.ImmedLabel exp, rdOpnd, an)
976 :     | T.LABEXP le => move'(I.ImmedLabel le, rdOpnd, an)
977 : monnier 247
978 : george 545 (* 32-bit addition *)
979 : george 761 | T.ADD(32, e1, e2 as T.LI n) => let
980 :     val n = toInt32 n
981 :     in
982 :     case n
983 :     of 1 => unary(I.INCL, e1)
984 :     | ~1 => unary(I.DECL, e1)
985 :     | _ => addition(e1, e2)
986 :     end
987 :     | T.ADD(32, e1 as T.LI n, e2) => let
988 :     val n = toInt32 n
989 :     in
990 :     case n
991 :     of 1 => unary(I.INCL, e2)
992 :     | ~1 => unary(I.DECL, e2)
993 :     | _ => addition(e1, e2)
994 :     end
995 : george 545 | T.ADD(32, e1, e2) => addition(e1, e2)
996 : monnier 247
997 : leunga 695 (* 32-bit addition but set the flag!
998 :     * This is a stupid hack for now.
999 :     *)
1000 : george 761 | T.ADD(0, e, e1 as T.LI n) => let
1001 :     val n = T.I.toInt(32, n)
1002 :     in
1003 :     if n=1 then unary(I.INCL, e)
1004 :     else if n = ~1 then unary(I.DECL, e)
1005 :     else binaryComm(I.ADDL, e, e1)
1006 :     end
1007 :     | T.ADD(0, e1 as T.LI n, e) => let
1008 :     val n = T.I.toInt(32, n)
1009 :     in
1010 :     if n=1 then unary(I.INCL, e)
1011 :     else if n = ~1 then unary(I.DECL, e)
1012 :     else binaryComm(I.ADDL, e1, e)
1013 :     end
1014 :     | T.ADD(0, e1, e2) => binaryComm(I.ADDL, e1, e2)
1015 :    
1016 : george 545 (* 32-bit subtraction *)
1017 : george 761 | T.SUB(32, e1, e2 as T.LI n) => let
1018 :     val n = toInt32 n
1019 :     in
1020 :     case n
1021 :     of 0 => doExpr(e1, rd, an)
1022 :     | 1 => unary(I.DECL, e1)
1023 :     | ~1 => unary(I.INCL, e1)
1024 :     | _ => binary(I.SUBL, e1, e2)
1025 :     end
1026 :     | T.SUB(32, e1 as T.LI n, e2) =>
1027 :     if T.I.isZero n then unary(I.NEGL, e2)
1028 :     else binary(I.SUBL, e1, e2)
1029 : george 545 | T.SUB(32, e1, e2) => binary(I.SUBL, e1, e2)
1030 : monnier 247
1031 : george 545 | T.MULU(32, x, y) => uMultiply(x, y)
1032 :     | T.DIVU(32, x, y) => divide(false, false, x, y)
1033 : blume 1183 | T.REMU(32, x, y) => rem(false, x, y)
1034 : monnier 247
1035 : blume 1185 | T.MULS(32, x, y) => multiply_notrap (x, y)
1036 : blume 1181 | T.DIVS(T.DIV_TO_ZERO, 32, x, y) => divide(true, false, x, y)
1037 : blume 1185 | T.DIVS(T.DIV_TO_NEGINF, 32, x, y) => divinf (false, x, y)
1038 : blume 1183 | T.REMS(T.DIV_TO_ZERO, 32, x, y) => rem(true, x, y)
1039 : blume 1185 | T.REMS(T.DIV_TO_NEGINF, 32, x, y) => reminf (x, y)
1040 : monnier 247
1041 : george 545 | T.ADDT(32, x, y) => (binaryComm(I.ADDL, x, y); trap())
1042 :     | T.SUBT(32, x, y) => (binary(I.SUBL, x, y); trap())
1043 : blume 1185 | T.MULT(32, x, y) => (multiply (x, y); trap ())
1044 : blume 1181 | T.DIVT(T.DIV_TO_ZERO, 32, x, y) => divide(true, true, x, y)
1045 : blume 1185 | T.DIVT(T.DIV_TO_NEGINF, 32, x, y) => divinf (true, x, y)
1046 : monnier 247
1047 : george 545 | T.ANDB(32, x, y) => binaryComm(I.ANDL, x, y)
1048 :     | T.ORB(32, x, y) => binaryComm(I.ORL, x, y)
1049 :     | T.XORB(32, x, y) => binaryComm(I.XORL, x, y)
1050 :     | T.NOTB(32, x) => unary(I.NOTL, x)
1051 : monnier 247
1052 : george 545 | T.SRA(32, x, y) => shift(I.SARL, x, y)
1053 :     | T.SRL(32, x, y) => shift(I.SHRL, x, y)
1054 :     | T.SLL(32, x, y) => shift(I.SHLL, x, y)
1055 : monnier 247
1056 : george 545 | T.LOAD(8, ea, mem) => load8(ea, mem)
1057 :     | T.LOAD(16, ea, mem) => load16(ea, mem)
1058 :     | T.LOAD(32, ea, mem) => load32(ea, mem)
1059 : monnier 498
1060 : leunga 776 | T.SX(32,8,T.LOAD(8,ea,mem)) => load8s(ea, mem)
1061 :     | T.SX(32,16,T.LOAD(16,ea,mem)) => load16s(ea, mem)
1062 :     | T.ZX(32,8,T.LOAD(8,ea,mem)) => load8(ea, mem)
1063 : leunga 779 | T.ZX(32,16,T.LOAD(16,ea,mem)) => load16(ea, mem)
1064 : leunga 776
1065 : leunga 1127 | T.COND(32, T.CMP(ty, cc, t1, t2), y as T.LI yes, n as T.LI no) =>
1066 :     (case !arch of (* PentiumPro and higher has CMOVcc *)
1067 :     Pentium => setcc(ty, cc, t1, t2, toInt32 yes, toInt32 no)
1068 :     | _ => cmovcc(ty, cc, t1, t2, y, n)
1069 :     )
1070 : george 545 | T.COND(32, T.CMP(ty, cc, t1, t2), yes, no) =>
1071 :     (case !arch of (* PentiumPro and higher has CMOVcc *)
1072 :     Pentium => unknownExp exp
1073 :     | _ => cmovcc(ty, cc, t1, t2, yes, no)
1074 :     )
1075 :     | T.LET(s,e) => (doStmt s; doExpr(e, rd, an))
1076 :     | T.MARK(e, A.MARKREG f) => (f rd; doExpr(e, rd, an))
1077 :     | T.MARK(e, a) => doExpr(e, rd, a::an)
1078 :     | T.PRED(e,c) => doExpr(e, rd, A.CTRLUSE c::an)
1079 : george 555 | T.REXT e =>
1080 :     ExtensionComp.compileRext (reducer()) {e=e, rd=rd, an=an}
1081 : george 545 (* simplify and try again *)
1082 :     | exp => unknownExp exp
1083 :     end (* doExpr *)
1084 : monnier 247
1085 : george 545 (* generate an expression and return its result register
1086 :     * If rewritePseudo is on, the result is guaranteed to be in a
1087 :     * non memReg register
1088 :     *)
1089 :     and expr(exp as T.REG(_, rd)) =
1090 :     if isMemReg rd then genExpr exp else rd
1091 :     | expr exp = genExpr exp
1092 : monnier 247
1093 : george 545 and genExpr exp =
1094 :     let val rd = newReg() in doExpr(exp, rd, []); rd end
1095 : monnier 247
1096 : george 545 (* Compare an expression with zero.
1097 :     * On the x86, TEST is superior to AND for doing the same thing,
1098 :     * since it doesn't need to write out the result in a register.
1099 :     *)
1100 : leunga 695 and cmpWithZero(cc as (T.EQ | T.NE), e as T.ANDB(ty, a, b), an) =
1101 : george 545 (case ty of
1102 : leunga 695 8 => test(I.TESTB, a, b, an)
1103 :     | 16 => test(I.TESTW, a, b, an)
1104 :     | 32 => test(I.TESTL, a, b, an)
1105 :     | _ => doExpr(e, newReg(), an);
1106 :     cc)
1107 :     | cmpWithZero(cc, e, an) =
1108 :     let val e =
1109 :     case e of (* hack to disable the lea optimization XXX *)
1110 :     T.ADD(_, a, b) => T.ADD(0, a, b)
1111 :     | e => e
1112 :     in doExpr(e, newReg(), an); cc end
1113 : monnier 247
1114 : george 545 (* Emit a test.
1115 :     * The available modes are
1116 :     * r/m, r
1117 :     * r/m, imm
1118 :     * On selecting the right instruction: TESTL/TESTW/TESTB.
1119 :     * When anding an operand with a constant
1120 :     * that fits within 8 (or 16) bits, it is possible to use TESTB,
1121 :     * (or TESTW) instead of TESTL. Because x86 is little endian,
1122 :     * this works for memory operands too. However, with TESTB, it is
1123 :     * not possible to use registers other than
1124 :     * AL, CL, BL, DL, and AH, CH, BH, DH. So, the best way is to
1125 :     * perform register allocation first, and if the operand registers
1126 :     * are one of EAX, ECX, EBX, or EDX, replace the TESTL instruction
1127 :     * by TESTB.
1128 :     *)
1129 : leunga 695 and test(testopcode, a, b, an) =
1130 : george 545 let val (_, opnd1, opnd2) = commuteComparison(T.EQ, true, a, b)
1131 :     (* translate r, r/m => r/m, r *)
1132 :     val (opnd1, opnd2) =
1133 :     if isMemOpnd opnd2 then (opnd2, opnd1) else (opnd1, opnd2)
1134 : leunga 695 in mark(testopcode{lsrc=opnd1, rsrc=opnd2}, an)
1135 : george 545 end
1136 : monnier 247
1137 : leunga 815 (* %eflags <- src *)
1138 :     and moveToEflags src =
1139 : george 889 if CB.sameColor(src, C.eflags) then ()
1140 : leunga 815 else (move(I.Direct src, eax); emit(I.LAHF))
1141 :    
1142 :     (* dst <- %eflags *)
1143 :     and moveFromEflags dst =
1144 : george 889 if CB.sameColor(dst, C.eflags) then ()
1145 : leunga 815 else (emit(I.SAHF); move(eax, I.Direct dst))
1146 :    
1147 : george 545 (* generate a condition code expression
1148 : leunga 744 * The zero is for setting the condition code!
1149 :     * I have no idea why this is used.
1150 :     *)
1151 :     and doCCexpr(T.CMP(ty, cc, t1, t2), rd, an) =
1152 : leunga 815 (cmp(false, ty, cc, t1, t2, an);
1153 :     moveFromEflags rd
1154 :     )
1155 :     | doCCexpr(T.CC(cond,rs), rd, an) =
1156 : george 889 if CB.sameColor(rs,C.eflags) orelse CB.sameColor(rd,C.eflags) then
1157 : leunga 815 (moveToEflags rs; moveFromEflags rd)
1158 : leunga 744 else
1159 : leunga 815 move'(I.Direct rs, I.Direct rd, an)
1160 : george 545 | doCCexpr(T.CCMARK(e,A.MARKREG f),rd,an) = (f rd; doCCexpr(e,rd,an))
1161 :     | doCCexpr(T.CCMARK(e,a), rd, an) = doCCexpr(e,rd,a::an)
1162 :     | doCCexpr(T.CCEXT e, cd, an) =
1163 : george 555 ExtensionComp.compileCCext (reducer()) {e=e, ccd=cd, an=an}
1164 : george 545 | doCCexpr _ = error "doCCexpr"
1165 : monnier 247
1166 : george 545 and ccExpr e = error "ccExpr"
1167 : monnier 247
1168 : george 545 (* generate a comparison and sets the condition code;
1169 :     * return the actual cc used. If the flag swapable is true,
1170 :     * we can also reorder the operands.
1171 :     *)
1172 :     and cmp(swapable, ty, cc, t1, t2, an) =
1173 : leunga 695 (* == and <> can be always be reordered *)
1174 :     let val swapable = swapable orelse cc = T.EQ orelse cc = T.NE
1175 :     in (* Sometimes the comparison is not necessary because
1176 :     * the bits are already set!
1177 :     *)
1178 :     if isZero t1 andalso setZeroBit2 t2 then
1179 :     if swapable then
1180 :     cmpWithZero(T.Basis.swapCond cc, t2, an)
1181 :     else (* can't reorder the comparison! *)
1182 :     genCmp(ty, false, cc, t1, t2, an)
1183 :     else if isZero t2 andalso setZeroBit2 t1 then
1184 :     cmpWithZero(cc, t1, an)
1185 :     else genCmp(ty, swapable, cc, t1, t2, an)
1186 :     end
1187 : monnier 247
1188 : george 545 (* Give a and b which are the operands to a comparison (or test)
1189 :     * Return the appropriate condition code and operands.
1190 :     * The available modes are:
1191 :     * r/m, imm
1192 :     * r/m, r
1193 :     * r, r/m
1194 :     *)
1195 :     and commuteComparison(cc, swapable, a, b) =
1196 :     let val (opnd1, opnd2) = (operand a, operand b)
1197 :     in (* Try to fold in the operands whenever possible *)
1198 :     case (isImmediate opnd1, isImmediate opnd2) of
1199 :     (true, true) => (cc, moveToReg opnd1, opnd2)
1200 :     | (true, false) =>
1201 :     if swapable then (T.Basis.swapCond cc, opnd2, opnd1)
1202 :     else (cc, moveToReg opnd1, opnd2)
1203 :     | (false, true) => (cc, opnd1, opnd2)
1204 :     | (false, false) =>
1205 :     (case (opnd1, opnd2) of
1206 :     (_, I.Direct _) => (cc, opnd1, opnd2)
1207 :     | (I.Direct _, _) => (cc, opnd1, opnd2)
1208 :     | (_, _) => (cc, moveToReg opnd1, opnd2)
1209 :     )
1210 :     end
1211 :    
1212 :     (* generate a real comparison; return the real cc used *)
1213 :     and genCmp(ty, swapable, cc, a, b, an) =
1214 :     let val (cc, opnd1, opnd2) = commuteComparison(cc, swapable, a, b)
1215 :     in mark(I.CMPL{lsrc=opnd1, rsrc=opnd2}, an); cc
1216 :     end
1217 : monnier 247
1218 : george 545 (* generate code for jumps *)
1219 : leunga 775 and jmp(lexp as T.LABEL lab, labs, an) =
1220 : george 545 mark(I.JMP(I.ImmedLabel lexp, [lab]), an)
1221 : leunga 775 | jmp(T.LABEXP le, labs, an) = mark(I.JMP(I.ImmedLabel le, labs), an)
1222 :     | jmp(ea, labs, an) = mark(I.JMP(operand ea, labs), an)
1223 : george 545
1224 :     (* convert mlrisc to cellset:
1225 :     *)
1226 :     and cellset mlrisc =
1227 : jhr 900 let val addCCReg = CB.CellSet.add
1228 : george 545 fun g([],acc) = acc
1229 :     | g(T.GPR(T.REG(_,r))::regs,acc) = g(regs,C.addReg(r,acc))
1230 :     | g(T.FPR(T.FREG(_,f))::regs,acc) = g(regs,C.addFreg(f,acc))
1231 :     | g(T.CCR(T.CC(_,cc))::regs,acc) = g(regs,addCCReg(cc,acc))
1232 :     | g(T.CCR(T.FCC(_,cc))::regs,acc) = g(regs,addCCReg(cc,acc))
1233 :     | g(_::regs, acc) = g(regs, acc)
1234 :     in g(mlrisc, C.empty) end
1235 :    
1236 :     (* generate code for calls *)
1237 : blume 839 and call(ea, flow, def, use, mem, cutsTo, an, pops) =
1238 : leunga 815 let fun return(set, []) = set
1239 :     | return(set, a::an) =
1240 :     case #peek A.RETURN_ARG a of
1241 : jhr 900 SOME r => return(CB.CellSet.add(r, set), an)
1242 : leunga 815 | NONE => return(set, an)
1243 : blume 839 in
1244 :     mark(I.CALL{opnd=operand ea,defs=cellset(def),uses=cellset(use),
1245 :     return=return(C.empty,an),cutsTo=cutsTo,mem=mem,
1246 :     pops=pops},an)
1247 : leunga 815 end
1248 : george 545
1249 : leunga 815 (* generate code for integer stores; first move data to %eax
1250 :     * This is mainly because we can't allocate to registers like
1251 :     * ah, dl, dx etc.
1252 :     *)
1253 :     and genStore(mvOp, ea, d, mem, an) =
1254 :     let val src =
1255 : george 545 case immedOrReg(operand d) of
1256 :     src as I.Direct r =>
1257 : george 889 if CB.sameColor(r,C.eax)
1258 : leunga 744 then src else (move(src, eax); eax)
1259 : george 545 | src => src
1260 : leunga 815 in mark(I.MOVE{mvOp=mvOp, src=src, dst=address(ea,mem)},an)
1261 : george 545 end
1262 : leunga 815
1263 :     (* generate code for 8-bit integer stores *)
1264 :     (* movb has to use %eax as source. Stupid x86! *)
1265 :     and store8(ea, d, mem, an) = genStore(I.MOVB, ea, d, mem, an)
1266 : blume 818 and store16(ea, d, mem, an) =
1267 :     mark(I.MOVE{mvOp=I.MOVW, src=immedOrReg(operand d), dst=address(ea, mem)}, an)
1268 : george 545 and store32(ea, d, mem, an) =
1269 :     move'(immedOrReg(operand d), address(ea, mem), an)
1270 :    
1271 :     (* generate code for branching *)
1272 :     and branch(T.CMP(ty, cc, t1, t2), lab, an) =
1273 :     (* allow reordering of operands *)
1274 :     let val cc = cmp(true, ty, cc, t1, t2, [])
1275 :     in mark(I.JCC{cond=cond cc, opnd=immedLabel lab}, an) end
1276 :     | branch(T.FCMP(fty, fcc, t1, t2), lab, an) =
1277 :     fbranch(fty, fcc, t1, t2, lab, an)
1278 :     | branch(ccexp, lab, an) =
1279 : leunga 744 (doCCexpr(ccexp, C.eflags, []);
1280 : george 545 mark(I.JCC{cond=cond(Gen.condOf ccexp), opnd=immedLabel lab}, an)
1281 :     )
1282 :    
1283 :     (* generate code for floating point compare and branch *)
1284 :     and fbranch(fty, fcc, t1, t2, lab, an) =
1285 : leunga 1156 let fun j cc = mark(I.JCC{cond=cc, opnd=immedLabel lab},an)
1286 :     in fbranching(fty, fcc, t1, t2, j)
1287 :     end
1288 :    
1289 :     and fbranching(fty, fcc, t1, t2, j) =
1290 : leunga 731 let fun ignoreOrder (T.FREG _) = true
1291 :     | ignoreOrder (T.FLOAD _) = true
1292 :     | ignoreOrder (T.FMARK(e,_)) = ignoreOrder e
1293 :     | ignoreOrder _ = false
1294 :    
1295 :     fun compare'() = (* Sethi-Ullman style *)
1296 :     (if ignoreOrder t1 orelse ignoreOrder t2 then
1297 :     (reduceFexp(fty, t2, []); reduceFexp(fty, t1, []))
1298 :     else (reduceFexp(fty, t1, []); reduceFexp(fty, t2, []);
1299 :     emit(I.FXCH{opnd=C.ST(1)}));
1300 :     emit(I.FUCOMPP);
1301 :     fcc
1302 :     )
1303 :    
1304 :     fun compare''() =
1305 :     (* direct style *)
1306 :     (* Try to make lsrc the memory operand *)
1307 :     let val lsrc = foperand(fty, t1)
1308 :     val rsrc = foperand(fty, t2)
1309 :     val fsize = fsize fty
1310 :     fun cmp(lsrc, rsrc, fcc) =
1311 : leunga 1156 let val i = !arch <> Pentium
1312 :     in emit(I.FCMP{i=i,fsize=fsize,lsrc=lsrc,rsrc=rsrc});
1313 :     fcc
1314 :     end
1315 : leunga 731 in case (lsrc, rsrc) of
1316 :     (I.FPR _, I.FPR _) => cmp(lsrc, rsrc, fcc)
1317 :     | (I.FPR _, mem) => cmp(mem,lsrc,T.Basis.swapFcond fcc)
1318 :     | (mem, I.FPR _) => cmp(lsrc, rsrc, fcc)
1319 :     | (lsrc, rsrc) => (* can't be both memory! *)
1320 :     let val ftmpR = newFreg()
1321 :     val ftmp = I.FPR ftmpR
1322 :     in emit(I.FMOVE{fsize=fsize,src=rsrc,dst=ftmp});
1323 :     cmp(lsrc, ftmp, fcc)
1324 :     end
1325 :     end
1326 :    
1327 :     fun compare() =
1328 :     if enableFastFPMode andalso !fast_floating_point
1329 :     then compare''() else compare'()
1330 :    
1331 : george 545 fun andil i = emit(I.BINARY{binOp=I.ANDL,src=I.Immed(i),dst=eax})
1332 : leunga 585 fun testil i = emit(I.TESTL{lsrc=eax,rsrc=I.Immed(i)})
1333 : george 545 fun xoril i = emit(I.BINARY{binOp=I.XORL,src=I.Immed(i),dst=eax})
1334 :     fun cmpil i = emit(I.CMPL{rsrc=I.Immed(i), lsrc=eax})
1335 :     fun sahf() = emit(I.SAHF)
1336 : leunga 731 fun branch(fcc) =
1337 : george 545 case fcc
1338 : leunga 1156 of T.== => (andil 0x4400; xoril 0x4000; j(I.EQ))
1339 :     | T.?<> => (andil 0x4400; xoril 0x4000; j(I.NE))
1340 :     | T.? => (sahf(); j(I.P))
1341 :     | T.<=> => (sahf(); j(I.NP))
1342 :     | T.> => (testil 0x4500; j(I.EQ))
1343 :     | T.?<= => (testil 0x4500; j(I.NE))
1344 :     | T.>= => (testil 0x500; j(I.EQ))
1345 :     | T.?< => (testil 0x500; j(I.NE))
1346 :     | T.< => (andil 0x4500; cmpil 0x100; j(I.EQ))
1347 :     | T.?>= => (andil 0x4500; cmpil 0x100; j(I.NE))
1348 :     | T.<= => (andil 0x4100; cmpil 0x100; j(I.EQ);
1349 :     cmpil 0x4000; j(I.EQ))
1350 :     | T.?> => (sahf(); j(I.P); testil 0x4100; j(I.EQ))
1351 :     | T.<> => (testil 0x4400; j(I.EQ))
1352 :     | T.?= => (testil 0x4400; j(I.NE))
1353 : jhr 1119 | _ => error(concat[
1354 :     "fbranch(", T.Basis.fcondToString fcc, ")"
1355 :     ])
1356 : george 545 (*esac*)
1357 : leunga 1156
1358 :     (*
1359 :     * P Z C
1360 :     * x < y 0 0 1
1361 :     * x > y 0 0 0
1362 :     * x = y 0 1 0
1363 :     * unordered 1 1 1
1364 :     * When it's unordered, all three flags, P, Z, C are set.
1365 :     *)
1366 :    
1367 :     fun fast_branch(fcc) =
1368 :     case fcc
1369 :     of T.== => orderedOnly(I.EQ)
1370 :     | T.?<> => (j(I.P); j(I.NE))
1371 :     | T.? => j(I.P)
1372 :     | T.<=> => j(I.NP)
1373 :     | T.> => orderedOnly(I.A)
1374 :     | T.?<= => j(I.BE)
1375 :     | T.>= => orderedOnly(I.AE)
1376 :     | T.?< => j(I.B)
1377 :     | T.< => orderedOnly(I.B)
1378 :     | T.?>= => (j(I.P); j(I.AE))
1379 :     | T.<= => orderedOnly(I.BE)
1380 :     | T.?> => (j(I.P); j(I.A))
1381 :     | T.<> => orderedOnly(I.NE)
1382 :     | T.?= => j(I.EQ)
1383 :     | _ => error(concat[
1384 :     "fbranch(", T.Basis.fcondToString fcc, ")"
1385 :     ])
1386 :     (*esac*)
1387 :     and orderedOnly fcc =
1388 :     let val label = Label.anon()
1389 :     in emit(I.JCC{cond=I.P, opnd=immedLabel label});
1390 :     j fcc;
1391 :     defineLabel label
1392 :     end
1393 :    
1394 : leunga 731 val fcc = compare()
1395 : leunga 1156 in if !arch <> Pentium andalso
1396 :     (enableFastFPMode andalso !fast_floating_point) then
1397 :     fast_branch(fcc)
1398 :     else
1399 :     (emit I.FNSTSW;
1400 :     branch(fcc)
1401 :     )
1402 : monnier 411 end
1403 : monnier 247
1404 : leunga 731 (*========================================================
1405 :     * Floating point code generation starts here.
1406 :     * Some generic fp routines first.
1407 :     *========================================================*)
1408 :    
1409 :     (* Can this tree be folded into the src operand of a floating point
1410 :     * operations?
1411 :     *)
1412 :     and foldableFexp(T.FREG _) = true
1413 :     | foldableFexp(T.FLOAD _) = true
1414 :     | foldableFexp(T.CVTI2F(_, (16 | 32), _)) = true
1415 :     | foldableFexp(T.CVTF2F(_, _, t)) = foldableFexp t
1416 :     | foldableFexp(T.FMARK(t, _)) = foldableFexp t
1417 :     | foldableFexp _ = false
1418 :    
1419 :     (* Move integer e of size ty into a memory location.
1420 :     * Returns a quadruple:
1421 :     * (INTEGER,return ty,effect address of memory location,cleanup code)
1422 :     *)
1423 :     and convertIntToFloat(ty, e) =
1424 :     let val opnd = operand e
1425 :     in if isMemOpnd opnd andalso (ty = 16 orelse ty = 32)
1426 :     then (INTEGER, ty, opnd, [])
1427 :     else
1428 : leunga 815 let val {instrs, tempMem, cleanup} =
1429 :     cvti2f{ty=ty, src=opnd, an=getAnnotations()}
1430 : leunga 731 in emits instrs;
1431 :     (INTEGER, 32, tempMem, cleanup)
1432 :     end
1433 :     end
1434 :    
1435 :     (*========================================================
1436 :     * Sethi-Ullman based floating point code generation as
1437 :     * implemented by Lal
1438 :     *========================================================*)
1439 :    
1440 : george 545 and fld(32, opnd) = I.FLDS opnd
1441 :     | fld(64, opnd) = I.FLDL opnd
1442 : george 555 | fld(80, opnd) = I.FLDT opnd
1443 : george 545 | fld _ = error "fld"
1444 :    
1445 : leunga 565 and fild(16, opnd) = I.FILD opnd
1446 :     | fild(32, opnd) = I.FILDL opnd
1447 :     | fild(64, opnd) = I.FILDLL opnd
1448 :     | fild _ = error "fild"
1449 :    
1450 :     and fxld(INTEGER, ty, opnd) = fild(ty, opnd)
1451 :     | fxld(REAL, fty, opnd) = fld(fty, opnd)
1452 :    
1453 : george 545 and fstp(32, opnd) = I.FSTPS opnd
1454 :     | fstp(64, opnd) = I.FSTPL opnd
1455 : george 555 | fstp(80, opnd) = I.FSTPT opnd
1456 : george 545 | fstp _ = error "fstp"
1457 :    
1458 :     (* generate code for floating point stores *)
1459 : leunga 731 and fstore'(fty, ea, d, mem, an) =
1460 : george 545 (case d of
1461 :     T.FREG(fty, fs) => emit(fld(fty, I.FDirect fs))
1462 :     | _ => reduceFexp(fty, d, []);
1463 :     mark(fstp(fty, address(ea, mem)), an)
1464 :     )
1465 :    
1466 : leunga 731 (* generate code for floating point loads *)
1467 :     and fload'(fty, ea, mem, fd, an) =
1468 :     let val ea = address(ea, mem)
1469 :     in mark(fld(fty, ea), an);
1470 : george 889 if CB.sameColor(fd,ST0) then ()
1471 : leunga 744 else emit(fstp(fty, I.FDirect fd))
1472 : leunga 731 end
1473 :    
1474 :     and fexpr' e = (reduceFexp(64, e, []); C.ST(0))
1475 : george 545
1476 :     (* generate floating point expression and put the result in fd *)
1477 : leunga 731 and doFexpr'(fty, T.FREG(_, fs), fd, an) =
1478 : george 889 (if CB.sameColor(fs,fd) then ()
1479 : george 1009 else mark'(I.COPY{k=CB.FP, sz=64, dst=[fd], src=[fs], tmp=NONE}, an)
1480 : george 545 )
1481 : leunga 731 | doFexpr'(_, T.FLOAD(fty, ea, mem), fd, an) =
1482 :     fload'(fty, ea, mem, fd, an)
1483 :     | doFexpr'(fty, T.FEXT fexp, fd, an) =
1484 :     (ExtensionComp.compileFext (reducer()) {e=fexp, fd=fd, an=an};
1485 : george 889 if CB.sameColor(fd,ST0) then () else emit(fstp(fty, I.FDirect fd))
1486 : leunga 731 )
1487 :     | doFexpr'(fty, e, fd, an) =
1488 : george 545 (reduceFexp(fty, e, []);
1489 : george 889 if CB.sameColor(fd,ST0) then ()
1490 : leunga 744 else mark(fstp(fty, I.FDirect fd), an)
1491 : george 545 )
1492 :    
1493 :     (*
1494 :     * Generate floating point expression using Sethi-Ullman's scheme:
1495 :     * This function evaluates a floating point expression,
1496 :     * and put result in %ST(0).
1497 :     *)
1498 :     and reduceFexp(fty, fexp, an) =
1499 : george 555 let val ST = I.ST(C.ST 0)
1500 :     val ST1 = I.ST(C.ST 1)
1501 : leunga 593 val cleanupCode = ref [] : I.instruction list ref
1502 : george 545
1503 : leunga 565 datatype su_tree =
1504 :     LEAF of int * T.fexp * ans
1505 :     | BINARY of int * T.fty * fbinop * su_tree * su_tree * ans
1506 :     | UNARY of int * T.fty * I.funOp * su_tree * ans
1507 :     and fbinop = FADD | FSUB | FMUL | FDIV
1508 :     | FIADD | FISUB | FIMUL | FIDIV
1509 :     withtype ans = Annotations.annotations
1510 : monnier 247
1511 : leunga 565 fun label(LEAF(n, _, _)) = n
1512 :     | label(BINARY(n, _, _, _, _, _)) = n
1513 :     | label(UNARY(n, _, _, _, _)) = n
1514 : george 545
1515 : leunga 565 fun annotate(LEAF(n, x, an), a) = LEAF(n,x,a::an)
1516 :     | annotate(BINARY(n,t,b,x,y,an), a) = BINARY(n,t,b,x,y,a::an)
1517 :     | annotate(UNARY(n,t,u,x,an), a) = UNARY(n,t,u,x,a::an)
1518 : george 545
1519 : leunga 565 (* Generate expression tree with sethi-ullman numbers *)
1520 :     fun su(e as T.FREG _) = LEAF(1, e, [])
1521 :     | su(e as T.FLOAD _) = LEAF(1, e, [])
1522 :     | su(e as T.CVTI2F _) = LEAF(1, e, [])
1523 :     | su(T.CVTF2F(_, _, t)) = su t
1524 :     | su(T.FMARK(t, a)) = annotate(su t, a)
1525 :     | su(T.FABS(fty, t)) = suUnary(fty, I.FABS, t)
1526 :     | su(T.FNEG(fty, t)) = suUnary(fty, I.FCHS, t)
1527 :     | su(T.FSQRT(fty, t)) = suUnary(fty, I.FSQRT, t)
1528 :     | su(T.FADD(fty, t1, t2)) = suComBinary(fty,FADD,FIADD,t1,t2)
1529 :     | su(T.FMUL(fty, t1, t2)) = suComBinary(fty,FMUL,FIMUL,t1,t2)
1530 :     | su(T.FSUB(fty, t1, t2)) = suBinary(fty,FSUB,FISUB,t1,t2)
1531 :     | su(T.FDIV(fty, t1, t2)) = suBinary(fty,FDIV,FIDIV,t1,t2)
1532 :     | su _ = error "su"
1533 :    
1534 :     (* Try to fold the the memory operand or integer conversion *)
1535 :     and suFold(e as T.FREG _) = (LEAF(0, e, []), false)
1536 :     | suFold(e as T.FLOAD _) = (LEAF(0, e, []), false)
1537 :     | suFold(e as T.CVTI2F(_,(16 | 32),_)) = (LEAF(0, e, []), true)
1538 :     | suFold(T.CVTF2F(_, _, t)) = suFold t
1539 :     | suFold(T.FMARK(t, a)) =
1540 :     let val (t, integer) = suFold t
1541 :     in (annotate(t, a), integer) end
1542 :     | suFold e = (su e, false)
1543 :    
1544 :     (* Form unary tree *)
1545 :     and suUnary(fty, funary, t) =
1546 :     let val t = su t
1547 :     in UNARY(label t, fty, funary, t, [])
1548 : george 545 end
1549 : leunga 565
1550 :     (* Form binary tree *)
1551 :     and suBinary(fty, binop, ibinop, t1, t2) =
1552 :     let val t1 = su t1
1553 :     val (t2, integer) = suFold t2
1554 :     val n1 = label t1
1555 :     val n2 = label t2
1556 :     val n = if n1=n2 then n1+1 else Int.max(n1,n2)
1557 :     val myOp = if integer then ibinop else binop
1558 :     in BINARY(n, fty, myOp, t1, t2, [])
1559 : george 545 end
1560 : george 555
1561 : leunga 565 (* Try to fold in the operand if possible.
1562 :     * This only applies to commutative operations.
1563 :     *)
1564 :     and suComBinary(fty, binop, ibinop, t1, t2) =
1565 : leunga 731 let val (t1, t2) = if foldableFexp t2
1566 :     then (t1, t2) else (t2, t1)
1567 : leunga 565 in suBinary(fty, binop, ibinop, t1, t2) end
1568 :    
1569 :     and sameTree(LEAF(_, T.FREG(t1,f1), []),
1570 : leunga 744 LEAF(_, T.FREG(t2,f2), [])) =
1571 : george 889 t1 = t2 andalso CB.sameColor(f1,f2)
1572 : leunga 565 | sameTree _ = false
1573 :    
1574 :     (* Traverse tree and generate code *)
1575 :     fun gencode(LEAF(_, t, an)) = mark(fxld(leafEA t), an)
1576 :     | gencode(BINARY(_, _, binop, x, t2 as LEAF(0, y, a1), a2)) =
1577 :     let val _ = gencode x
1578 :     val (_, fty, src) = leafEA y
1579 :     fun gen(code) = mark(code, a1 @ a2)
1580 :     fun binary(oper32, oper64) =
1581 :     if sameTree(x, t2) then
1582 :     gen(I.FBINARY{binOp=oper64, src=ST, dst=ST})
1583 : george 555 else
1584 :     let val oper =
1585 : leunga 565 if isMemOpnd src then
1586 :     case fty of
1587 :     32 => oper32
1588 :     | 64 => oper64
1589 :     | _ => error "gencode: BINARY"
1590 :     else oper64
1591 :     in gen(I.FBINARY{binOp=oper, src=src, dst=ST}) end
1592 :     fun ibinary(oper16, oper32) =
1593 :     let val oper = case fty of
1594 :     16 => oper16
1595 :     | 32 => oper32
1596 :     | _ => error "gencode: IBINARY"
1597 :     in gen(I.FIBINARY{binOp=oper, src=src}) end
1598 :     in case binop of
1599 :     FADD => binary(I.FADDS, I.FADDL)
1600 :     | FSUB => binary(I.FDIVS, I.FSUBL)
1601 :     | FMUL => binary(I.FMULS, I.FMULL)
1602 :     | FDIV => binary(I.FDIVS, I.FDIVL)
1603 :     | FIADD => ibinary(I.FIADDS, I.FIADDL)
1604 :     | FISUB => ibinary(I.FIDIVS, I.FISUBL)
1605 :     | FIMUL => ibinary(I.FIMULS, I.FIMULL)
1606 :     | FIDIV => ibinary(I.FIDIVS, I.FIDIVL)
1607 :     end
1608 :     | gencode(BINARY(_, fty, binop, t1, t2, an)) =
1609 :     let fun doit(t1, t2, oper, operP, operRP) =
1610 :     let (* oper[P] => ST(1) := ST oper ST(1); [pop]
1611 :     * operR[P] => ST(1) := ST(1) oper ST; [pop]
1612 :     *)
1613 :     val n1 = label t1
1614 :     val n2 = label t2
1615 :     in if n1 < n2 andalso n1 <= 7 then
1616 :     (gencode t2;
1617 :     gencode t1;
1618 :     mark(I.FBINARY{binOp=operP, src=ST, dst=ST1}, an))
1619 :     else if n2 <= n1 andalso n2 <= 7 then
1620 :     (gencode t1;
1621 :     gencode t2;
1622 :     mark(I.FBINARY{binOp=operRP, src=ST, dst=ST1}, an))
1623 :     else
1624 :     let (* both labels > 7 *)
1625 :     val fs = I.FDirect(newFreg())
1626 :     in gencode t2;
1627 :     emit(fstp(fty, fs));
1628 :     gencode t1;
1629 :     mark(I.FBINARY{binOp=oper, src=fs, dst=ST}, an)
1630 :     end
1631 :     end
1632 :     in case binop of
1633 :     FADD => doit(t1,t2,I.FADDL,I.FADDP,I.FADDP)
1634 :     | FMUL => doit(t1,t2,I.FMULL,I.FMULP,I.FMULP)
1635 :     | FSUB => doit(t1,t2,I.FSUBL,I.FSUBP,I.FSUBRP)
1636 :     | FDIV => doit(t1,t2,I.FDIVL,I.FDIVP,I.FDIVRP)
1637 : george 545 | _ => error "gencode.BINARY"
1638 :     end
1639 : leunga 565 | gencode(UNARY(_, _, unaryOp, su, an)) =
1640 :     (gencode(su); mark(I.FUNARY(unaryOp),an))
1641 :    
1642 :     (* Generate code for a leaf.
1643 :     * Returns the type and an effective address
1644 :     *)
1645 :     and leafEA(T.FREG(fty, f)) = (REAL, fty, I.FDirect f)
1646 :     | leafEA(T.FLOAD(fty, ea, mem)) = (REAL, fty, address(ea, mem))
1647 : leunga 593 | leafEA(T.CVTI2F(_, 32, t)) = int2real(32, t)
1648 :     | leafEA(T.CVTI2F(_, 16, t)) = int2real(16, t)
1649 :     | leafEA(T.CVTI2F(_, 8, t)) = int2real(8, t)
1650 : leunga 565 | leafEA _ = error "leafEA"
1651 :    
1652 : leunga 731 and int2real(ty, e) =
1653 :     let val (_, ty, ea, cleanup) = convertIntToFloat(ty, e)
1654 :     in cleanupCode := !cleanupCode @ cleanup;
1655 :     (INTEGER, ty, ea)
1656 : george 545 end
1657 : leunga 731
1658 :     in gencode(su fexp);
1659 :     emits(!cleanupCode)
1660 : george 545 end (*reduceFexp*)
1661 : leunga 731
1662 :     (*========================================================
1663 :     * This section generates 3-address style floating
1664 :     * point code.
1665 :     *========================================================*)
1666 :    
1667 :     and isize 16 = I.I16
1668 :     | isize 32 = I.I32
1669 :     | isize _ = error "isize"
1670 :    
1671 :     and fstore''(fty, ea, d, mem, an) =
1672 :     (floatingPointUsed := true;
1673 :     mark(I.FMOVE{fsize=fsize fty, dst=address(ea,mem),
1674 :     src=foperand(fty, d)},
1675 :     an)
1676 :     )
1677 :    
1678 :     and fload''(fty, ea, mem, d, an) =
1679 :     (floatingPointUsed := true;
1680 :     mark(I.FMOVE{fsize=fsize fty, src=address(ea,mem),
1681 :     dst=RealReg d}, an)
1682 :     )
1683 :    
1684 :     and fiload''(ity, ea, d, an) =
1685 :     (floatingPointUsed := true;
1686 :     mark(I.FILOAD{isize=isize ity, ea=ea, dst=RealReg d}, an)
1687 :     )
1688 :    
1689 :     and fexpr''(e as T.FREG(_,f)) =
1690 :     if isFMemReg f then transFexpr e else f
1691 :     | fexpr'' e = transFexpr e
1692 :    
1693 :     and transFexpr e =
1694 :     let val fd = newFreg() in doFexpr''(64, e, fd, []); fd end
1695 :    
1696 :     (*
1697 :     * Process a floating point operand. Put operand in register
1698 :     * when possible. The operand should match the given fty.
1699 :     *)
1700 :     and foperand(fty, e as T.FREG(fty', f)) =
1701 :     if fty = fty' then RealReg f else I.FPR(fexpr'' e)
1702 :     | foperand(fty, T.CVTF2F(_, _, e)) =
1703 :     foperand(fty, e) (* nop on the x86 *)
1704 :     | foperand(fty, e as T.FLOAD(fty', ea, mem)) =
1705 :     (* fold operand when the precison matches *)
1706 :     if fty = fty' then address(ea, mem) else I.FPR(fexpr'' e)
1707 :     | foperand(fty, e) = I.FPR(fexpr'' e)
1708 :    
1709 :     (*
1710 :     * Process a floating point operand.
1711 :     * Try to fold in a memory operand or conversion from an integer.
1712 :     *)
1713 :     and fioperand(T.FREG(fty,f)) = (REAL, fty, RealReg f, [])
1714 :     | fioperand(T.FLOAD(fty, ea, mem)) =
1715 :     (REAL, fty, address(ea, mem), [])
1716 :     | fioperand(T.CVTF2F(_, _, e)) = fioperand(e) (* nop on the x86 *)
1717 :     | fioperand(T.CVTI2F(_, ty, e)) = convertIntToFloat(ty, e)
1718 :     | fioperand(T.FMARK(e,an)) = fioperand(e) (* XXX *)
1719 :     | fioperand(e) = (REAL, 64, I.FPR(fexpr'' e), [])
1720 :    
1721 :     (* Generate binary operator. Since the real binary operators
1722 :     * does not take memory as destination, we also ensure this
1723 :     * does not happen.
1724 :     *)
1725 :     and fbinop(targetFty,
1726 :     binOp, binOpR, ibinOp, ibinOpR, lsrc, rsrc, fd, an) =
1727 :     (* Put the mem operand in rsrc *)
1728 : leunga 1142 let
1729 : leunga 731 fun isMemOpnd(T.FREG(_, f)) = isFMemReg f
1730 :     | isMemOpnd(T.FLOAD _) = true
1731 :     | isMemOpnd(T.CVTI2F(_, (16 | 32), _)) = true
1732 :     | isMemOpnd(T.CVTF2F(_, _, t)) = isMemOpnd t
1733 :     | isMemOpnd(T.FMARK(t, _)) = isMemOpnd t
1734 :     | isMemOpnd _ = false
1735 :     val (binOp, ibinOp, lsrc, rsrc) =
1736 :     if isMemOpnd lsrc then (binOpR, ibinOpR, rsrc, lsrc)
1737 :     else (binOp, ibinOp, lsrc, rsrc)
1738 :     val lsrc = foperand(targetFty, lsrc)
1739 :     val (kind, fty, rsrc, code) = fioperand(rsrc)
1740 :     fun dstMustBeFreg f =
1741 :     if targetFty <> 64 then
1742 :     let val tmpR = newFreg()
1743 :     val tmp = I.FPR tmpR
1744 :     in mark(f tmp, an);
1745 :     emit(I.FMOVE{fsize=fsize targetFty,
1746 :     src=tmp, dst=RealReg fd})
1747 :     end
1748 :     else mark(f(RealReg fd), an)
1749 :     in case kind of
1750 :     REAL =>
1751 :     dstMustBeFreg(fn dst =>
1752 :     I.FBINOP{fsize=fsize fty, binOp=binOp,
1753 :     lsrc=lsrc, rsrc=rsrc, dst=dst})
1754 :     | INTEGER =>
1755 :     (dstMustBeFreg(fn dst =>
1756 :     I.FIBINOP{isize=isize fty, binOp=ibinOp,
1757 :     lsrc=lsrc, rsrc=rsrc, dst=dst});
1758 :     emits code
1759 :     )
1760 :     end
1761 : george 545
1762 : leunga 731 and funop(fty, unOp, src, fd, an) =
1763 :     let val src = foperand(fty, src)
1764 :     in mark(I.FUNOP{fsize=fsize fty,
1765 :     unOp=unOp, src=src, dst=RealReg fd},an)
1766 :     end
1767 :    
1768 :     and doFexpr''(fty, e, fd, an) =
1769 : leunga 1142 (floatingPointUsed := true;
1770 : leunga 731 case e of
1771 : george 889 T.FREG(_,fs) => if CB.sameColor(fs,fd) then ()
1772 : leunga 731 else fcopy''(fty, [fd], [fs], an)
1773 :     (* Stupid x86 does everything as 80-bits internally. *)
1774 :    
1775 :     (* Binary operators *)
1776 :     | T.FADD(_, a, b) => fbinop(fty,
1777 :     I.FADDL, I.FADDL, I.FIADDL, I.FIADDL,
1778 :     a, b, fd, an)
1779 :     | T.FSUB(_, a, b) => fbinop(fty,
1780 :     I.FSUBL, I.FSUBRL, I.FISUBL, I.FISUBRL,
1781 :     a, b, fd, an)
1782 :     | T.FMUL(_, a, b) => fbinop(fty,
1783 :     I.FMULL, I.FMULL, I.FIMULL, I.FIMULL,
1784 :     a, b, fd, an)
1785 :     | T.FDIV(_, a, b) => fbinop(fty,
1786 :     I.FDIVL, I.FDIVRL, I.FIDIVL, I.FIDIVRL,
1787 :     a, b, fd, an)
1788 :    
1789 :     (* Unary operators *)
1790 :     | T.FNEG(_, a) => funop(fty, I.FCHS, a, fd, an)
1791 :     | T.FABS(_, a) => funop(fty, I.FABS, a, fd, an)
1792 :     | T.FSQRT(_, a) => funop(fty, I.FSQRT, a, fd, an)
1793 :    
1794 :     (* Load *)
1795 :     | T.FLOAD(fty,ea,mem) => fload''(fty, ea, mem, fd, an)
1796 :    
1797 :     (* Type conversions *)
1798 :     | T.CVTF2F(_, _, e) => doFexpr''(fty, e, fd, an)
1799 :     | T.CVTI2F(_, ty, e) =>
1800 :     let val (_, ty, ea, cleanup) = convertIntToFloat(ty, e)
1801 :     in fiload''(ty, ea, fd, an);
1802 :     emits cleanup
1803 :     end
1804 :    
1805 :     | T.FMARK(e,A.MARKREG f) => (f fd; doFexpr''(fty, e, fd, an))
1806 :     | T.FMARK(e, a) => doFexpr''(fty, e, fd, a::an)
1807 :     | T.FPRED(e, c) => doFexpr''(fty, e, fd, A.CTRLUSE c::an)
1808 :     | T.FEXT fexp =>
1809 :     ExtensionComp.compileFext (reducer()) {e=fexp, fd=fd, an=an}
1810 :     | _ => error("doFexpr''")
1811 : leunga 1142 )
1812 : leunga 731
1813 :     (*========================================================
1814 :     * Tie the two styles of fp code generation together
1815 :     *========================================================*)
1816 :     and fstore(fty, ea, d, mem, an) =
1817 :     if enableFastFPMode andalso !fast_floating_point
1818 :     then fstore''(fty, ea, d, mem, an)
1819 :     else fstore'(fty, ea, d, mem, an)
1820 :     and fload(fty, ea, d, mem, an) =
1821 :     if enableFastFPMode andalso !fast_floating_point
1822 :     then fload''(fty, ea, d, mem, an)
1823 :     else fload'(fty, ea, d, mem, an)
1824 :     and fexpr e =
1825 :     if enableFastFPMode andalso !fast_floating_point
1826 :     then fexpr'' e else fexpr' e
1827 :     and doFexpr(fty, e, fd, an) =
1828 :     if enableFastFPMode andalso !fast_floating_point
1829 :     then doFexpr''(fty, e, fd, an)
1830 :     else doFexpr'(fty, e, fd, an)
1831 :    
1832 : leunga 797 (*================================================================
1833 :     * Optimizations for x := x op y
1834 :     * Special optimizations:
1835 :     * Generate a binary operator, result must in memory.
1836 :     * The source must not be in memory
1837 :     *================================================================*)
1838 :     and binaryMem(binOp, src, dst, mem, an) =
1839 :     mark(I.BINARY{binOp=binOp, src=immedOrReg(operand src),
1840 :     dst=address(dst,mem)}, an)
1841 :     and unaryMem(unOp, opnd, mem, an) =
1842 :     mark(I.UNARY{unOp=unOp, opnd=address(opnd,mem)}, an)
1843 :    
1844 :     and isOne(T.LI n) = n = one
1845 :     | isOne _ = false
1846 :    
1847 :     (*
1848 :     * Perform optimizations based on recognizing
1849 :     * x := x op y or
1850 :     * x := y op x
1851 :     * first.
1852 :     *)
1853 :     and store(ty, ea, d, mem, an,
1854 :     {INC,DEC,ADD,SUB,NOT,NEG,SHL,SHR,SAR,OR,AND,XOR},
1855 :     doStore
1856 :     ) =
1857 :     let fun default() = doStore(ea, d, mem, an)
1858 :     fun binary1(t, t', unary, binary, ea', x) =
1859 :     if t = ty andalso t' = ty then
1860 :     if MLTreeUtils.eqRexp(ea, ea') then
1861 :     if isOne x then unaryMem(unary, ea, mem, an)
1862 :     else binaryMem(binary, x, ea, mem, an)
1863 :     else default()
1864 :     else default()
1865 :     fun unary(t,unOp, ea') =
1866 :     if t = ty andalso MLTreeUtils.eqRexp(ea, ea') then
1867 :     unaryMem(unOp, ea, mem, an)
1868 :     else default()
1869 :     fun binary(t,t',binOp,ea',x) =
1870 :     if t = ty andalso t' = ty andalso
1871 :     MLTreeUtils.eqRexp(ea, ea') then
1872 :     binaryMem(binOp, x, ea, mem, an)
1873 :     else default()
1874 :    
1875 :     fun binaryCom1(t,unOp,binOp,x,y) =
1876 :     if t = ty then
1877 :     let fun again() =
1878 :     case y of
1879 :     T.LOAD(ty',ea',_) =>
1880 :     if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then
1881 :     if isOne x then unaryMem(unOp, ea, mem, an)
1882 :     else binaryMem(binOp,x,ea,mem,an)
1883 :     else default()
1884 :     | _ => default()
1885 :     in case x of
1886 :     T.LOAD(ty',ea',_) =>
1887 :     if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then
1888 :     if isOne y then unaryMem(unOp, ea, mem, an)
1889 :     else binaryMem(binOp,y,ea,mem,an)
1890 :     else again()
1891 :     | _ => again()
1892 :     end
1893 :     else default()
1894 :    
1895 :     fun binaryCom(t,binOp,x,y) =
1896 :     if t = ty then
1897 :     let fun again() =
1898 :     case y of
1899 :     T.LOAD(ty',ea',_) =>
1900 :     if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then
1901 :     binaryMem(binOp,x,ea,mem,an)
1902 :     else default()
1903 :     | _ => default()
1904 :     in case x of
1905 :     T.LOAD(ty',ea',_) =>
1906 :     if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then
1907 :     binaryMem(binOp,y,ea,mem,an)
1908 :     else again()
1909 :     | _ => again()
1910 :     end
1911 :     else default()
1912 :    
1913 :     in case d of
1914 :     T.ADD(t,x,y) => binaryCom1(t,INC,ADD,x,y)
1915 :     | T.SUB(t,T.LOAD(t',ea',_),x) => binary1(t,t',DEC,SUB,ea',x)
1916 :     | T.ORB(t,x,y) => binaryCom(t,OR,x,y)
1917 :     | T.ANDB(t,x,y) => binaryCom(t,AND,x,y)
1918 :     | T.XORB(t,x,y) => binaryCom(t,XOR,x,y)
1919 :     | T.SLL(t,T.LOAD(t',ea',_),x) => binary(t,t',SHL,ea',x)
1920 :     | T.SRL(t,T.LOAD(t',ea',_),x) => binary(t,t',SHR,ea',x)
1921 :     | T.SRA(t,T.LOAD(t',ea',_),x) => binary(t,t',SAR,ea',x)
1922 :     | T.NEG(t,T.LOAD(t',ea',_)) => unary(t,NEG,ea')
1923 :     | T.NOTB(t,T.LOAD(t',ea',_)) => unary(t,NOT,ea')
1924 :     | _ => default()
1925 :     end (* store *)
1926 :    
1927 : george 545 (* generate code for a statement *)
1928 :     and stmt(T.MV(_, rd, e), an) = doExpr(e, rd, an)
1929 :     | stmt(T.FMV(fty, fd, e), an) = doFexpr(fty, e, fd, an)
1930 :     | stmt(T.CCMV(ccd, e), an) = doCCexpr(e, ccd, an)
1931 :     | stmt(T.COPY(_, dst, src), an) = copy(dst, src, an)
1932 :     | stmt(T.FCOPY(fty, dst, src), an) = fcopy(fty, dst, src, an)
1933 : leunga 744 | stmt(T.JMP(e, labs), an) = jmp(e, labs, an)
1934 : blume 839 | stmt(T.CALL{funct, targets, defs, uses, region, pops, ...}, an) =
1935 :     call(funct,targets,defs,uses,region,[],an, pops)
1936 :     | stmt(T.FLOW_TO(T.CALL{funct, targets, defs, uses, region, pops, ...},
1937 : leunga 796 cutTo), an) =
1938 : blume 839 call(funct,targets,defs,uses,region,cutTo,an, pops)
1939 : george 545 | stmt(T.RET _, an) = mark(I.RET NONE, an)
1940 : leunga 797 | stmt(T.STORE(8, ea, d, mem), an) =
1941 :     store(8, ea, d, mem, an, opcodes8, store8)
1942 :     | stmt(T.STORE(16, ea, d, mem), an) =
1943 :     store(16, ea, d, mem, an, opcodes16, store16)
1944 :     | stmt(T.STORE(32, ea, d, mem), an) =
1945 :     store(32, ea, d, mem, an, opcodes32, store32)
1946 :    
1947 : george 545 | stmt(T.FSTORE(fty, ea, d, mem), an) = fstore(fty, ea, d, mem, an)
1948 : leunga 744 | stmt(T.BCC(cc, lab), an) = branch(cc, lab, an)
1949 : george 545 | stmt(T.DEFINE l, _) = defineLabel l
1950 :     | stmt(T.ANNOTATION(s, a), an) = stmt(s, a::an)
1951 : george 555 | stmt(T.EXT s, an) =
1952 :     ExtensionComp.compileSext (reducer()) {stm=s, an=an}
1953 : george 545 | stmt(s, _) = doStmts(Gen.compileStm s)
1954 :    
1955 :     and doStmt s = stmt(s, [])
1956 :     and doStmts ss = app doStmt ss
1957 :    
1958 :     and beginCluster' _ =
1959 :     ((* Must be cleared by the client.
1960 :     * if rewriteMemReg then memRegsUsed := 0w0 else ();
1961 :     *)
1962 : leunga 731 floatingPointUsed := false;
1963 :     trapLabel := NONE;
1964 :     beginCluster 0
1965 :     )
1966 : george 545 and endCluster' a =
1967 : monnier 247 (case !trapLabel
1968 : monnier 411 of NONE => ()
1969 : george 545 | SOME(_, lab) => (defineLabel lab; emit(I.INTO))
1970 : monnier 411 (*esac*);
1971 : leunga 731 (* If floating point has been used allocate an extra
1972 :     * register just in case we didn't use any explicit register
1973 :     *)
1974 :     if !floatingPointUsed then (newFreg(); ())
1975 :     else ();
1976 : george 545 endCluster(a)
1977 :     )
1978 :    
1979 :     and reducer() =
1980 : george 984 TS.REDUCER{reduceRexp = expr,
1981 : george 545 reduceFexp = fexpr,
1982 :     reduceCCexp = ccExpr,
1983 :     reduceStm = stmt,
1984 :     operand = operand,
1985 :     reduceOperand = reduceOpnd,
1986 :     addressOf = fn e => address(e, I.Region.memory), (*XXX*)
1987 : george 1009 emit = mark',
1988 : george 545 instrStream = instrStream,
1989 :     mltreeStream = self()
1990 :     }
1991 :    
1992 :     and self() =
1993 : george 984 TS.S.STREAM
1994 : leunga 815 { beginCluster = beginCluster',
1995 :     endCluster = endCluster',
1996 :     emit = doStmt,
1997 :     pseudoOp = pseudoOp,
1998 :     defineLabel = defineLabel,
1999 :     entryLabel = entryLabel,
2000 :     comment = comment,
2001 :     annotation = annotation,
2002 :     getAnnotations = getAnnotations,
2003 :     exitBlock = fn mlrisc => exitBlock(cellset mlrisc)
2004 : george 545 }
2005 :    
2006 :     in self()
2007 : monnier 247 end
2008 :    
2009 : george 545 end (* functor *)
2010 :    
2011 :     end (* local *)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0