Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /sml/trunk/src/MLRISC/x86/mltree/x86.sml
ViewVC logotype

Annotation of /sml/trunk/src/MLRISC/x86/mltree/x86.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 779 - (view) (download)

1 : leunga 583 (*
2 : monnier 247 *
3 :     * COPYRIGHT (c) 1998 Bell Laboratories.
4 : george 545 *
5 :     * This is a revised version that takes into account of
6 :     * the extended x86 instruction set, and has better handling of
7 :     * non-standard types. I've factored out the integer/floating point
8 :     * comparison code, added optimizations for conditional moves.
9 :     * The latter generates SETcc and CMOVcc (Pentium Pro only) instructions.
10 :     * To avoid problems, I have tried to incorporate as much of
11 :     * Lal's original magic incantations as possible.
12 : monnier 247 *
13 : george 545 * Some changes:
14 :     *
15 :     * 1. REMU/REMS/REMT are now supported
16 :     * 2. COND is supported by generating SETcc and/or CMOVcc; this
17 :     * may require at least a Pentium II to work.
18 :     * 3. Division by a constant has been optimized. Division by
19 :     * a power of 2 generates SHRL or SARL.
20 :     * 4. Better addressing mode selection has been implemented. This should
21 :     * improve array indexing on SML/NJ.
22 :     * 5. Generate testl/testb instead of andl whenever appropriate. This
23 :     * is recommended by the Intel Optimization Guide and seems to improve
24 :     * boxity tests on SML/NJ.
25 : leunga 731 *
26 :     * More changes for floating point:
27 :     * A new mode is implemented which generates pseudo 3-address instructions
28 :     * for floating point. These instructions are register allocated the
29 :     * normal way, with the virtual registers mapped onto a set of pseudo
30 :     * %fp registers. These registers are then mapped onto the %st registers
31 :     * with a new postprocessing phase.
32 :     *
33 : george 545 * -- Allen
34 : monnier 247 *)
35 : george 545 local
36 :     val rewriteMemReg = true (* should we rewrite memRegs *)
37 : leunga 731 val enableFastFPMode = true (* set this to false to disable the mode *)
38 : george 545 in
39 :    
40 : monnier 247 functor X86
41 :     (structure X86Instr : X86INSTR
42 : george 555 structure ExtensionComp : MLTREE_EXTENSION_COMP
43 : leunga 775 where I = X86Instr
44 : george 545 datatype arch = Pentium | PentiumPro | PentiumII | PentiumIII
45 :     val arch : arch ref
46 : leunga 593 val cvti2f :
47 :     (* source operand, guaranteed to be non-memory! *)
48 : leunga 775 {ty: X86Instr.T.ty, src: X86Instr.operand} ->
49 : leunga 593 {instrs : X86Instr.instruction list,(* the instructions *)
50 :     tempMem: X86Instr.operand, (* temporary for CVTI2F *)
51 :     cleanup: X86Instr.instruction list (* cleanup code *)
52 :     }
53 : leunga 731 (* When the following flag is set, we allocate floating point registers
54 :     * directly on the floating point stack
55 :     *)
56 :     val fast_floating_point : bool ref
57 : george 545 ) : sig include MLTREECOMP
58 :     val rewriteMemReg : bool
59 :     end =
60 : monnier 247 struct
61 : leunga 775 structure I = X86Instr
62 :     structure T = I.T
63 : monnier 429 structure S = T.Stream
64 : george 545 structure C = I.C
65 :     structure Shuffle = Shuffle(I)
66 : monnier 247 structure W32 = Word32
67 : george 545 structure LE = I.LabelExp
68 :     structure A = MLRiscAnnotations
69 : monnier 247
70 : leunga 744 type instrStream = (I.instruction,C.cellset) T.stream
71 :     type mltreeStream = (T.stm,T.mlrisc list) T.stream
72 : leunga 565
73 :     datatype kind = REAL | INTEGER
74 : george 545
75 :     structure Gen = MLTreeGen
76 :     (structure T = T
77 :     val intTy = 32
78 :     val naturalWidths = [32]
79 :     datatype rep = SE | ZE | NEITHER
80 :     val rep = NEITHER
81 :     )
82 :    
83 : monnier 411 fun error msg = MLRiscErrorMsg.error("X86",msg)
84 : monnier 247
85 : george 545 (* Should we perform automatic MemReg translation?
86 :     * If this is on, we can avoid doing RewritePseudo phase entirely.
87 :     *)
88 :     val rewriteMemReg = rewriteMemReg
89 : leunga 731
90 :     (* The following hardcoded *)
91 : leunga 744 fun isMemReg r = rewriteMemReg andalso
92 :     let val r = C.registerNum r
93 :     in r >= 8 andalso r < 32
94 :     end
95 : leunga 731 fun isFMemReg r = if enableFastFPMode andalso !fast_floating_point
96 : leunga 744 then let val r = C.registerNum r
97 :     in r >= 8 andalso r < 32 end
98 : leunga 731 else true
99 : leunga 744 val isAnyFMemReg = List.exists (fn r =>
100 :     let val r = C.registerNum r
101 :     in r >= 8 andalso r < 32 end
102 :     )
103 : monnier 247
104 : george 555 val ST0 = C.ST 0
105 :     val ST7 = C.ST 7
106 :    
107 : george 545 (*
108 :     * The code generator
109 :     *)
110 : monnier 411 fun selectInstructions
111 : george 545 (instrStream as
112 :     S.STREAM{emit,defineLabel,entryLabel,pseudoOp,annotation,
113 : leunga 744 beginCluster,endCluster,exitBlock,comment,...}) =
114 : george 545 let exception EA
115 : monnier 411
116 : george 545 (* label where a trap is generated -- one per cluster *)
117 :     val trapLabel = ref (NONE: (I.instruction * Label.label) option)
118 : monnier 247
119 : leunga 731 (* flag floating point generation *)
120 :     val floatingPointUsed = ref false
121 :    
122 : george 545 (* effective address of an integer register *)
123 : leunga 731 fun IntReg r = if isMemReg r then I.MemReg r else I.Direct r
124 :     and RealReg r = if isFMemReg r then I.FDirect r else I.FPR r
125 : monnier 411
126 : george 545 (* Add an overflow trap *)
127 :     fun trap() =
128 :     let val jmp =
129 :     case !trapLabel of
130 :     NONE => let val label = Label.newLabel "trap"
131 :     val jmp = I.JCC{cond=I.O,
132 : leunga 775 opnd=I.ImmedLabel(T.LABEL label)}
133 : george 545 in trapLabel := SOME(jmp, label); jmp end
134 :     | SOME(jmp, _) => jmp
135 :     in emit jmp end
136 : monnier 411
137 : george 545 val newReg = C.newReg
138 :     val newFreg = C.newFreg
139 : monnier 247
140 : leunga 731 fun fsize 32 = I.FP32
141 :     | fsize 64 = I.FP64
142 :     | fsize 80 = I.FP80
143 :     | fsize _ = error "fsize"
144 :    
145 : george 545 (* mark an expression with a list of annotations *)
146 :     fun mark'(i,[]) = i
147 :     | mark'(i,a::an) = mark'(I.ANNOTATION{i=i,a=a},an)
148 : monnier 247
149 : george 545 (* annotate an expression and emit it *)
150 :     fun mark(i,an) = emit(mark'(i,an))
151 : monnier 247
152 : leunga 731 val emits = app emit
153 :    
154 : george 545 (* emit parallel copies for integers
155 :     * Translates parallel copies that involve memregs into
156 :     * individual copies.
157 :     *)
158 :     fun copy([], [], an) = ()
159 :     | copy(dst, src, an) =
160 :     let fun mvInstr{dst as I.MemReg rd, src as I.MemReg rs} =
161 : leunga 744 if C.sameColor(rd,rs) then [] else
162 : george 545 let val tmpR = I.Direct(newReg())
163 :     in [I.MOVE{mvOp=I.MOVL, src=src, dst=tmpR},
164 :     I.MOVE{mvOp=I.MOVL, src=tmpR, dst=dst}]
165 :     end
166 :     | mvInstr{dst=I.Direct rd, src=I.Direct rs} =
167 : leunga 744 if C.sameColor(rd,rs) then []
168 : george 545 else [I.COPY{dst=[rd], src=[rs], tmp=NONE}]
169 :     | mvInstr{dst, src} = [I.MOVE{mvOp=I.MOVL, src=src, dst=dst}]
170 :     in
171 : leunga 731 emits (Shuffle.shuffle{mvInstr=mvInstr, ea=IntReg}
172 : leunga 744 {tmp=SOME(I.Direct(newReg())),
173 : george 545 dst=dst, src=src})
174 :     end
175 :    
176 :     (* conversions *)
177 :     val itow = Word.fromInt
178 :     val wtoi = Word.toInt
179 : george 761 fun toInt32 i = T.I.toInt32(32, i)
180 : george 545 val w32toi32 = Word32.toLargeIntX
181 :     val i32tow32 = Word32.fromLargeInt
182 : monnier 247
183 : george 545 (* One day, this is going to bite us when precision(LargeInt)>32 *)
184 :     fun wToInt32 w = Int32.fromLarge(Word32.toLargeIntX w)
185 : monnier 247
186 : george 545 (* some useful registers *)
187 :     val eax = I.Direct(C.eax)
188 :     val ecx = I.Direct(C.ecx)
189 :     val edx = I.Direct(C.edx)
190 : monnier 247
191 : leunga 775 fun immedLabel lab = I.ImmedLabel(T.LABEL lab)
192 : george 545
193 :     (* Is the expression zero? *)
194 : george 761 fun isZero(T.LI z) = T.I.isZero z
195 : george 545 | isZero(T.MARK(e,a)) = isZero e
196 :     | isZero _ = false
197 :     (* Does the expression set the zero bit?
198 :     * WARNING: we assume these things are not optimized out!
199 :     *)
200 :     fun setZeroBit(T.ANDB _) = true
201 :     | setZeroBit(T.ORB _) = true
202 :     | setZeroBit(T.XORB _) = true
203 :     | setZeroBit(T.SRA _) = true
204 :     | setZeroBit(T.SRL _) = true
205 :     | setZeroBit(T.SLL _) = true
206 : leunga 695 | setZeroBit(T.SUB _) = true
207 :     | setZeroBit(T.ADDT _) = true
208 :     | setZeroBit(T.SUBT _) = true
209 : george 545 | setZeroBit(T.MARK(e, _)) = setZeroBit e
210 :     | setZeroBit _ = false
211 : monnier 247
212 : leunga 695 fun setZeroBit2(T.ANDB _) = true
213 :     | setZeroBit2(T.ORB _) = true
214 :     | setZeroBit2(T.XORB _) = true
215 :     | setZeroBit2(T.SRA _) = true
216 :     | setZeroBit2(T.SRL _) = true
217 :     | setZeroBit2(T.SLL _) = true
218 :     | setZeroBit2(T.ADD(32, _, _)) = true (* can't use leal! *)
219 :     | setZeroBit2(T.SUB _) = true
220 :     | setZeroBit2(T.ADDT _) = true
221 :     | setZeroBit2(T.SUBT _) = true
222 :     | setZeroBit2(T.MARK(e, _)) = setZeroBit2 e
223 :     | setZeroBit2 _ = false
224 :    
225 : leunga 731 (* emit parallel copies for floating point
226 :     * Normal version.
227 :     *)
228 :     fun fcopy'(fty, [], [], _) = ()
229 :     | fcopy'(fty, dst as [_], src as [_], an) =
230 : george 545 mark(I.FCOPY{dst=dst,src=src,tmp=NONE}, an)
231 : leunga 731 | fcopy'(fty, dst, src, an) =
232 : george 545 mark(I.FCOPY{dst=dst,src=src,tmp=SOME(I.FDirect(newFreg()))}, an)
233 : monnier 247
234 : leunga 731 (* emit parallel copies for floating point.
235 :     * Fast version.
236 :     * Translates parallel copies that involve memregs into
237 :     * individual copies.
238 :     *)
239 :    
240 :     fun fcopy''(fty, [], [], _) = ()
241 :     | fcopy''(fty, dst, src, an) =
242 :     if true orelse isAnyFMemReg dst orelse isAnyFMemReg src then
243 :     let val fsize = fsize fty
244 :     fun mvInstr{dst, src} = [I.FMOVE{fsize=fsize, src=src, dst=dst}]
245 :     in
246 :     emits (Shuffle.shuffle{mvInstr=mvInstr, ea=RealReg}
247 : leunga 744 {tmp=case dst of
248 : leunga 731 [_] => NONE
249 :     | _ => SOME(I.FPR(newReg())),
250 :     dst=dst, src=src})
251 :     end
252 :     else
253 :     mark(I.FCOPY{dst=dst,src=src,tmp=
254 :     case dst of
255 :     [_] => NONE
256 :     | _ => SOME(I.FPR(newFreg()))}, an)
257 :    
258 :     fun fcopy x = if enableFastFPMode andalso !fast_floating_point
259 :     then fcopy'' x else fcopy' x
260 :    
261 : george 545 (* Translates MLTREE condition code to x86 condition code *)
262 :     fun cond T.LT = I.LT | cond T.LTU = I.B
263 :     | cond T.LE = I.LE | cond T.LEU = I.BE
264 :     | cond T.EQ = I.EQ | cond T.NE = I.NE
265 :     | cond T.GE = I.GE | cond T.GEU = I.AE
266 :     | cond T.GT = I.GT | cond T.GTU = I.A
267 : monnier 247
268 : george 545 (* Move and annotate *)
269 :     fun move'(src as I.Direct s, dst as I.Direct d, an) =
270 : leunga 744 if C.sameColor(s,d) then ()
271 : george 545 else mark(I.COPY{dst=[d], src=[s], tmp=NONE}, an)
272 :     | move'(src, dst, an) = mark(I.MOVE{mvOp=I.MOVL, src=src, dst=dst}, an)
273 : monnier 247
274 : george 545 (* Move only! *)
275 :     fun move(src, dst) = move'(src, dst, [])
276 : monnier 247
277 : george 545 fun zero dst = emit(I.BINARY{binOp=I.XORL, src=dst, dst=dst})
278 : monnier 247
279 : george 545 val readonly = I.Region.readonly
280 : monnier 247
281 : george 545 (*
282 : george 761 * Compute an effective address.
283 : george 545 *)
284 : george 761 fun address(ea, mem) = let
285 : george 545 (* Keep building a bigger and bigger effective address expressions
286 :     * The input is a list of trees
287 :     * b -- base
288 :     * i -- index
289 :     * s -- scale
290 :     * d -- immed displacement
291 :     *)
292 :     fun doEA([], b, i, s, d) = makeAddressingMode(b, i, s, d)
293 :     | doEA(t::trees, b, i, s, d) =
294 :     (case t of
295 : george 761 T.LI n => doEAImmed(trees, toInt32 n, b, i, s, d)
296 : leunga 775 | T.CONST _ => doEALabel(trees, t, b, i, s, d)
297 :     | T.LABEL _ => doEALabel(trees, t, b, i, s, d)
298 :     | T.LABEXP le => doEALabel(trees, le, b, i, s, d)
299 : george 545 | T.ADD(32, t1, t2 as T.REG(_,r)) =>
300 :     if isMemReg r then doEA(t2::t1::trees, b, i, s, d)
301 :     else doEA(t1::t2::trees, b, i, s, d)
302 :     | T.ADD(32, t1, t2) => doEA(t1::t2::trees, b, i, s, d)
303 :     | T.SUB(32, t1, T.LI n) =>
304 : george 761 doEA(t1::T.LI(T.I.NEG(32,n))::trees, b, i, s, d)
305 :     | T.SLL(32, t1, T.LI n) => let
306 :     val n = T.I.toInt(32, n)
307 :     in
308 :     case n
309 :     of 0 => displace(trees, t1, b, i, s, d)
310 :     | 1 => indexed(trees, t1, t, 1, b, i, s, d)
311 :     | 2 => indexed(trees, t1, t, 2, b, i, s, d)
312 :     | 3 => indexed(trees, t1, t, 3, b, i, s, d)
313 :     | _ => displace(trees, t, b, i, s, d)
314 :     end
315 : george 545 | t => displace(trees, t, b, i, s, d)
316 :     )
317 : monnier 247
318 : george 545 (* Add an immed constant *)
319 :     and doEAImmed(trees, 0, b, i, s, d) = doEA(trees, b, i, s, d)
320 :     | doEAImmed(trees, n, b, i, s, I.Immed m) =
321 : george 761 doEA(trees, b, i, s, I.Immed(n+m))
322 : george 545 | doEAImmed(trees, n, b, i, s, I.ImmedLabel le) =
323 : leunga 775 doEA(trees, b, i, s,
324 :     I.ImmedLabel(T.ADD(32,le,T.LI(T.I.fromInt32(32, n)))))
325 : george 545 | doEAImmed(trees, n, b, i, s, _) = error "doEAImmed"
326 : monnier 247
327 : george 545 (* Add a label expression *)
328 :     and doEALabel(trees, le, b, i, s, I.Immed 0) =
329 :     doEA(trees, b, i, s, I.ImmedLabel le)
330 :     | doEALabel(trees, le, b, i, s, I.Immed m) =
331 :     doEA(trees, b, i, s,
332 : leunga 775 I.ImmedLabel(T.ADD(32,le,T.LI(T.I.fromInt32(32, m))))
333 : george 545 handle Overflow => error "doEALabel: constant too large")
334 :     | doEALabel(trees, le, b, i, s, I.ImmedLabel le') =
335 : leunga 775 doEA(trees, b, i, s, I.ImmedLabel(T.ADD(32,le,le')))
336 : george 545 | doEALabel(trees, le, b, i, s, _) = error "doEALabel"
337 : monnier 247
338 : george 545 and makeAddressingMode(NONE, NONE, _, disp) = disp
339 :     | makeAddressingMode(SOME base, NONE, _, disp) =
340 :     I.Displace{base=base, disp=disp, mem=mem}
341 :     | makeAddressingMode(base, SOME index, scale, disp) =
342 : george 761 I.Indexed{base=base, index=index, scale=scale,
343 : george 545 disp=disp, mem=mem}
344 : monnier 247
345 : george 545 (* generate code for tree and ensure that it is not in %esp *)
346 :     and exprNotEsp tree =
347 :     let val r = expr tree
348 : leunga 744 in if C.sameColor(r, C.esp) then
349 : george 545 let val tmp = newReg()
350 :     in move(I.Direct r, I.Direct tmp); tmp end
351 :     else r
352 :     end
353 : monnier 247
354 : george 545 (* Add a base register *)
355 :     and displace(trees, t, NONE, i, s, d) = (* no base yet *)
356 :     doEA(trees, SOME(expr t), i, s, d)
357 :     | displace(trees, t, b as SOME base, NONE, _, d) = (* no index *)
358 :     (* make t the index, but make sure that it is not %esp! *)
359 :     let val i = expr t
360 : leunga 744 in if C.sameColor(i, C.esp) then
361 : george 545 (* swap base and index *)
362 : leunga 744 if C.sameColor(base, C.esp) then
363 : george 545 doEA(trees, SOME i, b, 0, d)
364 :     else (* base and index = %esp! *)
365 :     let val index = newReg()
366 :     in move(I.Direct i, I.Direct index);
367 :     doEA(trees, b, SOME index, 0, d)
368 :     end
369 :     else
370 :     doEA(trees, b, SOME i, 0, d)
371 :     end
372 :     | displace(trees, t, SOME base, i, s, d) = (* base and index *)
373 :     let val b = expr(T.ADD(32,T.REG(32,base),t))
374 :     in doEA(trees, SOME b, i, s, d) end
375 : monnier 247
376 : george 545 (* Add an indexed register *)
377 :     and indexed(trees, t, t0, scale, b, NONE, _, d) = (* no index yet *)
378 :     doEA(trees, b, SOME(exprNotEsp t), scale, d)
379 :     | indexed(trees, _, t0, _, NONE, i, s, d) = (* no base *)
380 :     doEA(trees, SOME(expr t0), i, s, d)
381 :     | indexed(trees, _, t0, _, SOME base, i, s, d) = (*base and index*)
382 :     let val b = expr(T.ADD(32, t0, T.REG(32, base)))
383 :     in doEA(trees, SOME b, i, s, d) end
384 :    
385 :     in case doEA([ea], NONE, NONE, 0, I.Immed 0) of
386 :     I.Immed _ => raise EA
387 :     | I.ImmedLabel le => I.LabelEA le
388 :     | ea => ea
389 :     end (* address *)
390 : monnier 247
391 : george 545 (* reduce an expression into an operand *)
392 : george 761 and operand(T.LI i) = I.Immed(toInt32(i))
393 : leunga 775 | operand(x as (T.CONST _ | T.LABEL _)) = I.ImmedLabel x
394 :     | operand(T.LABEXP le) = I.ImmedLabel le
395 : george 545 | operand(T.REG(_,r)) = IntReg r
396 :     | operand(T.LOAD(32,ea,mem)) = address(ea, mem)
397 :     | operand(t) = I.Direct(expr t)
398 : monnier 247
399 : george 545 and moveToReg(opnd) =
400 :     let val dst = I.Direct(newReg())
401 :     in move(opnd, dst); dst
402 :     end
403 : monnier 247
404 : george 545 and reduceOpnd(I.Direct r) = r
405 :     | reduceOpnd opnd =
406 :     let val dst = newReg()
407 :     in move(opnd, I.Direct dst); dst
408 :     end
409 : monnier 247
410 : george 545 (* ensure that the operand is either an immed or register *)
411 :     and immedOrReg(opnd as I.Displace _) = moveToReg opnd
412 :     | immedOrReg(opnd as I.Indexed _) = moveToReg opnd
413 :     | immedOrReg(opnd as I.MemReg _) = moveToReg opnd
414 :     | immedOrReg(opnd as I.LabelEA _) = moveToReg opnd
415 :     | immedOrReg opnd = opnd
416 : monnier 247
417 : george 545 and isImmediate(I.Immed _) = true
418 :     | isImmediate(I.ImmedLabel _) = true
419 :     | isImmediate _ = false
420 : monnier 247
421 : george 545 and regOrMem opnd = if isImmediate opnd then moveToReg opnd else opnd
422 :    
423 :     and isMemOpnd opnd =
424 :     (case opnd of
425 :     I.Displace _ => true
426 :     | I.Indexed _ => true
427 :     | I.MemReg _ => true
428 :     | I.LabelEA _ => true
429 : george 555 | I.FDirect f => true
430 : george 545 | _ => false
431 :     )
432 :    
433 :     (*
434 :     * Compute an integer expression and put the result in
435 :     * the destination register rd.
436 :     *)
437 :     and doExpr(exp, rd : I.C.cell, an) =
438 :     let val rdOpnd = IntReg rd
439 : monnier 247
440 : leunga 744 fun equalRd(I.Direct r) = C.sameColor(r, rd)
441 :     | equalRd(I.MemReg r) = C.sameColor(r, rd)
442 : george 545 | equalRd _ = false
443 : monnier 247
444 : george 545 (* Emit a binary operator. If the destination is
445 :     * a memReg, do something smarter.
446 :     *)
447 :     fun genBinary(binOp, opnd1, opnd2) =
448 :     if isMemReg rd andalso
449 :     (isMemOpnd opnd1 orelse isMemOpnd opnd2) orelse
450 :     equalRd(opnd2)
451 :     then
452 :     let val tmpR = newReg()
453 :     val tmp = I.Direct tmpR
454 :     in move(opnd1, tmp);
455 :     mark(I.BINARY{binOp=binOp, src=opnd2, dst=tmp}, an);
456 :     move(tmp, rdOpnd)
457 :     end
458 :     else
459 :     (move(opnd1, rdOpnd);
460 :     mark(I.BINARY{binOp=binOp, src=opnd2, dst=rdOpnd}, an)
461 :     )
462 : monnier 247
463 : george 545 (* Generate a binary operator; it may commute *)
464 :     fun binaryComm(binOp, e1, e2) =
465 :     let val (opnd1, opnd2) =
466 :     case (operand e1, operand e2) of
467 :     (x as I.Immed _, y) => (y, x)
468 :     | (x as I.ImmedLabel _, y) => (y, x)
469 :     | (x, y as I.Direct _) => (y, x)
470 :     | (x, y) => (x, y)
471 :     in genBinary(binOp, opnd1, opnd2)
472 :     end
473 :    
474 :     (* Generate a binary operator; non-commutative *)
475 :     fun binary(binOp, e1, e2) =
476 :     genBinary(binOp, operand e1, operand e2)
477 :    
478 :     (* Generate a unary operator *)
479 :     fun unary(unOp, e) =
480 :     let val opnd = operand e
481 :     in if isMemReg rd andalso isMemOpnd opnd then
482 :     let val tmp = I.Direct(newReg())
483 :     in move(opnd, tmp); move(tmp, rdOpnd)
484 :     end
485 :     else move(opnd, rdOpnd);
486 :     mark(I.UNARY{unOp=unOp, opnd=rdOpnd}, an)
487 :     end
488 :    
489 :     (* Generate shifts; the shift
490 :     * amount must be a constant or in %ecx *)
491 :     fun shift(opcode, e1, e2) =
492 :     let val (opnd1, opnd2) = (operand e1, operand e2)
493 :     in case opnd2 of
494 :     I.Immed _ => genBinary(opcode, opnd1, opnd2)
495 :     | _ =>
496 :     if equalRd(opnd2) then
497 :     let val tmpR = newReg()
498 :     val tmp = I.Direct tmpR
499 :     in move(opnd1, tmp);
500 :     move(opnd2, ecx);
501 :     mark(I.BINARY{binOp=opcode, src=ecx, dst=tmp},an);
502 :     move(tmp, rdOpnd)
503 :     end
504 :     else
505 :     (move(opnd1, rdOpnd);
506 :     move(opnd2, ecx);
507 :     mark(I.BINARY{binOp=opcode, src=ecx, dst=rdOpnd},an)
508 :     )
509 :     end
510 :    
511 :     (* Division or remainder: divisor must be in %edx:%eax pair *)
512 :     fun divrem(signed, overflow, e1, e2, resultReg) =
513 :     let val (opnd1, opnd2) = (operand e1, operand e2)
514 :     val _ = move(opnd1, eax)
515 : leunga 606 val oper = if signed then (emit(I.CDQ); I.IDIVL)
516 :     else (zero edx; I.DIVL)
517 : george 545 in mark(I.MULTDIV{multDivOp=oper, src=regOrMem opnd2},an);
518 :     move(resultReg, rdOpnd);
519 :     if overflow then trap() else ()
520 :     end
521 :    
522 :     (* Optimize the special case for division *)
523 : george 761 fun divide(signed, overflow, e1, e2 as T.LI n') = let
524 :     val n = toInt32 n'
525 :     val w = T.I.toWord32(32, n')
526 :     fun isPowerOf2 w = W32.andb((w - 0w1), w) = 0w0
527 : george 545 fun log2 n = (* n must be > 0!!! *)
528 :     let fun loop(0w1,pow) = pow
529 : george 761 | loop(w,pow) = loop(W32.>>(w, 0w1),pow+1)
530 : george 545 in loop(n,0) end
531 :     in if n > 1 andalso isPowerOf2 w then
532 : george 761 let val pow = T.LI(T.I.fromInt(32,log2 w))
533 : george 545 in if signed then
534 :     (* signed; simulate round towards zero *)
535 :     let val label = Label.newLabel ""
536 :     val reg1 = expr e1
537 :     val opnd1 = I.Direct reg1
538 :     in if setZeroBit e1 then ()
539 :     else emit(I.CMPL{lsrc=opnd1, rsrc=I.Immed 0});
540 :     emit(I.JCC{cond=I.GE, opnd=immedLabel label});
541 :     emit(if n = 2 then
542 :     I.UNARY{unOp=I.INCL, opnd=opnd1}
543 :     else
544 :     I.BINARY{binOp=I.ADDL,
545 : george 761 src=I.Immed(n - 1),
546 : george 545 dst=opnd1});
547 :     defineLabel label;
548 :     shift(I.SARL, T.REG(32, reg1), pow)
549 :     end
550 :     else (* unsigned *)
551 :     shift(I.SHRL, e1, pow)
552 :     end
553 :     else
554 :     (* note the only way we can overflow is if
555 :     * n = 0 or n = -1
556 :     *)
557 :     divrem(signed, overflow andalso (n = ~1 orelse n = 0),
558 :     e1, e2, eax)
559 :     end
560 :     | divide(signed, overflow, e1, e2) =
561 :     divrem(signed, overflow, e1, e2, eax)
562 : monnier 247
563 : george 545 fun rem(signed, overflow, e1, e2) =
564 :     divrem(signed, overflow, e1, e2, edx)
565 :    
566 :     (* unsigned integer multiplication *)
567 :     fun uMultiply(e1, e2) =
568 :     (* note e2 can never be (I.Direct edx) *)
569 :     (move(operand e1, eax);
570 : leunga 606 mark(I.MULTDIV{multDivOp=I.MULL,
571 : george 545 src=regOrMem(operand e2)},an);
572 :     move(eax, rdOpnd)
573 :     )
574 :    
575 :     (* signed integer multiplication:
576 :     * The only forms that are allowed that also sets the
577 :     * OF and CF flags are:
578 :     *
579 :     * imul r32, r32/m32, imm8
580 :     * imul r32, imm8
581 :     * imul r32, imm32
582 :     *)
583 :     fun multiply(e1, e2) =
584 :     let fun doit(i1 as I.Immed _, i2 as I.Immed _, dstR, dst) =
585 :     (move(i1, dst);
586 :     mark(I.MUL3{dst=dstR, src1=i2, src2=NONE},an))
587 :     | doit(rm, i2 as I.Immed _, dstR, dst) =
588 :     doit(i2, rm, dstR, dst)
589 :     | doit(imm as I.Immed(i), rm, dstR, dst) =
590 :     mark(I.MUL3{dst=dstR, src1=rm, src2=SOME i},an)
591 :     | doit(r1 as I.Direct _, r2 as I.Direct _, dstR, dst) =
592 :     (move(r1, dst);
593 :     mark(I.MUL3{dst=dstR, src1=r2, src2=NONE},an))
594 :     | doit(r1 as I.Direct _, rm, dstR, dst) =
595 :     (move(r1, dst);
596 :     mark(I.MUL3{dst=dstR, src1=rm, src2=NONE},an))
597 :     | doit(rm, r as I.Direct _, dstR, dst) =
598 :     doit(r, rm, dstR, dst)
599 :     | doit(rm1, rm2, dstR, dst) =
600 :     if equalRd rm2 then
601 :     let val tmpR = newReg()
602 :     val tmp = I.Direct tmpR
603 :     in move(rm1, tmp);
604 :     mark(I.MUL3{dst=tmpR, src1=rm2, src2=NONE},an);
605 :     move(tmp, dst)
606 :     end
607 :     else
608 :     (move(rm1, dst);
609 :     mark(I.MUL3{dst=dstR, src1=rm2, src2=NONE},an)
610 :     )
611 :     val (opnd1, opnd2) = (operand e1, operand e2)
612 :     in if isMemReg rd then (* destination must be a real reg *)
613 :     let val tmpR = newReg()
614 :     val tmp = I.Direct tmpR
615 :     in doit(opnd1, opnd2, tmpR, tmp);
616 :     move(tmp, rdOpnd)
617 :     end
618 :     else
619 :     doit(opnd1, opnd2, rd, rdOpnd)
620 :     end
621 : monnier 247
622 : george 545 (* Makes sure the destination must be a register *)
623 :     fun dstMustBeReg f =
624 :     if isMemReg rd then
625 :     let val tmpR = newReg()
626 :     val tmp = I.Direct(tmpR)
627 :     in f(tmpR, tmp); move(tmp, rdOpnd) end
628 :     else f(rd, rdOpnd)
629 : monnier 247
630 : george 545 (* Emit a load instruction; makes sure that the destination
631 :     * is a register
632 :     *)
633 :     fun genLoad(mvOp, ea, mem) =
634 :     dstMustBeReg(fn (_, dst) =>
635 :     mark(I.MOVE{mvOp=mvOp, src=address(ea, mem), dst=dst},an))
636 :    
637 :     (* Generate a zero extended loads *)
638 :     fun load8(ea, mem) = genLoad(I.MOVZBL, ea, mem)
639 :     fun load16(ea, mem) = genLoad(I.MOVZWL, ea, mem)
640 :     fun load8s(ea, mem) = genLoad(I.MOVSBL, ea, mem)
641 :     fun load16s(ea, mem) = genLoad(I.MOVSWL, ea, mem)
642 :     fun load32(ea, mem) = genLoad(I.MOVL, ea, mem)
643 :    
644 :     (* Generate a sign extended loads *)
645 :    
646 :     (* Generate setcc instruction:
647 :     * semantics: MV(rd, COND(_, T.CMP(ty, cc, t1, t2), yes, no))
648 : leunga 583 * Bug, if eax is either t1 or t2 then problem will occur!!!
649 :     * Note that we have to use eax as the destination of the
650 :     * setcc because it only works on the registers
651 :     * %al, %bl, %cl, %dl and %[abcd]h. The last four registers
652 :     * are inaccessible in 32 bit mode.
653 : george 545 *)
654 :     fun setcc(ty, cc, t1, t2, yes, no) =
655 : leunga 583 let val (cc, yes, no) =
656 :     if yes > no then (cc, yes, no)
657 :     else (T.Basis.negateCond cc, no, yes)
658 : george 545 in (* Clear the destination first.
659 :     * This this because stupid SETcc
660 :     * only writes to the low order
661 :     * byte. That's Intel architecture, folks.
662 :     *)
663 : leunga 695 case (yes, no, cc) of
664 :     (1, 0, T.LT) =>
665 :     let val tmp = I.Direct(expr(T.SUB(32,t1,t2)))
666 :     in move(tmp, rdOpnd);
667 :     emit(I.BINARY{binOp=I.SHRL,src=I.Immed 31,dst=rdOpnd})
668 :     end
669 :     | (1, 0, T.GT) =>
670 :     let val tmp = I.Direct(expr(T.SUB(32,t1,t2)))
671 :     in emit(I.UNARY{unOp=I.NOTL,opnd=tmp});
672 :     move(tmp, rdOpnd);
673 :     emit(I.BINARY{binOp=I.SHRL,src=I.Immed 31,dst=rdOpnd})
674 :     end
675 :     | (1, 0, _) => (* normal case *)
676 : george 545 let val cc = cmp(true, ty, cc, t1, t2, [])
677 : leunga 583 in mark(I.SET{cond=cond cc, opnd=eax}, an);
678 : leunga 695 emit(I.BINARY{binOp=I.ANDL,src=I.Immed 255, dst=eax});
679 : leunga 583 move(eax, rdOpnd)
680 :     end
681 : leunga 695 | (C1, C2, _) =>
682 : george 545 (* general case;
683 : leunga 583 * from the Intel optimization guide p3-5
684 :     *)
685 : leunga 695 let val _ = zero eax;
686 :     val cc = cmp(true, ty, cc, t1, t2, [])
687 : leunga 583 in case C1-C2 of
688 :     D as (1 | 2 | 3 | 4 | 5 | 8 | 9) =>
689 :     let val (base,scale) =
690 :     case D of
691 :     1 => (NONE, 0)
692 :     | 2 => (NONE, 1)
693 :     | 3 => (SOME C.eax, 1)
694 :     | 4 => (NONE, 2)
695 :     | 5 => (SOME C.eax, 2)
696 :     | 8 => (NONE, 3)
697 :     | 9 => (SOME C.eax, 3)
698 :     val addr = I.Indexed{base=base,
699 :     index=C.eax,
700 :     scale=scale,
701 :     disp=I.Immed C2,
702 : george 545 mem=readonly}
703 : leunga 583 val tmpR = newReg()
704 :     val tmp = I.Direct tmpR
705 :     in emit(I.SET{cond=cond cc, opnd=eax});
706 :     mark(I.LEA{r32=tmpR, addr=addr}, an);
707 :     move(tmp, rdOpnd)
708 :     end
709 :     | D =>
710 :     (emit(I.SET{cond=cond(T.Basis.negateCond cc),
711 :     opnd=eax});
712 :     emit(I.UNARY{unOp=I.DECL, opnd=eax});
713 :     emit(I.BINARY{binOp=I.ANDL,
714 :     src=I.Immed D, dst=eax});
715 :     if C2 = 0 then
716 :     move(eax, rdOpnd)
717 :     else
718 :     let val tmpR = newReg()
719 :     val tmp = I.Direct tmpR
720 :     in mark(I.LEA{addr=
721 :     I.Displace{
722 :     base=C.eax,
723 :     disp=I.Immed C2,
724 :     mem=readonly},
725 :     r32=tmpR}, an);
726 :     move(tmp, rdOpnd)
727 :     end
728 :     )
729 :     end
730 : george 545 end (* setcc *)
731 :    
732 :     (* Generate cmovcc instruction.
733 :     * on Pentium Pro and Pentium II only
734 :     *)
735 :     fun cmovcc(ty, cc, t1, t2, yes, no) =
736 :     let fun genCmov(dstR, _) =
737 :     let val _ = doExpr(no, dstR, []) (* false branch *)
738 :     val cc = cmp(true, ty, cc, t1, t2, []) (* compare *)
739 :     in mark(I.CMOV{cond=cond cc, src=operand yes, dst=dstR}, an)
740 :     end
741 :     in dstMustBeReg genCmov
742 :     end
743 :    
744 :     fun unknownExp exp = doExpr(Gen.compileRexp exp, rd, an)
745 : monnier 247
746 : leunga 606 (* Add n to rd *)
747 :     fun addN n =
748 :     let val n = operand n
749 :     val src = if isMemReg rd then immedOrReg n else n
750 :     in mark(I.BINARY{binOp=I.ADDL, src=src, dst=rdOpnd}, an) end
751 :    
752 : george 545 (* Generate addition *)
753 :     fun addition(e1, e2) =
754 : leunga 606 case e1 of
755 : leunga 744 T.REG(_,rs) => if C.sameColor(rs,rd) then addN e2
756 :     else addition1(e1,e2)
757 : leunga 606 | _ => addition1(e1,e2)
758 :     and addition1(e1, e2) =
759 :     case e2 of
760 : leunga 744 T.REG(_,rs) => if C.sameColor(rs,rd) then addN e1
761 :     else addition2(e1,e2)
762 : leunga 606 | _ => addition2(e1,e2)
763 :     and addition2(e1,e2) =
764 : george 545 (dstMustBeReg(fn (dstR, _) =>
765 :     mark(I.LEA{r32=dstR, addr=address(exp, readonly)}, an))
766 :     handle EA => binaryComm(I.ADDL, e1, e2))
767 : monnier 247
768 :    
769 : george 545 in case exp of
770 :     T.REG(_,rs) =>
771 :     if isMemReg rs andalso isMemReg rd then
772 :     let val tmp = I.Direct(newReg())
773 : leunga 731 in move'(I.MemReg rs, tmp, an);
774 : george 545 move'(tmp, rdOpnd, [])
775 :     end
776 :     else move'(IntReg rs, rdOpnd, an)
777 : george 761 | T.LI z => let
778 :     val n = toInt32 z
779 :     in
780 :     if n=0 then
781 :     (* As per Fermin's request, special optimization for rd := 0.
782 :     * Currently we don't bother with the size.
783 :     *)
784 :     if isMemReg rd then move'(I.Immed 0, rdOpnd, an)
785 :     else mark(I.BINARY{binOp=I.XORL, src=rdOpnd, dst=rdOpnd}, an)
786 :     else
787 :     move'(I.Immed(n), rdOpnd, an)
788 :     end
789 : leunga 775 | (T.CONST _ | T.LABEL _) =>
790 :     move'(I.ImmedLabel exp, rdOpnd, an)
791 :     | T.LABEXP le => move'(I.ImmedLabel le, rdOpnd, an)
792 : monnier 247
793 : george 545 (* 32-bit addition *)
794 : george 761 | T.ADD(32, e1, e2 as T.LI n) => let
795 :     val n = toInt32 n
796 :     in
797 :     case n
798 :     of 1 => unary(I.INCL, e1)
799 :     | ~1 => unary(I.DECL, e1)
800 :     | _ => addition(e1, e2)
801 :     end
802 :     | T.ADD(32, e1 as T.LI n, e2) => let
803 :     val n = toInt32 n
804 :     in
805 :     case n
806 :     of 1 => unary(I.INCL, e2)
807 :     | ~1 => unary(I.DECL, e2)
808 :     | _ => addition(e1, e2)
809 :     end
810 : george 545 | T.ADD(32, e1, e2) => addition(e1, e2)
811 : monnier 247
812 : leunga 695 (* 32-bit addition but set the flag!
813 :     * This is a stupid hack for now.
814 :     *)
815 : george 761 | T.ADD(0, e, e1 as T.LI n) => let
816 :     val n = T.I.toInt(32, n)
817 :     in
818 :     if n=1 then unary(I.INCL, e)
819 :     else if n = ~1 then unary(I.DECL, e)
820 :     else binaryComm(I.ADDL, e, e1)
821 :     end
822 :     | T.ADD(0, e1 as T.LI n, e) => let
823 :     val n = T.I.toInt(32, n)
824 :     in
825 :     if n=1 then unary(I.INCL, e)
826 :     else if n = ~1 then unary(I.DECL, e)
827 :     else binaryComm(I.ADDL, e1, e)
828 :     end
829 :     | T.ADD(0, e1, e2) => binaryComm(I.ADDL, e1, e2)
830 :    
831 : george 545 (* 32-bit subtraction *)
832 : george 761 | T.SUB(32, e1, e2 as T.LI n) => let
833 :     val n = toInt32 n
834 :     in
835 :     case n
836 :     of 0 => doExpr(e1, rd, an)
837 :     | 1 => unary(I.DECL, e1)
838 :     | ~1 => unary(I.INCL, e1)
839 :     | _ => binary(I.SUBL, e1, e2)
840 :     end
841 :     | T.SUB(32, e1 as T.LI n, e2) =>
842 :     if T.I.isZero n then unary(I.NEGL, e2)
843 :     else binary(I.SUBL, e1, e2)
844 : george 545 | T.SUB(32, e1, e2) => binary(I.SUBL, e1, e2)
845 : monnier 247
846 : george 545 | T.MULU(32, x, y) => uMultiply(x, y)
847 :     | T.DIVU(32, x, y) => divide(false, false, x, y)
848 :     | T.REMU(32, x, y) => rem(false, false, x, y)
849 : monnier 247
850 : george 545 | T.MULS(32, x, y) => multiply(x, y)
851 :     | T.DIVS(32, x, y) => divide(true, false, x, y)
852 :     | T.REMS(32, x, y) => rem(true, false, x, y)
853 : monnier 247
854 : george 545 | T.ADDT(32, x, y) => (binaryComm(I.ADDL, x, y); trap())
855 :     | T.SUBT(32, x, y) => (binary(I.SUBL, x, y); trap())
856 :     | T.MULT(32, x, y) => (multiply(x, y); trap())
857 :     | T.DIVT(32, x, y) => divide(true, true, x, y)
858 :     | T.REMT(32, x, y) => rem(true, true, x, y)
859 : monnier 247
860 : george 545 | T.ANDB(32, x, y) => binaryComm(I.ANDL, x, y)
861 :     | T.ORB(32, x, y) => binaryComm(I.ORL, x, y)
862 :     | T.XORB(32, x, y) => binaryComm(I.XORL, x, y)
863 :     | T.NOTB(32, x) => unary(I.NOTL, x)
864 : monnier 247
865 : george 545 | T.SRA(32, x, y) => shift(I.SARL, x, y)
866 :     | T.SRL(32, x, y) => shift(I.SHRL, x, y)
867 :     | T.SLL(32, x, y) => shift(I.SHLL, x, y)
868 : monnier 247
869 : george 545 | T.LOAD(8, ea, mem) => load8(ea, mem)
870 :     | T.LOAD(16, ea, mem) => load16(ea, mem)
871 :     | T.LOAD(32, ea, mem) => load32(ea, mem)
872 : monnier 498
873 : leunga 776 | T.SX(32,8,T.LOAD(8,ea,mem)) => load8s(ea, mem)
874 :     | T.SX(32,16,T.LOAD(16,ea,mem)) => load16s(ea, mem)
875 :     | T.ZX(32,8,T.LOAD(8,ea,mem)) => load8(ea, mem)
876 : leunga 779 | T.ZX(32,16,T.LOAD(16,ea,mem)) => load16(ea, mem)
877 : leunga 776
878 : george 545 | T.COND(32, T.CMP(ty, cc, t1, t2), T.LI yes, T.LI no) =>
879 : leunga 583 setcc(ty, cc, t1, t2, toInt32 yes, toInt32 no)
880 : george 545 | T.COND(32, T.CMP(ty, cc, t1, t2), yes, no) =>
881 :     (case !arch of (* PentiumPro and higher has CMOVcc *)
882 :     Pentium => unknownExp exp
883 :     | _ => cmovcc(ty, cc, t1, t2, yes, no)
884 :     )
885 :     | T.LET(s,e) => (doStmt s; doExpr(e, rd, an))
886 :     | T.MARK(e, A.MARKREG f) => (f rd; doExpr(e, rd, an))
887 :     | T.MARK(e, a) => doExpr(e, rd, a::an)
888 :     | T.PRED(e,c) => doExpr(e, rd, A.CTRLUSE c::an)
889 : george 555 | T.REXT e =>
890 :     ExtensionComp.compileRext (reducer()) {e=e, rd=rd, an=an}
891 : george 545 (* simplify and try again *)
892 :     | exp => unknownExp exp
893 :     end (* doExpr *)
894 : monnier 247
895 : george 545 (* generate an expression and return its result register
896 :     * If rewritePseudo is on, the result is guaranteed to be in a
897 :     * non memReg register
898 :     *)
899 :     and expr(exp as T.REG(_, rd)) =
900 :     if isMemReg rd then genExpr exp else rd
901 :     | expr exp = genExpr exp
902 : monnier 247
903 : george 545 and genExpr exp =
904 :     let val rd = newReg() in doExpr(exp, rd, []); rd end
905 : monnier 247
906 : george 545 (* Compare an expression with zero.
907 :     * On the x86, TEST is superior to AND for doing the same thing,
908 :     * since it doesn't need to write out the result in a register.
909 :     *)
910 : leunga 695 and cmpWithZero(cc as (T.EQ | T.NE), e as T.ANDB(ty, a, b), an) =
911 : george 545 (case ty of
912 : leunga 695 8 => test(I.TESTB, a, b, an)
913 :     | 16 => test(I.TESTW, a, b, an)
914 :     | 32 => test(I.TESTL, a, b, an)
915 :     | _ => doExpr(e, newReg(), an);
916 :     cc)
917 :     | cmpWithZero(cc, e, an) =
918 :     let val e =
919 :     case e of (* hack to disable the lea optimization XXX *)
920 :     T.ADD(_, a, b) => T.ADD(0, a, b)
921 :     | e => e
922 :     in doExpr(e, newReg(), an); cc end
923 : monnier 247
924 : george 545 (* Emit a test.
925 :     * The available modes are
926 :     * r/m, r
927 :     * r/m, imm
928 :     * On selecting the right instruction: TESTL/TESTW/TESTB.
929 :     * When anding an operand with a constant
930 :     * that fits within 8 (or 16) bits, it is possible to use TESTB,
931 :     * (or TESTW) instead of TESTL. Because x86 is little endian,
932 :     * this works for memory operands too. However, with TESTB, it is
933 :     * not possible to use registers other than
934 :     * AL, CL, BL, DL, and AH, CH, BH, DH. So, the best way is to
935 :     * perform register allocation first, and if the operand registers
936 :     * are one of EAX, ECX, EBX, or EDX, replace the TESTL instruction
937 :     * by TESTB.
938 :     *)
939 : leunga 695 and test(testopcode, a, b, an) =
940 : george 545 let val (_, opnd1, opnd2) = commuteComparison(T.EQ, true, a, b)
941 :     (* translate r, r/m => r/m, r *)
942 :     val (opnd1, opnd2) =
943 :     if isMemOpnd opnd2 then (opnd2, opnd1) else (opnd1, opnd2)
944 : leunga 695 in mark(testopcode{lsrc=opnd1, rsrc=opnd2}, an)
945 : george 545 end
946 : monnier 247
947 : george 545 (* generate a condition code expression
948 : leunga 744 * The zero is for setting the condition code!
949 :     * I have no idea why this is used.
950 :     *)
951 :     and doCCexpr(T.CMP(ty, cc, t1, t2), rd, an) =
952 :     if C.sameColor(rd, C.eflags) then
953 :     (cmp(false, ty, cc, t1, t2, an); ())
954 :     else
955 :     error "doCCexpr: cmp"
956 : george 545 | doCCexpr(T.CCMARK(e,A.MARKREG f),rd,an) = (f rd; doCCexpr(e,rd,an))
957 :     | doCCexpr(T.CCMARK(e,a), rd, an) = doCCexpr(e,rd,a::an)
958 :     | doCCexpr(T.CCEXT e, cd, an) =
959 : george 555 ExtensionComp.compileCCext (reducer()) {e=e, ccd=cd, an=an}
960 : george 545 | doCCexpr _ = error "doCCexpr"
961 : monnier 247
962 : george 545 and ccExpr e = error "ccExpr"
963 : monnier 247
964 : george 545 (* generate a comparison and sets the condition code;
965 :     * return the actual cc used. If the flag swapable is true,
966 :     * we can also reorder the operands.
967 :     *)
968 :     and cmp(swapable, ty, cc, t1, t2, an) =
969 : leunga 695 (* == and <> can be always be reordered *)
970 :     let val swapable = swapable orelse cc = T.EQ orelse cc = T.NE
971 :     in (* Sometimes the comparison is not necessary because
972 :     * the bits are already set!
973 :     *)
974 :     if isZero t1 andalso setZeroBit2 t2 then
975 :     if swapable then
976 :     cmpWithZero(T.Basis.swapCond cc, t2, an)
977 :     else (* can't reorder the comparison! *)
978 :     genCmp(ty, false, cc, t1, t2, an)
979 :     else if isZero t2 andalso setZeroBit2 t1 then
980 :     cmpWithZero(cc, t1, an)
981 :     else genCmp(ty, swapable, cc, t1, t2, an)
982 :     end
983 : monnier 247
984 : george 545 (* Give a and b which are the operands to a comparison (or test)
985 :     * Return the appropriate condition code and operands.
986 :     * The available modes are:
987 :     * r/m, imm
988 :     * r/m, r
989 :     * r, r/m
990 :     *)
991 :     and commuteComparison(cc, swapable, a, b) =
992 :     let val (opnd1, opnd2) = (operand a, operand b)
993 :     in (* Try to fold in the operands whenever possible *)
994 :     case (isImmediate opnd1, isImmediate opnd2) of
995 :     (true, true) => (cc, moveToReg opnd1, opnd2)
996 :     | (true, false) =>
997 :     if swapable then (T.Basis.swapCond cc, opnd2, opnd1)
998 :     else (cc, moveToReg opnd1, opnd2)
999 :     | (false, true) => (cc, opnd1, opnd2)
1000 :     | (false, false) =>
1001 :     (case (opnd1, opnd2) of
1002 :     (_, I.Direct _) => (cc, opnd1, opnd2)
1003 :     | (I.Direct _, _) => (cc, opnd1, opnd2)
1004 :     | (_, _) => (cc, moveToReg opnd1, opnd2)
1005 :     )
1006 :     end
1007 :    
1008 :     (* generate a real comparison; return the real cc used *)
1009 :     and genCmp(ty, swapable, cc, a, b, an) =
1010 :     let val (cc, opnd1, opnd2) = commuteComparison(cc, swapable, a, b)
1011 :     in mark(I.CMPL{lsrc=opnd1, rsrc=opnd2}, an); cc
1012 :     end
1013 : monnier 247
1014 : george 545 (* generate code for jumps *)
1015 : leunga 775 and jmp(lexp as T.LABEL lab, labs, an) =
1016 : george 545 mark(I.JMP(I.ImmedLabel lexp, [lab]), an)
1017 : leunga 775 | jmp(T.LABEXP le, labs, an) = mark(I.JMP(I.ImmedLabel le, labs), an)
1018 :     | jmp(ea, labs, an) = mark(I.JMP(operand ea, labs), an)
1019 : george 545
1020 :     (* convert mlrisc to cellset:
1021 :     *)
1022 :     and cellset mlrisc =
1023 : leunga 744 let val addCCReg = C.CellSet.add
1024 : george 545 fun g([],acc) = acc
1025 :     | g(T.GPR(T.REG(_,r))::regs,acc) = g(regs,C.addReg(r,acc))
1026 :     | g(T.FPR(T.FREG(_,f))::regs,acc) = g(regs,C.addFreg(f,acc))
1027 :     | g(T.CCR(T.CC(_,cc))::regs,acc) = g(regs,addCCReg(cc,acc))
1028 :     | g(T.CCR(T.FCC(_,cc))::regs,acc) = g(regs,addCCReg(cc,acc))
1029 :     | g(_::regs, acc) = g(regs, acc)
1030 :     in g(mlrisc, C.empty) end
1031 :    
1032 :     (* generate code for calls *)
1033 :     and call(ea, flow, def, use, mem, an) =
1034 :     mark(I.CALL(operand ea,cellset(def),cellset(use),mem),an)
1035 :    
1036 :     (* generate code for integer stores *)
1037 :     and store8(ea, d, mem, an) =
1038 :     let val src = (* movb has to use %eax as source. Stupid x86! *)
1039 :     case immedOrReg(operand d) of
1040 :     src as I.Direct r =>
1041 : leunga 744 if C.sameColor(r,C.eax)
1042 :     then src else (move(src, eax); eax)
1043 : george 545 | src => src
1044 :     in mark(I.MOVE{mvOp=I.MOVB, src=src, dst=address(ea,mem)},an)
1045 :     end
1046 :     and store16(ea, d, mem, an) = error "store16"
1047 :     and store32(ea, d, mem, an) =
1048 :     move'(immedOrReg(operand d), address(ea, mem), an)
1049 :    
1050 :     (* generate code for branching *)
1051 :     and branch(T.CMP(ty, cc, t1, t2), lab, an) =
1052 :     (* allow reordering of operands *)
1053 :     let val cc = cmp(true, ty, cc, t1, t2, [])
1054 :     in mark(I.JCC{cond=cond cc, opnd=immedLabel lab}, an) end
1055 :     | branch(T.FCMP(fty, fcc, t1, t2), lab, an) =
1056 :     fbranch(fty, fcc, t1, t2, lab, an)
1057 :     | branch(ccexp, lab, an) =
1058 : leunga 744 (doCCexpr(ccexp, C.eflags, []);
1059 : george 545 mark(I.JCC{cond=cond(Gen.condOf ccexp), opnd=immedLabel lab}, an)
1060 :     )
1061 :    
1062 :     (* generate code for floating point compare and branch *)
1063 :     and fbranch(fty, fcc, t1, t2, lab, an) =
1064 : leunga 731 let fun ignoreOrder (T.FREG _) = true
1065 :     | ignoreOrder (T.FLOAD _) = true
1066 :     | ignoreOrder (T.FMARK(e,_)) = ignoreOrder e
1067 :     | ignoreOrder _ = false
1068 :    
1069 :     fun compare'() = (* Sethi-Ullman style *)
1070 :     (if ignoreOrder t1 orelse ignoreOrder t2 then
1071 :     (reduceFexp(fty, t2, []); reduceFexp(fty, t1, []))
1072 :     else (reduceFexp(fty, t1, []); reduceFexp(fty, t2, []);
1073 :     emit(I.FXCH{opnd=C.ST(1)}));
1074 :     emit(I.FUCOMPP);
1075 :     fcc
1076 :     )
1077 :    
1078 :     fun compare''() =
1079 :     (* direct style *)
1080 :     (* Try to make lsrc the memory operand *)
1081 :     let val lsrc = foperand(fty, t1)
1082 :     val rsrc = foperand(fty, t2)
1083 :     val fsize = fsize fty
1084 :     fun cmp(lsrc, rsrc, fcc) =
1085 :     (emit(I.FCMP{fsize=fsize,lsrc=lsrc,rsrc=rsrc}); fcc)
1086 :     in case (lsrc, rsrc) of
1087 :     (I.FPR _, I.FPR _) => cmp(lsrc, rsrc, fcc)
1088 :     | (I.FPR _, mem) => cmp(mem,lsrc,T.Basis.swapFcond fcc)
1089 :     | (mem, I.FPR _) => cmp(lsrc, rsrc, fcc)
1090 :     | (lsrc, rsrc) => (* can't be both memory! *)
1091 :     let val ftmpR = newFreg()
1092 :     val ftmp = I.FPR ftmpR
1093 :     in emit(I.FMOVE{fsize=fsize,src=rsrc,dst=ftmp});
1094 :     cmp(lsrc, ftmp, fcc)
1095 :     end
1096 :     end
1097 :    
1098 :     fun compare() =
1099 :     if enableFastFPMode andalso !fast_floating_point
1100 :     then compare''() else compare'()
1101 :    
1102 : george 545 fun andil i = emit(I.BINARY{binOp=I.ANDL,src=I.Immed(i),dst=eax})
1103 : leunga 585 fun testil i = emit(I.TESTL{lsrc=eax,rsrc=I.Immed(i)})
1104 : george 545 fun xoril i = emit(I.BINARY{binOp=I.XORL,src=I.Immed(i),dst=eax})
1105 :     fun cmpil i = emit(I.CMPL{rsrc=I.Immed(i), lsrc=eax})
1106 :     fun j(cc, lab) = mark(I.JCC{cond=cc, opnd=immedLabel lab},an)
1107 :     fun sahf() = emit(I.SAHF)
1108 : leunga 731 fun branch(fcc) =
1109 : george 545 case fcc
1110 :     of T.== => (andil 0x4400; xoril 0x4000; j(I.EQ, lab))
1111 :     | T.?<> => (andil 0x4400; xoril 0x4000; j(I.NE, lab))
1112 :     | T.? => (sahf(); j(I.P,lab))
1113 :     | T.<=> => (sahf(); j(I.NP,lab))
1114 : leunga 585 | T.> => (testil 0x4500; j(I.EQ,lab))
1115 :     | T.?<= => (testil 0x4500; j(I.NE,lab))
1116 :     | T.>= => (testil 0x500; j(I.EQ,lab))
1117 :     | T.?< => (testil 0x500; j(I.NE,lab))
1118 : george 545 | T.< => (andil 0x4500; cmpil 0x100; j(I.EQ,lab))
1119 :     | T.?>= => (andil 0x4500; cmpil 0x100; j(I.NE,lab))
1120 :     | T.<= => (andil 0x4100; cmpil 0x100; j(I.EQ,lab);
1121 :     cmpil 0x4000; j(I.EQ,lab))
1122 : leunga 585 | T.?> => (sahf(); j(I.P,lab); testil 0x4100; j(I.EQ,lab))
1123 :     | T.<> => (testil 0x4400; j(I.EQ,lab))
1124 :     | T.?= => (testil 0x4400; j(I.NE,lab))
1125 : george 545 | _ => error "fbranch"
1126 :     (*esac*)
1127 : leunga 731 val fcc = compare()
1128 :     in emit I.FNSTSW;
1129 :     branch(fcc)
1130 : monnier 411 end
1131 : monnier 247
1132 : leunga 731 (*========================================================
1133 :     * Floating point code generation starts here.
1134 :     * Some generic fp routines first.
1135 :     *========================================================*)
1136 :    
1137 :     (* Can this tree be folded into the src operand of a floating point
1138 :     * operations?
1139 :     *)
1140 :     and foldableFexp(T.FREG _) = true
1141 :     | foldableFexp(T.FLOAD _) = true
1142 :     | foldableFexp(T.CVTI2F(_, (16 | 32), _)) = true
1143 :     | foldableFexp(T.CVTF2F(_, _, t)) = foldableFexp t
1144 :     | foldableFexp(T.FMARK(t, _)) = foldableFexp t
1145 :     | foldableFexp _ = false
1146 :    
1147 :     (* Move integer e of size ty into a memory location.
1148 :     * Returns a quadruple:
1149 :     * (INTEGER,return ty,effect address of memory location,cleanup code)
1150 :     *)
1151 :     and convertIntToFloat(ty, e) =
1152 :     let val opnd = operand e
1153 :     in if isMemOpnd opnd andalso (ty = 16 orelse ty = 32)
1154 :     then (INTEGER, ty, opnd, [])
1155 :     else
1156 :     let val {instrs, tempMem, cleanup} = cvti2f{ty=ty, src=opnd}
1157 :     in emits instrs;
1158 :     (INTEGER, 32, tempMem, cleanup)
1159 :     end
1160 :     end
1161 :    
1162 :     (*========================================================
1163 :     * Sethi-Ullman based floating point code generation as
1164 :     * implemented by Lal
1165 :     *========================================================*)
1166 :    
1167 : george 545 and fld(32, opnd) = I.FLDS opnd
1168 :     | fld(64, opnd) = I.FLDL opnd
1169 : george 555 | fld(80, opnd) = I.FLDT opnd
1170 : george 545 | fld _ = error "fld"
1171 :    
1172 : leunga 565 and fild(16, opnd) = I.FILD opnd
1173 :     | fild(32, opnd) = I.FILDL opnd
1174 :     | fild(64, opnd) = I.FILDLL opnd
1175 :     | fild _ = error "fild"
1176 :    
1177 :     and fxld(INTEGER, ty, opnd) = fild(ty, opnd)
1178 :     | fxld(REAL, fty, opnd) = fld(fty, opnd)
1179 :    
1180 : george 545 and fstp(32, opnd) = I.FSTPS opnd
1181 :     | fstp(64, opnd) = I.FSTPL opnd
1182 : george 555 | fstp(80, opnd) = I.FSTPT opnd
1183 : george 545 | fstp _ = error "fstp"
1184 :    
1185 :     (* generate code for floating point stores *)
1186 : leunga 731 and fstore'(fty, ea, d, mem, an) =
1187 : george 545 (case d of
1188 :     T.FREG(fty, fs) => emit(fld(fty, I.FDirect fs))
1189 :     | _ => reduceFexp(fty, d, []);
1190 :     mark(fstp(fty, address(ea, mem)), an)
1191 :     )
1192 :    
1193 : leunga 731 (* generate code for floating point loads *)
1194 :     and fload'(fty, ea, mem, fd, an) =
1195 :     let val ea = address(ea, mem)
1196 :     in mark(fld(fty, ea), an);
1197 : leunga 744 if C.sameColor(fd,ST0) then ()
1198 :     else emit(fstp(fty, I.FDirect fd))
1199 : leunga 731 end
1200 :    
1201 :     and fexpr' e = (reduceFexp(64, e, []); C.ST(0))
1202 : george 545
1203 :     (* generate floating point expression and put the result in fd *)
1204 : leunga 731 and doFexpr'(fty, T.FREG(_, fs), fd, an) =
1205 : leunga 744 (if C.sameColor(fs,fd) then ()
1206 : george 545 else mark(I.FCOPY{dst=[fd], src=[fs], tmp=NONE}, an)
1207 :     )
1208 : leunga 731 | doFexpr'(_, T.FLOAD(fty, ea, mem), fd, an) =
1209 :     fload'(fty, ea, mem, fd, an)
1210 :     | doFexpr'(fty, T.FEXT fexp, fd, an) =
1211 :     (ExtensionComp.compileFext (reducer()) {e=fexp, fd=fd, an=an};
1212 : leunga 744 if C.sameColor(fd,ST0) then () else emit(fstp(fty, I.FDirect fd))
1213 : leunga 731 )
1214 :     | doFexpr'(fty, e, fd, an) =
1215 : george 545 (reduceFexp(fty, e, []);
1216 : leunga 744 if C.sameColor(fd,ST0) then ()
1217 :     else mark(fstp(fty, I.FDirect fd), an)
1218 : george 545 )
1219 :    
1220 :     (*
1221 :     * Generate floating point expression using Sethi-Ullman's scheme:
1222 :     * This function evaluates a floating point expression,
1223 :     * and put result in %ST(0).
1224 :     *)
1225 :     and reduceFexp(fty, fexp, an) =
1226 : george 555 let val ST = I.ST(C.ST 0)
1227 :     val ST1 = I.ST(C.ST 1)
1228 : leunga 593 val cleanupCode = ref [] : I.instruction list ref
1229 : george 545
1230 : leunga 565 datatype su_tree =
1231 :     LEAF of int * T.fexp * ans
1232 :     | BINARY of int * T.fty * fbinop * su_tree * su_tree * ans
1233 :     | UNARY of int * T.fty * I.funOp * su_tree * ans
1234 :     and fbinop = FADD | FSUB | FMUL | FDIV
1235 :     | FIADD | FISUB | FIMUL | FIDIV
1236 :     withtype ans = Annotations.annotations
1237 : monnier 247
1238 : leunga 565 fun label(LEAF(n, _, _)) = n
1239 :     | label(BINARY(n, _, _, _, _, _)) = n
1240 :     | label(UNARY(n, _, _, _, _)) = n
1241 : george 545
1242 : leunga 565 fun annotate(LEAF(n, x, an), a) = LEAF(n,x,a::an)
1243 :     | annotate(BINARY(n,t,b,x,y,an), a) = BINARY(n,t,b,x,y,a::an)
1244 :     | annotate(UNARY(n,t,u,x,an), a) = UNARY(n,t,u,x,a::an)
1245 : george 545
1246 : leunga 565 (* Generate expression tree with sethi-ullman numbers *)
1247 :     fun su(e as T.FREG _) = LEAF(1, e, [])
1248 :     | su(e as T.FLOAD _) = LEAF(1, e, [])
1249 :     | su(e as T.CVTI2F _) = LEAF(1, e, [])
1250 :     | su(T.CVTF2F(_, _, t)) = su t
1251 :     | su(T.FMARK(t, a)) = annotate(su t, a)
1252 :     | su(T.FABS(fty, t)) = suUnary(fty, I.FABS, t)
1253 :     | su(T.FNEG(fty, t)) = suUnary(fty, I.FCHS, t)
1254 :     | su(T.FSQRT(fty, t)) = suUnary(fty, I.FSQRT, t)
1255 :     | su(T.FADD(fty, t1, t2)) = suComBinary(fty,FADD,FIADD,t1,t2)
1256 :     | su(T.FMUL(fty, t1, t2)) = suComBinary(fty,FMUL,FIMUL,t1,t2)
1257 :     | su(T.FSUB(fty, t1, t2)) = suBinary(fty,FSUB,FISUB,t1,t2)
1258 :     | su(T.FDIV(fty, t1, t2)) = suBinary(fty,FDIV,FIDIV,t1,t2)
1259 :     | su _ = error "su"
1260 :    
1261 :     (* Try to fold the the memory operand or integer conversion *)
1262 :     and suFold(e as T.FREG _) = (LEAF(0, e, []), false)
1263 :     | suFold(e as T.FLOAD _) = (LEAF(0, e, []), false)
1264 :     | suFold(e as T.CVTI2F(_,(16 | 32),_)) = (LEAF(0, e, []), true)
1265 :     | suFold(T.CVTF2F(_, _, t)) = suFold t
1266 :     | suFold(T.FMARK(t, a)) =
1267 :     let val (t, integer) = suFold t
1268 :     in (annotate(t, a), integer) end
1269 :     | suFold e = (su e, false)
1270 :    
1271 :     (* Form unary tree *)
1272 :     and suUnary(fty, funary, t) =
1273 :     let val t = su t
1274 :     in UNARY(label t, fty, funary, t, [])
1275 : george 545 end
1276 : leunga 565
1277 :     (* Form binary tree *)
1278 :     and suBinary(fty, binop, ibinop, t1, t2) =
1279 :     let val t1 = su t1
1280 :     val (t2, integer) = suFold t2
1281 :     val n1 = label t1
1282 :     val n2 = label t2
1283 :     val n = if n1=n2 then n1+1 else Int.max(n1,n2)
1284 :     val myOp = if integer then ibinop else binop
1285 :     in BINARY(n, fty, myOp, t1, t2, [])
1286 : george 545 end
1287 : george 555
1288 : leunga 565 (* Try to fold in the operand if possible.
1289 :     * This only applies to commutative operations.
1290 :     *)
1291 :     and suComBinary(fty, binop, ibinop, t1, t2) =
1292 : leunga 731 let val (t1, t2) = if foldableFexp t2
1293 :     then (t1, t2) else (t2, t1)
1294 : leunga 565 in suBinary(fty, binop, ibinop, t1, t2) end
1295 :    
1296 :     and sameTree(LEAF(_, T.FREG(t1,f1), []),
1297 : leunga 744 LEAF(_, T.FREG(t2,f2), [])) =
1298 :     t1 = t2 andalso C.sameColor(f1,f2)
1299 : leunga 565 | sameTree _ = false
1300 :    
1301 :     (* Traverse tree and generate code *)
1302 :     fun gencode(LEAF(_, t, an)) = mark(fxld(leafEA t), an)
1303 :     | gencode(BINARY(_, _, binop, x, t2 as LEAF(0, y, a1), a2)) =
1304 :     let val _ = gencode x
1305 :     val (_, fty, src) = leafEA y
1306 :     fun gen(code) = mark(code, a1 @ a2)
1307 :     fun binary(oper32, oper64) =
1308 :     if sameTree(x, t2) then
1309 :     gen(I.FBINARY{binOp=oper64, src=ST, dst=ST})
1310 : george 555 else
1311 :     let val oper =
1312 : leunga 565 if isMemOpnd src then
1313 :     case fty of
1314 :     32 => oper32
1315 :     | 64 => oper64
1316 :     | _ => error "gencode: BINARY"
1317 :     else oper64
1318 :     in gen(I.FBINARY{binOp=oper, src=src, dst=ST}) end
1319 :     fun ibinary(oper16, oper32) =
1320 :     let val oper = case fty of
1321 :     16 => oper16
1322 :     | 32 => oper32
1323 :     | _ => error "gencode: IBINARY"
1324 :     in gen(I.FIBINARY{binOp=oper, src=src}) end
1325 :     in case binop of
1326 :     FADD => binary(I.FADDS, I.FADDL)
1327 :     | FSUB => binary(I.FDIVS, I.FSUBL)
1328 :     | FMUL => binary(I.FMULS, I.FMULL)
1329 :     | FDIV => binary(I.FDIVS, I.FDIVL)
1330 :     | FIADD => ibinary(I.FIADDS, I.FIADDL)
1331 :     | FISUB => ibinary(I.FIDIVS, I.FISUBL)
1332 :     | FIMUL => ibinary(I.FIMULS, I.FIMULL)
1333 :     | FIDIV => ibinary(I.FIDIVS, I.FIDIVL)
1334 :     end
1335 :     | gencode(BINARY(_, fty, binop, t1, t2, an)) =
1336 :     let fun doit(t1, t2, oper, operP, operRP) =
1337 :     let (* oper[P] => ST(1) := ST oper ST(1); [pop]
1338 :     * operR[P] => ST(1) := ST(1) oper ST; [pop]
1339 :     *)
1340 :     val n1 = label t1
1341 :     val n2 = label t2
1342 :     in if n1 < n2 andalso n1 <= 7 then
1343 :     (gencode t2;
1344 :     gencode t1;
1345 :     mark(I.FBINARY{binOp=operP, src=ST, dst=ST1}, an))
1346 :     else if n2 <= n1 andalso n2 <= 7 then
1347 :     (gencode t1;
1348 :     gencode t2;
1349 :     mark(I.FBINARY{binOp=operRP, src=ST, dst=ST1}, an))
1350 :     else
1351 :     let (* both labels > 7 *)
1352 :     val fs = I.FDirect(newFreg())
1353 :     in gencode t2;
1354 :     emit(fstp(fty, fs));
1355 :     gencode t1;
1356 :     mark(I.FBINARY{binOp=oper, src=fs, dst=ST}, an)
1357 :     end
1358 :     end
1359 :     in case binop of
1360 :     FADD => doit(t1,t2,I.FADDL,I.FADDP,I.FADDP)
1361 :     | FMUL => doit(t1,t2,I.FMULL,I.FMULP,I.FMULP)
1362 :     | FSUB => doit(t1,t2,I.FSUBL,I.FSUBP,I.FSUBRP)
1363 :     | FDIV => doit(t1,t2,I.FDIVL,I.FDIVP,I.FDIVRP)
1364 : george 545 | _ => error "gencode.BINARY"
1365 :     end
1366 : leunga 565 | gencode(UNARY(_, _, unaryOp, su, an)) =
1367 :     (gencode(su); mark(I.FUNARY(unaryOp),an))
1368 :    
1369 :     (* Generate code for a leaf.
1370 :     * Returns the type and an effective address
1371 :     *)
1372 :     and leafEA(T.FREG(fty, f)) = (REAL, fty, I.FDirect f)
1373 :     | leafEA(T.FLOAD(fty, ea, mem)) = (REAL, fty, address(ea, mem))
1374 : leunga 593 | leafEA(T.CVTI2F(_, 32, t)) = int2real(32, t)
1375 :     | leafEA(T.CVTI2F(_, 16, t)) = int2real(16, t)
1376 :     | leafEA(T.CVTI2F(_, 8, t)) = int2real(8, t)
1377 : leunga 565 | leafEA _ = error "leafEA"
1378 :    
1379 : leunga 731 and int2real(ty, e) =
1380 :     let val (_, ty, ea, cleanup) = convertIntToFloat(ty, e)
1381 :     in cleanupCode := !cleanupCode @ cleanup;
1382 :     (INTEGER, ty, ea)
1383 : george 545 end
1384 : leunga 731
1385 :     in gencode(su fexp);
1386 :     emits(!cleanupCode)
1387 : george 545 end (*reduceFexp*)
1388 : leunga 731
1389 :     (*========================================================
1390 :     * This section generates 3-address style floating
1391 :     * point code.
1392 :     *========================================================*)
1393 :    
1394 :     and isize 16 = I.I16
1395 :     | isize 32 = I.I32
1396 :     | isize _ = error "isize"
1397 :    
1398 :     and fstore''(fty, ea, d, mem, an) =
1399 :     (floatingPointUsed := true;
1400 :     mark(I.FMOVE{fsize=fsize fty, dst=address(ea,mem),
1401 :     src=foperand(fty, d)},
1402 :     an)
1403 :     )
1404 :    
1405 :     and fload''(fty, ea, mem, d, an) =
1406 :     (floatingPointUsed := true;
1407 :     mark(I.FMOVE{fsize=fsize fty, src=address(ea,mem),
1408 :     dst=RealReg d}, an)
1409 :     )
1410 :    
1411 :     and fiload''(ity, ea, d, an) =
1412 :     (floatingPointUsed := true;
1413 :     mark(I.FILOAD{isize=isize ity, ea=ea, dst=RealReg d}, an)
1414 :     )
1415 :    
1416 :     and fexpr''(e as T.FREG(_,f)) =
1417 :     if isFMemReg f then transFexpr e else f
1418 :     | fexpr'' e = transFexpr e
1419 :    
1420 :     and transFexpr e =
1421 :     let val fd = newFreg() in doFexpr''(64, e, fd, []); fd end
1422 :    
1423 :     (*
1424 :     * Process a floating point operand. Put operand in register
1425 :     * when possible. The operand should match the given fty.
1426 :     *)
1427 :     and foperand(fty, e as T.FREG(fty', f)) =
1428 :     if fty = fty' then RealReg f else I.FPR(fexpr'' e)
1429 :     | foperand(fty, T.CVTF2F(_, _, e)) =
1430 :     foperand(fty, e) (* nop on the x86 *)
1431 :     | foperand(fty, e as T.FLOAD(fty', ea, mem)) =
1432 :     (* fold operand when the precison matches *)
1433 :     if fty = fty' then address(ea, mem) else I.FPR(fexpr'' e)
1434 :     | foperand(fty, e) = I.FPR(fexpr'' e)
1435 :    
1436 :     (*
1437 :     * Process a floating point operand.
1438 :     * Try to fold in a memory operand or conversion from an integer.
1439 :     *)
1440 :     and fioperand(T.FREG(fty,f)) = (REAL, fty, RealReg f, [])
1441 :     | fioperand(T.FLOAD(fty, ea, mem)) =
1442 :     (REAL, fty, address(ea, mem), [])
1443 :     | fioperand(T.CVTF2F(_, _, e)) = fioperand(e) (* nop on the x86 *)
1444 :     | fioperand(T.CVTI2F(_, ty, e)) = convertIntToFloat(ty, e)
1445 :     | fioperand(T.FMARK(e,an)) = fioperand(e) (* XXX *)
1446 :     | fioperand(e) = (REAL, 64, I.FPR(fexpr'' e), [])
1447 :    
1448 :     (* Generate binary operator. Since the real binary operators
1449 :     * does not take memory as destination, we also ensure this
1450 :     * does not happen.
1451 :     *)
1452 :     and fbinop(targetFty,
1453 :     binOp, binOpR, ibinOp, ibinOpR, lsrc, rsrc, fd, an) =
1454 :     (* Put the mem operand in rsrc *)
1455 :     let val _ = floatingPointUsed := true;
1456 :     fun isMemOpnd(T.FREG(_, f)) = isFMemReg f
1457 :     | isMemOpnd(T.FLOAD _) = true
1458 :     | isMemOpnd(T.CVTI2F(_, (16 | 32), _)) = true
1459 :     | isMemOpnd(T.CVTF2F(_, _, t)) = isMemOpnd t
1460 :     | isMemOpnd(T.FMARK(t, _)) = isMemOpnd t
1461 :     | isMemOpnd _ = false
1462 :     val (binOp, ibinOp, lsrc, rsrc) =
1463 :     if isMemOpnd lsrc then (binOpR, ibinOpR, rsrc, lsrc)
1464 :     else (binOp, ibinOp, lsrc, rsrc)
1465 :     val lsrc = foperand(targetFty, lsrc)
1466 :     val (kind, fty, rsrc, code) = fioperand(rsrc)
1467 :     fun dstMustBeFreg f =
1468 :     if targetFty <> 64 then
1469 :     let val tmpR = newFreg()
1470 :     val tmp = I.FPR tmpR
1471 :     in mark(f tmp, an);
1472 :     emit(I.FMOVE{fsize=fsize targetFty,
1473 :     src=tmp, dst=RealReg fd})
1474 :     end
1475 :     else mark(f(RealReg fd), an)
1476 :     in case kind of
1477 :     REAL =>
1478 :     dstMustBeFreg(fn dst =>
1479 :     I.FBINOP{fsize=fsize fty, binOp=binOp,
1480 :     lsrc=lsrc, rsrc=rsrc, dst=dst})
1481 :     | INTEGER =>
1482 :     (dstMustBeFreg(fn dst =>
1483 :     I.FIBINOP{isize=isize fty, binOp=ibinOp,
1484 :     lsrc=lsrc, rsrc=rsrc, dst=dst});
1485 :     emits code
1486 :     )
1487 :     end
1488 : george 545
1489 : leunga 731 and funop(fty, unOp, src, fd, an) =
1490 :     let val src = foperand(fty, src)
1491 :     in mark(I.FUNOP{fsize=fsize fty,
1492 :     unOp=unOp, src=src, dst=RealReg fd},an)
1493 :     end
1494 :    
1495 :     and doFexpr''(fty, e, fd, an) =
1496 :     case e of
1497 : leunga 744 T.FREG(_,fs) => if C.sameColor(fs,fd) then ()
1498 : leunga 731 else fcopy''(fty, [fd], [fs], an)
1499 :     (* Stupid x86 does everything as 80-bits internally. *)
1500 :    
1501 :     (* Binary operators *)
1502 :     | T.FADD(_, a, b) => fbinop(fty,
1503 :     I.FADDL, I.FADDL, I.FIADDL, I.FIADDL,
1504 :     a, b, fd, an)
1505 :     | T.FSUB(_, a, b) => fbinop(fty,
1506 :     I.FSUBL, I.FSUBRL, I.FISUBL, I.FISUBRL,
1507 :     a, b, fd, an)
1508 :     | T.FMUL(_, a, b) => fbinop(fty,
1509 :     I.FMULL, I.FMULL, I.FIMULL, I.FIMULL,
1510 :     a, b, fd, an)
1511 :     | T.FDIV(_, a, b) => fbinop(fty,
1512 :     I.FDIVL, I.FDIVRL, I.FIDIVL, I.FIDIVRL,
1513 :     a, b, fd, an)
1514 :    
1515 :     (* Unary operators *)
1516 :     | T.FNEG(_, a) => funop(fty, I.FCHS, a, fd, an)
1517 :     | T.FABS(_, a) => funop(fty, I.FABS, a, fd, an)
1518 :     | T.FSQRT(_, a) => funop(fty, I.FSQRT, a, fd, an)
1519 :    
1520 :     (* Load *)
1521 :     | T.FLOAD(fty,ea,mem) => fload''(fty, ea, mem, fd, an)
1522 :    
1523 :     (* Type conversions *)
1524 :     | T.CVTF2F(_, _, e) => doFexpr''(fty, e, fd, an)
1525 :     | T.CVTI2F(_, ty, e) =>
1526 :     let val (_, ty, ea, cleanup) = convertIntToFloat(ty, e)
1527 :     in fiload''(ty, ea, fd, an);
1528 :     emits cleanup
1529 :     end
1530 :    
1531 :     | T.FMARK(e,A.MARKREG f) => (f fd; doFexpr''(fty, e, fd, an))
1532 :     | T.FMARK(e, a) => doFexpr''(fty, e, fd, a::an)
1533 :     | T.FPRED(e, c) => doFexpr''(fty, e, fd, A.CTRLUSE c::an)
1534 :     | T.FEXT fexp =>
1535 :     ExtensionComp.compileFext (reducer()) {e=fexp, fd=fd, an=an}
1536 :     | _ => error("doFexpr''")
1537 :    
1538 :     (*========================================================
1539 :     * Tie the two styles of fp code generation together
1540 :     *========================================================*)
1541 :     and fstore(fty, ea, d, mem, an) =
1542 :     if enableFastFPMode andalso !fast_floating_point
1543 :     then fstore''(fty, ea, d, mem, an)
1544 :     else fstore'(fty, ea, d, mem, an)
1545 :     and fload(fty, ea, d, mem, an) =
1546 :     if enableFastFPMode andalso !fast_floating_point
1547 :     then fload''(fty, ea, d, mem, an)
1548 :     else fload'(fty, ea, d, mem, an)
1549 :     and fexpr e =
1550 :     if enableFastFPMode andalso !fast_floating_point
1551 :     then fexpr'' e else fexpr' e
1552 :     and doFexpr(fty, e, fd, an) =
1553 :     if enableFastFPMode andalso !fast_floating_point
1554 :     then doFexpr''(fty, e, fd, an)
1555 :     else doFexpr'(fty, e, fd, an)
1556 :    
1557 : george 545 (* generate code for a statement *)
1558 :     and stmt(T.MV(_, rd, e), an) = doExpr(e, rd, an)
1559 :     | stmt(T.FMV(fty, fd, e), an) = doFexpr(fty, e, fd, an)
1560 :     | stmt(T.CCMV(ccd, e), an) = doCCexpr(e, ccd, an)
1561 :     | stmt(T.COPY(_, dst, src), an) = copy(dst, src, an)
1562 :     | stmt(T.FCOPY(fty, dst, src), an) = fcopy(fty, dst, src, an)
1563 : leunga 744 | stmt(T.JMP(e, labs), an) = jmp(e, labs, an)
1564 :     | stmt(T.CALL{funct, targets, defs, uses, region, ...}, an) =
1565 : leunga 591 call(funct,targets,defs,uses,region,an)
1566 : george 545 | stmt(T.RET _, an) = mark(I.RET NONE, an)
1567 :     | stmt(T.STORE(8, ea, d, mem), an) = store8(ea, d, mem, an)
1568 :     | stmt(T.STORE(16, ea, d, mem), an) = store16(ea, d, mem, an)
1569 :     | stmt(T.STORE(32, ea, d, mem), an) = store32(ea, d, mem, an)
1570 :     | stmt(T.FSTORE(fty, ea, d, mem), an) = fstore(fty, ea, d, mem, an)
1571 : leunga 744 | stmt(T.BCC(cc, lab), an) = branch(cc, lab, an)
1572 : george 545 | stmt(T.DEFINE l, _) = defineLabel l
1573 :     | stmt(T.ANNOTATION(s, a), an) = stmt(s, a::an)
1574 : george 555 | stmt(T.EXT s, an) =
1575 :     ExtensionComp.compileSext (reducer()) {stm=s, an=an}
1576 : george 545 | stmt(s, _) = doStmts(Gen.compileStm s)
1577 :    
1578 :     and doStmt s = stmt(s, [])
1579 :     and doStmts ss = app doStmt ss
1580 :    
1581 :     and beginCluster' _ =
1582 :     ((* Must be cleared by the client.
1583 :     * if rewriteMemReg then memRegsUsed := 0w0 else ();
1584 :     *)
1585 : leunga 731 floatingPointUsed := false;
1586 :     trapLabel := NONE;
1587 :     beginCluster 0
1588 :     )
1589 : george 545 and endCluster' a =
1590 : monnier 247 (case !trapLabel
1591 : monnier 411 of NONE => ()
1592 : george 545 | SOME(_, lab) => (defineLabel lab; emit(I.INTO))
1593 : monnier 411 (*esac*);
1594 : leunga 731 (* If floating point has been used allocate an extra
1595 :     * register just in case we didn't use any explicit register
1596 :     *)
1597 :     if !floatingPointUsed then (newFreg(); ())
1598 :     else ();
1599 : george 545 endCluster(a)
1600 :     )
1601 :    
1602 :     and reducer() =
1603 :     T.REDUCER{reduceRexp = expr,
1604 :     reduceFexp = fexpr,
1605 :     reduceCCexp = ccExpr,
1606 :     reduceStm = stmt,
1607 :     operand = operand,
1608 :     reduceOperand = reduceOpnd,
1609 :     addressOf = fn e => address(e, I.Region.memory), (*XXX*)
1610 :     emit = mark,
1611 :     instrStream = instrStream,
1612 :     mltreeStream = self()
1613 :     }
1614 :    
1615 :     and self() =
1616 :     S.STREAM
1617 :     { beginCluster= beginCluster',
1618 :     endCluster = endCluster',
1619 :     emit = doStmt,
1620 :     pseudoOp = pseudoOp,
1621 :     defineLabel = defineLabel,
1622 :     entryLabel = entryLabel,
1623 :     comment = comment,
1624 :     annotation = annotation,
1625 : leunga 744 exitBlock = fn mlrisc => exitBlock(cellset mlrisc)
1626 : george 545 }
1627 :    
1628 :     in self()
1629 : monnier 247 end
1630 :    
1631 : george 545 end (* functor *)
1632 :    
1633 :     end (* local *)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0