Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /sml/trunk/src/MLRISC/x86/mltree/x86.sml
ViewVC logotype

Annotation of /sml/trunk/src/MLRISC/x86/mltree/x86.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 585 - (view) (download)

1 : leunga 583 (*
2 : monnier 247 *
3 :     * COPYRIGHT (c) 1998 Bell Laboratories.
4 : george 545 *
5 :     * This is a revised version that takes into account
6 :     * the extended x86 instruction set, and has better handling of
7 :     * non-standard types. I've factored out the integer/floating point
8 :     * comparison code, added optimizations for conditional moves.
9 :     * The latter generates SETcc and CMOVcc (Pentium Pro only) instructions.
10 :     * To avoid problems, I have tried to incorporate as much of
11 :     * Lal's original magic incantations as possible.
12 : monnier 247 *
13 : george 545 * Some changes:
14 :     *
15 :     * 1. REMU/REMS/REMT are now supported
16 :     * 2. COND is supported by generating SETcc and/or CMOVcc; this
17 :     * may require at least a Pentium II to work.
18 :     * 3. Division by a constant has been optimized. Division by
19 :     * a power of 2 generates SHRL or SARL.
20 :     * 4. Better addressing mode selection has been implemented. This should
21 :     * improve array indexing on SML/NJ.
22 :     * 5. Generate testl/testb instead of andl whenever appropriate. This
23 :     * is recommended by the Intel Optimization Guide and seems to improve
24 :     * boxity tests on SML/NJ.
25 :     * -- Allen
26 : monnier 247 *)
27 : george 545 local
28 :     val rewriteMemReg = true (* should we rewrite memRegs *)
29 :     in
30 :    
31 : monnier 247 functor X86
32 :     (structure X86Instr : X86INSTR
33 :     structure X86MLTree : MLTREE
34 : george 555 structure ExtensionComp : MLTREE_EXTENSION_COMP
35 :     where I = X86Instr and T = X86MLTree
36 : monnier 475 sharing X86MLTree.Region = X86Instr.Region
37 : george 545 sharing X86MLTree.LabelExp = X86Instr.LabelExp
38 :     datatype arch = Pentium | PentiumPro | PentiumII | PentiumIII
39 :     val arch : arch ref
40 :     val tempMem : X86Instr.operand (* temporary for CVTI2F *)
41 :     ) : sig include MLTREECOMP
42 :     val rewriteMemReg : bool
43 :     end =
44 : monnier 247 struct
45 :     structure T = X86MLTree
46 : monnier 429 structure S = T.Stream
47 : monnier 247 structure I = X86Instr
48 : george 545 structure C = I.C
49 :     structure Shuffle = Shuffle(I)
50 : monnier 247 structure W32 = Word32
51 : george 545 structure LE = I.LabelExp
52 :     structure A = MLRiscAnnotations
53 : monnier 247
54 : george 545 type instrStream = (I.instruction,C.regmap,C.cellset) T.stream
55 : george 555 type mltreeStream = (T.stm,C.regmap,T.mlrisc list) T.stream
56 : leunga 565
57 :     datatype kind = REAL | INTEGER
58 : george 545
59 :     structure Gen = MLTreeGen
60 :     (structure T = T
61 :     val intTy = 32
62 :     val naturalWidths = [32]
63 :     datatype rep = SE | ZE | NEITHER
64 :     val rep = NEITHER
65 :     )
66 :    
67 : monnier 411 fun error msg = MLRiscErrorMsg.error("X86",msg)
68 : monnier 247
69 : george 545 (* Should we perform automatic MemReg translation?
70 :     * If this is on, we can avoid doing RewritePseudo phase entirely.
71 :     *)
72 :     val rewriteMemReg = rewriteMemReg
73 :     fun isMemReg r = rewriteMemReg andalso r >= 8 andalso r < 32
74 : monnier 247
75 : george 555 val ST0 = C.ST 0
76 :     val ST7 = C.ST 7
77 :    
78 : george 545 (*
79 :     * The code generator
80 :     *)
81 : monnier 411 fun selectInstructions
82 : george 545 (instrStream as
83 :     S.STREAM{emit,defineLabel,entryLabel,pseudoOp,annotation,
84 : monnier 429 beginCluster,endCluster,exitBlock,alias,phi,comment,...}) =
85 : george 545 let exception EA
86 : monnier 411
87 : george 545 (* label where a trap is generated -- one per cluster *)
88 :     val trapLabel = ref (NONE: (I.instruction * Label.label) option)
89 : monnier 247
90 : george 545 (* effective address of an integer register *)
91 :     fun IntReg r = if isMemReg r then MemReg r else I.Direct r
92 :     and MemReg r =
93 :     ((* memRegsUsed := Word.orb(!memRegsUsed,
94 :     Word.<<(0w1, Word.fromInt r-0w8)); *)
95 :     I.MemReg r
96 :     )
97 : monnier 411
98 : george 545 (* Add an overflow trap *)
99 :     fun trap() =
100 :     let val jmp =
101 :     case !trapLabel of
102 :     NONE => let val label = Label.newLabel "trap"
103 :     val jmp = I.JCC{cond=I.O,
104 :     opnd=I.ImmedLabel(LE.LABEL label)}
105 :     in trapLabel := SOME(jmp, label); jmp end
106 :     | SOME(jmp, _) => jmp
107 :     in emit jmp end
108 : monnier 411
109 : george 545 val newReg = C.newReg
110 :     val newFreg = C.newFreg
111 : monnier 247
112 : george 545 (* mark an expression with a list of annotations *)
113 :     fun mark'(i,[]) = i
114 :     | mark'(i,a::an) = mark'(I.ANNOTATION{i=i,a=a},an)
115 : monnier 247
116 : george 545 (* annotate an expression and emit it *)
117 :     fun mark(i,an) = emit(mark'(i,an))
118 : monnier 247
119 : george 545 (* emit parallel copies for integers
120 :     * Translates parallel copies that involve memregs into
121 :     * individual copies.
122 :     *)
123 :     fun copy([], [], an) = ()
124 :     | copy(dst, src, an) =
125 :     let fun mvInstr{dst as I.MemReg rd, src as I.MemReg rs} =
126 :     if rd = rs then [] else
127 :     let val tmpR = I.Direct(newReg())
128 :     in [I.MOVE{mvOp=I.MOVL, src=src, dst=tmpR},
129 :     I.MOVE{mvOp=I.MOVL, src=tmpR, dst=dst}]
130 :     end
131 :     | mvInstr{dst=I.Direct rd, src=I.Direct rs} =
132 :     if rd = rs then []
133 :     else [I.COPY{dst=[rd], src=[rs], tmp=NONE}]
134 :     | mvInstr{dst, src} = [I.MOVE{mvOp=I.MOVL, src=src, dst=dst}]
135 :     in
136 :     app emit (Shuffle.shuffle{mvInstr=mvInstr, ea=IntReg}
137 :     {regmap=fn r => r, tmp=SOME(I.Direct(newReg())),
138 :     dst=dst, src=src})
139 :     end
140 :    
141 :     (* conversions *)
142 :     val itow = Word.fromInt
143 :     val wtoi = Word.toInt
144 :     fun toInt32 i = Int32.fromLarge(Int.toLarge i)
145 :     val w32toi32 = Word32.toLargeIntX
146 :     val i32tow32 = Word32.fromLargeInt
147 : monnier 247
148 : george 545 (* One day, this is going to bite us when precision(LargeInt)>32 *)
149 :     fun wToInt32 w = Int32.fromLarge(Word32.toLargeIntX w)
150 : monnier 247
151 : george 545 (* some useful registers *)
152 :     val eax = I.Direct(C.eax)
153 :     val ecx = I.Direct(C.ecx)
154 :     val edx = I.Direct(C.edx)
155 : monnier 247
156 : george 545 fun immedLabel lab = I.ImmedLabel(LE.LABEL lab)
157 :    
158 :     (* Is the expression zero? *)
159 :     fun isZero(T.LI 0) = true
160 :     | isZero(T.LI32 0w0) = true
161 :     | isZero(T.MARK(e,a)) = isZero e
162 :     | isZero _ = false
163 :     (* Does the expression set the zero bit?
164 :     * WARNING: we assume these things are not optimized out!
165 :     *)
166 :     fun setZeroBit(T.ANDB _) = true
167 :     | setZeroBit(T.ORB _) = true
168 :     | setZeroBit(T.XORB _) = true
169 :     | setZeroBit(T.SRA _) = true
170 :     | setZeroBit(T.SRL _) = true
171 :     | setZeroBit(T.SLL _) = true
172 :     | setZeroBit(T.MARK(e, _)) = setZeroBit e
173 :     | setZeroBit _ = false
174 : monnier 247
175 : george 545 (* emit parallel copies for floating point *)
176 :     fun fcopy(fty, [], [], _) = ()
177 :     | fcopy(fty, dst as [_], src as [_], an) =
178 :     mark(I.FCOPY{dst=dst,src=src,tmp=NONE}, an)
179 :     | fcopy(fty, dst, src, an) =
180 :     mark(I.FCOPY{dst=dst,src=src,tmp=SOME(I.FDirect(newFreg()))}, an)
181 : monnier 247
182 : george 545 (* Translates MLTREE condition code to x86 condition code *)
183 :     fun cond T.LT = I.LT | cond T.LTU = I.B
184 :     | cond T.LE = I.LE | cond T.LEU = I.BE
185 :     | cond T.EQ = I.EQ | cond T.NE = I.NE
186 :     | cond T.GE = I.GE | cond T.GEU = I.AE
187 :     | cond T.GT = I.GT | cond T.GTU = I.A
188 : monnier 247
189 : george 545 (* Move and annotate *)
190 :     fun move'(src as I.Direct s, dst as I.Direct d, an) =
191 :     if s=d then ()
192 :     else mark(I.COPY{dst=[d], src=[s], tmp=NONE}, an)
193 :     | move'(src, dst, an) = mark(I.MOVE{mvOp=I.MOVL, src=src, dst=dst}, an)
194 : monnier 247
195 : george 545 (* Move only! *)
196 :     fun move(src, dst) = move'(src, dst, [])
197 : monnier 247
198 : george 545 fun zero dst = emit(I.BINARY{binOp=I.XORL, src=dst, dst=dst})
199 : monnier 247
200 : george 545 val readonly = I.Region.readonly
201 : monnier 247
202 : george 545 (*
203 :     * Compute an effective address. This is a new version
204 :     *)
205 :     fun address(ea, mem) =
206 :     let (* tricky way to negate without overflow! *)
207 :     fun neg32 w = Word32.notb w + 0w1
208 : monnier 247
209 : george 545 (* Keep building a bigger and bigger effective address expressions
210 :     * The input is a list of trees
211 :     * b -- base
212 :     * i -- index
213 :     * s -- scale
214 :     * d -- immed displacement
215 :     *)
216 :     fun doEA([], b, i, s, d) = makeAddressingMode(b, i, s, d)
217 :     | doEA(t::trees, b, i, s, d) =
218 :     (case t of
219 :     T.LI n => doEAImmed(trees, n, b, i, s, d)
220 :     | T.LI32 n => doEAImmedw(trees, n, b, i, s, d)
221 :     | T.CONST c => doEALabel(trees, LE.CONST c, b, i, s, d)
222 :     | T.LABEL le => doEALabel(trees, le, b, i, s, d)
223 :     | T.ADD(32, t1, t2 as T.REG(_,r)) =>
224 :     if isMemReg r then doEA(t2::t1::trees, b, i, s, d)
225 :     else doEA(t1::t2::trees, b, i, s, d)
226 :     | T.ADD(32, t1, t2) => doEA(t1::t2::trees, b, i, s, d)
227 :     | T.SUB(32, t1, T.LI n) =>
228 :     (* can't overflow here *)
229 :     doEA(t1::T.LI32(neg32(Word32.fromInt n))::trees, b, i, s, d)
230 :     | T.SUB(32, t1, T.LI32 n) =>
231 :     doEA(t1::T.LI32(neg32 n)::trees, b, i, s, d)
232 :     | T.SLL(32, t1, T.LI 0) => displace(trees, t1, b, i, s, d)
233 :     | T.SLL(32, t1, T.LI 1) => indexed(trees, t1, t, 1, b, i, s, d)
234 :     | T.SLL(32, t1, T.LI 2) => indexed(trees, t1, t, 2, b, i, s, d)
235 :     | T.SLL(32, t1, T.LI 3) => indexed(trees, t1, t, 3, b, i, s, d)
236 :     | T.SLL(32, t1, T.LI32 0w0) => displace(trees, t1, b, i, s, d)
237 :     | T.SLL(32, t1, T.LI32 0w1) => indexed(trees,t1,t,1,b,i,s,d)
238 :     | T.SLL(32, t1, T.LI32 0w2) => indexed(trees,t1,t,2,b,i,s,d)
239 :     | T.SLL(32, t1, T.LI32 0w3) => indexed(trees,t1,t,3,b,i,s,d)
240 :     | t => displace(trees, t, b, i, s, d)
241 :     )
242 : monnier 247
243 : george 545 (* Add an immed constant *)
244 :     and doEAImmed(trees, 0, b, i, s, d) = doEA(trees, b, i, s, d)
245 :     | doEAImmed(trees, n, b, i, s, I.Immed m) =
246 :     doEA(trees, b, i, s, (* no overflow! *)
247 :     I.Immed(w32toi32(Word32.fromInt n + i32tow32 m)))
248 :     | doEAImmed(trees, n, b, i, s, I.ImmedLabel le) =
249 :     doEA(trees, b, i, s, I.ImmedLabel(LE.PLUS(le,LE.INT n)))
250 :     | doEAImmed(trees, n, b, i, s, _) = error "doEAImmed"
251 : monnier 247
252 : george 545 (* Add an immed32 constant *)
253 :     and doEAImmedw(trees, 0w0, b, i, s, d) = doEA(trees, b, i, s, d)
254 :     | doEAImmedw(trees, n, b, i, s, I.Immed m) =
255 :     (* no overflow! *)
256 :     doEA(trees, b, i, s, I.Immed(w32toi32(i32tow32 m + n)))
257 :     | doEAImmedw(trees, n, b, i, s, I.ImmedLabel le) =
258 :     doEA(trees, b, i, s,
259 :     I.ImmedLabel(LE.PLUS(le,LE.INT(Word32.toIntX n)))
260 :     handle Overflow => error "doEAImmedw: constant too large")
261 :     | doEAImmedw(trees, n, b, i, s, _) = error "doEAImmedw"
262 : monnier 247
263 : george 545 (* Add a label expression *)
264 :     and doEALabel(trees, le, b, i, s, I.Immed 0) =
265 :     doEA(trees, b, i, s, I.ImmedLabel le)
266 :     | doEALabel(trees, le, b, i, s, I.Immed m) =
267 :     doEA(trees, b, i, s,
268 :     I.ImmedLabel(LE.PLUS(le,LE.INT(Int32.toInt m)))
269 :     handle Overflow => error "doEALabel: constant too large")
270 :     | doEALabel(trees, le, b, i, s, I.ImmedLabel le') =
271 :     doEA(trees, b, i, s, I.ImmedLabel(LE.PLUS(le,le')))
272 :     | doEALabel(trees, le, b, i, s, _) = error "doEALabel"
273 : monnier 247
274 : george 545 and makeAddressingMode(NONE, NONE, _, disp) = disp
275 :     | makeAddressingMode(SOME base, NONE, _, disp) =
276 :     I.Displace{base=base, disp=disp, mem=mem}
277 :     | makeAddressingMode(base, SOME index, scale, disp) =
278 :     I.Indexed{base=base, index=index, scale=scale,
279 :     disp=disp, mem=mem}
280 : monnier 247
281 : george 545 (* generate code for tree and ensure that it is not in %esp *)
282 :     and exprNotEsp tree =
283 :     let val r = expr tree
284 :     in if r = C.esp then
285 :     let val tmp = newReg()
286 :     in move(I.Direct r, I.Direct tmp); tmp end
287 :     else r
288 :     end
289 : monnier 247
290 : george 545 (* Add a base register *)
291 :     and displace(trees, t, NONE, i, s, d) = (* no base yet *)
292 :     doEA(trees, SOME(expr t), i, s, d)
293 :     | displace(trees, t, b as SOME base, NONE, _, d) = (* no index *)
294 :     (* make t the index, but make sure that it is not %esp! *)
295 :     let val i = expr t
296 :     in if i = C.esp then
297 :     (* swap base and index *)
298 :     if base <> C.esp then
299 :     doEA(trees, SOME i, b, 0, d)
300 :     else (* base and index = %esp! *)
301 :     let val index = newReg()
302 :     in move(I.Direct i, I.Direct index);
303 :     doEA(trees, b, SOME index, 0, d)
304 :     end
305 :     else
306 :     doEA(trees, b, SOME i, 0, d)
307 :     end
308 :     | displace(trees, t, SOME base, i, s, d) = (* base and index *)
309 :     let val b = expr(T.ADD(32,T.REG(32,base),t))
310 :     in doEA(trees, SOME b, i, s, d) end
311 : monnier 247
312 : george 545 (* Add an indexed register *)
313 :     and indexed(trees, t, t0, scale, b, NONE, _, d) = (* no index yet *)
314 :     doEA(trees, b, SOME(exprNotEsp t), scale, d)
315 :     | indexed(trees, _, t0, _, NONE, i, s, d) = (* no base *)
316 :     doEA(trees, SOME(expr t0), i, s, d)
317 :     | indexed(trees, _, t0, _, SOME base, i, s, d) = (*base and index*)
318 :     let val b = expr(T.ADD(32, t0, T.REG(32, base)))
319 :     in doEA(trees, SOME b, i, s, d) end
320 :    
321 :     in case doEA([ea], NONE, NONE, 0, I.Immed 0) of
322 :     I.Immed _ => raise EA
323 :     | I.ImmedLabel le => I.LabelEA le
324 :     | ea => ea
325 :     end (* address *)
326 : monnier 247
327 : george 545 (* reduce an expression into an operand *)
328 :     and operand(T.LI i) = I.Immed(toInt32 i)
329 :     | operand(T.LI32 w) = I.Immed(wToInt32 w)
330 :     | operand(T.CONST c) = I.ImmedLabel(LE.CONST c)
331 :     | operand(T.LABEL lab) = I.ImmedLabel lab
332 :     | operand(T.REG(_,r)) = IntReg r
333 :     | operand(T.LOAD(32,ea,mem)) = address(ea, mem)
334 :     | operand(t) = I.Direct(expr t)
335 : monnier 247
336 : george 545 and moveToReg(opnd) =
337 :     let val dst = I.Direct(newReg())
338 :     in move(opnd, dst); dst
339 :     end
340 : monnier 247
341 : george 545 and reduceOpnd(I.Direct r) = r
342 :     | reduceOpnd opnd =
343 :     let val dst = newReg()
344 :     in move(opnd, I.Direct dst); dst
345 :     end
346 : monnier 247
347 : george 545 (* ensure that the operand is either an immed or register *)
348 :     and immedOrReg(opnd as I.Displace _) = moveToReg opnd
349 :     | immedOrReg(opnd as I.Indexed _) = moveToReg opnd
350 :     | immedOrReg(opnd as I.MemReg _) = moveToReg opnd
351 :     | immedOrReg(opnd as I.LabelEA _) = moveToReg opnd
352 :     | immedOrReg opnd = opnd
353 : monnier 247
354 : george 545 and isImmediate(I.Immed _) = true
355 :     | isImmediate(I.ImmedLabel _) = true
356 :     | isImmediate _ = false
357 : monnier 247
358 : george 545 and regOrMem opnd = if isImmediate opnd then moveToReg opnd else opnd
359 :    
360 :     and isMemOpnd opnd =
361 :     (case opnd of
362 :     I.Displace _ => true
363 :     | I.Indexed _ => true
364 :     | I.MemReg _ => true
365 :     | I.LabelEA _ => true
366 : george 555 | I.FDirect f => true
367 : george 545 | _ => false
368 :     )
369 :    
370 :     (*
371 :     * Compute an integer expression and put the result in
372 :     * the destination register rd.
373 :     *)
374 :     and doExpr(exp, rd : I.C.cell, an) =
375 :     let val rdOpnd = IntReg rd
376 : monnier 247
377 : george 545 fun equalRd(I.Direct r) = r = rd
378 :     | equalRd(I.MemReg r) = r = rd
379 :     | equalRd _ = false
380 : monnier 247
381 : george 545 (* Emit a binary operator. If the destination is
382 :     * a memReg, do something smarter.
383 :     *)
384 :     fun genBinary(binOp, opnd1, opnd2) =
385 :     if isMemReg rd andalso
386 :     (isMemOpnd opnd1 orelse isMemOpnd opnd2) orelse
387 :     equalRd(opnd2)
388 :     then
389 :     let val tmpR = newReg()
390 :     val tmp = I.Direct tmpR
391 :     in move(opnd1, tmp);
392 :     mark(I.BINARY{binOp=binOp, src=opnd2, dst=tmp}, an);
393 :     move(tmp, rdOpnd)
394 :     end
395 :     else
396 :     (move(opnd1, rdOpnd);
397 :     mark(I.BINARY{binOp=binOp, src=opnd2, dst=rdOpnd}, an)
398 :     )
399 : monnier 247
400 : george 545 (* Generate a binary operator; it may commute *)
401 :     fun binaryComm(binOp, e1, e2) =
402 :     let val (opnd1, opnd2) =
403 :     case (operand e1, operand e2) of
404 :     (x as I.Immed _, y) => (y, x)
405 :     | (x as I.ImmedLabel _, y) => (y, x)
406 :     | (x, y as I.Direct _) => (y, x)
407 :     | (x, y) => (x, y)
408 :     in genBinary(binOp, opnd1, opnd2)
409 :     end
410 :    
411 :     (* Generate a binary operator; non-commutative *)
412 :     fun binary(binOp, e1, e2) =
413 :     genBinary(binOp, operand e1, operand e2)
414 :    
415 :     (* Generate a unary operator *)
416 :     fun unary(unOp, e) =
417 :     let val opnd = operand e
418 :     in if isMemReg rd andalso isMemOpnd opnd then
419 :     let val tmp = I.Direct(newReg())
420 :     in move(opnd, tmp); move(tmp, rdOpnd)
421 :     end
422 :     else move(opnd, rdOpnd);
423 :     mark(I.UNARY{unOp=unOp, opnd=rdOpnd}, an)
424 :     end
425 :    
426 :     (* Generate shifts; the shift
427 :     * amount must be a constant or in %ecx *)
428 :     fun shift(opcode, e1, e2) =
429 :     let val (opnd1, opnd2) = (operand e1, operand e2)
430 :     in case opnd2 of
431 :     I.Immed _ => genBinary(opcode, opnd1, opnd2)
432 :     | _ =>
433 :     if equalRd(opnd2) then
434 :     let val tmpR = newReg()
435 :     val tmp = I.Direct tmpR
436 :     in move(opnd1, tmp);
437 :     move(opnd2, ecx);
438 :     mark(I.BINARY{binOp=opcode, src=ecx, dst=tmp},an);
439 :     move(tmp, rdOpnd)
440 :     end
441 :     else
442 :     (move(opnd1, rdOpnd);
443 :     move(opnd2, ecx);
444 :     mark(I.BINARY{binOp=opcode, src=ecx, dst=rdOpnd},an)
445 :     )
446 :     end
447 :    
448 :     (* Division or remainder: dividend must be in %edx:%eax pair *)
449 :     fun divrem(signed, overflow, e1, e2, resultReg) =
450 :     let val (opnd1, opnd2) = (operand e1, operand e2)
451 :     val _ = move(opnd1, eax)
452 :     val oper = if signed then (emit(I.CDQ); I.IDIV)
453 :     else (zero edx; I.UDIV)
454 :     in mark(I.MULTDIV{multDivOp=oper, src=regOrMem opnd2},an);
455 :     move(resultReg, rdOpnd);
456 :     if overflow then trap() else ()
457 :     end
458 :    
459 :     (* Optimize the special case for division *)
460 :     fun divide(signed, overflow, e1, e2 as T.LI n) =
461 :     let fun isPowerOf2 w = Word.andb((w - 0w1), w) = 0w0
462 :     fun log2 n = (* n must be > 0!!! *)
463 :     let fun loop(0w1,pow) = pow
464 :     | loop(w,pow) = loop(Word.>>(w, 0w1),pow+1)
465 :     in loop(n,0) end
466 :     val w = Word.fromInt n
467 :     in if n > 1 andalso isPowerOf2 w then
468 :     let val pow = T.LI(log2 w)
469 :     in if signed then
470 :     (* signed; simulate round towards zero *)
471 :     let val label = Label.newLabel ""
472 :     val reg1 = expr e1
473 :     val opnd1 = I.Direct reg1
474 :     in if setZeroBit e1 then ()
475 :     else emit(I.CMPL{lsrc=opnd1, rsrc=I.Immed 0});
476 :     emit(I.JCC{cond=I.GE, opnd=immedLabel label});
477 :     emit(if n = 2 then
478 :     I.UNARY{unOp=I.INCL, opnd=opnd1}
479 :     else
480 :     I.BINARY{binOp=I.ADDL,
481 :     src=I.Immed(toInt32 n - 1),
482 :     dst=opnd1});
483 :     defineLabel label;
484 :     shift(I.SARL, T.REG(32, reg1), pow)
485 :     end
486 :     else (* unsigned *)
487 :     shift(I.SHRL, e1, pow)
488 :     end
489 :     else
490 :     (* note the only way we can overflow is if
491 :     * n = 0 or n = -1
492 :     *)
493 :     divrem(signed, overflow andalso (n = ~1 orelse n = 0),
494 :     e1, e2, eax)
495 :     end
496 :     | divide(signed, overflow, e1, e2) =
497 :     divrem(signed, overflow, e1, e2, eax)
498 : monnier 247
499 : george 545 fun rem(signed, overflow, e1, e2) =
500 :     divrem(signed, overflow, e1, e2, edx)
501 :    
502 :     (* unsigned integer multiplication *)
503 :     fun uMultiply(e1, e2) =
504 :     (* note e2 can never be (I.Direct edx) *)
505 :     (move(operand e1, eax);
506 :     mark(I.MULTDIV{multDivOp=I.UMUL,
507 :     src=regOrMem(operand e2)},an);
508 :     move(eax, rdOpnd)
509 :     )
510 :    
511 :     (* signed integer multiplication:
512 :     * The only forms that are allowed that also sets the
513 :     * OF and CF flags are:
514 :     *
515 :     * imul r32, r32/m32, imm8
516 :     * imul r32, imm8
517 :     * imul r32, imm32
518 :     *)
519 :     fun multiply(e1, e2) =
520 :     let fun doit(i1 as I.Immed _, i2 as I.Immed _, dstR, dst) =
521 :     (move(i1, dst);
522 :     mark(I.MUL3{dst=dstR, src1=i2, src2=NONE},an))
523 :     | doit(rm, i2 as I.Immed _, dstR, dst) =
524 :     doit(i2, rm, dstR, dst)
525 :     | doit(imm as I.Immed(i), rm, dstR, dst) =
526 :     mark(I.MUL3{dst=dstR, src1=rm, src2=SOME i},an)
527 :     | doit(r1 as I.Direct _, r2 as I.Direct _, dstR, dst) =
528 :     (move(r1, dst);
529 :     mark(I.MUL3{dst=dstR, src1=r2, src2=NONE},an))
530 :     | doit(r1 as I.Direct _, rm, dstR, dst) =
531 :     (move(r1, dst);
532 :     mark(I.MUL3{dst=dstR, src1=rm, src2=NONE},an))
533 :     | doit(rm, r as I.Direct _, dstR, dst) =
534 :     doit(r, rm, dstR, dst)
535 :     | doit(rm1, rm2, dstR, dst) =
536 :     if equalRd rm2 then
537 :     let val tmpR = newReg()
538 :     val tmp = I.Direct tmpR
539 :     in move(rm1, tmp);
540 :     mark(I.MUL3{dst=tmpR, src1=rm2, src2=NONE},an);
541 :     move(tmp, dst)
542 :     end
543 :     else
544 :     (move(rm1, dst);
545 :     mark(I.MUL3{dst=dstR, src1=rm2, src2=NONE},an)
546 :     )
547 :     val (opnd1, opnd2) = (operand e1, operand e2)
548 :     in if isMemReg rd then (* destination must be a real reg *)
549 :     let val tmpR = newReg()
550 :     val tmp = I.Direct tmpR
551 :     in doit(opnd1, opnd2, tmpR, tmp);
552 :     move(tmp, rdOpnd)
553 :     end
554 :     else
555 :     doit(opnd1, opnd2, rd, rdOpnd)
556 :     end
557 : monnier 247
558 : george 545 (* Makes sure the destination must be a register *)
559 :     fun dstMustBeReg f =
560 :     if isMemReg rd then
561 :     let val tmpR = newReg()
562 :     val tmp = I.Direct(tmpR)
563 :     in f(tmpR, tmp); move(tmp, rdOpnd) end
564 :     else f(rd, rdOpnd)
565 : monnier 247
566 : george 545 (* Emit a load instruction; makes sure that the destination
567 :     * is a register
568 :     *)
569 :     fun genLoad(mvOp, ea, mem) =
570 :     dstMustBeReg(fn (_, dst) =>
571 :     mark(I.MOVE{mvOp=mvOp, src=address(ea, mem), dst=dst},an))
572 :    
573 :     (* Generate zero-extended loads *)
574 :     fun load8(ea, mem) = genLoad(I.MOVZBL, ea, mem)
575 :     fun load16(ea, mem) = genLoad(I.MOVZWL, ea, mem)
576 :     fun load8s(ea, mem) = genLoad(I.MOVSBL, ea, mem)
577 :     fun load16s(ea, mem) = genLoad(I.MOVSWL, ea, mem)
578 :     fun load32(ea, mem) = genLoad(I.MOVL, ea, mem)
579 :    
580 :     (* Generate sign-extended loads *)
581 :    
582 :     (* Generate setcc instruction:
583 :     * semantics: MV(rd, COND(_, T.CMP(ty, cc, t1, t2), yes, no))
584 : leunga 583 * Bug, if eax is either t1 or t2 then problem will occur!!!
585 :     * Note that we have to use eax as the destination of the
586 :     * setcc because it only works on the registers
587 :     * %al, %bl, %cl, %dl and %[abcd]h. The last four registers
588 :     * are inaccessible in 32 bit mode.
589 : george 545 *)
590 :     fun setcc(ty, cc, t1, t2, yes, no) =
591 : leunga 583 let val (cc, yes, no) =
592 :     if yes > no then (cc, yes, no)
593 :     else (T.Basis.negateCond cc, no, yes)
594 : george 545 in (* Clear the destination first.
595 :     * This is because stupid SETcc
596 :     * only writes to the low order
597 :     * byte. That's Intel architecture, folks.
598 :     *)
599 : leunga 583 zero eax;
600 : george 545 case (yes, no) of
601 :     (1, 0) => (* normal case *)
602 :     let val cc = cmp(true, ty, cc, t1, t2, [])
603 : leunga 583 in mark(I.SET{cond=cond cc, opnd=eax}, an);
604 :     move(eax, rdOpnd)
605 :     end
606 : george 545 | (C1, C2) =>
607 :     (* general case;
608 : leunga 583 * from the Intel optimization guide p3-5
609 :     *)
610 :     let val cc = cmp(true, ty, cc, t1, t2, [])
611 :     in case C1-C2 of
612 :     D as (1 | 2 | 3 | 4 | 5 | 8 | 9) =>
613 :     let val (base,scale) =
614 :     case D of
615 :     1 => (NONE, 0)
616 :     | 2 => (NONE, 1)
617 :     | 3 => (SOME C.eax, 1)
618 :     | 4 => (NONE, 2)
619 :     | 5 => (SOME C.eax, 2)
620 :     | 8 => (NONE, 3)
621 :     | 9 => (SOME C.eax, 3)
622 :     val addr = I.Indexed{base=base,
623 :     index=C.eax,
624 :     scale=scale,
625 :     disp=I.Immed C2,
626 : george 545 mem=readonly}
627 : leunga 583 val tmpR = newReg()
628 :     val tmp = I.Direct tmpR
629 :     in emit(I.SET{cond=cond cc, opnd=eax});
630 :     mark(I.LEA{r32=tmpR, addr=addr}, an);
631 :     move(tmp, rdOpnd)
632 :     end
633 :     | D =>
634 :     (emit(I.SET{cond=cond(T.Basis.negateCond cc),
635 :     opnd=eax});
636 :     emit(I.UNARY{unOp=I.DECL, opnd=eax});
637 :     emit(I.BINARY{binOp=I.ANDL,
638 :     src=I.Immed D, dst=eax});
639 :     if C2 = 0 then
640 :     move(eax, rdOpnd)
641 :     else
642 :     let val tmpR = newReg()
643 :     val tmp = I.Direct tmpR
644 :     in mark(I.LEA{addr=
645 :     I.Displace{
646 :     base=C.eax,
647 :     disp=I.Immed C2,
648 :     mem=readonly},
649 :     r32=tmpR}, an);
650 :     move(tmp, rdOpnd)
651 :     end
652 :     )
653 :     end
654 : george 545 end (* setcc *)
655 :    
656 :     (* Generate cmovcc instruction.
657 :     * on Pentium Pro and Pentium II only
658 :     *)
659 :     fun cmovcc(ty, cc, t1, t2, yes, no) =
660 :     let fun genCmov(dstR, _) =
661 :     let val _ = doExpr(no, dstR, []) (* false branch *)
662 :     val cc = cmp(true, ty, cc, t1, t2, []) (* compare *)
663 :     in mark(I.CMOV{cond=cond cc, src=operand yes, dst=dstR}, an)
664 :     end
665 :     in dstMustBeReg genCmov
666 :     end
667 :    
668 :     fun unknownExp exp = doExpr(Gen.compileRexp exp, rd, an)
669 : monnier 247
670 : george 545 (* Generate addition *)
671 :     fun addition(e1, e2) =
672 :     (dstMustBeReg(fn (dstR, _) =>
673 :     mark(I.LEA{r32=dstR, addr=address(exp, readonly)}, an))
674 :     handle EA => binaryComm(I.ADDL, e1, e2))
675 : monnier 247
676 : george 545 (* Add n to rd *)
677 :     fun addN n =
678 :     mark(I.BINARY{binOp=I.ADDL, src=I.Immed(toInt32 n),
679 :     dst=rdOpnd}, an)
680 : monnier 247
681 : george 545 in case exp of
682 :     T.REG(_,rs) =>
683 :     if isMemReg rs andalso isMemReg rd then
684 :     let val tmp = I.Direct(newReg())
685 :     in move'(MemReg rs, tmp, an);
686 :     move'(tmp, rdOpnd, [])
687 :     end
688 :     else move'(IntReg rs, rdOpnd, an)
689 :     | (T.LI 0 | T.LI32 0w0) =>
690 :     (* As per Fermin's request, special optimization for rd := 0.
691 :     * Currently we don't bother with the size.
692 :     *)
693 :     if isMemReg rd then move'(I.Immed 0, rdOpnd, an)
694 :     else mark(I.BINARY{binOp=I.XORL, src=rdOpnd, dst=rdOpnd}, an)
695 :     | T.LI n => move'(I.Immed(toInt32 n), rdOpnd, an)
696 :     | T.LI32 w => move'(I.Immed(wToInt32 w), rdOpnd, an)
697 :     | T.CONST c => move'(I.ImmedLabel(LE.CONST c), rdOpnd, an)
698 :     | T.LABEL lab => move'(I.ImmedLabel lab, rdOpnd, an)
699 : monnier 247
700 : george 545 (* 32-bit addition *)
701 :     | T.ADD(32, e, (T.LI 1|T.LI32 0w1)) => unary(I.INCL, e)
702 :     | T.ADD(32, (T.LI 1|T.LI32 0w1), e) => unary(I.INCL, e)
703 :     | T.ADD(32, e, T.LI ~1) => unary(I.DECL, e)
704 :     | T.ADD(32, T.LI ~1, e) => unary(I.DECL, e)
705 :     | T.ADD(32, e1 as T.REG(_, rs), e2 as T.LI n) =>
706 :     if rs = rd then addN n else addition(e1, e2)
707 :     | T.ADD(32, e1 as T.LI n, e2 as T.REG(_, rs)) =>
708 :     if rs = rd then addN n else addition(e1, e2)
709 :     | T.ADD(32, e1, e2) => addition(e1, e2)
710 : monnier 247
711 : george 545 (* 32-bit subtraction *)
712 :     | T.SUB(32, e, (T.LI 1 | T.LI32 0w1)) => unary(I.DECL, e)
713 :     | T.SUB(32, e, T.LI ~1) => unary(I.INCL, e)
714 :     | T.SUB(32, (T.LI 0 | T.LI32 0w0), e) => unary(I.NEGL, e)
715 : monnier 247
716 : george 545 (* Never mind:
717 :     | T.SUB(32, e1, e2 as T.LI n) =>
718 :     (mark(I.LEA{r32=rd, addr=address(T.ADD(32, e1, T.LI(~n)),
719 :     I.Region.readonly)}, an)
720 :     handle (Overflow|EA) => binary(I.SUBL, e1, e2))
721 :     *)
722 :     | T.SUB(32, e1, e2) => binary(I.SUBL, e1, e2)
723 : monnier 247
724 : george 545 | T.MULU(32, x, y) => uMultiply(x, y)
725 :     | T.DIVU(32, x, y) => divide(false, false, x, y)
726 :     | T.REMU(32, x, y) => rem(false, false, x, y)
727 : monnier 247
728 : george 545 | T.MULS(32, x, y) => multiply(x, y)
729 :     | T.DIVS(32, x, y) => divide(true, false, x, y)
730 :     | T.REMS(32, x, y) => rem(true, false, x, y)
731 : monnier 247
732 : george 545 | T.ADDT(32, x, y) => (binaryComm(I.ADDL, x, y); trap())
733 :     | T.SUBT(32, x, y) => (binary(I.SUBL, x, y); trap())
734 :     | T.MULT(32, x, y) => (multiply(x, y); trap())
735 :     | T.DIVT(32, x, y) => divide(true, true, x, y)
736 :     | T.REMT(32, x, y) => rem(true, true, x, y)
737 : monnier 247
738 : george 545 | T.ANDB(32, x, y) => binaryComm(I.ANDL, x, y)
739 :     | T.ORB(32, x, y) => binaryComm(I.ORL, x, y)
740 :     | T.XORB(32, x, y) => binaryComm(I.XORL, x, y)
741 :     | T.NOTB(32, x) => unary(I.NOTL, x)
742 : monnier 247
743 : george 545 | T.SRA(32, x, y) => shift(I.SARL, x, y)
744 :     | T.SRL(32, x, y) => shift(I.SHRL, x, y)
745 :     | T.SLL(32, x, y) => shift(I.SHLL, x, y)
746 : monnier 247
747 : george 545 | T.LOAD(8, ea, mem) => load8(ea, mem)
748 :     | T.LOAD(16, ea, mem) => load16(ea, mem)
749 :     | T.LOAD(32, ea, mem) => load32(ea, mem)
750 :     | T.CVTI2I(_,T.SIGN_EXTEND,_,T.LOAD(8,ea,mem)) => load8s(ea, mem)
751 :     | T.CVTI2I(_,T.SIGN_EXTEND,_,T.LOAD(16,ea,mem)) => load16s(ea, mem)
752 : monnier 498
753 : george 545 | T.COND(32, T.CMP(ty, cc, t1, t2), T.LI yes, T.LI no) =>
754 : leunga 583 setcc(ty, cc, t1, t2, toInt32 yes, toInt32 no)
755 :     | T.COND(32, T.CMP(ty, cc, t1, t2), T.LI32 yes, T.LI32 no) =>
756 :     setcc(ty, cc, t1, t2, Word32.toLargeIntX yes,
757 :     Word32.toLargeIntX no)
758 : george 545 | T.COND(32, T.CMP(ty, cc, t1, t2), yes, no) =>
759 :     (case !arch of (* PentiumPro and higher has CMOVcc *)
760 :     Pentium => unknownExp exp
761 :     | _ => cmovcc(ty, cc, t1, t2, yes, no)
762 :     )
763 :     | T.LET(s,e) => (doStmt s; doExpr(e, rd, an))
764 :     | T.MARK(e, A.MARKREG f) => (f rd; doExpr(e, rd, an))
765 :     | T.MARK(e, a) => doExpr(e, rd, a::an)
766 :     | T.PRED(e,c) => doExpr(e, rd, A.CTRLUSE c::an)
767 : george 555 | T.REXT e =>
768 :     ExtensionComp.compileRext (reducer()) {e=e, rd=rd, an=an}
769 : george 545 (* simplify and try again *)
770 :     | exp => unknownExp exp
771 :     end (* doExpr *)
772 : monnier 247
(* Evaluate an expression and return the register holding its result.
 * A T.REG can be used directly -- unless it denotes a memReg, in which
 * case it must first be evaluated into a genuine register
 * (doExpr takes care of the reload).
 *)
and expr(e as T.REG(_, r)) = if isMemReg r then genExpr e else r
  | expr e                 = genExpr e

(* Evaluate an expression into a freshly allocated register *)
and genExpr e =
    let val dst = newReg()
    in  doExpr(e, dst, []); dst
    end
783 : monnier 247
(* Compare an expression with zero.
 * For EQ/NE against an AND, emit TEST instead of AND: TEST sets the
 * same flags but does not have to write the result to a register.
 * Returns the condition code actually used.
 *)
and cmpWithZero(cc as (T.EQ | T.NE), e as T.ANDB(ty, a, b)) =
      (((case ty of
           8  => test(I.TESTB, a, b)
         | 16 => test(I.TESTW, a, b)
         | 32 => test(I.TESTL, a, b)
         | _  => (expr e; ())       (* unusual width: just evaluate it *)
        ));
       cc)
  | cmpWithZero(cc, e) = (expr e; cc)

(* Emit a TEST instruction.
 * Legal addressing modes are (r/m, r) and (r/m, imm), so a memory
 * operand is moved to the left-hand side.  TESTB/TESTW could replace
 * TESTL when the immediate fits in 8/16 bits (little-endian makes this
 * valid for memory operands too), but TESTB is restricted to the
 * byte-addressable registers (AL..DH), so that substitution is best
 * done after register allocation.
 *)
and test(testopcode, a, b) =
    let val (_, x, y) = commuteComparison(T.EQ, true, a, b)
        (* rewrite (r, r/m) as (r/m, r) *)
        val (lhs, rhs) = if isMemOpnd y then (y, x) else (x, y)
    in  emit(testopcode{lsrc=lhs, rsrc=rhs})
    end
819 : monnier 247
(* Generate a condition-code expression.
 * The destination 0 means "set the condition code".
 *)
and doCCexpr(T.CMP(ty, cc, t1, t2), 0, an) =
      (cmp(false, ty, cc, t1, t2, an); ())
  | doCCexpr(T.CCMARK(e, A.MARKREG f), cd, an) = (f cd; doCCexpr(e, cd, an))
  | doCCexpr(T.CCMARK(e, a), cd, an)           = doCCexpr(e, cd, a::an)
  | doCCexpr(T.CCEXT e, cd, an) =
      ExtensionComp.compileCCext (reducer()) {e=e, ccd=cd, an=an}
  | doCCexpr _ = error "doCCexpr"

and ccExpr e = error "ccExpr"

(* Generate a comparison that sets the condition code and return the
 * condition code actually used.  If swapable is true the operands may
 * be reordered (with the condition adjusted accordingly).
 *)
and cmp(swapable, ty, cc, t1, t2, an) =
    (case cc of
       (T.EQ | T.NE) =>
         (* the compare may be redundant: the flags may already be
          * set by the expression that produced the operand *)
         if isZero t1 andalso setZeroBit t2 then cmpWithZero(cc, t2)
         else if isZero t2 andalso setZeroBit t1 then cmpWithZero(cc, t1)
         else genCmp(ty, true, cc, t1, t2, an)   (* ==/<> commute *)
     | _ => genCmp(ty, swapable, cc, t1, t2, an)
    )
850 : monnier 247
(* Given the two operands of a comparison (or test), return the
 * condition code to use together with operands arranged into one of
 * the legal modes:
 *      r/m, imm        r/m, r        r, r/m
 *)
and commuteComparison(cc, swapable, a, b) =
    let val x = operand a
        val y = operand b
    in  if isImmediate x then
           if isImmediate y then
              (cc, moveToReg x, y)                 (* imm/imm: load lhs *)
           else if swapable then
              (T.Basis.swapCond cc, y, x)          (* put imm on the right *)
           else
              (cc, moveToReg x, y)
        else if isImmediate y then
           (cc, x, y)                              (* r/m, imm is legal *)
        else
           (case (x, y) of
              (_, I.Direct _) => (cc, x, y)        (* r/m, r *)
            | (I.Direct _, _) => (cc, x, y)        (* r, r/m *)
            | _               => (cc, moveToReg x, y)  (* mem/mem: load lhs *)
           )
    end

(* Emit the actual CMPL; return the condition code used *)
and genCmp(ty, swapable, cc, a, b, an) =
    let val (cc', lhs, rhs) = commuteComparison(cc, swapable, a, b)
    in  mark(I.CMPL{lsrc=lhs, rsrc=rhs}, an); cc'
    end
880 : monnier 247
(* Generate code for jumps; a direct label jump records its one target *)
and jmp(T.LABEL(le as LE.LABEL lab), _, an) =
      mark(I.JMP(I.ImmedLabel le, [lab]), an)
  | jmp(T.LABEL le, targets, an) = mark(I.JMP(I.ImmedLabel le, targets), an)
  | jmp(ea, targets, an)         = mark(I.JMP(operand ea, targets), an)

(* Convert an mlrisc register list into a cellset *)
and cellset mlrisc =
    let val addCCReg = C.addCell C.CC
        fun add(T.GPR(T.REG(_, r)),  acc) = C.addReg(r, acc)
          | add(T.FPR(T.FREG(_, f)), acc) = C.addFreg(f, acc)
          | add(T.CCR(T.CC(_, cc)),  acc) = addCCReg(cc, acc)
          | add(T.CCR(T.FCC(_, cc)), acc) = addCCReg(cc, acc)
          | add(_, acc) = acc
    in  foldl add C.empty mlrisc
    end

(* Generate code for calls; def/use lists become cellsets *)
and call(ea, flow, def, use, mem, an) =
    mark(I.CALL(operand ea, cellset def, cellset use, mem), an)
902 :    
(* Generate code for integer stores *)
and store8(ea, d, mem, an) =
    let (* movb needs a byte-addressable source register; this code
         * forces the value into %eax when it is not already there *)
        val src =
            case immedOrReg(operand d) of
              s as I.Direct r => if r = C.eax then s else (move(s, eax); eax)
            | s => s
    in  mark(I.MOVE{mvOp=I.MOVB, src=src, dst=address(ea, mem)}, an)
    end
and store16(ea, d, mem, an) = error "store16"   (* 16-bit stores unsupported *)
and store32(ea, d, mem, an) =
    move'(immedOrReg(operand d), address(ea, mem), an)
915 :    
(* Generate code for conditional branches *)
and branch(T.CMP(ty, cc, t1, t2), lab, an) =
      (* integer compare; operand reordering is allowed here *)
      let val cc' = cmp(true, ty, cc, t1, t2, [])
      in  mark(I.JCC{cond=cond cc', opnd=immedLabel lab}, an)
      end
  | branch(T.FCMP(fty, fcc, t1, t2), lab, an) =
      (* floating point compare-and-branch *)
      fbranch(fty, fcc, t1, t2, lab, an)
  | branch(ccexp, lab, an) =
      (* branch on a previously computed condition code *)
      (doCCexpr(ccexp, 0, []);
       mark(I.JCC{cond=cond(Gen.condOf ccexp), opnd=immedLabel lab}, an)
      )
927 :    
(* Generate code for floating point compare and branch.
 * The operands are compared with FUCOMPP, the x87 status word is
 * fetched into %ax with FNSTSW, and the relevant flags are tested.
 * In %ax the status flags appear as: C0 = 0x0100, C2 = 0x0400,
 * C3 = 0x4000 (see the Intel SDM); hence the magic masks below.
 *)
and fbranch(fty, fcc, t1, t2, lab, an) =
    let fun compare() =
            let (* for registers and loads the operand order on the
                 * FP stack is immaterial; otherwise evaluate in order
                 * and FXCH to restore it *)
                fun ignoreOrder (T.FREG _) = true
                  | ignoreOrder (T.FLOAD _) = true
                  | ignoreOrder (T.FMARK(e,_)) = ignoreOrder e
                  | ignoreOrder _ = false
            in  if ignoreOrder t1 orelse ignoreOrder t2 then
                   (reduceFexp(fty, t2, []); reduceFexp(fty, t1, []))
                else
                   (reduceFexp(fty, t1, []); reduceFexp(fty, t2, []);
                    emit(I.FXCH{opnd=C.ST(1)}));
                emit(I.FUCOMPP)       (* compare and pop both operands *)
            end
        (* tiny combinators over %eax / the fetched status word *)
        fun andil i  = emit(I.BINARY{binOp=I.ANDL, src=I.Immed(i), dst=eax})
        fun testil i = emit(I.TESTL{lsrc=eax, rsrc=I.Immed(i)})
        fun xoril i  = emit(I.BINARY{binOp=I.XORL, src=I.Immed(i), dst=eax})
        fun cmpil i  = emit(I.CMPL{rsrc=I.Immed(i), lsrc=eax})
        fun j(cc, lab) = mark(I.JCC{cond=cc, opnd=immedLabel lab}, an)
        fun sahf() = emit(I.SAHF)     (* copy AH into the CPU flags *)
        fun branch() =
            case fcc
            of T.==  => (andil 0x4400; xoril 0x4000; j(I.EQ, lab))
             | T.?<> => (andil 0x4400; xoril 0x4000; j(I.NE, lab))
             | T.?   => (sahf(); j(I.P, lab))
             | T.<=> => (sahf(); j(I.NP, lab))
             | T.>   => (testil 0x4500; j(I.EQ, lab))
             | T.?<= => (testil 0x4500; j(I.NE, lab))
             | T.>=  => (testil 0x500;  j(I.EQ, lab))
             | T.?<  => (testil 0x500;  j(I.NE, lab))
             | T.<   => (andil 0x4500; cmpil 0x100; j(I.EQ, lab))
             | T.?>= => (andil 0x4500; cmpil 0x100; j(I.NE, lab))
             | T.<=  => (andil 0x4100; cmpil 0x100; j(I.EQ, lab);
                         cmpil 0x4000; j(I.EQ, lab))
             | T.?>  => (sahf(); j(I.P, lab); testil 0x4100; j(I.EQ, lab))
             | T.<>  => (testil 0x4400; j(I.EQ, lab))
             | T.?=  => (testil 0x4400; j(I.NE, lab))
             | _     => error "fbranch"
            (*esac*)
    in  compare(); emit I.FNSTSW; branch()
    end
968 : monnier 247
(* FP load: select the FLD variant for the given precision *)
and fld(32, opnd) = I.FLDS opnd     (* single *)
  | fld(64, opnd) = I.FLDL opnd     (* double *)
  | fld(80, opnd) = I.FLDT opnd     (* extended *)
  | fld _         = error "fld"

(* Integer load into the FP stack, by integer width *)
and fild(16, opnd) = I.FILD opnd
  | fild(32, opnd) = I.FILDL opnd
  | fild(64, opnd) = I.FILDLL opnd
  | fild _         = error "fild"

(* Load either an integer or a real onto the FP stack *)
and fxld(INTEGER, ty, opnd) = fild(ty, opnd)
  | fxld(REAL, fty, opnd)   = fld(fty, opnd)

(* FP store-and-pop: select the FSTP variant for the given precision *)
and fstp(32, opnd) = I.FSTPS opnd
  | fstp(64, opnd) = I.FSTPL opnd
  | fstp(80, opnd) = I.FSTPT opnd
  | fstp _         = error "fstp"
986 :    
(* Generate code for floating point stores:
 * get the value on top of the FP stack, then pop it into memory.
 *)
and fstore(fty, ea, d, mem, an) =
    ((case d of
        T.FREG(fty, fs) => emit(fld(fty, I.FDirect fs))
      | _ => reduceFexp(fty, d, []));
     mark(fstp(fty, address(ea, mem)), an)
    )

and fexpr e = error "fexpr"

(* Generate a floating point expression and leave the result in fd *)
and doFexpr(fty, T.FREG(_, fs), fd, an) =
      (* register-to-register: copy only when source <> destination *)
      (if fs = fd then () else mark(I.FCOPY{dst=[fd], src=[fs], tmp=NONE}, an))
  | doFexpr(fty, T.FLOAD(fty', ea, mem), fd, an) =
      (* load at the source precision, pop at the target precision *)
      let val ea = address(ea, mem)
      in  mark(fld(fty', ea), an);
          emit(fstp(fty, I.FDirect fd))
      end
  | doFexpr(fty, e, fd, an) =
      (* general case: evaluate onto the FP stack, then pop into fd *)
      (reduceFexp(fty, e, []);
       mark(fstp(fty, I.FDirect fd), an)
      )
1011 :    
(* Generate a floating point expression using the Sethi-Ullman scheme.
 * The expression tree is first labelled with register needs (su);
 * gencode then evaluates the more expensive subtree first, leaving the
 * final result in %ST(0).  Leaves of label 0 (registers, loads, and
 * 16/32-bit integer conversions) are folded directly into the binary
 * instruction as memory/register operands.
 *
 * Bug fix: the folded-operand dispatch previously emitted I.FDIVS for
 * a 32-bit FSUB and I.FIDIVS for a 16-bit FISUB (copy-paste errors);
 * it now correctly uses I.FSUBS and I.FISUBS.
 *)
and reduceFexp(fty, fexp, an) =
    let val ST  = I.ST(C.ST 0)
        val ST1 = I.ST(C.ST 1)

        (* Sethi-Ullman-labelled expression trees *)
        datatype su_tree =
          LEAF of int * T.fexp * ans
        | BINARY of int * T.fty * fbinop * su_tree * su_tree * ans
        | UNARY of int * T.fty * I.funOp * su_tree * ans
        and fbinop = FADD | FSUB | FMUL | FDIV
                   | FIADD | FISUB | FIMUL | FIDIV
        withtype ans = Annotations.annotations

        (* the Sethi-Ullman number of a tree *)
        fun label(LEAF(n, _, _)) = n
          | label(BINARY(n, _, _, _, _, _)) = n
          | label(UNARY(n, _, _, _, _)) = n

        fun annotate(LEAF(n, x, an), a)        = LEAF(n, x, a::an)
          | annotate(BINARY(n, t, b, x, y, an), a) = BINARY(n, t, b, x, y, a::an)
          | annotate(UNARY(n, t, u, x, an), a) = UNARY(n, t, u, x, a::an)

        (* Build the tree with Sethi-Ullman numbers *)
        fun su(e as T.FREG _)   = LEAF(1, e, [])
          | su(e as T.FLOAD _)  = LEAF(1, e, [])
          | su(e as T.CVTI2F _) = LEAF(1, e, [])
          | su(T.CVTF2F(_, _, t)) = su t
          | su(T.FMARK(t, a))     = annotate(su t, a)
          | su(T.FABS(fty, t))    = suUnary(fty, I.FABS, t)
          | su(T.FNEG(fty, t))    = suUnary(fty, I.FCHS, t)
          | su(T.FSQRT(fty, t))   = suUnary(fty, I.FSQRT, t)
          | su(T.FADD(fty, t1, t2)) = suComBinary(fty, FADD, FIADD, t1, t2)
          | su(T.FMUL(fty, t1, t2)) = suComBinary(fty, FMUL, FIMUL, t1, t2)
          | su(T.FSUB(fty, t1, t2)) = suBinary(fty, FSUB, FISUB, t1, t2)
          | su(T.FDIV(fty, t1, t2)) = suBinary(fty, FDIV, FIDIV, t1, t2)
          | su _ = error "su"

        (* Try to fold the memory operand or integer conversion;
         * the boolean is true for a folded integer conversion *)
        and suFold(e as T.FREG _)  = (LEAF(0, e, []), false)
          | suFold(e as T.FLOAD _) = (LEAF(0, e, []), false)
          | suFold(e as T.CVTI2F(_, (16 | 32), _)) = (LEAF(0, e, []), true)
          | suFold(T.CVTF2F(_, _, t)) = suFold t
          | suFold(T.FMARK(t, a)) =
              let val (t, integer) = suFold t
              in  (annotate(t, a), integer) end
          | suFold e = (su e, false)

        (* Can the tree be folded into the src operand? *)
        and foldable(T.FREG _)  = true
          | foldable(T.FLOAD _) = true
          | foldable(T.CVTI2F(_, (16 | 32), _)) = true
          | foldable(T.CVTF2F(_, _, t)) = foldable t
          | foldable(T.FMARK(t, _))     = foldable t
          | foldable _ = false

        (* Form a unary tree *)
        and suUnary(fty, funary, t) =
            let val t = su t
            in  UNARY(label t, fty, funary, t, [])
            end

        (* Form a binary tree; the right subtree may be folded *)
        and suBinary(fty, binop, ibinop, t1, t2) =
            let val t1 = su t1
                val (t2, integer) = suFold t2
                val n1 = label t1
                val n2 = label t2
                val n  = if n1 = n2 then n1 + 1 else Int.max(n1, n2)
                val myOp = if integer then ibinop else binop
            in  BINARY(n, fty, myOp, t1, t2, [])
            end

        (* Commutative operations: fold whichever operand allows it *)
        and suComBinary(fty, binop, ibinop, t1, t2) =
            let val (t1, t2) = if foldable t2 then (t1, t2) else (t2, t1)
            in  suBinary(fty, binop, ibinop, t1, t2) end

        and sameTree(LEAF(_, T.FREG(t1, f1), []),
                     LEAF(_, T.FREG(t2, f2), [])) = t1 = t2 andalso f1 = f2
          | sameTree _ = false

        (* Traverse the tree and generate code *)
        fun gencode(LEAF(_, t, an)) = mark(fxld(leafEA t), an)
          | gencode(BINARY(_, _, binop, x, t2 as LEAF(0, y, a1), a2)) =
            (* right operand folded into the instruction *)
            let val _ = gencode x
                val (_, fty, src) = leafEA y
                fun gen code = mark(code, a1 @ a2)
                fun binary(oper32, oper64) =
                    if sameTree(x, t2) then
                       gen(I.FBINARY{binOp=oper64, src=ST, dst=ST})
                    else
                       let (* only memory operands carry a precision;
                            * a register operand always uses the L form *)
                           val oper =
                               if isMemOpnd src then
                                  case fty of
                                    32 => oper32
                                  | 64 => oper64
                                  | _  => error "gencode: BINARY"
                               else oper64
                       in  gen(I.FBINARY{binOp=oper, src=src, dst=ST}) end
                fun ibinary(oper16, oper32) =
                    let val oper = case fty of
                                     16 => oper16
                                   | 32 => oper32
                                   | _  => error "gencode: IBINARY"
                    in  gen(I.FIBINARY{binOp=oper, src=src}) end
            in  case binop of
                  FADD  => binary(I.FADDS, I.FADDL)
                | FSUB  => binary(I.FSUBS, I.FSUBL)     (* was I.FDIVS: bug *)
                | FMUL  => binary(I.FMULS, I.FMULL)
                | FDIV  => binary(I.FDIVS, I.FDIVL)
                | FIADD => ibinary(I.FIADDS, I.FIADDL)
                | FISUB => ibinary(I.FISUBS, I.FISUBL)  (* was I.FIDIVS: bug *)
                | FIMUL => ibinary(I.FIMULS, I.FIMULL)
                | FIDIV => ibinary(I.FIDIVS, I.FIDIVL)
            end
          | gencode(BINARY(_, fty, binop, t1, t2, an)) =
            let fun doit(t1, t2, oper, operP, operRP) =
                    let (* oper[P]  => ST(1) := ST oper ST(1); [pop]
                         * operR[P] => ST(1) := ST(1) oper ST; [pop]
                         *)
                        val n1 = label t1
                        val n2 = label t2
                    in  if n1 < n2 andalso n1 <= 7 then
                           (gencode t2;
                            gencode t1;
                            mark(I.FBINARY{binOp=operP, src=ST, dst=ST1}, an))
                        else if n2 <= n1 andalso n2 <= 7 then
                           (gencode t1;
                            gencode t2;
                            mark(I.FBINARY{binOp=operRP, src=ST, dst=ST1}, an))
                        else
                           (* both labels > 7: spill one side *)
                           let val fs = I.FDirect(newFreg())
                           in  gencode t2;
                               emit(fstp(fty, fs));
                               gencode t1;
                               mark(I.FBINARY{binOp=oper, src=fs, dst=ST}, an)
                           end
                    end
            in  case binop of
                  FADD => doit(t1, t2, I.FADDL, I.FADDP, I.FADDP)
                | FMUL => doit(t1, t2, I.FMULL, I.FMULP, I.FMULP)
                | FSUB => doit(t1, t2, I.FSUBL, I.FSUBP, I.FSUBRP)
                | FDIV => doit(t1, t2, I.FDIVL, I.FDIVP, I.FDIVRP)
                | _    => error "gencode.BINARY"
            end
          | gencode(UNARY(_, _, unaryOp, t, an)) =
              (gencode t; mark(I.FUNARY unaryOp, an))

        (* Generate code for a leaf; returns INTEGER/REAL, the
         * precision, and an effective address *)
        and leafEA(T.FREG(fty, f))       = (REAL, fty, I.FDirect f)
          | leafEA(T.FLOAD(fty, ea, mem)) = (REAL, fty, address(ea, mem))
          | leafEA(T.CVTI2F(_, 32, t)) = int2real(32, I.MOVL, t)
          | leafEA(T.CVTI2F(_, 16, t)) = int2real(16, I.MOVSWL, t)
          | leafEA(T.CVTI2F(_, 8, t))  = int2real(8, I.MOVSBL, t)
          | leafEA _ = error "leafEA"

        (* Move an integer t of width ty into a memory location so it
         * can be FILDed; 16/32-bit memory operands are used in place *)
        and int2real(ty, mov, t) =
            let val opnd = operand t
            in  if isMemOpnd opnd andalso (ty = 16 orelse ty = 32)
                then (INTEGER, ty, opnd)
                else (emit(I.MOVE{mvOp=mov, src=opnd, dst=tempMem});
                      (INTEGER, 32, tempMem))
            end
    in  gencode(su fexp)
    end (* reduceFexp *)
1186 :    
(* Generate code for a single MLTREE statement *)
and stmt(T.MV(_, rd, e), an)          = doExpr(e, rd, an)
  | stmt(T.FMV(fty, fd, e), an)       = doFexpr(fty, e, fd, an)
  | stmt(T.CCMV(ccd, e), an)          = doCCexpr(e, ccd, an)
  | stmt(T.COPY(_, dst, src), an)     = copy(dst, src, an)
  | stmt(T.FCOPY(fty, dst, src), an)  = fcopy(fty, dst, src, an)
  | stmt(T.JMP(ctrl, e, labs), an)    = jmp(e, labs, an)
  | stmt(T.CALL(e, flow, def, use, cdef, cuse, mem), an) =
      call(e, flow, def, use, mem, an)
  | stmt(T.RET _, an)                 = mark(I.RET NONE, an)
  | stmt(T.STORE(8, ea, d, mem), an)  = store8(ea, d, mem, an)
  | stmt(T.STORE(16, ea, d, mem), an) = store16(ea, d, mem, an)
  | stmt(T.STORE(32, ea, d, mem), an) = store32(ea, d, mem, an)
  | stmt(T.FSTORE(fty, ea, d, mem), an) = fstore(fty, ea, d, mem, an)
  | stmt(T.BCC(ctrl, cc, lab), an)    = branch(cc, lab, an)
  | stmt(T.DEFINE l, _)               = defineLabel l
  | stmt(T.ANNOTATION(s, a), an)      = stmt(s, a::an)
  | stmt(T.EXT s, an) =
      ExtensionComp.compileSext (reducer()) {stm=s, an=an}
  | stmt(s, _) = doStmts(Gen.compileStm s)   (* simplify and try again *)

and doStmt s   = stmt(s, [])
and doStmts ss = app doStmt ss
1210 :    
and beginCluster' _ =
    ((* Must be cleared by the client.
      * if rewriteMemReg then memRegsUsed := 0w0 else ();
      *)
     trapLabel := NONE;
     beginCluster 0
    )
and endCluster' a =
    ((* emit the pending overflow-trap label, if one was requested *)
     (case !trapLabel of
        NONE => ()
      | SOME(_, lab) => (defineLabel lab; emit(I.INTO))
     (*esac*));
     endCluster(a)
    )
1223 :    
(* The reducer record handed to extension compilers *)
and reducer() =
    T.REDUCER{reduceRexp    = expr,
              reduceFexp    = fexpr,
              reduceCCexp   = ccExpr,
              reduceStm     = stmt,
              operand       = operand,
              reduceOperand = reduceOpnd,
              addressOf     = fn e => address(e, I.Region.memory), (*XXX*)
              emit          = mark,
              instrStream   = instrStream,
              mltreeStream  = self()
             }

(* The MLTREE stream interface of this code generator *)
and self() =
    S.STREAM{beginCluster = beginCluster',
             endCluster   = endCluster',
             emit         = doStmt,
             pseudoOp     = pseudoOp,
             defineLabel  = defineLabel,
             entryLabel   = entryLabel,
             comment      = comment,
             annotation   = annotation,
             exitBlock    = fn mlrisc => exitBlock(cellset mlrisc),
             alias        = alias,
             phi          = phi
            }
1251 :    
1252 :     in self()
1253 : monnier 247 end
1254 :    
1255 : george 545 end (* functor *)
1256 :    
1257 :     end (* local *)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0