SCM Repository
Annotation of /sml/trunk/src/MLRISC/x86/mltree/x86.sml
Parent Directory
|
Revision Log
Revision 1296 - (view) (download)
1 : | jhr | 1117 | (* x86.sml |
2 : | monnier | 247 | * |
3 : | * COPYRIGHT (c) 1998 Bell Laboratories. | ||
4 : | george | 545 | * |
5 : | * This is a revised version that takes into account of | ||
6 : | * the extended x86 instruction set, and has better handling of | ||
7 : | * non-standard types. I've factored out the integer/floating point | ||
8 : | * comparison code, added optimizations for conditional moves. | ||
9 : | * The latter generates SETcc and CMOVcc (Pentium Pro only) instructions. | ||
10 : | * To avoid problems, I have tried to incorporate as much of | ||
11 : | * Lal's original magic incantations as possible. | ||
12 : | monnier | 247 | * |
13 : | george | 545 | * Some changes: |
14 : | * | ||
15 : | blume | 1183 | * 1. REMU/REMS are now supported |
16 : | george | 545 | * 2. COND is supported by generating SETcc and/or CMOVcc; this |
17 : | * may require at least a Pentium II to work. | ||
18 : | * 3. Division by a constant has been optimized. Division by | ||
19 : | * a power of 2 generates SHRL or SARL. | ||
20 : | * 4. Better addressing mode selection has been implemented. This should | ||
21 : | * improve array indexing on SML/NJ. | ||
22 : | * 5. Generate testl/testb instead of andl whenever appropriate. This | ||
23 : | * is recommended by the Intel Optimization Guide and seems to improve | ||
24 : | * boxity tests on SML/NJ. | ||
25 : | leunga | 731 | * |
26 : | * More changes for floating point: | ||
27 : | * A new mode is implemented which generates pseudo 3-address instructions | ||
28 : | * for floating point. These instructions are register allocated the | ||
29 : | * normal way, with the virtual registers mapped onto a set of pseudo | ||
30 : | * %fp registers. These registers are then mapped onto the %st registers | ||
31 : | * with a new postprocessing phase. | ||
32 : | * | ||
33 : | george | 545 | * -- Allen |
34 : | monnier | 247 | *) |
35 : | george | 545 | local |
36 : | val rewriteMemReg = true (* should we rewrite memRegs *) | ||
37 : | leunga | 731 | val enableFastFPMode = true (* set this to false to disable the mode *) |
38 : | george | 545 | in |
39 : | |||
40 : | monnier | 247 | functor X86 |
41 : | (structure X86Instr : X86INSTR | ||
42 : | leunga | 797 | structure MLTreeUtils : MLTREE_UTILS |
43 : | george | 933 | where T = X86Instr.T |
44 : | george | 555 | structure ExtensionComp : MLTREE_EXTENSION_COMP |
45 : | george | 933 | where I = X86Instr and T = X86Instr.T |
46 : | george | 984 | structure MLTreeStream : MLTREE_STREAM |
47 : | where T = ExtensionComp.T | ||
48 : | george | 545 | datatype arch = Pentium | PentiumPro | PentiumII | PentiumIII |
49 : | val arch : arch ref | ||
50 : | leunga | 593 | val cvti2f : |
51 : | leunga | 815 | {ty: X86Instr.T.ty, |
52 : | src: X86Instr.operand, | ||
53 : | (* source operand, guaranteed to be non-memory! *) | ||
54 : | an: Annotations.annotations ref (* cluster annotations *) | ||
55 : | } -> | ||
56 : | leunga | 593 | {instrs : X86Instr.instruction list,(* the instructions *) |
57 : | tempMem: X86Instr.operand, (* temporary for CVTI2F *) | ||
58 : | cleanup: X86Instr.instruction list (* cleanup code *) | ||
59 : | } | ||
60 : | leunga | 731 | (* When the following flag is set, we allocate floating point registers |
61 : | * directly on the floating point stack | ||
62 : | *) | ||
63 : | val fast_floating_point : bool ref | ||
64 : | george | 545 | ) : sig include MLTREECOMP |
65 : | val rewriteMemReg : bool | ||
66 : | end = | ||
67 : | monnier | 247 | struct |
68 : | leunga | 775 | structure I = X86Instr |
69 : | structure T = I.T | ||
70 : | george | 984 | structure TS = ExtensionComp.TS |
71 : | george | 545 | structure C = I.C |
72 : | structure Shuffle = Shuffle(I) | ||
73 : | monnier | 247 | structure W32 = Word32 |
74 : | george | 545 | structure A = MLRiscAnnotations |
75 : | george | 909 | structure CFG = ExtensionComp.CFG |
76 : | george | 889 | structure CB = CellsBasis |
77 : | monnier | 247 | |
78 : | george | 984 | type instrStream = (I.instruction,C.cellset,CFG.cfg) TS.stream |
79 : | type mltreeStream = (T.stm,T.mlrisc list,CFG.cfg) TS.stream | ||
80 : | leunga | 565 | |
81 : | datatype kind = REAL | INTEGER | ||
82 : | george | 545 | |
83 : | structure Gen = MLTreeGen | ||
84 : | (structure T = T | ||
85 : | jhr | 1117 | structure Cells = C |
86 : | george | 545 | val intTy = 32 |
87 : | val naturalWidths = [32] | ||
88 : | datatype rep = SE | ZE | NEITHER | ||
89 : | val rep = NEITHER | ||
90 : | ) | ||
91 : | |||
92 : | monnier | 411 | fun error msg = MLRiscErrorMsg.error("X86",msg) |
93 : | monnier | 247 | |
94 : | george | 545 | (* Should we perform automatic MemReg translation? |
95 : | * If this is on, we can avoid doing RewritePseudo phase entirely. | ||
96 : | *) | ||
97 : | val rewriteMemReg = rewriteMemReg | ||
98 : | leunga | 731 | |
99 : | (* The following hardcoded *) | ||
100 : | leunga | 744 | fun isMemReg r = rewriteMemReg andalso |
101 : | george | 889 | let val r = CB.registerNum r |
102 : | leunga | 744 | in r >= 8 andalso r < 32 |
103 : | end | ||
104 : | leunga | 731 | fun isFMemReg r = if enableFastFPMode andalso !fast_floating_point |
105 : | george | 889 | then let val r = CB.registerNum r |
106 : | leunga | 744 | in r >= 8 andalso r < 32 end |
107 : | leunga | 731 | else true |
108 : | leunga | 744 | val isAnyFMemReg = List.exists (fn r => |
109 : | george | 889 | let val r = CB.registerNum r |
110 : | leunga | 744 | in r >= 8 andalso r < 32 end |
111 : | ) | ||
112 : | monnier | 247 | |
113 : | george | 555 | val ST0 = C.ST 0 |
114 : | val ST7 = C.ST 7 | ||
115 : | leunga | 797 | val one = T.I.int_1 |
116 : | george | 555 | |
117 : | leunga | 797 | val opcodes8 = {INC=I.INCB,DEC=I.DECB,ADD=I.ADDB,SUB=I.SUBB, |
118 : | NOT=I.NOTB,NEG=I.NEGB, | ||
119 : | SHL=I.SHLB,SHR=I.SHRB,SAR=I.SARB, | ||
120 : | OR=I.ORB,AND=I.ANDB,XOR=I.XORB} | ||
121 : | val opcodes16 = {INC=I.INCW,DEC=I.DECW,ADD=I.ADDW,SUB=I.SUBW, | ||
122 : | NOT=I.NOTW,NEG=I.NEGW, | ||
123 : | SHL=I.SHLW,SHR=I.SHRW,SAR=I.SARW, | ||
124 : | OR=I.ORW,AND=I.ANDW,XOR=I.XORW} | ||
125 : | val opcodes32 = {INC=I.INCL,DEC=I.DECL,ADD=I.ADDL,SUB=I.SUBL, | ||
126 : | NOT=I.NOTL,NEG=I.NEGL, | ||
127 : | SHL=I.SHLL,SHR=I.SHRL,SAR=I.SARL, | ||
128 : | OR=I.ORL,AND=I.ANDL,XOR=I.XORL} | ||
129 : | |||
130 : | george | 545 | (* |
131 : | * The code generator | ||
132 : | *) | ||
133 : | monnier | 411 | fun selectInstructions |
134 : | george | 545 | (instrStream as |
135 : | george | 1003 | TS.S.STREAM{emit=emitInstruction,defineLabel,entryLabel,pseudoOp, |
136 : | annotation,getAnnotations,beginCluster,endCluster,exitBlock,comment,...}) = | ||
137 : | let | ||
138 : | val emit = emitInstruction o I.INSTR | ||
139 : | exception EA | ||
140 : | monnier | 411 | |
141 : | george | 545 | (* label where a trap is generated -- one per cluster *) |
142 : | val trapLabel = ref (NONE: (I.instruction * Label.label) option) | ||
143 : | monnier | 247 | |
144 : | leunga | 731 | (* flag floating point generation *) |
145 : | val floatingPointUsed = ref false | ||
146 : | |||
147 : | george | 545 | (* effective address of an integer register *) |
148 : | leunga | 731 | fun IntReg r = if isMemReg r then I.MemReg r else I.Direct r |
149 : | and RealReg r = if isFMemReg r then I.FDirect r else I.FPR r | ||
150 : | monnier | 411 | |
151 : | george | 545 | (* Add an overflow trap *) |
152 : | fun trap() = | ||
153 : | george | 1136 | let |
154 : | val jmp = | ||
155 : | george | 545 | case !trapLabel of |
156 : | george | 909 | NONE => let val label = Label.label "trap" () |
157 : | george | 1136 | val jmp = |
158 : | I.ANNOTATION{i=I.jcc{cond=I.O, | ||
159 : | opnd=I.ImmedLabel(T.LABEL label)}, | ||
160 : | a=MLRiscAnnotations.BRANCHPROB (Probability.unlikely)} | ||
161 : | george | 545 | in trapLabel := SOME(jmp, label); jmp end |
162 : | | SOME(jmp, _) => jmp | ||
163 : | george | 1003 | in emitInstruction jmp end |
164 : | monnier | 411 | |
165 : | george | 545 | val newReg = C.newReg |
166 : | val newFreg = C.newFreg | ||
167 : | monnier | 247 | |
168 : | leunga | 731 | fun fsize 32 = I.FP32 |
169 : | | fsize 64 = I.FP64 | ||
170 : | | fsize 80 = I.FP80 | ||
171 : | | fsize _ = error "fsize" | ||
172 : | |||
173 : | george | 545 | (* mark an expression with a list of annotations *) |
174 : | george | 1009 | fun mark'(i,[]) = emitInstruction(i) |
175 : | george | 545 | | mark'(i,a::an) = mark'(I.ANNOTATION{i=i,a=a},an) |
176 : | monnier | 247 | |
177 : | george | 545 | (* annotate an expression and emit it *) |
178 : | george | 1009 | fun mark(i,an) = mark'(I.INSTR i,an) |
179 : | monnier | 247 | |
180 : | george | 1003 | val emits = app emitInstruction |
181 : | leunga | 731 | |
182 : | george | 545 | (* emit parallel copies for integers |
183 : | * Translates parallel copies that involve memregs into | ||
184 : | * individual copies. | ||
185 : | *) | ||
186 : | fun copy([], [], an) = () | ||
187 : | | copy(dst, src, an) = | ||
188 : | let fun mvInstr{dst as I.MemReg rd, src as I.MemReg rs} = | ||
189 : | george | 889 | if CB.sameColor(rd,rs) then [] else |
190 : | george | 545 | let val tmpR = I.Direct(newReg()) |
191 : | george | 1003 | in [I.move{mvOp=I.MOVL, src=src, dst=tmpR}, |
192 : | I.move{mvOp=I.MOVL, src=tmpR, dst=dst}] | ||
193 : | george | 545 | end |
194 : | | mvInstr{dst=I.Direct rd, src=I.Direct rs} = | ||
195 : | george | 889 | if CB.sameColor(rd,rs) then [] |
196 : | george | 1009 | else [I.COPY{k=CB.GP, sz=32, dst=[rd], src=[rs], tmp=NONE}] |
197 : | george | 1003 | | mvInstr{dst, src} = [I.move{mvOp=I.MOVL, src=src, dst=dst}] |
198 : | george | 545 | in |
199 : | leunga | 731 | emits (Shuffle.shuffle{mvInstr=mvInstr, ea=IntReg} |
200 : | leunga | 744 | {tmp=SOME(I.Direct(newReg())), |
201 : | george | 545 | dst=dst, src=src}) |
202 : | end | ||
203 : | |||
204 : | (* conversions *) | ||
205 : | val itow = Word.fromInt | ||
206 : | val wtoi = Word.toInt | ||
207 : | george | 761 | fun toInt32 i = T.I.toInt32(32, i) |
208 : | george | 545 | val w32toi32 = Word32.toLargeIntX |
209 : | val i32tow32 = Word32.fromLargeInt | ||
210 : | monnier | 247 | |
211 : | george | 545 | (* One day, this is going to bite us when precision(LargeInt)>32 *) |
212 : | fun wToInt32 w = Int32.fromLarge(Word32.toLargeIntX w) | ||
213 : | monnier | 247 | |
214 : | george | 545 | (* some useful registers *) |
215 : | val eax = I.Direct(C.eax) | ||
216 : | val ecx = I.Direct(C.ecx) | ||
217 : | val edx = I.Direct(C.edx) | ||
218 : | monnier | 247 | |
219 : | leunga | 775 | fun immedLabel lab = I.ImmedLabel(T.LABEL lab) |
220 : | george | 545 | |
221 : | (* Is the expression zero? *) | ||
222 : | george | 761 | fun isZero(T.LI z) = T.I.isZero z |
223 : | george | 545 | | isZero(T.MARK(e,a)) = isZero e |
224 : | | isZero _ = false | ||
225 : | (* Does the expression set the zero bit? | ||
226 : | * WARNING: we assume these things are not optimized out! | ||
227 : | *) | ||
228 : | fun setZeroBit(T.ANDB _) = true | ||
229 : | | setZeroBit(T.ORB _) = true | ||
230 : | | setZeroBit(T.XORB _) = true | ||
231 : | | setZeroBit(T.SRA _) = true | ||
232 : | | setZeroBit(T.SRL _) = true | ||
233 : | | setZeroBit(T.SLL _) = true | ||
234 : | leunga | 695 | | setZeroBit(T.SUB _) = true |
235 : | | setZeroBit(T.ADDT _) = true | ||
236 : | | setZeroBit(T.SUBT _) = true | ||
237 : | george | 545 | | setZeroBit(T.MARK(e, _)) = setZeroBit e |
238 : | | setZeroBit _ = false | ||
239 : | monnier | 247 | |
240 : | leunga | 695 | fun setZeroBit2(T.ANDB _) = true |
241 : | | setZeroBit2(T.ORB _) = true | ||
242 : | | setZeroBit2(T.XORB _) = true | ||
243 : | | setZeroBit2(T.SRA _) = true | ||
244 : | | setZeroBit2(T.SRL _) = true | ||
245 : | | setZeroBit2(T.SLL _) = true | ||
246 : | | setZeroBit2(T.ADD(32, _, _)) = true (* can't use leal! *) | ||
247 : | | setZeroBit2(T.SUB _) = true | ||
248 : | | setZeroBit2(T.ADDT _) = true | ||
249 : | | setZeroBit2(T.SUBT _) = true | ||
250 : | | setZeroBit2(T.MARK(e, _)) = setZeroBit2 e | ||
251 : | | setZeroBit2 _ = false | ||
252 : | |||
253 : | leunga | 731 | (* emit parallel copies for floating point |
254 : | * Normal version. | ||
255 : | *) | ||
256 : | fun fcopy'(fty, [], [], _) = () | ||
257 : | | fcopy'(fty, dst as [_], src as [_], an) = | ||
258 : | george | 1009 | mark'(I.COPY{k=CB.FP, sz=fty, dst=dst,src=src,tmp=NONE}, an) |
259 : | leunga | 731 | | fcopy'(fty, dst, src, an) = |
260 : | george | 1009 | mark'(I.COPY{k=CB.FP, sz=fty, dst=dst,src=src,tmp=SOME(I.FDirect(newFreg()))}, an) |
261 : | monnier | 247 | |
262 : | leunga | 731 | (* emit parallel copies for floating point. |
263 : | * Fast version. | ||
264 : | * Translates parallel copies that involve memregs into | ||
265 : | * individual copies. | ||
266 : | *) | ||
267 : | |||
268 : | fun fcopy''(fty, [], [], _) = () | ||
269 : | | fcopy''(fty, dst, src, an) = | ||
270 : | if true orelse isAnyFMemReg dst orelse isAnyFMemReg src then | ||
271 : | let val fsize = fsize fty | ||
272 : | george | 1003 | fun mvInstr{dst, src} = [I.fmove{fsize=fsize, src=src, dst=dst}] |
273 : | leunga | 731 | in |
274 : | emits (Shuffle.shuffle{mvInstr=mvInstr, ea=RealReg} | ||
275 : | leunga | 744 | {tmp=case dst of |
276 : | leunga | 731 | [_] => NONE |
277 : | | _ => SOME(I.FPR(newReg())), | ||
278 : | dst=dst, src=src}) | ||
279 : | end | ||
280 : | else | ||
281 : | george | 1009 | mark'(I.COPY{k=CB.FP, sz=fty, dst=dst, |
282 : | src=src,tmp= | ||
283 : | leunga | 731 | case dst of |
284 : | [_] => NONE | ||
285 : | | _ => SOME(I.FPR(newFreg()))}, an) | ||
286 : | |||
287 : | fun fcopy x = if enableFastFPMode andalso !fast_floating_point | ||
288 : | then fcopy'' x else fcopy' x | ||
289 : | |||
290 : | george | 545 | (* Translates MLTREE condition code to x86 condition code *) |
291 : | fun cond T.LT = I.LT | cond T.LTU = I.B | ||
292 : | | cond T.LE = I.LE | cond T.LEU = I.BE | ||
293 : | | cond T.EQ = I.EQ | cond T.NE = I.NE | ||
294 : | | cond T.GE = I.GE | cond T.GEU = I.AE | ||
295 : | | cond T.GT = I.GT | cond T.GTU = I.A | ||
296 : | jhr | 1119 | | cond cc = error(concat["cond(", T.Basis.condToString cc, ")"]) |
297 : | monnier | 247 | |
298 : | leunga | 815 | fun zero dst = emit(I.BINARY{binOp=I.XORL, src=dst, dst=dst}) |
299 : | |||
300 : | george | 545 | (* Move and annotate *) |
301 : | fun move'(src as I.Direct s, dst as I.Direct d, an) = | ||
302 : | george | 889 | if CB.sameColor(s,d) then () |
303 : | george | 1009 | else mark'(I.COPY{k=CB.GP, sz=32, dst=[d], src=[s], tmp=NONE}, an) |
304 : | leunga | 815 | | move'(I.Immed 0, dst as I.Direct d, an) = |
305 : | mark(I.BINARY{binOp=I.XORL, src=dst, dst=dst}, an) | ||
306 : | george | 545 | | move'(src, dst, an) = mark(I.MOVE{mvOp=I.MOVL, src=src, dst=dst}, an) |
307 : | monnier | 247 | |
308 : | george | 545 | (* Move only! *) |
309 : | fun move(src, dst) = move'(src, dst, []) | ||
310 : | monnier | 247 | |
311 : | george | 545 | val readonly = I.Region.readonly |
312 : | monnier | 247 | |
313 : | george | 545 | (* |
314 : | george | 761 | * Compute an effective address. |
315 : | george | 545 | *) |
316 : | george | 761 | fun address(ea, mem) = let |
317 : | george | 545 | (* Keep building a bigger and bigger effective address expressions |
318 : | * The input is a list of trees | ||
319 : | * b -- base | ||
320 : | * i -- index | ||
321 : | * s -- scale | ||
322 : | * d -- immed displacement | ||
323 : | *) | ||
324 : | fun doEA([], b, i, s, d) = makeAddressingMode(b, i, s, d) | ||
325 : | | doEA(t::trees, b, i, s, d) = | ||
326 : | (case t of | ||
327 : | george | 761 | T.LI n => doEAImmed(trees, toInt32 n, b, i, s, d) |
328 : | leunga | 775 | | T.CONST _ => doEALabel(trees, t, b, i, s, d) |
329 : | | T.LABEL _ => doEALabel(trees, t, b, i, s, d) | ||
330 : | | T.LABEXP le => doEALabel(trees, le, b, i, s, d) | ||
331 : | george | 545 | | T.ADD(32, t1, t2 as T.REG(_,r)) => |
332 : | if isMemReg r then doEA(t2::t1::trees, b, i, s, d) | ||
333 : | else doEA(t1::t2::trees, b, i, s, d) | ||
334 : | | T.ADD(32, t1, t2) => doEA(t1::t2::trees, b, i, s, d) | ||
335 : | | T.SUB(32, t1, T.LI n) => | ||
336 : | george | 761 | doEA(t1::T.LI(T.I.NEG(32,n))::trees, b, i, s, d) |
337 : | | T.SLL(32, t1, T.LI n) => let | ||
338 : | val n = T.I.toInt(32, n) | ||
339 : | in | ||
340 : | case n | ||
341 : | of 0 => displace(trees, t1, b, i, s, d) | ||
342 : | | 1 => indexed(trees, t1, t, 1, b, i, s, d) | ||
343 : | | 2 => indexed(trees, t1, t, 2, b, i, s, d) | ||
344 : | | 3 => indexed(trees, t1, t, 3, b, i, s, d) | ||
345 : | | _ => displace(trees, t, b, i, s, d) | ||
346 : | end | ||
347 : | george | 545 | | t => displace(trees, t, b, i, s, d) |
348 : | ) | ||
349 : | monnier | 247 | |
350 : | george | 545 | (* Add an immed constant *) |
351 : | and doEAImmed(trees, 0, b, i, s, d) = doEA(trees, b, i, s, d) | ||
352 : | | doEAImmed(trees, n, b, i, s, I.Immed m) = | ||
353 : | george | 761 | doEA(trees, b, i, s, I.Immed(n+m)) |
354 : | george | 545 | | doEAImmed(trees, n, b, i, s, I.ImmedLabel le) = |
355 : | leunga | 775 | doEA(trees, b, i, s, |
356 : | I.ImmedLabel(T.ADD(32,le,T.LI(T.I.fromInt32(32, n))))) | ||
357 : | george | 545 | | doEAImmed(trees, n, b, i, s, _) = error "doEAImmed" |
358 : | monnier | 247 | |
359 : | george | 545 | (* Add a label expression *) |
360 : | and doEALabel(trees, le, b, i, s, I.Immed 0) = | ||
361 : | doEA(trees, b, i, s, I.ImmedLabel le) | ||
362 : | | doEALabel(trees, le, b, i, s, I.Immed m) = | ||
363 : | doEA(trees, b, i, s, | ||
364 : | leunga | 775 | I.ImmedLabel(T.ADD(32,le,T.LI(T.I.fromInt32(32, m)))) |
365 : | george | 545 | handle Overflow => error "doEALabel: constant too large") |
366 : | | doEALabel(trees, le, b, i, s, I.ImmedLabel le') = | ||
367 : | leunga | 775 | doEA(trees, b, i, s, I.ImmedLabel(T.ADD(32,le,le'))) |
368 : | george | 545 | | doEALabel(trees, le, b, i, s, _) = error "doEALabel" |
369 : | monnier | 247 | |
370 : | george | 545 | and makeAddressingMode(NONE, NONE, _, disp) = disp |
371 : | | makeAddressingMode(SOME base, NONE, _, disp) = | ||
372 : | I.Displace{base=base, disp=disp, mem=mem} | ||
373 : | | makeAddressingMode(base, SOME index, scale, disp) = | ||
374 : | george | 761 | I.Indexed{base=base, index=index, scale=scale, |
375 : | george | 545 | disp=disp, mem=mem} |
376 : | monnier | 247 | |
377 : | george | 545 | (* generate code for tree and ensure that it is not in %esp *) |
378 : | and exprNotEsp tree = | ||
379 : | let val r = expr tree | ||
380 : | george | 889 | in if CB.sameColor(r, C.esp) then |
381 : | george | 545 | let val tmp = newReg() |
382 : | in move(I.Direct r, I.Direct tmp); tmp end | ||
383 : | else r | ||
384 : | end | ||
385 : | monnier | 247 | |
386 : | george | 545 | (* Add a base register *) |
387 : | and displace(trees, t, NONE, i, s, d) = (* no base yet *) | ||
388 : | doEA(trees, SOME(expr t), i, s, d) | ||
389 : | | displace(trees, t, b as SOME base, NONE, _, d) = (* no index *) | ||
390 : | (* make t the index, but make sure that it is not %esp! *) | ||
391 : | let val i = expr t | ||
392 : | george | 889 | in if CB.sameColor(i, C.esp) then |
393 : | george | 545 | (* swap base and index *) |
394 : | george | 889 | if CB.sameColor(base, C.esp) then |
395 : | george | 545 | doEA(trees, SOME i, b, 0, d) |
396 : | else (* base and index = %esp! *) | ||
397 : | let val index = newReg() | ||
398 : | in move(I.Direct i, I.Direct index); | ||
399 : | doEA(trees, b, SOME index, 0, d) | ||
400 : | end | ||
401 : | else | ||
402 : | doEA(trees, b, SOME i, 0, d) | ||
403 : | end | ||
404 : | | displace(trees, t, SOME base, i, s, d) = (* base and index *) | ||
405 : | let val b = expr(T.ADD(32,T.REG(32,base),t)) | ||
406 : | in doEA(trees, SOME b, i, s, d) end | ||
407 : | monnier | 247 | |
408 : | george | 545 | (* Add an indexed register *) |
409 : | and indexed(trees, t, t0, scale, b, NONE, _, d) = (* no index yet *) | ||
410 : | doEA(trees, b, SOME(exprNotEsp t), scale, d) | ||
411 : | | indexed(trees, _, t0, _, NONE, i, s, d) = (* no base *) | ||
412 : | doEA(trees, SOME(expr t0), i, s, d) | ||
413 : | | indexed(trees, _, t0, _, SOME base, i, s, d) = (*base and index*) | ||
414 : | let val b = expr(T.ADD(32, t0, T.REG(32, base))) | ||
415 : | in doEA(trees, SOME b, i, s, d) end | ||
416 : | |||
417 : | in case doEA([ea], NONE, NONE, 0, I.Immed 0) of | ||
418 : | I.Immed _ => raise EA | ||
419 : | | I.ImmedLabel le => I.LabelEA le | ||
420 : | | ea => ea | ||
421 : | end (* address *) | ||
422 : | monnier | 247 | |
423 : | george | 545 | (* reduce an expression into an operand *) |
424 : | george | 761 | and operand(T.LI i) = I.Immed(toInt32(i)) |
425 : | leunga | 775 | | operand(x as (T.CONST _ | T.LABEL _)) = I.ImmedLabel x |
426 : | | operand(T.LABEXP le) = I.ImmedLabel le | ||
427 : | george | 545 | | operand(T.REG(_,r)) = IntReg r |
428 : | | operand(T.LOAD(32,ea,mem)) = address(ea, mem) | ||
429 : | | operand(t) = I.Direct(expr t) | ||
430 : | monnier | 247 | |
431 : | george | 545 | and moveToReg(opnd) = |
432 : | let val dst = I.Direct(newReg()) | ||
433 : | in move(opnd, dst); dst | ||
434 : | end | ||
435 : | monnier | 247 | |
436 : | george | 545 | and reduceOpnd(I.Direct r) = r |
437 : | | reduceOpnd opnd = | ||
438 : | let val dst = newReg() | ||
439 : | in move(opnd, I.Direct dst); dst | ||
440 : | end | ||
441 : | monnier | 247 | |
442 : | george | 545 | (* ensure that the operand is either an immed or register *) |
443 : | and immedOrReg(opnd as I.Displace _) = moveToReg opnd | ||
444 : | | immedOrReg(opnd as I.Indexed _) = moveToReg opnd | ||
445 : | | immedOrReg(opnd as I.MemReg _) = moveToReg opnd | ||
446 : | | immedOrReg(opnd as I.LabelEA _) = moveToReg opnd | ||
447 : | | immedOrReg opnd = opnd | ||
448 : | monnier | 247 | |
449 : | george | 545 | and isImmediate(I.Immed _) = true |
450 : | | isImmediate(I.ImmedLabel _) = true | ||
451 : | | isImmediate _ = false | ||
452 : | monnier | 247 | |
453 : | george | 545 | and regOrMem opnd = if isImmediate opnd then moveToReg opnd else opnd |
454 : | |||
455 : | and isMemOpnd opnd = | ||
456 : | (case opnd of | ||
457 : | I.Displace _ => true | ||
458 : | | I.Indexed _ => true | ||
459 : | | I.MemReg _ => true | ||
460 : | | I.LabelEA _ => true | ||
461 : | george | 555 | | I.FDirect f => true |
462 : | george | 545 | | _ => false |
463 : | ) | ||
464 : | |||
465 : | (* | ||
466 : | * Compute an integer expression and put the result in | ||
467 : | * the destination register rd. | ||
468 : | *) | ||
469 : | george | 889 | and doExpr(exp, rd : CB.cell, an) = |
470 : | george | 545 | let val rdOpnd = IntReg rd |
471 : | monnier | 247 | |
472 : | george | 889 | fun equalRd(I.Direct r) = CB.sameColor(r, rd) |
473 : | | equalRd(I.MemReg r) = CB.sameColor(r, rd) | ||
474 : | george | 545 | | equalRd _ = false |
475 : | monnier | 247 | |
476 : | george | 545 | (* Emit a binary operator. If the destination is |
477 : | * a memReg, do something smarter. | ||
478 : | *) | ||
479 : | fun genBinary(binOp, opnd1, opnd2) = | ||
480 : | if isMemReg rd andalso | ||
481 : | (isMemOpnd opnd1 orelse isMemOpnd opnd2) orelse | ||
482 : | equalRd(opnd2) | ||
483 : | then | ||
484 : | let val tmpR = newReg() | ||
485 : | val tmp = I.Direct tmpR | ||
486 : | in move(opnd1, tmp); | ||
487 : | mark(I.BINARY{binOp=binOp, src=opnd2, dst=tmp}, an); | ||
488 : | move(tmp, rdOpnd) | ||
489 : | end | ||
490 : | else | ||
491 : | (move(opnd1, rdOpnd); | ||
492 : | mark(I.BINARY{binOp=binOp, src=opnd2, dst=rdOpnd}, an) | ||
493 : | ) | ||
494 : | monnier | 247 | |
495 : | george | 545 | (* Generate a binary operator; it may commute *) |
496 : | fun binaryComm(binOp, e1, e2) = | ||
497 : | let val (opnd1, opnd2) = | ||
498 : | case (operand e1, operand e2) of | ||
499 : | (x as I.Immed _, y) => (y, x) | ||
500 : | | (x as I.ImmedLabel _, y) => (y, x) | ||
501 : | | (x, y as I.Direct _) => (y, x) | ||
502 : | | (x, y) => (x, y) | ||
503 : | in genBinary(binOp, opnd1, opnd2) | ||
504 : | end | ||
505 : | |||
506 : | (* Generate a binary operator; non-commutative *) | ||
507 : | fun binary(binOp, e1, e2) = | ||
508 : | genBinary(binOp, operand e1, operand e2) | ||
509 : | |||
510 : | (* Generate a unary operator *) | ||
511 : | fun unary(unOp, e) = | ||
512 : | let val opnd = operand e | ||
513 : | in if isMemReg rd andalso isMemOpnd opnd then | ||
514 : | let val tmp = I.Direct(newReg()) | ||
515 : | in move(opnd, tmp); move(tmp, rdOpnd) | ||
516 : | end | ||
517 : | else move(opnd, rdOpnd); | ||
518 : | mark(I.UNARY{unOp=unOp, opnd=rdOpnd}, an) | ||
519 : | end | ||
520 : | |||
521 : | (* Generate shifts; the shift | ||
522 : | * amount must be a constant or in %ecx *) | ||
523 : | fun shift(opcode, e1, e2) = | ||
524 : | let val (opnd1, opnd2) = (operand e1, operand e2) | ||
525 : | in case opnd2 of | ||
526 : | I.Immed _ => genBinary(opcode, opnd1, opnd2) | ||
527 : | | _ => | ||
528 : | if equalRd(opnd2) then | ||
529 : | let val tmpR = newReg() | ||
530 : | val tmp = I.Direct tmpR | ||
531 : | in move(opnd1, tmp); | ||
532 : | move(opnd2, ecx); | ||
533 : | mark(I.BINARY{binOp=opcode, src=ecx, dst=tmp},an); | ||
534 : | move(tmp, rdOpnd) | ||
535 : | end | ||
536 : | else | ||
537 : | (move(opnd1, rdOpnd); | ||
538 : | move(opnd2, ecx); | ||
539 : | mark(I.BINARY{binOp=opcode, src=ecx, dst=rdOpnd},an) | ||
540 : | ) | ||
541 : | end | ||
542 : | |||
543 : | blume | 1185 | (* Division or remainder: divisor must be in %edx:%eax pair *) |
544 : | george | 545 | fun divrem(signed, overflow, e1, e2, resultReg) = |
545 : | let val (opnd1, opnd2) = (operand e1, operand e2) | ||
546 : | val _ = move(opnd1, eax) | ||
547 : | leunga | 815 | val oper = if signed then (emit(I.CDQ); I.IDIVL1) |
548 : | else (zero edx; I.DIVL1) | ||
549 : | george | 545 | in mark(I.MULTDIV{multDivOp=oper, src=regOrMem opnd2},an); |
550 : | move(resultReg, rdOpnd); | ||
551 : | if overflow then trap() else () | ||
552 : | end | ||
553 : | monnier | 247 | |
554 : | blume | 1185 | (* division with rounding towards negative infinity *) |
555 : | fun divinf0 (overflow, e1, e2) = let | ||
556 : | val o1 = operand e1 | ||
557 : | val o2 = operand e2 | ||
558 : | val l = Label.anon () | ||
559 : | in | ||
560 : | move (o1, eax); | ||
561 : | emit I.CDQ; | ||
562 : | mark (I.MULTDIV { multDivOp = I.IDIVL1, src = regOrMem o2 }, | ||
563 : | an); | ||
564 : | if overflow then trap() else (); | ||
565 : | app emit [I.CMPL { lsrc = edx, rsrc = I.Immed 0 }, | ||
566 : | I.JCC { cond = I.EQ, opnd = immedLabel l }, | ||
567 : | I.BINARY { binOp = I.XORL, | ||
568 : | src = regOrMem o2, | ||
569 : | dst = edx }, | ||
570 : | I.JCC { cond = I.GE, opnd = immedLabel l }, | ||
571 : | I.UNARY { unOp = I.DECL, opnd = eax }]; | ||
572 : | defineLabel l; | ||
573 : | move (eax, rdOpnd) | ||
574 : | end | ||
575 : | |||
576 : | (* analyze for power-of-two-ness *) | ||
577 : | fun analyze i' = let | ||
578 : | val i = toInt32 i' | ||
579 : | in | ||
580 : | let val (isneg, a, w) = | ||
581 : | if i >= 0 then (false, i, T.I.toWord32 (32, i')) | ||
582 : | else (true, ~i, T.I.toWord32 (32, T.I.NEG (32, i'))) | ||
583 : | fun log2 (0w1, p) = p | ||
584 : | | log2 (w, p) = log2 (W32.>> (w, 0w1), p + 1) | ||
585 : | in | ||
586 : | if w > 0w1 andalso W32.andb (w - 0w1, w) = 0w0 then | ||
587 : | (i, SOME (isneg, a, | ||
588 : | T.LI (T.I.fromInt32 (32, log2 (w, 0))))) | ||
589 : | else (i, NONE) | ||
590 : | end handle _ => (i, NONE) | ||
591 : | end | ||
592 : | |||
593 : | (* Division by a power of two when rounding to neginf is the | ||
594 : | * same as an arithmetic right shift. *) | ||
595 : | fun divinf (overflow, e1, e2 as T.LI n') = | ||
596 : | (case analyze n' of | ||
597 : | (_, NONE) => divinf0 (overflow, e1, e2) | ||
598 : | | (_, SOME (false, _, p)) => | ||
599 : | shift (I.SARL, T.REG (32, expr e1), p) | ||
600 : | | (_, SOME (true, _, p)) => let | ||
601 : | val reg = expr e1 | ||
602 : | in | ||
603 : | emit(I.UNARY { unOp = I.NEGL, opnd = I.Direct reg }); | ||
604 : | shift (I.SARL, T.REG (32, reg), p) | ||
605 : | end) | ||
606 : | | divinf (overflow, e1, e2) = divinf0 (overflow, e1, e2) | ||
607 : | |||
608 : | fun reminf0 (e1, e2) = let | ||
609 : | val o1 = operand e1 | ||
610 : | val o2 = operand e2 | ||
611 : | val l = Label.anon () | ||
612 : | in | ||
613 : | move (o1, eax); | ||
614 : | emit I.CDQ; | ||
615 : | mark (I.MULTDIV { multDivOp = I.IDIVL1, src = regOrMem o2 }, | ||
616 : | an); | ||
617 : | app emit [I.CMPL { lsrc = edx, rsrc = I.Immed 0 }, | ||
618 : | I.JCC { cond = I.EQ, opnd = immedLabel l }]; | ||
619 : | move (edx, eax); | ||
620 : | app emit [I.BINARY { binOp = I.XORL, | ||
621 : | src = regOrMem o2, dst = eax }, | ||
622 : | I.JCC { cond = I.GE, opnd = immedLabel l }, | ||
623 : | I.BINARY { binOp = I.ADDL, | ||
624 : | src = regOrMem o2, dst = edx }]; | ||
625 : | defineLabel l; | ||
626 : | move (edx, rdOpnd) | ||
627 : | end | ||
628 : | |||
629 : | (* n mod (power-of-2) corresponds to a bitmask (AND). | ||
630 : | * If the power is negative, then we must first negate | ||
631 : | * the argument and then again negate the result. *) | ||
632 : | fun reminf (e1, e2 as T.LI n') = | ||
633 : | (case analyze n' of | ||
634 : | (_, NONE) => reminf0 (e1, e2) | ||
635 : | | (_, SOME (false, a, _)) => | ||
636 : | binaryComm (I.ANDL, e1, | ||
637 : | T.LI (T.I.fromInt32 (32, a - 1))) | ||
638 : | | (_, SOME (true, a, _)) => let | ||
639 : | val r1 = expr e1 | ||
640 : | val o1 = I.Direct r1 | ||
641 : | in | ||
642 : | emit (I.UNARY { unOp = I.NEGL, opnd = o1 }); | ||
643 : | emit (I.BINARY { binOp = I.ANDL, | ||
644 : | src = I.Immed (a - 1), | ||
645 : | dst = o1 }); | ||
646 : | unary (I.NEGL, T.REG (32, r1)) | ||
647 : | end) | ||
648 : | | reminf (e1, e2) = reminf0 (e1, e2) | ||
649 : | |||
650 : | (* Optimize the special case for division *) | ||
651 : | fun divide (signed, overflow, e1, e2 as T.LI n') = | ||
652 : | (case analyze n' of | ||
653 : | (n, SOME (isneg, a, p)) => | ||
654 : | if signed then | ||
655 : | let val label = Label.anon () | ||
656 : | val reg1 = expr e1 | ||
657 : | val opnd1 = I.Direct reg1 | ||
658 : | in | ||
659 : | if isneg then | ||
660 : | emit (I.UNARY { unOp = I.NEGL, | ||
661 : | opnd = opnd1 }) | ||
662 : | else if setZeroBit e1 then () | ||
663 : | else emit (I.CMPL { lsrc = opnd1, | ||
664 : | rsrc = I.Immed 0 }); | ||
665 : | emit (I.JCC { cond = I.GE, | ||
666 : | opnd = immedLabel label }); | ||
667 : | emit (if a = 2 then | ||
668 : | I.UNARY { unOp = I.INCL, | ||
669 : | opnd = opnd1 } | ||
670 : | else | ||
671 : | I.BINARY { binOp = I.ADDL, | ||
672 : | src = I.Immed (a - 1), | ||
673 : | dst = opnd1 }); | ||
674 : | defineLabel label; | ||
675 : | shift (I.SARL, T.REG (32, reg1), p) | ||
676 : | end | ||
677 : | else shift (I.SHRL, e1, p) | ||
678 : | | (n, NONE) => | ||
679 : | divrem(signed, overflow andalso (n = ~1 orelse n = 0), | ||
680 : | e1, e2, eax)) | ||
681 : | | divide (signed, overflow, e1, e2) = | ||
682 : | divrem (signed, overflow, e1, e2, eax) | ||
683 : | |||
684 : | blume | 1183 | (* rem never causes overflow *) |
685 : | blume | 1185 | fun rem (signed, e1, e2 as T.LI n') = |
686 : | (case analyze n' of | ||
687 : | (n, SOME (isneg, a, _)) => | ||
688 : | if signed then | ||
689 : | (* The following logic should work uniformely | ||
690 : | * for both isneg and not isneg. It only uses | ||
691 : | * the absolute value (a) of the divisor. | ||
692 : | * Here is the formula: | ||
693 : | * let p be a power of two and a = abs(p): | ||
694 : | * | ||
695 : | * x % p = x - ((x < 0 ? x + a - 1 : x) & (-a)) | ||
696 : | * | ||
697 : | * (That's what GCC seems to do.) | ||
698 : | *) | ||
699 : | let val r1 = expr e1 | ||
700 : | val o1 = I.Direct r1 | ||
701 : | val rt = newReg () | ||
702 : | val tmp = I.Direct rt | ||
703 : | val l = Label.anon () | ||
704 : | in | ||
705 : | move (o1, tmp); | ||
706 : | if setZeroBit e1 then () | ||
707 : | else emit (I.CMPL { lsrc = o1, | ||
708 : | rsrc = I.Immed 0 }); | ||
709 : | emit (I.JCC { cond = I.GE, | ||
710 : | opnd = immedLabel l }); | ||
711 : | emit (I.BINARY { binOp = I.ADDL, | ||
712 : | src = I.Immed (a - 1), | ||
713 : | dst = tmp }); | ||
714 : | defineLabel l; | ||
715 : | emit (I.BINARY { binOp = I.ANDL, | ||
716 : | src = I.Immed (~a), | ||
717 : | dst = tmp }); | ||
718 : | leunga | 1296 | binary (I.SUBL, T.REG (32, r1), T.REG (32, rt)) |
719 : | blume | 1185 | end |
720 : | else | ||
721 : | if isneg then | ||
722 : | (* this is really strange... *) | ||
723 : | divrem (false, false, e1, e2, edx) | ||
724 : | else | ||
725 : | binaryComm (I.ANDL, e1, | ||
726 : | T.LI (T.I.fromInt32 (32, n - 1))) | ||
727 : | | (_, NONE) => divrem (signed, false, e1, e2, edx)) | ||
728 : | | rem(signed, e1, e2) = | ||
729 : | divrem(signed, false, e1, e2, edx) | ||
730 : | leunga | 815 | |
731 : | (* Makes sure the destination must be a register *) | ||
732 : | fun dstMustBeReg f = | ||
733 : | if isMemReg rd then | ||
734 : | let val tmpR = newReg() | ||
735 : | val tmp = I.Direct(tmpR) | ||
736 : | in f(tmpR, tmp); move(tmp, rdOpnd) end | ||
737 : | else f(rd, rdOpnd) | ||
738 : | |||
739 : | george | 545 | (* unsigned integer multiplication *) |
740 : | blume | 1185 | fun uMultiply0 (e1, e2) = |
741 : | george | 545 | (* note e2 can never be (I.Direct edx) *) |
742 : | (move(operand e1, eax); | ||
743 : | leunga | 815 | mark(I.MULTDIV{multDivOp=I.MULL1, |
744 : | george | 545 | src=regOrMem(operand e2)},an); |
745 : | move(eax, rdOpnd) | ||
746 : | ) | ||
747 : | |||
748 : | blume | 1185 | fun uMultiply (e1, e2 as T.LI n') = |
749 : | (case analyze n' of | ||
750 : | (_, SOME (false, _, p)) => shift (I.SHLL, e1, p) | ||
751 : | | _ => uMultiply0 (e1, e2)) | ||
752 : | | uMultiply (e1 as T.LI _, e2) = uMultiply (e2, e1) | ||
753 : | | uMultiply (e1, e2) = uMultiply0 (e1, e2) | ||
754 : | |||
755 : | george | 545 | (* signed integer multiplication: |
756 : | * The only forms that are allowed that also sets the | ||
757 : | * OF and CF flags are: | ||
758 : | * | ||
759 : | leunga | 815 | * (dst) (src1) (src2) |
760 : | george | 545 | * imul r32, r32/m32, imm8 |
761 : | leunga | 815 | * (dst) (src) |
762 : | george | 545 | * imul r32, imm8 |
763 : | * imul r32, imm32 | ||
764 : | leunga | 815 | * imul r32, r32/m32 |
765 : | * Note: destination must be a register! | ||
766 : | george | 545 | *) |
767 : | blume | 1185 | fun multiply (e1, e2) = |
768 : | leunga | 815 | dstMustBeReg(fn (rd, rdOpnd) => |
769 : | let fun doit(i1 as I.Immed _, i2 as I.Immed _) = | ||
770 : | (move(i1, rdOpnd); | ||
771 : | mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=i2},an)) | ||
772 : | | doit(rm, i2 as I.Immed _) = doit(i2, rm) | ||
773 : | | doit(imm as I.Immed(i), rm) = | ||
774 : | mark(I.MUL3{dst=rd, src1=rm, src2=i},an) | ||
775 : | | doit(r1 as I.Direct _, r2 as I.Direct _) = | ||
776 : | (move(r1, rdOpnd); | ||
777 : | mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=r2},an)) | ||
778 : | | doit(r1 as I.Direct _, rm) = | ||
779 : | (move(r1, rdOpnd); | ||
780 : | mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=rm},an)) | ||
781 : | | doit(rm, r as I.Direct _) = doit(r, rm) | ||
782 : | | doit(rm1, rm2) = | ||
783 : | george | 545 | if equalRd rm2 then |
784 : | let val tmpR = newReg() | ||
785 : | val tmp = I.Direct tmpR | ||
786 : | in move(rm1, tmp); | ||
787 : | leunga | 815 | mark(I.BINARY{binOp=I.IMULL, dst=tmp, src=rm2},an); |
788 : | move(tmp, rdOpnd) | ||
789 : | george | 545 | end |
790 : | else | ||
791 : | leunga | 815 | (move(rm1, rdOpnd); |
792 : | mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=rm2},an) | ||
793 : | george | 545 | ) |
794 : | val (opnd1, opnd2) = (operand e1, operand e2) | ||
795 : | leunga | 815 | in doit(opnd1, opnd2) |
796 : | george | 545 | end |
797 : | leunga | 815 | ) |
798 : | monnier | 247 | |
799 : | blume | 1185 | fun multiply_notrap (e1, e2 as T.LI n') = |
800 : | (case analyze n' of | ||
801 : | (_, SOME (isneg, _, p)) => let | ||
802 : | val r1 = expr e1 | ||
803 : | val o1 = I.Direct r1 | ||
804 : | in | ||
805 : | if isneg then | ||
806 : | emit (I.UNARY { unOp = I.NEGL, opnd = o1 }) | ||
807 : | else (); | ||
808 : | shift (I.SHLL, T.REG (32, r1), p) | ||
809 : | end | ||
810 : | | _ => multiply (e1, e2)) | ||
811 : | | multiply_notrap (e1 as T.LI _, e2) = multiply_notrap (e2, e1) | ||
812 : | | multiply_notrap (e1, e2) = multiply (e1, e2) | ||
813 : | |||
814 : | george | 545 | (* Emit a load instruction; makes sure that the destination |
815 : | * is a register | ||
816 : | *) | ||
817 : | fun genLoad(mvOp, ea, mem) = | ||
818 : | dstMustBeReg(fn (_, dst) => | ||
819 : | mark(I.MOVE{mvOp=mvOp, src=address(ea, mem), dst=dst},an)) | ||
820 : | |||
821 : | (* Generate a zero extended loads *) | ||
822 : | fun load8(ea, mem) = genLoad(I.MOVZBL, ea, mem) | ||
823 : | fun load16(ea, mem) = genLoad(I.MOVZWL, ea, mem) | ||
824 : | fun load8s(ea, mem) = genLoad(I.MOVSBL, ea, mem) | ||
825 : | fun load16s(ea, mem) = genLoad(I.MOVSWL, ea, mem) | ||
826 : | fun load32(ea, mem) = genLoad(I.MOVL, ea, mem) | ||
827 : | |||
828 : | (* Generate a sign extended loads *) | ||
829 : | |||
830 : | (* Generate setcc instruction: | ||
831 : | * semantics: MV(rd, COND(_, T.CMP(ty, cc, t1, t2), yes, no)) | ||
832 : | leunga | 583 | * Bug, if eax is either t1 or t2 then problem will occur!!! |
833 : | * Note that we have to use eax as the destination of the | ||
834 : | * setcc because it only works on the registers | ||
835 : | * %al, %bl, %cl, %dl and %[abcd]h. The last four registers | ||
836 : | * are inaccessible in 32 bit mode. | ||
837 : | george | 545 | *) |
838 : | fun setcc(ty, cc, t1, t2, yes, no) = | ||
839 : | leunga | 583 | let val (cc, yes, no) = |
840 : | if yes > no then (cc, yes, no) | ||
841 : | else (T.Basis.negateCond cc, no, yes) | ||
842 : | george | 545 | in (* Clear the destination first. |
843 : | * This this because stupid SETcc | ||
844 : | * only writes to the low order | ||
845 : | * byte. That's Intel architecture, folks. | ||
846 : | *) | ||
847 : | leunga | 695 | case (yes, no, cc) of |
848 : | (1, 0, T.LT) => | ||
849 : | let val tmp = I.Direct(expr(T.SUB(32,t1,t2))) | ||
850 : | in move(tmp, rdOpnd); | ||
851 : | emit(I.BINARY{binOp=I.SHRL,src=I.Immed 31,dst=rdOpnd}) | ||
852 : | end | ||
853 : | | (1, 0, T.GT) => | ||
854 : | let val tmp = I.Direct(expr(T.SUB(32,t1,t2))) | ||
855 : | in emit(I.UNARY{unOp=I.NOTL,opnd=tmp}); | ||
856 : | move(tmp, rdOpnd); | ||
857 : | emit(I.BINARY{binOp=I.SHRL,src=I.Immed 31,dst=rdOpnd}) | ||
858 : | end | ||
859 : | | (1, 0, _) => (* normal case *) | ||
860 : | george | 545 | let val cc = cmp(true, ty, cc, t1, t2, []) |
861 : | leunga | 583 | in mark(I.SET{cond=cond cc, opnd=eax}, an); |
862 : | leunga | 695 | emit(I.BINARY{binOp=I.ANDL,src=I.Immed 255, dst=eax}); |
863 : | leunga | 583 | move(eax, rdOpnd) |
864 : | end | ||
865 : | leunga | 695 | | (C1, C2, _) => |
866 : | george | 545 | (* general case; |
867 : | leunga | 583 | * from the Intel optimization guide p3-5 |
868 : | *) | ||
869 : | leunga | 695 | let val _ = zero eax; |
870 : | val cc = cmp(true, ty, cc, t1, t2, []) | ||
871 : | leunga | 583 | in case C1-C2 of |
872 : | D as (1 | 2 | 3 | 4 | 5 | 8 | 9) => | ||
873 : | let val (base,scale) = | ||
874 : | case D of | ||
875 : | 1 => (NONE, 0) | ||
876 : | | 2 => (NONE, 1) | ||
877 : | | 3 => (SOME C.eax, 1) | ||
878 : | | 4 => (NONE, 2) | ||
879 : | | 5 => (SOME C.eax, 2) | ||
880 : | | 8 => (NONE, 3) | ||
881 : | | 9 => (SOME C.eax, 3) | ||
882 : | val addr = I.Indexed{base=base, | ||
883 : | index=C.eax, | ||
884 : | scale=scale, | ||
885 : | disp=I.Immed C2, | ||
886 : | george | 545 | mem=readonly} |
887 : | leunga | 583 | val tmpR = newReg() |
888 : | val tmp = I.Direct tmpR | ||
889 : | in emit(I.SET{cond=cond cc, opnd=eax}); | ||
890 : | mark(I.LEA{r32=tmpR, addr=addr}, an); | ||
891 : | move(tmp, rdOpnd) | ||
892 : | end | ||
893 : | | D => | ||
894 : | (emit(I.SET{cond=cond(T.Basis.negateCond cc), | ||
895 : | opnd=eax}); | ||
896 : | emit(I.UNARY{unOp=I.DECL, opnd=eax}); | ||
897 : | emit(I.BINARY{binOp=I.ANDL, | ||
898 : | src=I.Immed D, dst=eax}); | ||
899 : | if C2 = 0 then | ||
900 : | move(eax, rdOpnd) | ||
901 : | else | ||
902 : | let val tmpR = newReg() | ||
903 : | val tmp = I.Direct tmpR | ||
904 : | in mark(I.LEA{addr= | ||
905 : | I.Displace{ | ||
906 : | base=C.eax, | ||
907 : | disp=I.Immed C2, | ||
908 : | mem=readonly}, | ||
909 : | r32=tmpR}, an); | ||
910 : | move(tmp, rdOpnd) | ||
911 : | end | ||
912 : | ) | ||
913 : | end | ||
914 : | george | 545 | end (* setcc *) |
915 : | |||
916 : | (* Generate cmovcc instruction. | ||
917 : | * on Pentium Pro and Pentium II only | ||
918 : | *) | ||
919 : | fun cmovcc(ty, cc, t1, t2, yes, no) = | ||
920 : | let fun genCmov(dstR, _) = | ||
921 : | let val _ = doExpr(no, dstR, []) (* false branch *) | ||
922 : | val cc = cmp(true, ty, cc, t1, t2, []) (* compare *) | ||
923 : | leunga | 1127 | in mark(I.CMOV{cond=cond cc, src=regOrMem(operand yes), |
924 : | dst=dstR}, an) | ||
925 : | george | 545 | end |
926 : | in dstMustBeReg genCmov | ||
927 : | end | ||
928 : | |||
929 : | fun unknownExp exp = doExpr(Gen.compileRexp exp, rd, an) | ||
930 : | monnier | 247 | |
931 : | leunga | 606 | (* Add n to rd *) |
932 : | fun addN n = | ||
933 : | let val n = operand n | ||
934 : | val src = if isMemReg rd then immedOrReg n else n | ||
935 : | in mark(I.BINARY{binOp=I.ADDL, src=src, dst=rdOpnd}, an) end | ||
936 : | |||
937 : | george | 545 | (* Generate addition *) |
938 : | fun addition(e1, e2) = | ||
939 : | leunga | 606 | case e1 of |
940 : | george | 889 | T.REG(_,rs) => if CB.sameColor(rs,rd) then addN e2 |
941 : | leunga | 744 | else addition1(e1,e2) |
942 : | leunga | 606 | | _ => addition1(e1,e2) |
943 : | and addition1(e1, e2) = | ||
944 : | case e2 of | ||
945 : | george | 889 | T.REG(_,rs) => if CB.sameColor(rs,rd) then addN e1 |
946 : | leunga | 744 | else addition2(e1,e2) |
947 : | leunga | 606 | | _ => addition2(e1,e2) |
948 : | and addition2(e1,e2) = | ||
949 : | george | 545 | (dstMustBeReg(fn (dstR, _) => |
950 : | mark(I.LEA{r32=dstR, addr=address(exp, readonly)}, an)) | ||
951 : | handle EA => binaryComm(I.ADDL, e1, e2)) | ||
952 : | monnier | 247 | |
953 : | |||
954 : | george | 545 | in case exp of |
955 : | T.REG(_,rs) => | ||
956 : | if isMemReg rs andalso isMemReg rd then | ||
957 : | let val tmp = I.Direct(newReg()) | ||
958 : | leunga | 731 | in move'(I.MemReg rs, tmp, an); |
959 : | george | 545 | move'(tmp, rdOpnd, []) |
960 : | end | ||
961 : | else move'(IntReg rs, rdOpnd, an) | ||
962 : | george | 761 | | T.LI z => let |
963 : | val n = toInt32 z | ||
964 : | in | ||
965 : | if n=0 then | ||
966 : | (* As per Fermin's request, special optimization for rd := 0. | ||
967 : | * Currently we don't bother with the size. | ||
968 : | *) | ||
969 : | if isMemReg rd then move'(I.Immed 0, rdOpnd, an) | ||
970 : | else mark(I.BINARY{binOp=I.XORL, src=rdOpnd, dst=rdOpnd}, an) | ||
971 : | else | ||
972 : | move'(I.Immed(n), rdOpnd, an) | ||
973 : | end | ||
974 : | leunga | 775 | | (T.CONST _ | T.LABEL _) => |
975 : | move'(I.ImmedLabel exp, rdOpnd, an) | ||
976 : | | T.LABEXP le => move'(I.ImmedLabel le, rdOpnd, an) | ||
977 : | monnier | 247 | |
978 : | george | 545 | (* 32-bit addition *) |
979 : | george | 761 | | T.ADD(32, e1, e2 as T.LI n) => let |
980 : | val n = toInt32 n | ||
981 : | in | ||
982 : | case n | ||
983 : | of 1 => unary(I.INCL, e1) | ||
984 : | | ~1 => unary(I.DECL, e1) | ||
985 : | | _ => addition(e1, e2) | ||
986 : | end | ||
987 : | | T.ADD(32, e1 as T.LI n, e2) => let | ||
988 : | val n = toInt32 n | ||
989 : | in | ||
990 : | case n | ||
991 : | of 1 => unary(I.INCL, e2) | ||
992 : | | ~1 => unary(I.DECL, e2) | ||
993 : | | _ => addition(e1, e2) | ||
994 : | end | ||
995 : | george | 545 | | T.ADD(32, e1, e2) => addition(e1, e2) |
996 : | monnier | 247 | |
997 : | leunga | 695 | (* 32-bit addition but set the flag! |
998 : | * This is a stupid hack for now. | ||
999 : | *) | ||
1000 : | george | 761 | | T.ADD(0, e, e1 as T.LI n) => let |
1001 : | val n = T.I.toInt(32, n) | ||
1002 : | in | ||
1003 : | if n=1 then unary(I.INCL, e) | ||
1004 : | else if n = ~1 then unary(I.DECL, e) | ||
1005 : | else binaryComm(I.ADDL, e, e1) | ||
1006 : | end | ||
1007 : | | T.ADD(0, e1 as T.LI n, e) => let | ||
1008 : | val n = T.I.toInt(32, n) | ||
1009 : | in | ||
1010 : | if n=1 then unary(I.INCL, e) | ||
1011 : | else if n = ~1 then unary(I.DECL, e) | ||
1012 : | else binaryComm(I.ADDL, e1, e) | ||
1013 : | end | ||
1014 : | | T.ADD(0, e1, e2) => binaryComm(I.ADDL, e1, e2) | ||
1015 : | |||
1016 : | george | 545 | (* 32-bit subtraction *) |
1017 : | george | 761 | | T.SUB(32, e1, e2 as T.LI n) => let |
1018 : | val n = toInt32 n | ||
1019 : | in | ||
1020 : | case n | ||
1021 : | of 0 => doExpr(e1, rd, an) | ||
1022 : | | 1 => unary(I.DECL, e1) | ||
1023 : | | ~1 => unary(I.INCL, e1) | ||
1024 : | | _ => binary(I.SUBL, e1, e2) | ||
1025 : | end | ||
1026 : | | T.SUB(32, e1 as T.LI n, e2) => | ||
1027 : | if T.I.isZero n then unary(I.NEGL, e2) | ||
1028 : | else binary(I.SUBL, e1, e2) | ||
1029 : | george | 545 | | T.SUB(32, e1, e2) => binary(I.SUBL, e1, e2) |
1030 : | monnier | 247 | |
1031 : | george | 545 | | T.MULU(32, x, y) => uMultiply(x, y) |
1032 : | | T.DIVU(32, x, y) => divide(false, false, x, y) | ||
1033 : | blume | 1183 | | T.REMU(32, x, y) => rem(false, x, y) |
1034 : | monnier | 247 | |
1035 : | blume | 1185 | | T.MULS(32, x, y) => multiply_notrap (x, y) |
1036 : | blume | 1181 | | T.DIVS(T.DIV_TO_ZERO, 32, x, y) => divide(true, false, x, y) |
1037 : | blume | 1185 | | T.DIVS(T.DIV_TO_NEGINF, 32, x, y) => divinf (false, x, y) |
1038 : | blume | 1183 | | T.REMS(T.DIV_TO_ZERO, 32, x, y) => rem(true, x, y) |
1039 : | blume | 1185 | | T.REMS(T.DIV_TO_NEGINF, 32, x, y) => reminf (x, y) |
1040 : | monnier | 247 | |
1041 : | george | 545 | | T.ADDT(32, x, y) => (binaryComm(I.ADDL, x, y); trap()) |
1042 : | | T.SUBT(32, x, y) => (binary(I.SUBL, x, y); trap()) | ||
1043 : | blume | 1185 | | T.MULT(32, x, y) => (multiply (x, y); trap ()) |
1044 : | blume | 1181 | | T.DIVT(T.DIV_TO_ZERO, 32, x, y) => divide(true, true, x, y) |
1045 : | blume | 1185 | | T.DIVT(T.DIV_TO_NEGINF, 32, x, y) => divinf (true, x, y) |
1046 : | monnier | 247 | |
1047 : | george | 545 | | T.ANDB(32, x, y) => binaryComm(I.ANDL, x, y) |
1048 : | | T.ORB(32, x, y) => binaryComm(I.ORL, x, y) | ||
1049 : | | T.XORB(32, x, y) => binaryComm(I.XORL, x, y) | ||
1050 : | | T.NOTB(32, x) => unary(I.NOTL, x) | ||
1051 : | monnier | 247 | |
1052 : | george | 545 | | T.SRA(32, x, y) => shift(I.SARL, x, y) |
1053 : | | T.SRL(32, x, y) => shift(I.SHRL, x, y) | ||
1054 : | | T.SLL(32, x, y) => shift(I.SHLL, x, y) | ||
1055 : | monnier | 247 | |
1056 : | george | 545 | | T.LOAD(8, ea, mem) => load8(ea, mem) |
1057 : | | T.LOAD(16, ea, mem) => load16(ea, mem) | ||
1058 : | | T.LOAD(32, ea, mem) => load32(ea, mem) | ||
1059 : | monnier | 498 | |
1060 : | leunga | 776 | | T.SX(32,8,T.LOAD(8,ea,mem)) => load8s(ea, mem) |
1061 : | | T.SX(32,16,T.LOAD(16,ea,mem)) => load16s(ea, mem) | ||
1062 : | | T.ZX(32,8,T.LOAD(8,ea,mem)) => load8(ea, mem) | ||
1063 : | leunga | 779 | | T.ZX(32,16,T.LOAD(16,ea,mem)) => load16(ea, mem) |
1064 : | leunga | 776 | |
1065 : | leunga | 1127 | | T.COND(32, T.CMP(ty, cc, t1, t2), y as T.LI yes, n as T.LI no) => |
1066 : | (case !arch of (* PentiumPro and higher has CMOVcc *) | ||
1067 : | Pentium => setcc(ty, cc, t1, t2, toInt32 yes, toInt32 no) | ||
1068 : | | _ => cmovcc(ty, cc, t1, t2, y, n) | ||
1069 : | ) | ||
1070 : | george | 545 | | T.COND(32, T.CMP(ty, cc, t1, t2), yes, no) => |
1071 : | (case !arch of (* PentiumPro and higher has CMOVcc *) | ||
1072 : | Pentium => unknownExp exp | ||
1073 : | | _ => cmovcc(ty, cc, t1, t2, yes, no) | ||
1074 : | ) | ||
1075 : | | T.LET(s,e) => (doStmt s; doExpr(e, rd, an)) | ||
1076 : | | T.MARK(e, A.MARKREG f) => (f rd; doExpr(e, rd, an)) | ||
1077 : | | T.MARK(e, a) => doExpr(e, rd, a::an) | ||
1078 : | | T.PRED(e,c) => doExpr(e, rd, A.CTRLUSE c::an) | ||
1079 : | george | 555 | | T.REXT e => |
1080 : | ExtensionComp.compileRext (reducer()) {e=e, rd=rd, an=an} | ||
1081 : | george | 545 | (* simplify and try again *) |
1082 : | | exp => unknownExp exp | ||
1083 : | end (* doExpr *) | ||
1084 : | monnier | 247 | |
1085 : | george | 545 | (* generate an expression and return its result register |
1086 : | * If rewritePseudo is on, the result is guaranteed to be in a | ||
1087 : | * non memReg register | ||
1088 : | *) | ||
1089 : | and expr(exp as T.REG(_, rd)) = | ||
1090 : | if isMemReg rd then genExpr exp else rd | ||
1091 : | | expr exp = genExpr exp | ||
1092 : | monnier | 247 | |
1093 : | george | 545 | and genExpr exp = |
1094 : | let val rd = newReg() in doExpr(exp, rd, []); rd end | ||
1095 : | monnier | 247 | |
1096 : | george | 545 | (* Compare an expression with zero. |
1097 : | * On the x86, TEST is superior to AND for doing the same thing, | ||
1098 : | * since it doesn't need to write out the result in a register. | ||
1099 : | *) | ||
1100 : | leunga | 695 | and cmpWithZero(cc as (T.EQ | T.NE), e as T.ANDB(ty, a, b), an) = |
1101 : | george | 545 | (case ty of |
1102 : | leunga | 695 | 8 => test(I.TESTB, a, b, an) |
1103 : | | 16 => test(I.TESTW, a, b, an) | ||
1104 : | | 32 => test(I.TESTL, a, b, an) | ||
1105 : | | _ => doExpr(e, newReg(), an); | ||
1106 : | cc) | ||
1107 : | | cmpWithZero(cc, e, an) = | ||
1108 : | let val e = | ||
1109 : | case e of (* hack to disable the lea optimization XXX *) | ||
1110 : | T.ADD(_, a, b) => T.ADD(0, a, b) | ||
1111 : | | e => e | ||
1112 : | in doExpr(e, newReg(), an); cc end | ||
1113 : | monnier | 247 | |
1114 : | george | 545 | (* Emit a test. |
1115 : | * The available modes are | ||
1116 : | * r/m, r | ||
1117 : | * r/m, imm | ||
1118 : | * On selecting the right instruction: TESTL/TESTW/TESTB. | ||
1119 : | * When anding an operand with a constant | ||
1120 : | * that fits within 8 (or 16) bits, it is possible to use TESTB, | ||
1121 : | * (or TESTW) instead of TESTL. Because x86 is little endian, | ||
1122 : | * this works for memory operands too. However, with TESTB, it is | ||
1123 : | * not possible to use registers other than | ||
1124 : | * AL, CL, BL, DL, and AH, CH, BH, DH. So, the best way is to | ||
1125 : | * perform register allocation first, and if the operand registers | ||
1126 : | * are one of EAX, ECX, EBX, or EDX, replace the TESTL instruction | ||
1127 : | * by TESTB. | ||
1128 : | *) | ||
1129 : | leunga | 695 | and test(testopcode, a, b, an) = |
1130 : | george | 545 | let val (_, opnd1, opnd2) = commuteComparison(T.EQ, true, a, b) |
1131 : | (* translate r, r/m => r/m, r *) | ||
1132 : | val (opnd1, opnd2) = | ||
1133 : | if isMemOpnd opnd2 then (opnd2, opnd1) else (opnd1, opnd2) | ||
1134 : | leunga | 695 | in mark(testopcode{lsrc=opnd1, rsrc=opnd2}, an) |
1135 : | george | 545 | end |
1136 : | monnier | 247 | |
1137 : | leunga | 815 | (* %eflags <- src *) |
1138 : | and moveToEflags src = | ||
1139 : | george | 889 | if CB.sameColor(src, C.eflags) then () |
1140 : | leunga | 815 | else (move(I.Direct src, eax); emit(I.LAHF)) |
1141 : | |||
1142 : | (* dst <- %eflags *) | ||
1143 : | and moveFromEflags dst = | ||
1144 : | george | 889 | if CB.sameColor(dst, C.eflags) then () |
1145 : | leunga | 815 | else (emit(I.SAHF); move(eax, I.Direct dst)) |
1146 : | |||
1147 : | george | 545 | (* generate a condition code expression |
1148 : | leunga | 744 | * The zero is for setting the condition code! |
1149 : | * I have no idea why this is used. | ||
1150 : | *) | ||
1151 : | and doCCexpr(T.CMP(ty, cc, t1, t2), rd, an) = | ||
1152 : | leunga | 815 | (cmp(false, ty, cc, t1, t2, an); |
1153 : | moveFromEflags rd | ||
1154 : | ) | ||
1155 : | | doCCexpr(T.CC(cond,rs), rd, an) = | ||
1156 : | george | 889 | if CB.sameColor(rs,C.eflags) orelse CB.sameColor(rd,C.eflags) then |
1157 : | leunga | 815 | (moveToEflags rs; moveFromEflags rd) |
1158 : | leunga | 744 | else |
1159 : | leunga | 815 | move'(I.Direct rs, I.Direct rd, an) |
1160 : | george | 545 | | doCCexpr(T.CCMARK(e,A.MARKREG f),rd,an) = (f rd; doCCexpr(e,rd,an)) |
1161 : | | doCCexpr(T.CCMARK(e,a), rd, an) = doCCexpr(e,rd,a::an) | ||
1162 : | | doCCexpr(T.CCEXT e, cd, an) = | ||
1163 : | george | 555 | ExtensionComp.compileCCext (reducer()) {e=e, ccd=cd, an=an} |
1164 : | george | 545 | | doCCexpr _ = error "doCCexpr" |
1165 : | monnier | 247 | |
1166 : | george | 545 | and ccExpr e = error "ccExpr" |
1167 : | monnier | 247 | |
1168 : | george | 545 | (* generate a comparison and sets the condition code; |
1169 : | * return the actual cc used. If the flag swapable is true, | ||
1170 : | * we can also reorder the operands. | ||
1171 : | *) | ||
1172 : | and cmp(swapable, ty, cc, t1, t2, an) = | ||
1173 : | leunga | 695 | (* == and <> can be always be reordered *) |
1174 : | let val swapable = swapable orelse cc = T.EQ orelse cc = T.NE | ||
1175 : | in (* Sometimes the comparison is not necessary because | ||
1176 : | * the bits are already set! | ||
1177 : | *) | ||
1178 : | if isZero t1 andalso setZeroBit2 t2 then | ||
1179 : | if swapable then | ||
1180 : | cmpWithZero(T.Basis.swapCond cc, t2, an) | ||
1181 : | else (* can't reorder the comparison! *) | ||
1182 : | genCmp(ty, false, cc, t1, t2, an) | ||
1183 : | else if isZero t2 andalso setZeroBit2 t1 then | ||
1184 : | cmpWithZero(cc, t1, an) | ||
1185 : | else genCmp(ty, swapable, cc, t1, t2, an) | ||
1186 : | end | ||
1187 : | monnier | 247 | |
1188 : | george | 545 | (* Give a and b which are the operands to a comparison (or test) |
1189 : | * Return the appropriate condition code and operands. | ||
1190 : | * The available modes are: | ||
1191 : | * r/m, imm | ||
1192 : | * r/m, r | ||
1193 : | * r, r/m | ||
1194 : | *) | ||
1195 : | and commuteComparison(cc, swapable, a, b) = | ||
1196 : | let val (opnd1, opnd2) = (operand a, operand b) | ||
1197 : | in (* Try to fold in the operands whenever possible *) | ||
1198 : | case (isImmediate opnd1, isImmediate opnd2) of | ||
1199 : | (true, true) => (cc, moveToReg opnd1, opnd2) | ||
1200 : | | (true, false) => | ||
1201 : | if swapable then (T.Basis.swapCond cc, opnd2, opnd1) | ||
1202 : | else (cc, moveToReg opnd1, opnd2) | ||
1203 : | | (false, true) => (cc, opnd1, opnd2) | ||
1204 : | | (false, false) => | ||
1205 : | (case (opnd1, opnd2) of | ||
1206 : | (_, I.Direct _) => (cc, opnd1, opnd2) | ||
1207 : | | (I.Direct _, _) => (cc, opnd1, opnd2) | ||
1208 : | | (_, _) => (cc, moveToReg opnd1, opnd2) | ||
1209 : | ) | ||
1210 : | end | ||
1211 : | |||
1212 : | (* generate a real comparison; return the real cc used *) | ||
1213 : | and genCmp(ty, swapable, cc, a, b, an) = | ||
1214 : | let val (cc, opnd1, opnd2) = commuteComparison(cc, swapable, a, b) | ||
1215 : | in mark(I.CMPL{lsrc=opnd1, rsrc=opnd2}, an); cc | ||
1216 : | end | ||
1217 : | monnier | 247 | |
1218 : | george | 545 | (* generate code for jumps *) |
1219 : | leunga | 775 | and jmp(lexp as T.LABEL lab, labs, an) = |
1220 : | george | 545 | mark(I.JMP(I.ImmedLabel lexp, [lab]), an) |
1221 : | leunga | 775 | | jmp(T.LABEXP le, labs, an) = mark(I.JMP(I.ImmedLabel le, labs), an) |
1222 : | | jmp(ea, labs, an) = mark(I.JMP(operand ea, labs), an) | ||
1223 : | george | 545 | |
1224 : | (* convert mlrisc to cellset: | ||
1225 : | *) | ||
1226 : | and cellset mlrisc = | ||
1227 : | jhr | 900 | let val addCCReg = CB.CellSet.add |
1228 : | george | 545 | fun g([],acc) = acc |
1229 : | | g(T.GPR(T.REG(_,r))::regs,acc) = g(regs,C.addReg(r,acc)) | ||
1230 : | | g(T.FPR(T.FREG(_,f))::regs,acc) = g(regs,C.addFreg(f,acc)) | ||
1231 : | | g(T.CCR(T.CC(_,cc))::regs,acc) = g(regs,addCCReg(cc,acc)) | ||
1232 : | | g(T.CCR(T.FCC(_,cc))::regs,acc) = g(regs,addCCReg(cc,acc)) | ||
1233 : | | g(_::regs, acc) = g(regs, acc) | ||
1234 : | in g(mlrisc, C.empty) end | ||
1235 : | |||
1236 : | (* generate code for calls *) | ||
1237 : | blume | 839 | and call(ea, flow, def, use, mem, cutsTo, an, pops) = |
1238 : | leunga | 815 | let fun return(set, []) = set |
1239 : | | return(set, a::an) = | ||
1240 : | case #peek A.RETURN_ARG a of | ||
1241 : | jhr | 900 | SOME r => return(CB.CellSet.add(r, set), an) |
1242 : | leunga | 815 | | NONE => return(set, an) |
1243 : | blume | 839 | in |
1244 : | mark(I.CALL{opnd=operand ea,defs=cellset(def),uses=cellset(use), | ||
1245 : | return=return(C.empty,an),cutsTo=cutsTo,mem=mem, | ||
1246 : | pops=pops},an) | ||
1247 : | leunga | 815 | end |
1248 : | george | 545 | |
1249 : | leunga | 815 | (* generate code for integer stores; first move data to %eax |
1250 : | * This is mainly because we can't allocate to registers like | ||
1251 : | * ah, dl, dx etc. | ||
1252 : | *) | ||
1253 : | and genStore(mvOp, ea, d, mem, an) = | ||
1254 : | let val src = | ||
1255 : | george | 545 | case immedOrReg(operand d) of |
1256 : | src as I.Direct r => | ||
1257 : | george | 889 | if CB.sameColor(r,C.eax) |
1258 : | leunga | 744 | then src else (move(src, eax); eax) |
1259 : | george | 545 | | src => src |
1260 : | leunga | 815 | in mark(I.MOVE{mvOp=mvOp, src=src, dst=address(ea,mem)},an) |
1261 : | george | 545 | end |
1262 : | leunga | 815 | |
1263 : | (* generate code for 8-bit integer stores *) | ||
1264 : | (* movb has to use %eax as source. Stupid x86! *) | ||
1265 : | and store8(ea, d, mem, an) = genStore(I.MOVB, ea, d, mem, an) | ||
1266 : | blume | 818 | and store16(ea, d, mem, an) = |
1267 : | mark(I.MOVE{mvOp=I.MOVW, src=immedOrReg(operand d), dst=address(ea, mem)}, an) | ||
1268 : | george | 545 | and store32(ea, d, mem, an) = |
1269 : | move'(immedOrReg(operand d), address(ea, mem), an) | ||
1270 : | |||
1271 : | (* generate code for branching *) | ||
1272 : | and branch(T.CMP(ty, cc, t1, t2), lab, an) = | ||
1273 : | (* allow reordering of operands *) | ||
1274 : | let val cc = cmp(true, ty, cc, t1, t2, []) | ||
1275 : | in mark(I.JCC{cond=cond cc, opnd=immedLabel lab}, an) end | ||
1276 : | | branch(T.FCMP(fty, fcc, t1, t2), lab, an) = | ||
1277 : | fbranch(fty, fcc, t1, t2, lab, an) | ||
1278 : | | branch(ccexp, lab, an) = | ||
1279 : | leunga | 744 | (doCCexpr(ccexp, C.eflags, []); |
1280 : | george | 545 | mark(I.JCC{cond=cond(Gen.condOf ccexp), opnd=immedLabel lab}, an) |
1281 : | ) | ||
1282 : | |||
1283 : | (* generate code for floating point compare and branch *) | ||
1284 : | and fbranch(fty, fcc, t1, t2, lab, an) = | ||
1285 : | leunga | 1156 | let fun j cc = mark(I.JCC{cond=cc, opnd=immedLabel lab},an) |
1286 : | in fbranching(fty, fcc, t1, t2, j) | ||
1287 : | end | ||
1288 : | |||
1289 : | and fbranching(fty, fcc, t1, t2, j) = | ||
1290 : | leunga | 731 | let fun ignoreOrder (T.FREG _) = true |
1291 : | | ignoreOrder (T.FLOAD _) = true | ||
1292 : | | ignoreOrder (T.FMARK(e,_)) = ignoreOrder e | ||
1293 : | | ignoreOrder _ = false | ||
1294 : | |||
1295 : | fun compare'() = (* Sethi-Ullman style *) | ||
1296 : | (if ignoreOrder t1 orelse ignoreOrder t2 then | ||
1297 : | (reduceFexp(fty, t2, []); reduceFexp(fty, t1, [])) | ||
1298 : | else (reduceFexp(fty, t1, []); reduceFexp(fty, t2, []); | ||
1299 : | emit(I.FXCH{opnd=C.ST(1)})); | ||
1300 : | emit(I.FUCOMPP); | ||
1301 : | fcc | ||
1302 : | ) | ||
1303 : | |||
1304 : | fun compare''() = | ||
1305 : | (* direct style *) | ||
1306 : | (* Try to make lsrc the memory operand *) | ||
1307 : | let val lsrc = foperand(fty, t1) | ||
1308 : | val rsrc = foperand(fty, t2) | ||
1309 : | val fsize = fsize fty | ||
1310 : | fun cmp(lsrc, rsrc, fcc) = | ||
1311 : | leunga | 1156 | let val i = !arch <> Pentium |
1312 : | in emit(I.FCMP{i=i,fsize=fsize,lsrc=lsrc,rsrc=rsrc}); | ||
1313 : | fcc | ||
1314 : | end | ||
1315 : | leunga | 731 | in case (lsrc, rsrc) of |
1316 : | (I.FPR _, I.FPR _) => cmp(lsrc, rsrc, fcc) | ||
1317 : | | (I.FPR _, mem) => cmp(mem,lsrc,T.Basis.swapFcond fcc) | ||
1318 : | | (mem, I.FPR _) => cmp(lsrc, rsrc, fcc) | ||
1319 : | | (lsrc, rsrc) => (* can't be both memory! *) | ||
1320 : | let val ftmpR = newFreg() | ||
1321 : | val ftmp = I.FPR ftmpR | ||
1322 : | in emit(I.FMOVE{fsize=fsize,src=rsrc,dst=ftmp}); | ||
1323 : | cmp(lsrc, ftmp, fcc) | ||
1324 : | end | ||
1325 : | end | ||
1326 : | |||
1327 : | fun compare() = | ||
1328 : | if enableFastFPMode andalso !fast_floating_point | ||
1329 : | then compare''() else compare'() | ||
1330 : | |||
1331 : | george | 545 | fun andil i = emit(I.BINARY{binOp=I.ANDL,src=I.Immed(i),dst=eax}) |
1332 : | leunga | 585 | fun testil i = emit(I.TESTL{lsrc=eax,rsrc=I.Immed(i)}) |
1333 : | george | 545 | fun xoril i = emit(I.BINARY{binOp=I.XORL,src=I.Immed(i),dst=eax}) |
1334 : | fun cmpil i = emit(I.CMPL{rsrc=I.Immed(i), lsrc=eax}) | ||
1335 : | fun sahf() = emit(I.SAHF) | ||
1336 : | leunga | 731 | fun branch(fcc) = |
1337 : | george | 545 | case fcc |
1338 : | leunga | 1156 | of T.== => (andil 0x4400; xoril 0x4000; j(I.EQ)) |
1339 : | | T.?<> => (andil 0x4400; xoril 0x4000; j(I.NE)) | ||
1340 : | | T.? => (sahf(); j(I.P)) | ||
1341 : | | T.<=> => (sahf(); j(I.NP)) | ||
1342 : | | T.> => (testil 0x4500; j(I.EQ)) | ||
1343 : | | T.?<= => (testil 0x4500; j(I.NE)) | ||
1344 : | | T.>= => (testil 0x500; j(I.EQ)) | ||
1345 : | | T.?< => (testil 0x500; j(I.NE)) | ||
1346 : | | T.< => (andil 0x4500; cmpil 0x100; j(I.EQ)) | ||
1347 : | | T.?>= => (andil 0x4500; cmpil 0x100; j(I.NE)) | ||
1348 : | | T.<= => (andil 0x4100; cmpil 0x100; j(I.EQ); | ||
1349 : | cmpil 0x4000; j(I.EQ)) | ||
1350 : | | T.?> => (sahf(); j(I.P); testil 0x4100; j(I.EQ)) | ||
1351 : | | T.<> => (testil 0x4400; j(I.EQ)) | ||
1352 : | | T.?= => (testil 0x4400; j(I.NE)) | ||
1353 : | jhr | 1119 | | _ => error(concat[ |
1354 : | "fbranch(", T.Basis.fcondToString fcc, ")" | ||
1355 : | ]) | ||
1356 : | george | 545 | (*esac*) |
1357 : | leunga | 1156 | |
1358 : | (* | ||
1359 : | * P Z C | ||
1360 : | * x < y 0 0 1 | ||
1361 : | * x > y 0 0 0 | ||
1362 : | * x = y 0 1 0 | ||
1363 : | * unordered 1 1 1 | ||
1364 : | * When it's unordered, all three flags, P, Z, C are set. | ||
1365 : | *) | ||
1366 : | |||
1367 : | fun fast_branch(fcc) = | ||
1368 : | case fcc | ||
1369 : | of T.== => orderedOnly(I.EQ) | ||
1370 : | | T.?<> => (j(I.P); j(I.NE)) | ||
1371 : | | T.? => j(I.P) | ||
1372 : | | T.<=> => j(I.NP) | ||
1373 : | | T.> => orderedOnly(I.A) | ||
1374 : | | T.?<= => j(I.BE) | ||
1375 : | | T.>= => orderedOnly(I.AE) | ||
1376 : | | T.?< => j(I.B) | ||
1377 : | | T.< => orderedOnly(I.B) | ||
1378 : | | T.?>= => (j(I.P); j(I.AE)) | ||
1379 : | | T.<= => orderedOnly(I.BE) | ||
1380 : | | T.?> => (j(I.P); j(I.A)) | ||
1381 : | | T.<> => orderedOnly(I.NE) | ||
1382 : | | T.?= => j(I.EQ) | ||
1383 : | | _ => error(concat[ | ||
1384 : | "fbranch(", T.Basis.fcondToString fcc, ")" | ||
1385 : | ]) | ||
1386 : | (*esac*) | ||
1387 : | and orderedOnly fcc = | ||
1388 : | let val label = Label.anon() | ||
1389 : | in emit(I.JCC{cond=I.P, opnd=immedLabel label}); | ||
1390 : | j fcc; | ||
1391 : | defineLabel label | ||
1392 : | end | ||
1393 : | |||
1394 : | leunga | 731 | val fcc = compare() |
1395 : | leunga | 1156 | in if !arch <> Pentium andalso |
1396 : | (enableFastFPMode andalso !fast_floating_point) then | ||
1397 : | fast_branch(fcc) | ||
1398 : | else | ||
1399 : | (emit I.FNSTSW; | ||
1400 : | branch(fcc) | ||
1401 : | ) | ||
1402 : | monnier | 411 | end |
1403 : | monnier | 247 | |
1404 : | leunga | 731 | (*======================================================== |
1405 : | * Floating point code generation starts here. | ||
1406 : | * Some generic fp routines first. | ||
1407 : | *========================================================*) | ||
1408 : | |||
1409 : | (* Can this tree be folded into the src operand of a floating point | ||
1410 : | * operations? | ||
1411 : | *) | ||
1412 : | and foldableFexp(T.FREG _) = true | ||
1413 : | | foldableFexp(T.FLOAD _) = true | ||
1414 : | | foldableFexp(T.CVTI2F(_, (16 | 32), _)) = true | ||
1415 : | | foldableFexp(T.CVTF2F(_, _, t)) = foldableFexp t | ||
1416 : | | foldableFexp(T.FMARK(t, _)) = foldableFexp t | ||
1417 : | | foldableFexp _ = false | ||
1418 : | |||
1419 : | (* Move integer e of size ty into a memory location. | ||
1420 : | * Returns a quadruple: | ||
1421 : | * (INTEGER,return ty,effect address of memory location,cleanup code) | ||
1422 : | *) | ||
1423 : | and convertIntToFloat(ty, e) = | ||
1424 : | let val opnd = operand e | ||
1425 : | in if isMemOpnd opnd andalso (ty = 16 orelse ty = 32) | ||
1426 : | then (INTEGER, ty, opnd, []) | ||
1427 : | else | ||
1428 : | leunga | 815 | let val {instrs, tempMem, cleanup} = |
1429 : | cvti2f{ty=ty, src=opnd, an=getAnnotations()} | ||
1430 : | leunga | 731 | in emits instrs; |
1431 : | (INTEGER, 32, tempMem, cleanup) | ||
1432 : | end | ||
1433 : | end | ||
1434 : | |||
1435 : | (*======================================================== | ||
1436 : | * Sethi-Ullman based floating point code generation as | ||
1437 : | * implemented by Lal | ||
1438 : | *========================================================*) | ||
1439 : | |||
1440 : | george | 545 | and fld(32, opnd) = I.FLDS opnd |
1441 : | | fld(64, opnd) = I.FLDL opnd | ||
1442 : | george | 555 | | fld(80, opnd) = I.FLDT opnd |
1443 : | george | 545 | | fld _ = error "fld" |
1444 : | |||
1445 : | leunga | 565 | and fild(16, opnd) = I.FILD opnd |
1446 : | | fild(32, opnd) = I.FILDL opnd | ||
1447 : | | fild(64, opnd) = I.FILDLL opnd | ||
1448 : | | fild _ = error "fild" | ||
1449 : | |||
1450 : | and fxld(INTEGER, ty, opnd) = fild(ty, opnd) | ||
1451 : | | fxld(REAL, fty, opnd) = fld(fty, opnd) | ||
1452 : | |||
1453 : | george | 545 | and fstp(32, opnd) = I.FSTPS opnd |
1454 : | | fstp(64, opnd) = I.FSTPL opnd | ||
1455 : | george | 555 | | fstp(80, opnd) = I.FSTPT opnd |
1456 : | george | 545 | | fstp _ = error "fstp" |
1457 : | |||
1458 : | (* generate code for floating point stores *) | ||
1459 : | leunga | 731 | and fstore'(fty, ea, d, mem, an) = |
1460 : | george | 545 | (case d of |
1461 : | T.FREG(fty, fs) => emit(fld(fty, I.FDirect fs)) | ||
1462 : | | _ => reduceFexp(fty, d, []); | ||
1463 : | mark(fstp(fty, address(ea, mem)), an) | ||
1464 : | ) | ||
1465 : | |||
1466 : | leunga | 731 | (* generate code for floating point loads *) |
1467 : | and fload'(fty, ea, mem, fd, an) = | ||
1468 : | let val ea = address(ea, mem) | ||
1469 : | in mark(fld(fty, ea), an); | ||
1470 : | george | 889 | if CB.sameColor(fd,ST0) then () |
1471 : | leunga | 744 | else emit(fstp(fty, I.FDirect fd)) |
1472 : | leunga | 731 | end |
1473 : | |||
1474 : | and fexpr' e = (reduceFexp(64, e, []); C.ST(0)) | ||
1475 : | george | 545 | |
1476 : | (* generate floating point expression and put the result in fd *) | ||
1477 : | leunga | 731 | and doFexpr'(fty, T.FREG(_, fs), fd, an) = |
1478 : | george | 889 | (if CB.sameColor(fs,fd) then () |
1479 : | george | 1009 | else mark'(I.COPY{k=CB.FP, sz=64, dst=[fd], src=[fs], tmp=NONE}, an) |
1480 : | george | 545 | ) |
1481 : | leunga | 731 | | doFexpr'(_, T.FLOAD(fty, ea, mem), fd, an) = |
1482 : | fload'(fty, ea, mem, fd, an) | ||
1483 : | | doFexpr'(fty, T.FEXT fexp, fd, an) = | ||
1484 : | (ExtensionComp.compileFext (reducer()) {e=fexp, fd=fd, an=an}; | ||
1485 : | george | 889 | if CB.sameColor(fd,ST0) then () else emit(fstp(fty, I.FDirect fd)) |
1486 : | leunga | 731 | ) |
1487 : | | doFexpr'(fty, e, fd, an) = | ||
1488 : | george | 545 | (reduceFexp(fty, e, []); |
1489 : | george | 889 | if CB.sameColor(fd,ST0) then () |
1490 : | leunga | 744 | else mark(fstp(fty, I.FDirect fd), an) |
1491 : | george | 545 | ) |
1492 : | |||
1493 : | (* | ||
1494 : | * Generate floating point expression using Sethi-Ullman's scheme: | ||
1495 : | * This function evaluates a floating point expression, | ||
1496 : | * and put result in %ST(0). | ||
1497 : | *) | ||
1498 : | and reduceFexp(fty, fexp, an) = | ||
1499 : | george | 555 | let val ST = I.ST(C.ST 0) |
1500 : | val ST1 = I.ST(C.ST 1) | ||
1501 : | leunga | 593 | val cleanupCode = ref [] : I.instruction list ref |
1502 : | george | 545 | |
1503 : | leunga | 565 | datatype su_tree = |
1504 : | LEAF of int * T.fexp * ans | ||
1505 : | | BINARY of int * T.fty * fbinop * su_tree * su_tree * ans | ||
1506 : | | UNARY of int * T.fty * I.funOp * su_tree * ans | ||
1507 : | and fbinop = FADD | FSUB | FMUL | FDIV | ||
1508 : | | FIADD | FISUB | FIMUL | FIDIV | ||
1509 : | withtype ans = Annotations.annotations | ||
1510 : | monnier | 247 | |
1511 : | leunga | 565 | fun label(LEAF(n, _, _)) = n |
1512 : | | label(BINARY(n, _, _, _, _, _)) = n | ||
1513 : | | label(UNARY(n, _, _, _, _)) = n | ||
1514 : | george | 545 | |
1515 : | leunga | 565 | fun annotate(LEAF(n, x, an), a) = LEAF(n,x,a::an) |
1516 : | | annotate(BINARY(n,t,b,x,y,an), a) = BINARY(n,t,b,x,y,a::an) | ||
1517 : | | annotate(UNARY(n,t,u,x,an), a) = UNARY(n,t,u,x,a::an) | ||
1518 : | george | 545 | |
1519 : | leunga | 565 | (* Generate expression tree with sethi-ullman numbers *) |
1520 : | fun su(e as T.FREG _) = LEAF(1, e, []) | ||
1521 : | | su(e as T.FLOAD _) = LEAF(1, e, []) | ||
1522 : | | su(e as T.CVTI2F _) = LEAF(1, e, []) | ||
1523 : | | su(T.CVTF2F(_, _, t)) = su t | ||
1524 : | | su(T.FMARK(t, a)) = annotate(su t, a) | ||
1525 : | | su(T.FABS(fty, t)) = suUnary(fty, I.FABS, t) | ||
1526 : | | su(T.FNEG(fty, t)) = suUnary(fty, I.FCHS, t) | ||
1527 : | | su(T.FSQRT(fty, t)) = suUnary(fty, I.FSQRT, t) | ||
1528 : | | su(T.FADD(fty, t1, t2)) = suComBinary(fty,FADD,FIADD,t1,t2) | ||
1529 : | | su(T.FMUL(fty, t1, t2)) = suComBinary(fty,FMUL,FIMUL,t1,t2) | ||
1530 : | | su(T.FSUB(fty, t1, t2)) = suBinary(fty,FSUB,FISUB,t1,t2) | ||
1531 : | | su(T.FDIV(fty, t1, t2)) = suBinary(fty,FDIV,FIDIV,t1,t2) | ||
1532 : | | su _ = error "su" | ||
1533 : | |||
1534 : | (* Try to fold the the memory operand or integer conversion *) | ||
1535 : | and suFold(e as T.FREG _) = (LEAF(0, e, []), false) | ||
1536 : | | suFold(e as T.FLOAD _) = (LEAF(0, e, []), false) | ||
1537 : | | suFold(e as T.CVTI2F(_,(16 | 32),_)) = (LEAF(0, e, []), true) | ||
1538 : | | suFold(T.CVTF2F(_, _, t)) = suFold t | ||
1539 : | | suFold(T.FMARK(t, a)) = | ||
1540 : | let val (t, integer) = suFold t | ||
1541 : | in (annotate(t, a), integer) end | ||
1542 : | | suFold e = (su e, false) | ||
1543 : | |||
1544 : | (* Form unary tree *) | ||
1545 : | and suUnary(fty, funary, t) = | ||
1546 : | let val t = su t | ||
1547 : | in UNARY(label t, fty, funary, t, []) | ||
1548 : | george | 545 | end |
1549 : | leunga | 565 | |
1550 : | (* Form binary tree *) | ||
1551 : | and suBinary(fty, binop, ibinop, t1, t2) = | ||
1552 : | let val t1 = su t1 | ||
1553 : | val (t2, integer) = suFold t2 | ||
1554 : | val n1 = label t1 | ||
1555 : | val n2 = label t2 | ||
1556 : | val n = if n1=n2 then n1+1 else Int.max(n1,n2) | ||
1557 : | val myOp = if integer then ibinop else binop | ||
1558 : | in BINARY(n, fty, myOp, t1, t2, []) | ||
1559 : | george | 545 | end |
1560 : | george | 555 | |
1561 : | leunga | 565 | (* Try to fold in the operand if possible. |
1562 : | * This only applies to commutative operations. | ||
1563 : | *) | ||
1564 : | and suComBinary(fty, binop, ibinop, t1, t2) = | ||
1565 : | leunga | 731 | let val (t1, t2) = if foldableFexp t2 |
1566 : | then (t1, t2) else (t2, t1) | ||
1567 : | leunga | 565 | in suBinary(fty, binop, ibinop, t1, t2) end |
1568 : | |||
1569 : | and sameTree(LEAF(_, T.FREG(t1,f1), []), | ||
1570 : | leunga | 744 | LEAF(_, T.FREG(t2,f2), [])) = |
1571 : | george | 889 | t1 = t2 andalso CB.sameColor(f1,f2) |
1572 : | leunga | 565 | | sameTree _ = false |
1573 : | |||
1574 : | (* Traverse tree and generate code *) | ||
1575 : | fun gencode(LEAF(_, t, an)) = mark(fxld(leafEA t), an) | ||
1576 : | | gencode(BINARY(_, _, binop, x, t2 as LEAF(0, y, a1), a2)) = | ||
1577 : | let val _ = gencode x | ||
1578 : | val (_, fty, src) = leafEA y | ||
1579 : | fun gen(code) = mark(code, a1 @ a2) | ||
1580 : | fun binary(oper32, oper64) = | ||
1581 : | if sameTree(x, t2) then | ||
1582 : | gen(I.FBINARY{binOp=oper64, src=ST, dst=ST}) | ||
1583 : | george | 555 | else |
1584 : | let val oper = | ||
1585 : | leunga | 565 | if isMemOpnd src then |
1586 : | case fty of | ||
1587 : | 32 => oper32 | ||
1588 : | | 64 => oper64 | ||
1589 : | | _ => error "gencode: BINARY" | ||
1590 : | else oper64 | ||
1591 : | in gen(I.FBINARY{binOp=oper, src=src, dst=ST}) end | ||
1592 : | fun ibinary(oper16, oper32) = | ||
1593 : | let val oper = case fty of | ||
1594 : | 16 => oper16 | ||
1595 : | | 32 => oper32 | ||
1596 : | | _ => error "gencode: IBINARY" | ||
1597 : | in gen(I.FIBINARY{binOp=oper, src=src}) end | ||
1598 : | in case binop of | ||
1599 : | FADD => binary(I.FADDS, I.FADDL) | ||
1600 : | | FSUB => binary(I.FDIVS, I.FSUBL) | ||
1601 : | | FMUL => binary(I.FMULS, I.FMULL) | ||
1602 : | | FDIV => binary(I.FDIVS, I.FDIVL) | ||
1603 : | | FIADD => ibinary(I.FIADDS, I.FIADDL) | ||
1604 : | | FISUB => ibinary(I.FIDIVS, I.FISUBL) | ||
1605 : | | FIMUL => ibinary(I.FIMULS, I.FIMULL) | ||
1606 : | | FIDIV => ibinary(I.FIDIVS, I.FIDIVL) | ||
1607 : | end | ||
1608 : | | gencode(BINARY(_, fty, binop, t1, t2, an)) = | ||
1609 : | let fun doit(t1, t2, oper, operP, operRP) = | ||
1610 : | let (* oper[P] => ST(1) := ST oper ST(1); [pop] | ||
1611 : | * operR[P] => ST(1) := ST(1) oper ST; [pop] | ||
1612 : | *) | ||
1613 : | val n1 = label t1 | ||
1614 : | val n2 = label t2 | ||
1615 : | in if n1 < n2 andalso n1 <= 7 then | ||
1616 : | (gencode t2; | ||
1617 : | gencode t1; | ||
1618 : | mark(I.FBINARY{binOp=operP, src=ST, dst=ST1}, an)) | ||
1619 : | else if n2 <= n1 andalso n2 <= 7 then | ||
1620 : | (gencode t1; | ||
1621 : | gencode t2; | ||
1622 : | mark(I.FBINARY{binOp=operRP, src=ST, dst=ST1}, an)) | ||
1623 : | else | ||
1624 : | let (* both labels > 7 *) | ||
1625 : | val fs = I.FDirect(newFreg()) | ||
1626 : | in gencode t2; | ||
1627 : | emit(fstp(fty, fs)); | ||
1628 : | gencode t1; | ||
1629 : | mark(I.FBINARY{binOp=oper, src=fs, dst=ST}, an) | ||
1630 : | end | ||
1631 : | end | ||
1632 : | in case binop of | ||
1633 : | FADD => doit(t1,t2,I.FADDL,I.FADDP,I.FADDP) | ||
1634 : | | FMUL => doit(t1,t2,I.FMULL,I.FMULP,I.FMULP) | ||
1635 : | | FSUB => doit(t1,t2,I.FSUBL,I.FSUBP,I.FSUBRP) | ||
1636 : | | FDIV => doit(t1,t2,I.FDIVL,I.FDIVP,I.FDIVRP) | ||
1637 : | george | 545 | | _ => error "gencode.BINARY" |
1638 : | end | ||
1639 : | leunga | 565 | | gencode(UNARY(_, _, unaryOp, su, an)) = |
1640 : | (gencode(su); mark(I.FUNARY(unaryOp),an)) | ||
1641 : | |||
1642 : | (* Generate code for a leaf. | ||
1643 : | * Returns the type and an effective address | ||
1644 : | *) | ||
1645 : | and leafEA(T.FREG(fty, f)) = (REAL, fty, I.FDirect f) | ||
1646 : | | leafEA(T.FLOAD(fty, ea, mem)) = (REAL, fty, address(ea, mem)) | ||
1647 : | leunga | 593 | | leafEA(T.CVTI2F(_, 32, t)) = int2real(32, t) |
1648 : | | leafEA(T.CVTI2F(_, 16, t)) = int2real(16, t) | ||
1649 : | | leafEA(T.CVTI2F(_, 8, t)) = int2real(8, t) | ||
1650 : | leunga | 565 | | leafEA _ = error "leafEA" |
1651 : | |||
1652 : | leunga | 731 | and int2real(ty, e) = |
1653 : | let val (_, ty, ea, cleanup) = convertIntToFloat(ty, e) | ||
1654 : | in cleanupCode := !cleanupCode @ cleanup; | ||
1655 : | (INTEGER, ty, ea) | ||
1656 : | george | 545 | end |
1657 : | leunga | 731 | |
1658 : | in gencode(su fexp); | ||
1659 : | emits(!cleanupCode) | ||
1660 : | george | 545 | end (*reduceFexp*) |
1661 : | leunga | 731 | |
1662 : | (*======================================================== | ||
1663 : | * This section generates 3-address style floating | ||
1664 : | * point code. | ||
1665 : | *========================================================*) | ||
1666 : | |||
1667 : | and isize 16 = I.I16 | ||
1668 : | | isize 32 = I.I32 | ||
1669 : | | isize _ = error "isize" | ||
1670 : | |||
1671 : | and fstore''(fty, ea, d, mem, an) = | ||
1672 : | (floatingPointUsed := true; | ||
1673 : | mark(I.FMOVE{fsize=fsize fty, dst=address(ea,mem), | ||
1674 : | src=foperand(fty, d)}, | ||
1675 : | an) | ||
1676 : | ) | ||
1677 : | |||
1678 : | and fload''(fty, ea, mem, d, an) = | ||
1679 : | (floatingPointUsed := true; | ||
1680 : | mark(I.FMOVE{fsize=fsize fty, src=address(ea,mem), | ||
1681 : | dst=RealReg d}, an) | ||
1682 : | ) | ||
1683 : | |||
1684 : | and fiload''(ity, ea, d, an) = | ||
1685 : | (floatingPointUsed := true; | ||
1686 : | mark(I.FILOAD{isize=isize ity, ea=ea, dst=RealReg d}, an) | ||
1687 : | ) | ||
1688 : | |||
1689 : | and fexpr''(e as T.FREG(_,f)) = | ||
1690 : | if isFMemReg f then transFexpr e else f | ||
1691 : | | fexpr'' e = transFexpr e | ||
1692 : | |||
1693 : | and transFexpr e = | ||
1694 : | let val fd = newFreg() in doFexpr''(64, e, fd, []); fd end | ||
1695 : | |||
1696 : | (* | ||
1697 : | * Process a floating point operand. Put operand in register | ||
1698 : | * when possible. The operand should match the given fty. | ||
1699 : | *) | ||
1700 : | and foperand(fty, e as T.FREG(fty', f)) = | ||
1701 : | if fty = fty' then RealReg f else I.FPR(fexpr'' e) | ||
1702 : | | foperand(fty, T.CVTF2F(_, _, e)) = | ||
1703 : | foperand(fty, e) (* nop on the x86 *) | ||
1704 : | | foperand(fty, e as T.FLOAD(fty', ea, mem)) = | ||
1705 : | (* fold operand when the precison matches *) | ||
1706 : | if fty = fty' then address(ea, mem) else I.FPR(fexpr'' e) | ||
1707 : | | foperand(fty, e) = I.FPR(fexpr'' e) | ||
1708 : | |||
1709 : | (* | ||
1710 : | * Process a floating point operand. | ||
1711 : | * Try to fold in a memory operand or conversion from an integer. | ||
1712 : | *) | ||
1713 : | and fioperand(T.FREG(fty,f)) = (REAL, fty, RealReg f, []) | ||
1714 : | | fioperand(T.FLOAD(fty, ea, mem)) = | ||
1715 : | (REAL, fty, address(ea, mem), []) | ||
1716 : | | fioperand(T.CVTF2F(_, _, e)) = fioperand(e) (* nop on the x86 *) | ||
1717 : | | fioperand(T.CVTI2F(_, ty, e)) = convertIntToFloat(ty, e) | ||
1718 : | | fioperand(T.FMARK(e,an)) = fioperand(e) (* XXX *) | ||
1719 : | | fioperand(e) = (REAL, 64, I.FPR(fexpr'' e), []) | ||
1720 : | |||
1721 : | (* Generate binary operator. Since the real binary operators | ||
1722 : | * does not take memory as destination, we also ensure this | ||
1723 : | * does not happen. | ||
1724 : | *) | ||
1725 : | and fbinop(targetFty, | ||
1726 : | binOp, binOpR, ibinOp, ibinOpR, lsrc, rsrc, fd, an) = | ||
1727 : | (* Put the mem operand in rsrc *) | ||
1728 : | leunga | 1142 | let |
1729 : | leunga | 731 | fun isMemOpnd(T.FREG(_, f)) = isFMemReg f |
1730 : | | isMemOpnd(T.FLOAD _) = true | ||
1731 : | | isMemOpnd(T.CVTI2F(_, (16 | 32), _)) = true | ||
1732 : | | isMemOpnd(T.CVTF2F(_, _, t)) = isMemOpnd t | ||
1733 : | | isMemOpnd(T.FMARK(t, _)) = isMemOpnd t | ||
1734 : | | isMemOpnd _ = false | ||
1735 : | val (binOp, ibinOp, lsrc, rsrc) = | ||
1736 : | if isMemOpnd lsrc then (binOpR, ibinOpR, rsrc, lsrc) | ||
1737 : | else (binOp, ibinOp, lsrc, rsrc) | ||
1738 : | val lsrc = foperand(targetFty, lsrc) | ||
1739 : | val (kind, fty, rsrc, code) = fioperand(rsrc) | ||
1740 : | fun dstMustBeFreg f = | ||
1741 : | if targetFty <> 64 then | ||
1742 : | let val tmpR = newFreg() | ||
1743 : | val tmp = I.FPR tmpR | ||
1744 : | in mark(f tmp, an); | ||
1745 : | emit(I.FMOVE{fsize=fsize targetFty, | ||
1746 : | src=tmp, dst=RealReg fd}) | ||
1747 : | end | ||
1748 : | else mark(f(RealReg fd), an) | ||
1749 : | in case kind of | ||
1750 : | REAL => | ||
1751 : | dstMustBeFreg(fn dst => | ||
1752 : | I.FBINOP{fsize=fsize fty, binOp=binOp, | ||
1753 : | lsrc=lsrc, rsrc=rsrc, dst=dst}) | ||
1754 : | | INTEGER => | ||
1755 : | (dstMustBeFreg(fn dst => | ||
1756 : | I.FIBINOP{isize=isize fty, binOp=ibinOp, | ||
1757 : | lsrc=lsrc, rsrc=rsrc, dst=dst}); | ||
1758 : | emits code | ||
1759 : | ) | ||
1760 : | end | ||
1761 : | george | 545 | |
1762 : | leunga | 731 | and funop(fty, unOp, src, fd, an) = |
1763 : | let val src = foperand(fty, src) | ||
1764 : | in mark(I.FUNOP{fsize=fsize fty, | ||
1765 : | unOp=unOp, src=src, dst=RealReg fd},an) | ||
1766 : | end | ||
1767 : | |||
1768 : | and doFexpr''(fty, e, fd, an) = | ||
1769 : | leunga | 1142 | (floatingPointUsed := true; |
1770 : | leunga | 731 | case e of |
1771 : | george | 889 | T.FREG(_,fs) => if CB.sameColor(fs,fd) then () |
1772 : | leunga | 731 | else fcopy''(fty, [fd], [fs], an) |
1773 : | (* Stupid x86 does everything as 80-bits internally. *) | ||
1774 : | |||
1775 : | (* Binary operators *) | ||
1776 : | | T.FADD(_, a, b) => fbinop(fty, | ||
1777 : | I.FADDL, I.FADDL, I.FIADDL, I.FIADDL, | ||
1778 : | a, b, fd, an) | ||
1779 : | | T.FSUB(_, a, b) => fbinop(fty, | ||
1780 : | I.FSUBL, I.FSUBRL, I.FISUBL, I.FISUBRL, | ||
1781 : | a, b, fd, an) | ||
1782 : | | T.FMUL(_, a, b) => fbinop(fty, | ||
1783 : | I.FMULL, I.FMULL, I.FIMULL, I.FIMULL, | ||
1784 : | a, b, fd, an) | ||
1785 : | | T.FDIV(_, a, b) => fbinop(fty, | ||
1786 : | I.FDIVL, I.FDIVRL, I.FIDIVL, I.FIDIVRL, | ||
1787 : | a, b, fd, an) | ||
1788 : | |||
1789 : | (* Unary operators *) | ||
1790 : | | T.FNEG(_, a) => funop(fty, I.FCHS, a, fd, an) | ||
1791 : | | T.FABS(_, a) => funop(fty, I.FABS, a, fd, an) | ||
1792 : | | T.FSQRT(_, a) => funop(fty, I.FSQRT, a, fd, an) | ||
1793 : | |||
1794 : | (* Load *) | ||
1795 : | | T.FLOAD(fty,ea,mem) => fload''(fty, ea, mem, fd, an) | ||
1796 : | |||
1797 : | (* Type conversions *) | ||
1798 : | | T.CVTF2F(_, _, e) => doFexpr''(fty, e, fd, an) | ||
1799 : | | T.CVTI2F(_, ty, e) => | ||
1800 : | let val (_, ty, ea, cleanup) = convertIntToFloat(ty, e) | ||
1801 : | in fiload''(ty, ea, fd, an); | ||
1802 : | emits cleanup | ||
1803 : | end | ||
1804 : | |||
1805 : | | T.FMARK(e,A.MARKREG f) => (f fd; doFexpr''(fty, e, fd, an)) | ||
1806 : | | T.FMARK(e, a) => doFexpr''(fty, e, fd, a::an) | ||
1807 : | | T.FPRED(e, c) => doFexpr''(fty, e, fd, A.CTRLUSE c::an) | ||
1808 : | | T.FEXT fexp => | ||
1809 : | ExtensionComp.compileFext (reducer()) {e=fexp, fd=fd, an=an} | ||
1810 : | | _ => error("doFexpr''") | ||
1811 : | leunga | 1142 | ) |
1812 : | leunga | 731 | |
1813 : | (*======================================================== | ||
1814 : | * Tie the two styles of fp code generation together | ||
1815 : | *========================================================*) | ||
1816 : | and fstore(fty, ea, d, mem, an) = | ||
1817 : | if enableFastFPMode andalso !fast_floating_point | ||
1818 : | then fstore''(fty, ea, d, mem, an) | ||
1819 : | else fstore'(fty, ea, d, mem, an) | ||
1820 : | and fload(fty, ea, d, mem, an) = | ||
1821 : | if enableFastFPMode andalso !fast_floating_point | ||
1822 : | then fload''(fty, ea, d, mem, an) | ||
1823 : | else fload'(fty, ea, d, mem, an) | ||
1824 : | and fexpr e = | ||
1825 : | if enableFastFPMode andalso !fast_floating_point | ||
1826 : | then fexpr'' e else fexpr' e | ||
1827 : | and doFexpr(fty, e, fd, an) = | ||
1828 : | if enableFastFPMode andalso !fast_floating_point | ||
1829 : | then doFexpr''(fty, e, fd, an) | ||
1830 : | else doFexpr'(fty, e, fd, an) | ||
1831 : | |||
1832 : | leunga | 797 | (*================================================================ |
1833 : | * Optimizations for x := x op y | ||
1834 : | * Special optimizations: | ||
1835 : | * Generate a binary operator, result must in memory. | ||
1836 : | * The source must not be in memory | ||
1837 : | *================================================================*) | ||
1838 : | and binaryMem(binOp, src, dst, mem, an) = | ||
1839 : | mark(I.BINARY{binOp=binOp, src=immedOrReg(operand src), | ||
1840 : | dst=address(dst,mem)}, an) | ||
1841 : | and unaryMem(unOp, opnd, mem, an) = | ||
1842 : | mark(I.UNARY{unOp=unOp, opnd=address(opnd,mem)}, an) | ||
1843 : | |||
1844 : | and isOne(T.LI n) = n = one | ||
1845 : | | isOne _ = false | ||
1846 : | |||
1847 : | (* | ||
1848 : | * Perform optimizations based on recognizing | ||
1849 : | * x := x op y or | ||
1850 : | * x := y op x | ||
1851 : | * first. | ||
1852 : | *) | ||
1853 : | and store(ty, ea, d, mem, an, | ||
1854 : | {INC,DEC,ADD,SUB,NOT,NEG,SHL,SHR,SAR,OR,AND,XOR}, | ||
1855 : | doStore | ||
1856 : | ) = | ||
1857 : | let fun default() = doStore(ea, d, mem, an) | ||
1858 : | fun binary1(t, t', unary, binary, ea', x) = | ||
1859 : | if t = ty andalso t' = ty then | ||
1860 : | if MLTreeUtils.eqRexp(ea, ea') then | ||
1861 : | if isOne x then unaryMem(unary, ea, mem, an) | ||
1862 : | else binaryMem(binary, x, ea, mem, an) | ||
1863 : | else default() | ||
1864 : | else default() | ||
1865 : | fun unary(t,unOp, ea') = | ||
1866 : | if t = ty andalso MLTreeUtils.eqRexp(ea, ea') then | ||
1867 : | unaryMem(unOp, ea, mem, an) | ||
1868 : | else default() | ||
1869 : | fun binary(t,t',binOp,ea',x) = | ||
1870 : | if t = ty andalso t' = ty andalso | ||
1871 : | MLTreeUtils.eqRexp(ea, ea') then | ||
1872 : | binaryMem(binOp, x, ea, mem, an) | ||
1873 : | else default() | ||
1874 : | |||
1875 : | fun binaryCom1(t,unOp,binOp,x,y) = | ||
1876 : | if t = ty then | ||
1877 : | let fun again() = | ||
1878 : | case y of | ||
1879 : | T.LOAD(ty',ea',_) => | ||
1880 : | if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then | ||
1881 : | if isOne x then unaryMem(unOp, ea, mem, an) | ||
1882 : | else binaryMem(binOp,x,ea,mem,an) | ||
1883 : | else default() | ||
1884 : | | _ => default() | ||
1885 : | in case x of | ||
1886 : | T.LOAD(ty',ea',_) => | ||
1887 : | if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then | ||
1888 : | if isOne y then unaryMem(unOp, ea, mem, an) | ||
1889 : | else binaryMem(binOp,y,ea,mem,an) | ||
1890 : | else again() | ||
1891 : | | _ => again() | ||
1892 : | end | ||
1893 : | else default() | ||
1894 : | |||
1895 : | fun binaryCom(t,binOp,x,y) = | ||
1896 : | if t = ty then | ||
1897 : | let fun again() = | ||
1898 : | case y of | ||
1899 : | T.LOAD(ty',ea',_) => | ||
1900 : | if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then | ||
1901 : | binaryMem(binOp,x,ea,mem,an) | ||
1902 : | else default() | ||
1903 : | | _ => default() | ||
1904 : | in case x of | ||
1905 : | T.LOAD(ty',ea',_) => | ||
1906 : | if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then | ||
1907 : | binaryMem(binOp,y,ea,mem,an) | ||
1908 : | else again() | ||
1909 : | | _ => again() | ||
1910 : | end | ||
1911 : | else default() | ||
1912 : | |||
1913 : | in case d of | ||
1914 : | T.ADD(t,x,y) => binaryCom1(t,INC,ADD,x,y) | ||
1915 : | | T.SUB(t,T.LOAD(t',ea',_),x) => binary1(t,t',DEC,SUB,ea',x) | ||
1916 : | | T.ORB(t,x,y) => binaryCom(t,OR,x,y) | ||
1917 : | | T.ANDB(t,x,y) => binaryCom(t,AND,x,y) | ||
1918 : | | T.XORB(t,x,y) => binaryCom(t,XOR,x,y) | ||
1919 : | | T.SLL(t,T.LOAD(t',ea',_),x) => binary(t,t',SHL,ea',x) | ||
1920 : | | T.SRL(t,T.LOAD(t',ea',_),x) => binary(t,t',SHR,ea',x) | ||
1921 : | | T.SRA(t,T.LOAD(t',ea',_),x) => binary(t,t',SAR,ea',x) | ||
1922 : | | T.NEG(t,T.LOAD(t',ea',_)) => unary(t,NEG,ea') | ||
1923 : | | T.NOTB(t,T.LOAD(t',ea',_)) => unary(t,NOT,ea') | ||
1924 : | | _ => default() | ||
1925 : | end (* store *) | ||
1926 : | |||
1927 : | george | 545 | (* generate code for a statement *) |
1928 : | and stmt(T.MV(_, rd, e), an) = doExpr(e, rd, an) | ||
1929 : | | stmt(T.FMV(fty, fd, e), an) = doFexpr(fty, e, fd, an) | ||
1930 : | | stmt(T.CCMV(ccd, e), an) = doCCexpr(e, ccd, an) | ||
1931 : | | stmt(T.COPY(_, dst, src), an) = copy(dst, src, an) | ||
1932 : | | stmt(T.FCOPY(fty, dst, src), an) = fcopy(fty, dst, src, an) | ||
1933 : | leunga | 744 | | stmt(T.JMP(e, labs), an) = jmp(e, labs, an) |
1934 : | blume | 839 | | stmt(T.CALL{funct, targets, defs, uses, region, pops, ...}, an) = |
1935 : | call(funct,targets,defs,uses,region,[],an, pops) | ||
1936 : | | stmt(T.FLOW_TO(T.CALL{funct, targets, defs, uses, region, pops, ...}, | ||
1937 : | leunga | 796 | cutTo), an) = |
1938 : | blume | 839 | call(funct,targets,defs,uses,region,cutTo,an, pops) |
1939 : | george | 545 | | stmt(T.RET _, an) = mark(I.RET NONE, an) |
1940 : | leunga | 797 | | stmt(T.STORE(8, ea, d, mem), an) = |
1941 : | store(8, ea, d, mem, an, opcodes8, store8) | ||
1942 : | | stmt(T.STORE(16, ea, d, mem), an) = | ||
1943 : | store(16, ea, d, mem, an, opcodes16, store16) | ||
1944 : | | stmt(T.STORE(32, ea, d, mem), an) = | ||
1945 : | store(32, ea, d, mem, an, opcodes32, store32) | ||
1946 : | |||
1947 : | george | 545 | | stmt(T.FSTORE(fty, ea, d, mem), an) = fstore(fty, ea, d, mem, an) |
1948 : | leunga | 744 | | stmt(T.BCC(cc, lab), an) = branch(cc, lab, an) |
1949 : | george | 545 | | stmt(T.DEFINE l, _) = defineLabel l |
1950 : | | stmt(T.ANNOTATION(s, a), an) = stmt(s, a::an) | ||
1951 : | george | 555 | | stmt(T.EXT s, an) = |
1952 : | ExtensionComp.compileSext (reducer()) {stm=s, an=an} | ||
1953 : | george | 545 | | stmt(s, _) = doStmts(Gen.compileStm s) |
1954 : | |||
1955 : | and doStmt s = stmt(s, []) | ||
1956 : | and doStmts ss = app doStmt ss | ||
1957 : | |||
1958 : | and beginCluster' _ = | ||
1959 : | ((* Must be cleared by the client. | ||
1960 : | * if rewriteMemReg then memRegsUsed := 0w0 else (); | ||
1961 : | *) | ||
1962 : | leunga | 731 | floatingPointUsed := false; |
1963 : | trapLabel := NONE; | ||
1964 : | beginCluster 0 | ||
1965 : | ) | ||
1966 : | george | 545 | and endCluster' a = |
1967 : | monnier | 247 | (case !trapLabel |
1968 : | monnier | 411 | of NONE => () |
1969 : | george | 545 | | SOME(_, lab) => (defineLabel lab; emit(I.INTO)) |
1970 : | monnier | 411 | (*esac*); |
1971 : | leunga | 731 | (* If floating point has been used allocate an extra |
1972 : | * register just in case we didn't use any explicit register | ||
1973 : | *) | ||
1974 : | if !floatingPointUsed then (newFreg(); ()) | ||
1975 : | else (); | ||
1976 : | george | 545 | endCluster(a) |
1977 : | ) | ||
1978 : | |||
1979 : | and reducer() = | ||
1980 : | george | 984 | TS.REDUCER{reduceRexp = expr, |
1981 : | george | 545 | reduceFexp = fexpr, |
1982 : | reduceCCexp = ccExpr, | ||
1983 : | reduceStm = stmt, | ||
1984 : | operand = operand, | ||
1985 : | reduceOperand = reduceOpnd, | ||
1986 : | addressOf = fn e => address(e, I.Region.memory), (*XXX*) | ||
1987 : | george | 1009 | emit = mark', |
1988 : | george | 545 | instrStream = instrStream, |
1989 : | mltreeStream = self() | ||
1990 : | } | ||
1991 : | |||
1992 : | and self() = | ||
1993 : | george | 984 | TS.S.STREAM |
1994 : | leunga | 815 | { beginCluster = beginCluster', |
1995 : | endCluster = endCluster', | ||
1996 : | emit = doStmt, | ||
1997 : | pseudoOp = pseudoOp, | ||
1998 : | defineLabel = defineLabel, | ||
1999 : | entryLabel = entryLabel, | ||
2000 : | comment = comment, | ||
2001 : | annotation = annotation, | ||
2002 : | getAnnotations = getAnnotations, | ||
2003 : | exitBlock = fn mlrisc => exitBlock(cellset mlrisc) | ||
2004 : | george | 545 | } |
2005 : | |||
2006 : | in self() | ||
2007 : | monnier | 247 | end |
2008 : | |||
2009 : | george | 545 | end (* functor *) |
2010 : | |||
2011 : | end (* local *) |
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |