SCM Repository
Annotation of /sml/trunk/src/MLRISC/x86/mltree/x86.sml
Revision 1117
1 : | jhr | 1117 | (* x86.sml |
2 : | monnier | 247 | * |
3 : | * COPYRIGHT (c) 1998 Bell Laboratories. | ||
4 : | george | 545 | * |
5 : | * This is a revised version that takes into account | ||
6 : | * the extended x86 instruction set, and has better handling of | ||
7 : | * non-standard types. I've factored out the integer/floating point | ||
8 : | * comparison code and added optimizations for conditional moves. | ||
9 : | * The latter generates SETcc and CMOVcc (Pentium Pro only) instructions. | ||
10 : | * To avoid problems, I have tried to incorporate as much of | ||
11 : | * Lal's original magic incantations as possible. | ||
12 : | monnier | 247 | * |
13 : | george | 545 | * Some changes: |
14 : | * | ||
15 : | * 1. REMU/REMS/REMT are now supported | ||
16 : | * 2. COND is supported by generating SETcc and/or CMOVcc; this | ||
17 : | * may require at least a Pentium II to work. | ||
18 : | * 3. Division by a constant has been optimized. Division by | ||
19 : | * a power of 2 generates SHRL or SARL. | ||
20 : | * 4. Better addressing mode selection has been implemented. This should | ||
21 : | * improve array indexing on SML/NJ. | ||
22 : | * 5. Generate testl/testb instead of andl whenever appropriate. This | ||
23 : | * is recommended by the Intel Optimization Guide and seems to improve | ||
24 : | * boxity tests on SML/NJ. | ||
25 : | leunga | 731 | * |
26 : | * More changes for floating point: | ||
27 : | * A new mode is implemented which generates pseudo 3-address instructions | ||
28 : | * for floating point. These instructions are register allocated the | ||
29 : | * normal way, with the virtual registers mapped onto a set of pseudo | ||
30 : | * %fp registers. These registers are then mapped onto the %st registers | ||
31 : | * with a new postprocessing phase. | ||
32 : | * | ||
33 : | george | 545 | * -- Allen |
34 : | monnier | 247 | *) |
35 : | george | 545 | local |
36 : | val rewriteMemReg = true (* should we rewrite memRegs *) | ||
37 : | leunga | 731 | val enableFastFPMode = true (* set this to false to disable the mode *) |
38 : | george | 545 | in |
39 : | |||
40 : | monnier | 247 | functor X86 |
41 : | (structure X86Instr : X86INSTR | ||
42 : | leunga | 797 | structure MLTreeUtils : MLTREE_UTILS |
43 : | george | 933 | where T = X86Instr.T |
44 : | george | 555 | structure ExtensionComp : MLTREE_EXTENSION_COMP |
45 : | george | 933 | where I = X86Instr and T = X86Instr.T |
46 : | george | 984 | structure MLTreeStream : MLTREE_STREAM |
47 : | where T = ExtensionComp.T | ||
48 : | george | 545 | datatype arch = Pentium | PentiumPro | PentiumII | PentiumIII |
49 : | val arch : arch ref | ||
50 : | leunga | 593 | val cvti2f : |
51 : | leunga | 815 | {ty: X86Instr.T.ty, |
52 : | src: X86Instr.operand, | ||
53 : | (* source operand, guaranteed to be non-memory! *) | ||
54 : | an: Annotations.annotations ref (* cluster annotations *) | ||
55 : | } -> | ||
56 : | leunga | 593 | {instrs : X86Instr.instruction list,(* the instructions *) |
57 : | tempMem: X86Instr.operand, (* temporary for CVTI2F *) | ||
58 : | cleanup: X86Instr.instruction list (* cleanup code *) | ||
59 : | } | ||
60 : | leunga | 731 | (* When the following flag is set, we allocate floating point registers |
61 : | * directly on the floating point stack | ||
62 : | *) | ||
63 : | val fast_floating_point : bool ref | ||
64 : | george | 545 | ) : sig include MLTREECOMP |
65 : | val rewriteMemReg : bool | ||
66 : | end = | ||
67 : | monnier | 247 | struct |
68 : | leunga | 775 | structure I = X86Instr |
69 : | structure T = I.T | ||
70 : | george | 984 | structure TS = ExtensionComp.TS |
71 : | george | 545 | structure C = I.C |
72 : | structure Shuffle = Shuffle(I) | ||
73 : | monnier | 247 | structure W32 = Word32 |
74 : | george | 545 | structure A = MLRiscAnnotations |
75 : | george | 909 | structure CFG = ExtensionComp.CFG |
76 : | george | 889 | structure CB = CellsBasis |
77 : | monnier | 247 | |
78 : | george | 984 | type instrStream = (I.instruction,C.cellset,CFG.cfg) TS.stream |
79 : | type mltreeStream = (T.stm,T.mlrisc list,CFG.cfg) TS.stream | ||
80 : | leunga | 565 | |
81 : | datatype kind = REAL | INTEGER | ||
82 : | george | 545 | |
83 : | structure Gen = MLTreeGen | ||
84 : | (structure T = T | ||
85 : | jhr | 1117 | structure Cells = C |
86 : | george | 545 | val intTy = 32 |
87 : | val naturalWidths = [32] | ||
88 : | datatype rep = SE | ZE | NEITHER | ||
89 : | val rep = NEITHER | ||
90 : | ) | ||
91 : | |||
92 : | monnier | 411 | fun error msg = MLRiscErrorMsg.error("X86",msg) |
93 : | monnier | 247 | |
94 : | george | 545 | (* Should we perform automatic MemReg translation? |
95 : | * If this is on, we can avoid doing the RewritePseudo phase entirely. | ||
96 : | *) | ||
97 : | val rewriteMemReg = rewriteMemReg | ||
98 : | leunga | 731 | |
99 : | (* The following is hardcoded *) | ||
100 : | leunga | 744 | fun isMemReg r = rewriteMemReg andalso |
101 : | george | 889 | let val r = CB.registerNum r |
102 : | leunga | 744 | in r >= 8 andalso r < 32 |
103 : | end | ||
104 : | leunga | 731 | fun isFMemReg r = if enableFastFPMode andalso !fast_floating_point |
105 : | george | 889 | then let val r = CB.registerNum r |
106 : | leunga | 744 | in r >= 8 andalso r < 32 end |
107 : | leunga | 731 | else true |
108 : | leunga | 744 | val isAnyFMemReg = List.exists (fn r => |
109 : | george | 889 | let val r = CB.registerNum r |
110 : | leunga | 744 | in r >= 8 andalso r < 32 end |
111 : | ) | ||
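   (* Added note (not in the original source): under this convention a cell
    * whose register number falls in [8, 32) is treated as a "memory
    * register", i.e. a pseudo register that actually lives in memory; when
    * rewriteMemReg is on, the instruction selector expands such registers
    * inline, so the separate RewritePseudo phase can be skipped (see the
    * rewriteMemReg comment above).
    *)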
112 : | monnier | 247 | |
113 : | george | 555 | val ST0 = C.ST 0 |
114 : | val ST7 = C.ST 7 | ||
115 : | leunga | 797 | val one = T.I.int_1 |
116 : | george | 555 | |
117 : | leunga | 797 | val opcodes8 = {INC=I.INCB,DEC=I.DECB,ADD=I.ADDB,SUB=I.SUBB, |
118 : | NOT=I.NOTB,NEG=I.NEGB, | ||
119 : | SHL=I.SHLB,SHR=I.SHRB,SAR=I.SARB, | ||
120 : | OR=I.ORB,AND=I.ANDB,XOR=I.XORB} | ||
121 : | val opcodes16 = {INC=I.INCW,DEC=I.DECW,ADD=I.ADDW,SUB=I.SUBW, | ||
122 : | NOT=I.NOTW,NEG=I.NEGW, | ||
123 : | SHL=I.SHLW,SHR=I.SHRW,SAR=I.SARW, | ||
124 : | OR=I.ORW,AND=I.ANDW,XOR=I.XORW} | ||
125 : | val opcodes32 = {INC=I.INCL,DEC=I.DECL,ADD=I.ADDL,SUB=I.SUBL, | ||
126 : | NOT=I.NOTL,NEG=I.NEGL, | ||
127 : | SHL=I.SHLL,SHR=I.SHRL,SAR=I.SARL, | ||
128 : | OR=I.ORL,AND=I.ANDL,XOR=I.XORL} | ||
129 : | |||
130 : | george | 545 | (* |
131 : | * The code generator | ||
132 : | *) | ||
133 : | monnier | 411 | fun selectInstructions |
134 : | george | 545 | (instrStream as |
135 : | george | 1003 | TS.S.STREAM{emit=emitInstruction,defineLabel,entryLabel,pseudoOp, |
136 : | annotation,getAnnotations,beginCluster,endCluster,exitBlock,comment,...}) = | ||
137 : | let | ||
138 : | val emit = emitInstruction o I.INSTR | ||
139 : | exception EA | ||
140 : | monnier | 411 | |
141 : | george | 545 | (* label where a trap is generated -- one per cluster *) |
142 : | val trapLabel = ref (NONE: (I.instruction * Label.label) option) | ||
143 : | monnier | 247 | |
144 : | leunga | 731 | (* flag floating point generation *) |
145 : | val floatingPointUsed = ref false | ||
146 : | |||
147 : | george | 545 | (* effective address of an integer register *) |
148 : | leunga | 731 | fun IntReg r = if isMemReg r then I.MemReg r else I.Direct r |
149 : | and RealReg r = if isFMemReg r then I.FDirect r else I.FPR r | ||
150 : | monnier | 411 | |
151 : | george | 545 | (* Add an overflow trap *) |
152 : | fun trap() = | ||
153 : | let val jmp = | ||
154 : | case !trapLabel of | ||
155 : | george | 909 | NONE => let val label = Label.label "trap" () |
156 : | george | 1003 | val jmp = I.jcc{cond=I.O, |
157 : | leunga | 775 | opnd=I.ImmedLabel(T.LABEL label)} |
158 : | george | 545 | in trapLabel := SOME(jmp, label); jmp end |
159 : | | SOME(jmp, _) => jmp | ||
160 : | george | 1003 | in emitInstruction jmp end |
161 : | monnier | 411 | |
162 : | george | 545 | val newReg = C.newReg |
163 : | val newFreg = C.newFreg | ||
164 : | monnier | 247 | |
165 : | leunga | 731 | fun fsize 32 = I.FP32 |
166 : | | fsize 64 = I.FP64 | ||
167 : | | fsize 80 = I.FP80 | ||
168 : | | fsize _ = error "fsize" | ||
169 : | |||
170 : | george | 545 | (* mark an expression with a list of annotations *) |
171 : | george | 1009 | fun mark'(i,[]) = emitInstruction(i) |
172 : | george | 545 | | mark'(i,a::an) = mark'(I.ANNOTATION{i=i,a=a},an) |
173 : | monnier | 247 | |
174 : | george | 545 | (* annotate an expression and emit it *) |
175 : | george | 1009 | fun mark(i,an) = mark'(I.INSTR i,an) |
176 : | monnier | 247 | |
177 : | george | 1003 | val emits = app emitInstruction |
178 : | leunga | 731 | |
179 : | george | 545 | (* emit parallel copies for integers |
180 : | * Translates parallel copies that involve memregs into | ||
181 : | * individual copies. | ||
182 : | *) | ||
183 : | fun copy([], [], an) = () | ||
184 : | | copy(dst, src, an) = | ||
185 : | let fun mvInstr{dst as I.MemReg rd, src as I.MemReg rs} = | ||
186 : | george | 889 | if CB.sameColor(rd,rs) then [] else |
187 : | george | 545 | let val tmpR = I.Direct(newReg()) |
188 : | george | 1003 | in [I.move{mvOp=I.MOVL, src=src, dst=tmpR}, |
189 : | I.move{mvOp=I.MOVL, src=tmpR, dst=dst}] | ||
190 : | george | 545 | end |
191 : | | mvInstr{dst=I.Direct rd, src=I.Direct rs} = | ||
192 : | george | 889 | if CB.sameColor(rd,rs) then [] |
193 : | george | 1009 | else [I.COPY{k=CB.GP, sz=32, dst=[rd], src=[rs], tmp=NONE}] |
194 : | george | 1003 | | mvInstr{dst, src} = [I.move{mvOp=I.MOVL, src=src, dst=dst}] |
195 : | george | 545 | in |
196 : | leunga | 731 | emits (Shuffle.shuffle{mvInstr=mvInstr, ea=IntReg} |
197 : | leunga | 744 | {tmp=SOME(I.Direct(newReg())), |
198 : | george | 545 | dst=dst, src=src}) |
199 : | end | ||
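   (* Added note: x86 has no memory-to-memory move, so a copy between two
    * memRegs is split into two MOVLs through a fresh virtual register, e.g.
    *   movl  <memReg rs>, tmp
    *   movl  tmp, <memReg rd>
    * with tmp obtained from newReg(); copies between ordinary registers are
    * left as a single parallel COPY for the register allocator.
    *)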
200 : | |||
201 : | (* conversions *) | ||
202 : | val itow = Word.fromInt | ||
203 : | val wtoi = Word.toInt | ||
204 : | george | 761 | fun toInt32 i = T.I.toInt32(32, i) |
205 : | george | 545 | val w32toi32 = Word32.toLargeIntX |
206 : | val i32tow32 = Word32.fromLargeInt | ||
207 : | monnier | 247 | |
208 : | george | 545 | (* One day, this is going to bite us when precision(LargeInt)>32 *) |
209 : | fun wToInt32 w = Int32.fromLarge(Word32.toLargeIntX w) | ||
210 : | monnier | 247 | |
211 : | george | 545 | (* some useful registers *) |
212 : | val eax = I.Direct(C.eax) | ||
213 : | val ecx = I.Direct(C.ecx) | ||
214 : | val edx = I.Direct(C.edx) | ||
215 : | monnier | 247 | |
216 : | leunga | 775 | fun immedLabel lab = I.ImmedLabel(T.LABEL lab) |
217 : | george | 545 | |
218 : | (* Is the expression zero? *) | ||
219 : | george | 761 | fun isZero(T.LI z) = T.I.isZero z |
220 : | george | 545 | | isZero(T.MARK(e,a)) = isZero e |
221 : | | isZero _ = false | ||
222 : | (* Does the expression set the zero bit? | ||
223 : | * WARNING: we assume these things are not optimized out! | ||
224 : | *) | ||
225 : | fun setZeroBit(T.ANDB _) = true | ||
226 : | | setZeroBit(T.ORB _) = true | ||
227 : | | setZeroBit(T.XORB _) = true | ||
228 : | | setZeroBit(T.SRA _) = true | ||
229 : | | setZeroBit(T.SRL _) = true | ||
230 : | | setZeroBit(T.SLL _) = true | ||
231 : | leunga | 695 | | setZeroBit(T.SUB _) = true |
232 : | | setZeroBit(T.ADDT _) = true | ||
233 : | | setZeroBit(T.SUBT _) = true | ||
234 : | george | 545 | | setZeroBit(T.MARK(e, _)) = setZeroBit e |
235 : | | setZeroBit _ = false | ||
236 : | monnier | 247 | |
237 : | leunga | 695 | fun setZeroBit2(T.ANDB _) = true |
238 : | | setZeroBit2(T.ORB _) = true | ||
239 : | | setZeroBit2(T.XORB _) = true | ||
240 : | | setZeroBit2(T.SRA _) = true | ||
241 : | | setZeroBit2(T.SRL _) = true | ||
242 : | | setZeroBit2(T.SLL _) = true | ||
243 : | | setZeroBit2(T.ADD(32, _, _)) = true (* can't use leal! *) | ||
244 : | | setZeroBit2(T.SUB _) = true | ||
245 : | | setZeroBit2(T.ADDT _) = true | ||
246 : | | setZeroBit2(T.SUBT _) = true | ||
247 : | | setZeroBit2(T.MARK(e, _)) = setZeroBit2 e | ||
248 : | | setZeroBit2 _ = false | ||
249 : | |||
250 : | leunga | 731 | (* emit parallel copies for floating point |
251 : | * Normal version. | ||
252 : | *) | ||
253 : | fun fcopy'(fty, [], [], _) = () | ||
254 : | | fcopy'(fty, dst as [_], src as [_], an) = | ||
255 : | george | 1009 | mark'(I.COPY{k=CB.FP, sz=fty, dst=dst,src=src,tmp=NONE}, an) |
256 : | leunga | 731 | | fcopy'(fty, dst, src, an) = |
257 : | george | 1009 | mark'(I.COPY{k=CB.FP, sz=fty, dst=dst,src=src,tmp=SOME(I.FDirect(newFreg()))}, an) |
258 : | monnier | 247 | |
259 : | leunga | 731 | (* emit parallel copies for floating point. |
260 : | * Fast version. | ||
261 : | * Translates parallel copies that involve memregs into | ||
262 : | * individual copies. | ||
263 : | *) | ||
264 : | |||
265 : | fun fcopy''(fty, [], [], _) = () | ||
266 : | | fcopy''(fty, dst, src, an) = | ||
267 : | if true orelse isAnyFMemReg dst orelse isAnyFMemReg src then | ||
268 : | let val fsize = fsize fty | ||
269 : | george | 1003 | fun mvInstr{dst, src} = [I.fmove{fsize=fsize, src=src, dst=dst}] |
270 : | leunga | 731 | in |
271 : | emits (Shuffle.shuffle{mvInstr=mvInstr, ea=RealReg} | ||
272 : | leunga | 744 | {tmp=case dst of |
273 : | leunga | 731 | [_] => NONE |
274 : | | _ => SOME(I.FPR(newReg())), | ||
275 : | dst=dst, src=src}) | ||
276 : | end | ||
277 : | else | ||
278 : | george | 1009 | mark'(I.COPY{k=CB.FP, sz=fty, dst=dst, |
279 : | src=src,tmp= | ||
280 : | leunga | 731 | case dst of |
281 : | [_] => NONE | ||
282 : | | _ => SOME(I.FPR(newFreg()))}, an) | ||
283 : | |||
284 : | fun fcopy x = if enableFastFPMode andalso !fast_floating_point | ||
285 : | then fcopy'' x else fcopy' x | ||
286 : | |||
287 : | george | 545 | (* Translates MLTREE condition code to x86 condition code *) |
288 : | fun cond T.LT = I.LT | cond T.LTU = I.B | ||
289 : | | cond T.LE = I.LE | cond T.LEU = I.BE | ||
290 : | | cond T.EQ = I.EQ | cond T.NE = I.NE | ||
291 : | | cond T.GE = I.GE | cond T.GEU = I.AE | ||
292 : | | cond T.GT = I.GT | cond T.GTU = I.A | ||
293 : | monnier | 247 | |
294 : | leunga | 815 | fun zero dst = emit(I.BINARY{binOp=I.XORL, src=dst, dst=dst}) |
295 : | |||
296 : | george | 545 | (* Move and annotate *) |
297 : | fun move'(src as I.Direct s, dst as I.Direct d, an) = | ||
298 : | george | 889 | if CB.sameColor(s,d) then () |
299 : | george | 1009 | else mark'(I.COPY{k=CB.GP, sz=32, dst=[d], src=[s], tmp=NONE}, an) |
300 : | leunga | 815 | | move'(I.Immed 0, dst as I.Direct d, an) = |
301 : | mark(I.BINARY{binOp=I.XORL, src=dst, dst=dst}, an) | ||
302 : | george | 545 | | move'(src, dst, an) = mark(I.MOVE{mvOp=I.MOVL, src=src, dst=dst}, an) |
303 : | monnier | 247 | |
304 : | george | 545 | (* Move only! *) |
305 : | fun move(src, dst) = move'(src, dst, []) | ||
306 : | monnier | 247 | |
307 : | george | 545 | val readonly = I.Region.readonly |
308 : | monnier | 247 | |
309 : | george | 545 | (* |
310 : | george | 761 | * Compute an effective address. |
311 : | george | 545 | *) |
312 : | george | 761 | fun address(ea, mem) = let |
313 : | george | 545 | (* Keep building a bigger and bigger effective address expression
314 : | * The input is a list of trees | ||
315 : | * b -- base | ||
316 : | * i -- index | ||
317 : | * s -- scale | ||
318 : | * d -- immed displacement | ||
319 : | *) | ||
320 : | fun doEA([], b, i, s, d) = makeAddressingMode(b, i, s, d) | ||
321 : | | doEA(t::trees, b, i, s, d) = | ||
322 : | (case t of | ||
323 : | george | 761 | T.LI n => doEAImmed(trees, toInt32 n, b, i, s, d) |
324 : | leunga | 775 | | T.CONST _ => doEALabel(trees, t, b, i, s, d) |
325 : | | T.LABEL _ => doEALabel(trees, t, b, i, s, d) | ||
326 : | | T.LABEXP le => doEALabel(trees, le, b, i, s, d) | ||
327 : | george | 545 | | T.ADD(32, t1, t2 as T.REG(_,r)) => |
328 : | if isMemReg r then doEA(t2::t1::trees, b, i, s, d) | ||
329 : | else doEA(t1::t2::trees, b, i, s, d) | ||
330 : | | T.ADD(32, t1, t2) => doEA(t1::t2::trees, b, i, s, d) | ||
331 : | | T.SUB(32, t1, T.LI n) => | ||
332 : | george | 761 | doEA(t1::T.LI(T.I.NEG(32,n))::trees, b, i, s, d) |
333 : | | T.SLL(32, t1, T.LI n) => let | ||
334 : | val n = T.I.toInt(32, n) | ||
335 : | in | ||
336 : | case n | ||
337 : | of 0 => displace(trees, t1, b, i, s, d) | ||
338 : | | 1 => indexed(trees, t1, t, 1, b, i, s, d) | ||
339 : | | 2 => indexed(trees, t1, t, 2, b, i, s, d) | ||
340 : | | 3 => indexed(trees, t1, t, 3, b, i, s, d) | ||
341 : | | _ => displace(trees, t, b, i, s, d) | ||
342 : | end | ||
343 : | george | 545 | | t => displace(trees, t, b, i, s, d) |
344 : | ) | ||
345 : | monnier | 247 | |
346 : | george | 545 | (* Add an immed constant *) |
347 : | and doEAImmed(trees, 0, b, i, s, d) = doEA(trees, b, i, s, d) | ||
348 : | | doEAImmed(trees, n, b, i, s, I.Immed m) = | ||
349 : | george | 761 | doEA(trees, b, i, s, I.Immed(n+m)) |
350 : | george | 545 | | doEAImmed(trees, n, b, i, s, I.ImmedLabel le) = |
351 : | leunga | 775 | doEA(trees, b, i, s, |
352 : | I.ImmedLabel(T.ADD(32,le,T.LI(T.I.fromInt32(32, n))))) | ||
353 : | george | 545 | | doEAImmed(trees, n, b, i, s, _) = error "doEAImmed" |
354 : | monnier | 247 | |
355 : | george | 545 | (* Add a label expression *) |
356 : | and doEALabel(trees, le, b, i, s, I.Immed 0) = | ||
357 : | doEA(trees, b, i, s, I.ImmedLabel le) | ||
358 : | | doEALabel(trees, le, b, i, s, I.Immed m) = | ||
359 : | doEA(trees, b, i, s, | ||
360 : | leunga | 775 | I.ImmedLabel(T.ADD(32,le,T.LI(T.I.fromInt32(32, m)))) |
361 : | george | 545 | handle Overflow => error "doEALabel: constant too large") |
362 : | | doEALabel(trees, le, b, i, s, I.ImmedLabel le') = | ||
363 : | leunga | 775 | doEA(trees, b, i, s, I.ImmedLabel(T.ADD(32,le,le'))) |
364 : | george | 545 | | doEALabel(trees, le, b, i, s, _) = error "doEALabel" |
365 : | monnier | 247 | |
366 : | george | 545 | and makeAddressingMode(NONE, NONE, _, disp) = disp |
367 : | | makeAddressingMode(SOME base, NONE, _, disp) = | ||
368 : | I.Displace{base=base, disp=disp, mem=mem} | ||
369 : | | makeAddressingMode(base, SOME index, scale, disp) = | ||
370 : | george | 761 | I.Indexed{base=base, index=index, scale=scale, |
371 : | george | 545 | disp=disp, mem=mem} |
372 : | monnier | 247 | |
373 : | george | 545 | (* generate code for tree and ensure that it is not in %esp *) |
374 : | and exprNotEsp tree = | ||
375 : | let val r = expr tree | ||
376 : | george | 889 | in if CB.sameColor(r, C.esp) then |
377 : | george | 545 | let val tmp = newReg() |
378 : | in move(I.Direct r, I.Direct tmp); tmp end | ||
379 : | else r | ||
380 : | end | ||
381 : | monnier | 247 | |
382 : | george | 545 | (* Add a base register *) |
383 : | and displace(trees, t, NONE, i, s, d) = (* no base yet *) | ||
384 : | doEA(trees, SOME(expr t), i, s, d) | ||
385 : | | displace(trees, t, b as SOME base, NONE, _, d) = (* no index *) | ||
386 : | (* make t the index, but make sure that it is not %esp! *) | ||
387 : | let val i = expr t | ||
388 : | george | 889 | in if CB.sameColor(i, C.esp) then |
389 : | george | 545 | (* swap base and index *) |
390 : | george | 889 | if CB.sameColor(base, C.esp) then |
391 : | george | 545 | doEA(trees, SOME i, b, 0, d) |
392 : | else (* base and index = %esp! *) | ||
393 : | let val index = newReg() | ||
394 : | in move(I.Direct i, I.Direct index); | ||
395 : | doEA(trees, b, SOME index, 0, d) | ||
396 : | end | ||
397 : | else | ||
398 : | doEA(trees, b, SOME i, 0, d) | ||
399 : | end | ||
400 : | | displace(trees, t, SOME base, i, s, d) = (* base and index *) | ||
401 : | let val b = expr(T.ADD(32,T.REG(32,base),t)) | ||
402 : | in doEA(trees, SOME b, i, s, d) end | ||
403 : | monnier | 247 | |
404 : | george | 545 | (* Add an indexed register *) |
405 : | and indexed(trees, t, t0, scale, b, NONE, _, d) = (* no index yet *) | ||
406 : | doEA(trees, b, SOME(exprNotEsp t), scale, d) | ||
407 : | | indexed(trees, _, t0, _, NONE, i, s, d) = (* no base *) | ||
408 : | doEA(trees, SOME(expr t0), i, s, d) | ||
409 : | | indexed(trees, _, t0, _, SOME base, i, s, d) = (*base and index*) | ||
410 : | let val b = expr(T.ADD(32, t0, T.REG(32, base))) | ||
411 : | in doEA(trees, SOME b, i, s, d) end | ||
412 : | |||
413 : | in case doEA([ea], NONE, NONE, 0, I.Immed 0) of | ||
414 : | I.Immed _ => raise EA | ||
415 : | | I.ImmedLabel le => I.LabelEA le | ||
416 : | | ea => ea | ||
417 : | end (* address *) | ||
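   (* Added worked example: for an array access such as
    *   T.ADD(32, T.REG(32,a), T.SLL(32, T.REG(32,i), T.LI 2))
    * doEA folds the shift into the scale field, yielding roughly
    *   I.Indexed{base=SOME a, index=i, scale=2, disp=I.Immed 0, mem=mem}
    * i.e. the single x86 mode (%a,%i,4) -- the addressing mode selection
    * mentioned in point 4 of the header comment.
    *)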
418 : | monnier | 247 | |
419 : | george | 545 | (* reduce an expression into an operand *) |
420 : | george | 761 | and operand(T.LI i) = I.Immed(toInt32(i)) |
421 : | leunga | 775 | | operand(x as (T.CONST _ | T.LABEL _)) = I.ImmedLabel x |
422 : | | operand(T.LABEXP le) = I.ImmedLabel le | ||
423 : | george | 545 | | operand(T.REG(_,r)) = IntReg r |
424 : | | operand(T.LOAD(32,ea,mem)) = address(ea, mem) | ||
425 : | | operand(t) = I.Direct(expr t) | ||
426 : | monnier | 247 | |
427 : | george | 545 | and moveToReg(opnd) = |
428 : | let val dst = I.Direct(newReg()) | ||
429 : | in move(opnd, dst); dst | ||
430 : | end | ||
431 : | monnier | 247 | |
432 : | george | 545 | and reduceOpnd(I.Direct r) = r |
433 : | | reduceOpnd opnd = | ||
434 : | let val dst = newReg() | ||
435 : | in move(opnd, I.Direct dst); dst | ||
436 : | end | ||
437 : | monnier | 247 | |
438 : | george | 545 | (* ensure that the operand is either an immed or register *) |
439 : | and immedOrReg(opnd as I.Displace _) = moveToReg opnd | ||
440 : | | immedOrReg(opnd as I.Indexed _) = moveToReg opnd | ||
441 : | | immedOrReg(opnd as I.MemReg _) = moveToReg opnd | ||
442 : | | immedOrReg(opnd as I.LabelEA _) = moveToReg opnd | ||
443 : | | immedOrReg opnd = opnd | ||
444 : | monnier | 247 | |
445 : | george | 545 | and isImmediate(I.Immed _) = true |
446 : | | isImmediate(I.ImmedLabel _) = true | ||
447 : | | isImmediate _ = false | ||
448 : | monnier | 247 | |
449 : | george | 545 | and regOrMem opnd = if isImmediate opnd then moveToReg opnd else opnd |
450 : | |||
451 : | and isMemOpnd opnd = | ||
452 : | (case opnd of | ||
453 : | I.Displace _ => true | ||
454 : | | I.Indexed _ => true | ||
455 : | | I.MemReg _ => true | ||
456 : | | I.LabelEA _ => true | ||
457 : | george | 555 | | I.FDirect f => true |
458 : | george | 545 | | _ => false |
459 : | ) | ||
460 : | |||
461 : | (* | ||
462 : | * Compute an integer expression and put the result in | ||
463 : | * the destination register rd. | ||
464 : | *) | ||
465 : | george | 889 | and doExpr(exp, rd : CB.cell, an) = |
466 : | george | 545 | let val rdOpnd = IntReg rd |
467 : | monnier | 247 | |
468 : | george | 889 | fun equalRd(I.Direct r) = CB.sameColor(r, rd) |
469 : | | equalRd(I.MemReg r) = CB.sameColor(r, rd) | ||
470 : | george | 545 | | equalRd _ = false |
471 : | monnier | 247 | |
472 : | george | 545 | (* Emit a binary operator. If the destination is |
473 : | * a memReg, do something smarter. | ||
474 : | *) | ||
475 : | fun genBinary(binOp, opnd1, opnd2) = | ||
476 : | if isMemReg rd andalso | ||
477 : | (isMemOpnd opnd1 orelse isMemOpnd opnd2) orelse | ||
478 : | equalRd(opnd2) | ||
479 : | then | ||
480 : | let val tmpR = newReg() | ||
481 : | val tmp = I.Direct tmpR | ||
482 : | in move(opnd1, tmp); | ||
483 : | mark(I.BINARY{binOp=binOp, src=opnd2, dst=tmp}, an); | ||
484 : | move(tmp, rdOpnd) | ||
485 : | end | ||
486 : | else | ||
487 : | (move(opnd1, rdOpnd); | ||
488 : | mark(I.BINARY{binOp=binOp, src=opnd2, dst=rdOpnd}, an) | ||
489 : | ) | ||
490 : | monnier | 247 | |
491 : | george | 545 | (* Generate a binary operator; it may commute *) |
492 : | fun binaryComm(binOp, e1, e2) = | ||
493 : | let val (opnd1, opnd2) = | ||
494 : | case (operand e1, operand e2) of | ||
495 : | (x as I.Immed _, y) => (y, x) | ||
496 : | | (x as I.ImmedLabel _, y) => (y, x) | ||
497 : | | (x, y as I.Direct _) => (y, x) | ||
498 : | | (x, y) => (x, y) | ||
499 : | in genBinary(binOp, opnd1, opnd2) | ||
500 : | end | ||
501 : | |||
502 : | (* Generate a binary operator; non-commutative *) | ||
503 : | fun binary(binOp, e1, e2) = | ||
504 : | genBinary(binOp, operand e1, operand e2) | ||
505 : | |||
506 : | (* Generate a unary operator *) | ||
507 : | fun unary(unOp, e) = | ||
508 : | let val opnd = operand e | ||
509 : | in if isMemReg rd andalso isMemOpnd opnd then | ||
510 : | let val tmp = I.Direct(newReg()) | ||
511 : | in move(opnd, tmp); move(tmp, rdOpnd) | ||
512 : | end | ||
513 : | else move(opnd, rdOpnd); | ||
514 : | mark(I.UNARY{unOp=unOp, opnd=rdOpnd}, an) | ||
515 : | end | ||
516 : | |||
517 : | (* Generate shifts; the shift | ||
518 : | * amount must be a constant or in %ecx *) | ||
519 : | fun shift(opcode, e1, e2) = | ||
520 : | let val (opnd1, opnd2) = (operand e1, operand e2) | ||
521 : | in case opnd2 of | ||
522 : | I.Immed _ => genBinary(opcode, opnd1, opnd2) | ||
523 : | | _ => | ||
524 : | if equalRd(opnd2) then | ||
525 : | let val tmpR = newReg() | ||
526 : | val tmp = I.Direct tmpR | ||
527 : | in move(opnd1, tmp); | ||
528 : | move(opnd2, ecx); | ||
529 : | mark(I.BINARY{binOp=opcode, src=ecx, dst=tmp},an); | ||
530 : | move(tmp, rdOpnd) | ||
531 : | end | ||
532 : | else | ||
533 : | (move(opnd1, rdOpnd); | ||
534 : | move(opnd2, ecx); | ||
535 : | mark(I.BINARY{binOp=opcode, src=ecx, dst=rdOpnd},an) | ||
536 : | ) | ||
537 : | end | ||
538 : | |||
539 : | (* Division or remainder: dividend must be in %edx:%eax pair *) | ||
540 : | fun divrem(signed, overflow, e1, e2, resultReg) = | ||
541 : | let val (opnd1, opnd2) = (operand e1, operand e2) | ||
542 : | val _ = move(opnd1, eax) | ||
543 : | leunga | 815 | val oper = if signed then (emit(I.CDQ); I.IDIVL1) |
544 : | else (zero edx; I.DIVL1) | ||
545 : | george | 545 | in mark(I.MULTDIV{multDivOp=oper, src=regOrMem opnd2},an); |
546 : | move(resultReg, rdOpnd); | ||
547 : | if overflow then trap() else () | ||
548 : | end | ||
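   (* Added note: IDIVL1/DIVL1 divide the value in %edx:%eax by the source
    * operand, leaving the quotient in %eax and the remainder in %edx; this
    * is why divide() below selects eax as resultReg while rem() selects edx.
    * The signed path sign-extends %eax into %edx with CDQ, and the unsigned
    * path simply zeroes %edx.
    *)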
549 : | |||
550 : | (* Optimize the special case for division *) | ||
551 : | george | 761 | fun divide(signed, overflow, e1, e2 as T.LI n') = let |
552 : | val n = toInt32 n' | ||
553 : | val w = T.I.toWord32(32, n') | ||
554 : | fun isPowerOf2 w = W32.andb((w - 0w1), w) = 0w0 | ||
555 : | george | 545 | fun log2 n = (* n must be > 0!!! *) |
556 : | let fun loop(0w1,pow) = pow | ||
557 : | george | 761 | | loop(w,pow) = loop(W32.>>(w, 0w1),pow+1) |
558 : | george | 545 | in loop(n,0) end |
559 : | in if n > 1 andalso isPowerOf2 w then | ||
560 : | george | 761 | let val pow = T.LI(T.I.fromInt(32,log2 w)) |
561 : | george | 545 | in if signed then |
562 : | (* signed; simulate round towards zero *) | ||
563 : | george | 909 | let val label = Label.anon() |
564 : | george | 545 | val reg1 = expr e1 |
565 : | val opnd1 = I.Direct reg1 | ||
566 : | in if setZeroBit e1 then () | ||
567 : | else emit(I.CMPL{lsrc=opnd1, rsrc=I.Immed 0}); | ||
568 : | emit(I.JCC{cond=I.GE, opnd=immedLabel label}); | ||
569 : | emit(if n = 2 then | ||
570 : | I.UNARY{unOp=I.INCL, opnd=opnd1} | ||
571 : | else | ||
572 : | I.BINARY{binOp=I.ADDL, | ||
573 : | george | 761 | src=I.Immed(n - 1), |
574 : | george | 545 | dst=opnd1}); |
575 : | defineLabel label; | ||
576 : | shift(I.SARL, T.REG(32, reg1), pow) | ||
577 : | end | ||
578 : | else (* unsigned *) | ||
579 : | shift(I.SHRL, e1, pow) | ||
580 : | end | ||
581 : | else | ||
582 : | (* note the only way we can overflow is if | ||
583 : | * n = 0 or n = -1 | ||
584 : | *) | ||
585 : | divrem(signed, overflow andalso (n = ~1 orelse n = 0), | ||
586 : | e1, e2, eax) | ||
587 : | end | ||
588 : | | divide(signed, overflow, e1, e2) = | ||
589 : | divrem(signed, overflow, e1, e2, eax) | ||
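   (* Added worked example: T.DIVS(32, e, T.LI 8) goes through the
    * power-of-2 path above (pow = 3) and emits, roughly,
    *   cmpl $0, r ; jge L ; addl $7, r ; L: sarl $3, r
    * The addl $7 biases a negative dividend so that the arithmetic shift
    * rounds toward zero rather than toward negative infinity; the unsigned
    * case needs no fixup and is a single shrl $3, r.
    *)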
590 : | monnier | 247 | |
591 : | george | 545 | fun rem(signed, overflow, e1, e2) = |
592 : | divrem(signed, overflow, e1, e2, edx) | ||
593 : | leunga | 815 | |
594 : | (* Make sure the destination is a register *) | ||
595 : | fun dstMustBeReg f = | ||
596 : | if isMemReg rd then | ||
597 : | let val tmpR = newReg() | ||
598 : | val tmp = I.Direct(tmpR) | ||
599 : | in f(tmpR, tmp); move(tmp, rdOpnd) end | ||
600 : | else f(rd, rdOpnd) | ||
601 : | |||
602 : | george | 545 | (* unsigned integer multiplication *) |
603 : | fun uMultiply(e1, e2) = | ||
604 : | (* note e2 can never be (I.Direct edx) *) | ||
605 : | (move(operand e1, eax); | ||
606 : | leunga | 815 | mark(I.MULTDIV{multDivOp=I.MULL1, |
607 : | george | 545 | src=regOrMem(operand e2)},an); |
608 : | move(eax, rdOpnd) | ||
609 : | ) | ||
610 : | |||
611 : | (* signed integer multiplication: | ||
612 : | * The only forms that are allowed that also set the | ||
613 : | * OF and CF flags are: | ||
614 : | * | ||
615 : | leunga | 815 | * (dst) (src1) (src2) |
616 : | george | 545 | * imul r32, r32/m32, imm8 |
617 : | leunga | 815 | * (dst) (src) |
618 : | george | 545 | * imul r32, imm8 |
619 : | * imul r32, imm32 | ||
620 : | leunga | 815 | * imul r32, r32/m32 |
621 : | * Note: destination must be a register! | ||
622 : | george | 545 | *) |
623 : | fun multiply(e1, e2) = | ||
624 : | leunga | 815 | dstMustBeReg(fn (rd, rdOpnd) => |
625 : | let fun doit(i1 as I.Immed _, i2 as I.Immed _) = | ||
626 : | (move(i1, rdOpnd); | ||
627 : | mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=i2},an)) | ||
628 : | | doit(rm, i2 as I.Immed _) = doit(i2, rm) | ||
629 : | | doit(imm as I.Immed(i), rm) = | ||
630 : | mark(I.MUL3{dst=rd, src1=rm, src2=i},an) | ||
631 : | | doit(r1 as I.Direct _, r2 as I.Direct _) = | ||
632 : | (move(r1, rdOpnd); | ||
633 : | mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=r2},an)) | ||
634 : | | doit(r1 as I.Direct _, rm) = | ||
635 : | (move(r1, rdOpnd); | ||
636 : | mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=rm},an)) | ||
637 : | | doit(rm, r as I.Direct _) = doit(r, rm) | ||
638 : | | doit(rm1, rm2) = | ||
639 : | george | 545 | if equalRd rm2 then |
640 : | let val tmpR = newReg() | ||
641 : | val tmp = I.Direct tmpR | ||
642 : | in move(rm1, tmp); | ||
643 : | leunga | 815 | mark(I.BINARY{binOp=I.IMULL, dst=tmp, src=rm2},an); |
644 : | move(tmp, rdOpnd) | ||
645 : | george | 545 | end |
646 : | else | ||
647 : | leunga | 815 | (move(rm1, rdOpnd); |
648 : | mark(I.BINARY{binOp=I.IMULL, dst=rdOpnd, src=rm2},an) | ||
649 : | george | 545 | ) |
650 : | val (opnd1, opnd2) = (operand e1, operand e2) | ||
651 : | leunga | 815 | in doit(opnd1, opnd2) |
652 : | george | 545 | end |
653 : | leunga | 815 | ) |
654 : | monnier | 247 | |
655 : | george | 545 | (* Emit a load instruction; makes sure that the destination |
656 : | * is a register | ||
657 : | *) | ||
658 : | fun genLoad(mvOp, ea, mem) = | ||
659 : | dstMustBeReg(fn (_, dst) => | ||
660 : | mark(I.MOVE{mvOp=mvOp, src=address(ea, mem), dst=dst},an)) | ||
661 : | |||
662 : | (* Generate zero-extended loads *) | ||
663 : | fun load8(ea, mem) = genLoad(I.MOVZBL, ea, mem) | ||
664 : | fun load16(ea, mem) = genLoad(I.MOVZWL, ea, mem) | ||
665 : | fun load8s(ea, mem) = genLoad(I.MOVSBL, ea, mem) | ||
666 : | fun load16s(ea, mem) = genLoad(I.MOVSWL, ea, mem) | ||
667 : | fun load32(ea, mem) = genLoad(I.MOVL, ea, mem) | ||
668 : | |||
669 : | (* Generate sign-extended loads *) | ||
670 : | |||
671 : | (* Generate setcc instruction: | ||
672 : | * semantics: MV(rd, COND(_, T.CMP(ty, cc, t1, t2), yes, no)) | ||
673 : | leunga | 583 | * Bug: if eax is either t1 or t2 then a problem will occur!!! |
674 : | * Note that we have to use eax as the destination of the | ||
675 : | * setcc because it only works on the registers | ||
676 : | * %al, %bl, %cl, %dl and %[abcd]h. The last four registers | ||
677 : | * are inaccessible in 32 bit mode. | ||
678 : | george | 545 | *) |
679 : | fun setcc(ty, cc, t1, t2, yes, no) = | ||
680 : | leunga | 583 | let val (cc, yes, no) = |
681 : | if yes > no then (cc, yes, no) | ||
682 : | else (T.Basis.negateCond cc, no, yes) | ||
683 : | george | 545 | in (* Clear the destination first. |
684 : | * This is because stupid SETcc | ||
685 : | * only writes to the low order | ||
686 : | * byte. That's Intel architecture, folks. | ||
687 : | *) | ||
688 : | leunga | 695 | case (yes, no, cc) of |
689 : | (1, 0, T.LT) => | ||
690 : | let val tmp = I.Direct(expr(T.SUB(32,t1,t2))) | ||
691 : | in move(tmp, rdOpnd); | ||
692 : | emit(I.BINARY{binOp=I.SHRL,src=I.Immed 31,dst=rdOpnd}) | ||
693 : | end | ||
694 : | | (1, 0, T.GT) => | ||
695 : | let val tmp = I.Direct(expr(T.SUB(32,t1,t2))) | ||
696 : | in emit(I.UNARY{unOp=I.NOTL,opnd=tmp}); | ||
697 : | move(tmp, rdOpnd); | ||
698 : | emit(I.BINARY{binOp=I.SHRL,src=I.Immed 31,dst=rdOpnd}) | ||
699 : | end | ||
700 : | | (1, 0, _) => (* normal case *) | ||
701 : | george | 545 | let val cc = cmp(true, ty, cc, t1, t2, []) |
702 : | leunga | 583 | in mark(I.SET{cond=cond cc, opnd=eax}, an); |
703 : | leunga | 695 | emit(I.BINARY{binOp=I.ANDL,src=I.Immed 255, dst=eax}); |
704 : | leunga | 583 | move(eax, rdOpnd) |
705 : | end | ||
706 : | leunga | 695 | | (C1, C2, _) => |
707 : | george | 545 | (* general case; |
708 : | leunga | 583 | * from the Intel optimization guide p3-5 |
709 : | *) | ||
710 : | leunga | 695 | let val _ = zero eax; |
711 : | val cc = cmp(true, ty, cc, t1, t2, []) | ||
712 : | leunga | 583 | in case C1-C2 of |
713 : | D as (1 | 2 | 3 | 4 | 5 | 8 | 9) => | ||
714 : | let val (base,scale) = | ||
715 : | case D of | ||
716 : | 1 => (NONE, 0) | ||
717 : | | 2 => (NONE, 1) | ||
718 : | | 3 => (SOME C.eax, 1) | ||
719 : | | 4 => (NONE, 2) | ||
720 : | | 5 => (SOME C.eax, 2) | ||
721 : | | 8 => (NONE, 3) | ||
722 : | | 9 => (SOME C.eax, 3) | ||
723 : | val addr = I.Indexed{base=base, | ||
724 : | index=C.eax, | ||
725 : | scale=scale, | ||
726 : | disp=I.Immed C2, | ||
727 : | george | 545 | mem=readonly} |
728 : | leunga | 583 | val tmpR = newReg() |
729 : | val tmp = I.Direct tmpR | ||
730 : | in emit(I.SET{cond=cond cc, opnd=eax}); | ||
731 : | mark(I.LEA{r32=tmpR, addr=addr}, an); | ||
732 : | move(tmp, rdOpnd) | ||
733 : | end | ||
734 : | | D => | ||
735 : | (emit(I.SET{cond=cond(T.Basis.negateCond cc), | ||
736 : | opnd=eax}); | ||
737 : | emit(I.UNARY{unOp=I.DECL, opnd=eax}); | ||
738 : | emit(I.BINARY{binOp=I.ANDL, | ||
739 : | src=I.Immed D, dst=eax}); | ||
740 : | if C2 = 0 then | ||
741 : | move(eax, rdOpnd) | ||
742 : | else | ||
743 : | let val tmpR = newReg() | ||
744 : | val tmp = I.Direct tmpR | ||
745 : | in mark(I.LEA{addr= | ||
746 : | I.Displace{ | ||
747 : | base=C.eax, | ||
748 : | disp=I.Immed C2, | ||
749 : | mem=readonly}, | ||
750 : | r32=tmpR}, an); | ||
751 : | move(tmp, rdOpnd) | ||
752 : | end | ||
753 : | ) | ||
754 : | end | ||
755 : | george | 545 | end (* setcc *) |
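   (* Added worked example: for COND(32, CMP(...), LI 5, LI 0) the general
    * case above has C1=5, C2=0, D=5, picks (base=SOME C.eax, scale=2) and
    * emits roughly
    *   xorl %eax,%eax ; cmpl ... ; set<cc> %al ; leal (%eax,%eax,4), tmp
    * so tmp is 5 when the condition holds and 0 otherwise, with no branch
    * -- the Intel optimization guide trick cited above.
    *)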
756 : | |||
757 : | (* Generate cmovcc instruction. | ||
758 : | * on Pentium Pro and Pentium II only | ||
759 : | *) | ||
760 : | fun cmovcc(ty, cc, t1, t2, yes, no) = | ||
761 : | let fun genCmov(dstR, _) = | ||
762 : | let val _ = doExpr(no, dstR, []) (* false branch *) | ||
763 : | val cc = cmp(true, ty, cc, t1, t2, []) (* compare *) | ||
764 : | in mark(I.CMOV{cond=cond cc, src=operand yes, dst=dstR}, an) | ||
765 : | end | ||
766 : | in dstMustBeReg genCmov | ||
767 : | end | ||
768 : | |||
769 : | fun unknownExp exp = doExpr(Gen.compileRexp exp, rd, an) | ||
770 : | monnier | 247 | |
771 : | leunga | 606 | (* Add n to rd *) |
772 : | fun addN n = | ||
773 : | let val n = operand n | ||
774 : | val src = if isMemReg rd then immedOrReg n else n | ||
775 : | in mark(I.BINARY{binOp=I.ADDL, src=src, dst=rdOpnd}, an) end | ||
776 : | |||
777 : | george | 545 | (* Generate addition *) |
778 : | fun addition(e1, e2) = | ||
779 : | leunga | 606 | case e1 of |
780 : | george | 889 | T.REG(_,rs) => if CB.sameColor(rs,rd) then addN e2 |
781 : | leunga | 744 | else addition1(e1,e2) |
782 : | leunga | 606 | | _ => addition1(e1,e2) |
783 : | and addition1(e1, e2) = | ||
784 : | case e2 of | ||
785 : | george | 889 | T.REG(_,rs) => if CB.sameColor(rs,rd) then addN e1 |
786 : | leunga | 744 | else addition2(e1,e2) |
787 : | leunga | 606 | | _ => addition2(e1,e2) |
788 : | and addition2(e1,e2) = | ||
789 : | george | 545 | (dstMustBeReg(fn (dstR, _) => |
790 : | mark(I.LEA{r32=dstR, addr=address(exp, readonly)}, an)) | ||
791 : | handle EA => binaryComm(I.ADDL, e1, e2)) | ||
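   (* Added note: addition2 first tries to encode e1 + e2 as a single leal
    * by reusing the effective-address selector (address) above; if the
    * expression cannot serve as an leal operand (it folds down to a plain
    * immediate), address raises EA and the handler falls back to an
    * ordinary addl.
    *)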
792 : | monnier | 247 | |
793 : | |||
794 : | george | 545 | in case exp of |
795 : | T.REG(_,rs) => | ||
796 : | if isMemReg rs andalso isMemReg rd then | ||
797 : | let val tmp = I.Direct(newReg()) | ||
798 : | leunga | 731 | in move'(I.MemReg rs, tmp, an); |
799 : | george | 545 | move'(tmp, rdOpnd, []) |
800 : | end | ||
801 : | else move'(IntReg rs, rdOpnd, an) | ||
802 : | george | 761 | | T.LI z => let |
803 : | val n = toInt32 z | ||
804 : | in | ||
805 : | if n=0 then | ||
806 : | (* As per Fermin's request, special optimization for rd := 0. | ||
807 : | * Currently we don't bother with the size. | ||
808 : | *) | ||
809 : | if isMemReg rd then move'(I.Immed 0, rdOpnd, an) | ||
810 : | else mark(I.BINARY{binOp=I.XORL, src=rdOpnd, dst=rdOpnd}, an) | ||
811 : | else | ||
812 : | move'(I.Immed(n), rdOpnd, an) | ||
813 : | end | ||
814 : | leunga | 775 | | (T.CONST _ | T.LABEL _) => |
815 : | move'(I.ImmedLabel exp, rdOpnd, an) | ||
816 : | | T.LABEXP le => move'(I.ImmedLabel le, rdOpnd, an) | ||
817 : | monnier | 247 | |
818 : | george | 545 | (* 32-bit addition *) |
819 : | george | 761 | | T.ADD(32, e1, e2 as T.LI n) => let |
820 : | val n = toInt32 n | ||
821 : | in | ||
822 : | case n | ||
823 : | of 1 => unary(I.INCL, e1) | ||
824 : | | ~1 => unary(I.DECL, e1) | ||
825 : | | _ => addition(e1, e2) | ||
826 : | end | ||
827 : | | T.ADD(32, e1 as T.LI n, e2) => let | ||
828 : | val n = toInt32 n | ||
829 : | in | ||
830 : | case n | ||
831 : | of 1 => unary(I.INCL, e2) | ||
832 : | | ~1 => unary(I.DECL, e2) | ||
833 : | | _ => addition(e1, e2) | ||
834 : | end | ||
835 : | george | 545 | | T.ADD(32, e1, e2) => addition(e1, e2) |
836 : | monnier | 247 | |
837 : | leunga | 695 | (* 32-bit addition but set the flag! |
838 : | * This is a stupid hack for now. | ||
839 : | *) | ||
840 : | george | 761 | | T.ADD(0, e, e1 as T.LI n) => let |
841 : | val n = T.I.toInt(32, n) | ||
842 : | in | ||
843 : | if n=1 then unary(I.INCL, e) | ||
844 : | else if n = ~1 then unary(I.DECL, e) | ||
845 : | else binaryComm(I.ADDL, e, e1) | ||
846 : | end | ||
847 : | | T.ADD(0, e1 as T.LI n, e) => let | ||
848 : | val n = T.I.toInt(32, n) | ||
849 : | in | ||
850 : | if n=1 then unary(I.INCL, e) | ||
851 : | else if n = ~1 then unary(I.DECL, e) | ||
852 : | else binaryComm(I.ADDL, e1, e) | ||
853 : | end | ||
854 : | | T.ADD(0, e1, e2) => binaryComm(I.ADDL, e1, e2) | ||
855 : | |||
856 : | george | 545 | (* 32-bit subtraction *) |
857 : | george | 761 | | T.SUB(32, e1, e2 as T.LI n) => let |
858 : | val n = toInt32 n | ||
859 : | in | ||
860 : | case n | ||
861 : | of 0 => doExpr(e1, rd, an) | ||
862 : | | 1 => unary(I.DECL, e1) | ||
863 : | | ~1 => unary(I.INCL, e1) | ||
864 : | | _ => binary(I.SUBL, e1, e2) | ||
865 : | end | ||
866 : | | T.SUB(32, e1 as T.LI n, e2) => | ||
867 : | if T.I.isZero n then unary(I.NEGL, e2) | ||
868 : | else binary(I.SUBL, e1, e2) | ||
869 : | george | 545 | | T.SUB(32, e1, e2) => binary(I.SUBL, e1, e2) |
870 : | monnier | 247 | |
871 : | george | 545 | | T.MULU(32, x, y) => uMultiply(x, y) |
872 : | | T.DIVU(32, x, y) => divide(false, false, x, y) | ||
873 : | | T.REMU(32, x, y) => rem(false, false, x, y) | ||
874 : | monnier | 247 | |
875 : | george | 545 | | T.MULS(32, x, y) => multiply(x, y) |
876 : | | T.DIVS(32, x, y) => divide(true, false, x, y) | ||
877 : | | T.REMS(32, x, y) => rem(true, false, x, y) | ||
878 : | monnier | 247 | |
879 : | george | 545 | | T.ADDT(32, x, y) => (binaryComm(I.ADDL, x, y); trap()) |
880 : | | T.SUBT(32, x, y) => (binary(I.SUBL, x, y); trap()) | ||
881 : | | T.MULT(32, x, y) => (multiply(x, y); trap()) | ||
882 : | | T.DIVT(32, x, y) => divide(true, true, x, y) | ||
883 : | | T.REMT(32, x, y) => rem(true, true, x, y) | ||
884 : | monnier | 247 | |
885 : | george | 545 | | T.ANDB(32, x, y) => binaryComm(I.ANDL, x, y) |
886 : | | T.ORB(32, x, y) => binaryComm(I.ORL, x, y) | ||
887 : | | T.XORB(32, x, y) => binaryComm(I.XORL, x, y) | ||
888 : | | T.NOTB(32, x) => unary(I.NOTL, x) | ||
889 : | monnier | 247 | |
890 : | george | 545 | | T.SRA(32, x, y) => shift(I.SARL, x, y) |
891 : | | T.SRL(32, x, y) => shift(I.SHRL, x, y) | ||
892 : | | T.SLL(32, x, y) => shift(I.SHLL, x, y) | ||
893 : | monnier | 247 | |
894 : | george | 545 | | T.LOAD(8, ea, mem) => load8(ea, mem) |
895 : | | T.LOAD(16, ea, mem) => load16(ea, mem) | ||
896 : | | T.LOAD(32, ea, mem) => load32(ea, mem) | ||
897 : | monnier | 498 | |
898 : | leunga | 776 | | T.SX(32,8,T.LOAD(8,ea,mem)) => load8s(ea, mem) |
899 : | | T.SX(32,16,T.LOAD(16,ea,mem)) => load16s(ea, mem) | ||
900 : | | T.ZX(32,8,T.LOAD(8,ea,mem)) => load8(ea, mem) | ||
901 : | leunga | 779 | | T.ZX(32,16,T.LOAD(16,ea,mem)) => load16(ea, mem) |
902 : | leunga | 776 | |
903 : | george | 545 | | T.COND(32, T.CMP(ty, cc, t1, t2), T.LI yes, T.LI no) => |
904 : | leunga | 583 | setcc(ty, cc, t1, t2, toInt32 yes, toInt32 no) |
905 : | george | 545 | | T.COND(32, T.CMP(ty, cc, t1, t2), yes, no) => |
906 : | (case !arch of (* PentiumPro and higher has CMOVcc *) | ||
907 : | Pentium => unknownExp exp | ||
908 : | | _ => cmovcc(ty, cc, t1, t2, yes, no) | ||
909 : | ) | ||
910 : | | T.LET(s,e) => (doStmt s; doExpr(e, rd, an)) | ||
911 : | | T.MARK(e, A.MARKREG f) => (f rd; doExpr(e, rd, an)) | ||
912 : | | T.MARK(e, a) => doExpr(e, rd, a::an) | ||
913 : | | T.PRED(e,c) => doExpr(e, rd, A.CTRLUSE c::an) | ||
914 : | george | 555 | | T.REXT e => |
915 : | ExtensionComp.compileRext (reducer()) {e=e, rd=rd, an=an} | ||
916 : | george | 545 | (* simplify and try again *) |
917 : | | exp => unknownExp exp | ||
918 : | end (* doExpr *) | ||
919 : | monnier | 247 | |
920 : | george | 545 | (* generate an expression and return its result register |
921 : | * If rewritePseudo is on, the result is guaranteed to be in a | ||
922 : | * non memReg register | ||
923 : | *) | ||
924 : | and expr(exp as T.REG(_, rd)) = | ||
925 : | if isMemReg rd then genExpr exp else rd | ||
926 : | | expr exp = genExpr exp | ||
927 : | monnier | 247 | |
928 : | george | 545 | and genExpr exp = |
929 : | let val rd = newReg() in doExpr(exp, rd, []); rd end | ||
930 : | monnier | 247 | |
931 : | george | 545 | (* Compare an expression with zero. |
932 : | * On the x86, TEST is superior to AND for doing the same thing, | ||
933 : | * since it doesn't need to write out the result in a register. | ||
934 : | *) | ||
935 : | leunga | 695 | and cmpWithZero(cc as (T.EQ | T.NE), e as T.ANDB(ty, a, b), an) = |
936 : | george | 545 | (case ty of |
937 : | leunga | 695 | 8 => test(I.TESTB, a, b, an) |
938 : | | 16 => test(I.TESTW, a, b, an) | ||
939 : | | 32 => test(I.TESTL, a, b, an) | ||
940 : | | _ => doExpr(e, newReg(), an); | ||
941 : | cc) | ||
942 : | | cmpWithZero(cc, e, an) = | ||
943 : | let val e = | ||
944 : | case e of (* hack to disable the lea optimization XXX *) | ||
945 : | T.ADD(_, a, b) => T.ADD(0, a, b) | ||
946 : | | e => e | ||
947 : | in doExpr(e, newReg(), an); cc end | ||
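   (* Added note: the T.ADD(_, a, b) => T.ADD(0, a, b) rewrite above forces
    * the addition down the plain addl/incl/decl path (the "ty = 0" cases in
    * doExpr) rather than the leal path, because leal does not update the
    * flags that the subsequent conditional branch needs.
    *)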
948 : | monnier | 247 | |
949 : | george | 545 | (* Emit a test. |
950 : | * The available modes are | ||
951 : | * r/m, r | ||
952 : | * r/m, imm | ||
953 : | * On selecting the right instruction: TESTL/TESTW/TESTB. | ||
954 : | * When anding an operand with a constant | ||
955 : | * that fits within 8 (or 16) bits, it is possible to use TESTB, | ||
956 : | * (or TESTW) instead of TESTL. Because x86 is little endian, | ||
957 : | * this works for memory operands too. However, with TESTB, it is | ||
958 : | * not possible to use registers other than | ||
959 : | * AL, CL, BL, DL, and AH, CH, BH, DH. So, the best way is to | ||
960 : | * perform register allocation first, and if the operand registers | ||
961 : | * are one of EAX, ECX, EBX, or EDX, replace the TESTL instruction | ||
962 : | * by TESTB. | ||
963 : | *) | ||
964 : | leunga | 695 | and test(testopcode, a, b, an) = |
965 : | george | 545 | let val (_, opnd1, opnd2) = commuteComparison(T.EQ, true, a, b) |
966 : | (* translate r, r/m => r/m, r *) | ||
967 : | val (opnd1, opnd2) = | ||
968 : | if isMemOpnd opnd2 then (opnd2, opnd1) else (opnd1, opnd2) | ||
969 : | leunga | 695 | in mark(testopcode{lsrc=opnd1, rsrc=opnd2}, an) |
970 : | george | 545 | end |
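   (* Added example: the width of the ANDB in cmpWithZero above selects the
    * test opcode, so CMP(32, EQ, ANDB(8, e, LI 1), LI 0) turns into a single
    *   testb $1, <r/m>
    * rather than an andl followed by a compare -- the testl/testb
    * optimization mentioned in point 5 of the header comment.
    *)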
971 : | monnier | 247 | |
972 : | leunga | 815 | (* %eflags <- src *) |
973 : | and moveToEflags src = | ||
974 : | george | 889 | if CB.sameColor(src, C.eflags) then () |
975 : | leunga | 815 | else (move(I.Direct src, eax); emit(I.LAHF)) |
976 : | |||
977 : | (* dst <- %eflags *) | ||
978 : | and moveFromEflags dst = | ||
979 : | george | 889 | if CB.sameColor(dst, C.eflags) then () |
980 : | leunga | 815 | else (emit(I.SAHF); move(eax, I.Direct dst)) |
981 : | |||
982 : | george | 545 | (* generate a condition code expression |
983 : | leunga | 744 | * The zero is for setting the condition code! |
984 : | * I have no idea why this is used. | ||
985 : | *) | ||
986 : | and doCCexpr(T.CMP(ty, cc, t1, t2), rd, an) = | ||
987 : | leunga | 815 | (cmp(false, ty, cc, t1, t2, an); |
988 : | moveFromEflags rd | ||
989 : | ) | ||
990 : | | doCCexpr(T.CC(cond,rs), rd, an) = | ||
991 : | george | 889 | if CB.sameColor(rs,C.eflags) orelse CB.sameColor(rd,C.eflags) then |
992 : | leunga | 815 | (moveToEflags rs; moveFromEflags rd) |
993 : | leunga | 744 | else |
994 : | leunga | 815 | move'(I.Direct rs, I.Direct rd, an) |
995 : | george | 545 | | doCCexpr(T.CCMARK(e,A.MARKREG f),rd,an) = (f rd; doCCexpr(e,rd,an)) |
996 : | | doCCexpr(T.CCMARK(e,a), rd, an) = doCCexpr(e,rd,a::an) | ||
997 : | | doCCexpr(T.CCEXT e, cd, an) = | ||
998 : | george | 555 | ExtensionComp.compileCCext (reducer()) {e=e, ccd=cd, an=an} |
999 : | george | 545 | | doCCexpr _ = error "doCCexpr" |
1000 : | monnier | 247 | |
1001 : | george | 545 | and ccExpr e = error "ccExpr" |
1002 : | monnier | 247 | |
1003 : | george | 545 | (* generate a comparison and set the condition code; |
1004 : | * return the actual cc used. If the flag swapable is true, | ||
1005 : | * we can also reorder the operands. | ||
1006 : | *) | ||
1007 : | and cmp(swapable, ty, cc, t1, t2, an) = | ||
1008 : | leunga | 695 | (* == and <> can always be reordered *) |
1009 : | let val swapable = swapable orelse cc = T.EQ orelse cc = T.NE | ||
1010 : | in (* Sometimes the comparison is not necessary because | ||
1011 : | * the bits are already set! | ||
1012 : | *) | ||
1013 : | if isZero t1 andalso setZeroBit2 t2 then | ||
1014 : | if swapable then | ||
1015 : | cmpWithZero(T.Basis.swapCond cc, t2, an) | ||
1016 : | else (* can't reorder the comparison! *) | ||
1017 : | genCmp(ty, false, cc, t1, t2, an) | ||
1018 : | else if isZero t2 andalso setZeroBit2 t1 then | ||
1019 : | cmpWithZero(cc, t1, an) | ||
1020 : | else genCmp(ty, swapable, cc, t1, t2, an) | ||
1021 : | end | ||
1022 : | monnier | 247 | |
1023 : | george | 545 | (* Given a and b which are the operands to a comparison (or test)
1024 : | * Return the appropriate condition code and operands. | ||
1025 : | * The available modes are: | ||
1026 : | * r/m, imm | ||
1027 : | * r/m, r | ||
1028 : | * r, r/m | ||
1029 : | *) | ||
1030 : | and commuteComparison(cc, swapable, a, b) = | ||
1031 : | let val (opnd1, opnd2) = (operand a, operand b) | ||
1032 : | in (* Try to fold in the operands whenever possible *) | ||
1033 : | case (isImmediate opnd1, isImmediate opnd2) of | ||
1034 : | (true, true) => (cc, moveToReg opnd1, opnd2) | ||
1035 : | | (true, false) => | ||
1036 : | if swapable then (T.Basis.swapCond cc, opnd2, opnd1) | ||
1037 : | else (cc, moveToReg opnd1, opnd2) | ||
1038 : | | (false, true) => (cc, opnd1, opnd2) | ||
1039 : | | (false, false) => | ||
1040 : | (case (opnd1, opnd2) of | ||
1041 : | (_, I.Direct _) => (cc, opnd1, opnd2) | ||
1042 : | | (I.Direct _, _) => (cc, opnd1, opnd2) | ||
1043 : | | (_, _) => (cc, moveToReg opnd1, opnd2) | ||
1044 : | ) | ||
1045 : | end | ||
1046 : | |||
1047 : | (* generate a real comparison; return the real cc used *) | ||
1048 : | and genCmp(ty, swapable, cc, a, b, an) = | ||
1049 : | let val (cc, opnd1, opnd2) = commuteComparison(cc, swapable, a, b) | ||
1050 : | in mark(I.CMPL{lsrc=opnd1, rsrc=opnd2}, an); cc | ||
1051 : | end | ||
1052 : | monnier | 247 | |
1053 : | george | 545 | (* generate code for jumps *) |
1054 : | leunga | 775 | and jmp(lexp as T.LABEL lab, labs, an) = |
1055 : | george | 545 | mark(I.JMP(I.ImmedLabel lexp, [lab]), an) |
1056 : | leunga | 775 | | jmp(T.LABEXP le, labs, an) = mark(I.JMP(I.ImmedLabel le, labs), an) |
1057 : | | jmp(ea, labs, an) = mark(I.JMP(operand ea, labs), an) | ||
1058 : | george | 545 | |
1059 : | (* convert mlrisc to cellset: | ||
1060 : | *) | ||
1061 : | and cellset mlrisc = | ||
1062 : | jhr | 900 | let val addCCReg = CB.CellSet.add |
1063 : | george | 545 | fun g([],acc) = acc |
1064 : | | g(T.GPR(T.REG(_,r))::regs,acc) = g(regs,C.addReg(r,acc)) | ||
1065 : | | g(T.FPR(T.FREG(_,f))::regs,acc) = g(regs,C.addFreg(f,acc)) | ||
1066 : | | g(T.CCR(T.CC(_,cc))::regs,acc) = g(regs,addCCReg(cc,acc)) | ||
1067 : | | g(T.CCR(T.FCC(_,cc))::regs,acc) = g(regs,addCCReg(cc,acc)) | ||
1068 : | | g(_::regs, acc) = g(regs, acc) | ||
1069 : | in g(mlrisc, C.empty) end | ||
1070 : | |||
1071 : | (* generate code for calls *) | ||
1072 : | blume | 839 | and call(ea, flow, def, use, mem, cutsTo, an, pops) = |
1073 : | leunga | 815 | let fun return(set, []) = set |
1074 : | | return(set, a::an) = | ||
1075 : | case #peek A.RETURN_ARG a of | ||
1076 : | jhr | 900 | SOME r => return(CB.CellSet.add(r, set), an) |
1077 : | leunga | 815 | | NONE => return(set, an) |
1078 : | blume | 839 | in |
1079 : | mark(I.CALL{opnd=operand ea,defs=cellset(def),uses=cellset(use), | ||
1080 : | return=return(C.empty,an),cutsTo=cutsTo,mem=mem, | ||
1081 : | pops=pops},an) | ||
1082 : | leunga | 815 | end |
1083 : | george | 545 | |
1084 : | leunga | 815 | (* generate code for integer stores; first move data to %eax |
1085 : | * This is mainly because we can't allocate to registers like | ||
1086 : | * ah, dl, dx etc. | ||
1087 : | *) | ||
1088 : | and genStore(mvOp, ea, d, mem, an) = | ||
1089 : | let val src = | ||
1090 : | george | 545 | case immedOrReg(operand d) of |
1091 : | src as I.Direct r => | ||
1092 : | george | 889 | if CB.sameColor(r,C.eax) |
1093 : | leunga | 744 | then src else (move(src, eax); eax) |
1094 : | george | 545 | | src => src |
1095 : | leunga | 815 | in mark(I.MOVE{mvOp=mvOp, src=src, dst=address(ea,mem)},an) |
1096 : | george | 545 | end |
1097 : | leunga | 815 | |
1098 : | (* generate code for 8-bit integer stores *) | ||
1099 : | (* movb has to use %eax as source. Stupid x86! *) | ||
1100 : | and store8(ea, d, mem, an) = genStore(I.MOVB, ea, d, mem, an) | ||
1101 : | blume | 818 | and store16(ea, d, mem, an) = |
1102 : | mark(I.MOVE{mvOp=I.MOVW, src=immedOrReg(operand d), dst=address(ea, mem)}, an) | ||
1103 : | george | 545 | and store32(ea, d, mem, an) = |
1104 : | move'(immedOrReg(operand d), address(ea, mem), an) | ||
1105 : | |||
1106 : | (* generate code for branching *) | ||
1107 : | and branch(T.CMP(ty, cc, t1, t2), lab, an) = | ||
1108 : | (* allow reordering of operands *) | ||
1109 : | let val cc = cmp(true, ty, cc, t1, t2, []) | ||
1110 : | in mark(I.JCC{cond=cond cc, opnd=immedLabel lab}, an) end | ||
1111 : | | branch(T.FCMP(fty, fcc, t1, t2), lab, an) = | ||
1112 : | fbranch(fty, fcc, t1, t2, lab, an) | ||
1113 : | | branch(ccexp, lab, an) = | ||
1114 : | leunga | 744 | (doCCexpr(ccexp, C.eflags, []); |
1115 : | george | 545 | mark(I.JCC{cond=cond(Gen.condOf ccexp), opnd=immedLabel lab}, an) |
1116 : | ) | ||
1117 : | |||
1118 : | (* generate code for floating point compare and branch *) | ||
1119 : | and fbranch(fty, fcc, t1, t2, lab, an) = | ||
1120 : | leunga | 731 | let fun ignoreOrder (T.FREG _) = true |
1121 : | | ignoreOrder (T.FLOAD _) = true | ||
1122 : | | ignoreOrder (T.FMARK(e,_)) = ignoreOrder e | ||
1123 : | | ignoreOrder _ = false | ||
1124 : | |||
1125 : | fun compare'() = (* Sethi-Ullman style *) | ||
1126 : | (if ignoreOrder t1 orelse ignoreOrder t2 then | ||
1127 : | (reduceFexp(fty, t2, []); reduceFexp(fty, t1, [])) | ||
1128 : | else (reduceFexp(fty, t1, []); reduceFexp(fty, t2, []); | ||
1129 : | emit(I.FXCH{opnd=C.ST(1)})); | ||
1130 : | emit(I.FUCOMPP); | ||
1131 : | fcc | ||
1132 : | ) | ||
1133 : | |||
1134 : | fun compare''() = | ||
1135 : | (* direct style *) | ||
1136 : | (* Try to make lsrc the memory operand *) | ||
1137 : | let val lsrc = foperand(fty, t1) | ||
1138 : | val rsrc = foperand(fty, t2) | ||
1139 : | val fsize = fsize fty | ||
1140 : | fun cmp(lsrc, rsrc, fcc) = | ||
1141 : | (emit(I.FCMP{fsize=fsize,lsrc=lsrc,rsrc=rsrc}); fcc) | ||
1142 : | in case (lsrc, rsrc) of | ||
1143 : | (I.FPR _, I.FPR _) => cmp(lsrc, rsrc, fcc) | ||
1144 : | | (I.FPR _, mem) => cmp(mem,lsrc,T.Basis.swapFcond fcc) | ||
1145 : | | (mem, I.FPR _) => cmp(lsrc, rsrc, fcc) | ||
1146 : | | (lsrc, rsrc) => (* can't be both memory! *) | ||
1147 : | let val ftmpR = newFreg() | ||
1148 : | val ftmp = I.FPR ftmpR | ||
1149 : | in emit(I.FMOVE{fsize=fsize,src=rsrc,dst=ftmp}); | ||
1150 : | cmp(lsrc, ftmp, fcc) | ||
1151 : | end | ||
1152 : | end | ||
1153 : | |||
1154 : | fun compare() = | ||
1155 : | if enableFastFPMode andalso !fast_floating_point | ||
1156 : | then compare''() else compare'() | ||
1157 : | |||
1158 : | george | 545 | fun andil i = emit(I.BINARY{binOp=I.ANDL,src=I.Immed(i),dst=eax}) |
1159 : | leunga | 585 | fun testil i = emit(I.TESTL{lsrc=eax,rsrc=I.Immed(i)}) |
1160 : | george | 545 | fun xoril i = emit(I.BINARY{binOp=I.XORL,src=I.Immed(i),dst=eax}) |
1161 : | fun cmpil i = emit(I.CMPL{rsrc=I.Immed(i), lsrc=eax}) | ||
1162 : | fun j(cc, lab) = mark(I.JCC{cond=cc, opnd=immedLabel lab},an) | ||
1163 : | fun sahf() = emit(I.SAHF) | ||
1164 : | leunga | 731 | fun branch(fcc) = |
1165 : | george | 545 | case fcc |
1166 : | of T.== => (andil 0x4400; xoril 0x4000; j(I.EQ, lab)) | ||
1167 : | | T.?<> => (andil 0x4400; xoril 0x4000; j(I.NE, lab)) | ||
1168 : | | T.? => (sahf(); j(I.P,lab)) | ||
1169 : | | T.<=> => (sahf(); j(I.NP,lab)) | ||
1170 : | leunga | 585 | | T.> => (testil 0x4500; j(I.EQ,lab)) |
1171 : | | T.?<= => (testil 0x4500; j(I.NE,lab)) | ||
1172 : | | T.>= => (testil 0x500; j(I.EQ,lab)) | ||
1173 : | | T.?< => (testil 0x500; j(I.NE,lab)) | ||
1174 : | george | 545 | | T.< => (andil 0x4500; cmpil 0x100; j(I.EQ,lab)) |
1175 : | | T.?>= => (andil 0x4500; cmpil 0x100; j(I.NE,lab)) | ||
1176 : | | T.<= => (andil 0x4100; cmpil 0x100; j(I.EQ,lab); | ||
1177 : | cmpil 0x4000; j(I.EQ,lab)) | ||
1178 : | leunga | 585 | | T.?> => (sahf(); j(I.P,lab); testil 0x4100; j(I.EQ,lab)) |
1179 : | | T.<> => (testil 0x4400; j(I.EQ,lab)) | ||
1180 : | | T.?= => (testil 0x4400; j(I.NE,lab)) | ||
1181 : | george | 545 | | _ => error "fbranch" |
1182 : | (*esac*) | ||
1183 : | leunga | 731 | val fcc = compare() |
1184 : | in emit I.FNSTSW; | ||
1185 : | branch(fcc) | ||
1186 : | monnier | 411 | end |
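   (* Added note: FNSTSW copies the FPU status word into %ax, where the
    * condition bits sit at C0 = 0x100, C2 = 0x400 and C3 = 0x4000.  The bit
    * masks above test combinations of them, e.g. T.> tests 0x4500 = C3|C2|C0
    * and branches when all three are clear, i.e. the result was neither
    * "less", "equal" nor "unordered".
    *)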
1187 : | monnier | 247 | |
1188 : | leunga | 731 | (*======================================================== |
1189 : | * Floating point code generation starts here. | ||
1190 : | * Some generic fp routines first. | ||
1191 : | *========================================================*) | ||
1192 : | |||
1193 : | (* Can this tree be folded into the src operand of a floating point | ||
1194 : | * operation? | ||
1195 : | *) | ||
1196 : | and foldableFexp(T.FREG _) = true | ||
1197 : | | foldableFexp(T.FLOAD _) = true | ||
1198 : | | foldableFexp(T.CVTI2F(_, (16 | 32), _)) = true | ||
1199 : | | foldableFexp(T.CVTF2F(_, _, t)) = foldableFexp t | ||
1200 : | | foldableFexp(T.FMARK(t, _)) = foldableFexp t | ||
1201 : | | foldableFexp _ = false | ||
1202 : | |||
1203 : | (* Move integer e of size ty into a memory location. | ||
1204 : | * Returns a quadruple: | ||
1205 : | * (INTEGER,return ty,effective address of memory location,cleanup code) | ||
1206 : | *) | ||
1207 : | and convertIntToFloat(ty, e) = | ||
1208 : | let val opnd = operand e | ||
1209 : | in if isMemOpnd opnd andalso (ty = 16 orelse ty = 32) | ||
1210 : | then (INTEGER, ty, opnd, []) | ||
1211 : | else | ||
1212 : | leunga | 815 | let val {instrs, tempMem, cleanup} = |
1213 : | cvti2f{ty=ty, src=opnd, an=getAnnotations()} | ||
1214 : | leunga | 731 | in emits instrs; |
1215 : | (INTEGER, 32, tempMem, cleanup) | ||
1216 : | end | ||
1217 : | end | ||
1218 : | |||
1219 : | (*======================================================== | ||
1220 : | * Sethi-Ullman based floating point code generation as | ||
1221 : | * implemented by Lal | ||
1222 : | *========================================================*) | ||
1223 : | |||
1224 : | george | 545 | and fld(32, opnd) = I.FLDS opnd |
1225 : | | fld(64, opnd) = I.FLDL opnd | ||
1226 : | george | 555 | | fld(80, opnd) = I.FLDT opnd |
1227 : | george | 545 | | fld _ = error "fld" |
1228 : | |||
1229 : | leunga | 565 | and fild(16, opnd) = I.FILD opnd |
1230 : | | fild(32, opnd) = I.FILDL opnd | ||
1231 : | | fild(64, opnd) = I.FILDLL opnd | ||
1232 : | | fild _ = error "fild" | ||
1233 : | |||
1234 : | and fxld(INTEGER, ty, opnd) = fild(ty, opnd) | ||
1235 : | | fxld(REAL, fty, opnd) = fld(fty, opnd) | ||
1236 : | |||
1237 : | george | 545 | and fstp(32, opnd) = I.FSTPS opnd |
1238 : | | fstp(64, opnd) = I.FSTPL opnd | ||
1239 : | george | 555 | | fstp(80, opnd) = I.FSTPT opnd |
1240 : | george | 545 | | fstp _ = error "fstp" |
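     : |           (* These selectors pick the x87 opcode by operand width:
     : |            * 32/64/80-bit reals load with FLDS/FLDL/FLDT and store-and-pop
     : |            * with FSTPS/FSTPL/FSTPT, while 16/32/64-bit integers load with
     : |            * FILD/FILDL/FILDLL.  E.g. fxld(INTEGER, 32, opnd) becomes a
     : |            * 32-bit integer load and fxld(REAL, 64, opnd) a double load.
     : |            *)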
1241 : | |||
1242 : | (* generate code for floating point stores *) | ||
1243 : | leunga | 731 | and fstore'(fty, ea, d, mem, an) = |
1244 : | george | 545 | (case d of |
1245 : | T.FREG(fty, fs) => emit(fld(fty, I.FDirect fs)) | ||
1246 : | | _ => reduceFexp(fty, d, []); | ||
1247 : | mark(fstp(fty, address(ea, mem)), an) | ||
1248 : | ) | ||
1249 : | |||
1250 : | leunga | 731 | (* generate code for floating point loads *) |
1251 : | and fload'(fty, ea, mem, fd, an) = | ||
1252 : | let val ea = address(ea, mem) | ||
1253 : | in mark(fld(fty, ea), an); | ||
1254 : | george | 889 | if CB.sameColor(fd,ST0) then () |
1255 : | leunga | 744 | else emit(fstp(fty, I.FDirect fd)) |
1256 : | leunga | 731 | end |
1257 : | |||
1258 : | and fexpr' e = (reduceFexp(64, e, []); C.ST(0)) | ||
1259 : | george | 545 | |
1260 : | (* generate floating point expression and put the result in fd *) | ||
1261 : | leunga | 731 | and doFexpr'(fty, T.FREG(_, fs), fd, an) = |
1262 : | george | 889 | (if CB.sameColor(fs,fd) then () |
1263 : | george | 1009 | else mark'(I.COPY{k=CB.FP, sz=64, dst=[fd], src=[fs], tmp=NONE}, an) |
1264 : | george | 545 | ) |
1265 : | leunga | 731 | | doFexpr'(_, T.FLOAD(fty, ea, mem), fd, an) = |
1266 : | fload'(fty, ea, mem, fd, an) | ||
1267 : | | doFexpr'(fty, T.FEXT fexp, fd, an) = | ||
1268 : | (ExtensionComp.compileFext (reducer()) {e=fexp, fd=fd, an=an}; | ||
1269 : | george | 889 | if CB.sameColor(fd,ST0) then () else emit(fstp(fty, I.FDirect fd)) |
1270 : | leunga | 731 | ) |
1271 : | | doFexpr'(fty, e, fd, an) = | ||
1272 : | george | 545 | (reduceFexp(fty, e, []); |
1273 : | george | 889 | if CB.sameColor(fd,ST0) then () |
1274 : | leunga | 744 | else mark(fstp(fty, I.FDirect fd), an) |
1275 : | george | 545 | ) |
1276 : | |||
1277 : | (* | ||
1278 : | * Generate floating point expression using Sethi-Ullman's scheme: | ||
1279 : | * This function evaluates a floating point expression, | ||
1280 : | * and puts the result in %ST(0).
1281 : | *) | ||
1282 : | and reduceFexp(fty, fexp, an) = | ||
1283 : | george | 555 | let val ST = I.ST(C.ST 0) |
1284 : | val ST1 = I.ST(C.ST 1) | ||
1285 : | leunga | 593 | val cleanupCode = ref [] : I.instruction list ref |
1286 : | george | 545 | |
1287 : | leunga | 565 | datatype su_tree = |
1288 : | LEAF of int * T.fexp * ans | ||
1289 : | | BINARY of int * T.fty * fbinop * su_tree * su_tree * ans | ||
1290 : | | UNARY of int * T.fty * I.funOp * su_tree * ans | ||
1291 : | and fbinop = FADD | FSUB | FMUL | FDIV | ||
1292 : | | FIADD | FISUB | FIMUL | FIDIV | ||
1293 : | withtype ans = Annotations.annotations | ||
1294 : | monnier | 247 | |
1295 : | leunga | 565 | fun label(LEAF(n, _, _)) = n |
1296 : | | label(BINARY(n, _, _, _, _, _)) = n | ||
1297 : | | label(UNARY(n, _, _, _, _)) = n | ||
1298 : | george | 545 | |
1299 : | leunga | 565 | fun annotate(LEAF(n, x, an), a) = LEAF(n,x,a::an) |
1300 : | | annotate(BINARY(n,t,b,x,y,an), a) = BINARY(n,t,b,x,y,a::an) | ||
1301 : | | annotate(UNARY(n,t,u,x,an), a) = UNARY(n,t,u,x,a::an) | ||
1302 : | george | 545 | |
1303 : | leunga | 565 | (* Generate expression tree with Sethi-Ullman numbers *)
1304 : | fun su(e as T.FREG _) = LEAF(1, e, []) | ||
1305 : | | su(e as T.FLOAD _) = LEAF(1, e, []) | ||
1306 : | | su(e as T.CVTI2F _) = LEAF(1, e, []) | ||
1307 : | | su(T.CVTF2F(_, _, t)) = su t | ||
1308 : | | su(T.FMARK(t, a)) = annotate(su t, a) | ||
1309 : | | su(T.FABS(fty, t)) = suUnary(fty, I.FABS, t) | ||
1310 : | | su(T.FNEG(fty, t)) = suUnary(fty, I.FCHS, t) | ||
1311 : | | su(T.FSQRT(fty, t)) = suUnary(fty, I.FSQRT, t) | ||
1312 : | | su(T.FADD(fty, t1, t2)) = suComBinary(fty,FADD,FIADD,t1,t2) | ||
1313 : | | su(T.FMUL(fty, t1, t2)) = suComBinary(fty,FMUL,FIMUL,t1,t2) | ||
1314 : | | su(T.FSUB(fty, t1, t2)) = suBinary(fty,FSUB,FISUB,t1,t2) | ||
1315 : | | su(T.FDIV(fty, t1, t2)) = suBinary(fty,FDIV,FIDIV,t1,t2) | ||
1316 : | | su _ = error "su" | ||
1317 : | |||
1318 : | (* Try to fold the memory operand or integer conversion *)
1319 : | and suFold(e as T.FREG _) = (LEAF(0, e, []), false) | ||
1320 : | | suFold(e as T.FLOAD _) = (LEAF(0, e, []), false) | ||
1321 : | | suFold(e as T.CVTI2F(_,(16 | 32),_)) = (LEAF(0, e, []), true) | ||
1322 : | | suFold(T.CVTF2F(_, _, t)) = suFold t | ||
1323 : | | suFold(T.FMARK(t, a)) = | ||
1324 : | let val (t, integer) = suFold t | ||
1325 : | in (annotate(t, a), integer) end | ||
1326 : | | suFold e = (su e, false) | ||
1327 : | |||
1328 : | (* Form unary tree *) | ||
1329 : | and suUnary(fty, funary, t) = | ||
1330 : | let val t = su t | ||
1331 : | in UNARY(label t, fty, funary, t, []) | ||
1332 : | george | 545 | end |
1333 : | leunga | 565 | |
1334 : | (* Form binary tree *) | ||
1335 : | and suBinary(fty, binop, ibinop, t1, t2) = | ||
1336 : | let val t1 = su t1 | ||
1337 : | val (t2, integer) = suFold t2 | ||
1338 : | val n1 = label t1 | ||
1339 : | val n2 = label t2 | ||
1340 : | val n = if n1=n2 then n1+1 else Int.max(n1,n2) | ||
1341 : | val myOp = if integer then ibinop else binop | ||
1342 : | in BINARY(n, fty, myOp, t1, t2, []) | ||
1343 : | george | 545 | end |
1344 : | george | 555 | |
1345 : | leunga | 565 | (* Try to fold in the operand if possible. |
1346 : | * This only applies to commutative operations. | ||
1347 : | *) | ||
1348 : | and suComBinary(fty, binop, ibinop, t1, t2) = | ||
1349 : | leunga | 731 | let val (t1, t2) = if foldableFexp t2 |
1350 : | then (t1, t2) else (t2, t1) | ||
1351 : | leunga | 565 | in suBinary(fty, binop, ibinop, t1, t2) end |
1352 : | |||
1353 : | and sameTree(LEAF(_, T.FREG(t1,f1), []), | ||
1354 : | leunga | 744 | LEAF(_, T.FREG(t2,f2), [])) = |
1355 : | george | 889 | t1 = t2 andalso CB.sameColor(f1,f2) |
1356 : | leunga | 565 | | sameTree _ = false |
1357 : | |||
1358 : | (* Traverse tree and generate code *) | ||
1359 : | fun gencode(LEAF(_, t, an)) = mark(fxld(leafEA t), an) | ||
1360 : | | gencode(BINARY(_, _, binop, x, t2 as LEAF(0, y, a1), a2)) = | ||
1361 : | let val _ = gencode x | ||
1362 : | val (_, fty, src) = leafEA y | ||
1363 : | fun gen(code) = mark(code, a1 @ a2) | ||
1364 : | fun binary(oper32, oper64) = | ||
1365 : | if sameTree(x, t2) then | ||
1366 : | gen(I.FBINARY{binOp=oper64, src=ST, dst=ST}) | ||
1367 : | george | 555 | else |
1368 : | let val oper = | ||
1369 : | leunga | 565 | if isMemOpnd src then |
1370 : | case fty of | ||
1371 : | 32 => oper32 | ||
1372 : | | 64 => oper64 | ||
1373 : | | _ => error "gencode: BINARY" | ||
1374 : | else oper64 | ||
1375 : | in gen(I.FBINARY{binOp=oper, src=src, dst=ST}) end | ||
1376 : | fun ibinary(oper16, oper32) = | ||
1377 : | let val oper = case fty of | ||
1378 : | 16 => oper16 | ||
1379 : | | 32 => oper32 | ||
1380 : | | _ => error "gencode: IBINARY" | ||
1381 : | in gen(I.FIBINARY{binOp=oper, src=src}) end | ||
1382 : | in case binop of | ||
1383 : | FADD => binary(I.FADDS, I.FADDL) | ||
1384 : | | FSUB => binary(I.FSUBS, I.FSUBL)
1385 : | | FMUL => binary(I.FMULS, I.FMULL) | ||
1386 : | | FDIV => binary(I.FDIVS, I.FDIVL) | ||
1387 : | | FIADD => ibinary(I.FIADDS, I.FIADDL) | ||
1388 : | | FISUB => ibinary(I.FISUBS, I.FISUBL)
1389 : | | FIMUL => ibinary(I.FIMULS, I.FIMULL) | ||
1390 : | | FIDIV => ibinary(I.FIDIVS, I.FIDIVL) | ||
1391 : | end | ||
1392 : | | gencode(BINARY(_, fty, binop, t1, t2, an)) = | ||
1393 : | let fun doit(t1, t2, oper, operP, operRP) = | ||
1394 : | let (* oper[P] => ST(1) := ST oper ST(1); [pop] | ||
1395 : | * operR[P] => ST(1) := ST(1) oper ST; [pop] | ||
1396 : | *) | ||
1397 : | val n1 = label t1 | ||
1398 : | val n2 = label t2 | ||
1399 : | in if n1 < n2 andalso n1 <= 7 then | ||
1400 : | (gencode t2; | ||
1401 : | gencode t1; | ||
1402 : | mark(I.FBINARY{binOp=operP, src=ST, dst=ST1}, an)) | ||
1403 : | else if n2 <= n1 andalso n2 <= 7 then | ||
1404 : | (gencode t1; | ||
1405 : | gencode t2; | ||
1406 : | mark(I.FBINARY{binOp=operRP, src=ST, dst=ST1}, an)) | ||
1407 : | else | ||
1408 : | let (* both labels > 7 *) | ||
1409 : | val fs = I.FDirect(newFreg()) | ||
1410 : | in gencode t2; | ||
1411 : | emit(fstp(fty, fs)); | ||
1412 : | gencode t1; | ||
1413 : | mark(I.FBINARY{binOp=oper, src=fs, dst=ST}, an) | ||
1414 : | end | ||
1415 : | end | ||
1416 : | in case binop of | ||
1417 : | FADD => doit(t1,t2,I.FADDL,I.FADDP,I.FADDP) | ||
1418 : | | FMUL => doit(t1,t2,I.FMULL,I.FMULP,I.FMULP) | ||
1419 : | | FSUB => doit(t1,t2,I.FSUBL,I.FSUBP,I.FSUBRP) | ||
1420 : | | FDIV => doit(t1,t2,I.FDIVL,I.FDIVP,I.FDIVRP) | ||
1421 : | george | 545 | | _ => error "gencode.BINARY" |
1422 : | end | ||
1423 : | leunga | 565 | | gencode(UNARY(_, _, unaryOp, su, an)) = |
1424 : | (gencode(su); mark(I.FUNARY(unaryOp),an)) | ||
1425 : | |||
1426 : | (* Generate code for a leaf. | ||
1427 : | * Returns the type and an effective address | ||
1428 : | *) | ||
1429 : | and leafEA(T.FREG(fty, f)) = (REAL, fty, I.FDirect f) | ||
1430 : | | leafEA(T.FLOAD(fty, ea, mem)) = (REAL, fty, address(ea, mem)) | ||
1431 : | leunga | 593 | | leafEA(T.CVTI2F(_, 32, t)) = int2real(32, t) |
1432 : | | leafEA(T.CVTI2F(_, 16, t)) = int2real(16, t) | ||
1433 : | | leafEA(T.CVTI2F(_, 8, t)) = int2real(8, t) | ||
1434 : | leunga | 565 | | leafEA _ = error "leafEA" |
1435 : | |||
1436 : | leunga | 731 | and int2real(ty, e) = |
1437 : | let val (_, ty, ea, cleanup) = convertIntToFloat(ty, e) | ||
1438 : | in cleanupCode := !cleanupCode @ cleanup; | ||
1439 : | (INTEGER, ty, ea) | ||
1440 : | george | 545 | end |
1441 : | leunga | 731 | |
1442 : | in gencode(su fexp); | ||
1443 : | emits(!cleanupCode) | ||
1444 : | george | 545 | end (*reduceFexp*) |
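     : |           (* Worked example (a sketch; operand names are illustrative):
     : |            * for  e = FDIV(64, FADD(64, FREG a, FLOAD(64, ea1, m)),
     : |            *                    FSUB(64, FREG c, FLOAD(64, ea2, m)))
     : |            * the two FLOADs are foldable leaves with label 0, both inner
     : |            * nodes get label 1, and since those labels are equal the FDIV
     : |            * gets label 1+1 = 2, so two stack slots suffice.  gencode then
     : |            * emits roughly
     : |            *     fld <a>;  faddl <ea1>     -- %st(0) = a + M[ea1]
     : |            *     fld <c>;  fsubl <ea2>     -- %st(0) = c - M[ea2]
     : |            *     FBINARY{binOp=FDIVRP, src=ST, dst=ST1}
     : |            * which, per the operR[P] convention noted in gencode, leaves
     : |            * (a + M[ea1]) / (c - M[ea2]) in %st(0).
     : |            *)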
1445 : | leunga | 731 | |
1446 : | (*======================================================== | ||
1447 : | * This section generates 3-address style floating | ||
1448 : | * point code. | ||
1449 : | *========================================================*) | ||
1450 : | |||
1451 : | and isize 16 = I.I16 | ||
1452 : | | isize 32 = I.I32 | ||
1453 : | | isize _ = error "isize" | ||
1454 : | |||
1455 : | and fstore''(fty, ea, d, mem, an) = | ||
1456 : | (floatingPointUsed := true; | ||
1457 : | mark(I.FMOVE{fsize=fsize fty, dst=address(ea,mem), | ||
1458 : | src=foperand(fty, d)}, | ||
1459 : | an) | ||
1460 : | ) | ||
1461 : | |||
1462 : | and fload''(fty, ea, mem, d, an) = | ||
1463 : | (floatingPointUsed := true; | ||
1464 : | mark(I.FMOVE{fsize=fsize fty, src=address(ea,mem), | ||
1465 : | dst=RealReg d}, an) | ||
1466 : | ) | ||
1467 : | |||
1468 : | and fiload''(ity, ea, d, an) = | ||
1469 : | (floatingPointUsed := true; | ||
1470 : | mark(I.FILOAD{isize=isize ity, ea=ea, dst=RealReg d}, an) | ||
1471 : | ) | ||
1472 : | |||
1473 : | and fexpr''(e as T.FREG(_,f)) = | ||
1474 : | if isFMemReg f then transFexpr e else f | ||
1475 : | | fexpr'' e = transFexpr e | ||
1476 : | |||
1477 : | and transFexpr e = | ||
1478 : | let val fd = newFreg() in doFexpr''(64, e, fd, []); fd end | ||
1479 : | |||
1480 : | (* | ||
1481 : | * Process a floating point operand. Put operand in register | ||
1482 : | * when possible. The operand should match the given fty. | ||
1483 : | *) | ||
1484 : | and foperand(fty, e as T.FREG(fty', f)) = | ||
1485 : | if fty = fty' then RealReg f else I.FPR(fexpr'' e) | ||
1486 : | | foperand(fty, T.CVTF2F(_, _, e)) = | ||
1487 : | foperand(fty, e) (* nop on the x86 *) | ||
1488 : | | foperand(fty, e as T.FLOAD(fty', ea, mem)) = | ||
1489 : | (* fold operand when the precision matches *)
1490 : | if fty = fty' then address(ea, mem) else I.FPR(fexpr'' e) | ||
1491 : | | foperand(fty, e) = I.FPR(fexpr'' e) | ||
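     : |           (* E.g. foperand(64, T.FLOAD(32, ea, m)) cannot fold the load
     : |            * because the precisions differ: the load is evaluated into a
     : |            * fresh pseudo %fp register via fexpr'' and wrapped as I.FPR,
     : |            * whereas a matching 64-bit load is used directly as a memory
     : |            * operand.  (Sketch; operand names are assumptions.)
     : |            *)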
1492 : | |||
1493 : | (* | ||
1494 : | * Process a floating point operand. | ||
1495 : | * Try to fold in a memory operand or conversion from an integer. | ||
1496 : | *) | ||
1497 : | and fioperand(T.FREG(fty,f)) = (REAL, fty, RealReg f, []) | ||
1498 : | | fioperand(T.FLOAD(fty, ea, mem)) = | ||
1499 : | (REAL, fty, address(ea, mem), []) | ||
1500 : | | fioperand(T.CVTF2F(_, _, e)) = fioperand(e) (* nop on the x86 *) | ||
1501 : | | fioperand(T.CVTI2F(_, ty, e)) = convertIntToFloat(ty, e) | ||
1502 : | | fioperand(T.FMARK(e,an)) = fioperand(e) (* XXX *) | ||
1503 : | | fioperand(e) = (REAL, 64, I.FPR(fexpr'' e), []) | ||
1504 : | |||
1505 : | (* Generate a binary operator.  Since the real binary operators
1506 : | * do not take memory as a destination, we also ensure this
1507 : | * does not happen. | ||
1508 : | *) | ||
1509 : | and fbinop(targetFty, | ||
1510 : | binOp, binOpR, ibinOp, ibinOpR, lsrc, rsrc, fd, an) = | ||
1511 : | (* Put the mem operand in rsrc *) | ||
1512 : | let val _ = floatingPointUsed := true; | ||
1513 : | fun isMemOpnd(T.FREG(_, f)) = isFMemReg f | ||
1514 : | | isMemOpnd(T.FLOAD _) = true | ||
1515 : | | isMemOpnd(T.CVTI2F(_, (16 | 32), _)) = true | ||
1516 : | | isMemOpnd(T.CVTF2F(_, _, t)) = isMemOpnd t | ||
1517 : | | isMemOpnd(T.FMARK(t, _)) = isMemOpnd t | ||
1518 : | | isMemOpnd _ = false | ||
1519 : | val (binOp, ibinOp, lsrc, rsrc) = | ||
1520 : | if isMemOpnd lsrc then (binOpR, ibinOpR, rsrc, lsrc) | ||
1521 : | else (binOp, ibinOp, lsrc, rsrc) | ||
1522 : | val lsrc = foperand(targetFty, lsrc) | ||
1523 : | val (kind, fty, rsrc, code) = fioperand(rsrc) | ||
1524 : | fun dstMustBeFreg f = | ||
1525 : | if targetFty <> 64 then | ||
1526 : | let val tmpR = newFreg() | ||
1527 : | val tmp = I.FPR tmpR | ||
1528 : | in mark(f tmp, an); | ||
1529 : | emit(I.FMOVE{fsize=fsize targetFty, | ||
1530 : | src=tmp, dst=RealReg fd}) | ||
1531 : | end | ||
1532 : | else mark(f(RealReg fd), an) | ||
1533 : | in case kind of | ||
1534 : | REAL => | ||
1535 : | dstMustBeFreg(fn dst => | ||
1536 : | I.FBINOP{fsize=fsize fty, binOp=binOp, | ||
1537 : | lsrc=lsrc, rsrc=rsrc, dst=dst}) | ||
1538 : | | INTEGER => | ||
1539 : | (dstMustBeFreg(fn dst => | ||
1540 : | I.FIBINOP{isize=isize fty, binOp=ibinOp, | ||
1541 : | lsrc=lsrc, rsrc=rsrc, dst=dst}); | ||
1542 : | emits code | ||
1543 : | ) | ||
1544 : | end | ||
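     : |           (* Example in this pseudo 3-address mode (a sketch with assumed
     : |            * operands): for T.FADD(64, T.FREG(64,a), T.FLOAD(64, ea, m))
     : |            * with destination fd, lsrc becomes RealReg a, fioperand folds
     : |            * the load into a memory operand, and a single
     : |            *     I.FBINOP{fsize=fsize 64, binOp=I.FADDL, lsrc=RealReg a,
     : |            *              rsrc=<ea>, dst=RealReg fd}
     : |            * is emitted.  Only when targetFty <> 64 is the result first
     : |            * computed into a temporary and then narrowed with an I.FMOVE.
     : |            *)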
1545 : | george | 545 | |
1546 : | leunga | 731 | and funop(fty, unOp, src, fd, an) = |
1547 : | let val src = foperand(fty, src) | ||
1548 : | in mark(I.FUNOP{fsize=fsize fty, | ||
1549 : | unOp=unOp, src=src, dst=RealReg fd},an) | ||
1550 : | end | ||
1551 : | |||
1552 : | and doFexpr''(fty, e, fd, an) = | ||
1553 : | case e of | ||
1554 : | george | 889 | T.FREG(_,fs) => if CB.sameColor(fs,fd) then () |
1555 : | leunga | 731 | else fcopy''(fty, [fd], [fs], an) |
1556 : | (* Stupid x86 does everything as 80-bits internally. *) | ||
1557 : | |||
1558 : | (* Binary operators *) | ||
1559 : | | T.FADD(_, a, b) => fbinop(fty, | ||
1560 : | I.FADDL, I.FADDL, I.FIADDL, I.FIADDL, | ||
1561 : | a, b, fd, an) | ||
1562 : | | T.FSUB(_, a, b) => fbinop(fty, | ||
1563 : | I.FSUBL, I.FSUBRL, I.FISUBL, I.FISUBRL, | ||
1564 : | a, b, fd, an) | ||
1565 : | | T.FMUL(_, a, b) => fbinop(fty, | ||
1566 : | I.FMULL, I.FMULL, I.FIMULL, I.FIMULL, | ||
1567 : | a, b, fd, an) | ||
1568 : | | T.FDIV(_, a, b) => fbinop(fty, | ||
1569 : | I.FDIVL, I.FDIVRL, I.FIDIVL, I.FIDIVRL, | ||
1570 : | a, b, fd, an) | ||
1571 : | |||
1572 : | (* Unary operators *) | ||
1573 : | | T.FNEG(_, a) => funop(fty, I.FCHS, a, fd, an) | ||
1574 : | | T.FABS(_, a) => funop(fty, I.FABS, a, fd, an) | ||
1575 : | | T.FSQRT(_, a) => funop(fty, I.FSQRT, a, fd, an) | ||
1576 : | |||
1577 : | (* Load *) | ||
1578 : | | T.FLOAD(fty,ea,mem) => fload''(fty, ea, mem, fd, an) | ||
1579 : | |||
1580 : | (* Type conversions *) | ||
1581 : | | T.CVTF2F(_, _, e) => doFexpr''(fty, e, fd, an) | ||
1582 : | | T.CVTI2F(_, ty, e) => | ||
1583 : | let val (_, ty, ea, cleanup) = convertIntToFloat(ty, e) | ||
1584 : | in fiload''(ty, ea, fd, an); | ||
1585 : | emits cleanup | ||
1586 : | end | ||
1587 : | |||
1588 : | | T.FMARK(e,A.MARKREG f) => (f fd; doFexpr''(fty, e, fd, an)) | ||
1589 : | | T.FMARK(e, a) => doFexpr''(fty, e, fd, a::an) | ||
1590 : | | T.FPRED(e, c) => doFexpr''(fty, e, fd, A.CTRLUSE c::an) | ||
1591 : | | T.FEXT fexp => | ||
1592 : | ExtensionComp.compileFext (reducer()) {e=fexp, fd=fd, an=an} | ||
1593 : | | _ => error("doFexpr''") | ||
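     : |           (* For instance, T.CVTI2F(_, 32, e) in this mode becomes (sketch):
     : |            * convertIntToFloat places the integer in memory (possibly via
     : |            * cvti2f's temporary), a single
     : |            *     I.FILOAD{isize=I.I32, ea=<that location>, dst=RealReg fd}
     : |            * loads and converts it, and any cleanup code is emitted last.
     : |            *)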
1594 : | |||
1595 : | (*======================================================== | ||
1596 : | * Tie the two styles of fp code generation together | ||
1597 : | *========================================================*) | ||
1598 : | and fstore(fty, ea, d, mem, an) = | ||
1599 : | if enableFastFPMode andalso !fast_floating_point | ||
1600 : | then fstore''(fty, ea, d, mem, an) | ||
1601 : | else fstore'(fty, ea, d, mem, an) | ||
1602 : | and fload(fty, ea, d, mem, an) = | ||
1603 : | if enableFastFPMode andalso !fast_floating_point | ||
1604 : | then fload''(fty, ea, d, mem, an) | ||
1605 : | else fload'(fty, ea, d, mem, an) | ||
1606 : | and fexpr e = | ||
1607 : | if enableFastFPMode andalso !fast_floating_point | ||
1608 : | then fexpr'' e else fexpr' e | ||
1609 : | and doFexpr(fty, e, fd, an) = | ||
1610 : | if enableFastFPMode andalso !fast_floating_point | ||
1611 : | then doFexpr''(fty, e, fd, an) | ||
1612 : | else doFexpr'(fty, e, fd, an) | ||
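     : |           (* Both switches must be on to get the 3-address code:
     : |            * enableFastFPMode is fixed when this functor is compiled,
     : |            * while fast_floating_point is a ref the client may flip at
     : |            * runtime, e.g. (illustrative)
     : |            *     fast_floating_point := true
     : |            * before generating code; otherwise the Sethi-Ullman stack
     : |            * based generators are used.
     : |            *)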
1613 : | |||
1614 : | leunga | 797 | (*================================================================ |
1615 : | * Optimizations for x := x op y | ||
1616 : | * Special optimizations: | ||
1617 : | * Generate a binary operator; the result must be in memory.
1618 : | * The source must not be in memory.
1619 : | *================================================================*) | ||
1620 : | and binaryMem(binOp, src, dst, mem, an) = | ||
1621 : | mark(I.BINARY{binOp=binOp, src=immedOrReg(operand src), | ||
1622 : | dst=address(dst,mem)}, an) | ||
1623 : | and unaryMem(unOp, opnd, mem, an) = | ||
1624 : | mark(I.UNARY{unOp=unOp, opnd=address(opnd,mem)}, an) | ||
1625 : | |||
1626 : | and isOne(T.LI n) = n = one | ||
1627 : | | isOne _ = false | ||
1628 : | |||
1629 : | (* | ||
1630 : | * Perform optimizations based on recognizing | ||
1631 : | * x := x op y or | ||
1632 : | * x := y op x | ||
1633 : | * first. | ||
1634 : | *) | ||
1635 : | and store(ty, ea, d, mem, an, | ||
1636 : | {INC,DEC,ADD,SUB,NOT,NEG,SHL,SHR,SAR,OR,AND,XOR}, | ||
1637 : | doStore | ||
1638 : | ) = | ||
1639 : | let fun default() = doStore(ea, d, mem, an) | ||
1640 : | fun binary1(t, t', unary, binary, ea', x) = | ||
1641 : | if t = ty andalso t' = ty then | ||
1642 : | if MLTreeUtils.eqRexp(ea, ea') then | ||
1643 : | if isOne x then unaryMem(unary, ea, mem, an) | ||
1644 : | else binaryMem(binary, x, ea, mem, an) | ||
1645 : | else default() | ||
1646 : | else default() | ||
1647 : | fun unary(t,unOp, ea') = | ||
1648 : | if t = ty andalso MLTreeUtils.eqRexp(ea, ea') then | ||
1649 : | unaryMem(unOp, ea, mem, an) | ||
1650 : | else default() | ||
1651 : | fun binary(t,t',binOp,ea',x) = | ||
1652 : | if t = ty andalso t' = ty andalso | ||
1653 : | MLTreeUtils.eqRexp(ea, ea') then | ||
1654 : | binaryMem(binOp, x, ea, mem, an) | ||
1655 : | else default() | ||
1656 : | |||
1657 : | fun binaryCom1(t,unOp,binOp,x,y) = | ||
1658 : | if t = ty then | ||
1659 : | let fun again() = | ||
1660 : | case y of | ||
1661 : | T.LOAD(ty',ea',_) => | ||
1662 : | if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then | ||
1663 : | if isOne x then unaryMem(unOp, ea, mem, an) | ||
1664 : | else binaryMem(binOp,x,ea,mem,an) | ||
1665 : | else default() | ||
1666 : | | _ => default() | ||
1667 : | in case x of | ||
1668 : | T.LOAD(ty',ea',_) => | ||
1669 : | if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then | ||
1670 : | if isOne y then unaryMem(unOp, ea, mem, an) | ||
1671 : | else binaryMem(binOp,y,ea,mem,an) | ||
1672 : | else again() | ||
1673 : | | _ => again() | ||
1674 : | end | ||
1675 : | else default() | ||
1676 : | |||
1677 : | fun binaryCom(t,binOp,x,y) = | ||
1678 : | if t = ty then | ||
1679 : | let fun again() = | ||
1680 : | case y of | ||
1681 : | T.LOAD(ty',ea',_) => | ||
1682 : | if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then | ||
1683 : | binaryMem(binOp,x,ea,mem,an) | ||
1684 : | else default() | ||
1685 : | | _ => default() | ||
1686 : | in case x of | ||
1687 : | T.LOAD(ty',ea',_) => | ||
1688 : | if ty' = ty andalso MLTreeUtils.eqRexp(ea, ea') then | ||
1689 : | binaryMem(binOp,y,ea,mem,an) | ||
1690 : | else again() | ||
1691 : | | _ => again() | ||
1692 : | end | ||
1693 : | else default() | ||
1694 : | |||
1695 : | in case d of | ||
1696 : | T.ADD(t,x,y) => binaryCom1(t,INC,ADD,x,y) | ||
1697 : | | T.SUB(t,T.LOAD(t',ea',_),x) => binary1(t,t',DEC,SUB,ea',x) | ||
1698 : | | T.ORB(t,x,y) => binaryCom(t,OR,x,y) | ||
1699 : | | T.ANDB(t,x,y) => binaryCom(t,AND,x,y) | ||
1700 : | | T.XORB(t,x,y) => binaryCom(t,XOR,x,y) | ||
1701 : | | T.SLL(t,T.LOAD(t',ea',_),x) => binary(t,t',SHL,ea',x) | ||
1702 : | | T.SRL(t,T.LOAD(t',ea',_),x) => binary(t,t',SHR,ea',x) | ||
1703 : | | T.SRA(t,T.LOAD(t',ea',_),x) => binary(t,t',SAR,ea',x) | ||
1704 : | | T.NEG(t,T.LOAD(t',ea',_)) => unary(t,NEG,ea') | ||
1705 : | | T.NOTB(t,T.LOAD(t',ea',_)) => unary(t,NOT,ea') | ||
1706 : | | _ => default() | ||
1707 : | end (* store *) | ||
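     : |           (* Worked example (assumed operands): for
     : |            *   T.STORE(32, ea, T.ADD(32, T.LOAD(32, ea, mem), T.LI 1), mem)
     : |            * binaryCom1 recognizes x := x + 1 and emits a single in-memory
     : |            * increment via unaryMem (an I.UNARY on address(ea,mem)); with
     : |            * any other addend y it emits one I.BINARY via binaryMem, using
     : |            * the ADD opcode from the opcodes record, src=immedOrReg(operand y)
     : |            * and dst=address(ea,mem); anything that does not match falls
     : |            * back to doStore.
     : |            *)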
1708 : | |||
1709 : | george | 545 | (* generate code for a statement *) |
1710 : | and stmt(T.MV(_, rd, e), an) = doExpr(e, rd, an) | ||
1711 : | | stmt(T.FMV(fty, fd, e), an) = doFexpr(fty, e, fd, an) | ||
1712 : | | stmt(T.CCMV(ccd, e), an) = doCCexpr(e, ccd, an) | ||
1713 : | | stmt(T.COPY(_, dst, src), an) = copy(dst, src, an) | ||
1714 : | | stmt(T.FCOPY(fty, dst, src), an) = fcopy(fty, dst, src, an) | ||
1715 : | leunga | 744 | | stmt(T.JMP(e, labs), an) = jmp(e, labs, an) |
1716 : | blume | 839 | | stmt(T.CALL{funct, targets, defs, uses, region, pops, ...}, an) = |
1717 : | call(funct,targets,defs,uses,region,[],an, pops) | ||
1718 : | | stmt(T.FLOW_TO(T.CALL{funct, targets, defs, uses, region, pops, ...}, | ||
1719 : | leunga | 796 | cutTo), an) = |
1720 : | blume | 839 | call(funct,targets,defs,uses,region,cutTo,an, pops) |
1721 : | george | 545 | | stmt(T.RET _, an) = mark(I.RET NONE, an) |
1722 : | leunga | 797 | | stmt(T.STORE(8, ea, d, mem), an) = |
1723 : | store(8, ea, d, mem, an, opcodes8, store8) | ||
1724 : | | stmt(T.STORE(16, ea, d, mem), an) = | ||
1725 : | store(16, ea, d, mem, an, opcodes16, store16) | ||
1726 : | | stmt(T.STORE(32, ea, d, mem), an) = | ||
1727 : | store(32, ea, d, mem, an, opcodes32, store32) | ||
1728 : | |||
1729 : | george | 545 | | stmt(T.FSTORE(fty, ea, d, mem), an) = fstore(fty, ea, d, mem, an) |
1730 : | leunga | 744 | | stmt(T.BCC(cc, lab), an) = branch(cc, lab, an) |
1731 : | george | 545 | | stmt(T.DEFINE l, _) = defineLabel l |
1732 : | | stmt(T.ANNOTATION(s, a), an) = stmt(s, a::an) | ||
1733 : | george | 555 | | stmt(T.EXT s, an) = |
1734 : | ExtensionComp.compileSext (reducer()) {stm=s, an=an} | ||
1735 : | george | 545 | | stmt(s, _) = doStmts(Gen.compileStm s) |
1736 : | |||
1737 : | and doStmt s = stmt(s, []) | ||
1738 : | and doStmts ss = app doStmt ss | ||
1739 : | |||
1740 : | and beginCluster' _ = | ||
1741 : | ((* Must be cleared by the client. | ||
1742 : | * if rewriteMemReg then memRegsUsed := 0w0 else (); | ||
1743 : | *) | ||
1744 : | leunga | 731 | floatingPointUsed := false; |
1745 : | trapLabel := NONE; | ||
1746 : | beginCluster 0 | ||
1747 : | ) | ||
1748 : | george | 545 | and endCluster' a = |
1749 : | monnier | 247 | (case !trapLabel |
1750 : | monnier | 411 | of NONE => () |
1751 : | george | 545 | | SOME(_, lab) => (defineLabel lab; emit(I.INTO)) |
1752 : | monnier | 411 | (*esac*); |
1753 : | leunga | 731 | (* If floating point has been used, allocate an extra
1754 : | * register just in case we didn't use any explicit registers.
1755 : | *) | ||
1756 : | if !floatingPointUsed then (newFreg(); ()) | ||
1757 : | else (); | ||
1758 : | george | 545 | endCluster(a) |
1759 : | ) | ||
1760 : | |||
1761 : | and reducer() = | ||
1762 : | george | 984 | TS.REDUCER{reduceRexp = expr, |
1763 : | george | 545 | reduceFexp = fexpr, |
1764 : | reduceCCexp = ccExpr, | ||
1765 : | reduceStm = stmt, | ||
1766 : | operand = operand, | ||
1767 : | reduceOperand = reduceOpnd, | ||
1768 : | addressOf = fn e => address(e, I.Region.memory), (*XXX*) | ||
1769 : | george | 1009 | emit = mark', |
1770 : | george | 545 | instrStream = instrStream, |
1771 : | mltreeStream = self() | ||
1772 : | } | ||
1773 : | |||
1774 : | and self() = | ||
1775 : | george | 984 | TS.S.STREAM |
1776 : | leunga | 815 | { beginCluster = beginCluster', |
1777 : | endCluster = endCluster', | ||
1778 : | emit = doStmt, | ||
1779 : | pseudoOp = pseudoOp, | ||
1780 : | defineLabel = defineLabel, | ||
1781 : | entryLabel = entryLabel, | ||
1782 : | comment = comment, | ||
1783 : | annotation = annotation, | ||
1784 : | getAnnotations = getAnnotations, | ||
1785 : | exitBlock = fn mlrisc => exitBlock(cellset mlrisc) | ||
1786 : | george | 545 | } |
1787 : | |||
1788 : | in self() | ||
1789 : | monnier | 247 | end |
1790 : | |||
1791 : | george | 545 | end (* functor *) |
1792 : | |||
1793 : | end (* local *) |