SCM Repository
Annotation of /sml/trunk/src/MLRISC/alpha/mltree/alpha.sml
Parent Directory
|
Revision Log
Revision 1117 - (view) (download)
1 : | monnier | 409 | (* |
2 : | * This is a revamping of the Alpha32 instruction selection module | ||
3 : | * using the new MLTREE and instruction representation. I've dropped | ||
4 : | * the suffix 32 since we now support 64 bit datatypes. | ||
5 : | * | ||
6 : | * -- Allen | ||
7 : | * | ||
8 : | george | 1009 | * Notes: places with optimizations are marked ***OPT**n* |
9 : | monnier | 409 | *) |
10 : | |||
11 : | george | 1009 | |
12 : | monnier | 409 | functor Alpha |
13 : | leunga | 744 | (structure AlphaInstr : ALPHAINSTR |
14 : | monnier | 409 | structure PseudoInstrs : ALPHA_PSEUDO_INSTR |
15 : | george | 933 | where I = AlphaInstr |
16 : | george | 555 | structure ExtensionComp : MLTREE_EXTENSION_COMP |
17 : | george | 933 | where I = AlphaInstr |
18 : | and T = AlphaInstr.T | ||
19 : | monnier | 409 | |
20 : | (* Cost of multiplication in cycles *) | ||
21 : | val multCost : int ref | ||
22 : | |||
23 : | (* Should we just use the native multiply by a constant? *) | ||
24 : | val useMultByConst : bool ref | ||
25 : | george | 545 | |
26 : | (* Should we use SUD flags for floating point and generate DEFFREG? | ||
27 : | * This should be set to false for C-like clients but true for SML/NJ. | ||
28 : | *) | ||
29 : | val SMLNJfloatingPoint : bool | ||
30 : | leunga | 583 | |
31 : | (* Should we use generate special byte/word load instructions | ||
32 : | * like LDBU, LDWU, STB, STW. | ||
33 : | *) | ||
34 : | val byteWordLoadStores : bool ref | ||
35 : | monnier | 409 | ) : MLTREECOMP = |
36 : | struct | ||
37 : | |||
38 : | leunga | 775 | structure I = AlphaInstr |
39 : | structure C = I.C | ||
40 : | structure T = I.T | ||
41 : | george | 984 | structure TS = ExtensionComp.TS |
42 : | leunga | 775 | structure R = T.Region |
43 : | monnier | 409 | structure W32 = Word32 |
44 : | monnier | 429 | structure P = PseudoInstrs |
45 : | george | 545 | structure A = MLRiscAnnotations |
46 : | george | 889 | structure CB = CellsBasis |
47 : | george | 909 | structure CFG = ExtensionComp.CFG |
48 : | monnier | 409 | |
49 : | (********************************************************* | ||
50 : | |||
51 : | Trap Shadows, Floating Exceptions, and Denormalized | ||
52 : | Numbers on the DEC Alpha | ||
53 : | |||
54 : | Andrew W. Appel and Lal George | ||
55 : | Nov 28, 1995 | ||
56 : | |||
57 : | See section 4.7.5.1 of the Alpha Architecture Reference Manual. | ||
58 : | |||
59 : | The Alpha has imprecise exceptions, meaning that if a floating | ||
60 : | point instruction raises an IEEE exception, the exception may | ||
61 : | not interrupt the processor until several successive instructions have | ||
62 : | completed. ML, on the other hand, may want a "precise" model | ||
63 : | of floating point exceptions. | ||
64 : | |||
65 : | Furthermore, the Alpha hardware does not support denormalized numbers | ||
66 : | (for "gradual underflow"). Instead, underflow always rounds to zero. | ||
67 : | However, each floating operation (add, mult, etc.) has a trapping | ||
68 : | variant that will raise an exception (imprecisely, of course) on | ||
69 : | underflow; in that case, the instruction will produce a zero result | ||
70 : | AND an exception will occur. In fact, there are several variants | ||
71 : | of each instruction; three variants of MULT are: | ||
72 : | |||
73 : | MULT s1,s2,d truncate denormalized result to zero; no exception | ||
74 : | MULT/U s1,s2,d truncate denormalized result to zero; raise UNDERFLOW | ||
75 : | MULT/SU s1,s2,d software completion, producing denormalized result | ||
76 : | |||
77 : | The hardware treats the MULT/U and MULT/SU instructions identically, | ||
78 : | truncating a denormalized result to zero and raising the UNDERFLOW | ||
79 : | exception. But the operating system, on an UNDERFLOW exception, | ||
80 : | examines the faulting instruction to see if it's an /SU form, and if so, | ||
81 : | recalculates s1*s2, puts the right answer in d, and continues, | ||
82 : | all without invoking the user's signal handler. | ||
83 : | |||
84 : | Because most machines compute with denormalized numbers in hardware, | ||
85 : | to maximize portability of SML programs, we use the MULT/SU form. | ||
86 : | (and ADD/SU, SUB/SU, etc.) But to use this form successfully, | ||
87 : | certain rules have to be followed. Basically, d cannot be the same | ||
88 : | register as s1 or s2, because the opsys needs to be able to | ||
89 : | recalculate the operation using the original contents of s1 and s2, | ||
90 : | and the MULT/SU instruction will overwrite d even if it traps. | ||
91 : | |||
92 : | More generally, we may want to have a sequence of floating-point | ||
93 : | instructions. The rules for such a sequence are: | ||
94 : | |||
95 : | 1. The sequence should end with a TRAPB (trap barrier) instruction. | ||
96 : | (This could be relaxed somewhat, but certainly a TRAPB would | ||
97 : | be a good idea sometime before the next branch instruction or | ||
98 : | update of an ML reference variable, or any other ML side effect.) | ||
99 : | 2. No instruction in the sequence should destroy any operand of itself | ||
100 : | or of any previous instruction in the sequence. | ||
101 : | 3. No two instructions in the sequence should write the same destination | ||
102 : | register. | ||
103 : | |||
104 : | We can achieve these conditions by the following trick in the | ||
105 : | Alpha code generator. Each instruction in the sequence will write | ||
106 : | to a different temporary; this is guaranteed by the translation from | ||
107 : | ML-RISC. At the beginning of the sequence, we will put a special | ||
108 : | pseudo-instruction (we call it DEFFREG) that "defines" the destination | ||
109 : | register of the arithmetic instruction. If there are K arithmetic | ||
110 : | instructions in the sequence, then we'll insert K DEFFREG instructions | ||
111 : | all at the beginning of the sequence. | ||
112 : | Then, each arithop will not only "define" its destination temporary | ||
113 : | but will "use" it as well. When all these instructions are fed to | ||
114 : | the liveness analyzer, the resulting interference graph will then | ||
115 : | have inteference edges satisfying conditions 2 and 3 above. | ||
116 : | |||
117 : | Of course, DEFFREG doesn't actually generate any code. In our model | ||
118 : | of the Alpha, every instruction generates exactly 4 bytes of code | ||
119 : | except the "span-dependent" ones. Therefore, we'll specify DEFFREG | ||
120 : | as a span-dependent instruction whose minimum and maximum sizes are zero. | ||
121 : | |||
122 : | At the moment, we do not group arithmetic operations into sequences; | ||
123 : | that is, each arithop will be preceded by a single DEFFREG and | ||
124 : | followed by a TRAPB. To avoid the cost of all those TRAPB's, we | ||
125 : | should improve this when we have time. Warning: Don't put more | ||
126 : | than 31 instructions in the sequence, because they're all required | ||
127 : | to write to different destination registers! | ||
128 : | |||
129 : | What about multiple traps? For example, suppose a sequence of | ||
130 : | instructions produces an Overflow and a Divide-by-Zero exception? | ||
131 : | ML would like to know only about the earliest trap, but the hardware | ||
132 : | will report BOTH traps to the operating system. However, as long | ||
133 : | as the rules above are followed (and the software-completion versions | ||
134 : | of the arithmetic instructions are used), the operating system will | ||
135 : | have enough information to know which instruction produced the | ||
136 : | trap. It is very probable that the operating system will report ONLY | ||
137 : | the earlier trap to the user process, but I'm not sure. | ||
138 : | |||
139 : | For a hint about what the operating system is doing in its own | ||
140 : | trap-handler (with software completion), see section 6.3.2 of | ||
141 : | "OpenVMS Alpha Software" (Part II of the Alpha Architecture | ||
142 : | Manual). This stuff should apply to Unix (OSF1) as well as VMS. | ||
143 : | |||
144 : | george | 1003 | |
145 : | |||
146 : | |||
147 : | |||
148 : | |||
149 : | |||
150 : | -------------------o*o---------------------- | ||
151 : | LIVE/KILL instructions | ||
152 : | Nov 28, 2001 | ||
153 : | Lal George | ||
154 : | |||
155 : | The mechanism described above is now obsolete. We no longer use | ||
156 : | the DEFFREG instruction but the zero length LIVE instruction. | ||
157 : | Therefore the code that gets generated is something like; | ||
158 : | |||
159 : | f1 := f2 + f3 | ||
160 : | trap | ||
161 : | LIVE f1, f2, f3 | ||
162 : | |||
163 : | The live ranges for f1, f2, and f3 are extended by the LIVE | ||
164 : | instruction, and are live simultaneously and therefore cannot | ||
165 : | be allocated to the same register. | ||
166 : | |||
167 : | Multiple floating point instructions should be surrounded | ||
168 : | by parallel copies. That is to say, if we have: | ||
169 : | |||
170 : | f1 := f2 + f3 | ||
171 : | trapb | ||
172 : | LIVE f1, f2, f3 | ||
173 : | |||
174 : | f4 := f1 * f2 | ||
175 : | trapb | ||
176 : | LIVE f1, f2, f4 | ||
177 : | |||
178 : | Then the sequence above should be transformed to: | ||
179 : | |||
180 : | [f2', f3'] := [f1, f2] ; parallel copy | ||
181 : | f1' := f2' + f3' | ||
182 : | f4' := f1' * f2' | ||
183 : | trapb | ||
184 : | LIVE f1', f2', f3', f4' | ||
185 : | [f4] := [f4'] ; copy assuming f4 is the only value live. | ||
186 : | |||
187 : | The parallel copies are to ensure that the primed variables will | ||
188 : | not spill, and there should never be more than K reigsters in the LIVE | ||
189 : | instruction (K is the number of registers on the machine). | ||
190 : | monnier | 409 | ****************************************************************) |
191 : | |||
192 : | fun error msg = MLRiscErrorMsg.error("Alpha",msg) | ||
193 : | |||
194 : | george | 984 | type instrStream = (I.instruction, C.cellset, CFG.cfg) TS.stream |
195 : | type mltreeStream = (T.stm, T.mlrisc list, CFG.cfg) TS.stream | ||
196 : | monnier | 409 | |
197 : | (* | ||
198 : | * This module is used to simulate operations of non-standard widths. | ||
199 : | *) | ||
200 : | structure Gen = MLTreeGen(structure T = T | ||
201 : | jhr | 1117 | structure Cells = C |
202 : | monnier | 409 | val intTy = 64 |
203 : | val naturalWidths = [32,64] | ||
204 : | monnier | 429 | datatype rep = SE | ZE | NEITHER |
205 : | val rep = SE | ||
206 : | monnier | 409 | ) |
207 : | |||
208 : | leunga | 744 | val zeroR = C.r31 |
209 : | monnier | 409 | val zeroOpn = I.REGop zeroR |
210 : | george | 761 | fun LI i = T.LI(T.I.fromInt(32, i)) |
211 : | fun toInt i = T.I.toInt(32, i) | ||
212 : | val int_0 = T.I.int_0 | ||
213 : | val int_1 = T.I.int_1 | ||
214 : | leunga | 775 | fun EQ(x:IntInf.int,y) = x=y |
215 : | monnier | 409 | |
216 : | (* | ||
217 : | * Specialize the modules for multiplication/division | ||
218 : | * by constant optimizations. | ||
219 : | *) | ||
220 : | functor Multiply32 = MLTreeMult | ||
221 : | (structure I = I | ||
222 : | structure T = T | ||
223 : | george | 889 | structure CB = CellsBasis |
224 : | monnier | 409 | |
225 : | val intTy = 32 | ||
226 : | |||
227 : | george | 889 | type arg = {r1:CB.cell,r2:CB.cell,d:CB.cell} |
228 : | type argi = {r:CB.cell,i:int,d:CB.cell} | ||
229 : | monnier | 409 | |
230 : | george | 1009 | fun mov{r,d} = I.COPY{k=CB.GP, sz=intTy, dst=[d],src=[r],tmp=NONE} |
231 : | george | 1003 | fun add{r1,r2,d} = I.operate{oper=I.ADDL,ra=r1,rb=I.REGop r2,rc=d} |
232 : | monnier | 409 | (* |
233 : | * How to left shift by a constant (32bits) | ||
234 : | *) | ||
235 : | george | 1003 | fun slli{r,i=1,d} = [I.operate{oper=I.ADDL,ra=r,rb=I.REGop r,rc=d}] |
236 : | | slli{r,i=2,d} = [I.operate{oper=I.S4ADDL,ra=r,rb=zeroOpn,rc=d}] | ||
237 : | | slli{r,i=3,d} = [I.operate{oper=I.S8ADDL,ra=r,rb=zeroOpn,rc=d}] | ||
238 : | monnier | 409 | | slli{r,i,d} = |
239 : | let val tmp = C.newReg() | ||
240 : | george | 1003 | in [I.operate{oper=I.SLL,ra=r,rb=I.IMMop i,rc=tmp}, |
241 : | I.operate{oper=I.ADDL,ra=tmp,rb=zeroOpn,rc=d}] | ||
242 : | monnier | 409 | end |
243 : | |||
244 : | (* | ||
245 : | * How to right shift (unsigned) by a constant (32bits) | ||
246 : | *) | ||
247 : | fun srli{r,i,d} = | ||
248 : | let val tmp = C.newReg() | ||
249 : | george | 1003 | in [I.operate{oper=I.ZAP,ra=r,rb=I.IMMop 0xf0,rc=tmp}, |
250 : | I.operate{oper=I.SRL,ra=tmp,rb=I.IMMop i,rc=d}] | ||
251 : | monnier | 409 | end |
252 : | |||
253 : | (* | ||
254 : | * How to right shift (signed) by a constant (32bits) | ||
255 : | *) | ||
256 : | fun srai{r,i,d} = | ||
257 : | let val tmp = C.newReg() | ||
258 : | george | 1003 | in [I.operate{oper=I.ADDL,ra=r,rb=zeroOpn,rc=tmp}, |
259 : | I.operate{oper=I.SRA,ra=tmp,rb=I.IMMop i,rc=d}] | ||
260 : | monnier | 409 | end |
261 : | ) | ||
262 : | |||
263 : | functor Multiply64 = MLTreeMult | ||
264 : | (structure I = I | ||
265 : | structure T = T | ||
266 : | george | 889 | structure CB = CellsBasis |
267 : | monnier | 409 | |
268 : | val intTy = 64 | ||
269 : | |||
270 : | george | 889 | type arg = {r1:CB.cell, r2:CB.cell, d:CB.cell} |
271 : | type argi = {r:CB.cell, i:int, d:CB.cell} | ||
272 : | monnier | 409 | |
273 : | george | 1009 | fun mov{r,d} = I.COPY{k=CB.GP, sz=intTy, dst=[d],src=[r],tmp=NONE} |
274 : | george | 1003 | fun add{r1,r2,d}= I.operate{oper=I.ADDQ,ra=r1,rb=I.REGop r2,rc=d} |
275 : | fun slli{r,i,d} = [I.operate{oper=I.SLL,ra=r,rb=I.IMMop i,rc=d}] | ||
276 : | fun srli{r,i,d} = [I.operate{oper=I.SRL,ra=r,rb=I.IMMop i,rc=d}] | ||
277 : | fun srai{r,i,d} = [I.operate{oper=I.SRA,ra=r,rb=I.IMMop i,rc=d}] | ||
278 : | monnier | 409 | ) |
279 : | |||
280 : | (* signed, trapping version of multiply and divide *) | ||
281 : | structure Mult32 = Multiply32 | ||
282 : | (val trapping = true | ||
283 : | val multCost = multCost | ||
284 : | george | 1003 | fun addv{r1,r2,d} = [I.operatev{oper=I.ADDLV,ra=r1,rb=I.REGop r2,rc=d}] |
285 : | fun subv{r1,r2,d} = [I.operatev{oper=I.SUBLV,ra=r1,rb=I.REGop r2,rc=d}] | ||
286 : | monnier | 409 | val sh1addv = NONE |
287 : | val sh2addv = NONE | ||
288 : | val sh3addv = NONE | ||
289 : | ) | ||
290 : | monnier | 429 | (val signed = true) |
291 : | monnier | 409 | |
292 : | monnier | 429 | (* non-trapping version of multiply and divide *) |
293 : | functor Mul32 = Multiply32 | ||
294 : | monnier | 409 | (val trapping = false |
295 : | val multCost = multCost | ||
296 : | george | 1003 | fun addv{r1,r2,d} = [I.operate{oper=I.ADDL,ra=r1,rb=I.REGop r2,rc=d}] |
297 : | fun subv{r1,r2,d} = [I.operate{oper=I.SUBL,ra=r1,rb=I.REGop r2,rc=d}] | ||
298 : | monnier | 409 | val sh1addv = NONE |
299 : | val sh2addv = SOME(fn {r1,r2,d} => | ||
300 : | george | 1003 | [I.operate{oper=I.S4ADDL,ra=r1,rb=I.REGop r2,rc=d}]) |
301 : | monnier | 409 | val sh3addv = SOME(fn {r1,r2,d} => |
302 : | george | 1003 | [I.operate{oper=I.S8ADDL,ra=r1,rb=I.REGop r2,rc=d}]) |
303 : | monnier | 409 | ) |
304 : | monnier | 429 | structure Mulu32 = Mul32(val signed = false) |
305 : | structure Muls32 = Mul32(val signed = true) | ||
306 : | monnier | 409 | |
307 : | (* signed, trapping version of multiply and divide *) | ||
308 : | structure Mult64 = Multiply64 | ||
309 : | (val trapping = true | ||
310 : | val multCost = multCost | ||
311 : | george | 1003 | fun addv{r1,r2,d} = [I.operatev{oper=I.ADDQV,ra=r1,rb=I.REGop r2,rc=d}] |
312 : | fun subv{r1,r2,d} = [I.operatev{oper=I.SUBQV,ra=r1,rb=I.REGop r2,rc=d}] | ||
313 : | monnier | 409 | val sh1addv = NONE |
314 : | val sh2addv = NONE | ||
315 : | val sh3addv = NONE | ||
316 : | ) | ||
317 : | monnier | 429 | (val signed = true) |
318 : | monnier | 409 | |
319 : | (* unsigned, non-trapping version of multiply and divide *) | ||
320 : | monnier | 429 | functor Mul64 = Multiply64 |
321 : | monnier | 409 | (val trapping = false |
322 : | val multCost = multCost | ||
323 : | george | 1003 | fun addv{r1,r2,d} = [I.operate{oper=I.ADDQ,ra=r1,rb=I.REGop r2,rc=d}] |
324 : | fun subv{r1,r2,d} = [I.operate{oper=I.SUBQ,ra=r1,rb=I.REGop r2,rc=d}] | ||
325 : | monnier | 409 | val sh1addv = NONE |
326 : | val sh2addv = SOME(fn {r1,r2,d} => | ||
327 : | george | 1003 | [I.operate{oper=I.S4ADDQ,ra=r1,rb=I.REGop r2,rc=d}]) |
328 : | monnier | 409 | val sh3addv = SOME(fn {r1,r2,d} => |
329 : | george | 1003 | [I.operate{oper=I.S8ADDQ,ra=r1,rb=I.REGop r2,rc=d}]) |
330 : | monnier | 409 | ) |
331 : | monnier | 429 | structure Mulu64 = Mul64(val signed = false) |
332 : | structure Muls64 = Mul64(val signed = true) | ||
333 : | monnier | 409 | |
334 : | (* | ||
335 : | * The main stuff | ||
336 : | *) | ||
337 : | |||
338 : | george | 761 | datatype times4or8 = TIMES1 | TIMES4 | TIMES8 |
339 : | monnier | 409 | datatype zeroOne = ZERO | ONE | OTHER |
340 : | datatype commutative = COMMUTE | NOCOMMUTE | ||
341 : | |||
342 : | leunga | 744 | val zeroFR = C.f31 |
343 : | leunga | 583 | val zeroEA = I.Direct zeroR |
344 : | george | 761 | val zeroT = T.LI int_0 |
345 : | george | 1003 | val trapb = [I.trapb] |
346 : | leunga | 583 | val zeroImm = I.IMMop 0 |
347 : | |||
348 : | monnier | 409 | fun selectInstructions |
349 : | george | 545 | (instrStream as |
350 : | george | 1003 | TS.S.STREAM{emit=emitInstruction,beginCluster,endCluster,getAnnotations, |
351 : | george | 984 | defineLabel,entryLabel,pseudoOp,annotation, |
352 : | exitBlock,comment,...}) = | ||
353 : | monnier | 409 | let |
354 : | george | 1003 | |
355 : | monnier | 409 | infix || && << >> ~>> |
356 : | |||
357 : | val op || = W32.orb | ||
358 : | val op && = W32.andb | ||
359 : | val op << = W32.<< | ||
360 : | val op >> = W32.>> | ||
361 : | val op ~>> = W32.~>> | ||
362 : | |||
363 : | val itow = Word.fromInt | ||
364 : | val wtoi = Word.toIntX | ||
365 : | |||
366 : | george | 1003 | val emit = emitInstruction o I.INSTR |
367 : | |||
368 : | monnier | 409 | val newReg = C.newReg |
369 : | val newFreg = C.newFreg | ||
370 : | |||
371 : | (* Choose the appropriate rounding mode to generate. | ||
372 : | * This stuff is used to support the alpha32x SML/NJ backend. | ||
373 : | monnier | 429 | * |
374 : | * | ||
375 : | * Floating point rounding mode. | ||
376 : | * When this is set to true, we use the /SU rounding mode | ||
377 : | * (chopped towards zero) for floating point arithmetic. | ||
378 : | * This flag is only used to support the old alpha32x backend. | ||
379 : | * | ||
380 : | * Otherwise, we use /SUD. This is the default for SML/NJ. | ||
381 : | * | ||
382 : | monnier | 409 | *) |
383 : | george | 545 | val (ADDTX,SUBTX,MULTX,DIVTX) = |
384 : | (I.ADDTSUD,I.SUBTSUD,I.MULTSUD,I.DIVTSUD) | ||
385 : | val (ADDSX,SUBSX,MULSX,DIVSX) = | ||
386 : | (I.ADDSSUD,I.SUBSSUD,I.MULSSUD,I.DIVSSUD) | ||
387 : | monnier | 409 | |
388 : | george | 1003 | fun annotate(i, an) = List.foldl (fn (a, i) => I.ANNOTATION{i=i,a=a}) i an |
389 : | fun mark'(i, an) = emitInstruction(annotate(i,an)) | ||
390 : | fun mark(i,an) = emitInstruction(annotate(I.INSTR i,an)) | ||
391 : | monnier | 409 | |
392 : | (* Fit within 16 bits? *) | ||
393 : | fun literal16 n = ~32768 <= n andalso n < 32768 | ||
394 : | fun literal16w w = | ||
395 : | let val hi = W32.~>>(w,0wx16) | ||
396 : | in hi = 0w0 orelse (W32.notb hi) = 0w0 end | ||
397 : | |||
398 : | (* emit an LDA instruction; return the register that holds the value *) | ||
399 : | fun lda(base,I.IMMop 0) = base | ||
400 : | | lda(base,offset) = | ||
401 : | let val r = newReg() | ||
402 : | in emit(I.LDA{r=r, b=base, d=offset}); r end | ||
403 : | |||
404 : | (* emit load immed *) | ||
405 : | george | 761 | fun loadImmed(n, base, rd, an) = let |
406 : | val n = T.I.toInt32(32, n) | ||
407 : | in | ||
408 : | if n = 0 then move(base, rd, an) | ||
409 : | else if ~32768 <= n andalso n < 32768 then | ||
410 : | mark(I.LDA{r=rd, b=base, d=I.IMMop(Int32.toInt n)}, an) | ||
411 : | else loadImmed32(n, base, rd, an) | ||
412 : | monnier | 409 | end |
413 : | |||
414 : | (* loadImmed32 is used to load int32 and word32 constants. | ||
415 : | * In either case we sign extend the 32-bit value. This is compatible | ||
416 : | * with LDL which sign extends a 32-bit valued memory location. | ||
417 : | *) | ||
418 : | george | 761 | (* TODO: |
419 : | * Should handle 64 bits if immediate is not in the 32 bit range. | ||
420 : | *) | ||
421 : | and loadImmed32(n, base, rd, an) = let | ||
422 : | fun immed(0, high) = | ||
423 : | mark(I.LDAH{r=rd, b=base, d=I.IMMop(high)}, an) | ||
424 : | | immed(low, 0) = | ||
425 : | mark(I.LDA{r=rd, b=base, d=I.IMMop(low)}, an) | ||
426 : | | immed(low, high) = | ||
427 : | (emit(I.LDA{r=rd, b=base, d=I.IMMop(low)}); | ||
428 : | mark(I.LDAH{r=rd, b=rd, d=I.IMMop(high)}, an) | ||
429 : | ) | ||
430 : | val w = Word32.fromLargeInt(Int32.toLarge n) | ||
431 : | val low = W32.andb(w, 0wxffff) | ||
432 : | val high = W32.~>>(w, 0w16) | ||
433 : | in | ||
434 : | if W32.<(low, 0wx8000) then | ||
435 : | immed(W32.toInt low, W32.toIntX high) | ||
436 : | else let | ||
437 : | val low = W32.toIntX(W32.-(low, 0wx10000)) | ||
438 : | val high = W32.toIntX(W32.+(high, 0w1)) | ||
439 : | monnier | 409 | in |
440 : | george | 761 | if high <> 0x8000 then immed(low, high) |
441 : | else let (* transition of high from pos to neg *) | ||
442 : | val tmpR1 = newReg() | ||
443 : | val tmpR2 = newReg() | ||
444 : | val tmpR3=newReg() | ||
445 : | in | ||
446 : | (* you just gotta do, what you gotta do! *) | ||
447 : | emit(I.LDA{r=tmpR3, b=base, d=I.IMMop(low)}); | ||
448 : | emit(I.OPERATE{oper=I.ADDQ, ra=zeroR, rb=I.IMMop 1, rc=tmpR1}); | ||
449 : | emit(I.OPERATE{oper=I.SLL, ra=tmpR1, rb=I.IMMop 31, rc=tmpR2}); | ||
450 : | mark(I.OPERATE{oper=I.ADDQ, ra=tmpR2, rb=I.REGop tmpR3, rc=rd},an) | ||
451 : | end | ||
452 : | end | ||
453 : | end | ||
454 : | monnier | 409 | |
455 : | george | 761 | |
456 : | leunga | 775 | (* emit load label expression *) |
457 : | and loadLabexp(le,d,an) = mark(I.LDA{r=d,b=zeroR,d=I.LABop le},an) | ||
458 : | monnier | 409 | |
459 : | (* emit a copy *) | ||
460 : | monnier | 469 | and copy(dst,src,an) = |
461 : | george | 1009 | mark'(I.COPY{k=CB.GP, sz=32, dst=dst,src=src, |
462 : | monnier | 409 | tmp=case dst of |
463 : | [_] => NONE | _ => SOME(I.Direct(newReg()))},an) | ||
464 : | |||
465 : | (* emit a floating point copy *) | ||
466 : | monnier | 469 | and fcopy(dst,src,an) = |
467 : | george | 1009 | mark'(I.COPY{k=CB.FP, sz=64, dst=dst,src=src, |
468 : | monnier | 409 | tmp=case dst of |
469 : | [_] => NONE | _ => SOME(I.FDirect(newFreg()))},an) | ||
470 : | |||
471 : | monnier | 469 | and move(s,d,an) = |
472 : | george | 889 | if CB.sameCell(s,d) orelse CB.sameCell(d,zeroR) then () else |
473 : | george | 1009 | mark'(I.COPY{k=CB.GP, sz=32, dst=[d],src=[s],tmp=NONE},an) |
474 : | monnier | 409 | |
475 : | monnier | 469 | and fmove(s,d,an) = |
476 : | george | 889 | if CB.sameCell(s,d) orelse CB.sameCell(d,zeroFR) then () else |
477 : | george | 1009 | mark'(I.COPY{k=CB.FP, sz=64, dst=[d],src=[s],tmp=NONE},an) |
478 : | monnier | 409 | |
479 : | (* emit an sign extension op *) | ||
480 : | monnier | 469 | and signExt32(r,d) = |
481 : | leunga | 646 | emit(I.OPERATE{oper=I.ADDL,ra=r,rb=zeroOpn,rc=d}) |
482 : | monnier | 409 | |
483 : | (* emit an commutative arithmetic op *) | ||
484 : | monnier | 469 | and commArith(opcode,a,b,d,an) = |
485 : | monnier | 409 | case (opn a,opn b) of |
486 : | (I.REGop r,i) => mark(I.OPERATE{oper=opcode,ra=r,rb=i,rc=d},an) | ||
487 : | | (i,I.REGop r) => mark(I.OPERATE{oper=opcode,ra=r,rb=i,rc=d},an) | ||
488 : | | (r,i) => mark(I.OPERATE{oper=opcode,ra=reduceOpn r,rb=i,rc=d},an) | ||
489 : | |||
490 : | (* emit an arithmetic op *) | ||
491 : | and arith(opcode,a,b,d,an) = | ||
492 : | mark(I.OPERATE{oper=opcode,ra=expr a,rb=opn b,rc=d},an) | ||
493 : | and arith'(opcode,a,b,d,an) = | ||
494 : | let val rb = opn b | ||
495 : | val ra = expr a | ||
496 : | in mark(I.OPERATE{oper=opcode,ra=ra,rb=rb,rc=d},an) end | ||
497 : | |||
498 : | (* emit a trapping commutative arithmetic op *) | ||
499 : | and commArithTrap(opcode,a,b,d,an) = | ||
500 : | (case (opn a,opn b) of | ||
501 : | (I.REGop r,i) => mark(I.OPERATEV{oper=opcode,ra=r,rb=i,rc=d},an) | ||
502 : | | (i,I.REGop r) => mark(I.OPERATEV{oper=opcode,ra=r,rb=i,rc=d},an) | ||
503 : | | (r,i) => mark(I.OPERATEV{oper=opcode,ra=reduceOpn r,rb=i,rc=d},an); | ||
504 : | emit(I.TRAPB) | ||
505 : | ) | ||
506 : | |||
507 : | (* emit a trapping arithmetic op *) | ||
508 : | and arithTrap(opcode,a,b,d,an) = | ||
509 : | (mark(I.OPERATEV{oper=opcode,ra=expr a,rb=opn b,rc=d},an); | ||
510 : | emit(I.TRAPB) | ||
511 : | ) | ||
512 : | |||
513 : | (* convert an operand into a register *) | ||
514 : | and reduceOpn(I.REGop r) = r | ||
515 : | | reduceOpn(I.IMMop 0) = zeroR | ||
516 : | | reduceOpn opn = | ||
517 : | let val d = newReg() | ||
518 : | in emit(I.OPERATE{oper=I.BIS,ra=zeroR,rb=opn,rc=d}); d end | ||
519 : | |||
520 : | (* convert an expression into an operand *) | ||
521 : | and opn(T.REG(_,r)) = I.REGop r | ||
522 : | | opn(e as T.LI n) = | ||
523 : | leunga | 775 | if IntInf.<=(n, T.I.int_0xff) andalso IntInf.>=(n, T.I.int_0) then |
524 : | george | 761 | I.IMMop(toInt(n)) |
525 : | monnier | 409 | else let val tmpR = newReg() |
526 : | in loadImmed(n,zeroR,tmpR,[]); I.REGop tmpR end | ||
527 : | leunga | 815 | | opn(e as T.CONST _) = I.LABop e |
528 : | leunga | 775 | | opn(T.LABEXP x) = I.LABop x |
529 : | monnier | 409 | | opn e = I.REGop(expr e) |
530 : | |||
531 : | leunga | 583 | (* compute base+displacement from an expression |
532 : | *) | ||
533 : | monnier | 409 | and addr exp = |
534 : | leunga | 775 | let fun toLexp(I.IMMop i) = T.LI(IntInf.fromInt i) |
535 : | leunga | 583 | | toLexp(I.LABop le) = le |
536 : | | toLexp _ = error "addr.toLexp" | ||
537 : | monnier | 409 | |
538 : | leunga | 775 | fun add(t,n,I.IMMop m) = |
539 : | I.IMMop(toInt(T.I.ADD(t,n,IntInf.fromInt m))) | ||
540 : | | add(t,n,I.LABop le) = I.LABop(T.ADD(t,T.LI n,le)) | ||
541 : | | add(t,n,_) = error "addr.add" | ||
542 : | |||
543 : | fun addLe(ty,le,I.IMMop 0) = I.LABop le | ||
544 : | | addLe(ty,le,disp) = I.LABop(T.ADD(ty,le,toLexp disp)) | ||
545 : | |||
546 : | fun sub(t,n,I.IMMop m) = | ||
547 : | I.IMMop(toInt(T.I.SUB(t,IntInf.fromInt m,n))) | ||
548 : | | sub(t,n,I.LABop le) = I.LABop(T.SUB(t,le,T.LI n)) | ||
549 : | | sub(t,n,_) = error "addr.sub" | ||
550 : | |||
551 : | fun subLe(ty,le,I.IMMop 0) = I.LABop le | ||
552 : | | subLe(ty,le,disp) = I.LABop(T.SUB(ty,le,toLexp disp)) | ||
553 : | leunga | 583 | |
554 : | (* Should really take into account of the address width XXX *) | ||
555 : | leunga | 775 | fun fold(T.ADD(t,e,T.LI n),disp) = fold(e,add(t,n,disp)) |
556 : | | fold(T.ADD(t,e,x as T.CONST _),disp) = fold(e,addLe(t,x,disp)) | ||
557 : | | fold(T.ADD(t,e,x as T.LABEL _),disp) = fold(e,addLe(t,x,disp)) | ||
558 : | | fold(T.ADD(t,e,T.LABEXP l),disp) = fold(e,addLe(t,l,disp)) | ||
559 : | | fold(T.ADD(t,T.LI n,e),disp) = fold(e, add(t,n,disp)) | ||
560 : | | fold(T.ADD(t,x as T.CONST _,e),disp) = fold(e,addLe(t,x,disp)) | ||
561 : | | fold(T.ADD(t,x as T.LABEL _,e),disp) = fold(e,addLe(t,x,disp)) | ||
562 : | | fold(T.ADD(t,T.LABEXP l,e),disp) = fold(e,addLe(t,l,disp)) | ||
563 : | | fold(T.SUB(t,e,T.LI n),disp) = fold(e,sub(t,n,disp)) | ||
564 : | | fold(T.SUB(t,e,x as T.CONST _),disp) = fold(e,subLe(t,x,disp)) | ||
565 : | | fold(T.SUB(t,e,x as T.LABEL _),disp) = fold(e,subLe(t,x,disp)) | ||
566 : | | fold(T.SUB(t,e,T.LABEXP l),disp) = fold(e,subLe(t,l,disp)) | ||
567 : | leunga | 583 | | fold(e,disp) = (expr e,disp) |
568 : | |||
569 : | in makeEA(fold(exp, zeroImm)) | ||
570 : | end | ||
571 : | |||
572 : | monnier | 409 | (* compute base+displacement+small offset *) |
573 : | and offset(base,disp as I.IMMop n,off) = | ||
574 : | let val n' = n+off | ||
575 : | in if literal16 n' then (base,I.IMMop n') | ||
576 : | else | ||
577 : | let val tmp = newReg() | ||
578 : | in emit(I.OPERATE{oper=I.ADDQ,ra=base,rb=disp,rc=tmp}); | ||
579 : | (tmp,I.IMMop off) | ||
580 : | end | ||
581 : | end | ||
582 : | leunga | 583 | | offset(base,disp as I.LABop le,off) = |
583 : | leunga | 775 | (base, I.LABop(T.ADD(64,le,T.LI(IntInf.fromInt off)))) |
584 : | monnier | 409 | | offset(base,disp,off) = |
585 : | let val tmp = newReg() | ||
586 : | in emit(I.OPERATE{oper=I.ADDQ,ra=base,rb=disp,rc=tmp}); | ||
587 : | (tmp,I.IMMop off) | ||
588 : | end | ||
589 : | |||
590 : | leunga | 583 | (* check if base offset fits within the field *) |
591 : | and makeEA(base, off as I.IMMop offset) = | ||
592 : | if ~32768 <= offset andalso offset <= 32767 | ||
593 : | then (base, off) | ||
594 : | monnier | 409 | else |
595 : | let val tmpR = newReg() | ||
596 : | (* unsigned low 16 bits *) | ||
597 : | val low = wtoi(Word.andb(itow offset, 0wxffff)) | ||
598 : | val high = offset div 65536 | ||
599 : | val (lowsgn, highsgn) = (* Sign-extend *) | ||
600 : | if low <= 32767 then (low, high) else (low -65536, high+1) | ||
601 : | in | ||
602 : | (emit(I.LDAH{r=tmpR, b=base, d=I.IMMop highsgn}); | ||
603 : | (tmpR, I.IMMop lowsgn)) | ||
604 : | end | ||
605 : | leunga | 583 | | makeEA(base, offset) = (base, offset) |
606 : | monnier | 409 | |
607 : | (* look for multiply by 4 and 8 of the given type *) | ||
608 : | and times4or8(ty,e) = | ||
609 : | george | 761 | let |
610 : | fun f(t,a,n) = if t = ty then | ||
611 : | if EQ(n, T.I.int_4) then (TIMES4,a) | ||
612 : | else if EQ(n, T.I.int_8) then (TIMES8,a) | ||
613 : | monnier | 409 | else (TIMES1,e) |
614 : | else (TIMES1,e) | ||
615 : | george | 761 | |
616 : | monnier | 409 | fun u(t,a,n) = if t = ty then |
617 : | george | 761 | if EQ(n, T.I.int_2) then (TIMES4,a) |
618 : | else if EQ(n, T.I.int_3) then (TIMES8,a) | ||
619 : | monnier | 409 | else (TIMES1,e) |
620 : | else (TIMES1,e) | ||
621 : | in case e of | ||
622 : | T.MULU(t,a,T.LI n) => f(t,a,n) | ||
623 : | | T.MULS(t,T.LI n,a) => f(t,a,n) | ||
624 : | | T.SLL(t,a,T.LI n) => u(t,a,n) | ||
625 : | | _ => (TIMES1,e) | ||
626 : | end | ||
627 : | |||
628 : | (* generate an add instruction | ||
629 : | * ***OPT*** look for multiply by 4 and 8 and use the S4ADD and S8ADD | ||
630 : | * forms. | ||
631 : | *) | ||
632 : | and plus(ty,add,s4add,s8add,a,b,d,an) = | ||
633 : | (case times4or8(ty,a) of | ||
634 : | (TIMES4,a) => arith(s4add,a,b,d,an) | ||
635 : | | (TIMES8,a) => arith(s8add,a,b,d,an) | ||
636 : | | _ => | ||
637 : | case times4or8(ty,b) of | ||
638 : | (TIMES4,b) => arith'(s4add,b,a,d,an) | ||
639 : | | (TIMES8,b) => arith'(s8add,b,a,d,an) | ||
640 : | | _ => commArith(add,a,b,d,an) | ||
641 : | ) | ||
642 : | |||
643 : | (* generate a subtract instruction | ||
644 : | * ***OPT*** look for multiply by 4 and 8 | ||
645 : | *) | ||
646 : | and minus(ty,sub,s4sub,s8sub,a,b,d,an) = | ||
647 : | (case times4or8(ty,a) of | ||
648 : | (TIMES4,a) => arith(s4sub,a,b,d,an) | ||
649 : | | (TIMES8,a) => arith(s8sub,a,b,d,an) | ||
650 : | | _ => | ||
651 : | monnier | 429 | if ty = 64 then |
652 : | monnier | 409 | (case b of |
653 : | (* use LDA to handle subtraction when possible | ||
654 : | * Note: this may have sign extension problems later. | ||
655 : | *) | ||
656 : | george | 761 | T.LI i => (loadImmed(T.I.NEGT(32,i),expr a,d,an) handle _ => |
657 : | monnier | 409 | arith(sub,a,b,d,an)) |
658 : | | _ => arith(sub,a,b,d,an) | ||
659 : | monnier | 429 | ) else arith(sub,a,b,d,an) |
660 : | monnier | 409 | ) |
661 : | |||
662 : | (* look for special constants *) | ||
663 : | george | 761 | and wordOpn(T.LI n) = SOME(T.I.toWord32(32, n)) |
664 : | monnier | 409 | | wordOpn e = NONE |
665 : | |||
666 : | monnier | 429 | (* look for special byte mask constants |
667 : | * IMPORTANT: we must ALWAYS keep the sign bit! | ||
668 : | *) | ||
669 : | and byteMask(_,SOME 0wx00000000) = 0xff | ||
670 : | | byteMask(_,SOME 0wx000000ff) = 0xfe | ||
671 : | | byteMask(_,SOME 0wx0000ff00) = 0xfd | ||
672 : | | byteMask(_,SOME 0wx0000ffff) = 0xfc | ||
673 : | | byteMask(_,SOME 0wx00ff0000) = 0xfb | ||
674 : | | byteMask(_,SOME 0wx00ff00ff) = 0xfa | ||
675 : | | byteMask(_,SOME 0wx00ffff00) = 0xf9 | ||
676 : | | byteMask(_,SOME 0wx00ffffff) = 0xf8 | ||
677 : | | byteMask(ty,SOME 0wxff000000) = if ty = 64 then 0xf7 else 0x07 | ||
678 : | | byteMask(ty,SOME 0wxff0000ff) = if ty = 64 then 0xf6 else 0x06 | ||
679 : | | byteMask(ty,SOME 0wxff00ff00) = if ty = 64 then 0xf5 else 0x05 | ||
680 : | | byteMask(ty,SOME 0wxff00ffff) = if ty = 64 then 0xf4 else 0x04 | ||
681 : | | byteMask(ty,SOME 0wxffff0000) = if ty = 64 then 0xf3 else 0x03 | ||
682 : | | byteMask(ty,SOME 0wxffff00ff) = if ty = 64 then 0xf2 else 0x02 | ||
683 : | | byteMask(ty,SOME 0wxffffff00) = if ty = 64 then 0xf1 else 0x01 | ||
684 : | | byteMask(ty,SOME 0wxffffffff) = if ty = 64 then 0xf0 else 0x00 | ||
685 : | | byteMask _ = ~1 | ||
686 : | monnier | 409 | |
687 : | (* generate an and instruction | ||
688 : | * look for special masks. | ||
689 : | *) | ||
690 : | and andb(ty,a,b,d,an) = | ||
691 : | case byteMask(ty,wordOpn a) of | ||
692 : | monnier | 429 | ~1 => (case byteMask(ty,wordOpn b) of |
693 : | ~1 => commArith(I.AND,a,b,d,an) | ||
694 : | george | 761 | | mask => arith(I.ZAP,a,LI mask,d,an) |
695 : | monnier | 429 | ) |
696 : | george | 761 | | mask => arith(I.ZAP,b,LI mask,d,an) |
697 : | monnier | 409 | |
698 : | (* generate sll/sra/srl *) | ||
699 : | and sll32(a,b,d,an) = | ||
700 : | case wordOpn b of | ||
701 : | SOME 0w0 => doExpr(a,d,an) | ||
702 : | | SOME 0w1 => | ||
703 : | let val r = T.REG(32,expr a) in arith(I.ADDL,r,r,d,an) end | ||
704 : | | SOME 0w2 => arith(I.S4ADDL,a,zeroT,d,an) | ||
705 : | | SOME 0w3 => arith(I.S8ADDL,a,zeroT,d,an) | ||
706 : | | _ => let val t = newReg() | ||
707 : | in arith(I.SLL,a,b,t,an); | ||
708 : | signExt32(t,d) | ||
709 : | end | ||
710 : | |||
711 : | and sll64(a,b,d,an) = | ||
712 : | case wordOpn b of | ||
713 : | SOME 0w0 => doExpr(a,d,an) | ||
714 : | | SOME 0w1 => | ||
715 : | let val r = T.REG(64,expr a) in arith(I.ADDQ,r,r,d,an) end | ||
716 : | | SOME 0w2 => arith(I.S4ADDQ,a,zeroT,d,an) | ||
717 : | | SOME 0w3 => arith(I.S8ADDQ,a,zeroT,d,an) | ||
718 : | | _ => arith(I.SLL,a,b,d,an) | ||
719 : | |||
720 : | leunga | 796 | (* On the alpha, all 32 bit values are already sign extended. |
721 : | * So no sign extension is necessary. We do the same for | ||
722 : | * sra32 and sra64 | ||
723 : | *) | ||
724 : | and sra(a,b,d,an) = | ||
725 : | monnier | 409 | mark(I.OPERATE{oper=I.SRA,ra=expr a,rb=opn b,rc=d},an) |
726 : | |||
727 : | and srl32(a,b,d,an) = | ||
728 : | let val ra = expr a | ||
729 : | val rb = opn b | ||
730 : | val t = newReg() | ||
731 : | in emit(I.OPERATE{oper=I.ZAP,ra=ra,rb=I.IMMop 0xf0,rc=t}); | ||
732 : | mark(I.OPERATE{oper=I.SRL,ra=t,rb=rb,rc=d},an) | ||
733 : | end | ||
734 : | |||
735 : | and srl64(a,b,d,an) = | ||
736 : | mark(I.OPERATE{oper=I.SRL,ra=expr a,rb=opn b,rc=d},an) | ||
737 : | |||
738 : | (* | ||
739 : | * Generic multiply. | ||
740 : | * We first try to use the multiply by constant heuristic | ||
741 : | *) | ||
742 : | and multiply(ty,gen,genConst,e1,e2,rd,trapb,an) = | ||
743 : | let fun nonconst(e1,e2) = | ||
744 : | let val instr = | ||
745 : | case (opn e1,opn e2) of | ||
746 : | (i,I.REGop r) => gen{ra=r,rb=i,rc=rd} | ||
747 : | | (I.REGop r,i) => gen{ra=r,rb=i,rc=rd} | ||
748 : | | (r,i) => gen{ra=reduceOpn r,rb=i,rc=rd} | ||
749 : | george | 1003 | in annotate(instr,an)::trapb end |
750 : | monnier | 409 | fun const(e,i) = |
751 : | let val r = expr e | ||
752 : | george | 761 | in if !useMultByConst andalso |
753 : | leunga | 775 | IntInf.>=(i, T.I.int_0) andalso |
754 : | IntInf.<(i, T.I.int_0x100) then | ||
755 : | george | 1003 | annotate(gen{ra=r,rb=I.IMMop(toInt i),rc=rd},an)::trapb |
756 : | monnier | 409 | else |
757 : | george | 761 | (genConst{r=r,i=toInt i,d=rd}@trapb |
758 : | monnier | 409 | handle _ => nonconst(T.REG(ty,r),T.LI i)) |
759 : | end | ||
760 : | val instrs = | ||
761 : | george | 761 | case (e1, e2) |
762 : | of (_, T.LI i) => const(e1, i) | ||
763 : | | (T.LI i, _) => const(e2, i) | ||
764 : | | _ => nonconst(e1, e2) | ||
765 : | george | 1003 | in app emitInstruction instrs |
766 : | monnier | 409 | end |
767 : | |||
768 : | (* Round r towards zero. | ||
769 : | * I generate the following sequence of code, which should get | ||
770 : | * mapped into conditional moves. | ||
771 : | * | ||
772 : | * d <- r + i; | ||
773 : | * d <- if (r > 0) then r else d | ||
774 : | *) | ||
775 : | george | 545 | (* |
776 : | monnier | 409 | and roundToZero{ty,r,i,d} = |
777 : | (doStmt(T.MV(ty,d,T.ADD(ty,T.REG(ty,r),T.LI i))); | ||
778 : | doStmt(T.MV(ty,d,T.COND(ty,T.CMP(ty,T.GE,T.REG(ty,r),T.LI 0), | ||
779 : | T.REG(ty,r),T.REG(ty,d)))) | ||
780 : | ) | ||
781 : | george | 545 | *) |
782 : | monnier | 409 | |
783 : | (* | ||
784 : | * Generic division. | ||
785 : | * We first try to use the division by constant heuristic | ||
786 : | *) | ||
787 : | and divide(ty,pseudo,genDiv,e1,e2,rd,an) = | ||
788 : | let fun nonconst(e1,e2) = | ||
789 : | pseudo({ra=expr e1,rb=opn e2,rc=rd},reduceOpn) | ||
790 : | |||
791 : | fun const(e,i) = | ||
792 : | let val r = expr e | ||
793 : | george | 545 | in genDiv{mode=T.TO_ZERO,stm=doStmt} |
794 : | george | 761 | {r=r,i=toInt i,d=rd} |
795 : | monnier | 409 | handle _ => nonconst(T.REG(ty,r),T.LI i) |
796 : | end | ||
797 : | val instrs = | ||
798 : | case e2 of | ||
799 : | T.LI i => const(e1,i) | ||
800 : | | _ => nonconst(e1,e2) | ||
801 : | george | 1003 | in app emitInstruction instrs |
802 : | monnier | 409 | end |
803 : | |||
804 : | |||
805 : | (* | ||
806 : | and multTrap(MULV,ADD,ADDV,e1,e2,rd,an) = (* signed multiply and trap *) | ||
807 : | let val ADD = fn {ra,rb,rc} => I.OPERATE{oper=ADD,ra=ra,rb=rb,rc=rc} | ||
808 : | val ADDV = fn {ra,rb,rc} => I.OPERATEV{oper=ADDV,ra=ra,rb=rb,rc=rc} | ||
809 : | val MULV = fn {ra,rb,rc} => I.OPERATEV{oper=MULV,ra=ra,rb=rb,rc=rc} | ||
810 : | in multiply(MULV,ADD,ADDV,e1,e2,rd,an); | ||
811 : | emit(I.TRAPB) | ||
812 : | end | ||
813 : | |||
814 : | and mulu(MUL,ADD,e1,e2,rd,an) = (* unsigned multiply *) | ||
815 : | let val ADD = fn {ra,rb,rc} => I.OPERATE{oper=ADD,ra=ra,rb=rb,rc=rc} | ||
816 : | val MUL = fn {ra,rb,rc} => I.OPERATE{oper=MUL,ra=ra,rb=rb,rc=rc} | ||
817 : | in multiply(MUL,ADD,ADD,e1,e2,rd,an) | ||
818 : | end | ||
819 : | |||
820 : | (* Multiplication *) | ||
821 : | and multiply(MULV, ADD, ADDV, e1, e2, rd, an) = | ||
822 : | let val reg = expr e1 | ||
823 : | val opn = opn e2 | ||
824 : | fun emitMulvImmed (reg, 0, rd) = | ||
825 : | emit(I.LDA{r=rd, b=zeroR, d=I.IMMop 0}) | ||
826 : | | emitMulvImmed (reg, 1, rd) = | ||
827 : | emit(ADD{ra=reg, rb=zeroOpn, rc=rd}) | ||
828 : | | emitMulvImmed (reg, multiplier, rd) = | ||
829 : | let fun log2 0w1 = 0 | log2 n = 1 + (log2 (Word.>> (n, 0w1))) | ||
830 : | fun exp2 n = Word.<<(0w1, n) | ||
831 : | fun bitIsSet (x,n) = Word.andb(x,exp2 n) <> 0w0 | ||
832 : | fun loop (~1) = () | ||
833 : | | loop n = | ||
834 : | (if bitIsSet(itow multiplier, itow n) then | ||
835 : | emit(ADDV{ra=reg,rb=I.REGop rd,rc=rd}) | ||
836 : | else (); | ||
837 : | if n>0 then | ||
838 : | emit(ADDV{ra=rd,rb=I.REGop rd,rc=rd}) | ||
839 : | else (); | ||
840 : | loop (n-1)) | ||
841 : | in emit(ADDV{ra=reg, rb=I.REGop reg, rc=rd}); | ||
842 : | loop ((log2 (itow multiplier)) - 1) | ||
843 : | end | ||
844 : | in case opn of | ||
845 : | (I.IMMop multiplier) => emitMulvImmed (reg, multiplier, rd) | ||
846 : | | _ => mark(MULV{ra=reg, rb=opn, rc=rd},an) | ||
847 : | (*esac*) | ||
848 : | end | ||
849 : | *) | ||
850 : | |||
851 : | (* generate pseudo instruction *) | ||
852 : | and pseudo(instr,e1,e2,rc) = | ||
853 : | george | 1003 | app emitInstruction (instr({ra=expr e1,rb=opn e2,rc=rc}, reduceOpn)) |
854 : | monnier | 409 | |
855 : | (* generate a load *) | ||
856 : | and load(ldOp,ea,d,mem,an) = | ||
857 : | let val (base,disp) = addr ea | ||
858 : | in mark(I.LOAD{ldOp=ldOp,r=d,b=base,d=disp,mem=mem},an) end | ||
859 : | |||
860 : | (* generate a load with zero extension *) | ||
861 : | and loadZext(ea,rd,mem,EXT,an) = | ||
862 : | let val (b, d) = addr ea | ||
863 : | val t1 = newReg() | ||
864 : | val _ = mark(I.LOAD{ldOp=I.LDQ_U, r=t1, b=b, d=d, mem=mem},an); | ||
865 : | val t2 = lda(b,d) | ||
866 : | in emit(I.OPERATE{oper=EXT, ra=t1, rb=I.REGop t2, rc=rd}) end | ||
867 : | |||
868 : | (* generate a load with sign extension *) | ||
869 : | and loadSext(ea,rd,mem,off,EXT,shift,an) = | ||
870 : | let val (b, d) = addr ea | ||
871 : | val (b',d') = offset(b,d,off) | ||
872 : | val t1 = newReg() | ||
873 : | val t2 = newReg() | ||
874 : | val t3 = newReg() | ||
875 : | in mark(I.LOAD{ldOp=I.LDQ_U, r=t1, b=b, d=d, mem=mem},an); | ||
876 : | emit(I.LDA{r=t2, b=b', d=d'}); | ||
877 : | emit(I.OPERATE{oper=EXT, ra=t1, rb=I.REGop t2, rc=t3}); | ||
878 : | emit(I.OPERATE{oper=I.SRA, ra=t3, rb=I.IMMop shift, rc=rd}) | ||
879 : | end | ||
880 : | |||
881 : | (* generate a load byte with zero extension (page 4-48) *) | ||
882 : | leunga | 583 | and load8(ea,rd,mem,an) = |
883 : | if !byteWordLoadStores then load(I.LDBU,ea,rd,mem,an) | ||
884 : | else loadZext(ea,rd,mem,I.EXTBL,an) | ||
885 : | monnier | 409 | |
886 : | (* generate a load byte with sign extension (page 4-48) *) | ||
887 : | leunga | 583 | and load8s(ea,rd,mem,an) = |
888 : | leunga | 585 | if !byteWordLoadStores then load(I.LDB,ea,rd,mem,an) |
889 : | leunga | 583 | else loadSext(ea,rd,mem,1,I.EXTQH,56,an) |
890 : | monnier | 409 | |
891 : | (* generate a load 16 bit *) | ||
892 : | leunga | 583 | and load16(ea,rd,mem,an) = |
893 : | if !byteWordLoadStores then load(I.LDWU,ea,rd,mem,an) | ||
894 : | else loadZext(ea,rd,mem,I.EXTWL,an) | ||
895 : | monnier | 409 | |
896 : | (* generate a load 16 bit with sign extension *) | ||
897 : | leunga | 583 | and load16s(ea,rd,mem,an) = |
898 : | leunga | 585 | if !byteWordLoadStores then load(I.LDW,ea,rd,mem,an) |
899 : | leunga | 583 | else loadSext(ea,rd,mem,2,I.EXTQH,48,an) |
900 : | monnier | 409 | |
901 : | (* generate a load 32 bit with sign extension *) | ||
902 : | and load32s(ea,rd,mem,an) = load(I.LDL,ea,rd,mem,an) | ||
903 : | |||
904 : | (* generate a floating point load *) | ||
905 : | and fload(ldOp,ea,d,mem,an) = | ||
906 : | let val (base,disp) = addr ea | ||
907 : | in mark(I.FLOAD{ldOp=ldOp,r=d,b=base,d=disp,mem=mem},an) end | ||
908 : | |||
909 : | (* generate a store *) | ||
910 : | and store(stOp,ea,data,mem,an) = | ||
911 : | let val (base,disp) = addr ea | ||
912 : | in mark(I.STORE{stOp=stOp,r=expr data,b=base,d=disp,mem=mem},an) end | ||
913 : | |||
914 : | (* generate an store8 or store16 *) | ||
915 : | and storeUnaligned(ea,data,mem,INS,MSK,an) = | ||
916 : | let val (base,disp) = addr ea | ||
917 : | val data = expr data | ||
918 : | val t1 = newReg() | ||
919 : | val t3 = newReg() | ||
920 : | val t4 = newReg() | ||
921 : | val t5 = newReg() | ||
922 : | val _ = emit(I.LOAD{ldOp=I.LDQ_U, r=t1, b=base, d=disp, mem=mem}) | ||
923 : | val t2 = lda(base,disp) | ||
924 : | in emit(I.OPERATE{oper=INS, ra=data, rb=I.REGop(t2), rc=t3}); | ||
925 : | emit(I.OPERATE{oper=MSK, ra=t1, rb=I.REGop(t2), rc=t4}); | ||
926 : | emit(I.OPERATE{oper=I.BIS, ra=t4, rb=I.REGop(t3), rc=t5}); | ||
927 : | mark(I.STORE{stOp=I.STQ_U, r=t5, b=base, d=disp, mem=mem},an) | ||
928 : | end | ||
929 : | |||
930 : | (* generate a store byte *) | ||
931 : | and store8(ea,data,mem,an) = | ||
932 : | leunga | 585 | if !byteWordLoadStores then store(I.STB, ea, data, mem, an) |
933 : | else storeUnaligned(ea,data,mem,I.INSBL,I.MSKBL,an) | ||
934 : | monnier | 409 | |
935 : | (* generate a store16 *) | ||
936 : | and store16(ea,data,mem,an) = | ||
937 : | leunga | 585 | if !byteWordLoadStores then store(I.STW, ea, data, mem, an) |
938 : | else storeUnaligned(ea,data,mem,I.INSWL,I.MSKWL,an) | ||
939 : | monnier | 409 | |
940 : | monnier | 429 | (* generate conversion from floating point to integer *) |
941 : | and cvtf2i(pseudo,rounding,e,rd,an) = | ||
942 : | george | 1003 | app emitInstruction (pseudo{mode=rounding, fs=fexpr e, rd=rd}) |
943 : | monnier | 429 | |
944 : | monnier | 409 | (* generate an expression and return the register that holds the result *) |
945 : | george | 761 | and expr(e) = let |
946 : | fun comp() = let | ||
947 : | val r = newReg() | ||
948 : | in doExpr(e, r, []); r | ||
949 : | end | ||
950 : | in | ||
951 : | case e | ||
952 : | of T.REG(_, r) => r | ||
953 : | | T.LI z => if T.I.isZero(z) then zeroR else comp() | ||
954 : | leunga | 624 | (* On the alpha: all 32 bit values are already sign extended. |
955 : | * So no sign extension is necessary | ||
956 : | *) | ||
957 : | george | 761 | | T.SX(64, 32, e) => expr e |
958 : | | T.ZX(64, 32, e) => expr e | ||
959 : | | _ => comp() | ||
960 : | end | ||
961 : | leunga | 624 | |
962 : | monnier | 409 | (* generate an expression that targets register d *) |
963 : | monnier | 429 | and doExpr(exp,d,an) = |
964 : | case exp of | ||
965 : | monnier | 409 | T.REG(_,r) => move(r,d,an) |
966 : | | T.LI n => loadImmed(n,zeroR,d,an) | ||
967 : | leunga | 775 | | T.LABEL l => loadLabexp(exp,d,an) |
968 : | | T.CONST c => loadLabexp(exp,d,an) | ||
969 : | | T.LABEXP le => loadLabexp(le,d,an) | ||
970 : | monnier | 409 | |
971 : | (* special optimizations for additions and subtraction | ||
972 : | * Question: using LDA for all widths is not really correct | ||
973 : | * since the result may not fit into the sign extension scheme. | ||
974 : | *) | ||
975 : | leunga | 775 | | T.ADD(64,e,T.LABEXP le) => mark(I.LDA{r=d,b=expr e,d=I.LABop le},an) |
976 : | | T.ADD(64,T.LABEXP le,e) => mark(I.LDA{r=d,b=expr e,d=I.LABop le},an) | ||
977 : | | T.ADD(64,e,x as (T.CONST _ | T.LABEL _)) => | ||
978 : | mark(I.LDA{r=d,b=expr e,d=I.LABop x},an) | ||
979 : | | T.ADD(64,x as (T.CONST _ | T.LABEL _),e) => | ||
980 : | mark(I.LDA{r=d,b=expr e,d=I.LABop x},an) | ||
981 : | monnier | 429 | | T.ADD(64,e,T.LI i) => loadImmed(i, expr e, d, an) |
982 : | | T.ADD(64,T.LI i,e) => loadImmed(i, expr e, d, an) | ||
983 : | george | 761 | | T.SUB(sz, a, b as T.LI z) => |
984 : | if T.I.isZero(z) then | ||
985 : | doExpr(a,d,an) | ||
986 : | else (case sz | ||
987 : | of 32 => minus(32,I.SUBL,I.S4SUBL,I.S8SUBL,a,b,d,an) | ||
988 : | | 64 => minus(64,I.SUBQ,I.S4SUBQ,I.S8SUBQ,a,b,d,an) | ||
989 : | | _ => doExpr(Gen.compileRexp exp,d,an) | ||
990 : | (*esac*)) | ||
991 : | monnier | 409 | |
992 : | (* 32-bit support *) | ||
993 : | | T.ADD(32,a,b) => plus(32,I.ADDL,I.S4ADDL,I.S8ADDL,a,b,d,an) | ||
994 : | | T.SUB(32,a,b) => minus(32,I.SUBL,I.S4SUBL,I.S8SUBL,a,b,d,an) | ||
995 : | | T.ADDT(32,a,b) => commArithTrap(I.ADDLV,a,b,d,an) | ||
996 : | | T.SUBT(32,a,b) => arithTrap(I.SUBLV,a,b,d,an) | ||
997 : | monnier | 429 | | T.MULT(32,a,b) => |
998 : | monnier | 409 | multiply(32, |
999 : | george | 1003 | fn{ra,rb,rc} => I.operatev{oper=I.MULLV,ra=ra,rb=rb,rc=rc}, |
1000 : | monnier | 409 | Mult32.multiply,a,b,d,trapb,an) |
1001 : | monnier | 429 | | T.MULU(32,a,b) => |
1002 : | monnier | 409 | multiply(32, |
1003 : | george | 1003 | fn{ra,rb,rc} => I.operate{oper=I.MULL,ra=ra,rb=rb,rc=rc}, |
1004 : | monnier | 409 | Mulu32.multiply,a,b,d,[],an) |
1005 : | monnier | 429 | | T.MULS(32,a,b) => |
1006 : | multiply(32, | ||
1007 : | george | 1003 | fn{ra,rb,rc} => I.operate{oper=I.MULL,ra=ra,rb=rb,rc=rc}, |
1008 : | monnier | 429 | Muls32.multiply,a,b,d,[],an) |
1009 : | | T.DIVT(32,a,b) => divide(32,P.divlv,Mult32.divide,a,b,d,an) | ||
1010 : | | T.DIVU(32,a,b) => divide(32,P.divlu,Mulu32.divide,a,b,d,an) | ||
1011 : | | T.DIVS(32,a,b) => divide(32,P.divl,Muls32.divide,a,b,d,an) | ||
1012 : | | T.REMT(32,a,b) => pseudo(P.remlv,a,b,d) | ||
1013 : | | T.REMU(32,a,b) => pseudo(P.remlu,a,b,d) | ||
1014 : | | T.REMS(32,a,b) => pseudo(P.reml,a,b,d) | ||
1015 : | |||
1016 : | monnier | 409 | | T.SLL(32,a,b) => sll32(a,b,d,an) |
1017 : | leunga | 796 | | T.SRA(32,a,b) => sra(a,b,d,an) |
1018 : | monnier | 409 | | T.SRL(32,a,b) => srl32(a,b,d,an) |
1019 : | |||
1020 : | (* 64 bit support *) | ||
1021 : | | T.ADD(64,a,b) => plus(64,I.ADDQ,I.S4ADDQ,I.S8ADDQ,a,b,d,an) | ||
1022 : | | T.SUB(64,a,b) => minus(64,I.SUBQ,I.S4SUBQ,I.S8SUBQ,a,b,d,an) | ||
1023 : | | T.ADDT(64,a,b) => commArithTrap(I.ADDQV,a,b,d,an) | ||
1024 : | | T.SUBT(64,a,b) => arithTrap(I.SUBQV,a,b,d,an) | ||
1025 : | monnier | 429 | | T.MULT(64,a,b) => |
1026 : | monnier | 409 | multiply(64, |
1027 : | george | 1003 | fn{ra,rb,rc} => I.operatev{oper=I.MULQV,ra=ra,rb=rb,rc=rc}, |
1028 : | monnier | 409 | Mult64.multiply,a,b,d,trapb,an) |
1029 : | monnier | 429 | | T.MULU(64,a,b) => |
1030 : | monnier | 409 | multiply(64, |
1031 : | george | 1003 | fn{ra,rb,rc} => I.operate{oper=I.MULQ,ra=ra,rb=rb,rc=rc}, |
1032 : | monnier | 409 | Mulu64.multiply,a,b,d,[],an) |
1033 : | monnier | 429 | | T.MULS(64,a,b) => |
1034 : | multiply(64, | ||
1035 : | george | 1003 | fn{ra,rb,rc} => I.operate{oper=I.MULQ,ra=ra,rb=rb,rc=rc}, |
1036 : | monnier | 429 | Muls64.multiply,a,b,d,[],an) |
1037 : | | T.DIVT(64,a,b) => divide(64,P.divqv,Mult64.divide,a,b,d,an) | ||
1038 : | | T.DIVU(64,a,b) => divide(64,P.divqu,Mulu64.divide,a,b,d,an) | ||
1039 : | | T.DIVS(64,a,b) => divide(64,P.divq,Muls64.divide,a,b,d,an) | ||
1040 : | | T.REMT(64,a,b) => pseudo(P.remqv,a,b,d) | ||
1041 : | | T.REMU(64,a,b) => pseudo(P.remqu,a,b,d) | ||
1042 : | | T.REMS(64,a,b) => pseudo(P.remq,a,b,d) | ||
1043 : | |||
1044 : | monnier | 409 | | T.SLL(64,a,b) => sll64(a,b,d,an) |
1045 : | leunga | 796 | | T.SRA(64,a,b) => sra(a,b,d,an) |
1046 : | monnier | 409 | | T.SRL(64,a,b) => srl64(a,b,d,an) |
1047 : | |||
1048 : | (* special bit operations with complement *) | ||
1049 : | | T.ANDB(_,a,T.NOTB(_,b)) => arith(I.BIC,a,b,d,an) | ||
1050 : | | T.ORB(_,a,T.NOTB(_,b)) => arith(I.ORNOT,a,b,d,an) | ||
1051 : | | T.XORB(_,a,T.NOTB(_,b)) => commArith(I.EQV,a,b,d,an) | ||
1052 : | | T.ANDB(_,T.NOTB(_,a),b) => arith(I.BIC,b,a,d,an) | ||
1053 : | | T.ORB(_,T.NOTB(_,a),b) => arith(I.ORNOT,b,a,d,an) | ||
1054 : | | T.XORB(_,T.NOTB(_,a),b) => commArith(I.EQV,b,a,d,an) | ||
1055 : | | T.NOTB(_,T.XORB(_,a,b)) => commArith(I.EQV,b,a,d,an) | ||
1056 : | |||
1057 : | (* bit operations *) | ||
1058 : | | T.ANDB(ty,a,b) => andb(ty,a,b,d,an) | ||
1059 : | | T.XORB(_,a,b) => commArith(I.XOR,a,b,d,an) | ||
1060 : | | T.ORB(_,a,b) => commArith(I.BIS,a,b,d,an) | ||
1061 : | | T.NOTB(_,e) => arith(I.ORNOT,zeroT,e,d,an) | ||
1062 : | |||
1063 : | (* loads *) | ||
1064 : | leunga | 744 | | T.SX(_,_,T.LOAD(8,ea,mem)) => load8s(ea,d,mem,an) |
1065 : | | T.SX(_,_,T.LOAD(16,ea,mem))=> load16s(ea,d,mem,an) | ||
1066 : | | T.SX(_,_,T.LOAD(32,ea,mem))=> load32s(ea,d,mem,an) | ||
1067 : | | T.ZX((8|16|32|64),_,T.LOAD(8,ea,mem)) => load8(ea,d,mem,an) | ||
1068 : | | T.ZX((16|32|64),_,T.LOAD(16,ea,mem))=> load16(ea,d,mem,an) | ||
1069 : | | T.ZX(64,_,T.LOAD(64,ea,mem)) => load(I.LDQ,ea,d,mem,an) | ||
1070 : | monnier | 409 | | T.LOAD(8,ea,mem) => load8(ea,d,mem,an) |
1071 : | | T.LOAD(16,ea,mem) => load16(ea,d,mem,an) | ||
1072 : | monnier | 429 | | T.LOAD(32,ea,mem) => load32s(ea,d,mem,an) |
1073 : | monnier | 409 | | T.LOAD(64,ea,mem) => load(I.LDQ,ea,d,mem,an) |
1074 : | |||
1075 : | monnier | 429 | (* floating -> int conversion *) |
1076 : | monnier | 475 | | T.CVTF2I(ty,rounding,fty,e) => |
1077 : | (case (fty,ty) of | ||
1078 : | monnier | 429 | (32,32) => cvtf2i(P.cvtsl,rounding,e,d,an) |
1079 : | | (32,64) => cvtf2i(P.cvtsq,rounding,e,d,an) | ||
1080 : | | (64,32) => cvtf2i(P.cvttl,rounding,e,d,an) | ||
1081 : | | (64,64) => cvtf2i(P.cvttq,rounding,e,d,an) | ||
1082 : | george | 545 | | _ => doExpr(Gen.compileRexp exp,d,an) (* other cases *) |
1083 : | monnier | 429 | ) |
1084 : | |||
1085 : | monnier | 409 | (* conversion to boolean *) |
1086 : | george | 761 | | T.COND(_, T.CMP(ty,cond,e1,e2), x, y) => |
1087 : | (case (x, y) | ||
1088 : | of (T.LI n, T.LI m) => | ||
1089 : | if EQ(n, int_1) andalso EQ(m, int_0) then | ||
1090 : | compare(ty,cond,e1,e2,d,an) | ||
1091 : | else if EQ(n, int_0) andalso EQ(m, int_1) then | ||
1092 : | compare(ty,T.Basis.negateCond cond,e1,e2,d,an) | ||
1093 : | else | ||
1094 : | cmove(ty,cond,e1,e2,x,y,d,an) | ||
1095 : | | _ => cmove(ty,cond,e1,e2,x,y,d,an) | ||
1096 : | (*esac*)) | ||
1097 : | monnier | 409 | |
1098 : | george | 545 | | T.LET(s,e) => (doStmt s; doExpr(e, d, an)) |
1099 : | | T.MARK(e,A.MARKREG f) => (f d; doExpr(e,d,an)) | ||
1100 : | | T.MARK(e,a) => doExpr(e,d,a::an) | ||
1101 : | monnier | 475 | (* On the alpha: all 32 bit values are already sign extended. |
1102 : | * So no sign extension is necessary | ||
1103 : | *) | ||
1104 : | leunga | 744 | | T.SX(64, 32, e) => doExpr(e, d, an) |
1105 : | | T.ZX(64, 32, e) => doExpr(e, d, an) | ||
1106 : | george | 545 | |
1107 : | | T.PRED(e, c) => doExpr(e, d, A.CTRLUSE c::an) | ||
1108 : | george | 555 | | T.REXT e => ExtensionComp.compileRext (reducer()) {e=e, an=an, rd=d} |
1109 : | monnier | 429 | |
1110 : | (* Defaults *) | ||
1111 : | george | 545 | | e => doExpr(Gen.compileRexp e,d,an) |
1112 : | monnier | 409 | |
1113 : | (* Hmmm... this is the funky thing described in the comments | ||
1114 : | * in at the top of the file. This should be made parametrizable | ||
1115 : | * for other backends. | ||
1116 : | *) | ||
1117 : | george | 545 | and farith(opcode,opcodeSMLNJ,a,b,d,an) = |
1118 : | monnier | 409 | let val fa = fexpr a |
1119 : | val fb = fexpr b | ||
1120 : | george | 545 | in if SMLNJfloatingPoint then |
1121 : | george | 1003 | ((* emit(I.DEFFREG d); *) |
1122 : | george | 545 | mark(I.FOPERATEV{oper=opcodeSMLNJ,fa=fa,fb=fb,fc=d},an); |
1123 : | george | 1003 | emit(I.TRAPB); |
1124 : | emitInstruction(I.LIVE{regs=List.foldl C.addFreg C.empty [fa,fb,d], | ||
1125 : | spilled=[]}) | ||
1126 : | |||
1127 : | george | 545 | ) |
1128 : | else mark(I.FOPERATE{oper=opcode,fa=fa,fb=fb,fc=d},an) | ||
1129 : | monnier | 409 | end |
1130 : | |||
1131 : | george | 545 | and farith'(opcode,a,b,d,an) = |
1132 : | mark(I.FOPERATE{oper=opcode,fa=fexpr a,fb=fexpr b,fc=d},an) | ||
1133 : | |||
1134 : | monnier | 429 | and funary(opcode,e,d,an) = mark(I.FUNARY{oper=opcode,fb=fexpr e,fc=d},an) |
1135 : | |||
1136 : | |||
1137 : | monnier | 409 | (* generate an floating point expression |
1138 : | * return the register that holds the result | ||
1139 : | *) | ||
1140 : | and fexpr(T.FREG(_,r)) = r | ||
1141 : | | fexpr e = let val d = newFreg() in doFexpr(e,d,[]); d end | ||
1142 : | |||
1143 : | (* generate an external floating point operation *) | ||
1144 : | monnier | 429 | and fcvti2f(pseudo,e,fd,an) = |
1145 : | monnier | 409 | let val opnd = opn e |
1146 : | george | 1003 | in app emitInstruction (pseudo({opnd=opnd, fd=fd}, reduceOpn)) |
1147 : | monnier | 409 | end |
1148 : | |||
1149 : | (* generate a floating point store *) | ||
1150 : | and fstore(stOp,ea,data,mem,an) = | ||
1151 : | let val (base,disp) = addr ea | ||
1152 : | in mark(I.FSTORE{stOp=stOp,r=fexpr data,b=base,d=disp,mem=mem},an) | ||
1153 : | end | ||
1154 : | |||
1155 : | (* generate a floating point expression that targets register d *) | ||
1156 : | and doFexpr(e,d,an) = | ||
1157 : | case e of | ||
1158 : | T.FREG(_,f) => fmove(f,d,an) | ||
1159 : | |||
1160 : | (* single precision support *) | ||
1161 : | george | 545 | | T.FADD(32,a,b) => farith(I.ADDS,ADDSX,a,b,d,an) |
1162 : | | T.FSUB(32,a,b) => farith(I.SUBS,SUBSX,a,b,d,an) | ||
1163 : | | T.FMUL(32,a,b) => farith(I.MULS,MULSX,a,b,d,an) | ||
1164 : | | T.FDIV(32,a,b) => farith(I.DIVS,DIVSX,a,b,d,an) | ||
1165 : | monnier | 409 | |
1166 : | (* double precision support *) | ||
1167 : | george | 545 | | T.FADD(64,a,b) => farith(I.ADDT,ADDTX,a,b,d,an) |
1168 : | | T.FSUB(64,a,b) => farith(I.SUBT,SUBTX,a,b,d,an) | ||
1169 : | | T.FMUL(64,a,b) => farith(I.MULT,MULTX,a,b,d,an) | ||
1170 : | | T.FDIV(64,a,b) => farith(I.DIVT,DIVTX,a,b,d,an) | ||
1171 : | monnier | 409 | |
1172 : | george | 545 | (* copy sign (correct?) XXX *) |
1173 : | | T.FCOPYSIGN(_,T.FNEG(_,a),b) => farith'(I.CPYSN,a,b,d,an) | ||
1174 : | | T.FCOPYSIGN(_,a,T.FNEG(_,b)) => farith'(I.CPYSN,a,b,d,an) | ||
1175 : | | T.FNEG(_,T.FCOPYSIGN(_,a,b)) => farith'(I.CPYSN,a,b,d,an) | ||
1176 : | | T.FCOPYSIGN(_,a,b) => farith'(I.CPYS,a,b,d,an) | ||
1177 : | monnier | 429 | |
1178 : | monnier | 409 | (* generic *) |
1179 : | | T.FABS(_,a) => | ||
1180 : | mark(I.FOPERATE{oper=I.CPYS,fa=zeroFR,fb=fexpr a,fc=d},an) | ||
1181 : | | T.FNEG(_,a) => | ||
1182 : | let val fs = fexpr a | ||
1183 : | in mark(I.FOPERATE{oper=I.CPYSN,fa=fs,fb=fs,fc=d},an) end | ||
1184 : | | T.FSQRT(_,a) => error "fsqrt" | ||
1185 : | |||
1186 : | (* loads *) | ||
1187 : | | T.FLOAD(32,ea,mem) => fload(I.LDS,ea,d,mem,an) | ||
1188 : | | T.FLOAD(64,ea,mem) => fload(I.LDT,ea,d,mem,an) | ||
1189 : | monnier | 429 | |
1190 : | (* floating/floating conversion | ||
1191 : | * Note: it is not necessary to convert single precision | ||
1192 : | * to double on the alpha. | ||
1193 : | *) | ||
1194 : | george | 545 | | T.CVTF2F(fty,fty',e) => (* ignore rounding mode for now *) |
1195 : | monnier | 475 | (case (fty,fty') of |
1196 : | monnier | 429 | (64,64) => doFexpr(e,d,an) |
1197 : | | (64,32) => doFexpr(e,d,an) | ||
1198 : | | (32,32) => doFexpr(e,d,an) | ||
1199 : | | (32,64) => funary(I.CVTTS,e,d,an) (* use normal rounding *) | ||
1200 : | | _ => error "CVTF2F" | ||
1201 : | ) | ||
1202 : | monnier | 409 | |
1203 : | monnier | 429 | (* integer -> floating point conversion *) |
1204 : | george | 545 | | T.CVTI2F(fty,ty,e) => |
1205 : | monnier | 429 | let val pseudo = |
1206 : | monnier | 475 | case (ty,fty) of |
1207 : | monnier | 429 | (ty,32) => if ty <= 32 then P.cvtls else P.cvtqs |
1208 : | | (ty,64) => if ty <= 32 then P.cvtlt else P.cvtqt | ||
1209 : | | _ => error "CVTI2F" | ||
1210 : | in fcvti2f(pseudo,e,d,an) end | ||
1211 : | |||
1212 : | george | 545 | | T.FMARK(e,A.MARKREG f) => (f d; doFexpr(e,d,an)) |
1213 : | | T.FMARK(e,a) => doFexpr(e,d,a::an) | ||
1214 : | | T.FPRED(e,c) => doFexpr(e, d, A.CTRLUSE c::an) | ||
1215 : | george | 555 | | T.FEXT e => ExtensionComp.compileFext (reducer()) {e=e, fd=d, an=an} |
1216 : | monnier | 409 | | _ => error "doFexpr" |
1217 : | |||
1218 : | (* check whether an expression is andb(e,1) *) | ||
1219 : | george | 761 | and isAndb1(e as T.ANDB(_, e1, e2)) = let |
1220 : | fun isOne(n, ei) = | ||
1221 : | if EQ(n, int_1) then (true, ei) else (false, e) | ||
1222 : | in | ||
1223 : | case(e1, e2) | ||
1224 : | of (T.LI n, _) => isOne(n, e2) | ||
1225 : | | (_, T.LI n) => isOne(n, e1) | ||
1226 : | | _ => (false, e) | ||
1227 : | end | ||
1228 : | | isAndb1 e = (false, e) | ||
1229 : | monnier | 409 | |
1230 : | george | 761 | and zeroOrOne(T.LI n) = |
1231 : | if T.I.isZero n then ZERO | ||
1232 : | else if EQ(n, int_1) then ONE | ||
1233 : | else OTHER | ||
1234 : | | zeroOrOne _ = OTHER | ||
1235 : | monnier | 409 | |
1236 : | (* compile a branch *) | ||
1237 : | george | 545 | and branch(e,lab,an) = |
1238 : | monnier | 409 | case e of |
1239 : | T.CMP(ty,cc,e1 as T.LI _,e2) => | ||
1240 : | george | 545 | branchBS(ty,T.Basis.swapCond cc,e2,e1,lab,an) |
1241 : | monnier | 409 | | T.CMP(ty,cc,e1,e2) => branchBS(ty,cc,e1,e2,lab,an) |
1242 : | george | 545 | (* generate an floating point branch *) |
1243 : | | T.FCMP(fty,cc,e1,e2) => | ||
1244 : | let val f1 = fexpr e1 | ||
1245 : | val f2 = fexpr e2 | ||
1246 : | fun bcc(cmp,br) = | ||
1247 : | let val tmpR = C.newFreg() | ||
1248 : | george | 1003 | in (*emit(I.DEFFREG(tmpR));*) |
1249 : | george | 545 | emit(I.FOPERATE{oper=cmp,fa=f1,fb=f2,fc=tmpR}); |
1250 : | emit(I.TRAPB); | ||
1251 : | george | 1003 | emitInstruction(I.LIVE{regs=List.foldl C.addFreg C.empty [f1,f2,tmpR], |
1252 : | spilled=[]}); | ||
1253 : | george | 545 | mark(I.FBRANCH{b=br,f=tmpR,lab=lab},an) |
1254 : | end | ||
1255 : | fun fall(cmp1, br1, cmp2, br2) = | ||
1256 : | let val tmpR1 = newFreg() | ||
1257 : | val tmpR2 = newFreg() | ||
1258 : | george | 909 | val fallLab = Label.anon() |
1259 : | george | 1003 | in (*emit(I.DEFFREG(tmpR1));*) |
1260 : | george | 545 | emit(I.FOPERATE{oper=cmp1, fa=f1, fb=f2, fc=tmpR1}); |
1261 : | emit(I.TRAPB); | ||
1262 : | george | 1003 | emitInstruction(I.LIVE{regs=List.foldl C.addFreg C.empty [f1,f2,tmpR1], |
1263 : | spilled=[]}); | ||
1264 : | george | 545 | mark(I.FBRANCH{b=br1, f=tmpR1, lab=fallLab},an); |
1265 : | george | 1003 | (* emit(I.DEFFREG(tmpR2)); *) |
1266 : | george | 545 | emit(I.FOPERATE{oper=cmp2, fa=f1, fb=f2, fc=tmpR2}); |
1267 : | emit(I.TRAPB); | ||
1268 : | george | 1003 | emitInstruction(I.LIVE{regs=List.foldl C.addFreg C.empty [f1,f2,tmpR2], |
1269 : | spilled=[]}); | ||
1270 : | george | 545 | mark(I.FBRANCH{b=br2, f=tmpR2, lab=lab},an); |
1271 : | defineLabel fallLab | ||
1272 : | end | ||
1273 : | fun bcc2(cmp1, br1, cmp2, br2) = | ||
1274 : | (bcc(cmp1, br1); bcc(cmp2, br2)) | ||
1275 : | in case cc of | ||
1276 : | T.== => bcc(I.CMPTEQSU, I.FBNE) | ||
1277 : | | T.?<> => bcc(I.CMPTEQSU, I.FBEQ) | ||
1278 : | | T.? => bcc(I.CMPTUNSU, I.FBNE) | ||
1279 : | | T.<=> => bcc(I.CMPTUNSU, I.FBEQ) | ||
1280 : | | T.> => fall(I.CMPTLESU, I.FBNE, I.CMPTUNSU, I.FBEQ) | ||
1281 : | | T.>= => fall(I.CMPTLTSU, I.FBNE, I.CMPTUNSU, I.FBEQ) | ||
1282 : | | T.?> => bcc(I.CMPTLESU, I.FBEQ) | ||
1283 : | | T.?>= => bcc(I.CMPTLTSU, I.FBEQ) | ||
1284 : | | T.< => bcc(I.CMPTLTSU, I.FBNE) | ||
1285 : | | T.<= => bcc(I.CMPTLESU, I.FBNE) | ||
1286 : | | T.?< => bcc2(I.CMPTLTSU, I.FBNE, I.CMPTUNSU, I.FBNE) | ||
1287 : | leunga | 744 | | T.?<= => bcc2(I.CMPTLESU, I.FBNE, I.CMPTUNSU, I.FBNE) |
1288 : | | T.<> => fall(I.CMPTEQSU, I.FBNE, I.CMPTUNSU, I.FBEQ) | ||
1289 : | | T.?= => bcc2(I.CMPTEQSU, I.FBNE, I.CMPTUNSU, I.FBNE) | ||
1290 : | | _ => error "branch" | ||
1291 : | george | 545 | end |
1292 : | | e => mark(I.BRANCH{b=I.BNE,r=ccExpr e,lab=lab},an) | ||
1293 : | monnier | 409 | |
1294 : | george | 545 | and br(opcode,exp,lab,an) = mark(I.BRANCH{b=opcode,r=expr exp,lab=lab},an) |
1295 : | monnier | 409 | |
1296 : | (* Use the branch on bit set/clear instruction when possible *) | ||
1297 : | and branchBS(ty,cc,a,b,lab,an) = | ||
1298 : | (case (cc,isAndb1 a,zeroOrOne b) of | ||
1299 : | (T.EQ,(true,e),ONE) => br(I.BLBS,e,lab,an) | ||
1300 : | | (T.EQ,(true,e),ZERO) => br(I.BLBC,e,lab,an) | ||
1301 : | | (T.NE,(true,e),ZERO) => br(I.BLBS,e,lab,an) | ||
1302 : | | (T.NE,(true,e),ONE) => br(I.BLBC,e,lab,an) | ||
1303 : | | (cc,_,_) => branchIt(ty,cc,a,b,lab,an) | ||
1304 : | ) | ||
1305 : | |||
1306 : | (* generate a branch instruction. | ||
1307 : | * Check for branch on zero as a special case | ||
1308 : | *) | ||
1309 : | george | 761 | |
1310 : | and branchIt(ty,cc,e1,e2 as T.LI z,lab,an) = | ||
1311 : | if T.I.isZero z then branchIt0(cc,e1,lab,an) | ||
1312 : | else branchItOther(ty,cc,e1,e2,lab,an) | ||
1313 : | monnier | 409 | | branchIt(ty,cc,e1,e2,lab,an) = branchItOther(ty,cc,e1,e2,lab,an) |
1314 : | |||
1315 : | (* generate a branch instruction. | ||
1316 : | * This function optimizes the special case of comparison with zero. | ||
1317 : | *) | ||
1318 : | and branchIt0(T.EQ,e,lab,an) = br(I.BEQ,e,lab,an) | ||
1319 : | | branchIt0(T.NE,e,lab,an) = br(I.BNE,e,lab,an) | ||
1320 : | | branchIt0(T.GT,e,lab,an) = br(I.BGT,e,lab,an) | ||
1321 : | | branchIt0(T.GE,e,lab,an) = br(I.BGE,e,lab,an) | ||
1322 : | | branchIt0(T.LE,e,lab,an) = br(I.BLE,e,lab,an) | ||
1323 : | | branchIt0(T.LT,e,lab,an) = br(I.BLT,e,lab,an) | ||
1324 : | | branchIt0(T.GTU,e,lab,an) = br(I.BNE,e,lab,an) (* always > 0! *) | ||
1325 : | | branchIt0(T.GEU,e,lab,an) = (* always true! *) goto(lab,an) | ||
1326 : | | branchIt0(T.LTU,e,lab,an) = (* always false! *) () | ||
1327 : | | branchIt0(T.LEU,e,lab,an) = br(I.BEQ,e,lab,an) (* never < 0! *) | ||
1328 : | leunga | 744 | | branchIt0 _ = error "brnachIt0" |
1329 : | monnier | 409 | |
1330 : | (* Generate the operands for unsigned comparisons | ||
1331 : | * Mask out high order bits whenever necessary. | ||
1332 : | *) | ||
1333 : | and unsignedCmpOpnds(ty,e1,e2) = | ||
1334 : | let fun zapHi(r,mask) = | ||
1335 : | let val d = newReg() | ||
1336 : | in emit(I.OPERATE{oper=I.ZAP, ra=r, rb=I.IMMop mask,rc=d}); | ||
1337 : | I.REGop d | ||
1338 : | end | ||
1339 : | |||
1340 : | fun zap(opn as I.REGop r) = | ||
1341 : | (case ty of | ||
1342 : | 8 => zapHi(r,0xfd) | ||
1343 : | | 16 => zapHi(r,0xfc) | ||
1344 : | | 32 => zapHi(r,0xf0) | ||
1345 : | | 64 => opn | ||
1346 : | | _ => error "unsignedCmpOpnds" | ||
1347 : | ) | ||
1348 : | | zap opn = opn | ||
1349 : | val opn1 = opn e1 | ||
1350 : | val opn2 = opn e2 | ||
1351 : | in (zap opn1,zap opn2) end | ||
1352 : | |||
1353 : | (* Generate a branch *) | ||
1354 : | and branchItOther(ty,cond,e1,e2,lab,an) = | ||
1355 : | let val tmpR = newReg() | ||
1356 : | fun signedCmp(cmp,br) = | ||
1357 : | (emit(I.OPERATE{oper=cmp, ra=expr e1, rb=opn e2, rc=tmpR}); | ||
1358 : | george | 545 | mark(I.BRANCH{b=br, r=tmpR, lab=lab},an) |
1359 : | monnier | 409 | ) |
1360 : | fun unsignedCmp(ty,cmp,br) = | ||
1361 : | let val (x,y) = unsignedCmpOpnds(ty,e1,e2) | ||
1362 : | in emit(I.OPERATE{oper=cmp,ra=reduceOpn x,rb=y,rc=tmpR}); | ||
1363 : | george | 545 | mark(I.BRANCH{b=br, r=tmpR, lab=lab},an) |
1364 : | monnier | 409 | end |
1365 : | in case cond of | ||
1366 : | T.LT => signedCmp(I.CMPLT,I.BNE) | ||
1367 : | | T.LE => signedCmp(I.CMPLE,I.BNE) | ||
1368 : | | T.GT => signedCmp(I.CMPLE,I.BEQ) | ||
1369 : | | T.GE => signedCmp(I.CMPLT,I.BEQ) | ||
1370 : | | T.EQ => signedCmp(I.CMPEQ,I.BNE) | ||
1371 : | | T.NE => signedCmp(I.CMPEQ,I.BEQ) | ||
1372 : | | T.LTU => unsignedCmp(ty,I.CMPULT,I.BNE) | ||
1373 : | | T.LEU => unsignedCmp(ty,I.CMPULE,I.BNE) | ||
1374 : | | T.GTU => unsignedCmp(ty,I.CMPULE,I.BEQ) | ||
1375 : | | T.GEU => unsignedCmp(ty,I.CMPULT,I.BEQ) | ||
1376 : | leunga | 744 | | _ => error "branchItOther" |
1377 : | monnier | 409 | end |
1378 : | |||
1379 : | (* This function generates a conditional move: | ||
1380 : | * d = if cond(a,b) then x else y | ||
1381 : | * Apparently, only signed comparisons conditional moves | ||
1382 : | * are supported on the alpha. | ||
1383 : | *) | ||
1384 : | and cmove(ty,cond,a,b,x,y,d,an) = | ||
1385 : | george | 545 | let val tmp = newReg() |
1386 : | val _ = doExpr(y,tmp,[]) (* evaluate false case *) | ||
1387 : | monnier | 409 | |
1388 : | val (cond,a,b) = | ||
1389 : | (* move the immed operand to b *) | ||
1390 : | case a of | ||
1391 : | leunga | 775 | (T.LI _ | T.CONST _ | T.LABEL _ | T.LABEXP _) => |
1392 : | (T.Basis.swapCond cond,b,a) | ||
1393 : | monnier | 409 | | _ => (cond,a,b) |
1394 : | |||
1395 : | george | 761 | fun sub(a, T.LI z) = |
1396 : | if T.I.isZero z then expr a else expr(T.SUB(ty,a,b)) | ||
1397 : | | sub(a,b) = expr(T.SUB(ty,a,b)) | ||
1398 : | monnier | 409 | |
1399 : | fun cmp(cond,e1,e2) = | ||
1400 : | george | 545 | let val flag = newReg() |
1401 : | in compare(ty,cond,e1,e2,flag,[]); flag end | ||
1402 : | monnier | 409 | |
1403 : | val (oper,ra,x,y) = | ||
1404 : | case (cond,isAndb1 a,zeroOrOne b) of | ||
1405 : | (* low bit set/clear? *) | ||
1406 : | (T.EQ,(true,e),ONE) => (I.CMOVLBS,expr e,x,y) | ||
1407 : | | (T.EQ,(true,e),ZERO) => (I.CMOVLBC,expr e,x,y) | ||
1408 : | | (T.NE,(true,e),ZERO) => (I.CMOVLBS,expr e,x,y) | ||
1409 : | | (T.NE,(true,e),ONE) => (I.CMOVLBC,expr e,x,y) | ||
1410 : | (* signed *) | ||
1411 : | | (T.EQ,_,_) => (I.CMOVEQ,sub(a,b),x,y) | ||
1412 : | leunga | 788 | | (T.NE,_,_) => (I.CMOVNE,sub(a,b),x,y) |
1413 : | monnier | 409 | | (T.GT,_,_) => (I.CMOVGT,sub(a,b),x,y) |
1414 : | | (T.GE,_,_) => (I.CMOVGE,sub(a,b),x,y) | ||
1415 : | | (T.LT,_,_) => (I.CMOVLT,sub(a,b),x,y) | ||
1416 : | | (T.LE,_,_) => (I.CMOVLE,sub(a,b),x,y) | ||
1417 : | |||
1418 : | (* unsigned: do compare then use the condition code *) | ||
1419 : | | (T.LTU,_,_) => (I.CMOVEQ,cmp(T.GEU,a,b),x,y) | ||
1420 : | | (T.LEU,_,_) => (I.CMOVEQ,cmp(T.GTU,a,b),x,y) | ||
1421 : | | (T.GTU,_,_) => (I.CMOVEQ,cmp(T.LEU,a,b),x,y) | ||
1422 : | | (T.GEU,_,_) => (I.CMOVEQ,cmp(T.LTU,a,b),x,y) | ||
1423 : | leunga | 744 | | _ => error "cmove" |
1424 : | george | 545 | in mark(I.CMOVE{oper=oper,ra=ra,rb=opn x,rc=tmp},an); (* true case *) |
1425 : | move(tmp, d, []) | ||
1426 : | monnier | 409 | end |
1427 : | |||
1428 : | |||
1429 : | (* This function generates a comparion between e1 and e2 and writes | ||
1430 : | * the result to register d. | ||
1431 : | * It'll mask out the high order 32-bits when performing | ||
1432 : | * unsigned 32-bit integer comparisons. | ||
1433 : | *) | ||
1434 : | and compare(ty,cond,e1,e2,d,an) = | ||
1435 : | let fun signedCmp(oper,a,b,d) = | ||
1436 : | mark(I.OPERATE{oper=oper,ra=expr a,rb=opn b,rc=d},an) | ||
1437 : | fun unsignedCmp(ty,oper,a,b,d) = | ||
1438 : | let val (x,y) = unsignedCmpOpnds(ty,a,b) | ||
1439 : | in mark(I.OPERATE{oper=oper,ra=reduceOpn x,rb=y,rc=d},an) | ||
1440 : | end | ||
1441 : | fun eq(a,b,d) = | ||
1442 : | (case (opn a,opn b) of | ||
1443 : | (a,I.REGop r) => | ||
1444 : | mark(I.OPERATE{oper=I.CMPEQ,ra=r,rb=a,rc=d},an) | ||
1445 : | | (a,b) => | ||
1446 : | mark(I.OPERATE{oper=I.CMPEQ,ra=reduceOpn a,rb=b,rc=d},an) | ||
1447 : | ) | ||
1448 : | fun neq(a,b,d) = | ||
1449 : | let val tmp = newReg() | ||
1450 : | in eq(a,b,tmp); | ||
1451 : | emit(I.OPERATE{oper=I.CMPEQ,ra=tmp,rb=zeroOpn,rc=d}) | ||
1452 : | end | ||
1453 : | val (cond,e1,e2) = | ||
1454 : | case e1 of | ||
1455 : | leunga | 775 | (T.LI _ | T.CONST _ | T.LABEL _ | T.LABEXP _) => |
1456 : | george | 545 | (T.Basis.swapCond cond,e2,e1) |
1457 : | monnier | 409 | | _ => (cond,e1,e2) |
1458 : | in case cond of | ||
1459 : | T.EQ => eq(e1,e2,d) | ||
1460 : | | T.NE => neq(e1,e2,d) | ||
1461 : | | T.GT => signedCmp(I.CMPLT,e2,e1,d) | ||
1462 : | | T.GE => signedCmp(I.CMPLE,e2,e1,d) | ||
1463 : | | T.LT => signedCmp(I.CMPLT,e1,e2,d) | ||
1464 : | | T.LE => signedCmp(I.CMPLE,e1,e2,d) | ||
1465 : | | T.GTU => unsignedCmp(ty,I.CMPULT,e2,e1,d) | ||
1466 : | | T.GEU => unsignedCmp(ty,I.CMPULE,e2,e1,d) | ||
1467 : | | T.LTU => unsignedCmp(ty,I.CMPULT,e1,e2,d) | ||
1468 : | | T.LEU => unsignedCmp(ty,I.CMPULE,e1,e2,d) | ||
1469 : | leunga | 744 | | _ => error "compare" |
1470 : | monnier | 409 | end |
1471 : | |||
1472 : | (* generate an unconditional branch *) | ||
1473 : | george | 545 | and goto(lab,an) = mark(I.BRANCH{b=I.BR,r=zeroR,lab=lab},an) |
1474 : | monnier | 409 | |
1475 : | (* generate an call instruction *) | ||
1476 : | blume | 839 | and call(ea,flow,defs,uses,mem,cutTo,an,0) = |
1477 : | let val defs=cellset defs | ||
1478 : | val uses=cellset uses | ||
1479 : | val instr = | ||
1480 : | case (ea, flow) of | ||
1481 : | (T.LABEL lab, [_]) => | ||
1482 : | I.BSR{lab=lab,r=C.returnAddr,defs=defs,uses=uses, | ||
1483 : | cutsTo=cutTo,mem=mem} | ||
1484 : | | _ => I.JSR{r=C.returnAddr,b=expr ea, | ||
1485 : | d=0,defs=defs,uses=uses,cutsTo=cutTo,mem=mem} | ||
1486 : | in mark(instr,an) | ||
1487 : | end | ||
1488 : | | call _ = error "pops<>0 not implemented" | ||
1489 : | monnier | 409 | |
1490 : | george | 545 | and doCCexpr(T.CC(_,r),d,an) = move(r,d,an) |
1491 : | | doCCexpr(T.FCC(_,r),d,an) = fmove(r,d,an) | ||
1492 : | monnier | 409 | | doCCexpr(T.CMP(ty,cond,e1,e2),d,an) = compare(ty,cond,e1,e2,d,an) |
1493 : | | doCCexpr(T.FCMP(fty,cond,e1,e2),d,an) = error "doCCexpr" | ||
1494 : | george | 545 | | doCCexpr(T.CCMARK(e,A.MARKREG f),d,an) = (f d; doCCexpr(e,d,an)) |
1495 : | | doCCexpr(T.CCMARK(e,a),d,an) = doCCexpr(e,d,a::an) | ||
1496 : | george | 555 | | doCCexpr(T.CCEXT e,d,an) = |
1497 : | ExtensionComp.compileCCext (reducer()) {e=e, ccd=d, an=an} | ||
1498 : | george | 545 | | doCCexpr _ = error "doCCexpr" |
1499 : | monnier | 409 | |
1500 : | george | 545 | and ccExpr(T.CC(_,r)) = r |
1501 : | | ccExpr(T.FCC(_,r)) = r | ||
1502 : | monnier | 409 | | ccExpr e = let val d = newReg() |
1503 : | in doCCexpr(e,d,[]); d end | ||
1504 : | |||
1505 : | (* compile a statement *) | ||
1506 : | and stmt(s,an) = | ||
1507 : | case s of | ||
1508 : | T.MV(ty,r,e) => doExpr(e,r,an) | ||
1509 : | | T.FMV(ty,r,e) => doFexpr(e,r,an) | ||
1510 : | | T.CCMV(r,e) => doCCexpr(e,r,an) | ||
1511 : | | T.COPY(ty,dst,src) => copy(dst,src,an) | ||
1512 : | | T.FCOPY(ty,dst,src) => fcopy(dst,src,an) | ||
1513 : | leunga | 775 | | T.JMP(T.LABEL lab,_) => goto(lab,an) |
1514 : | leunga | 744 | | T.JMP(e,labs) => mark(I.JMPL({r=zeroR,b=expr e,d=0},labs),an) |
1515 : | | T.BCC(cc,lab) => branch(cc,lab,an) | ||
1516 : | blume | 839 | | T.CALL{funct,targets,defs,uses,region,pops,...} => |
1517 : | call(funct,targets,defs,uses,region,[],an,pops) | ||
1518 : | | T.FLOW_TO(T.CALL{funct,targets,defs,uses,region,pops,...},cutTo) => | ||
1519 : | call(funct,targets,defs,uses,region,cutTo,an,pops) | ||
1520 : | leunga | 628 | | T.RET _ => mark(I.RET{r=zeroR,b=C.returnAddr,d=0},an) |
1521 : | monnier | 409 | | T.STORE(8,ea,data,mem) => store8(ea,data,mem,an) |
1522 : | | T.STORE(16,ea,data,mem) => store16(ea,data,mem,an) | ||
1523 : | | T.STORE(32,ea,data,mem) => store(I.STL,ea,data,mem,an) | ||
1524 : | | T.STORE(64,ea,data,mem) => store(I.STQ,ea,data,mem,an) | ||
1525 : | | T.FSTORE(32,ea,data,mem) => fstore(I.STS,ea,data,mem,an) | ||
1526 : | | T.FSTORE(64,ea,data,mem) => fstore(I.STT,ea,data,mem,an) | ||
1527 : | george | 545 | | T.DEFINE l => defineLabel l |
1528 : | monnier | 409 | | T.ANNOTATION(s,a) => stmt(s,a::an) |
1529 : | george | 555 | | T.EXT s => ExtensionComp.compileSext (reducer()) {stm=s,an=an} |
1530 : | george | 1003 | | T.LIVE rs => mark'(I.LIVE{regs=cellset rs, spilled=[]}, an) |
1531 : | | T.KILL rs => mark'(I.KILL{regs=cellset rs, spilled=[]}, an) | ||
1532 : | george | 545 | | s => doStmts (Gen.compileStm s) |
1533 : | monnier | 409 | |
1534 : | george | 545 | and reducer() = |
1535 : | george | 984 | TS.REDUCER{reduceRexp = expr, |
1536 : | reduceFexp = fexpr, | ||
1537 : | reduceCCexp = ccExpr, | ||
1538 : | reduceStm = stmt, | ||
1539 : | operand = opn, | ||
1540 : | reduceOperand = reduceOpn, | ||
1541 : | addressOf = addr, | ||
1542 : | george | 1003 | emit = emitInstruction o annotate, |
1543 : | george | 984 | instrStream = instrStream, |
1544 : | mltreeStream = self() | ||
1545 : | } | ||
1546 : | george | 545 | |
1547 : | monnier | 409 | and doStmt s = stmt(s,[]) |
1548 : | george | 545 | and doStmts ss = app doStmt ss |
1549 : | monnier | 409 | |
1550 : | george | 545 | (* convert mlrisc to cellset: |
1551 : | * condition code registers are mapped onto general registers | ||
1552 : | *) | ||
1553 : | and cellset mlrisc = | ||
1554 : | let fun g([],acc) = acc | ||
1555 : | | g(T.GPR(T.REG(_,r))::regs,acc) = g(regs,C.addReg(r,acc)) | ||
1556 : | | g(T.FPR(T.FREG(_,f))::regs,acc) = g(regs,C.addFreg(f,acc)) | ||
1557 : | | g(T.CCR(T.CC(_,cc))::regs,acc) = g(regs,C.addReg(cc,acc)) | ||
1558 : | | g(T.CCR(T.FCC(_,cc))::regs,acc) = g(regs,C.addReg(cc,acc)) | ||
1559 : | | g(_::regs, acc) = g(regs, acc) | ||
1560 : | in g(mlrisc, C.empty) end | ||
1561 : | monnier | 409 | |
1562 : | george | 545 | and self() = |
1563 : | george | 984 | TS.S.STREAM |
1564 : | leunga | 815 | { beginCluster = beginCluster, |
1565 : | endCluster = endCluster, | ||
1566 : | emit = doStmt, | ||
1567 : | pseudoOp = pseudoOp, | ||
1568 : | defineLabel = defineLabel, | ||
1569 : | entryLabel = entryLabel, | ||
1570 : | comment = comment, | ||
1571 : | annotation = annotation, | ||
1572 : | getAnnotations = getAnnotations, | ||
1573 : | exitBlock = fn regs => exitBlock(cellset regs) | ||
1574 : | george | 545 | } |
1575 : | in self() | ||
1576 : | monnier | 409 | end |
1577 : | |||
1578 : | end | ||
1579 : |
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |