Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /sml/trunk/src/MLRISC/x86/mltree/x86-fp.sml
ViewVC logotype

Annotation of /sml/trunk/src/MLRISC/x86/mltree/x86-fp.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1009 - (view) (download)

1 : jhr 925 (* x86-fp.sml
2 :     *
3 :     * COPYRIGHT (c) 2001 Bell Labs, Lucent Technologies
4 :     *
5 : leunga 731 * This phase takes a cluster with pseudo x86 fp instructions, performs
6 :     * liveness analysis to determine their live ranges, and rewrite the
7 :     * program into the correct stack based code.
8 :     *
9 :     * The Basics
10 :     * ----------
11 :     * o We assume there are 7 pseudo fp registers, %fp(0), ..., %fp(6),
12 :     * which are mapped onto the %st stack. One stack location is reserved
13 :     * for holding temporaries.
14 :     * o Important: for floating point comparisons, we actually need
15 :     * two extra stack locations in the worst case. We handle this by
16 :     * specifying that the instruction define an extra temporary fp register
17 :     * when necessary.
18 :     * o The mapping between %fp <-> %st may change from program point to
19 :     * program point. We keep track of this lazy renaming and try to minimize
20 :     * the number of FXCH that we insert.
21 :     * o At split and merge points, we may get inconsistent %fp <-> %st mappings.
22 :     * We handle this by inserting the appropriate renaming code.
23 :     * o Parallel copies (renaming) are rewritten into a sequence of FXCHs!
24 :     *
25 :     * Pseudo fp instructions Semantics
26 :     * --------------------------------------
27 :     * FMOVE src, dst dst := src
28 :     * FILOAD ea, dst dst := cvti2f(mem[ea])
29 :     * FBINOP lsrc, rsrc, dst dst := lsrc * rsrc
30 :     * FIBINOP lsrc, rsrc, dst dst := lsrc * cvti2f(rsrc)
31 :     * FUNOP src, dst dst := unaryOp src
32 :     * FCMP lsrc, rsrc fp condition code := fcmp(lsrc, rsrc)
33 :     *
34 :     * An instruction may use its source operand(s) destructively.
35 :     * We find this info using a global liveness analysis.
36 :     *
37 :     * The Translation
38 :     * ---------------
39 :     * o We keep track of the bindings between %fp registers and the
40 :     * %st(..) staack locations.
41 :     * o FXCH and FLDL are inserted at the appropriate places to move operands
42 :     * to %st(0). FLDL is used if the operand is not dead. FXCH is used
43 :     * if the operand is the last use.
44 :     * o FCOPY's between pseudo %fp registers are done by software renaming
45 :     * and generate no code by itself!
46 :     * o FSTL %st(1) are also generated to pop the stack after the last use
47 :     * of an operand.
48 :     *
49 :     * Note
50 :     * ----
51 :     * 1. This module should be run after floating point register allocation.
52 :     * 2. Due to the extra complication of critical edge splitting, the cellset
53 :     * and frequency info are not preserved.
54 :     *
55 :     * -- Allen Leung (leunga@cs.nyu.edu)
56 :     *)
57 :    
58 :     local
59 : jhr 925 val debug = false (* set this to true to debug this module
60 : leunga 731 * set this to false for production use.
61 :     *)
62 : jhr 925 val debugLiveness = true (* debug liveness analysis *)
63 : leunga 731 val debugDead = false (* debug dead code removal *)
64 : jhr 925 val sanityCheck = true
65 : leunga 731 in
66 :     functor X86FP
67 :     (structure X86Instr : X86INSTR
68 : george 984 structure X86Props : INSN_PROPERTIES
69 :     where I = X86Instr
70 :     structure Flowgraph : CONTROL_FLOW_GRAPH
71 :     where I = X86Instr
72 :     structure Liveness : LIVENESS
73 :     where CFG = Flowgraph
74 :     structure Asm : INSTRUCTION_EMITTER
75 :     where I = X86Instr
76 :     and S.P = Flowgraph.P
77 : george 909 ) : CFG_OPTIMIZATION =
78 : leunga 731 struct
79 : george 909 structure CFG = Flowgraph
80 : jhr 925 structure G = Graph
81 : leunga 731 structure I = X86Instr
82 : leunga 775 structure T = I.T
83 : leunga 731 structure P = X86Props
84 :     structure C = I.C
85 :     structure A = Array
86 :     structure L = Label
87 :     structure An = Annotations
88 : george 889 structure CB = CellsBasis
89 :     structure SL = CB.SortedCells
90 : jhr 925 structure HT = IntHashTable
91 : leunga 731
92 : george 909 type flowgraph = CFG.cfg
93 : leunga 731 type an = An.annotations
94 :    
95 :     val name = "X86 floating point rewrite"
96 :    
97 :     val debugOn = MLRiscControl.getFlag "x86-fp-debug"
98 :     val traceOn = MLRiscControl.getFlag "x86-fp-trace"
99 :    
100 :     fun error msg = MLRiscErrorMsg.error("X86FP",msg)
101 :     fun pr msg = TextIO.output(!MLRiscControl.debug_stream,msg)
102 :    
103 :     val i2s = Int.toString
104 :    
105 :     (*
106 :     * No overflow checking is needed for integer arithmetic in this module
107 :     *)
108 :     fun x + y = Word.toIntX(Word.+(Word.fromInt x, Word.fromInt y))
109 :     fun x - y = Word.toIntX(Word.-(Word.fromInt x, Word.fromInt y))
110 :    
111 : jhr 925 fun celllistToCellset l = List.foldr CB.CellSet.add CB.CellSet.empty l
112 :     fun celllistToString l = CB.CellSet.toString(celllistToCellset l)
113 :    
114 :     (* Annotation to mark split edges *)
115 :     exception TargetMovedTo of G.node_id
116 :    
117 : leunga 731 (*-----------------------------------------------------------------------
118 :     * Primitive instruction handling routines
119 :     *-----------------------------------------------------------------------*)
120 :    
121 :     (* Annotation an instruction *)
122 :     fun mark(instr, []) = instr
123 :     | mark(instr, a::an) = mark(I.ANNOTATION{i=instr,a=a}, an)
124 :    
125 :     (* Add pop suffix to a binary operator *)
126 :     fun pop I.FADDL = I.FADDP | pop I.FADDS = I.FADDP
127 :     | pop I.FSUBL = I.FSUBP | pop I.FSUBS = I.FSUBP
128 :     | pop I.FSUBRL = I.FSUBRP | pop I.FSUBRS = I.FSUBRP
129 :     | pop I.FMULL = I.FMULP | pop I.FMULS = I.FMULP
130 :     | pop I.FDIVL = I.FDIVP | pop I.FDIVS = I.FDIVP
131 :     | pop I.FDIVRL = I.FDIVRP | pop I.FDIVRS = I.FDIVRP
132 :     | pop _ = error "fbinop.pop"
133 :    
134 :     (* Invert the operator *)
135 :     fun invert I.FADDL = I.FADDL | invert I.FADDS = I.FADDS
136 :     | invert I.FSUBL = I.FSUBRL | invert I.FSUBS = I.FSUBRS
137 :     | invert I.FSUBRL = I.FSUBL | invert I.FSUBRS = I.FSUBS
138 :     | invert I.FMULL = I.FMULL | invert I.FMULS = I.FMULS
139 :     | invert I.FDIVL = I.FDIVRL | invert I.FDIVS = I.FDIVRS
140 :     | invert I.FDIVRL = I.FDIVL | invert I.FDIVRS = I.FDIVS
141 :     | invert I.FADDP = I.FADDP | invert I.FMULP = I.FMULP
142 :     | invert I.FSUBP = I.FSUBRP | invert I.FSUBRP = I.FSUBP
143 :     | invert I.FDIVP = I.FDIVRP | invert I.FDIVRP = I.FDIVP
144 :     | invert _ = error "invert"
145 :    
146 :     (* Pseudo instructions *)
147 : george 1003 fun FLD(I.FP32, ea) = I.flds ea
148 :     | FLD(I.FP64, ea) = I.fldl ea
149 :     | FLD(I.FP80, ea) = I.fldt ea
150 : leunga 731
151 :     fun FILD(I.I8, ea) = error "FILD"
152 : george 1003 | FILD(I.I16, ea) = I.fild ea
153 :     | FILD(I.I32, ea) = I.fildl ea
154 :     | FILD(I.I64, ea) = I.fildll ea
155 : leunga 731
156 : george 1003 fun FSTP(I.FP32, ea) = I.fstps ea
157 :     | FSTP(I.FP64, ea) = I.fstpl ea
158 :     | FSTP(I.FP80, ea) = I.fstpt ea
159 : leunga 731
160 : george 1003 fun FST(I.FP32, ea) = I.fsts ea
161 :     | FST(I.FP64, ea) = I.fstl ea
162 : leunga 731 | FST(I.FP80, ea) = error "FSTT"
163 :    
164 :     (*-----------------------------------------------------------------------
165 :     * Pretty print routines
166 :     *-----------------------------------------------------------------------*)
167 : george 889 fun fregToString f = "%f"^i2s(CB.registerNum f)
168 : leunga 731 fun fregsToString s =
169 :     List.foldr (fn (r,"") => fregToString r |
170 :     (r,s) => fregToString r^" "^s) "" s
171 :    
172 : jhr 925 fun blknumOf(CFG.BLOCK{id, ...}) = id
173 : george 909
174 : leunga 731 (*-----------------------------------------------------------------------
175 :     * A stack datatype that mimics the x86 floating point stack
176 :     * and keeps track of bindings between %st(n) and %fp(n).
177 :     *-----------------------------------------------------------------------*)
178 :     structure ST :>
179 :     sig
180 :     type stack
181 :     type stnum = int (* 0 -- 7 *)
182 :     val create : unit -> stack
183 :     val stack0 : stack
184 :     val copy : stack -> stack
185 :     val clear : stack -> unit
186 : george 889 val fp : stack * CB.register_id -> stnum
187 :     val st : stack * stnum -> CB.register_id
188 :     val set : stack * stnum * CB.register_id -> unit
189 :     val push : stack * CB.register_id -> unit
190 : leunga 731 val xch : stack * stnum * stnum -> unit
191 :     val pop : stack -> unit
192 :     val depth : stack -> int
193 :     val nonFull : stack -> unit
194 : george 889 val kill : stack * CellsBasis.cell -> unit
195 : leunga 731 val stackToString : stack -> string
196 :     val equal : stack * stack -> bool
197 :     end =
198 :     struct
199 :     type stnum = int
200 :     datatype stack =
201 : leunga 744 STACK of
202 : george 889 { st : CB.register_id A.array, (* mapping %st -> %fp registers *)
203 : leunga 744 fp : stnum A.array, (* mapping %fp -> %st registers *)
204 :     sp : int ref (* stack pointer *)
205 :     }
206 : leunga 731
207 :     (* Create a new stack *)
208 :     fun create() = STACK{st=A.array(8,~1), fp=A.array(7,16), sp=ref ~1}
209 :    
210 :     val stack0 = create()
211 :    
212 :     (* Copy a stack *)
213 :     fun copy(STACK{st, fp, sp}) =
214 :     let val st' = A.array(8, ~1)
215 :     val fp' = A.array(7, 16)
216 :     in A.copy{src=st,dst=st',si=0,di=0,len=NONE};
217 :     A.copy{src=fp,dst=fp',si=0,di=0,len=NONE};
218 :     STACK{st=st', fp=fp', sp=ref(!sp)}
219 :     end
220 :    
221 :     (* Depth of stack *)
222 :     fun depth(STACK{sp, ...}) = !sp + 1
223 :    
224 :     fun nonFull(STACK{sp, ...}) =
225 :     if !sp >= 7 then error "stack overflow" else ()
226 :    
227 :     (* Given %st(n), lookup the corresponding %fp(n) *)
228 :     fun st(STACK{st, sp, ...}, n) = A.sub(st, !sp - n)
229 :    
230 :     (* Given %fp(n), lookup the corresponding %st(n) *)
231 : leunga 744 fun fp(STACK{fp, sp, ...}, n) = !sp - A.sub(fp, n)
232 : leunga 731
233 :     fun stackToString stack =
234 :     let val depth = depth stack
235 :     fun f i = if i >= depth then " ]"
236 : leunga 744 else "%st("^i2s i^")=%f"^i2s(st(stack,i))^" "^f(i+1)
237 : leunga 731 in "[ "^f 0 end
238 :    
239 :     fun clear(STACK{st, fp, sp, ...}) =
240 :     (sp := ~1; A.modify(fn _ => ~1) st; A.modify(fn _ => 16) fp)
241 :    
242 :     (* Set %st(n) := %f *)
243 :     fun set(STACK{st, fp, sp, ...}, n, f) =
244 :     (A.update(st, !sp - n, f);
245 : leunga 744 if f >= 0 then A.update(fp, f, !sp - n) else ()
246 : leunga 731 )
247 :    
248 :     (* Pop one entry *)
249 :     fun pop(STACK{sp, st, fp, ...}) = sp := !sp - 1
250 :    
251 :     (* Push %fp(f) onto %st(0) *)
252 :     fun push(stack as STACK{sp, ...}, f) = (sp := !sp + 1; set(stack, 0, f))
253 :    
254 :     (* Exchange the contents of %st(m) and %st(n) *)
255 :     fun xch(stack, m, n) =
256 :     let val f_m = st(stack, m)
257 :     val f_n = st(stack, n)
258 :     in set(stack, m, f_n);
259 :     set(stack, n, f_m)
260 :     end
261 :    
262 : george 889 fun kill(STACK{fp, ...}, f) = A.update(fp, CB.registerNum f, 16)
263 : leunga 731
264 :     fun equal(st1, st2) =
265 :     let val m = depth st1
266 :     val n = depth st2
267 :     fun loop i =
268 :     i >= m orelse (st(st1, i) = st(st2, i) andalso loop(i+1))
269 :     in m = n andalso loop(0)
270 :     end
271 :    
272 :     end (* struct *)
273 :    
274 :     (*-----------------------------------------------------------------------
275 :     * Module to handle forward propagation.
276 :     * Forward propagation does the following:
277 :     * Given an instruction
278 :     * fmove mem, %fp(n)
279 :     * We delay the generation of the load until the first use of %fp(n),
280 :     * which we can further optimize by folding the load into the operand
281 :     * of the instruction, if it is the last use of this operand.
282 :     * If %fp(n) is dead then no load is necessary.
283 :     * Of course, we have to be careful whenever we encounter other
284 :     * instruction with a write.
285 :     *-----------------------------------------------------------------------*)
286 : leunga 744 (*
287 : leunga 731 structure ForwardPropagation :>
288 :     sig
289 :     type readbuffer
290 :     val create : ST.stack -> readbuffer
291 :     val load : readbuffer * C.cell * I.fsize * I.ea -> unit
292 :     val getreg : readbuffer * bool * C.cell * I.instruction list ->
293 :     I.operand * I.instruction list
294 :     val flush : readbuffer * I.instruction list -> I.instruction list
295 :     end =
296 :     struct
297 :    
298 :     datatype readbuffer =
299 :     READ of { stack : ST.stack,
300 :     loads : (I.fsize * I.ea) option A.array,
301 :     pending : int ref
302 :     }
303 :    
304 :     fun create stack =
305 :     READ{stack =stack,
306 :     loads =A.array(8, NONE),
307 :     pending =ref 0
308 :     }
309 :    
310 :     fun load(READ{pending, loads, ...}, fd, fsize, mem) =
311 : leunga 744 (A.update(loads, fd, SOME(fsize, mem));
312 : leunga 731 pending := !pending + 1
313 :     )
314 :    
315 :     (* Extract the operand for a register
316 :     * If it has a delayed load associated with it then
317 :     * we perform the load at this time.
318 :     *)
319 :     fun getreg(READ{pending, loads, stack, ...}, isLastUse, fs, code) =
320 : leunga 744 case A.sub(loads, fs) of
321 : leunga 731 NONE =>
322 :     let val n = ST.st(stack, fs)
323 :     in if isLastUse
324 :     then (ST n, code)
325 :     else let val code = I.FLDL(ST n)::code
326 :     in ST.push(stack, fs); (ST0, code)
327 :     end
328 :     end
329 :     | SOME(fsize, mem) =>
330 :     let val code = FLD(fsize, mem)::code
331 :     in A.update(loads, fs, NONE); (* delete load *)
332 :     pending := !pending - 1;
333 :     ST.push(stack, fs); (* fs is now in place *)
334 :     (ST0, code)
335 :     end
336 :    
337 :     (* Extract a binary operand.
338 :     * We'll try to fold this into the operand
339 :     *)
340 :     fun getopnd(READ{pending, loads, stack,...}, isLastUse, I.FPR fs, code) =
341 : leunga 744 (case A.sub(loads, fs) of
342 : leunga 731 NONE =>
343 :     let val n = ST.st(stack, fs)
344 :     in if isLastUse fs (* regmap XXX *)
345 :     then (ST n, code)
346 :     else let val code = I.FLDL(ST n)::code
347 :     in ST.push(stack, fs); (ST0, code)
348 :     end
349 :     end
350 :     | SOME(fsize, mem) =>
351 :     (A.update(loads, fs, NONE); (* delete load *)
352 :     pending := !pending - 1;
353 :     if isLastUse fs then (mem, code)
354 :     else let val code = FLD(fsize, mem)::code
355 :     in ST.push(stack, fs);
356 :     (ST0, code)
357 :     end
358 :     )
359 :     )
360 :     | getopnd(_, _, ea, code) = (ea, code)
361 :    
362 :     fun flush(READ{pending=ref 0,...}, code) = code
363 :    
364 :     end (* struct *)
365 : leunga 744 *)
366 : leunga 731
367 :     (*-----------------------------------------------------------------------
368 :     * Module to handle delayed stores.
369 :     * Delayed store does the following:
370 :     * Given an instruction
371 :     * fstore %fp(n), %mem
372 :     * We delay the generation of the store until necessary.
373 :     * This gives us an opportunity to rearrange the order of the stores
374 :     * to eliminate unnecessary fxch.
375 :     *-----------------------------------------------------------------------*)
376 : leunga 744 (*
377 : leunga 731 structure DelayStore :>
378 :     sig
379 :     type writebuffer
380 :     val create : ST.stack -> writebuffer
381 :     val flush : writebuffer * I.instruction list -> I.instruction list
382 :     end =
383 :     struct
384 :     datatype writebuffer =
385 :     WRITE of { front : (I.ea * C.cell) list ref,
386 :     back : (I.ea * C.cell) list ref,
387 :     stack : ST.stack,
388 :     pending : int ref
389 :     }
390 :     fun create stack = WRITE{front=ref [], back=ref [],
391 :     stack=stack, pending=ref 0}
392 :     fun flush(WRITE{pending=ref 0,...}, code) = code
393 :     end (* struct *)
394 : leunga 744 *)
395 : leunga 731
396 :     (*-----------------------------------------------------------------------
397 :     * Main routine.
398 :     *
399 :     * Algorithm:
400 :     * 1. Perform liveness analysis.
401 :     * 2. For each fp register, mark all its last use point(s).
402 :     * Registers are popped at their last uses.
403 :     * 3. Rewrite the instructions basic block by basic block.
404 :     * 4. Insert shuffle code at basic block boundaries.
405 :     * When necessary, split critical edges.
406 :     * 5. Sacrifice a goat to make sure things don't go wrong.
407 :     *-----------------------------------------------------------------------*)
408 : jhr 925 fun run(Cfg as G.GRAPH cfg) =
409 :     let
410 :     val numberOfBlks = #capacity cfg ()
411 :     val ENTRY = List.hd (#entries cfg ())
412 :     val EXIT = List.hd (#exits cfg ())
413 : leunga 731
414 : jhr 925 val getCell = C.getCellsByKind CB.FP
415 :     (*extract the fp component of cellset*)
416 :    
417 : leunga 744 val stTable = A.tabulate(8, fn n => I.ST(C.ST n))
418 :    
419 :     fun ST n = (if sanityCheck andalso (n < 0 orelse n >= 8) then
420 :     pr("WARNING BAD %st("^i2s n^")\n")
421 :     else ();
422 :     A.sub(stTable, n)
423 :     )
424 :    
425 : george 1003 fun FXCH n = I.fxch{opnd=C.ST n}
426 : leunga 744
427 :     val ST0 = ST 0
428 :     val ST1 = ST 1
429 : george 1003 val POP_ST = I.fstpl ST0 (* Instruction to pop an entry *)
430 : leunga 744
431 : leunga 731 (* Dump instructions *)
432 :     fun dump instrs =
433 :     let val Asm.S.STREAM{emit, ...} =
434 :     AsmStream.withStream (!MLRiscControl.debug_stream)
435 :     Asm.makeStream []
436 :     in app emit (rev instrs)
437 :     end
438 :    
439 :     (* Create assembly of instruction *)
440 :     fun assemble instr =
441 :     let val buf = StringOutStream.mkStreamBuf()
442 :     val stream = StringOutStream.openStringOut buf
443 :     val Asm.S.STREAM{emit, ...} =
444 :     AsmStream.withStream stream Asm.makeStream []
445 : leunga 744 val _ = emit instr
446 : leunga 731 val s = StringOutStream.getString buf
447 :     val n = String.size s
448 :     in if n = 0 then s else String.substring(s, 0, n - 1)
449 :     end
450 :    
451 :     (*------------------------------------------------------------------
452 :     * Perform liveness analysis on the floating point variables
453 :     * P.S. I'm glad I didn't throw away the code liveness code.
454 :     *------------------------------------------------------------------*)
455 : george 889 val defUse = P.defUse CB.FP (* def/use properties *)
456 : jhr 925 val {liveIn=liveInTable, liveOut=liveOutTable} = Liveness.liveness {
457 :     defUse=defUse,
458 :     (* updateCell=C.updateCellsByKind CB.FP, *)
459 :     getCell=getCell
460 :     } Cfg
461 : leunga 731 (*------------------------------------------------------------------
462 :     * Scan the instructions compute the last uses and dead definitions
463 :     * at each program point. Ideally we can do this during the code
464 :     * rewriting phase. But that's probably too error prone for now.
465 :     *------------------------------------------------------------------*)
466 :     fun computeLastUse(blknum, insns, liveOut) =
467 :     let fun scan([], _, lastUse) = lastUse
468 :     | scan(i::instrs, live, lastUse) =
469 :     let val (d, u) = defUse i
470 : leunga 744 val d = SL.uniq(d)(* definitions *)
471 :     val u = SL.uniq(u)(* uses *)
472 :     val dead = SL.return(SL.difference(d, live))
473 : leunga 731 val live = SL.difference(live, d)
474 : leunga 744 val last = SL.return(SL.difference(u, live))
475 :     val live = SL.union(live, u)
476 : leunga 731 val _ =
477 :     if debug andalso debugLiveness then
478 :     (case last of
479 :     [] => ()
480 :     | _ => print(assemble i^"\tlast use="^
481 :     fregsToString last^"\n")
482 :     )
483 :     else ()
484 :     in scan(instrs, live, (last,dead)::lastUse)
485 :     end
486 : jhr 925 val liveOutSet = SL.uniq liveOut
487 : leunga 731 val _ =
488 :     if debug andalso debugLiveness then
489 :     print("LiveOut("^i2s blknum^") = "^
490 : leunga 744 fregsToString(SL.return liveOutSet)^"\n")
491 : leunga 731 else ()
492 :     in scan(!insns, liveOutSet, [])
493 :     end
494 :    
495 :     (*------------------------------------------------------------------
496 :     * Temporary work space
497 :     *------------------------------------------------------------------*)
498 : george 889 val {high, low} = C.cellRange CB.FP
499 : leunga 731 val n = high+1
500 :     val lastUseTbl = A.array(n,~1) (* table for marking last uses *)
501 :     val useTbl = A.array(n,~1) (* table for marking uses *)
502 :    
503 :     (* %fp register bindings before and after a basic block *)
504 : jhr 925 val bindingsIn = A.array(numberOfBlks, NONE)
505 :     val bindingsOut = A.array(numberOfBlks, NONE)
506 : leunga 731 val stampCounter = ref ~4096
507 :    
508 :     (* Edges that need splitting *)
509 :     exception NoEdgesToSplit
510 : blume 733 val edgesToSplit = IntHashTable.mkTable(32, NoEdgesToSplit)
511 :     val addEdgesToSplit = IntHashTable.insert edgesToSplit
512 : leunga 744 fun lookupEdgesToSplit b =
513 :     getOpt(IntHashTable.find edgesToSplit b, [])
514 : leunga 731
515 :     (*------------------------------------------------------------------
516 :     * Code for handling bindings between basic block
517 :     *------------------------------------------------------------------*)
518 :    
519 : jhr 925 fun splitEdge(title, source, target, e) =
520 : leunga 731 (if debug andalso !traceOn then
521 : jhr 925 pr(title^" SPLITTING "^i2s source^"->"^ i2s target^"\n")
522 : leunga 731 else ();
523 : jhr 925 addEdgesToSplit(target,(source,target,e)::lookupEdgesToSplit target)
524 : leunga 731 )
525 :    
526 :     (* Given a cellset, return a sorted and unique
527 :     * list of elements with all non-physical registers removed
528 :     *)
529 : jhr 925 fun removeNonPhysical celllist =
530 : leunga 744 let fun loop([], S) = SL.return(SL.uniq S)
531 : leunga 731 | loop(f::fs, S) =
532 : george 889 let val fx = CB.registerNum f
533 : leunga 744 in loop(fs,if fx <= 7 then f::S else S)
534 : leunga 731 end
535 : jhr 925 in loop(celllist, [])
536 : leunga 731 end
537 :    
538 :     (* Given a sorted and unique list of registers,
539 :     * Return a stack with these elements
540 :     *)
541 :     fun newStack fregs =
542 :     let val stack = ST.create()
543 : george 889 in app (fn f => ST.push(stack, CB.registerNum f)) (rev fregs);
544 : leunga 731 stack
545 :     end
546 :    
547 :     (*
548 :     * This function looks at all the entries on the stack,
549 :     * and generate code to deallocate all the dead values.
550 :     * The stack is updated.
551 :     *)
552 :     fun removeDeadValues(stack, liveSet, code) =
553 :     let val stamp = !stampCounter
554 :     val _ = stampCounter := !stampCounter - 1
555 :     fun markLive [] = ()
556 : leunga 744 | markLive(r::rs) =
557 : george 889 (A.update(useTbl, CB.registerNum r, stamp); markLive rs)
558 : leunga 731 fun isLive f = A.sub(useTbl, f) = stamp
559 :     fun loop(i, depth, code) =
560 :     if i >= depth then code else
561 :     let val f = ST.st(stack, i)
562 :     in if isLive f (* live? *)
563 :     then loop(i+1, depth, code)
564 :     else
565 :     (if debug andalso !traceOn then
566 : leunga 744 pr("REMOVING %f"^i2s f^" in %st("^i2s i^")"^
567 : leunga 731 " current stack="^ST.stackToString stack^"\n")
568 :     else ();
569 :     if i = 0 then
570 :     (ST.pop stack;
571 :     loop(0, depth-1, POP_ST::code)
572 :     )
573 :     else (ST.xch(stack,0,i);
574 :     ST.pop stack;
575 : george 1003 loop(0, depth-1, I.fstpl(ST i)::code)
576 : leunga 731 )
577 :     )
578 :     end
579 :     in markLive liveSet;
580 :     loop(0, ST.depth stack, code)
581 :     end
582 :    
583 :    
584 :     (*------------------------------------------------------------------
585 :     * Given two stacks, source and target, where the bindings are
586 :     * permutation of each other, generate the minimal number of
587 :     * fxchs to match source with target.
588 :     *
589 :     * Important: source and target MUST be permutations of each other.
590 :     *
591 :     * Essentially, we first decompose the permutation into cycles,
592 :     * and process each cycle.
593 :     *------------------------------------------------------------------*)
594 :     fun shuffle(source, target, code) =
595 :     let val stamp = !stampCounter
596 :     val _ = stampCounter := !stampCounter - 1
597 :     val permutation = lastUseTbl (* reuse the space *)
598 :    
599 :     val _ = if debug andalso !traceOn then
600 :     pr("SHUFFLE "^ST.stackToString source^
601 :     "->"^ST.stackToString target^"\n")
602 :     else ()
603 :    
604 :     (* Compute the initial permutation *)
605 :     val n = ST.depth source
606 :     fun computeInitialPermutation(i) =
607 :     if i >= n
608 :     then ()
609 :     else let val f = ST.st(source, i)
610 :     val j = ST.fp(target, f)
611 :     in A.update(permutation, j, i);
612 :     computeInitialPermutation(i+1)
613 :     end
614 :     val _ = computeInitialPermutation 0
615 :    
616 :     (* Decompose the initial permutation into cycles.
617 :     * The cycle involving 0 is treated specially.
618 :     *)
619 :     val visited = useTbl
620 :     fun isVisited i = A.sub(visited,i) = stamp
621 :     fun markAsVisited i = A.update(visited,i,stamp)
622 :     fun decomposeCycles(i, cycle0, cycles) =
623 :     if i >= n then (cycle0, cycles)
624 :     else if isVisited i orelse
625 :     A.sub(permutation, i) = i (* trivial cycle *)
626 :     then decomposeCycles(i+1, cycle0, cycles)
627 :     else let fun makeCycle(j, cycle, zero) =
628 :     let val k = A.sub(permutation, j)
629 :     val cycle = j::cycle
630 :     val zero = zero orelse j = 0
631 :     in markAsVisited j;
632 :     if k = i then (cycle, zero)
633 :     else makeCycle(k, cycle, zero)
634 :     end
635 :     val (cycle, zero) = makeCycle(i, [], false)
636 :     in if zero then decomposeCycles(i+1, [cycle], cycles)
637 :     else decomposeCycles(i+1, cycle0, cycle::cycles)
638 :     end
639 :    
640 :     val (cycle0, cycles) = decomposeCycles(0, [], [])
641 :    
642 :     (*
643 :     * Generate shuffle for a cycle that does not involve 0.
644 :     * Given a cycle (c_1, ..., c_k), we generate this code:
645 :     * fxch %st(c_1),
646 :     * fxch %st(c_2),
647 :     * ...
648 :     * fxch %st(c_k),
649 :     * fxch %st(c_1)
650 :     *)
651 :     fun genxch([], code) = code
652 :     | genxch(c::cs, code) = genxch(cs, FXCH c::code)
653 :    
654 :     fun gen([], code) = error "shuffle.gen"
655 :     | gen(cs as (c::_), code) = FXCH c::genxch(cs, code)
656 :    
657 :     (*
658 :     * Generate shuffle for a cycle that involves 0.
659 :     * Given a cycle (c_1,...,c_k) we first shuffle this to
660 :     * an equivalent cycle (c_1, ..., c_k) where c'_k = 0,
661 :     * then we generate this code:
662 :     * fxch %st(c'_1),
663 :     * fxch %st(c'_2),
664 :     * ...
665 :     * fxch %st(c'_{k-1}),
666 :     *)
667 :     fun gen0([], code) = error "shuffle.gen0"
668 :     | gen0(cs, code) =
669 :     let fun rearrange(0::cs, cs') = cs@rev cs'
670 :     | rearrange(c::cs, cs') = rearrange(cs, c::cs')
671 :     | rearrange([], _) = error "shuffle.rearrange"
672 :     val cs = rearrange(cs, [])
673 :     in genxch(cs, code)
674 :     end
675 :    
676 :     (*
677 :     * Generate code. Must process the non-zero cycles first.
678 :     *)
679 :     val code = List.foldr gen code cycles
680 :     val code = List.foldr gen0 code cycle0
681 :     in code
682 :     end (* shuffle *)
683 :    
684 :     (*------------------------------------------------------------------
685 :     * Insert code at the end of a basic block.
686 :     * Make sure we put code in front of a transfer instruction
687 :     *------------------------------------------------------------------*)
688 :     fun insertAtEnd(insns, code) =
689 :     (case insns of
690 :     [] => code
691 :     | jmp::rest =>
692 :     if P.instrKind jmp = P.IK_JUMP then
693 :     jmp::code@rest
694 :     else
695 :     code@insns
696 :     )
697 :    
698 :     (*------------------------------------------------------------------
699 :     * Magic for inserting shuffle code at the end of a basic block
700 :     *------------------------------------------------------------------*)
701 : jhr 925 fun shuffleOut(stackOut, insns, b, block, liveOut) =
702 :     let
703 :     val liveOut = removeNonPhysical(liveOut)
704 : leunga 731
705 :     (* Generate code that remove unnecessary values *)
706 :     val code = removeDeadValues(stackOut, liveOut, [])
707 :    
708 :     fun done(stackOut, insns, code) =
709 :     (A.update(bindingsOut,b,SOME stackOut);
710 :     insertAtEnd(insns, code)
711 :     )
712 :    
713 :     (* Generate code that shuffle values from source to target *)
714 :     fun match(source, target) =
715 :     done(target, insns, shuffle(source, target, []))
716 :    
717 :     (* Generate code that shuffle values from source to liveOut *)
718 :     fun matchLiveOut() =
719 :     case liveOut of
720 :     [] => done(stackOut, insns, code)
721 :     | _ => match(stackOut, newStack liveOut)
722 :    
723 :     (* With multiple successors, find out which one we
724 :     * should connect to. Choose the one from the block that
725 :     * follows from this one, if that exists, or else choose
726 :     * from the edge with the highest frequency.
727 :     *)
728 :     fun find([], _, id, best) = (id, best)
729 : jhr 925 | find((_, target, _)::edges, highestFreq, id, best) =
730 :     let val CFG.BLOCK{freq, ...} = #node_info cfg target
731 :     in if target = b+1 then (target, A.sub(bindingsIn, target))
732 :     else (case A.sub(bindingsIn, target) of
733 :     NONE => find(edges, highestFreq, id, best)
734 :     | this as SOME stack =>
735 :     if highestFreq < !freq then
736 :     find(edges, !freq, target, this)
737 :     else
738 :     find(edges, highestFreq, id, best)
739 :     )
740 :     end
741 : leunga 731
742 : jhr 925 (*
743 :     * Split all edges source->target except omitThis.
744 :     *)
745 :     fun splitAllEdgesExcept([], omitThis) = ()
746 :     | splitAllEdgesExcept((source,target,e)::edges, omitThis) =
747 :     if target = EXIT then error "can't split exit edge!"
748 :     else
749 :     (if target <> omitThis andalso
750 :     target <= b andalso (* XXX *)
751 :     target <> ENTRY
752 :     then splitEdge("ShuffleOut",source,target,e) else ();
753 :     splitAllEdgesExcept(edges, omitThis)
754 : leunga 731 )
755 :    
756 :     (* Just one successor;
757 :     * try to match the bindings of the successor if it exist.
758 :     *)
759 : jhr 925 fun matchIt succ =
760 :     let val (succBlock, target) = find(succ, ~1, ~1, NONE)
761 :     in splitAllEdgesExcept(succ, succBlock);
762 :     case target of
763 :     SOME stackIn => match(stackOut, stackIn)
764 :     | NONE => done(stackOut,insns,code)
765 :     end
766 :    
767 :     in case #out_edges cfg b of
768 :     [] => matchLiveOut()
769 :     | succ as [(_,target,_)] =>
770 :     if target = EXIT then matchLiveOut()
771 :     else matchIt succ
772 :     | succ => matchIt succ
773 : leunga 731 end (* shuffleOut *)
774 :    
775 :     (*------------------------------------------------------------------
776 :     * Compute the initial fp stack bindings for basic block b.
777 :     *------------------------------------------------------------------*)
778 : jhr 925 fun shuffleIn(b, block, liveIn) =
779 :     let
780 :     val liveInSet = removeNonPhysical liveIn
781 : leunga 731
782 :     (* With multiple predecessors, find out which one we
783 :     * should connect to. Choose the one from the block that
784 :     * falls into this one, if that exists, or else choose
785 :     * from the edge with the highest frequency.
786 :     *)
787 :     fun find([], _, best) = best
788 : jhr 925 | find((source, _, _)::edges, highestFreq, best) =
789 : george 984 let val CFG.BLOCK{freq, ...} = #node_info cfg source
790 : jhr 925 in case A.sub(bindingsOut, source) of
791 :     NONE => find(edges, highestFreq, best)
792 :     | this as SOME stack =>
793 : george 984 if source = b-1
794 :     then this (* falls into b *)
795 :     else if highestFreq < !freq then find(edges, !freq, this)
796 :     else find(edges, highestFreq, best)
797 : jhr 925 end
798 : leunga 731
799 :     fun splitAllDoneEdges [] = ()
800 : jhr 925 | splitAllDoneEdges ((source, target, e)::edges) =
801 :     (if source < b andalso
802 :     source <> ENTRY andalso
803 :     source <> EXIT
804 :     then splitEdge("ShuffleIn", source, target, e) else ();
805 : leunga 731 splitAllDoneEdges edges
806 :     )
807 :    
808 : leunga 744 (* The initial stack bindings are determined by the live set.
809 : leunga 731 * No compensation code is needed.
810 :     *)
811 :     fun fromLiveIn() =
812 :     let val stackIn =
813 :     case liveInSet of
814 :     [] => ST.stack0
815 :     | _ =>
816 : jhr 925 (pr("liveIn="^celllistToString liveIn^"\n");
817 : leunga 731 newStack liveInSet
818 :     )
819 :     val stackOut = ST.copy stackIn
820 :     in (stackIn, stackOut, [])
821 :     end
822 :    
823 : jhr 925 val pred = #in_edges cfg b
824 :    
825 : leunga 731 val (stackIn, stackOut, code) =
826 : jhr 925 case find(pred, ~1, NONE) of
827 :     NONE => (splitAllDoneEdges(pred); fromLiveIn())
828 : leunga 731 | SOME stackIn' =>
829 : jhr 925 (case pred of
830 : leunga 731 [_] => (* one predecessor *)
831 :     (* Use the bindings as from the previous block
832 :     * We first have to deallocate all unused values.
833 :     *)
834 :     let val stackOut = ST.copy stackIn'
835 :     (* Clean the stack of unused entries *)
836 :     val code = removeDeadValues(stackOut, liveInSet, [])
837 :     in (stackIn', stackOut, code) end
838 : jhr 925 | pred => (* more than one predecessors *)
839 : leunga 731 let val stackIn = ST.copy stackIn'
840 :     val code = removeDeadValues(stackIn, liveInSet, [])
841 :     val stackOut = ST.copy stackIn
842 :     in (* If we have to generate code to deallocate
843 :     * the stack then we have split the edge.
844 :     *)
845 :     case code of
846 :     [] => ()
847 : jhr 925 | _ => splitAllDoneEdges(pred);
848 : leunga 731 (stackIn, stackOut, [])
849 :     end
850 :     )
851 :     in A.update(bindingsIn, b, SOME stackIn);
852 :     A.update(bindingsOut, b, SOME stackOut);
853 :     (stackIn, stackOut, code)
854 :     end
855 :    
856 :     (*------------------------------------------------------------------
857 :     * Code for patching up critical edges.
858 :     * The trick is finding a good place to insert the critical edges.
859 : jhr 925 * Let's call an edge x->y that requires compensation
860 :     * code c to be inserted an candidate edge. We write this as x->y(c)
861 :     *
862 :     * Here are the heuristics that we use to improve the final code:
863 :     *
864 :     * 1. Given two candidate edges a->x(c1) and b->x(c2) where c1=c2
865 :     * then we can merge the two copies of compensation code.
866 :     * This is quite common. This generalizes to any number of edges.
867 :     *
868 :     * 2. Given two candidate edges a->x(c1) and b->x(c2) and where
869 :     * c1 and c2 are pops, we can partially share c1 and c2.
870 :     * Currently, I think I only recognize this case when
871 :     * x has no fp registers live-in.
872 :     *
873 :     * 3. Given two candidate edges a->x(c1) and b->x(c2),
874 :     * if a->x has a higher frequency then put the compensation
875 :     * code in front of x (so that it falls through into x)
876 :     * whenever possible.
877 :     *
878 :     * As you can see, the voodoo is strong here.
879 :     *
880 :     * The routine has two main phases:
881 :     * 1. Determine the compensation code by applying the heuristics
882 :     * above.
883 :     * 2. Then insert them and rebuild the cfg by renaming all block
884 :     * ids. This is currently necessary to keep the layout order
885 :     * consistent with the order of the id.
886 : leunga 731 *------------------------------------------------------------------*)
887 : jhr 925 fun repairCriticalEdges(Cfg as G.GRAPH cfg) =
888 : leunga 731 let (* Data structure for recording critical edge splitting info *)
889 :     datatype compensationCode =
890 :     NEWEDGE of
891 : jhr 925 {label:L.label, (* label of new block *)
892 :     entries:CFG.edge list ref, (* edges going into this code *)
893 :     code:I.instruction list, (* code *)
894 : leunga 731 comment:an
895 :     }
896 :    
897 :     val cleanup = [#create MLRiscAnnotations.COMMENT "cleanup edge"]
898 :     val critical = [#create MLRiscAnnotations.COMMENT "critical edge"]
899 :    
900 :     exception Nothing
901 :    
902 :     (* Repair code table; mapping from block id -> compensation code *)
903 : blume 733 val repairCodeTable = IntHashTable.mkTable(32, Nothing)
904 :     val addRepairCode = IntHashTable.insert repairCodeTable
905 : leunga 744 fun lookupRepairCode b =
906 :     getOpt(IntHashTable.find repairCodeTable b,[])
907 : leunga 731
908 :     (* Repair code table; mapping from block id -> compensation code
909 : jhr 925 * These must be relocated ...
910 : leunga 731 *)
911 : blume 733 val repairCodeTable' = IntHashTable.mkTable(32, Nothing)
912 :     val addRepairCode' = IntHashTable.insert repairCodeTable'
913 : leunga 744 fun lookupRepairCode' b =
914 :     getOpt(IntHashTable.find repairCodeTable' b,[])
915 : leunga 731
916 : jhr 925 (* Does the given block falls thru from the previous block?
917 :     * If the previous block is ENTRY then also consider this to be true
918 :     *)
919 :     fun isFallsThru b =
920 :     case #in_edges cfg b of
921 :     [(b',_,_)] => (case CFG.fallsThruTo(Cfg,b') of
922 :     SOME b'' => b'' = b
923 :     | NONE => b' = ENTRY
924 :     )
925 :     | _ => false
926 : leunga 731
927 :     (* Create jump instruction to a block *)
928 : jhr 925 fun jump(CFG.BLOCK{labels, ...}) =
929 :     (case !labels of
930 : leunga 731 [] => error "no label to target of critical edge!"
931 :     | l::_ => P.jump l
932 :     )
933 :    
934 :     (*
935 :     * Special case: target block has stack depth of 0.
936 :     * Just generate code that pop entries from the sources.
937 :     * To make things interesting, we try to share code among
938 :     * all the critical edges.
939 :     *)
940 :     fun genPoppingCode(_, []) = ()
941 : jhr 925 | genPoppingCode(targetBlk, edges as (_,target,_)::_) =
942 :     let val entries =
943 :     map (fn edge as (source, _, _) =>
944 :     let val n = ST.depth(valOf(A.sub(bindingsOut,source)))
945 :     in (n, edge) end
946 : leunga 731 ) edges
947 : jhr 925
948 : leunga 731 (* Ordered by increasing stack height *)
949 : jhr 925 val entries =
950 :     ListMergeSort.sort (fn ((n,_),(m,_)) => n > m) entries
951 : leunga 731
952 :     val relocate = isFallsThru target
953 :    
954 :     fun pop(0, code) = code
955 :     | pop(n, code) = pop(n-1,POP_ST::code)
956 :    
957 :     fun makeCode(popCount, rest) =
958 :     let val code = pop(popCount, [])
959 :     in case rest of
960 : jhr 925 [] => if relocate then
961 :     jump(#node_info cfg target)::code
962 : leunga 731 else code
963 :     | _ => code
964 :     end
965 :    
966 :     (* Generate code, share code between edges that
967 :     * have to pop the same number of elements
968 :     *)
969 :     fun gen([], h, code) = code
970 : jhr 925 | gen((n,e)::rest, _, []) =
971 : leunga 731 gen(rest, n,
972 : jhr 925 [NEWEDGE{label=L.anon(),
973 :     entries=ref [e],
974 :     code=makeCode(n,rest),
975 :     comment=cleanup
976 :     }
977 :     ])
978 :     | gen((n,e)::rest, h, all as (NEWEDGE{entries, ...}::_)) =
979 : leunga 731 gen(rest,h,
980 :     if n = h then
981 : jhr 925 (entries := e :: !entries; all)
982 : leunga 731 else
983 : jhr 925 NEWEDGE{label=L.anon(),
984 :     entries=ref [e],
985 : leunga 731 code=makeCode(n-h,rest),
986 : jhr 925 comment=cleanup
987 :     }::all
988 : leunga 731 )
989 : jhr 925 val repairCode = gen(entries, 0, [])
990 : leunga 731 in (if relocate then addRepairCode' else addRepairCode)
991 : jhr 925 (target, repairCode)
992 : leunga 731 end
993 :    
994 :     (* The general case:
995 :     * Remove dead values, then
996 :     * Shuffle.
997 :     *)
998 : jhr 925 fun genRepairCode(target, targetBlk, stackIn, edges) =
999 : leunga 731 let val repairList = ref []
1000 :     val repairCount = ref 0
1001 : jhr 925 val SOME stackIn = A.sub(bindingsIn, target)
1002 :     fun repair(edge as (source, _, _)) =
1003 :     let val SOME stackOut' = A.sub(bindingsOut, source)
1004 : leunga 731 fun createNewRepairEdge() =
1005 :     let val stackOut = ST.copy stackOut'
1006 : jhr 925 val liveIn = IntHashTable.lookup liveInTable target
1007 : leunga 731 val liveInSet = removeNonPhysical liveIn
1008 :     val _ =
1009 :     if debug then
1010 : jhr 925 pr("LiveIn = "^celllistToString liveIn^"\n")
1011 : leunga 731 else ()
1012 :    
1013 :     (* deallocate unused values *)
1014 :     val code = removeDeadValues(stackOut, liveInSet, [])
1015 :     (* shuffle values *)
1016 :     val code = shuffle(stackOut, stackIn, code)
1017 :     fun addNewEdge() =
1018 :     let (* Do we need to relocate this block? *)
1019 :     val relocate = !repairCount > 0 orelse
1020 : jhr 925 isFallsThru target
1021 :     andalso source + 1 <> target
1022 : leunga 731
1023 :     (* add a jump to the target block *)
1024 : jhr 925 val code = if relocate then jump targetBlk::code
1025 : leunga 731 else code
1026 :    
1027 :     val repairCode =
1028 : jhr 925 NEWEDGE{label=L.anon(),
1029 :     entries=ref [edge],
1030 :     code=code,
1031 :     comment=critical
1032 :     }
1033 : leunga 731 in repairCount := !repairCount + 1;
1034 :     repairList := (repairCode, stackOut')
1035 :     :: !repairList;
1036 :     if relocate then
1037 : jhr 925 addRepairCode'(target,
1038 :     repairCode::lookupRepairCode' target)
1039 : leunga 731 else
1040 : jhr 925 addRepairCode(target,
1041 :     repairCode::lookupRepairCode target)
1042 : leunga 731 end
1043 : jhr 925 in case #out_edges cfg source of
1044 :     [(_,j,_)] =>
1045 :     if j = target then (*insert code at predecessor*)
1046 :     let val CFG.BLOCK{insns,...} =
1047 :     #node_info cfg source
1048 :     in insns := insertAtEnd(!insns, code)
1049 :     end
1050 :     else
1051 :     addNewEdge()
1052 : leunga 731 | _ => addNewEdge()
1053 :     end
1054 :    
1055 :     fun shareRepairEdge [] = false
1056 : jhr 925 | shareRepairEdge
1057 :     ((NEWEDGE{entries,...},stackOut'')::rest) =
1058 : leunga 731 if ST.equal(stackOut'', stackOut') then
1059 : jhr 925 (entries := edge :: !entries; true)
1060 : leunga 731 else shareRepairEdge rest
1061 :    
1062 :     in if shareRepairEdge(!repairList) then ()
1063 :     else createNewRepairEdge()
1064 :     end
1065 :     in app repair edges
1066 :     end
1067 :    
1068 :     (*
1069 : jhr 925 * Code to split critical edges entering block target
1070 : leunga 731 *)
1071 : jhr 925 fun split(target, edges) =
1072 :     let val SOME stackIn = A.sub(bindingsIn,target)
1073 :     fun log(s, t, e) =
1074 :     let val SOME stackOut = A.sub(bindingsOut,s)
1075 : leunga 731 in pr("SPLIT "^i2s s^"->"^i2s t^" "^
1076 :     ST.stackToString stackOut^"->"^
1077 :     ST.stackToString stackIn^"\n")
1078 :     end
1079 :     val _ = if debug andalso !traceOn then app log edges else ()
1080 : jhr 925 val targetBlk = #node_info cfg target
1081 :     in if ST.depth stackIn = 0 then genPoppingCode(targetBlk,edges)
1082 :     else genRepairCode(target, targetBlk, stackIn, edges)
1083 : leunga 731 end
1084 :    
1085 : jhr 925
1086 :     (*
1087 :     * Create a new empty cfg with the same graph info as the old one.
1088 :     *)
1089 :     val Cfg' as G.GRAPH cfg' = CFG.cfg (#graph_info cfg)
1090 :    
1091 :     (*
1092 :     * Renumber all the blocks and insert compensation code at the
1093 : leunga 731 * right places.
1094 :     *)
1095 :     fun renumberBlocks() =
1096 : jhr 925 let (* Mapping from label to new node ids *)
1097 :     val labelMap = HashTable.mkTable (L.hash,L.same) (32, Nothing)
1098 :     val mapLabelToId = HashTable.insert labelMap
1099 : leunga 731
1100 : jhr 925 (* Mapping from old id to new id *)
1101 :     val idMap = IntHashTable.mkTable (32, Nothing)
1102 :     val mapOldIdToNewId = IntHashTable.insert idMap
1103 :     val oldIdToNewId = IntHashTable.lookup idMap
1104 : leunga 731
1105 : jhr 925 (* Retarget a jump instruction to label l *)
1106 : george 1003 fun retargetJump(I.INSTR(I.JMP(I.ImmedLabel(T.LABEL _), [_])), l) =
1107 :     I.jmp(I.ImmedLabel(T.LABEL l), [l])
1108 :     | retargetJump(I.INSTR(I.JCC{cond,opnd=I.ImmedLabel(T.LABEL _)}),l)=
1109 :     I.jcc{cond=cond,opnd=I.ImmedLabel(T.LABEL l)}
1110 : jhr 925 | retargetJump(I.ANNOTATION{i,a},l) =
1111 :     I.ANNOTATION{i=retargetJump(i,l),a=a}
1112 :     | retargetJump(_,l) = error "retargetJump"
1113 : leunga 731
1114 : jhr 925 (*
1115 :     * Given a candidate edge, generate compensation code.
1116 :     *)
1117 :     fun transRepair(n, []) = n
1118 :     | transRepair(n, NEWEDGE{label,entries,code,comment}::rest) =
1119 :     let val this =
1120 :     CFG.BLOCK{id=n,
1121 :     kind=CFG.NORMAL,
1122 :     freq=ref 0, (* XXX Wrong frequency! *)
1123 :     labels=ref [label],
1124 :     insns=ref code,
1125 : george 984 annotations=ref comment,
1126 :     align=ref NONE
1127 : jhr 925 }
1128 :    
1129 :     (*
1130 :     * Update the instructions to predecessors of this edge.
1131 :     *)
1132 :     fun retarget(CFG.BLOCK{kind=CFG.START,...}) = ()
1133 :     | retarget(CFG.BLOCK{insns as ref(jmp::rest), ...}) =
1134 :     insns := retargetJump(jmp, label)::rest
1135 :     | retarget _ = error "retarget"
1136 :    
1137 :     fun retargetEntries(pred,_,CFG.EDGE{a,...}) =
1138 :     (retarget(#node_info cfg pred);
1139 :     a := TargetMovedTo n :: !a
1140 :     )
1141 :    
1142 : leunga 731 in if debug andalso !traceOn then
1143 :     pr("Inserting critical edge at block "^i2s n^" "^
1144 : jhr 925 L.toString label^"\n")
1145 : leunga 731 else ();
1146 : jhr 925 #add_node cfg' (n, this); (* insert block *)
1147 :     mapLabelToId(label, n);
1148 :     app retargetEntries (!entries);
1149 :     transRepair(n+1, rest)
1150 : leunga 731 end
1151 :    
1152 : jhr 925 (*
1153 :     * Renumber all the blocks and insert repair code.
1154 :     *)
1155 :     fun renumber(n, [], repairCode') = transRepair(n, repairCode')
1156 :     | renumber(n, (blknum, block as
1157 : george 984 CFG.BLOCK{kind,annotations,insns,freq,align,labels, ...})::rest,
1158 : jhr 925 repairCode') =
1159 : leunga 731 let (* If we have outstanding repair code and this is
1160 :     * NOT a fallsthru entry, then insert them here.
1161 :     *)
1162 : jhr 925 val (n, repairCode') =
1163 : leunga 731 case repairCode' of
1164 : jhr 925 [] => (n, [])
1165 :     | _ => if isFallsThru blknum then
1166 :     (n, repairCode')
1167 : leunga 731 else
1168 : jhr 925 (transRepair(n, repairCode'), [])
1169 : leunga 731
1170 :     (* Insert non-relocatable repair code *)
1171 :     val repairCode = lookupRepairCode blknum
1172 : jhr 925 val n = transRepair(n, repairCode)
1173 : leunga 731
1174 :     (* Create this block *)
1175 : jhr 925 val this = CFG.BLOCK{id=n,
1176 :     kind=kind,
1177 :     freq=freq,
1178 : george 984 align=align,
1179 : jhr 925 labels=labels,
1180 :     insns=insns,
1181 :     annotations=annotations
1182 :     }
1183 : leunga 731
1184 :     (* Insert new relocatable repair code *)
1185 :     val repairCode' = repairCode' @
1186 :     lookupRepairCode' blknum
1187 :    
1188 : jhr 925 (* Insert labels that map to this block *)
1189 :     val _ = app (fn l => mapLabelToId(l, n)) (!labels)
1190 : leunga 731
1191 : jhr 925 (* Insert block *)
1192 :     val _ = #add_node cfg' (n, this)
1193 :     val _ = mapOldIdToNewId(blknum, n)
1194 : leunga 731
1195 : jhr 925 in case kind of
1196 :     CFG.START => #set_entries cfg' [n]
1197 :     | CFG.STOP => #set_exits cfg' [n]
1198 :     | _ => ();
1199 :     renumber(n+1, rest, repairCode')
1200 :     end
1201 : leunga 731
1202 : jhr 925 (* Do all the blocks *)
1203 :     val n = renumber(0, #nodes cfg (), [])
1204 : leunga 731
1205 : jhr 925 val [newExit] = #exits cfg' ()
1206 : leunga 731
1207 : jhr 925 (*
1208 :     * Given a label, finds out which block it targets.
1209 :     * If not found then it means the block is escaping.
1210 :     *)
1211 :     val findLabel = HashTable.find labelMap
1212 :     fun labelToBlockId l = getOpt(findLabel l, newExit)
1213 : leunga 731
1214 : jhr 925 fun hasJump x =
1215 :     let val CFG.BLOCK{insns, ...} = #node_info cfg' x
1216 :     in case !insns of
1217 :     [] => false
1218 :     | jmp::_ => P.instrKind jmp = P.IK_JUMP
1219 :     end
1220 : leunga 731
1221 : jhr 925 (*
1222 :     * Now rebuild all the old edges.
1223 :     * For each edge, makes sure the target hasn't been moved.
1224 :     *)
1225 :     fun renameEdge(x, y, e as CFG.EDGE{a,k,w,...}) =
1226 :     let val x = oldIdToNewId x
1227 :     val (z, e) =
1228 :     case !a of
1229 :     TargetMovedTo z::an =>
1230 :     let val e =
1231 :     case k of
1232 :     (CFG.FALLSTHRU | CFG.BRANCH false) =>
1233 :     if hasJump x then
1234 :     CFG.EDGE{a=a, w=w, k=CFG.JUMP}
1235 :     else e
1236 :     | _ => e
1237 :     in a := an; (* remove the marker *)
1238 :     (z, e)
1239 :     end
1240 :     | _ => (oldIdToNewId y, e)
1241 :     in #add_edge cfg' (x,z,e)
1242 :     end
1243 :    
1244 :     val _ = #forall_edges cfg renameEdge
1245 : leunga 731
1246 : jhr 925 (*
1247 :     * Now add new edges x->y where x is a new compensation block
1248 :     *)
1249 :     fun addNewEdge(NEWEDGE{label, code, entries, ...}) =
1250 :     let val x = labelToBlockId label
1251 :     val (y, k) =
1252 :     case code of
1253 :     [] => (x + 1, CFG.FALLSTHRU) (* next block *)
1254 :     | jmp::_ =>
1255 :     if P.instrKind jmp = P.IK_JUMP then
1256 :     (case P.branchTargets jmp of
1257 :     [P.LABELLED l] => (labelToBlockId l, CFG.JUMP)
1258 :     | _ => error "addNewEdge where is the target?"
1259 :     )
1260 :     else
1261 :     (x + 1, CFG.FALLSTHRU)
1262 :     (* compute weight *)
1263 :     val w = List.foldr (fn ((_,_,CFG.EDGE{w,...}),n) => !w+n)
1264 :     0 (!entries)
1265 :     in #add_edge cfg' (x, y, CFG.EDGE{a=ref [], w=ref w, k=k})
1266 :     end
1267 : leunga 731
1268 : jhr 925 val addNewEdges = app addNewEdge
1269 :     val _ = IntHashTable.app addNewEdges repairCodeTable
1270 :     val _ = IntHashTable.app addNewEdges repairCodeTable'
1271 : leunga 731
1272 : jhr 925 in Cfg'
1273 : leunga 731 end
1274 :    
1275 : jhr 925 in IntHashTable.appi split edgesToSplit;
1276 : leunga 744 renumberBlocks()
1277 : jhr 925 end
1278 : leunga 731
1279 :     (*------------------------------------------------------------------
1280 : jhr 925 * Process all blocks which are not the entry or the exit
1281 : leunga 731 *------------------------------------------------------------------*)
1282 : jhr 925 val stamp = ref 0
1283 :     fun rewriteAllBlocks (_, CFG.BLOCK{kind=CFG.START, ...}) = ()
1284 :     | rewriteAllBlocks (_, CFG.BLOCK{kind=CFG.STOP, ...}) = ()
1285 :     | rewriteAllBlocks
1286 :     (blknum, block as CFG.BLOCK{insns, labels, annotations, ...}) =
1287 :     let val _ =
1288 :     if debug andalso !debugOn then
1289 :     app (fn l => pr(L.toString l^":\n")) (!labels)
1290 :     else ();
1291 :     val liveIn = HT.lookup liveInTable blknum
1292 :     val liveOut = HT.lookup liveOutTable blknum
1293 :     val st = rewrite(!stamp, blknum, block,
1294 : leunga 731 insns, liveIn, liveOut,
1295 : jhr 925 annotations)
1296 :     in stamp := st (* update stamp *)
1297 : leunga 731 end
1298 :    
1299 :     (*------------------------------------------------------------------
1300 :     * Translate code within a basic block.
1301 :     * Each instruction is given a unique stamp for identifying last
1302 :     * uses.
1303 :     *------------------------------------------------------------------*)
1304 :     and rewrite(stamp, blknum, block, insns, liveIn, liveOut,
1305 : jhr 925 annotations) =
1306 :     let val (stackIn, stack, code) = shuffleIn(blknum, block, liveIn)
1307 : leunga 731
1308 :     (* Dump instructions when encountering a bug *)
1309 :     fun bug msg =
1310 :     (pr("-------- bug in block "^i2s blknum^" ----\n");
1311 :     dump(!insns);
1312 :     error msg
1313 :     )
1314 :    
1315 :     fun loop(stamp, [], [], code) = (stamp, code)
1316 :     | loop(stamp, instr::rest, (lastUse,dead)::lastUses, code) =
1317 :     let fun mark(tbl, []) = ()
1318 :     | mark(tbl, r::rs) =
1319 : george 889 (A.update(tbl, CB.registerNum r, stamp); mark(tbl, rs))
1320 : leunga 731 in mark(lastUseTbl,lastUse); (* mark all last uses *)
1321 :     trans(stamp, instr, [], rest, dead, lastUses, code)
1322 :     end
1323 :     | loop _ = error "loop"
1324 :    
1325 :     (*
1326 :     * Main routine that does the actual translation.
1327 :     * A few reminders:
1328 :     * o The instructions are processed in normal order
1329 :     * and generated in the reversed order.
1330 :     * o (Local) liveness is computed at the same time.
1331 :     * o For each use, we have to find out whether it is
1332 :     * the last use. If so, we can kill it and reclaim
1333 :     * the stack entry at the same time.
1334 :     *)
1335 :     and trans(stamp, instr, an, rest, dead, lastUses, code) =
1336 :     let (* Call this continuation when done with code generation *)
1337 :     fun FINISH code = loop(stamp+1, rest, lastUses, code)
1338 :    
1339 :     (* Call this continuation when done with floating point
1340 :     * code generation. Remove all dead code first.
1341 :     *)
1342 :     fun DONE code =
1343 :     let fun kill([], code) = FINISH code
1344 :     | kill(f::fs, code) =
1345 : george 889 let val fx = CB.registerNum f
1346 : leunga 744 in if debug andalso debugDead then
1347 :     pr("DEAD "^fregToString f^" in "^
1348 :     ST.stackToString stack^"\n")
1349 :     else ();
1350 :     (* not a physical register *)
1351 :     if fx >= 8 then kill(fs, code)
1352 :     else
1353 :     let val i = ST.fp(stack, fx)
1354 :     in if debug andalso debugDead then
1355 :     pr("KILLING "^fregToString f^
1356 :     "=%st("^i2s i^")\n")
1357 :     else ();
1358 :     if i < 0 then kill(fs, code) (* dead already *)
1359 :     else if i = 0 then
1360 :     (ST.pop stack; kill(fs, POP_ST::code))
1361 :     else
1362 :     (ST.xch(stack,0,i); ST.pop stack;
1363 : george 1003 kill(fs, I.fstpl(ST i)::code)
1364 : leunga 744 )
1365 :     end
1366 :     end
1367 : leunga 731 in kill(dead, code)
1368 :     end
1369 :    
1370 :     (* Is this the last use of register f? *)
1371 :     fun isLastUse f = A.sub(lastUseTbl, f) = stamp
1372 :    
1373 :     (* Is this value dead? *)
1374 :     fun isDead f =
1375 :     let fun loop [] = false
1376 : george 889 | loop(r::rs) = CB.sameColor(f,r) orelse loop rs
1377 : leunga 731 in loop dead end
1378 :    
1379 :     (* Dump the stack before each intruction for debugging *)
1380 :     fun log() = if debug andalso !traceOn then
1381 :     pr(ST.stackToString stack^assemble instr^"...\n")
1382 :     else ()
1383 :    
1384 :     (* Find the location of a source register *)
1385 :     fun getfs(f) =
1386 : george 889 let val fx = CB.registerNum f
1387 : leunga 744 val s = ST.fp(stack, fx)
1388 :     in (isLastUse fx,s) end
1389 : leunga 731
1390 :     (* Generate memory to memory move *)
1391 :     fun mmmove(fsize,src,dst) =
1392 :     let val _ = ST.nonFull stack
1393 :     val code = FLD(fsize,src)::code
1394 :     val code = mark(FSTP(fsize,dst),an)::code
1395 :     in DONE code end
1396 :    
1397 :     (* Allocate a new register in %st(0) *)
1398 : george 889 fun alloc(f,code) = (ST.push(stack,CB.registerNum f); code)
1399 : leunga 731
1400 :     (* register -> register move *)
1401 :     fun rrmove(fs,fd) =
1402 : george 889 if CB.sameColor(fs,fd) then DONE code
1403 : leunga 731 else
1404 : leunga 744 let val (dead,ss) = getfs fs
1405 : leunga 731 in if dead then (* fs is dead *)
1406 : george 889 (ST.set(stack,ss,CB.registerNum fd); (* rename fd to fs *)
1407 : leunga 731 DONE code (* no code is generated *)
1408 :     )
1409 :     else (* fs is not dead; push it onto %st(0);
1410 :     * set fd to %st(0)
1411 :     *)
1412 :     let val code = alloc(fd, code)
1413 : george 1003 in DONE(mark(I.fldl(ST ss),an)::code)
1414 : leunga 731 end
1415 :     end
1416 :    
1417 :     (* memory -> register move.
1418 :     * Do dead code elimination here.
1419 :     *)
1420 :     fun mrmove(fsize,src,fd) =
1421 : leunga 744 if isDead fd
1422 : leunga 731 then FINISH code (* value has been killed *)
1423 :     else
1424 :     let val code = alloc(fd, code)
1425 :     in DONE(mark(FLD(fsize,src),an)::code)
1426 :     end
1427 :    
1428 :     (* exchange %st(n) and %st(0) *)
1429 :     fun xch(n) = (ST.xch(stack,0,n); FXCH n)
1430 :    
1431 :     (* push %st(n) onto the stack *)
1432 : george 1003 fun push(n) = (ST.push(stack,~2); I.fldl(ST n))
1433 : leunga 731
1434 :    
1435 :     (* push mem onto the stack *)
1436 : george 1003 fun pushmem(src) = (ST.push(stack,~2); I.fldl(src))
1437 : leunga 731
1438 :     (* register -> memory move.
1439 :     * Use pop version of the opcode if it is the last use.
1440 :     *)
1441 :     fun rmmove(fsize,fs,dst) =
1442 :     let fun fstp(code) =
1443 :     (ST.pop stack; DONE(mark(FSTP(fsize,dst),an)::code))
1444 :     fun fst(code) = DONE(mark(FST(fsize,dst),an)::code)
1445 :     in case getfs fs of
1446 :     (true, 0) => fstp code
1447 :     | (true, n) => fstp(xch n::code)
1448 :     | (false, 0) => fst(code)
1449 :     | (false, n) => fst(xch n::code)
1450 :     end
1451 :    
1452 :     (* Floating point move *)
1453 :     fun fmove{fsize,src=I.FPR fs,dst=I.FPR fd} = rrmove(fs,fd)
1454 :     | fmove{fsize,src,dst=I.FPR fd} = mrmove(fsize,src,fd)
1455 :     | fmove{fsize,src=I.FPR fs,dst} = rmmove(fsize,fs,dst)
1456 :     | fmove{fsize,src,dst} = mmmove(fsize,src,dst)
1457 :    
1458 :     (* Floating point integer load operator *)
1459 :     fun fiload{isize,ea,dst=I.FPR fd} =
1460 : leunga 744 let val code = alloc(fd, code)
1461 : leunga 731 val code = mark(FILD(isize,ea),an)::code
1462 :     in DONE code
1463 :     end
1464 :     | fiload{isize,ea,dst} =
1465 :     let val code = mark(FILD(isize,ea),an)::code
1466 : george 1003 val code = I.fstpl(dst)::code (* XXX *)
1467 : leunga 731 in DONE code
1468 :     end
1469 :    
1470 :     (* Make a copy of register fs to %st(0). *)
1471 :     fun moveregtotop(fs, code) =
1472 :     (case getfs fs of
1473 :     (true, 0) => code
1474 :     | (true, n) => xch n::code
1475 :     | (false, n) => push n::code
1476 :     )
1477 :    
1478 :     fun movememtotop(fsize, mem, code) =
1479 :     (ST.push(stack, ~2); FLD(fsize, mem)::code)
1480 :    
1481 :     (* Move an operand to top of stack *)
1482 :     fun movetotop(fsize, I.FPR fs, code) = moveregtotop(fs, code)
1483 :     | movetotop(fsize, mem, code) = movememtotop(fsize, mem, code)
1484 :    
1485 :     fun storeResult(fsize, dst, n, code) =
1486 :     case dst of
1487 : george 889 I.FPR fd => (ST.set(stack, n, CB.registerNum fd); DONE code)
1488 : leunga 731 | mem =>
1489 :     let val code = if n = 0 then code else xch n::code
1490 :     in ST.pop stack; DONE(FSTP(fsize, mem)::code) end
1491 :    
1492 :     (* Floating point unary operator *)
1493 :     fun funop{fsize,unOp,src,dst} =
1494 :     let val code = movetotop(fsize, src, code)
1495 : george 1003 val code = mark(I.funary unOp,an)::code
1496 : leunga 731
1497 :     (* Moronic hack to deal with partial tangent! *)
1498 :     val code =
1499 :     case unOp of
1500 :     I.FPTAN =>
1501 :     (if ST.depth stack >= 7 then error "FPTAN"
1502 :     else ();
1503 :     POP_ST::code (* pop the useless 1.0 *)
1504 :     )
1505 :     | _ => code
1506 :     in storeResult(fsize, dst, 0, code)
1507 :     end
1508 :    
1509 :     (* Floating point binary operator.
1510 :     * Note:
1511 :     * binop src, dst
1512 :     * means dst := dst binop src
1513 :     * (lsrc := lsrc binop rsrc)
1514 :     * on the x86
1515 :     *)
1516 :     fun fbinop{fsize,binOp,lsrc,rsrc,dst} =
1517 :     let (* generate code and set %st(n) = fd *)
1518 :     (* op2 := op1 - op2 *)
1519 :     fun oper(binOp,op1,op2,n,code) =
1520 :     let val code =
1521 : george 1003 mark(I.fbinary{binOp=binOp,src=op1,dst=op2},an)
1522 : leunga 731 ::code
1523 :     in storeResult(I.FP64, dst, n, code)
1524 :     end
1525 :    
1526 :     fun operR(binOp,op1,op2,n,code) =
1527 :     oper(invert binOp,op1,op2,n,code)
1528 :    
1529 :     fun operP(binOp,op1,op2,n,code) =
1530 :     (ST.pop stack; oper(pop binOp,op1,op2,n-1,code))
1531 :    
1532 :     fun operRP(binOp,op1,op2,n,code) =
1533 :     (ST.pop stack; operR(pop binOp,op1,op2,n-1,code))
1534 :    
1535 :     (* Many special cases to consider.
1536 :     * Basically, try to reuse stack space as
1537 :     * much as possible by taking advantage of last uses.
1538 :     *
1539 :     * Stack=[st(0)=3.0 st(1)=2.0]
1540 :     * fsub %st(1), %st [1,2.0]
1541 :     * fsubr %st(1), %st [-1,2.0]
1542 :     * fsub %st, %st(1) [3.0,1.0]
1543 :     * fsubr %st, %st(1) [3.0,-1.0]
1544 :     *
1545 :     * fsubp %st, %st(1) [1]
1546 :     * fsubrp %st, %st(1) [-1]
1547 :     * So,
1548 :     * fsub %st(n), %st (means %st - %st(n) -> %st)
1549 :     * fsub %st, %st(n) (means %st - %st(n) -> %st(n))
1550 :     * fsubr %st(n), %st (means %st(n) - %st -> %st)
1551 :     * fsubr %st, %st(n) (means %st(n) - %st -> %st(n))
1552 :     *)
1553 :     fun reg2(fx, fy) =
1554 :     let val (dx, sx) = getfs fx
1555 :     val (dy, sy) = getfs fy
1556 :     fun loop(dx, sx, dy, sy, code) =
1557 :     (* op1, op2 (dst) *)
1558 :     case (dx, sx, dy, sy) of
1559 :     (true, 0, false, n) => oper(binOp,ST n,ST0,0,code)
1560 :     | (false, n, true, 0) => operR(binOp,ST n,ST0,0,code)
1561 :     | (true, n, true, 0) => operRP(binOp,ST0,ST n,n,code)
1562 :     | (true, 0, true, n) => operP(binOp,ST0,ST n,n,code)
1563 :     | (false, 0, true, n) => oper(binOp,ST0,ST n,n,code)
1564 :     | (true, n, false, 0) => operR(binOp,ST0,ST n,n,code)
1565 :     | (true, sx, dy, sy) =>
1566 :     loop(true, 0, dy, sy, xch sx::code)
1567 :     | (dx, sx, true, sy) =>
1568 :     loop(dx, sx, true, 0, xch sy::code)
1569 :     | (false, sx, false, sy) =>
1570 :     loop(true, 0, false, sy+1, push sx::code)
1571 :     in if sx = sy then (* same register *)
1572 :     let val code =
1573 :     case (dx, sx) of
1574 :     (true, 0) => code
1575 :     | (true, n) => xch n::code
1576 :     | (false, n) => push n::code
1577 :     in oper(binOp,ST0,ST0,0,code)
1578 :     end
1579 :     else loop(dx, sx, dy, sy, code)
1580 :     end
1581 :    
1582 :     (* reg/mem operands *)
1583 :     fun regmem(binOp, fx, mem) =
1584 :     case getfs fx of
1585 :     (true, 0) => oper(binOp,mem,ST0,0,code)
1586 :     | (true, n) => oper(binOp,mem,ST0,0,xch n::code)
1587 :     | (false, n) => oper(binOp,mem,ST0,0,push n::code)
1588 :    
1589 :     (* Two memory operands. Optimize the case when
1590 :     * the two operands are identical.
1591 :     *)
1592 :     fun mem2(lsrc, rsrc) =
1593 :     let val _ = ST.push(stack,~2)
1594 :     val code = FLD(fsize,lsrc)::code
1595 :     val rsrc = if P.eqOpn(lsrc, rsrc) then ST0 else rsrc
1596 :     in oper(binOp,rsrc,ST0,0,code)
1597 :     end
1598 :    
1599 :     fun process(I.FPR fx, I.FPR fy) = reg2(fx, fy)
1600 :     | process(I.FPR fx, mem) = regmem(binOp, fx, mem)
1601 :     | process(mem, I.FPR fy) = regmem(invert binOp, fy, mem)
1602 :     | process(lsrc, rsrc) = mem2(lsrc, rsrc)
1603 :    
1604 :     in process(lsrc, rsrc)
1605 :     end
1606 :    
1607 :     (* Floating point binary operator with integer conversion *)
1608 :     fun fibinop{isize,binOp,lsrc,rsrc,dst} =
1609 :     let fun oper(binOp,src,code) =
1610 : george 1003 let val code = mark(I.fibinary{binOp=binOp,src=src},an)
1611 : leunga 731 ::code
1612 :     in storeResult(I.FP64, dst, 0, code)
1613 :     end
1614 :    
1615 :     fun regmem(binOp, fx, mem) =
1616 :     case getfs fx of
1617 :     (true, 0) => oper(binOp, mem, code)
1618 :     | (true, n) => oper(binOp, mem, xch n::code)
1619 :     | (false, n) => oper(binOp, mem, push n::code)
1620 :    
1621 :     in case (lsrc, rsrc) of
1622 :     (I.FPR fx, mem) => regmem(binOp, fx, mem)
1623 :     | (lsrc, rsrc) => oper(binOp, rsrc, pushmem lsrc::code)
1624 :     end
1625 :    
1626 :     (* Floating point comparison
1627 :     * We have to make sure there are enough registers.
1628 :     * The trick is that tmp is always a physical register.
1629 :     * So we can always use it as temporary space if we
1630 :     * have run out.
1631 :     *)
1632 :     fun fcmp{fsize,lsrc,rsrc} =
1633 :     let fun fucompp() =
1634 : george 1003 (ST.pop stack; ST.pop stack; mark(I.fucompp,an))
1635 : leunga 731 fun fucomp(n) =
1636 : george 1003 (ST.pop stack; mark(I.fucomp(ST n),an))
1637 :     fun fucom(n) = mark(I.fucom(ST n),an)
1638 : leunga 731
1639 :     fun genmemcmp() =
1640 :     let val code = movememtotop(fsize, rsrc, code)
1641 :     val code = movememtotop(fsize, lsrc, code)
1642 :     in FINISH(fucompp()::code)
1643 :     end
1644 :    
1645 :     fun genmemregcmp(lsrc, fy) =
1646 :     case getfs fy of
1647 :     (false, n) =>
1648 :     let val code = movememtotop(fsize, lsrc, code)
1649 :     in FINISH(fucomp(n+1)::code) end
1650 :     | (true, n) =>
1651 :     let val code = if n = 0 then code else xch n::code
1652 :     val code = movememtotop(fsize, lsrc, code)
1653 :     in FINISH(fucompp()::code)
1654 :     end
1655 :    
1656 :     fun genregmemcmp(fx, rsrc) =
1657 :     let val code =
1658 :     case getfs fx of
1659 :     (true, n) =>
1660 :     let val code = if n = 0 then code
1661 :     else xch n::code
1662 :     val code = movememtotop(fsize, rsrc, code)
1663 :     in xch 1::code end
1664 :     | (false, n) =>
1665 :     let val code = movememtotop(fsize, rsrc, code)
1666 :     in push(n+1)::code
1667 :     end
1668 :     in FINISH(fucompp()::code)
1669 :     end
1670 :    
1671 :     (* Deal with the special case when both sources are
1672 :     * in the same register
1673 :     *)
1674 :     fun regsame(dx, sx) =
1675 :     let val (code, cmp) =
1676 :     case (dx, sx) of
1677 :     (true, 0) => (code, fucomp 0) (* pop once! *)
1678 :     | (false, 0) => (code, fucom 0) (* don't pop! *)
1679 :     | (true, n) => (xch n::code, fucomp 0)
1680 :     | (false, n) => (xch n::code, fucom 0)
1681 :     in FINISH(cmp::code) end
1682 :    
1683 :     fun reg2(fx, fy) =
1684 :     (* special case is when things are already in place.
1685 :     * Note: should also generate FUCOM and FUCOMP!!!
1686 :     *)
1687 :     let val (dx, sx) = getfs fx
1688 :     val (dy, sy) = getfs fy
1689 :     fun fstp(n) =
1690 : george 1003 (ST.xch(stack,n,0); ST.pop stack; I.fstpl(ST n))
1691 : leunga 731 in if sx = sy then regsame(dx, sx) (* same register!*)
1692 :     else
1693 :     (* first, move sx to %st(0) *)
1694 :     let val (sy, code) =
1695 :     if sx = 0 then (sy, code) (* there already *)
1696 :     else (if sy = 0 then sx else sy,
1697 :     xch sx::code)
1698 :    
1699 :     (* Generate the appropriate comparison op *)
1700 :     val (sy, cmp, popY) =
1701 :     case (dx, dy, sy) of
1702 :     (true, true, 0) => (~1, fucompp(), false)
1703 :     | (true, _, _) => (sy-1, fucomp sy, dy)
1704 :     | (false, _, _) => (sy, fucom sy, dy)
1705 :    
1706 :     val code = cmp::code
1707 :    
1708 :     (* Pop fy if it is dead and hasn't already
1709 :     * been popped.
1710 :     *)
1711 :     val code = if popY then fstp sy::code else code
1712 :     in FINISH code
1713 :     end
1714 :     end
1715 :    
1716 :     in case (lsrc, rsrc) of
1717 :     (I.FPR x, I.FPR y) => reg2(x, y)
1718 :     | (I.FPR x, mem) => genregmemcmp(x, mem)
1719 :     | (mem, I.FPR y) => genmemregcmp(mem, y)
1720 :     | _ => genmemcmp()
1721 :     end
1722 :    
1723 :    
1724 :     fun prCopy(dst, src) =
1725 :     ListPair.app(fn (fd, fs) =>
1726 : leunga 744 pr(fregToString(fd)^"<-"^fregToString fs^" "))
1727 : leunga 731 (dst, src)
1728 :    
1729 :     (* Parallel copy magic.
1730 :     * For each src registers, we find out
1731 :     * 1. whether it is the last use, and if so,
1732 :     * 2. whether it is used more than once.
1733 :     * If a source is a last and unique use, then we
1734 :     * can simply rename it to appropriate destination register.
1735 :     *)
1736 : george 1009 fun fcopy(I.COPY{dst,src,tmp,...}) =
1737 : leunga 731 let fun loop([], [], copies, renames) = (copies, renames)
1738 :     | loop(fd::fds, fs::fss, copies, renames) =
1739 : george 889 let val fsx = CB.registerNum fs
1740 : leunga 744 in if isLastUse fsx then
1741 :     if A.sub(useTbl,fsx) <> stamp
1742 :     (* unused *)
1743 :     then (A.update(useTbl,fsx,stamp);
1744 : leunga 731 loop(fds, fss, copies,
1745 : george 889 if CB.sameColor(fd,fs) then renames
1746 : leunga 731 else (fd, fs)::renames)
1747 : leunga 744 )
1748 : leunga 731 else loop(fds, fss, (fd, fs)::copies, renames)
1749 :     else loop(fds, fss, (fd, fs)::copies, renames)
1750 :     end
1751 :     | loop _ = error "fcopy.loop"
1752 :    
1753 :     (* generate code for the copies *)
1754 :     fun genCopy([], code) = code
1755 :     | genCopy((fd, fs)::copies, code) =
1756 : george 889 let val ss = ST.fp(stack, CB.registerNum fs)
1757 :     val _ = ST.push(stack, CB.registerNum fd)
1758 : george 1003 val code = I.fldl(ST ss)::code
1759 : leunga 731 in genCopy(copies, code) end
1760 :    
1761 :     (* perform the renaming; it must be done in parallel! *)
1762 :     fun renaming(renames) =
1763 : leunga 744 let val ss = map (fn (_,fs) =>
1764 : george 889 ST.fp(stack,CB.registerNum fs)) renames
1765 : leunga 744 in ListPair.app (fn ((fd,_),ss) =>
1766 : george 889 ST.set(stack,ss,CB.registerNum fd))
1767 : leunga 731 (renames, ss)
1768 :     end
1769 :    
1770 :     (* val _ = if debug then
1771 :     (ListPair.app (fn (fd, fs) =>
1772 :     pr(fregToString(regmap fd)^"<-"^
1773 :     fregToString(regmap fs)^" ")
1774 :     ) (dst, src);
1775 :     pr "\n")
1776 :     else () *)
1777 :    
1778 :     val (copies, renames) = loop(dst, src, [], [])
1779 :     val code = genCopy(copies, code)
1780 :     in renaming renames;
1781 :     case tmp of
1782 :     SOME(I.FPR f) =>
1783 : leunga 744 (if debug andalso debugDead
1784 :     then pr("KILLING tmp "^fregToString f^"\n")
1785 :     else ();
1786 :     ST.kill(stack, f)
1787 :     )
1788 : leunga 731 | _ => ();
1789 :     DONE code
1790 :     end
1791 :    
1792 : george 895 fun call(instr, return) = let
1793 : george 1003 val code = mark(I.INSTR instr, an)::code
1794 : george 895 val returnSet = SL.return(SL.uniq(getCell return))
1795 :     in
1796 :     case returnSet of
1797 : leunga 815 [] => ()
1798 : george 889 | [r] => ST.push(stack, CB.registerNum r)
1799 : leunga 815 | _ =>
1800 :     error "can't return more than one fp argument (yet)";
1801 :     DONE code
1802 :     end
1803 : george 1003 fun x86trans instr =
1804 :     (case instr
1805 :     of I.FMOVE x => (log(); fmove x)
1806 :     | I.FBINOP x => (log(); fbinop x)
1807 :     | I.FIBINOP x => (log(); fibinop x)
1808 :     | I.FUNOP x => (log(); funop x)
1809 :     | I.FILOAD x => (log(); fiload x)
1810 :     | I.FCMP x => (log(); fcmp x)
1811 : leunga 815
1812 : george 1003 (* handle calling convention *)
1813 :     | I.CALL{return, ...} => (log(); call(instr,return))
1814 : leunga 731
1815 : george 1003 (*
1816 :     * Catch instructions that absolutely
1817 :     * should not have been generated at this point.
1818 :     *)
1819 :     | (I.FLD1 | I.FLDL2E | I.FLDLG2 | I.FLDLN2 | I.FLDPI |
1820 :     I.FLDZ | I.FLDL _ | I.FLDS _ | I.FLDT _ |
1821 :     I.FILD _ | I.FILDL _ | I.FILDLL _ |
1822 :     I.FENV _ | I.FBINARY _ | I.FIBINARY _ | I.FUNARY _ |
1823 :     I.FUCOMPP | I.FUCOM _ | I.FUCOMP _ | I.FCOMPP | I.FXCH _ |
1824 :     I.FSTPL _ | I.FSTPS _ | I.FSTPT _ | I.FSTL _ | I.FSTS _
1825 :     ) => bug("Illegal FP instructions")
1826 : leunga 731
1827 : george 1003 (* Other instructions are untouched *)
1828 :     | instr => FINISH(mark(I.INSTR instr, an)::code)
1829 :     (*esac*))
1830 :     in
1831 :     case instr
1832 :     of I.ANNOTATION{a,i} =>
1833 :     trans(stamp, i, a::an, rest, dead, lastUses, code)
1834 : george 1009 | I.COPY{k=CB.FP, ...} => (log(); fcopy instr)
1835 : george 1003 | I.INSTR instr => x86trans(instr)
1836 : george 1009 | _ => FINISH(mark(instr, an)::code)
1837 : leunga 731 end (* trans *)
1838 :    
1839 :     (*
1840 :     * Check the translation result to see if it matches the original
1841 :     * code.
1842 :     *)
1843 :     fun checkTranslation(stackIn, stackOut, insns) =
1844 :     let val n = ref(ST.depth stackIn)
1845 :     fun push() = n := !n + 1
1846 :     fun pop() = n := !n - 1
1847 : george 1003 fun scan(I.INSTR(I.FBINARY{binOp, ...})) =
1848 : leunga 731 (case binOp of
1849 :     ( I.FADDP | I.FSUBP | I.FSUBRP | I.FMULP
1850 :     | I.FDIVP | I.FDIVRP) => pop()
1851 :     | _ => ()
1852 :     )
1853 : george 1003 | scan(I.INSTR(I.FIBINARY{binOp, ...})) = ()
1854 :     | scan(I.INSTR(I.FUNARY I.FPTAN)) = push()
1855 :     | scan(I.INSTR(I.FUNARY _)) = ()
1856 :     | scan(I.INSTR(I.FLDL(I.ST n))) = push()
1857 :     | scan(I.INSTR(I.FLDL mem)) = push()
1858 :     | scan(I.INSTR(I.FLDS mem)) = push()
1859 :     | scan(I.INSTR(I.FLDT mem)) = push()
1860 :     | scan(I.INSTR(I.FSTL(I.ST n))) = ()
1861 :     | scan(I.INSTR(I.FSTPL(I.ST n))) = pop()
1862 :     | scan(I.INSTR(I.FSTL mem)) = ()
1863 :     | scan(I.INSTR(I.FSTS mem)) = ()
1864 :     | scan(I.INSTR(I.FSTPL mem)) = pop()
1865 :     | scan(I.INSTR(I.FSTPS mem)) = pop()
1866 :     | scan(I.INSTR(I.FSTPT mem)) = pop()
1867 :     | scan(I.INSTR(I.FXCH{opnd=i,...})) = ()
1868 :     | scan(I.INSTR(I.FUCOM _)) = ()
1869 :     | scan(I.INSTR(I.FUCOMP _)) = pop()
1870 :     | scan(I.INSTR(I.FUCOMPP)) = (pop(); pop())
1871 :     | scan(I.INSTR(I.FILD mem)) = push()
1872 :     | scan(I.INSTR(I.FILDL mem)) = push()
1873 :     | scan(I.INSTR(I.FILDLL mem)) = push()
1874 : leunga 731 | scan _ = ()
1875 :     val _ = app scan (rev insns);
1876 :     val n = !n
1877 :     val m = ST.depth stackOut
1878 : george 1003 in
1879 :     if n <> m then
1880 : leunga 731 (dump(insns);
1881 :     bug("Bad translation n="^i2s n^ " expected="^i2s m^"\n")
1882 :     )
1883 :     else ()
1884 :     end
1885 :    
1886 :    
1887 :     (* Dump the initial code *)
1888 :     val _ = if debug andalso !debugOn then
1889 :     (pr("-------- block "^i2s blknum^" ----"^
1890 : jhr 925 celllistToString liveIn^" "^
1891 : leunga 731 ST.stackToString stackIn^"\n");
1892 : jhr 925 dump (!insns);
1893 :     pr("succ=");
1894 :     app (fn b => pr(i2s b^" ")) (#succ cfg blknum);
1895 :     pr("\n")
1896 : leunga 731 )
1897 :     else ()
1898 :    
1899 :     (* Compute the last uses *)
1900 :     val lastUse = computeLastUse(blknum, insns, liveOut)
1901 :    
1902 :     (* Rewrite the code *)
1903 :     val (stamp, insns') = loop(stamp, rev(!insns), lastUse, code)
1904 :    
1905 :     (* Insert shuffle code at the end if necessary *)
1906 : jhr 925 val insns' = shuffleOut(stack, insns', blknum, block, liveOut)
1907 : leunga 731
1908 :     (* Dump translation *)
1909 :     val _ = if debug andalso !debugOn then
1910 :     (pr("-------- translation "^i2s blknum^"----"^
1911 : jhr 925 celllistToString liveIn^" "^
1912 : leunga 731 ST.stackToString stackIn^"\n");
1913 :     dump insns';
1914 :     pr("-------- done "^i2s blknum^"----"^
1915 : jhr 925 celllistToString liveOut^" "^
1916 : leunga 731 ST.stackToString stack^"\n")
1917 :     )
1918 :     else ()
1919 :    
1920 :     (* Check if things are okay *)
1921 :     val _ = if debug andalso sanityCheck then
1922 :     checkTranslation(stackIn, stack, insns')
1923 :     else ()
1924 :    
1925 :     in insns := insns'; (* update the instructions *)
1926 :     stamp
1927 :     end (* process *)
1928 :    
1929 :     in (* Translate all blocks *)
1930 : jhr 925 stamp := C.firstPseudo;
1931 :     #forall_nodes cfg rewriteAllBlocks;
1932 : leunga 731 (* If we found critical edges, then we have to split them... *)
1933 : jhr 925 if IntHashTable.numItems edgesToSplit = 0 then Cfg
1934 :     else repairCriticalEdges(Cfg)
1935 : leunga 744 end
1936 : leunga 731 end (* functor *)
1937 :    
1938 :     end (* local *)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0