Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /sml/trunk/src/MLRISC/x86/mltree/x86-fp.sml
ViewVC logotype

Annotation of /sml/trunk/src/MLRISC/x86/mltree/x86-fp.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 775 - (view) (download)

1 : leunga 731 (*
2 :     * This phase takes a cluster with pseudo x86 fp instructions, performs
3 :     * liveness analysis to determine their live ranges, and rewrite the
4 :     * program into the correct stack based code.
5 :     *
6 :     * The Basics
7 :     * ----------
8 :     * o We assume there are 7 pseudo fp registers, %fp(0), ..., %fp(6),
9 :     * which are mapped onto the %st stack. One stack location is reserved
10 :     * for holding temporaries.
11 :     * o Important: for floating point comparisons, we actually need
12 :     * two extra stack locations in the worst case. We handle this by
13 :     * specifying that the instruction define an extra temporary fp register
14 :     * when necessary.
15 :     * o The mapping between %fp <-> %st may change from program point to
16 :     * program point. We keep track of this lazy renaming and try to minimize
17 :     * the number of FXCH that we insert.
18 :     * o At split and merge points, we may get inconsistent %fp <-> %st mappings.
19 :     * We handle this by inserting the appropriate renaming code.
20 :     * o Parallel copies (renaming) are rewritten into a sequence of FXCHs!
21 :     *
22 :     * Pseudo fp instructions Semantics
23 :     * --------------------------------------
24 :     * FMOVE src, dst dst := src
25 :     * FILOAD ea, dst dst := cvti2f(mem[ea])
26 :     * FBINOP lsrc, rsrc, dst dst := lsrc * rsrc
27 :     * FIBINOP lsrc, rsrc, dst dst := lsrc * cvti2f(rsrc)
28 :     * FUNOP src, dst dst := unaryOp src
29 :     * FCMP lsrc, rsrc fp condition code := fcmp(lsrc, rsrc)
30 :     *
31 :     * An instruction may use its source operand(s) destructively.
32 :     * We find this info using a global liveness analysis.
33 :     *
34 :     * The Translation
35 :     * ---------------
36 :     * o We keep track of the bindings between %fp registers and the
37 :     * %st(..) staack locations.
38 :     * o FXCH and FLDL are inserted at the appropriate places to move operands
39 :     * to %st(0). FLDL is used if the operand is not dead. FXCH is used
40 :     * if the operand is the last use.
41 :     * o FCOPY's between pseudo %fp registers are done by software renaming
42 :     * and generate no code by itself!
43 :     * o FSTL %st(1) are also generated to pop the stack after the last use
44 :     * of an operand.
45 :     *
46 :     * Note
47 :     * ----
48 :     * 1. This module should be run after floating point register allocation.
49 :     * 2. Due to the extra complication of critical edge splitting, the cellset
50 :     * and frequency info are not preserved.
51 :     *
52 :     * -- Allen Leung (leunga@cs.nyu.edu)
53 :     *)
54 :    
55 :     local
56 :     val debug = false (* set this to true to debug this module
57 :     * set this to false for production use.
58 :     *)
59 :     val debugLiveness = false (* debug liveness analysis *)
60 :     val debugDead = false (* debug dead code removal *)
61 :     val sanityCheck = false
62 :     in
63 :     functor X86FP
64 :     (structure X86Instr : X86INSTR
65 :     structure X86Props : INSN_PROPERTIES where I = X86Instr
66 :     structure Flowgraph : FLOWGRAPH where I = X86Instr
67 :     structure Liveness : LIVENESS where F = Flowgraph
68 :     structure Asm : INSTRUCTION_EMITTER where I = X86Instr
69 :     sharing Flowgraph.P = Asm.P
70 :     ) : CLUSTER_OPTIMIZATION =
71 :     struct
72 :     structure F = Flowgraph
73 :     structure I = X86Instr
74 : leunga 775 structure T = I.T
75 : leunga 731 structure P = X86Props
76 :     structure C = I.C
77 :     structure A = Array
78 :     structure L = Label
79 :     structure LE = I.LabelExp
80 :     structure An = Annotations
81 : leunga 744 structure SL = C.SortedCells
82 : leunga 731
83 :     type flowgraph = F.cluster
84 :     type an = An.annotations
85 :    
86 :     val name = "X86 floating point rewrite"
87 :    
88 :     val debugOn = MLRiscControl.getFlag "x86-fp-debug"
89 :     val traceOn = MLRiscControl.getFlag "x86-fp-trace"
90 :    
91 :     fun error msg = MLRiscErrorMsg.error("X86FP",msg)
92 :     fun pr msg = TextIO.output(!MLRiscControl.debug_stream,msg)
93 :    
94 :     val i2s = Int.toString
95 :    
96 :     (*
97 :     * No overflow checking is needed for integer arithmetic in this module
98 :     *)
99 :     fun x + y = Word.toIntX(Word.+(Word.fromInt x, Word.fromInt y))
100 :     fun x - y = Word.toIntX(Word.-(Word.fromInt x, Word.fromInt y))
101 :    
102 :     (*-----------------------------------------------------------------------
103 :     * Primitive instruction handling routines
104 :     *-----------------------------------------------------------------------*)
105 :    
106 :     (* Annotation an instruction *)
107 :     fun mark(instr, []) = instr
108 :     | mark(instr, a::an) = mark(I.ANNOTATION{i=instr,a=a}, an)
109 :    
110 :     (* Add pop suffix to a binary operator *)
111 :     fun pop I.FADDL = I.FADDP | pop I.FADDS = I.FADDP
112 :     | pop I.FSUBL = I.FSUBP | pop I.FSUBS = I.FSUBP
113 :     | pop I.FSUBRL = I.FSUBRP | pop I.FSUBRS = I.FSUBRP
114 :     | pop I.FMULL = I.FMULP | pop I.FMULS = I.FMULP
115 :     | pop I.FDIVL = I.FDIVP | pop I.FDIVS = I.FDIVP
116 :     | pop I.FDIVRL = I.FDIVRP | pop I.FDIVRS = I.FDIVRP
117 :     | pop _ = error "fbinop.pop"
118 :    
119 :     (* Invert the operator *)
120 :     fun invert I.FADDL = I.FADDL | invert I.FADDS = I.FADDS
121 :     | invert I.FSUBL = I.FSUBRL | invert I.FSUBS = I.FSUBRS
122 :     | invert I.FSUBRL = I.FSUBL | invert I.FSUBRS = I.FSUBS
123 :     | invert I.FMULL = I.FMULL | invert I.FMULS = I.FMULS
124 :     | invert I.FDIVL = I.FDIVRL | invert I.FDIVS = I.FDIVRS
125 :     | invert I.FDIVRL = I.FDIVL | invert I.FDIVRS = I.FDIVS
126 :     | invert I.FADDP = I.FADDP | invert I.FMULP = I.FMULP
127 :     | invert I.FSUBP = I.FSUBRP | invert I.FSUBRP = I.FSUBP
128 :     | invert I.FDIVP = I.FDIVRP | invert I.FDIVRP = I.FDIVP
129 :     | invert _ = error "invert"
130 :    
131 :     (* Pseudo instructions *)
132 :     fun FLD(I.FP32, ea) = I.FLDS ea
133 :     | FLD(I.FP64, ea) = I.FLDL ea
134 :     | FLD(I.FP80, ea) = I.FLDT ea
135 :    
136 :     fun FILD(I.I8, ea) = error "FILD"
137 :     | FILD(I.I16, ea) = I.FILD ea
138 :     | FILD(I.I32, ea) = I.FILDL ea
139 :     | FILD(I.I64, ea) = I.FILDLL ea
140 :    
141 :     fun FSTP(I.FP32, ea) = I.FSTPS ea
142 :     | FSTP(I.FP64, ea) = I.FSTPL ea
143 :     | FSTP(I.FP80, ea) = I.FSTPT ea
144 :    
145 :     fun FST(I.FP32, ea) = I.FSTS ea
146 :     | FST(I.FP64, ea) = I.FSTL ea
147 :     | FST(I.FP80, ea) = error "FSTT"
148 :    
149 :     (*-----------------------------------------------------------------------
150 :     * Pretty print routines
151 :     *-----------------------------------------------------------------------*)
152 : leunga 744 fun fregToString f = "%f"^i2s(C.registerNum f)
153 : leunga 731 fun fregsToString s =
154 :     List.foldr (fn (r,"") => fregToString r |
155 :     (r,s) => fregToString r^" "^s) "" s
156 :     fun blknumOf(F.BBLOCK{blknum, ...}) = blknum
157 :    
158 :     (*-----------------------------------------------------------------------
159 :     * A stack datatype that mimics the x86 floating point stack
160 :     * and keeps track of bindings between %st(n) and %fp(n).
161 :     *-----------------------------------------------------------------------*)
162 :     structure ST :>
163 :     sig
164 :     type stack
165 :     type stnum = int (* 0 -- 7 *)
166 :     val create : unit -> stack
167 :     val stack0 : stack
168 :     val copy : stack -> stack
169 :     val clear : stack -> unit
170 : leunga 744 val fp : stack * C.register_id -> stnum
171 :     val st : stack * stnum -> C.register_id
172 :     val set : stack * stnum * C.register_id -> unit
173 :     val push : stack * C.register_id -> unit
174 : leunga 731 val xch : stack * stnum * stnum -> unit
175 :     val pop : stack -> unit
176 :     val depth : stack -> int
177 :     val nonFull : stack -> unit
178 :     val kill : stack * C.cell -> unit
179 :     val stackToString : stack -> string
180 :     val equal : stack * stack -> bool
181 :     end =
182 :     struct
183 :     type stnum = int
184 :     datatype stack =
185 : leunga 744 STACK of
186 :     { st : C.register_id A.array, (* mapping %st -> %fp registers *)
187 :     fp : stnum A.array, (* mapping %fp -> %st registers *)
188 :     sp : int ref (* stack pointer *)
189 :     }
190 : leunga 731
191 :     (* Create a new stack *)
192 :     fun create() = STACK{st=A.array(8,~1), fp=A.array(7,16), sp=ref ~1}
193 :    
194 :     val stack0 = create()
195 :    
196 :     (* Copy a stack *)
197 :     fun copy(STACK{st, fp, sp}) =
198 :     let val st' = A.array(8, ~1)
199 :     val fp' = A.array(7, 16)
200 :     in A.copy{src=st,dst=st',si=0,di=0,len=NONE};
201 :     A.copy{src=fp,dst=fp',si=0,di=0,len=NONE};
202 :     STACK{st=st', fp=fp', sp=ref(!sp)}
203 :     end
204 :    
205 :     (* Depth of stack *)
206 :     fun depth(STACK{sp, ...}) = !sp + 1
207 :    
208 :     fun nonFull(STACK{sp, ...}) =
209 :     if !sp >= 7 then error "stack overflow" else ()
210 :    
211 :     (* Given %st(n), lookup the corresponding %fp(n) *)
212 :     fun st(STACK{st, sp, ...}, n) = A.sub(st, !sp - n)
213 :    
214 :     (* Given %fp(n), lookup the corresponding %st(n) *)
215 : leunga 744 fun fp(STACK{fp, sp, ...}, n) = !sp - A.sub(fp, n)
216 : leunga 731
217 :     fun stackToString stack =
218 :     let val depth = depth stack
219 :     fun f i = if i >= depth then " ]"
220 : leunga 744 else "%st("^i2s i^")=%f"^i2s(st(stack,i))^" "^f(i+1)
221 : leunga 731 in "[ "^f 0 end
222 :    
223 :     fun clear(STACK{st, fp, sp, ...}) =
224 :     (sp := ~1; A.modify(fn _ => ~1) st; A.modify(fn _ => 16) fp)
225 :    
226 :     (* Set %st(n) := %f *)
227 :     fun set(STACK{st, fp, sp, ...}, n, f) =
228 :     (A.update(st, !sp - n, f);
229 : leunga 744 if f >= 0 then A.update(fp, f, !sp - n) else ()
230 : leunga 731 )
231 :    
232 :     (* Pop one entry *)
233 :     fun pop(STACK{sp, st, fp, ...}) = sp := !sp - 1
234 :    
235 :     (* Push %fp(f) onto %st(0) *)
236 :     fun push(stack as STACK{sp, ...}, f) = (sp := !sp + 1; set(stack, 0, f))
237 :    
238 :     (* Exchange the contents of %st(m) and %st(n) *)
239 :     fun xch(stack, m, n) =
240 :     let val f_m = st(stack, m)
241 :     val f_n = st(stack, n)
242 :     in set(stack, m, f_n);
243 :     set(stack, n, f_m)
244 :     end
245 :    
246 : leunga 744 fun kill(STACK{fp, ...}, f) = A.update(fp, C.registerNum f, 16)
247 : leunga 731
248 :     fun equal(st1, st2) =
249 :     let val m = depth st1
250 :     val n = depth st2
251 :     fun loop i =
252 :     i >= m orelse (st(st1, i) = st(st2, i) andalso loop(i+1))
253 :     in m = n andalso loop(0)
254 :     end
255 :    
256 :     end (* struct *)
257 :    
258 :     (*-----------------------------------------------------------------------
259 :     * Module to handle forward propagation.
260 :     * Forward propagation does the following:
261 :     * Given an instruction
262 :     * fmove mem, %fp(n)
263 :     * We delay the generation of the load until the first use of %fp(n),
264 :     * which we can further optimize by folding the load into the operand
265 :     * of the instruction, if it is the last use of this operand.
266 :     * If %fp(n) is dead then no load is necessary.
267 :     * Of course, we have to be careful whenever we encounter other
268 :     * instruction with a write.
269 :     *-----------------------------------------------------------------------*)
270 : leunga 744 (*
271 : leunga 731 structure ForwardPropagation :>
272 :     sig
273 :     type readbuffer
274 :     val create : ST.stack -> readbuffer
275 :     val load : readbuffer * C.cell * I.fsize * I.ea -> unit
276 :     val getreg : readbuffer * bool * C.cell * I.instruction list ->
277 :     I.operand * I.instruction list
278 :     val flush : readbuffer * I.instruction list -> I.instruction list
279 :     end =
280 :     struct
281 :    
282 :     datatype readbuffer =
283 :     READ of { stack : ST.stack,
284 :     loads : (I.fsize * I.ea) option A.array,
285 :     pending : int ref
286 :     }
287 :    
288 :     fun create stack =
289 :     READ{stack =stack,
290 :     loads =A.array(8, NONE),
291 :     pending =ref 0
292 :     }
293 :    
294 :     fun load(READ{pending, loads, ...}, fd, fsize, mem) =
295 : leunga 744 (A.update(loads, fd, SOME(fsize, mem));
296 : leunga 731 pending := !pending + 1
297 :     )
298 :    
299 :     (* Extract the operand for a register
300 :     * If it has a delayed load associated with it then
301 :     * we perform the load at this time.
302 :     *)
303 :     fun getreg(READ{pending, loads, stack, ...}, isLastUse, fs, code) =
304 : leunga 744 case A.sub(loads, fs) of
305 : leunga 731 NONE =>
306 :     let val n = ST.st(stack, fs)
307 :     in if isLastUse
308 :     then (ST n, code)
309 :     else let val code = I.FLDL(ST n)::code
310 :     in ST.push(stack, fs); (ST0, code)
311 :     end
312 :     end
313 :     | SOME(fsize, mem) =>
314 :     let val code = FLD(fsize, mem)::code
315 :     in A.update(loads, fs, NONE); (* delete load *)
316 :     pending := !pending - 1;
317 :     ST.push(stack, fs); (* fs is now in place *)
318 :     (ST0, code)
319 :     end
320 :    
321 :     (* Extract a binary operand.
322 :     * We'll try to fold this into the operand
323 :     *)
324 :     fun getopnd(READ{pending, loads, stack,...}, isLastUse, I.FPR fs, code) =
325 : leunga 744 (case A.sub(loads, fs) of
326 : leunga 731 NONE =>
327 :     let val n = ST.st(stack, fs)
328 :     in if isLastUse fs (* regmap XXX *)
329 :     then (ST n, code)
330 :     else let val code = I.FLDL(ST n)::code
331 :     in ST.push(stack, fs); (ST0, code)
332 :     end
333 :     end
334 :     | SOME(fsize, mem) =>
335 :     (A.update(loads, fs, NONE); (* delete load *)
336 :     pending := !pending - 1;
337 :     if isLastUse fs then (mem, code)
338 :     else let val code = FLD(fsize, mem)::code
339 :     in ST.push(stack, fs);
340 :     (ST0, code)
341 :     end
342 :     )
343 :     )
344 :     | getopnd(_, _, ea, code) = (ea, code)
345 :    
346 :     fun flush(READ{pending=ref 0,...}, code) = code
347 :    
348 :     end (* struct *)
349 : leunga 744 *)
350 : leunga 731
351 :     (*-----------------------------------------------------------------------
352 :     * Module to handle delayed stores.
353 :     * Delayed store does the following:
354 :     * Given an instruction
355 :     * fstore %fp(n), %mem
356 :     * We delay the generation of the store until necessary.
357 :     * This gives us an opportunity to rearrange the order of the stores
358 :     * to eliminate unnecessary fxch.
359 :     *-----------------------------------------------------------------------*)
360 : leunga 744 (*
361 : leunga 731 structure DelayStore :>
362 :     sig
363 :     type writebuffer
364 :     val create : ST.stack -> writebuffer
365 :     val flush : writebuffer * I.instruction list -> I.instruction list
366 :     end =
367 :     struct
368 :     datatype writebuffer =
369 :     WRITE of { front : (I.ea * C.cell) list ref,
370 :     back : (I.ea * C.cell) list ref,
371 :     stack : ST.stack,
372 :     pending : int ref
373 :     }
374 :     fun create stack = WRITE{front=ref [], back=ref [],
375 :     stack=stack, pending=ref 0}
376 :     fun flush(WRITE{pending=ref 0,...}, code) = code
377 :     end (* struct *)
378 : leunga 744 *)
379 : leunga 731
380 :     (*-----------------------------------------------------------------------
381 :     * Main routine.
382 :     *
383 :     * Algorithm:
384 :     * 1. Perform liveness analysis.
385 :     * 2. For each fp register, mark all its last use point(s).
386 :     * Registers are popped at their last uses.
387 :     * 3. Rewrite the instructions basic block by basic block.
388 :     * 4. Insert shuffle code at basic block boundaries.
389 :     * When necessary, split critical edges.
390 :     * 5. Sacrifice a goat to make sure things don't go wrong.
391 :     *-----------------------------------------------------------------------*)
392 : leunga 744 fun run(cluster as F.CLUSTER{blocks, blkCounter, ...}) =
393 :     let val getCell = C.CellSet.get C.FP (*extract the fp component of cellset*)
394 : leunga 731
395 : leunga 744 val stTable = A.tabulate(8, fn n => I.ST(C.ST n))
396 :    
397 :     fun ST n = (if sanityCheck andalso (n < 0 orelse n >= 8) then
398 :     pr("WARNING BAD %st("^i2s n^")\n")
399 :     else ();
400 :     A.sub(stTable, n)
401 :     )
402 :    
403 :     fun FXCH n = I.FXCH{opnd=C.ST n}
404 :    
405 :     val ST0 = ST 0
406 :     val ST1 = ST 1
407 :     val POP_ST = I.FSTPL ST0 (* Instruction to pop an entry *)
408 :    
409 : leunga 731 (* Dump instructions *)
410 :     fun dump instrs =
411 :     let val Asm.S.STREAM{emit, ...} =
412 :     AsmStream.withStream (!MLRiscControl.debug_stream)
413 :     Asm.makeStream []
414 :     in app emit (rev instrs)
415 :     end
416 :    
417 :     (* Create assembly of instruction *)
418 :     fun assemble instr =
419 :     let val buf = StringOutStream.mkStreamBuf()
420 :     val stream = StringOutStream.openStringOut buf
421 :     val Asm.S.STREAM{emit, ...} =
422 :     AsmStream.withStream stream Asm.makeStream []
423 : leunga 744 val _ = emit instr
424 : leunga 731 val s = StringOutStream.getString buf
425 :     val n = String.size s
426 :     in if n = 0 then s else String.substring(s, 0, n - 1)
427 :     end
428 :    
429 :     (*------------------------------------------------------------------
430 :     * Perform liveness analysis on the floating point variables
431 :     * P.S. I'm glad I didn't throw away the code liveness code.
432 :     *------------------------------------------------------------------*)
433 :     val defUse = P.defUse C.FP (* def/use properties *)
434 :     val _ = Liveness.liveness{defUse=defUse,
435 : leunga 744 updateCell=C.CellSet.update C.FP,
436 : leunga 731 getCell=getCell,
437 :     blocks=blocks
438 :     }
439 :     (*------------------------------------------------------------------
440 :     * Scan the instructions compute the last uses and dead definitions
441 :     * at each program point. Ideally we can do this during the code
442 :     * rewriting phase. But that's probably too error prone for now.
443 :     *------------------------------------------------------------------*)
444 :     fun computeLastUse(blknum, insns, liveOut) =
445 :     let fun scan([], _, lastUse) = lastUse
446 :     | scan(i::instrs, live, lastUse) =
447 :     let val (d, u) = defUse i
448 : leunga 744 val d = SL.uniq(d)(* definitions *)
449 :     val u = SL.uniq(u)(* uses *)
450 :     val dead = SL.return(SL.difference(d, live))
451 : leunga 731 val live = SL.difference(live, d)
452 : leunga 744 val last = SL.return(SL.difference(u, live))
453 :     val live = SL.union(live, u)
454 : leunga 731 val _ =
455 :     if debug andalso debugLiveness then
456 :     (case last of
457 :     [] => ()
458 :     | _ => print(assemble i^"\tlast use="^
459 :     fregsToString last^"\n")
460 :     )
461 :     else ()
462 :     in scan(instrs, live, (last,dead)::lastUse)
463 :     end
464 : leunga 744 val liveOutSet = SL.uniq(getCell (!liveOut))
465 : leunga 731 val _ =
466 :     if debug andalso debugLiveness then
467 :     print("LiveOut("^i2s blknum^") = "^
468 : leunga 744 fregsToString(SL.return liveOutSet)^"\n")
469 : leunga 731 else ()
470 :     in scan(!insns, liveOutSet, [])
471 :     end
472 :    
473 :     (*------------------------------------------------------------------
474 :     * Temporary work space
475 :     *------------------------------------------------------------------*)
476 :     val {high, low} = C.cellRange C.FP
477 :     val n = high+1
478 :     val lastUseTbl = A.array(n,~1) (* table for marking last uses *)
479 :     val useTbl = A.array(n,~1) (* table for marking uses *)
480 :    
481 :     (* %fp register bindings before and after a basic block *)
482 :     val bindingsIn = A.array(!blkCounter, NONE)
483 :     val bindingsOut = A.array(!blkCounter, NONE)
484 :     val stampCounter = ref ~4096
485 :    
486 :     (* Edges that need splitting *)
487 :     exception NoEdgesToSplit
488 : blume 733 val edgesToSplit = IntHashTable.mkTable(32, NoEdgesToSplit)
489 :     val addEdgesToSplit = IntHashTable.insert edgesToSplit
490 : leunga 744 fun lookupEdgesToSplit b =
491 :     getOpt(IntHashTable.find edgesToSplit b, [])
492 : leunga 731
493 :     (*------------------------------------------------------------------
494 :     * Code for handling bindings between basic block
495 :     *------------------------------------------------------------------*)
496 :    
497 :     fun splitEdge(targetId, source, target) =
498 :     (if debug andalso !traceOn then
499 :     pr("SPLITTING "^i2s(blknumOf source)^"->"^
500 :     i2s(blknumOf target)^"\n")
501 :     else ();
502 :     addEdgesToSplit(targetId,
503 :     (source,target)::lookupEdgesToSplit targetId)
504 :     )
505 :    
506 :     (* Given a cellset, return a sorted and unique
507 :     * list of elements with all non-physical registers removed
508 :     *)
509 :     fun removeNonPhysical cellSet =
510 : leunga 744 let fun loop([], S) = SL.return(SL.uniq S)
511 : leunga 731 | loop(f::fs, S) =
512 : leunga 744 let val fx = C.registerNum f
513 :     in loop(fs,if fx <= 7 then f::S else S)
514 : leunga 731 end
515 :     in loop(getCell(!cellSet), [])
516 :     end
517 :    
518 :     (* Given a sorted and unique list of registers,
519 :     * Return a stack with these elements
520 :     *)
521 :     fun newStack fregs =
522 :     let val stack = ST.create()
523 : leunga 744 in app (fn f => ST.push(stack, C.registerNum f)) (rev fregs);
524 : leunga 731 stack
525 :     end
526 :    
527 :     (*
528 :     * This function looks at all the entries on the stack,
529 :     * and generate code to deallocate all the dead values.
530 :     * The stack is updated.
531 :     *)
532 :     fun removeDeadValues(stack, liveSet, code) =
533 :     let val stamp = !stampCounter
534 :     val _ = stampCounter := !stampCounter - 1
535 :     fun markLive [] = ()
536 : leunga 744 | markLive(r::rs) =
537 :     (A.update(useTbl, C.registerNum r, stamp); markLive rs)
538 : leunga 731 fun isLive f = A.sub(useTbl, f) = stamp
539 :     fun loop(i, depth, code) =
540 :     if i >= depth then code else
541 :     let val f = ST.st(stack, i)
542 :     in if isLive f (* live? *)
543 :     then loop(i+1, depth, code)
544 :     else
545 :     (if debug andalso !traceOn then
546 : leunga 744 pr("REMOVING %f"^i2s f^" in %st("^i2s i^")"^
547 : leunga 731 " current stack="^ST.stackToString stack^"\n")
548 :     else ();
549 :     if i = 0 then
550 :     (ST.pop stack;
551 :     loop(0, depth-1, POP_ST::code)
552 :     )
553 :     else (ST.xch(stack,0,i);
554 :     ST.pop stack;
555 :     loop(0, depth-1, I.FSTPL(ST i)::code)
556 :     )
557 :     )
558 :     end
559 :     in markLive liveSet;
560 :     loop(0, ST.depth stack, code)
561 :     end
562 :    
563 :    
564 :     (*------------------------------------------------------------------
565 :     * Given two stacks, source and target, where the bindings are
566 :     * permutation of each other, generate the minimal number of
567 :     * fxchs to match source with target.
568 :     *
569 :     * Important: source and target MUST be permutations of each other.
570 :     *
571 :     * Essentially, we first decompose the permutation into cycles,
572 :     * and process each cycle.
573 :     *------------------------------------------------------------------*)
574 :     fun shuffle(source, target, code) =
575 :     let val stamp = !stampCounter
576 :     val _ = stampCounter := !stampCounter - 1
577 :     val permutation = lastUseTbl (* reuse the space *)
578 :    
579 :     val _ = if debug andalso !traceOn then
580 :     pr("SHUFFLE "^ST.stackToString source^
581 :     "->"^ST.stackToString target^"\n")
582 :     else ()
583 :    
584 :     (* Compute the initial permutation *)
585 :     val n = ST.depth source
586 :     fun computeInitialPermutation(i) =
587 :     if i >= n
588 :     then ()
589 :     else let val f = ST.st(source, i)
590 :     val j = ST.fp(target, f)
591 :     in A.update(permutation, j, i);
592 :     computeInitialPermutation(i+1)
593 :     end
594 :     val _ = computeInitialPermutation 0
595 :    
596 :     (* Decompose the initial permutation into cycles.
597 :     * The cycle involving 0 is treated specially.
598 :     *)
599 :     val visited = useTbl
600 :     fun isVisited i = A.sub(visited,i) = stamp
601 :     fun markAsVisited i = A.update(visited,i,stamp)
602 :     fun decomposeCycles(i, cycle0, cycles) =
603 :     if i >= n then (cycle0, cycles)
604 :     else if isVisited i orelse
605 :     A.sub(permutation, i) = i (* trivial cycle *)
606 :     then decomposeCycles(i+1, cycle0, cycles)
607 :     else let fun makeCycle(j, cycle, zero) =
608 :     let val k = A.sub(permutation, j)
609 :     val cycle = j::cycle
610 :     val zero = zero orelse j = 0
611 :     in markAsVisited j;
612 :     if k = i then (cycle, zero)
613 :     else makeCycle(k, cycle, zero)
614 :     end
615 :     val (cycle, zero) = makeCycle(i, [], false)
616 :     in if zero then decomposeCycles(i+1, [cycle], cycles)
617 :     else decomposeCycles(i+1, cycle0, cycle::cycles)
618 :     end
619 :    
620 :     val (cycle0, cycles) = decomposeCycles(0, [], [])
621 :    
622 :     (*
623 :     * Generate shuffle for a cycle that does not involve 0.
624 :     * Given a cycle (c_1, ..., c_k), we generate this code:
625 :     * fxch %st(c_1),
626 :     * fxch %st(c_2),
627 :     * ...
628 :     * fxch %st(c_k),
629 :     * fxch %st(c_1)
630 :     *)
631 :     fun genxch([], code) = code
632 :     | genxch(c::cs, code) = genxch(cs, FXCH c::code)
633 :    
634 :     fun gen([], code) = error "shuffle.gen"
635 :     | gen(cs as (c::_), code) = FXCH c::genxch(cs, code)
636 :    
637 :     (*
638 :     * Generate shuffle for a cycle that involves 0.
639 :     * Given a cycle (c_1,...,c_k) we first shuffle this to
640 :     * an equivalent cycle (c_1, ..., c_k) where c'_k = 0,
641 :     * then we generate this code:
642 :     * fxch %st(c'_1),
643 :     * fxch %st(c'_2),
644 :     * ...
645 :     * fxch %st(c'_{k-1}),
646 :     *)
647 :     fun gen0([], code) = error "shuffle.gen0"
648 :     | gen0(cs, code) =
649 :     let fun rearrange(0::cs, cs') = cs@rev cs'
650 :     | rearrange(c::cs, cs') = rearrange(cs, c::cs')
651 :     | rearrange([], _) = error "shuffle.rearrange"
652 :     val cs = rearrange(cs, [])
653 :     in genxch(cs, code)
654 :     end
655 :    
656 :     (*
657 :     * Generate code. Must process the non-zero cycles first.
658 :     *)
659 :     val code = List.foldr gen code cycles
660 :     val code = List.foldr gen0 code cycle0
661 :     in code
662 :     end (* shuffle *)
663 :    
664 :     (*------------------------------------------------------------------
665 :     * Insert code at the end of a basic block.
666 :     * Make sure we put code in front of a transfer instruction
667 :     *------------------------------------------------------------------*)
668 :     fun insertAtEnd(insns, code) =
669 :     (case insns of
670 :     [] => code
671 :     | jmp::rest =>
672 :     if P.instrKind jmp = P.IK_JUMP then
673 :     jmp::code@rest
674 :     else
675 :     code@insns
676 :     )
677 :    
678 :     (*------------------------------------------------------------------
679 :     * Magic for inserting shuffle code at the end of a basic block
680 :     *------------------------------------------------------------------*)
681 :     fun shuffleOut(stackOut, insns, b, block, succ, liveOut) =
682 :     let val liveOut = removeNonPhysical liveOut
683 :    
684 :     (* Generate code that remove unnecessary values *)
685 :     val code = removeDeadValues(stackOut, liveOut, [])
686 :    
687 :     fun done(stackOut, insns, code) =
688 :     (A.update(bindingsOut,b,SOME stackOut);
689 :     insertAtEnd(insns, code)
690 :     )
691 :    
692 :     (* Generate code that shuffle values from source to target *)
693 :     fun match(source, target) =
694 :     done(target, insns, shuffle(source, target, []))
695 :    
696 :     (* Generate code that shuffle values from source to liveOut *)
697 :     fun matchLiveOut() =
698 :     case liveOut of
699 :     [] => done(stackOut, insns, code)
700 :     | _ => match(stackOut, newStack liveOut)
701 :    
702 :     (* With multiple successors, find out which one we
703 :     * should connect to. Choose the one from the block that
704 :     * follows from this one, if that exists, or else choose
705 :     * from the edge with the highest frequency.
706 :     *)
707 :     fun find([], _, id, best) = (id, best)
708 :     | find((F.BBLOCK{blknum, insns, ...},freq)::edges,
709 :     highestFreq, id, best) =
710 :     if blknum = b+1 then (blknum, A.sub(bindingsIn, blknum))
711 :     else (case A.sub(bindingsIn, blknum) of
712 :     NONE => find(edges, highestFreq, id, best)
713 :     | this as SOME stack =>
714 :     if highestFreq < !freq then
715 :     find(edges, !freq, blknum, this)
716 :     else
717 :     find(edges, highestFreq, id, best)
718 :     )
719 :     | find(_::edges, highestFreq, id, best) =
720 :     find(edges, highestFreq, id, best)
721 :    
722 :     fun splitAllEdgesExcept([], succBlock) = ()
723 :     | splitAllEdgesExcept((next as F.BBLOCK{blknum, ...},_)::edges,
724 :     succBlock) =
725 :     (if blknum <> succBlock andalso blknum <= b
726 :     then splitEdge(blknum,block,next) else ();
727 :     splitAllEdgesExcept(edges, succBlock)
728 :     )
729 :     | splitAllEdgesExcept((F.EXIT _,_)::_, _) =
730 :     error "can't split exit edge!"
731 :     | splitAllEdgesExcept(_::edges, succBlock) =
732 :     splitAllEdgesExcept(edges, succBlock)
733 :    
734 :     in case !succ of
735 :     [] => matchLiveOut()
736 :     | [(F.EXIT _,_)] => matchLiveOut()
737 :     | succ =>
738 :     (* Just one successor;
739 :     * try to match the bindings of the successor if it exist.
740 :     *)
741 :     let val (succBlock, target) = find(succ, ~1, ~1, NONE)
742 :     in splitAllEdgesExcept(succ, succBlock);
743 :     case target of
744 :     SOME stackIn => match(stackOut, stackIn)
745 :     | NONE => done(stackOut,insns,code)
746 :     end
747 :     end (* shuffleOut *)
748 :    
749 :     (*------------------------------------------------------------------
750 :     * Compute the initial fp stack bindings for basic block b.
751 :     *------------------------------------------------------------------*)
752 :     fun shuffleIn(b, block, pred, liveIn) =
753 :     let val liveInSet = removeNonPhysical liveIn
754 :    
755 :     (* With multiple predecessors, find out which one we
756 :     * should connect to. Choose the one from the block that
757 :     * falls into this one, if that exists, or else choose
758 :     * from the edge with the highest frequency.
759 :     *)
760 :     fun find([], _, best) = best
761 :     | find((F.BBLOCK{blknum, insns, ...},freq)::edges,
762 :     highestFreq, best) =
763 :     (case A.sub(bindingsOut, blknum) of
764 :     NONE => find(edges, highestFreq, best)
765 :     | this as SOME stack =>
766 :     if blknum = b-1 then (* falls into b *)
767 :     this
768 :     else if highestFreq < !freq then
769 :     find(edges, !freq, this)
770 :     else
771 :     find(edges, highestFreq, best)
772 :     )
773 :     | find(_::edges, highestFreq, best) =
774 :     find(edges, highestFreq, best)
775 :    
776 :     fun splitAllDoneEdges [] = ()
777 :     | splitAllDoneEdges
778 :     ((prev as F.BBLOCK{blknum, ...},_)::edges) =
779 :     (if blknum < b
780 :     then splitEdge(b,prev,block) else ();
781 :     splitAllDoneEdges edges
782 :     )
783 :     | splitAllDoneEdges(_::edges) = splitAllDoneEdges edges
784 :    
785 : leunga 744 (* The initial stack bindings are determined by the live set.
786 : leunga 731 * No compensation code is needed.
787 :     *)
788 :     fun fromLiveIn() =
789 :     let val stackIn =
790 :     case liveInSet of
791 :     [] => ST.stack0
792 :     | _ =>
793 : leunga 744 (pr("liveIn="^C.CellSet.toString (!liveIn)^"\n");
794 : leunga 731 newStack liveInSet
795 :     )
796 :     val stackOut = ST.copy stackIn
797 :     in (stackIn, stackOut, [])
798 :     end
799 :    
800 :     val (stackIn, stackOut, code) =
801 :     case find(!pred, ~1, NONE) of
802 :     NONE => (splitAllDoneEdges(!pred); fromLiveIn())
803 :     | SOME stackIn' =>
804 :     (case !pred of
805 :     [_] => (* one predecessor *)
806 :     (* Use the bindings as from the previous block
807 :     * We first have to deallocate all unused values.
808 :     *)
809 :     let val stackOut = ST.copy stackIn'
810 :     (* Clean the stack of unused entries *)
811 :     val code = removeDeadValues(stackOut, liveInSet, [])
812 :     in (stackIn', stackOut, code) end
813 :     | _ => (* more than one predecessors *)
814 :     let val stackIn = ST.copy stackIn'
815 :     val code = removeDeadValues(stackIn, liveInSet, [])
816 :     val stackOut = ST.copy stackIn
817 :     in (* If we have to generate code to deallocate
818 :     * the stack then we have split the edge.
819 :     *)
820 :     case code of
821 :     [] => ()
822 :     | _ => splitAllDoneEdges(!pred);
823 :     (stackIn, stackOut, [])
824 :     end
825 :     )
826 :     in A.update(bindingsIn, b, SOME stackIn);
827 :     A.update(bindingsOut, b, SOME stackOut);
828 :     (stackIn, stackOut, code)
829 :     end
830 :    
831 :     (*------------------------------------------------------------------
832 :     * Code for patching up critical edges.
833 :     * The trick is finding a good place to insert the critical edges.
834 :     * The cluster representation is very hard to work with.
835 :     *------------------------------------------------------------------*)
836 :     fun repairCriticalEdges
837 :     (cluster as F.CLUSTER{blocks, entry, exit, annotations,
838 : leunga 744 blkCounter}) =
839 : leunga 731 let (* Data structure for recording critical edge splitting info *)
840 :     datatype compensationCode =
841 :     NEWEDGE of
842 :     {label:L.label, (* label of new block *)
843 :     preds:F.block list ref, (* predecessors *)
844 :     code:I.instruction list,(* code *)
845 :     comment:an
846 :     }
847 :    
848 :     val cleanup = [#create MLRiscAnnotations.COMMENT "cleanup edge"]
849 :     val critical = [#create MLRiscAnnotations.COMMENT "critical edge"]
850 :    
851 :     exception Nothing
852 :    
853 :     (* Repair code table; mapping from block id -> compensation code *)
854 : blume 733 val repairCodeTable = IntHashTable.mkTable(32, Nothing)
855 :     val addRepairCode = IntHashTable.insert repairCodeTable
856 : leunga 744 fun lookupRepairCode b =
857 :     getOpt(IntHashTable.find repairCodeTable b,[])
858 : leunga 731
859 :     (* Repair code table; mapping from block id -> compensation code
860 :     * These must be relocated...
861 :     *)
862 : blume 733 val repairCodeTable' = IntHashTable.mkTable(32, Nothing)
863 :     val addRepairCode' = IntHashTable.insert repairCodeTable'
864 : leunga 744 fun lookupRepairCode' b =
865 :     getOpt(IntHashTable.find repairCodeTable' b,[])
866 : leunga 731
867 :     (* Mapping from block id -> labels *)
868 : blume 733 val labelTable = IntHashTable.mkTable(32, Nothing)
869 :     val addLabel = IntHashTable.insert labelTable
870 : leunga 744 fun lookupLabel b = getOpt(IntHashTable.find labelTable b, [])
871 : leunga 731
872 :     (* Scan code and insert labels *)
873 :     fun insertLabels([], []) = ()
874 :     | insertLabels(labels, []) = error "orphan labels"
875 :     | insertLabels(labels, F.LABEL l::blocks) =
876 :     insertLabels(l::labels, blocks)
877 :     | insertLabels(labels, (b as F.BBLOCK{blknum,...})::blocks) =
878 :     (addLabel(blknum, labels); insertLabels([], blocks))
879 :     | insertLabels(_, _::blocks) = insertLabels([], blocks)
880 :     (* skip labels to pseudo ops *)
881 :    
882 :     val _ = insertLabels([], blocks)
883 :    
884 :     (* Does the block falls thru from the previous block? *)
885 :     fun isFallsThru(F.BBLOCK{pred, blknum=j, ...}) =
886 :     let fun loop((F.BBLOCK{blknum=i,...},_)::rest) =
887 :     i+1 = j orelse loop rest
888 :     | loop((F.ENTRY _,_)::_) = true
889 :     | loop(_) = false
890 :     in loop(!pred)
891 :     end
892 :     | isFallsThru _ = false
893 :    
894 :     (* Create jump instruction to a block *)
895 :     fun jump(F.BBLOCK{blknum,...}) =
896 :     (case lookupLabel blknum of
897 :     [] => error "no label to target of critical edge!"
898 :     | l::_ => P.jump l
899 :     )
900 :     | jump _ = error "jump"
901 :    
902 :     (*
903 :     * Special case: target block has stack depth of 0.
904 :     * Just generate code that pop entries from the sources.
905 :     * To make things interesting, we try to share code among
906 :     * all the critical edges.
907 :     *)
908 :     fun genPoppingCode(_, []) = ()
909 :     | genPoppingCode(targetId, edges as (_,target)::_) =
910 :     let val preds =
911 :     map (fn (source, _) =>
912 :     let val sourceId = blknumOf source
913 :     val SOME stackOut = A.sub(bindingsOut,sourceId)
914 :     val n = ST.depth stackOut
915 :     in (n, source) end
916 :     ) edges
917 :     (* Ordered by increasing stack height *)
918 :     val preds =
919 :     ListMergeSort.sort (fn ((n,_),(m,_)) => n > m) preds
920 :    
921 :     val relocate = isFallsThru target
922 :    
923 :     fun pop(0, code) = code
924 :     | pop(n, code) = pop(n-1,POP_ST::code)
925 :    
926 :     fun makeCode(popCount, rest) =
927 :     let val code = pop(popCount, [])
928 :     in case rest of
929 :     [] => if relocate then jump target::code
930 :     else code
931 :     | _ => code
932 :     end
933 :    
934 :     (* Generate code, share code between edges that
935 :     * have to pop the same number of elements
936 :     *)
937 :     fun gen([], h, code) = code
938 :     | gen((n,b)::rest, _, []) =
939 :     gen(rest, n,
940 :     [NEWEDGE{label=L.newLabel "",preds=ref [b],
941 :     code=makeCode(n,rest), comment=cleanup}])
942 :     | gen((n,b)::rest, h, all as (NEWEDGE{preds, ...}::_)) =
943 :     gen(rest,h,
944 :     if n = h then
945 :     (preds := b :: !preds; all)
946 :     else
947 :     NEWEDGE{label=L.newLabel "", preds=ref [b],
948 :     code=makeCode(n-h,rest),
949 :     comment=cleanup}::all
950 :     )
951 :     val repairCode = gen(preds, 0, [])
952 :     in (if relocate then addRepairCode' else addRepairCode)
953 :     (targetId, repairCode)
954 :     end
955 :    
956 :     (* The general case:
957 :     * Remove dead values, then
958 :     * Shuffle.
959 :     *)
960 :     fun genRepairCode(targetId, stackIn, edges) =
961 :     let val repairList = ref []
962 :     val repairCount = ref 0
963 :     val SOME stackIn = A.sub(bindingsIn, targetId)
964 :     fun repair(source, target) =
965 :     let val F.BBLOCK{blknum=sourceId, ...} = source
966 :     val SOME stackOut' = A.sub(bindingsOut, sourceId)
967 :     fun createNewRepairEdge() =
968 :     let val stackOut = ST.copy stackOut'
969 :     val F.BBLOCK{liveIn, ...} = target
970 :     val liveInSet = removeNonPhysical liveIn
971 :     val _ =
972 :     if debug then
973 :     pr("LiveIn = "^
974 : leunga 744 C.CellSet.toString (!liveIn)^
975 : leunga 731 "\n")
976 :     else ()
977 :    
978 :     (* deallocate unused values *)
979 :     val code = removeDeadValues(stackOut, liveInSet, [])
980 :     (* shuffle values *)
981 :     val code = shuffle(stackOut, stackIn, code)
982 :     fun addNewEdge() =
983 :     let (* Do we need to relocate this block? *)
984 :     val relocate = !repairCount > 0 orelse
985 :     isFallsThru target andalso
986 :     sourceId + 1 <> targetId
987 :    
988 :     (* add a jump to the target block *)
989 :     val code = if relocate then jump target::code
990 :     else code
991 :    
992 :     val repairCode =
993 :     NEWEDGE{label=L.newLabel "",
994 :     preds=ref [source],
995 :     code=code, comment=critical}
996 :     in repairCount := !repairCount + 1;
997 :     repairList := (repairCode, stackOut')
998 :     :: !repairList;
999 :     if relocate then
1000 :     addRepairCode'(targetId,
1001 :     repairCode::lookupRepairCode' targetId)
1002 :     else
1003 :     addRepairCode(targetId,
1004 :     repairCode::lookupRepairCode targetId)
1005 :     end
1006 :     in case source of
1007 :     F.BBLOCK{succ=ref [(F.BBLOCK{blknum=j,...},_)],
1008 :     insns, ...} =>
1009 :     if j = targetId then (*insert code at predecessor*)
1010 :     insns := insertAtEnd(!insns, code)
1011 :     else
1012 :     addNewEdge()
1013 :     | _ => addNewEdge()
1014 :     end
1015 :    
1016 :     fun shareRepairEdge [] = false
1017 :     | shareRepairEdge((NEWEDGE{preds,...},stackOut'')::rest) =
1018 :     if ST.equal(stackOut'', stackOut') then
1019 :     (preds := source :: !preds; true)
1020 :     else shareRepairEdge rest
1021 :    
1022 :     in if shareRepairEdge(!repairList) then ()
1023 :     else createNewRepairEdge()
1024 :     end
1025 :     in app repair edges
1026 :     end
1027 :    
1028 :     (*
1029 :     * Code to split critical edges entering block targetId
1030 :     *)
1031 :     fun split(targetId, edges) =
1032 :     let val SOME stackIn = A.sub(bindingsIn,targetId)
1033 :     fun log(source, target) =
1034 :     let val s = blknumOf source
1035 :     val t = blknumOf target
1036 :     val SOME stackOut = A.sub(bindingsOut,s)
1037 :     in pr("SPLIT "^i2s s^"->"^i2s t^" "^
1038 :     ST.stackToString stackOut^"->"^
1039 :     ST.stackToString stackIn^"\n")
1040 :     end
1041 :     val _ = if debug andalso !traceOn then app log edges else ()
1042 :     in if ST.depth stackIn = 0 then genPoppingCode(targetId, edges)
1043 :     else genRepairCode(targetId, stackIn, edges)
1044 :     end
1045 :    
1046 :     (* Renumber all the blocks and insert compensation code at the
1047 :     * right places.
1048 :     *)
1049 :     fun renumberBlocks() =
1050 : blume 733 let val labelTbl = IntHashTable.mkTable(32, Nothing)
1051 :     val addLabel = IntHashTable.insert labelTbl
1052 : leunga 731 fun insertLabel(L.Label{id, ...},block) = addLabel(id, block)
1053 :    
1054 :     val entries = ref []
1055 :     val exits = ref []
1056 :    
1057 :     (* retarget the branch of block *)
1058 : leunga 775 fun retarget(I.JMP(I.ImmedLabel(T.LABEL _), [_])::rest, l) =
1059 :     I.JMP(I.ImmedLabel(T.LABEL l), [l])::rest
1060 :     | retarget(I.JCC{cond,opnd=I.ImmedLabel(T.LABEL _)}::rest,l)=
1061 :     I.JCC{cond=cond,opnd=I.ImmedLabel(T.LABEL l)}::rest
1062 : leunga 731 | retarget(_,l) = error "retarget"
1063 :    
1064 :     (* Translate repair code to actual block *)
1065 :     fun transRepair(n, [], blocks) = (n, blocks)
1066 :     | transRepair(n, NEWEDGE{label, preds, code, comment}::rest,
1067 :     blocks) =
1068 :     let val blocks = F.LABEL label::blocks
1069 :     val this = F.BBLOCK{blknum=n, freq=ref 0,
1070 :     pred=ref [],
1071 :     succ=ref [],
1072 :     annotations=ref comment,
1073 :     liveIn=ref C.empty,
1074 :     liveOut=ref C.empty,
1075 :     insns=ref code}
1076 :     fun retargetBlock(F.BBLOCK{insns, ...}) =
1077 :     insns := retarget(!insns, label)
1078 :     | retargetBlock _ = ()
1079 :     in if debug andalso !traceOn then
1080 :     pr("Inserting critical edge at block "^i2s n^" "^
1081 :     L.nameOf label^"\n")
1082 :     else ();
1083 :     insertLabel(label, this);
1084 :     app retargetBlock (!preds);
1085 :     transRepair(n+1, rest, this::blocks)
1086 :     end
1087 :    
1088 :     fun renumber(n, [], pseudoOps, repairCode', blocks) =
1089 :     let val (n, blocks) = transRepair(n, repairCode', blocks)
1090 :     val blocks = pseudoOps @ blocks
1091 :     in (n, rev blocks)
1092 :     end
1093 :     | renumber(n, (block as
1094 :     F.BBLOCK{blknum,annotations,insns,freq,
1095 :     pred,succ,liveIn,liveOut,...})::rest,
1096 :     pseudoOps, repairCode', blocks) =
1097 :     let (* If we have outstanding repair code and this is
1098 :     * NOT a fallsthru entry, then insert them here.
1099 :     *)
1100 :     val (n, blocks, repairCode') =
1101 :     case repairCode' of
1102 :     [] => (n, blocks, [])
1103 :     | _ => if isFallsThru block then
1104 :     (n, blocks, repairCode')
1105 :     else
1106 :     let val (n, blocks) =
1107 :     transRepair(n, repairCode', blocks)
1108 :     in (n, blocks, [])
1109 :     end
1110 :    
1111 :     (* Insert non-relocatable repair code *)
1112 :     val repairCode = lookupRepairCode blknum
1113 :     val (n, blocks) = transRepair(n, repairCode, blocks)
1114 :    
1115 :     (* Create this block *)
1116 :     val this = F.BBLOCK{blknum=n,annotations=annotations,
1117 :     freq=freq, insns=insns,
1118 :     pred=ref [], succ=ref [],
1119 :     liveIn=liveIn,liveOut=liveOut
1120 :     }
1121 :    
1122 :     (* Insert new relocatable repair code *)
1123 :     val repairCode' = repairCode' @
1124 :     lookupRepairCode' blknum
1125 :    
1126 :     (* Insert labels *)
1127 :     fun insertLabels((p as F.LABEL l)::ps) =
1128 :     (insertLabel(l, this); insertLabels ps)
1129 :     | insertLabels(p::ps) = insertLabels ps
1130 :     | insertLabels [] = blocks
1131 :    
1132 :     val _ = insertLabels pseudoOps
1133 :    
1134 :     val blocks = this::pseudoOps @ blocks
1135 :    
1136 :     fun addEntry((F.ENTRY _,w)::_) =
1137 :     entries := (this,w):: !entries
1138 :     | addEntry(_::es) = addEntry es
1139 :     | addEntry [] = ()
1140 :     fun addExit((F.EXIT _,w)::_) =
1141 :     exits := (this,w) :: !exits
1142 :     | addExit(_::es) = addExit es
1143 :     | addExit [] = ()
1144 :    
1145 :     in addEntry(!pred); (* check if this is an entry *)
1146 :     addExit(!succ); (* check if this ia an exit *)
1147 :     renumber(n+1, rest, [], repairCode', blocks)
1148 :     end
1149 :     | renumber(n, p::rest, pseudoOps, repairCode', blocks) =
1150 :     renumber(n, rest, p::pseudoOps, repairCode', blocks)
1151 :    
1152 :     val (n, blocks) = renumber(0, blocks, [], [], [])
1153 :    
1154 :     (* New entry and exits *)
1155 :     val F.ENTRY{freq=entryFreq, ...} = entry
1156 :     val newEntry = F.ENTRY{blknum=n, freq=entryFreq, succ=entries}
1157 :     val n = n+1
1158 :     val F.EXIT{freq=exitFreq, ...} = exit
1159 :     val newExit = F.EXIT{blknum=n, freq=exitFreq, pred=exits}
1160 :     val n = n+1
1161 :    
1162 : leunga 744 val lookupLabel = IntHashTable.find labelTbl
1163 :     val lookupLabel =
1164 :     fn l => case lookupLabel l of
1165 :     SOME b => b
1166 :     | NONE => newExit
1167 : leunga 731
1168 :     fun addPred b (F.BBLOCK{pred, ...},w) = pred := (b,w) :: !pred
1169 :     | addPred b (F.EXIT{pred, ...},w) = pred := (b,w) :: !pred
1170 :     | addPred _ _ = error "addPred"
1171 :    
1172 :     fun adjustSucc(
1173 :     (blk as F.BBLOCK{blknum,insns,succ,pred,...})::rest) =
1174 :     let fun follows(F.LABEL _::rest) = follows rest
1175 :     | follows((b as F.BBLOCK _)::rest) = (b, ref 0)
1176 :     | follows [] = (newExit, ref 0)
1177 :     fun succBlocks([], succ) = succ
1178 :     | succBlocks(P.ESCAPES::targets, succ) =
1179 :     succBlocks(targets, (newExit, ref 0)::succ)
1180 :     | succBlocks(P.FALLTHROUGH::targets, succ) =
1181 :     succBlocks(targets, follows rest::succ)
1182 :     | succBlocks(P.LABELLED(L.Label{id,...})::targets,
1183 :     succ) =
1184 :     succBlocks(targets, (lookupLabel id, ref 0)::succ)
1185 :     fun fallsThru rest = [follows rest]
1186 :     in case !insns of
1187 :     [] => succ := fallsThru rest
1188 :     | jmp::_ =>
1189 :     case P.instrKind jmp of
1190 :     P.IK_JUMP =>
1191 :     succ := succBlocks(P.branchTargets jmp,[])
1192 :     | _ => succ := fallsThru rest;
1193 :     app (addPred blk) (!succ);
1194 :     adjustSucc rest
1195 :     end
1196 :     | adjustSucc(_::rest) = adjustSucc rest
1197 :     | adjustSucc [] = ()
1198 :    
1199 :     val _ = adjustSucc blocks
1200 :     val _ = app (addPred entry) (!entries)
1201 :    
1202 :     in F.CLUSTER{blkCounter=ref n,
1203 :     annotations=annotations,
1204 :     blocks=blocks,
1205 :     entry=newEntry,
1206 :     exit=newExit
1207 :     }
1208 :     end
1209 :    
1210 : leunga 744 in insertLabels([], blocks);
1211 :     IntHashTable.appi split edgesToSplit;
1212 :     renumberBlocks()
1213 : leunga 731 end
1214 :    
1215 :    
1216 :     (*------------------------------------------------------------------
1217 :     * Process all blocks
1218 :     *------------------------------------------------------------------*)
1219 :     fun rewriteAllBlocks
1220 :     (stamp,
1221 :     (block as F.BBLOCK{blknum, insns, liveIn, liveOut,
1222 :     annotations, pred, succ, ...})::rest) =
1223 :     let val stamp = rewrite(stamp, blknum, block,
1224 :     insns, liveIn, liveOut,
1225 :     pred, succ, annotations)
1226 :     in rewriteAllBlocks(stamp+1, rest)
1227 :     end
1228 :     | rewriteAllBlocks(stamp, F.LABEL l::rest) =
1229 :     (if debug andalso !debugOn then pr(Label.nameOf l^":\n") else ();
1230 :     rewriteAllBlocks(stamp, rest)
1231 :     )
1232 :     | rewriteAllBlocks(stamp, _::rest) = rewriteAllBlocks(stamp, rest)
1233 :     | rewriteAllBlocks(stamp, []) = ()
1234 :    
1235 :     (*------------------------------------------------------------------
1236 :     * Translate code within a basic block.
1237 :     * Each instruction is given a unique stamp for identifying last
1238 :     * uses.
1239 :     *------------------------------------------------------------------*)
1240 :     and rewrite(stamp, blknum, block, insns, liveIn, liveOut,
1241 :     pred, succ, annotations) =
1242 :     let val (stackIn, stack, code) = shuffleIn(blknum, block, pred, liveIn)
1243 :    
1244 :     (* Dump instructions when encountering a bug *)
1245 :     fun bug msg =
1246 :     (pr("-------- bug in block "^i2s blknum^" ----\n");
1247 :     dump(!insns);
1248 :     error msg
1249 :     )
1250 :    
1251 :     fun loop(stamp, [], [], code) = (stamp, code)
1252 :     | loop(stamp, instr::rest, (lastUse,dead)::lastUses, code) =
1253 :     let fun mark(tbl, []) = ()
1254 :     | mark(tbl, r::rs) =
1255 : leunga 744 (A.update(tbl, C.registerNum r, stamp); mark(tbl, rs))
1256 : leunga 731 in mark(lastUseTbl,lastUse); (* mark all last uses *)
1257 :     trans(stamp, instr, [], rest, dead, lastUses, code)
1258 :     end
1259 :     | loop _ = error "loop"
1260 :    
1261 :     (*
1262 :     * Main routine that does the actual translation.
1263 :     * A few reminders:
1264 :     * o The instructions are processed in normal order
1265 :     * and generated in the reversed order.
1266 :     * o (Local) liveness is computed at the same time.
1267 :     * o For each use, we have to find out whether it is
1268 :     * the last use. If so, we can kill it and reclaim
1269 :     * the stack entry at the same time.
1270 :     *)
1271 :     and trans(stamp, instr, an, rest, dead, lastUses, code) =
1272 :     let (* Call this continuation when done with code generation *)
1273 :     fun FINISH code = loop(stamp+1, rest, lastUses, code)
1274 :    
1275 :     (* Call this continuation when done with floating point
1276 :     * code generation. Remove all dead code first.
1277 :     *)
1278 :     fun DONE code =
1279 :     let fun kill([], code) = FINISH code
1280 :     | kill(f::fs, code) =
1281 : leunga 744 let val fx = C.registerNum f
1282 :     in if debug andalso debugDead then
1283 :     pr("DEAD "^fregToString f^" in "^
1284 :     ST.stackToString stack^"\n")
1285 :     else ();
1286 :     (* not a physical register *)
1287 :     if fx >= 8 then kill(fs, code)
1288 :     else
1289 :     let val i = ST.fp(stack, fx)
1290 :     in if debug andalso debugDead then
1291 :     pr("KILLING "^fregToString f^
1292 :     "=%st("^i2s i^")\n")
1293 :     else ();
1294 :     if i < 0 then kill(fs, code) (* dead already *)
1295 :     else if i = 0 then
1296 :     (ST.pop stack; kill(fs, POP_ST::code))
1297 :     else
1298 :     (ST.xch(stack,0,i); ST.pop stack;
1299 :     kill(fs, I.FSTPL(ST i)::code)
1300 :     )
1301 :     end
1302 :     end
1303 : leunga 731 in kill(dead, code)
1304 :     end
1305 :    
1306 :     (* Is this the last use of register f? *)
1307 :     fun isLastUse f = A.sub(lastUseTbl, f) = stamp
1308 :    
1309 :     (* Is this value dead? *)
1310 :     fun isDead f =
1311 :     let fun loop [] = false
1312 : leunga 744 | loop(r::rs) = C.sameColor(f,r) orelse loop rs
1313 : leunga 731 in loop dead end
1314 :    
1315 :     (* Dump the stack before each intruction for debugging *)
1316 :     fun log() = if debug andalso !traceOn then
1317 :     pr(ST.stackToString stack^assemble instr^"...\n")
1318 :     else ()
1319 :    
1320 :     (* Find the location of a source register *)
1321 :     fun getfs(f) =
1322 : leunga 744 let val fx = C.registerNum f
1323 :     val s = ST.fp(stack, fx)
1324 :     in (isLastUse fx,s) end
1325 : leunga 731
1326 :     (* Generate memory to memory move *)
1327 :     fun mmmove(fsize,src,dst) =
1328 :     let val _ = ST.nonFull stack
1329 :     val code = FLD(fsize,src)::code
1330 :     val code = mark(FSTP(fsize,dst),an)::code
1331 :     in DONE code end
1332 :    
1333 :     (* Allocate a new register in %st(0) *)
1334 : leunga 744 fun alloc(f,code) = (ST.push(stack,C.registerNum f); code)
1335 : leunga 731
1336 :     (* register -> register move *)
1337 :     fun rrmove(fs,fd) =
1338 : leunga 744 if C.sameColor(fs,fd) then DONE code
1339 : leunga 731 else
1340 : leunga 744 let val (dead,ss) = getfs fs
1341 : leunga 731 in if dead then (* fs is dead *)
1342 : leunga 744 (ST.set(stack,ss,C.registerNum fd); (* rename fd to fs *)
1343 : leunga 731 DONE code (* no code is generated *)
1344 :     )
1345 :     else (* fs is not dead; push it onto %st(0);
1346 :     * set fd to %st(0)
1347 :     *)
1348 :     let val code = alloc(fd, code)
1349 :     in DONE(mark(I.FLDL(ST ss),an)::code)
1350 :     end
1351 :     end
1352 :    
1353 :     (* memory -> register move.
1354 :     * Do dead code elimination here.
1355 :     *)
1356 :     fun mrmove(fsize,src,fd) =
1357 : leunga 744 if isDead fd
1358 : leunga 731 then FINISH code (* value has been killed *)
1359 :     else
1360 :     let val code = alloc(fd, code)
1361 :     in DONE(mark(FLD(fsize,src),an)::code)
1362 :     end
1363 :    
1364 :     (* exchange %st(n) and %st(0) *)
1365 :     fun xch(n) = (ST.xch(stack,0,n); FXCH n)
1366 :    
1367 :     (* push %st(n) onto the stack *)
1368 :     fun push(n) = (ST.push(stack,~2); I.FLDL(ST n))
1369 :    
1370 :    
1371 :     (* push mem onto the stack *)
1372 :     fun pushmem(src) = (ST.push(stack,~2); I.FLDL(src))
1373 :    
1374 :     (* register -> memory move.
1375 :     * Use pop version of the opcode if it is the last use.
1376 :     *)
1377 :     fun rmmove(fsize,fs,dst) =
1378 :     let fun fstp(code) =
1379 :     (ST.pop stack; DONE(mark(FSTP(fsize,dst),an)::code))
1380 :     fun fst(code) = DONE(mark(FST(fsize,dst),an)::code)
1381 :     in case getfs fs of
1382 :     (true, 0) => fstp code
1383 :     | (true, n) => fstp(xch n::code)
1384 :     | (false, 0) => fst(code)
1385 :     | (false, n) => fst(xch n::code)
1386 :     end
1387 :    
1388 :     (* Floating point move *)
1389 :     fun fmove{fsize,src=I.FPR fs,dst=I.FPR fd} = rrmove(fs,fd)
1390 :     | fmove{fsize,src,dst=I.FPR fd} = mrmove(fsize,src,fd)
1391 :     | fmove{fsize,src=I.FPR fs,dst} = rmmove(fsize,fs,dst)
1392 :     | fmove{fsize,src,dst} = mmmove(fsize,src,dst)
1393 :    
1394 :     (* Floating point integer load operator *)
1395 :     fun fiload{isize,ea,dst=I.FPR fd} =
1396 : leunga 744 let val code = alloc(fd, code)
1397 : leunga 731 val code = mark(FILD(isize,ea),an)::code
1398 :     in DONE code
1399 :     end
1400 :     | fiload{isize,ea,dst} =
1401 :     let val code = mark(FILD(isize,ea),an)::code
1402 :     val code = I.FSTPL(dst)::code (* XXX *)
1403 :     in DONE code
1404 :     end
1405 :    
1406 :     (* Make a copy of register fs to %st(0). *)
1407 :     fun moveregtotop(fs, code) =
1408 :     (case getfs fs of
1409 :     (true, 0) => code
1410 :     | (true, n) => xch n::code
1411 :     | (false, n) => push n::code
1412 :     )
1413 :    
1414 :     fun movememtotop(fsize, mem, code) =
1415 :     (ST.push(stack, ~2); FLD(fsize, mem)::code)
1416 :    
1417 :     (* Move an operand to top of stack *)
1418 :     fun movetotop(fsize, I.FPR fs, code) = moveregtotop(fs, code)
1419 :     | movetotop(fsize, mem, code) = movememtotop(fsize, mem, code)
1420 :    
1421 :     fun storeResult(fsize, dst, n, code) =
1422 :     case dst of
1423 : leunga 744 I.FPR fd => (ST.set(stack, n, C.registerNum fd); DONE code)
1424 : leunga 731 | mem =>
1425 :     let val code = if n = 0 then code else xch n::code
1426 :     in ST.pop stack; DONE(FSTP(fsize, mem)::code) end
1427 :    
1428 :     (* Floating point unary operator *)
1429 :     fun funop{fsize,unOp,src,dst} =
1430 :     let val code = movetotop(fsize, src, code)
1431 :     val code = mark(I.FUNARY unOp,an)::code
1432 :    
1433 :     (* Moronic hack to deal with partial tangent! *)
1434 :     val code =
1435 :     case unOp of
1436 :     I.FPTAN =>
1437 :     (if ST.depth stack >= 7 then error "FPTAN"
1438 :     else ();
1439 :     POP_ST::code (* pop the useless 1.0 *)
1440 :     )
1441 :     | _ => code
1442 :     in storeResult(fsize, dst, 0, code)
1443 :     end
1444 :    
1445 :     (* Floating point binary operator.
1446 :     * Note:
1447 :     * binop src, dst
1448 :     * means dst := dst binop src
1449 :     * (lsrc := lsrc binop rsrc)
1450 :     * on the x86
1451 :     *)
1452 :     fun fbinop{fsize,binOp,lsrc,rsrc,dst} =
1453 :     let (* generate code and set %st(n) = fd *)
1454 :     (* op2 := op1 - op2 *)
1455 :     fun oper(binOp,op1,op2,n,code) =
1456 :     let val code =
1457 :     mark(I.FBINARY{binOp=binOp,src=op1,dst=op2},an)
1458 :     ::code
1459 :     in storeResult(I.FP64, dst, n, code)
1460 :     end
1461 :    
1462 :     fun operR(binOp,op1,op2,n,code) =
1463 :     oper(invert binOp,op1,op2,n,code)
1464 :    
1465 :     fun operP(binOp,op1,op2,n,code) =
1466 :     (ST.pop stack; oper(pop binOp,op1,op2,n-1,code))
1467 :    
1468 :     fun operRP(binOp,op1,op2,n,code) =
1469 :     (ST.pop stack; operR(pop binOp,op1,op2,n-1,code))
1470 :    
1471 :     (* Many special cases to consider.
1472 :     * Basically, try to reuse stack space as
1473 :     * much as possible by taking advantage of last uses.
1474 :     *
1475 :     * Stack=[st(0)=3.0 st(1)=2.0]
1476 :     * fsub %st(1), %st [1,2.0]
1477 :     * fsubr %st(1), %st [-1,2.0]
1478 :     * fsub %st, %st(1) [3.0,1.0]
1479 :     * fsubr %st, %st(1) [3.0,-1.0]
1480 :     *
1481 :     * fsubp %st, %st(1) [1]
1482 :     * fsubrp %st, %st(1) [-1]
1483 :     * So,
1484 :     * fsub %st(n), %st (means %st - %st(n) -> %st)
1485 :     * fsub %st, %st(n) (means %st - %st(n) -> %st(n))
1486 :     * fsubr %st(n), %st (means %st(n) - %st -> %st)
1487 :     * fsubr %st, %st(n) (means %st(n) - %st -> %st(n))
1488 :     *)
1489 :     fun reg2(fx, fy) =
1490 :     let val (dx, sx) = getfs fx
1491 :     val (dy, sy) = getfs fy
1492 :     fun loop(dx, sx, dy, sy, code) =
1493 :     (* op1, op2 (dst) *)
1494 :     case (dx, sx, dy, sy) of
1495 :     (true, 0, false, n) => oper(binOp,ST n,ST0,0,code)
1496 :     | (false, n, true, 0) => operR(binOp,ST n,ST0,0,code)
1497 :     | (true, n, true, 0) => operRP(binOp,ST0,ST n,n,code)
1498 :     | (true, 0, true, n) => operP(binOp,ST0,ST n,n,code)
1499 :     | (false, 0, true, n) => oper(binOp,ST0,ST n,n,code)
1500 :     | (true, n, false, 0) => operR(binOp,ST0,ST n,n,code)
1501 :     | (true, sx, dy, sy) =>
1502 :     loop(true, 0, dy, sy, xch sx::code)
1503 :     | (dx, sx, true, sy) =>
1504 :     loop(dx, sx, true, 0, xch sy::code)
1505 :     | (false, sx, false, sy) =>
1506 :     loop(true, 0, false, sy+1, push sx::code)
1507 :     in if sx = sy then (* same register *)
1508 :     let val code =
1509 :     case (dx, sx) of
1510 :     (true, 0) => code
1511 :     | (true, n) => xch n::code
1512 :     | (false, n) => push n::code
1513 :     in oper(binOp,ST0,ST0,0,code)
1514 :     end
1515 :     else loop(dx, sx, dy, sy, code)
1516 :     end
1517 :    
1518 :     (* reg/mem operands *)
1519 :     fun regmem(binOp, fx, mem) =
1520 :     case getfs fx of
1521 :     (true, 0) => oper(binOp,mem,ST0,0,code)
1522 :     | (true, n) => oper(binOp,mem,ST0,0,xch n::code)
1523 :     | (false, n) => oper(binOp,mem,ST0,0,push n::code)
1524 :    
1525 :     (* Two memory operands. Optimize the case when
1526 :     * the two operands are identical.
1527 :     *)
1528 :     fun mem2(lsrc, rsrc) =
1529 :     let val _ = ST.push(stack,~2)
1530 :     val code = FLD(fsize,lsrc)::code
1531 :     val rsrc = if P.eqOpn(lsrc, rsrc) then ST0 else rsrc
1532 :     in oper(binOp,rsrc,ST0,0,code)
1533 :     end
1534 :    
1535 :     fun process(I.FPR fx, I.FPR fy) = reg2(fx, fy)
1536 :     | process(I.FPR fx, mem) = regmem(binOp, fx, mem)
1537 :     | process(mem, I.FPR fy) = regmem(invert binOp, fy, mem)
1538 :     | process(lsrc, rsrc) = mem2(lsrc, rsrc)
1539 :    
1540 :     in process(lsrc, rsrc)
1541 :     end
1542 :    
1543 :     (* Floating point binary operator with integer conversion *)
1544 :     fun fibinop{isize,binOp,lsrc,rsrc,dst} =
1545 :     let fun oper(binOp,src,code) =
1546 :     let val code = mark(I.FIBINARY{binOp=binOp,src=src},an)
1547 :     ::code
1548 :     in storeResult(I.FP64, dst, 0, code)
1549 :     end
1550 :    
1551 :     fun regmem(binOp, fx, mem) =
1552 :     case getfs fx of
1553 :     (true, 0) => oper(binOp, mem, code)
1554 :     | (true, n) => oper(binOp, mem, xch n::code)
1555 :     | (false, n) => oper(binOp, mem, push n::code)
1556 :    
1557 :     in case (lsrc, rsrc) of
1558 :     (I.FPR fx, mem) => regmem(binOp, fx, mem)
1559 :     | (lsrc, rsrc) => oper(binOp, rsrc, pushmem lsrc::code)
1560 :     end
1561 :    
1562 :     (* Floating point comparison
1563 :     * We have to make sure there are enough registers.
1564 :     * The trick is that tmp is always a physical register.
1565 :     * So we can always use it as temporary space if we
1566 :     * have run out.
1567 :     *)
1568 :     fun fcmp{fsize,lsrc,rsrc} =
1569 :     let fun fucompp() =
1570 :     (ST.pop stack; ST.pop stack; mark(I.FUCOMPP,an))
1571 :     fun fucomp(n) =
1572 :     (ST.pop stack; mark(I.FUCOMP(ST n),an))
1573 :     fun fucom(n) = mark(I.FUCOM(ST n),an)
1574 :    
1575 :     fun genmemcmp() =
1576 :     let val code = movememtotop(fsize, rsrc, code)
1577 :     val code = movememtotop(fsize, lsrc, code)
1578 :     in FINISH(fucompp()::code)
1579 :     end
1580 :    
1581 :     fun genmemregcmp(lsrc, fy) =
1582 :     case getfs fy of
1583 :     (false, n) =>
1584 :     let val code = movememtotop(fsize, lsrc, code)
1585 :     in FINISH(fucomp(n+1)::code) end
1586 :     | (true, n) =>
1587 :     let val code = if n = 0 then code else xch n::code
1588 :     val code = movememtotop(fsize, lsrc, code)
1589 :     in FINISH(fucompp()::code)
1590 :     end
1591 :    
1592 :     fun genregmemcmp(fx, rsrc) =
1593 :     let val code =
1594 :     case getfs fx of
1595 :     (true, n) =>
1596 :     let val code = if n = 0 then code
1597 :     else xch n::code
1598 :     val code = movememtotop(fsize, rsrc, code)
1599 :     in xch 1::code end
1600 :     | (false, n) =>
1601 :     let val code = movememtotop(fsize, rsrc, code)
1602 :     in push(n+1)::code
1603 :     end
1604 :     in FINISH(fucompp()::code)
1605 :     end
1606 :    
1607 :     (* Deal with the special case when both sources are
1608 :     * in the same register
1609 :     *)
1610 :     fun regsame(dx, sx) =
1611 :     let val (code, cmp) =
1612 :     case (dx, sx) of
1613 :     (true, 0) => (code, fucomp 0) (* pop once! *)
1614 :     | (false, 0) => (code, fucom 0) (* don't pop! *)
1615 :     | (true, n) => (xch n::code, fucomp 0)
1616 :     | (false, n) => (xch n::code, fucom 0)
1617 :     in FINISH(cmp::code) end
1618 :    
1619 :     fun reg2(fx, fy) =
1620 :     (* special case is when things are already in place.
1621 :     * Note: should also generate FUCOM and FUCOMP!!!
1622 :     *)
1623 :     let val (dx, sx) = getfs fx
1624 :     val (dy, sy) = getfs fy
1625 :     fun fstp(n) =
1626 :     (ST.xch(stack,n,0); ST.pop stack; I.FSTPL(ST n))
1627 :     in if sx = sy then regsame(dx, sx) (* same register!*)
1628 :     else
1629 :     (* first, move sx to %st(0) *)
1630 :     let val (sy, code) =
1631 :     if sx = 0 then (sy, code) (* there already *)
1632 :     else (if sy = 0 then sx else sy,
1633 :     xch sx::code)
1634 :    
1635 :     (* Generate the appropriate comparison op *)
1636 :     val (sy, cmp, popY) =
1637 :     case (dx, dy, sy) of
1638 :     (true, true, 0) => (~1, fucompp(), false)
1639 :     | (true, _, _) => (sy-1, fucomp sy, dy)
1640 :     | (false, _, _) => (sy, fucom sy, dy)
1641 :    
1642 :     val code = cmp::code
1643 :    
1644 :     (* Pop fy if it is dead and hasn't already
1645 :     * been popped.
1646 :     *)
1647 :     val code = if popY then fstp sy::code else code
1648 :     in FINISH code
1649 :     end
1650 :     end
1651 :    
1652 :     in case (lsrc, rsrc) of
1653 :     (I.FPR x, I.FPR y) => reg2(x, y)
1654 :     | (I.FPR x, mem) => genregmemcmp(x, mem)
1655 :     | (mem, I.FPR y) => genmemregcmp(mem, y)
1656 :     | _ => genmemcmp()
1657 :     end
1658 :    
1659 :    
1660 :     fun prCopy(dst, src) =
1661 :     ListPair.app(fn (fd, fs) =>
1662 : leunga 744 pr(fregToString(fd)^"<-"^fregToString fs^" "))
1663 : leunga 731 (dst, src)
1664 :    
1665 :     (* Parallel copy magic.
1666 :     * For each src registers, we find out
1667 :     * 1. whether it is the last use, and if so,
1668 :     * 2. whether it is used more than once.
1669 :     * If a source is a last and unique use, then we
1670 :     * can simply rename it to appropriate destination register.
1671 :     *)
1672 :     fun fcopy{dst,src,tmp} =
1673 :     let fun loop([], [], copies, renames) = (copies, renames)
1674 :     | loop(fd::fds, fs::fss, copies, renames) =
1675 : leunga 744 let val fsx = C.registerNum fs
1676 :     in if isLastUse fsx then
1677 :     if A.sub(useTbl,fsx) <> stamp
1678 :     (* unused *)
1679 :     then (A.update(useTbl,fsx,stamp);
1680 : leunga 731 loop(fds, fss, copies,
1681 : leunga 744 if C.sameColor(fd,fs) then renames
1682 : leunga 731 else (fd, fs)::renames)
1683 : leunga 744 )
1684 : leunga 731 else loop(fds, fss, (fd, fs)::copies, renames)
1685 :     else loop(fds, fss, (fd, fs)::copies, renames)
1686 :     end
1687 :     | loop _ = error "fcopy.loop"
1688 :    
1689 :     (* generate code for the copies *)
1690 :     fun genCopy([], code) = code
1691 :     | genCopy((fd, fs)::copies, code) =
1692 : leunga 744 let val ss = ST.fp(stack, C.registerNum fs)
1693 :     val _ = ST.push(stack, C.registerNum fd)
1694 : leunga 731 val code = I.FLDL(ST ss)::code
1695 :     in genCopy(copies, code) end
1696 :    
1697 :     (* perform the renaming; it must be done in parallel! *)
1698 :     fun renaming(renames) =
1699 : leunga 744 let val ss = map (fn (_,fs) =>
1700 :     ST.fp(stack,C.registerNum fs)) renames
1701 :     in ListPair.app (fn ((fd,_),ss) =>
1702 :     ST.set(stack,ss,C.registerNum fd))
1703 : leunga 731 (renames, ss)
1704 :     end
1705 :    
1706 :     (* val _ = if debug then
1707 :     (ListPair.app (fn (fd, fs) =>
1708 :     pr(fregToString(regmap fd)^"<-"^
1709 :     fregToString(regmap fs)^" ")
1710 :     ) (dst, src);
1711 :     pr "\n")
1712 :     else () *)
1713 :    
1714 :     val (copies, renames) = loop(dst, src, [], [])
1715 :     val code = genCopy(copies, code)
1716 :     in renaming renames;
1717 :     case tmp of
1718 :     SOME(I.FPR f) =>
1719 : leunga 744 (if debug andalso debugDead
1720 :     then pr("KILLING tmp "^fregToString f^"\n")
1721 :     else ();
1722 :     ST.kill(stack, f)
1723 :     )
1724 : leunga 731 | _ => ();
1725 :     DONE code
1726 :     end
1727 :    
1728 :     in case instr of
1729 :     (* annotation handling *)
1730 :     I.ANNOTATION{i,a} =>
1731 :     trans(stamp, i, a::an, rest, dead, lastUses, code)
1732 :    
1733 :     (* handle floating point stuff *)
1734 :     | I.FMOVE x => (log(); fmove x)
1735 :     | I.FBINOP x => (log(); fbinop x)
1736 :     | I.FIBINOP x => (log(); fibinop x)
1737 :     | I.FUNOP x => (log(); funop x)
1738 :     | I.FILOAD x => (log(); fiload x)
1739 :     | I.FCMP x => (log(); fcmp x)
1740 :     | I.FCOPY x => (log(); fcopy x)
1741 :    
1742 :     (*
1743 :     * Catch instructions that absolutely
1744 :     * should not have been generated at this point.
1745 :     *)
1746 :     | (I.FLD1 | I.FLDL2E | I.FLDLG2 | I.FLDLN2 | I.FLDPI |
1747 :     I.FLDZ | I.FLDL _ | I.FLDS _ | I.FLDT _ |
1748 :     I.FILD _ | I.FILDL _ | I.FILDLL _ |
1749 :     I.FENV _ | I.FBINARY _ | I.FIBINARY _ | I.FUNARY _ |
1750 :     I.FUCOMPP | I.FUCOM _ | I.FUCOMP _ | I.FCOMPP | I.FXCH _ |
1751 :     I.FSTPL _ | I.FSTPS _ | I.FSTPT _ | I.FSTL _ | I.FSTS _
1752 :     ) => bug("Illegal FP instructions")
1753 :    
1754 :     (* Other instructions are untouched *)
1755 :     | instr => FINISH(mark(instr, an)::code)
1756 :     end (* trans *)
1757 :    
1758 :     (*
1759 :     * Check the translation result to see if it matches the original
1760 :     * code.
1761 :     *)
1762 :     fun checkTranslation(stackIn, stackOut, insns) =
1763 :     let val n = ref(ST.depth stackIn)
1764 :     fun push() = n := !n + 1
1765 :     fun pop() = n := !n - 1
1766 :     fun scan(I.FBINARY{binOp, ...}) =
1767 :     (case binOp of
1768 :     ( I.FADDP | I.FSUBP | I.FSUBRP | I.FMULP
1769 :     | I.FDIVP | I.FDIVRP) => pop()
1770 :     | _ => ()
1771 :     )
1772 :     | scan(I.FIBINARY{binOp, ...}) = ()
1773 :     | scan(I.FUNARY I.FPTAN) = push()
1774 :     | scan(I.FUNARY _) = ()
1775 :     | scan(I.FLDL(I.ST n)) = push()
1776 :     | scan(I.FLDL mem) = push()
1777 :     | scan(I.FLDS mem) = push()
1778 :     | scan(I.FLDT mem) = push()
1779 :     | scan(I.FSTL(I.ST n)) = ()
1780 :     | scan(I.FSTPL(I.ST n)) = pop()
1781 :     | scan(I.FSTL mem) = ()
1782 :     | scan(I.FSTS mem) = ()
1783 :     | scan(I.FSTPL mem) = pop()
1784 :     | scan(I.FSTPS mem) = pop()
1785 :     | scan(I.FSTPT mem) = pop()
1786 :     | scan(I.FXCH{opnd=i,...}) = ()
1787 :     | scan(I.FUCOM _) = ()
1788 :     | scan(I.FUCOMP _) = pop()
1789 :     | scan(I.FUCOMPP) = (pop(); pop())
1790 :     | scan(I.FILD mem) = push()
1791 :     | scan(I.FILDL mem) = push()
1792 :     | scan(I.FILDLL mem) = push()
1793 :     | scan _ = ()
1794 :     val _ = app scan (rev insns);
1795 :     val n = !n
1796 :     val m = ST.depth stackOut
1797 :     in if n <> m then
1798 :     (dump(insns);
1799 :     bug("Bad translation n="^i2s n^ " expected="^i2s m^"\n")
1800 :     )
1801 :     else ()
1802 :     end
1803 :    
1804 :    
1805 :     (* Dump the initial code *)
1806 :     val _ = if debug andalso !debugOn then
1807 :     (pr("-------- block "^i2s blknum^" ----"^
1808 : leunga 744 C.CellSet.toString (!liveIn)^" "^
1809 : leunga 731 ST.stackToString stackIn^"\n");
1810 :     dump (!insns)
1811 :     )
1812 :     else ()
1813 :    
1814 :     (* Compute the last uses *)
1815 :     val lastUse = computeLastUse(blknum, insns, liveOut)
1816 :    
1817 :     (* Rewrite the code *)
1818 :     val (stamp, insns') = loop(stamp, rev(!insns), lastUse, code)
1819 :    
1820 :     (* Insert shuffle code at the end if necessary *)
1821 :     val insns' = shuffleOut(stack, insns', blknum, block, succ, liveOut)
1822 :    
1823 :     (* Dump translation *)
1824 :     val _ = if debug andalso !debugOn then
1825 :     (pr("-------- translation "^i2s blknum^"----"^
1826 : leunga 744 C.CellSet.toString (!liveIn)^" "^
1827 : leunga 731 ST.stackToString stackIn^"\n");
1828 :     dump insns';
1829 :     pr("-------- done "^i2s blknum^"----"^
1830 : leunga 744 C.CellSet.toString (!liveOut)^" "^
1831 : leunga 731 ST.stackToString stack^"\n")
1832 :     )
1833 :     else ()
1834 :    
1835 :     (* Check if things are okay *)
1836 :     val _ = if debug andalso sanityCheck then
1837 :     checkTranslation(stackIn, stack, insns')
1838 :     else ()
1839 :    
1840 :     in insns := insns'; (* update the instructions *)
1841 :     stamp
1842 :     end (* process *)
1843 :    
1844 :     in (* Translate all blocks *)
1845 :     rewriteAllBlocks(C.firstPseudo, blocks);
1846 :     (* If we found critical edges, then we have to split them... *)
1847 : blume 733 if IntHashTable.numItems edgesToSplit = 0 then cluster
1848 : leunga 731 else repairCriticalEdges(cluster)
1849 : leunga 744 end
1850 : leunga 731
1851 :     end (* functor *)
1852 :    
1853 :     end (* local *)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0