Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /sml/trunk/src/MLRISC/x86/mltree/x86-fp.sml
ViewVC logotype

Annotation of /sml/trunk/src/MLRISC/x86/mltree/x86-fp.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 895 - (view) (download)

1 : leunga 731 (*
2 :     * This phase takes a cluster with pseudo x86 fp instructions, performs
3 :     * liveness analysis to determine their live ranges, and rewrite the
4 :     * program into the correct stack based code.
5 :     *
6 :     * The Basics
7 :     * ----------
8 :     * o We assume there are 7 pseudo fp registers, %fp(0), ..., %fp(6),
9 :     * which are mapped onto the %st stack. One stack location is reserved
10 :     * for holding temporaries.
11 :     * o Important: for floating point comparisons, we actually need
12 :     * two extra stack locations in the worst case. We handle this by
13 :     * specifying that the instruction define an extra temporary fp register
14 :     * when necessary.
15 :     * o The mapping between %fp <-> %st may change from program point to
16 :     * program point. We keep track of this lazy renaming and try to minimize
17 :     * the number of FXCH that we insert.
18 :     * o At split and merge points, we may get inconsistent %fp <-> %st mappings.
19 :     * We handle this by inserting the appropriate renaming code.
20 :     * o Parallel copies (renaming) are rewritten into a sequence of FXCHs!
21 :     *
22 :     * Pseudo fp instructions Semantics
23 :     * --------------------------------------
24 :     * FMOVE src, dst dst := src
25 :     * FILOAD ea, dst dst := cvti2f(mem[ea])
26 :     * FBINOP lsrc, rsrc, dst dst := lsrc * rsrc
27 :     * FIBINOP lsrc, rsrc, dst dst := lsrc * cvti2f(rsrc)
28 :     * FUNOP src, dst dst := unaryOp src
29 :     * FCMP lsrc, rsrc fp condition code := fcmp(lsrc, rsrc)
30 :     *
31 :     * An instruction may use its source operand(s) destructively.
32 :     * We find this info using a global liveness analysis.
33 :     *
34 :     * The Translation
35 :     * ---------------
36 :     * o We keep track of the bindings between %fp registers and the
37 :     * %st(..) staack locations.
38 :     * o FXCH and FLDL are inserted at the appropriate places to move operands
39 :     * to %st(0). FLDL is used if the operand is not dead. FXCH is used
40 :     * if the operand is the last use.
41 :     * o FCOPY's between pseudo %fp registers are done by software renaming
42 :     * and generate no code by itself!
43 :     * o FSTL %st(1) are also generated to pop the stack after the last use
44 :     * of an operand.
45 :     *
46 :     * Note
47 :     * ----
48 :     * 1. This module should be run after floating point register allocation.
49 :     * 2. Due to the extra complication of critical edge splitting, the cellset
50 :     * and frequency info are not preserved.
51 :     *
52 :     * -- Allen Leung (leunga@cs.nyu.edu)
53 :     *)
54 :    
55 :     local
56 :     val debug = false (* set this to true to debug this module
57 :     * set this to false for production use.
58 :     *)
59 :     val debugLiveness = false (* debug liveness analysis *)
60 :     val debugDead = false (* debug dead code removal *)
61 :     val sanityCheck = false
62 :     in
63 :     functor X86FP
64 :     (structure X86Instr : X86INSTR
65 :     structure X86Props : INSN_PROPERTIES where I = X86Instr
66 :     structure Flowgraph : FLOWGRAPH where I = X86Instr
67 :     structure Liveness : LIVENESS where F = Flowgraph
68 :     structure Asm : INSTRUCTION_EMITTER where I = X86Instr
69 :     sharing Flowgraph.P = Asm.P
70 :     ) : CLUSTER_OPTIMIZATION =
71 :     struct
72 :     structure F = Flowgraph
73 :     structure I = X86Instr
74 : leunga 775 structure T = I.T
75 : leunga 731 structure P = X86Props
76 :     structure C = I.C
77 :     structure A = Array
78 :     structure L = Label
79 :     structure LE = I.LabelExp
80 :     structure An = Annotations
81 : george 889 structure CB = CellsBasis
82 :     structure SL = CB.SortedCells
83 : leunga 731
84 :     type flowgraph = F.cluster
85 :     type an = An.annotations
86 :    
87 :     val name = "X86 floating point rewrite"
88 :    
89 :     val debugOn = MLRiscControl.getFlag "x86-fp-debug"
90 :     val traceOn = MLRiscControl.getFlag "x86-fp-trace"
91 :    
92 :     fun error msg = MLRiscErrorMsg.error("X86FP",msg)
93 :     fun pr msg = TextIO.output(!MLRiscControl.debug_stream,msg)
94 :    
95 :     val i2s = Int.toString
96 :    
97 :     (*
98 :     * No overflow checking is needed for integer arithmetic in this module
99 :     *)
100 :     fun x + y = Word.toIntX(Word.+(Word.fromInt x, Word.fromInt y))
101 :     fun x - y = Word.toIntX(Word.-(Word.fromInt x, Word.fromInt y))
102 :    
103 :     (*-----------------------------------------------------------------------
104 :     * Primitive instruction handling routines
105 :     *-----------------------------------------------------------------------*)
106 :    
107 :     (* Annotation an instruction *)
108 :     fun mark(instr, []) = instr
109 :     | mark(instr, a::an) = mark(I.ANNOTATION{i=instr,a=a}, an)
110 :    
111 :     (* Add pop suffix to a binary operator *)
112 :     fun pop I.FADDL = I.FADDP | pop I.FADDS = I.FADDP
113 :     | pop I.FSUBL = I.FSUBP | pop I.FSUBS = I.FSUBP
114 :     | pop I.FSUBRL = I.FSUBRP | pop I.FSUBRS = I.FSUBRP
115 :     | pop I.FMULL = I.FMULP | pop I.FMULS = I.FMULP
116 :     | pop I.FDIVL = I.FDIVP | pop I.FDIVS = I.FDIVP
117 :     | pop I.FDIVRL = I.FDIVRP | pop I.FDIVRS = I.FDIVRP
118 :     | pop _ = error "fbinop.pop"
119 :    
120 :     (* Invert the operator *)
121 :     fun invert I.FADDL = I.FADDL | invert I.FADDS = I.FADDS
122 :     | invert I.FSUBL = I.FSUBRL | invert I.FSUBS = I.FSUBRS
123 :     | invert I.FSUBRL = I.FSUBL | invert I.FSUBRS = I.FSUBS
124 :     | invert I.FMULL = I.FMULL | invert I.FMULS = I.FMULS
125 :     | invert I.FDIVL = I.FDIVRL | invert I.FDIVS = I.FDIVRS
126 :     | invert I.FDIVRL = I.FDIVL | invert I.FDIVRS = I.FDIVS
127 :     | invert I.FADDP = I.FADDP | invert I.FMULP = I.FMULP
128 :     | invert I.FSUBP = I.FSUBRP | invert I.FSUBRP = I.FSUBP
129 :     | invert I.FDIVP = I.FDIVRP | invert I.FDIVRP = I.FDIVP
130 :     | invert _ = error "invert"
131 :    
132 :     (* Pseudo instructions *)
133 :     fun FLD(I.FP32, ea) = I.FLDS ea
134 :     | FLD(I.FP64, ea) = I.FLDL ea
135 :     | FLD(I.FP80, ea) = I.FLDT ea
136 :    
137 :     fun FILD(I.I8, ea) = error "FILD"
138 :     | FILD(I.I16, ea) = I.FILD ea
139 :     | FILD(I.I32, ea) = I.FILDL ea
140 :     | FILD(I.I64, ea) = I.FILDLL ea
141 :    
142 :     fun FSTP(I.FP32, ea) = I.FSTPS ea
143 :     | FSTP(I.FP64, ea) = I.FSTPL ea
144 :     | FSTP(I.FP80, ea) = I.FSTPT ea
145 :    
146 :     fun FST(I.FP32, ea) = I.FSTS ea
147 :     | FST(I.FP64, ea) = I.FSTL ea
148 :     | FST(I.FP80, ea) = error "FSTT"
149 :    
150 :     (*-----------------------------------------------------------------------
151 :     * Pretty print routines
152 :     *-----------------------------------------------------------------------*)
153 : george 889 fun fregToString f = "%f"^i2s(CB.registerNum f)
154 : leunga 731 fun fregsToString s =
155 :     List.foldr (fn (r,"") => fregToString r |
156 :     (r,s) => fregToString r^" "^s) "" s
157 :     fun blknumOf(F.BBLOCK{blknum, ...}) = blknum
158 :    
159 :     (*-----------------------------------------------------------------------
160 :     * A stack datatype that mimics the x86 floating point stack
161 :     * and keeps track of bindings between %st(n) and %fp(n).
162 :     *-----------------------------------------------------------------------*)
163 :     structure ST :>
164 :     sig
165 :     type stack
166 :     type stnum = int (* 0 -- 7 *)
167 :     val create : unit -> stack
168 :     val stack0 : stack
169 :     val copy : stack -> stack
170 :     val clear : stack -> unit
171 : george 889 val fp : stack * CB.register_id -> stnum
172 :     val st : stack * stnum -> CB.register_id
173 :     val set : stack * stnum * CB.register_id -> unit
174 :     val push : stack * CB.register_id -> unit
175 : leunga 731 val xch : stack * stnum * stnum -> unit
176 :     val pop : stack -> unit
177 :     val depth : stack -> int
178 :     val nonFull : stack -> unit
179 : george 889 val kill : stack * CellsBasis.cell -> unit
180 : leunga 731 val stackToString : stack -> string
181 :     val equal : stack * stack -> bool
182 :     end =
183 :     struct
184 :     type stnum = int
185 :     datatype stack =
186 : leunga 744 STACK of
187 : george 889 { st : CB.register_id A.array, (* mapping %st -> %fp registers *)
188 : leunga 744 fp : stnum A.array, (* mapping %fp -> %st registers *)
189 :     sp : int ref (* stack pointer *)
190 :     }
191 : leunga 731
192 :     (* Create a new stack *)
193 :     fun create() = STACK{st=A.array(8,~1), fp=A.array(7,16), sp=ref ~1}
194 :    
195 :     val stack0 = create()
196 :    
197 :     (* Copy a stack *)
198 :     fun copy(STACK{st, fp, sp}) =
199 :     let val st' = A.array(8, ~1)
200 :     val fp' = A.array(7, 16)
201 :     in A.copy{src=st,dst=st',si=0,di=0,len=NONE};
202 :     A.copy{src=fp,dst=fp',si=0,di=0,len=NONE};
203 :     STACK{st=st', fp=fp', sp=ref(!sp)}
204 :     end
205 :    
206 :     (* Depth of stack *)
207 :     fun depth(STACK{sp, ...}) = !sp + 1
208 :    
209 :     fun nonFull(STACK{sp, ...}) =
210 :     if !sp >= 7 then error "stack overflow" else ()
211 :    
212 :     (* Given %st(n), lookup the corresponding %fp(n) *)
213 :     fun st(STACK{st, sp, ...}, n) = A.sub(st, !sp - n)
214 :    
215 :     (* Given %fp(n), lookup the corresponding %st(n) *)
216 : leunga 744 fun fp(STACK{fp, sp, ...}, n) = !sp - A.sub(fp, n)
217 : leunga 731
218 :     fun stackToString stack =
219 :     let val depth = depth stack
220 :     fun f i = if i >= depth then " ]"
221 : leunga 744 else "%st("^i2s i^")=%f"^i2s(st(stack,i))^" "^f(i+1)
222 : leunga 731 in "[ "^f 0 end
223 :    
224 :     fun clear(STACK{st, fp, sp, ...}) =
225 :     (sp := ~1; A.modify(fn _ => ~1) st; A.modify(fn _ => 16) fp)
226 :    
227 :     (* Set %st(n) := %f *)
228 :     fun set(STACK{st, fp, sp, ...}, n, f) =
229 :     (A.update(st, !sp - n, f);
230 : leunga 744 if f >= 0 then A.update(fp, f, !sp - n) else ()
231 : leunga 731 )
232 :    
233 :     (* Pop one entry *)
234 :     fun pop(STACK{sp, st, fp, ...}) = sp := !sp - 1
235 :    
236 :     (* Push %fp(f) onto %st(0) *)
237 :     fun push(stack as STACK{sp, ...}, f) = (sp := !sp + 1; set(stack, 0, f))
238 :    
239 :     (* Exchange the contents of %st(m) and %st(n) *)
240 :     fun xch(stack, m, n) =
241 :     let val f_m = st(stack, m)
242 :     val f_n = st(stack, n)
243 :     in set(stack, m, f_n);
244 :     set(stack, n, f_m)
245 :     end
246 :    
247 : george 889 fun kill(STACK{fp, ...}, f) = A.update(fp, CB.registerNum f, 16)
248 : leunga 731
249 :     fun equal(st1, st2) =
250 :     let val m = depth st1
251 :     val n = depth st2
252 :     fun loop i =
253 :     i >= m orelse (st(st1, i) = st(st2, i) andalso loop(i+1))
254 :     in m = n andalso loop(0)
255 :     end
256 :    
257 :     end (* struct *)
258 :    
259 :     (*-----------------------------------------------------------------------
260 :     * Module to handle forward propagation.
261 :     * Forward propagation does the following:
262 :     * Given an instruction
263 :     * fmove mem, %fp(n)
264 :     * We delay the generation of the load until the first use of %fp(n),
265 :     * which we can further optimize by folding the load into the operand
266 :     * of the instruction, if it is the last use of this operand.
267 :     * If %fp(n) is dead then no load is necessary.
268 :     * Of course, we have to be careful whenever we encounter other
269 :     * instruction with a write.
270 :     *-----------------------------------------------------------------------*)
271 : leunga 744 (*
272 : leunga 731 structure ForwardPropagation :>
273 :     sig
274 :     type readbuffer
275 :     val create : ST.stack -> readbuffer
276 :     val load : readbuffer * C.cell * I.fsize * I.ea -> unit
277 :     val getreg : readbuffer * bool * C.cell * I.instruction list ->
278 :     I.operand * I.instruction list
279 :     val flush : readbuffer * I.instruction list -> I.instruction list
280 :     end =
281 :     struct
282 :    
283 :     datatype readbuffer =
284 :     READ of { stack : ST.stack,
285 :     loads : (I.fsize * I.ea) option A.array,
286 :     pending : int ref
287 :     }
288 :    
289 :     fun create stack =
290 :     READ{stack =stack,
291 :     loads =A.array(8, NONE),
292 :     pending =ref 0
293 :     }
294 :    
295 :     fun load(READ{pending, loads, ...}, fd, fsize, mem) =
296 : leunga 744 (A.update(loads, fd, SOME(fsize, mem));
297 : leunga 731 pending := !pending + 1
298 :     )
299 :    
300 :     (* Extract the operand for a register
301 :     * If it has a delayed load associated with it then
302 :     * we perform the load at this time.
303 :     *)
304 :     fun getreg(READ{pending, loads, stack, ...}, isLastUse, fs, code) =
305 : leunga 744 case A.sub(loads, fs) of
306 : leunga 731 NONE =>
307 :     let val n = ST.st(stack, fs)
308 :     in if isLastUse
309 :     then (ST n, code)
310 :     else let val code = I.FLDL(ST n)::code
311 :     in ST.push(stack, fs); (ST0, code)
312 :     end
313 :     end
314 :     | SOME(fsize, mem) =>
315 :     let val code = FLD(fsize, mem)::code
316 :     in A.update(loads, fs, NONE); (* delete load *)
317 :     pending := !pending - 1;
318 :     ST.push(stack, fs); (* fs is now in place *)
319 :     (ST0, code)
320 :     end
321 :    
322 :     (* Extract a binary operand.
323 :     * We'll try to fold this into the operand
324 :     *)
325 :     fun getopnd(READ{pending, loads, stack,...}, isLastUse, I.FPR fs, code) =
326 : leunga 744 (case A.sub(loads, fs) of
327 : leunga 731 NONE =>
328 :     let val n = ST.st(stack, fs)
329 :     in if isLastUse fs (* regmap XXX *)
330 :     then (ST n, code)
331 :     else let val code = I.FLDL(ST n)::code
332 :     in ST.push(stack, fs); (ST0, code)
333 :     end
334 :     end
335 :     | SOME(fsize, mem) =>
336 :     (A.update(loads, fs, NONE); (* delete load *)
337 :     pending := !pending - 1;
338 :     if isLastUse fs then (mem, code)
339 :     else let val code = FLD(fsize, mem)::code
340 :     in ST.push(stack, fs);
341 :     (ST0, code)
342 :     end
343 :     )
344 :     )
345 :     | getopnd(_, _, ea, code) = (ea, code)
346 :    
347 :     fun flush(READ{pending=ref 0,...}, code) = code
348 :    
349 :     end (* struct *)
350 : leunga 744 *)
351 : leunga 731
352 :     (*-----------------------------------------------------------------------
353 :     * Module to handle delayed stores.
354 :     * Delayed store does the following:
355 :     * Given an instruction
356 :     * fstore %fp(n), %mem
357 :     * We delay the generation of the store until necessary.
358 :     * This gives us an opportunity to rearrange the order of the stores
359 :     * to eliminate unnecessary fxch.
360 :     *-----------------------------------------------------------------------*)
361 : leunga 744 (*
362 : leunga 731 structure DelayStore :>
363 :     sig
364 :     type writebuffer
365 :     val create : ST.stack -> writebuffer
366 :     val flush : writebuffer * I.instruction list -> I.instruction list
367 :     end =
368 :     struct
369 :     datatype writebuffer =
370 :     WRITE of { front : (I.ea * C.cell) list ref,
371 :     back : (I.ea * C.cell) list ref,
372 :     stack : ST.stack,
373 :     pending : int ref
374 :     }
375 :     fun create stack = WRITE{front=ref [], back=ref [],
376 :     stack=stack, pending=ref 0}
377 :     fun flush(WRITE{pending=ref 0,...}, code) = code
378 :     end (* struct *)
379 : leunga 744 *)
380 : leunga 731
381 :     (*-----------------------------------------------------------------------
382 :     * Main routine.
383 :     *
384 :     * Algorithm:
385 :     * 1. Perform liveness analysis.
386 :     * 2. For each fp register, mark all its last use point(s).
387 :     * Registers are popped at their last uses.
388 :     * 3. Rewrite the instructions basic block by basic block.
389 :     * 4. Insert shuffle code at basic block boundaries.
390 :     * When necessary, split critical edges.
391 :     * 5. Sacrifice a goat to make sure things don't go wrong.
392 :     *-----------------------------------------------------------------------*)
393 : leunga 744 fun run(cluster as F.CLUSTER{blocks, blkCounter, ...}) =
394 : george 889 let val getCell = C.CellSet.get CB.FP (*extract the fp component of cellset*)
395 : leunga 731
396 : leunga 744 val stTable = A.tabulate(8, fn n => I.ST(C.ST n))
397 :    
398 :     fun ST n = (if sanityCheck andalso (n < 0 orelse n >= 8) then
399 :     pr("WARNING BAD %st("^i2s n^")\n")
400 :     else ();
401 :     A.sub(stTable, n)
402 :     )
403 :    
404 :     fun FXCH n = I.FXCH{opnd=C.ST n}
405 :    
406 :     val ST0 = ST 0
407 :     val ST1 = ST 1
408 :     val POP_ST = I.FSTPL ST0 (* Instruction to pop an entry *)
409 :    
410 : leunga 731 (* Dump instructions *)
411 :     fun dump instrs =
412 :     let val Asm.S.STREAM{emit, ...} =
413 :     AsmStream.withStream (!MLRiscControl.debug_stream)
414 :     Asm.makeStream []
415 :     in app emit (rev instrs)
416 :     end
417 :    
418 :     (* Create assembly of instruction *)
419 :     fun assemble instr =
420 :     let val buf = StringOutStream.mkStreamBuf()
421 :     val stream = StringOutStream.openStringOut buf
422 :     val Asm.S.STREAM{emit, ...} =
423 :     AsmStream.withStream stream Asm.makeStream []
424 : leunga 744 val _ = emit instr
425 : leunga 731 val s = StringOutStream.getString buf
426 :     val n = String.size s
427 :     in if n = 0 then s else String.substring(s, 0, n - 1)
428 :     end
429 :    
430 :     (*------------------------------------------------------------------
431 :     * Perform liveness analysis on the floating point variables
432 :     * P.S. I'm glad I didn't throw away the code liveness code.
433 :     *------------------------------------------------------------------*)
434 : george 889 val defUse = P.defUse CB.FP (* def/use properties *)
435 : leunga 731 val _ = Liveness.liveness{defUse=defUse,
436 : george 889 updateCell=C.CellSet.update CB.FP,
437 : leunga 731 getCell=getCell,
438 :     blocks=blocks
439 :     }
440 :     (*------------------------------------------------------------------
441 :     * Scan the instructions compute the last uses and dead definitions
442 :     * at each program point. Ideally we can do this during the code
443 :     * rewriting phase. But that's probably too error prone for now.
444 :     *------------------------------------------------------------------*)
445 :     fun computeLastUse(blknum, insns, liveOut) =
446 :     let fun scan([], _, lastUse) = lastUse
447 :     | scan(i::instrs, live, lastUse) =
448 :     let val (d, u) = defUse i
449 : leunga 744 val d = SL.uniq(d)(* definitions *)
450 :     val u = SL.uniq(u)(* uses *)
451 :     val dead = SL.return(SL.difference(d, live))
452 : leunga 731 val live = SL.difference(live, d)
453 : leunga 744 val last = SL.return(SL.difference(u, live))
454 :     val live = SL.union(live, u)
455 : leunga 731 val _ =
456 :     if debug andalso debugLiveness then
457 :     (case last of
458 :     [] => ()
459 :     | _ => print(assemble i^"\tlast use="^
460 :     fregsToString last^"\n")
461 :     )
462 :     else ()
463 :     in scan(instrs, live, (last,dead)::lastUse)
464 :     end
465 : leunga 744 val liveOutSet = SL.uniq(getCell (!liveOut))
466 : leunga 731 val _ =
467 :     if debug andalso debugLiveness then
468 :     print("LiveOut("^i2s blknum^") = "^
469 : leunga 744 fregsToString(SL.return liveOutSet)^"\n")
470 : leunga 731 else ()
471 :     in scan(!insns, liveOutSet, [])
472 :     end
473 :    
474 :     (*------------------------------------------------------------------
475 :     * Temporary work space
476 :     *------------------------------------------------------------------*)
477 : george 889 val {high, low} = C.cellRange CB.FP
478 : leunga 731 val n = high+1
479 :     val lastUseTbl = A.array(n,~1) (* table for marking last uses *)
480 :     val useTbl = A.array(n,~1) (* table for marking uses *)
481 :    
482 :     (* %fp register bindings before and after a basic block *)
483 :     val bindingsIn = A.array(!blkCounter, NONE)
484 :     val bindingsOut = A.array(!blkCounter, NONE)
485 :     val stampCounter = ref ~4096
486 :    
487 :     (* Edges that need splitting *)
488 :     exception NoEdgesToSplit
489 : blume 733 val edgesToSplit = IntHashTable.mkTable(32, NoEdgesToSplit)
490 :     val addEdgesToSplit = IntHashTable.insert edgesToSplit
491 : leunga 744 fun lookupEdgesToSplit b =
492 :     getOpt(IntHashTable.find edgesToSplit b, [])
493 : leunga 731
494 :     (*------------------------------------------------------------------
495 :     * Code for handling bindings between basic block
496 :     *------------------------------------------------------------------*)
497 :    
498 :     fun splitEdge(targetId, source, target) =
499 :     (if debug andalso !traceOn then
500 :     pr("SPLITTING "^i2s(blknumOf source)^"->"^
501 :     i2s(blknumOf target)^"\n")
502 :     else ();
503 :     addEdgesToSplit(targetId,
504 :     (source,target)::lookupEdgesToSplit targetId)
505 :     )
506 :    
507 :     (* Given a cellset, return a sorted and unique
508 :     * list of elements with all non-physical registers removed
509 :     *)
510 :     fun removeNonPhysical cellSet =
511 : leunga 744 let fun loop([], S) = SL.return(SL.uniq S)
512 : leunga 731 | loop(f::fs, S) =
513 : george 889 let val fx = CB.registerNum f
514 : leunga 744 in loop(fs,if fx <= 7 then f::S else S)
515 : leunga 731 end
516 :     in loop(getCell(!cellSet), [])
517 :     end
518 :    
519 :     (* Given a sorted and unique list of registers,
520 :     * Return a stack with these elements
521 :     *)
522 :     fun newStack fregs =
523 :     let val stack = ST.create()
524 : george 889 in app (fn f => ST.push(stack, CB.registerNum f)) (rev fregs);
525 : leunga 731 stack
526 :     end
527 :    
528 :     (*
529 :     * This function looks at all the entries on the stack,
530 :     * and generate code to deallocate all the dead values.
531 :     * The stack is updated.
532 :     *)
533 :     fun removeDeadValues(stack, liveSet, code) =
534 :     let val stamp = !stampCounter
535 :     val _ = stampCounter := !stampCounter - 1
536 :     fun markLive [] = ()
537 : leunga 744 | markLive(r::rs) =
538 : george 889 (A.update(useTbl, CB.registerNum r, stamp); markLive rs)
539 : leunga 731 fun isLive f = A.sub(useTbl, f) = stamp
540 :     fun loop(i, depth, code) =
541 :     if i >= depth then code else
542 :     let val f = ST.st(stack, i)
543 :     in if isLive f (* live? *)
544 :     then loop(i+1, depth, code)
545 :     else
546 :     (if debug andalso !traceOn then
547 : leunga 744 pr("REMOVING %f"^i2s f^" in %st("^i2s i^")"^
548 : leunga 731 " current stack="^ST.stackToString stack^"\n")
549 :     else ();
550 :     if i = 0 then
551 :     (ST.pop stack;
552 :     loop(0, depth-1, POP_ST::code)
553 :     )
554 :     else (ST.xch(stack,0,i);
555 :     ST.pop stack;
556 :     loop(0, depth-1, I.FSTPL(ST i)::code)
557 :     )
558 :     )
559 :     end
560 :     in markLive liveSet;
561 :     loop(0, ST.depth stack, code)
562 :     end
563 :    
564 :    
565 :     (*------------------------------------------------------------------
566 :     * Given two stacks, source and target, where the bindings are
567 :     * permutation of each other, generate the minimal number of
568 :     * fxchs to match source with target.
569 :     *
570 :     * Important: source and target MUST be permutations of each other.
571 :     *
572 :     * Essentially, we first decompose the permutation into cycles,
573 :     * and process each cycle.
574 :     *------------------------------------------------------------------*)
575 :     fun shuffle(source, target, code) =
576 :     let val stamp = !stampCounter
577 :     val _ = stampCounter := !stampCounter - 1
578 :     val permutation = lastUseTbl (* reuse the space *)
579 :    
580 :     val _ = if debug andalso !traceOn then
581 :     pr("SHUFFLE "^ST.stackToString source^
582 :     "->"^ST.stackToString target^"\n")
583 :     else ()
584 :    
585 :     (* Compute the initial permutation *)
586 :     val n = ST.depth source
587 :     fun computeInitialPermutation(i) =
588 :     if i >= n
589 :     then ()
590 :     else let val f = ST.st(source, i)
591 :     val j = ST.fp(target, f)
592 :     in A.update(permutation, j, i);
593 :     computeInitialPermutation(i+1)
594 :     end
595 :     val _ = computeInitialPermutation 0
596 :    
597 :     (* Decompose the initial permutation into cycles.
598 :     * The cycle involving 0 is treated specially.
599 :     *)
600 :     val visited = useTbl
601 :     fun isVisited i = A.sub(visited,i) = stamp
602 :     fun markAsVisited i = A.update(visited,i,stamp)
603 :     fun decomposeCycles(i, cycle0, cycles) =
604 :     if i >= n then (cycle0, cycles)
605 :     else if isVisited i orelse
606 :     A.sub(permutation, i) = i (* trivial cycle *)
607 :     then decomposeCycles(i+1, cycle0, cycles)
608 :     else let fun makeCycle(j, cycle, zero) =
609 :     let val k = A.sub(permutation, j)
610 :     val cycle = j::cycle
611 :     val zero = zero orelse j = 0
612 :     in markAsVisited j;
613 :     if k = i then (cycle, zero)
614 :     else makeCycle(k, cycle, zero)
615 :     end
616 :     val (cycle, zero) = makeCycle(i, [], false)
617 :     in if zero then decomposeCycles(i+1, [cycle], cycles)
618 :     else decomposeCycles(i+1, cycle0, cycle::cycles)
619 :     end
620 :    
621 :     val (cycle0, cycles) = decomposeCycles(0, [], [])
622 :    
623 :     (*
624 :     * Generate shuffle for a cycle that does not involve 0.
625 :     * Given a cycle (c_1, ..., c_k), we generate this code:
626 :     * fxch %st(c_1),
627 :     * fxch %st(c_2),
628 :     * ...
629 :     * fxch %st(c_k),
630 :     * fxch %st(c_1)
631 :     *)
632 :     fun genxch([], code) = code
633 :     | genxch(c::cs, code) = genxch(cs, FXCH c::code)
634 :    
635 :     fun gen([], code) = error "shuffle.gen"
636 :     | gen(cs as (c::_), code) = FXCH c::genxch(cs, code)
637 :    
638 :     (*
639 :     * Generate shuffle for a cycle that involves 0.
640 :     * Given a cycle (c_1,...,c_k) we first shuffle this to
641 :     * an equivalent cycle (c_1, ..., c_k) where c'_k = 0,
642 :     * then we generate this code:
643 :     * fxch %st(c'_1),
644 :     * fxch %st(c'_2),
645 :     * ...
646 :     * fxch %st(c'_{k-1}),
647 :     *)
648 :     fun gen0([], code) = error "shuffle.gen0"
649 :     | gen0(cs, code) =
650 :     let fun rearrange(0::cs, cs') = cs@rev cs'
651 :     | rearrange(c::cs, cs') = rearrange(cs, c::cs')
652 :     | rearrange([], _) = error "shuffle.rearrange"
653 :     val cs = rearrange(cs, [])
654 :     in genxch(cs, code)
655 :     end
656 :    
657 :     (*
658 :     * Generate code. Must process the non-zero cycles first.
659 :     *)
660 :     val code = List.foldr gen code cycles
661 :     val code = List.foldr gen0 code cycle0
662 :     in code
663 :     end (* shuffle *)
664 :    
665 :     (*------------------------------------------------------------------
666 :     * Insert code at the end of a basic block.
667 :     * Make sure we put code in front of a transfer instruction
668 :     *------------------------------------------------------------------*)
669 :     fun insertAtEnd(insns, code) =
670 :     (case insns of
671 :     [] => code
672 :     | jmp::rest =>
673 :     if P.instrKind jmp = P.IK_JUMP then
674 :     jmp::code@rest
675 :     else
676 :     code@insns
677 :     )
678 :    
679 :     (*------------------------------------------------------------------
680 :     * Magic for inserting shuffle code at the end of a basic block
681 :     *------------------------------------------------------------------*)
682 :     fun shuffleOut(stackOut, insns, b, block, succ, liveOut) =
683 :     let val liveOut = removeNonPhysical liveOut
684 :    
685 :     (* Generate code that remove unnecessary values *)
686 :     val code = removeDeadValues(stackOut, liveOut, [])
687 :    
688 :     fun done(stackOut, insns, code) =
689 :     (A.update(bindingsOut,b,SOME stackOut);
690 :     insertAtEnd(insns, code)
691 :     )
692 :    
693 :     (* Generate code that shuffle values from source to target *)
694 :     fun match(source, target) =
695 :     done(target, insns, shuffle(source, target, []))
696 :    
697 :     (* Generate code that shuffle values from source to liveOut *)
698 :     fun matchLiveOut() =
699 :     case liveOut of
700 :     [] => done(stackOut, insns, code)
701 :     | _ => match(stackOut, newStack liveOut)
702 :    
703 :     (* With multiple successors, find out which one we
704 :     * should connect to. Choose the one from the block that
705 :     * follows from this one, if that exists, or else choose
706 :     * from the edge with the highest frequency.
707 :     *)
708 :     fun find([], _, id, best) = (id, best)
709 :     | find((F.BBLOCK{blknum, insns, ...},freq)::edges,
710 :     highestFreq, id, best) =
711 :     if blknum = b+1 then (blknum, A.sub(bindingsIn, blknum))
712 :     else (case A.sub(bindingsIn, blknum) of
713 :     NONE => find(edges, highestFreq, id, best)
714 :     | this as SOME stack =>
715 :     if highestFreq < !freq then
716 :     find(edges, !freq, blknum, this)
717 :     else
718 :     find(edges, highestFreq, id, best)
719 :     )
720 :     | find(_::edges, highestFreq, id, best) =
721 :     find(edges, highestFreq, id, best)
722 :    
723 :     fun splitAllEdgesExcept([], succBlock) = ()
724 :     | splitAllEdgesExcept((next as F.BBLOCK{blknum, ...},_)::edges,
725 :     succBlock) =
726 :     (if blknum <> succBlock andalso blknum <= b
727 :     then splitEdge(blknum,block,next) else ();
728 :     splitAllEdgesExcept(edges, succBlock)
729 :     )
730 :     | splitAllEdgesExcept((F.EXIT _,_)::_, _) =
731 :     error "can't split exit edge!"
732 :     | splitAllEdgesExcept(_::edges, succBlock) =
733 :     splitAllEdgesExcept(edges, succBlock)
734 :    
735 :     in case !succ of
736 :     [] => matchLiveOut()
737 :     | [(F.EXIT _,_)] => matchLiveOut()
738 :     | succ =>
739 :     (* Just one successor;
740 :     * try to match the bindings of the successor if it exist.
741 :     *)
742 :     let val (succBlock, target) = find(succ, ~1, ~1, NONE)
743 :     in splitAllEdgesExcept(succ, succBlock);
744 :     case target of
745 :     SOME stackIn => match(stackOut, stackIn)
746 :     | NONE => done(stackOut,insns,code)
747 :     end
748 :     end (* shuffleOut *)
749 :    
750 :     (*------------------------------------------------------------------
751 :     * Compute the initial fp stack bindings for basic block b.
752 :     *------------------------------------------------------------------*)
753 :     fun shuffleIn(b, block, pred, liveIn) =
754 :     let val liveInSet = removeNonPhysical liveIn
755 :    
756 :     (* With multiple predecessors, find out which one we
757 :     * should connect to. Choose the one from the block that
758 :     * falls into this one, if that exists, or else choose
759 :     * from the edge with the highest frequency.
760 :     *)
761 :     fun find([], _, best) = best
762 :     | find((F.BBLOCK{blknum, insns, ...},freq)::edges,
763 :     highestFreq, best) =
764 :     (case A.sub(bindingsOut, blknum) of
765 :     NONE => find(edges, highestFreq, best)
766 :     | this as SOME stack =>
767 :     if blknum = b-1 then (* falls into b *)
768 :     this
769 :     else if highestFreq < !freq then
770 :     find(edges, !freq, this)
771 :     else
772 :     find(edges, highestFreq, best)
773 :     )
774 :     | find(_::edges, highestFreq, best) =
775 :     find(edges, highestFreq, best)
776 :    
777 :     fun splitAllDoneEdges [] = ()
778 :     | splitAllDoneEdges
779 :     ((prev as F.BBLOCK{blknum, ...},_)::edges) =
780 :     (if blknum < b
781 :     then splitEdge(b,prev,block) else ();
782 :     splitAllDoneEdges edges
783 :     )
784 :     | splitAllDoneEdges(_::edges) = splitAllDoneEdges edges
785 :    
786 : leunga 744 (* The initial stack bindings are determined by the live set.
787 : leunga 731 * No compensation code is needed.
788 :     *)
789 :     fun fromLiveIn() =
790 :     let val stackIn =
791 :     case liveInSet of
792 :     [] => ST.stack0
793 :     | _ =>
794 : leunga 744 (pr("liveIn="^C.CellSet.toString (!liveIn)^"\n");
795 : leunga 731 newStack liveInSet
796 :     )
797 :     val stackOut = ST.copy stackIn
798 :     in (stackIn, stackOut, [])
799 :     end
800 :    
801 :     val (stackIn, stackOut, code) =
802 :     case find(!pred, ~1, NONE) of
803 :     NONE => (splitAllDoneEdges(!pred); fromLiveIn())
804 :     | SOME stackIn' =>
805 :     (case !pred of
806 :     [_] => (* one predecessor *)
807 :     (* Use the bindings as from the previous block
808 :     * We first have to deallocate all unused values.
809 :     *)
810 :     let val stackOut = ST.copy stackIn'
811 :     (* Clean the stack of unused entries *)
812 :     val code = removeDeadValues(stackOut, liveInSet, [])
813 :     in (stackIn', stackOut, code) end
814 :     | _ => (* more than one predecessors *)
815 :     let val stackIn = ST.copy stackIn'
816 :     val code = removeDeadValues(stackIn, liveInSet, [])
817 :     val stackOut = ST.copy stackIn
818 :     in (* If we have to generate code to deallocate
819 :     * the stack then we have split the edge.
820 :     *)
821 :     case code of
822 :     [] => ()
823 :     | _ => splitAllDoneEdges(!pred);
824 :     (stackIn, stackOut, [])
825 :     end
826 :     )
827 :     in A.update(bindingsIn, b, SOME stackIn);
828 :     A.update(bindingsOut, b, SOME stackOut);
829 :     (stackIn, stackOut, code)
830 :     end
831 :    
832 :     (*------------------------------------------------------------------
833 :     * Code for patching up critical edges.
834 :     * The trick is finding a good place to insert the critical edges.
835 :     * The cluster representation is very hard to work with.
836 :     *------------------------------------------------------------------*)
837 :     fun repairCriticalEdges
838 :     (cluster as F.CLUSTER{blocks, entry, exit, annotations,
839 : leunga 744 blkCounter}) =
840 : leunga 731 let (* Data structure for recording critical edge splitting info *)
841 :     datatype compensationCode =
842 :     NEWEDGE of
843 :     {label:L.label, (* label of new block *)
844 :     preds:F.block list ref, (* predecessors *)
845 :     code:I.instruction list,(* code *)
846 :     comment:an
847 :     }
848 :    
849 :     val cleanup = [#create MLRiscAnnotations.COMMENT "cleanup edge"]
850 :     val critical = [#create MLRiscAnnotations.COMMENT "critical edge"]
851 :    
852 :     exception Nothing
853 :    
854 :     (* Repair code table; mapping from block id -> compensation code *)
855 : blume 733 val repairCodeTable = IntHashTable.mkTable(32, Nothing)
856 :     val addRepairCode = IntHashTable.insert repairCodeTable
857 : leunga 744 fun lookupRepairCode b =
858 :     getOpt(IntHashTable.find repairCodeTable b,[])
859 : leunga 731
860 :     (* Repair code table; mapping from block id -> compensation code
861 :     * These must be relocated...
862 :     *)
863 : blume 733 val repairCodeTable' = IntHashTable.mkTable(32, Nothing)
864 :     val addRepairCode' = IntHashTable.insert repairCodeTable'
865 : leunga 744 fun lookupRepairCode' b =
866 :     getOpt(IntHashTable.find repairCodeTable' b,[])
867 : leunga 731
868 :     (* Mapping from block id -> labels *)
869 : blume 733 val labelTable = IntHashTable.mkTable(32, Nothing)
870 :     val addLabel = IntHashTable.insert labelTable
871 : leunga 744 fun lookupLabel b = getOpt(IntHashTable.find labelTable b, [])
872 : leunga 731
873 :     (* Scan code and insert labels *)
874 :     fun insertLabels([], []) = ()
875 :     | insertLabels(labels, []) = error "orphan labels"
876 :     | insertLabels(labels, F.LABEL l::blocks) =
877 :     insertLabels(l::labels, blocks)
878 :     | insertLabels(labels, (b as F.BBLOCK{blknum,...})::blocks) =
879 :     (addLabel(blknum, labels); insertLabels([], blocks))
880 :     | insertLabels(_, _::blocks) = insertLabels([], blocks)
881 :     (* skip labels to pseudo ops *)
882 :    
883 :     val _ = insertLabels([], blocks)
884 :    
885 :     (* Does the block falls thru from the previous block? *)
886 :     fun isFallsThru(F.BBLOCK{pred, blknum=j, ...}) =
887 :     let fun loop((F.BBLOCK{blknum=i,...},_)::rest) =
888 :     i+1 = j orelse loop rest
889 :     | loop((F.ENTRY _,_)::_) = true
890 :     | loop(_) = false
891 :     in loop(!pred)
892 :     end
893 :     | isFallsThru _ = false
894 :    
895 :     (* Create jump instruction to a block *)
896 :     fun jump(F.BBLOCK{blknum,...}) =
897 :     (case lookupLabel blknum of
898 :     [] => error "no label to target of critical edge!"
899 :     | l::_ => P.jump l
900 :     )
901 :     | jump _ = error "jump"
902 :    
903 :     (*
904 :     * Special case: target block has stack depth of 0.
905 :     * Just generate code that pop entries from the sources.
906 :     * To make things interesting, we try to share code among
907 :     * all the critical edges.
908 :     *)
909 :     fun genPoppingCode(_, []) = ()
910 :     | genPoppingCode(targetId, edges as (_,target)::_) =
911 :     let val preds =
912 :     map (fn (source, _) =>
913 :     let val sourceId = blknumOf source
914 :     val SOME stackOut = A.sub(bindingsOut,sourceId)
915 :     val n = ST.depth stackOut
916 :     in (n, source) end
917 :     ) edges
918 :     (* Ordered by increasing stack height *)
919 :     val preds =
920 :     ListMergeSort.sort (fn ((n,_),(m,_)) => n > m) preds
921 :    
922 :     val relocate = isFallsThru target
923 :    
924 :     fun pop(0, code) = code
925 :     | pop(n, code) = pop(n-1,POP_ST::code)
926 :    
927 :     fun makeCode(popCount, rest) =
928 :     let val code = pop(popCount, [])
929 :     in case rest of
930 :     [] => if relocate then jump target::code
931 :     else code
932 :     | _ => code
933 :     end
934 :    
935 :     (* Generate code, share code between edges that
936 :     * have to pop the same number of elements
937 :     *)
938 :     fun gen([], h, code) = code
939 :     | gen((n,b)::rest, _, []) =
940 :     gen(rest, n,
941 :     [NEWEDGE{label=L.newLabel "",preds=ref [b],
942 :     code=makeCode(n,rest), comment=cleanup}])
943 :     | gen((n,b)::rest, h, all as (NEWEDGE{preds, ...}::_)) =
944 :     gen(rest,h,
945 :     if n = h then
946 :     (preds := b :: !preds; all)
947 :     else
948 :     NEWEDGE{label=L.newLabel "", preds=ref [b],
949 :     code=makeCode(n-h,rest),
950 :     comment=cleanup}::all
951 :     )
952 :     val repairCode = gen(preds, 0, [])
953 :     in (if relocate then addRepairCode' else addRepairCode)
954 :     (targetId, repairCode)
955 :     end
956 :    
957 :     (* The general case:
958 :     * Remove dead values, then
959 :     * Shuffle.
960 :     *)
961 :     fun genRepairCode(targetId, stackIn, edges) =
962 :     let val repairList = ref []
963 :     val repairCount = ref 0
964 :     val SOME stackIn = A.sub(bindingsIn, targetId)
965 :     fun repair(source, target) =
966 :     let val F.BBLOCK{blknum=sourceId, ...} = source
967 :     val SOME stackOut' = A.sub(bindingsOut, sourceId)
968 :     fun createNewRepairEdge() =
969 :     let val stackOut = ST.copy stackOut'
970 :     val F.BBLOCK{liveIn, ...} = target
971 :     val liveInSet = removeNonPhysical liveIn
972 :     val _ =
973 :     if debug then
974 :     pr("LiveIn = "^
975 : leunga 744 C.CellSet.toString (!liveIn)^
976 : leunga 731 "\n")
977 :     else ()
978 :    
979 :     (* deallocate unused values *)
980 :     val code = removeDeadValues(stackOut, liveInSet, [])
981 :     (* shuffle values *)
982 :     val code = shuffle(stackOut, stackIn, code)
983 :     fun addNewEdge() =
984 :     let (* Do we need to relocate this block? *)
985 :     val relocate = !repairCount > 0 orelse
986 :     isFallsThru target andalso
987 :     sourceId + 1 <> targetId
988 :    
989 :     (* add a jump to the target block *)
990 :     val code = if relocate then jump target::code
991 :     else code
992 :    
993 :     val repairCode =
994 :     NEWEDGE{label=L.newLabel "",
995 :     preds=ref [source],
996 :     code=code, comment=critical}
997 :     in repairCount := !repairCount + 1;
998 :     repairList := (repairCode, stackOut')
999 :     :: !repairList;
1000 :     if relocate then
1001 :     addRepairCode'(targetId,
1002 :     repairCode::lookupRepairCode' targetId)
1003 :     else
1004 :     addRepairCode(targetId,
1005 :     repairCode::lookupRepairCode targetId)
1006 :     end
1007 :     in case source of
1008 :     F.BBLOCK{succ=ref [(F.BBLOCK{blknum=j,...},_)],
1009 :     insns, ...} =>
1010 :     if j = targetId then (*insert code at predecessor*)
1011 :     insns := insertAtEnd(!insns, code)
1012 :     else
1013 :     addNewEdge()
1014 :     | _ => addNewEdge()
1015 :     end
1016 :    
1017 :     fun shareRepairEdge [] = false
1018 :     | shareRepairEdge((NEWEDGE{preds,...},stackOut'')::rest) =
1019 :     if ST.equal(stackOut'', stackOut') then
1020 :     (preds := source :: !preds; true)
1021 :     else shareRepairEdge rest
1022 :    
1023 :     in if shareRepairEdge(!repairList) then ()
1024 :     else createNewRepairEdge()
1025 :     end
1026 :     in app repair edges
1027 :     end
1028 :    
1029 :     (*
1030 :     * Code to split critical edges entering block targetId
1031 :     *)
1032 :     fun split(targetId, edges) =
1033 :     let val SOME stackIn = A.sub(bindingsIn,targetId)
1034 :     fun log(source, target) =
1035 :     let val s = blknumOf source
1036 :     val t = blknumOf target
1037 :     val SOME stackOut = A.sub(bindingsOut,s)
1038 :     in pr("SPLIT "^i2s s^"->"^i2s t^" "^
1039 :     ST.stackToString stackOut^"->"^
1040 :     ST.stackToString stackIn^"\n")
1041 :     end
1042 :     val _ = if debug andalso !traceOn then app log edges else ()
1043 :     in if ST.depth stackIn = 0 then genPoppingCode(targetId, edges)
1044 :     else genRepairCode(targetId, stackIn, edges)
1045 :     end
1046 :    
1047 :     (* Renumber all the blocks and insert compensation code at the
1048 :     * right places.
1049 :     *)
1050 :     fun renumberBlocks() =
1051 : blume 733 let val labelTbl = IntHashTable.mkTable(32, Nothing)
1052 :     val addLabel = IntHashTable.insert labelTbl
1053 : leunga 731 fun insertLabel(L.Label{id, ...},block) = addLabel(id, block)
1054 :    
1055 :     val entries = ref []
1056 :     val exits = ref []
1057 :    
1058 :     (* retarget the branch of block *)
1059 : leunga 775 fun retarget(I.JMP(I.ImmedLabel(T.LABEL _), [_])::rest, l) =
1060 :     I.JMP(I.ImmedLabel(T.LABEL l), [l])::rest
1061 :     | retarget(I.JCC{cond,opnd=I.ImmedLabel(T.LABEL _)}::rest,l)=
1062 :     I.JCC{cond=cond,opnd=I.ImmedLabel(T.LABEL l)}::rest
1063 : leunga 731 | retarget(_,l) = error "retarget"
1064 :    
1065 :     (* Translate repair code to actual block *)
1066 :     fun transRepair(n, [], blocks) = (n, blocks)
1067 :     | transRepair(n, NEWEDGE{label, preds, code, comment}::rest,
1068 :     blocks) =
1069 :     let val blocks = F.LABEL label::blocks
1070 :     val this = F.BBLOCK{blknum=n, freq=ref 0,
1071 :     pred=ref [],
1072 :     succ=ref [],
1073 :     annotations=ref comment,
1074 :     liveIn=ref C.empty,
1075 :     liveOut=ref C.empty,
1076 :     insns=ref code}
1077 :     fun retargetBlock(F.BBLOCK{insns, ...}) =
1078 :     insns := retarget(!insns, label)
1079 :     | retargetBlock _ = ()
1080 :     in if debug andalso !traceOn then
1081 :     pr("Inserting critical edge at block "^i2s n^" "^
1082 :     L.nameOf label^"\n")
1083 :     else ();
1084 :     insertLabel(label, this);
1085 :     app retargetBlock (!preds);
1086 :     transRepair(n+1, rest, this::blocks)
1087 :     end
1088 :    
1089 :     fun renumber(n, [], pseudoOps, repairCode', blocks) =
1090 :     let val (n, blocks) = transRepair(n, repairCode', blocks)
1091 :     val blocks = pseudoOps @ blocks
1092 :     in (n, rev blocks)
1093 :     end
1094 :     | renumber(n, (block as
1095 :     F.BBLOCK{blknum,annotations,insns,freq,
1096 :     pred,succ,liveIn,liveOut,...})::rest,
1097 :     pseudoOps, repairCode', blocks) =
1098 :     let (* If we have outstanding repair code and this is
1099 :     * NOT a fallsthru entry, then insert them here.
1100 :     *)
1101 :     val (n, blocks, repairCode') =
1102 :     case repairCode' of
1103 :     [] => (n, blocks, [])
1104 :     | _ => if isFallsThru block then
1105 :     (n, blocks, repairCode')
1106 :     else
1107 :     let val (n, blocks) =
1108 :     transRepair(n, repairCode', blocks)
1109 :     in (n, blocks, [])
1110 :     end
1111 :    
1112 :     (* Insert non-relocatable repair code *)
1113 :     val repairCode = lookupRepairCode blknum
1114 :     val (n, blocks) = transRepair(n, repairCode, blocks)
1115 :    
1116 :     (* Create this block *)
1117 :     val this = F.BBLOCK{blknum=n,annotations=annotations,
1118 :     freq=freq, insns=insns,
1119 :     pred=ref [], succ=ref [],
1120 :     liveIn=liveIn,liveOut=liveOut
1121 :     }
1122 :    
1123 :     (* Insert new relocatable repair code *)
1124 :     val repairCode' = repairCode' @
1125 :     lookupRepairCode' blknum
1126 :    
1127 :     (* Insert labels *)
1128 :     fun insertLabels((p as F.LABEL l)::ps) =
1129 :     (insertLabel(l, this); insertLabels ps)
1130 :     | insertLabels(p::ps) = insertLabels ps
1131 :     | insertLabels [] = blocks
1132 :    
1133 :     val _ = insertLabels pseudoOps
1134 :    
1135 :     val blocks = this::pseudoOps @ blocks
1136 :    
1137 :     fun addEntry((F.ENTRY _,w)::_) =
1138 :     entries := (this,w):: !entries
1139 :     | addEntry(_::es) = addEntry es
1140 :     | addEntry [] = ()
1141 :     fun addExit((F.EXIT _,w)::_) =
1142 :     exits := (this,w) :: !exits
1143 :     | addExit(_::es) = addExit es
1144 :     | addExit [] = ()
1145 :    
1146 :     in addEntry(!pred); (* check if this is an entry *)
1147 :     addExit(!succ); (* check if this ia an exit *)
1148 :     renumber(n+1, rest, [], repairCode', blocks)
1149 :     end
1150 :     | renumber(n, p::rest, pseudoOps, repairCode', blocks) =
1151 :     renumber(n, rest, p::pseudoOps, repairCode', blocks)
1152 :    
1153 :     val (n, blocks) = renumber(0, blocks, [], [], [])
1154 :    
1155 :     (* New entry and exits *)
1156 :     val F.ENTRY{freq=entryFreq, ...} = entry
1157 :     val newEntry = F.ENTRY{blknum=n, freq=entryFreq, succ=entries}
1158 :     val n = n+1
1159 :     val F.EXIT{freq=exitFreq, ...} = exit
1160 :     val newExit = F.EXIT{blknum=n, freq=exitFreq, pred=exits}
1161 :     val n = n+1
1162 :    
1163 : leunga 744 val lookupLabel = IntHashTable.find labelTbl
1164 :     val lookupLabel =
1165 :     fn l => case lookupLabel l of
1166 :     SOME b => b
1167 :     | NONE => newExit
1168 : leunga 731
1169 :     fun addPred b (F.BBLOCK{pred, ...},w) = pred := (b,w) :: !pred
1170 :     | addPred b (F.EXIT{pred, ...},w) = pred := (b,w) :: !pred
1171 :     | addPred _ _ = error "addPred"
1172 :    
1173 :     fun adjustSucc(
1174 :     (blk as F.BBLOCK{blknum,insns,succ,pred,...})::rest) =
1175 :     let fun follows(F.LABEL _::rest) = follows rest
1176 :     | follows((b as F.BBLOCK _)::rest) = (b, ref 0)
1177 :     | follows [] = (newExit, ref 0)
1178 :     fun succBlocks([], succ) = succ
1179 :     | succBlocks(P.ESCAPES::targets, succ) =
1180 :     succBlocks(targets, (newExit, ref 0)::succ)
1181 :     | succBlocks(P.FALLTHROUGH::targets, succ) =
1182 :     succBlocks(targets, follows rest::succ)
1183 :     | succBlocks(P.LABELLED(L.Label{id,...})::targets,
1184 :     succ) =
1185 :     succBlocks(targets, (lookupLabel id, ref 0)::succ)
1186 :     fun fallsThru rest = [follows rest]
1187 :     in case !insns of
1188 :     [] => succ := fallsThru rest
1189 :     | jmp::_ =>
1190 :     case P.instrKind jmp of
1191 :     P.IK_JUMP =>
1192 :     succ := succBlocks(P.branchTargets jmp,[])
1193 :     | _ => succ := fallsThru rest;
1194 :     app (addPred blk) (!succ);
1195 :     adjustSucc rest
1196 :     end
1197 :     | adjustSucc(_::rest) = adjustSucc rest
1198 :     | adjustSucc [] = ()
1199 :    
1200 :     val _ = adjustSucc blocks
1201 :     val _ = app (addPred entry) (!entries)
1202 :    
1203 :     in F.CLUSTER{blkCounter=ref n,
1204 :     annotations=annotations,
1205 :     blocks=blocks,
1206 :     entry=newEntry,
1207 :     exit=newExit
1208 :     }
1209 :     end
1210 :    
1211 : leunga 744 in insertLabels([], blocks);
1212 :     IntHashTable.appi split edgesToSplit;
1213 :     renumberBlocks()
1214 : leunga 731 end
1215 :    
1216 :    
1217 :     (*------------------------------------------------------------------
1218 :     * Process all blocks
1219 :     *------------------------------------------------------------------*)
1220 :     fun rewriteAllBlocks
1221 :     (stamp,
1222 :     (block as F.BBLOCK{blknum, insns, liveIn, liveOut,
1223 :     annotations, pred, succ, ...})::rest) =
1224 :     let val stamp = rewrite(stamp, blknum, block,
1225 :     insns, liveIn, liveOut,
1226 :     pred, succ, annotations)
1227 :     in rewriteAllBlocks(stamp+1, rest)
1228 :     end
1229 :     | rewriteAllBlocks(stamp, F.LABEL l::rest) =
1230 :     (if debug andalso !debugOn then pr(Label.nameOf l^":\n") else ();
1231 :     rewriteAllBlocks(stamp, rest)
1232 :     )
1233 :     | rewriteAllBlocks(stamp, _::rest) = rewriteAllBlocks(stamp, rest)
1234 :     | rewriteAllBlocks(stamp, []) = ()
1235 :    
1236 :     (*------------------------------------------------------------------
1237 :     * Translate code within a basic block.
1238 :     * Each instruction is given a unique stamp for identifying last
1239 :     * uses.
1240 :     *------------------------------------------------------------------*)
1241 :     and rewrite(stamp, blknum, block, insns, liveIn, liveOut,
1242 :     pred, succ, annotations) =
1243 :     let val (stackIn, stack, code) = shuffleIn(blknum, block, pred, liveIn)
1244 :    
1245 :     (* Dump instructions when encountering a bug *)
1246 :     fun bug msg =
1247 :     (pr("-------- bug in block "^i2s blknum^" ----\n");
1248 :     dump(!insns);
1249 :     error msg
1250 :     )
1251 :    
1252 :     fun loop(stamp, [], [], code) = (stamp, code)
1253 :     | loop(stamp, instr::rest, (lastUse,dead)::lastUses, code) =
1254 :     let fun mark(tbl, []) = ()
1255 :     | mark(tbl, r::rs) =
1256 : george 889 (A.update(tbl, CB.registerNum r, stamp); mark(tbl, rs))
1257 : leunga 731 in mark(lastUseTbl,lastUse); (* mark all last uses *)
1258 :     trans(stamp, instr, [], rest, dead, lastUses, code)
1259 :     end
1260 :     | loop _ = error "loop"
1261 :    
1262 :     (*
1263 :     * Main routine that does the actual translation.
1264 :     * A few reminders:
1265 :     * o The instructions are processed in normal order
1266 :     * and generated in the reversed order.
1267 :     * o (Local) liveness is computed at the same time.
1268 :     * o For each use, we have to find out whether it is
1269 :     * the last use. If so, we can kill it and reclaim
1270 :     * the stack entry at the same time.
1271 :     *)
1272 :     and trans(stamp, instr, an, rest, dead, lastUses, code) =
1273 :     let (* Call this continuation when done with code generation *)
1274 :     fun FINISH code = loop(stamp+1, rest, lastUses, code)
1275 :    
1276 :     (* Call this continuation when done with floating point
1277 :     * code generation. Remove all dead code first.
1278 :     *)
1279 :     fun DONE code =
1280 :     let fun kill([], code) = FINISH code
1281 :     | kill(f::fs, code) =
1282 : george 889 let val fx = CB.registerNum f
1283 : leunga 744 in if debug andalso debugDead then
1284 :     pr("DEAD "^fregToString f^" in "^
1285 :     ST.stackToString stack^"\n")
1286 :     else ();
1287 :     (* not a physical register *)
1288 :     if fx >= 8 then kill(fs, code)
1289 :     else
1290 :     let val i = ST.fp(stack, fx)
1291 :     in if debug andalso debugDead then
1292 :     pr("KILLING "^fregToString f^
1293 :     "=%st("^i2s i^")\n")
1294 :     else ();
1295 :     if i < 0 then kill(fs, code) (* dead already *)
1296 :     else if i = 0 then
1297 :     (ST.pop stack; kill(fs, POP_ST::code))
1298 :     else
1299 :     (ST.xch(stack,0,i); ST.pop stack;
1300 :     kill(fs, I.FSTPL(ST i)::code)
1301 :     )
1302 :     end
1303 :     end
1304 : leunga 731 in kill(dead, code)
1305 :     end
1306 :    
1307 :     (* Is this the last use of register f? *)
1308 :     fun isLastUse f = A.sub(lastUseTbl, f) = stamp
1309 :    
1310 :     (* Is this value dead? *)
1311 :     fun isDead f =
1312 :     let fun loop [] = false
1313 : george 889 | loop(r::rs) = CB.sameColor(f,r) orelse loop rs
1314 : leunga 731 in loop dead end
1315 :    
1316 :     (* Dump the stack before each intruction for debugging *)
1317 :     fun log() = if debug andalso !traceOn then
1318 :     pr(ST.stackToString stack^assemble instr^"...\n")
1319 :     else ()
1320 :    
1321 :     (* Find the location of a source register *)
1322 :     fun getfs(f) =
1323 : george 889 let val fx = CB.registerNum f
1324 : leunga 744 val s = ST.fp(stack, fx)
1325 :     in (isLastUse fx,s) end
1326 : leunga 731
1327 :     (* Generate memory to memory move *)
1328 :     fun mmmove(fsize,src,dst) =
1329 :     let val _ = ST.nonFull stack
1330 :     val code = FLD(fsize,src)::code
1331 :     val code = mark(FSTP(fsize,dst),an)::code
1332 :     in DONE code end
1333 :    
1334 :     (* Allocate a new register in %st(0) *)
1335 : george 889 fun alloc(f,code) = (ST.push(stack,CB.registerNum f); code)
1336 : leunga 731
1337 :     (* register -> register move *)
1338 :     fun rrmove(fs,fd) =
1339 : george 889 if CB.sameColor(fs,fd) then DONE code
1340 : leunga 731 else
1341 : leunga 744 let val (dead,ss) = getfs fs
1342 : leunga 731 in if dead then (* fs is dead *)
1343 : george 889 (ST.set(stack,ss,CB.registerNum fd); (* rename fd to fs *)
1344 : leunga 731 DONE code (* no code is generated *)
1345 :     )
1346 :     else (* fs is not dead; push it onto %st(0);
1347 :     * set fd to %st(0)
1348 :     *)
1349 :     let val code = alloc(fd, code)
1350 :     in DONE(mark(I.FLDL(ST ss),an)::code)
1351 :     end
1352 :     end
1353 :    
1354 :     (* memory -> register move.
1355 :     * Do dead code elimination here.
1356 :     *)
1357 :     fun mrmove(fsize,src,fd) =
1358 : leunga 744 if isDead fd
1359 : leunga 731 then FINISH code (* value has been killed *)
1360 :     else
1361 :     let val code = alloc(fd, code)
1362 :     in DONE(mark(FLD(fsize,src),an)::code)
1363 :     end
1364 :    
1365 :     (* exchange %st(n) and %st(0) *)
1366 :     fun xch(n) = (ST.xch(stack,0,n); FXCH n)
1367 :    
1368 :     (* push %st(n) onto the stack *)
1369 :     fun push(n) = (ST.push(stack,~2); I.FLDL(ST n))
1370 :    
1371 :    
1372 :     (* push mem onto the stack *)
1373 :     fun pushmem(src) = (ST.push(stack,~2); I.FLDL(src))
1374 :    
1375 :     (* register -> memory move.
1376 :     * Use pop version of the opcode if it is the last use.
1377 :     *)
1378 :     fun rmmove(fsize,fs,dst) =
1379 :     let fun fstp(code) =
1380 :     (ST.pop stack; DONE(mark(FSTP(fsize,dst),an)::code))
1381 :     fun fst(code) = DONE(mark(FST(fsize,dst),an)::code)
1382 :     in case getfs fs of
1383 :     (true, 0) => fstp code
1384 :     | (true, n) => fstp(xch n::code)
1385 :     | (false, 0) => fst(code)
1386 :     | (false, n) => fst(xch n::code)
1387 :     end
1388 :    
1389 :     (* Floating point move *)
1390 :     fun fmove{fsize,src=I.FPR fs,dst=I.FPR fd} = rrmove(fs,fd)
1391 :     | fmove{fsize,src,dst=I.FPR fd} = mrmove(fsize,src,fd)
1392 :     | fmove{fsize,src=I.FPR fs,dst} = rmmove(fsize,fs,dst)
1393 :     | fmove{fsize,src,dst} = mmmove(fsize,src,dst)
1394 :    
1395 :     (* Floating point integer load operator *)
1396 :     fun fiload{isize,ea,dst=I.FPR fd} =
1397 : leunga 744 let val code = alloc(fd, code)
1398 : leunga 731 val code = mark(FILD(isize,ea),an)::code
1399 :     in DONE code
1400 :     end
1401 :     | fiload{isize,ea,dst} =
1402 :     let val code = mark(FILD(isize,ea),an)::code
1403 :     val code = I.FSTPL(dst)::code (* XXX *)
1404 :     in DONE code
1405 :     end
1406 :    
1407 :     (* Make a copy of register fs to %st(0). *)
1408 :     fun moveregtotop(fs, code) =
1409 :     (case getfs fs of
1410 :     (true, 0) => code
1411 :     | (true, n) => xch n::code
1412 :     | (false, n) => push n::code
1413 :     )
1414 :    
1415 :     fun movememtotop(fsize, mem, code) =
1416 :     (ST.push(stack, ~2); FLD(fsize, mem)::code)
1417 :    
1418 :     (* Move an operand to top of stack *)
1419 :     fun movetotop(fsize, I.FPR fs, code) = moveregtotop(fs, code)
1420 :     | movetotop(fsize, mem, code) = movememtotop(fsize, mem, code)
1421 :    
1422 :     fun storeResult(fsize, dst, n, code) =
1423 :     case dst of
1424 : george 889 I.FPR fd => (ST.set(stack, n, CB.registerNum fd); DONE code)
1425 : leunga 731 | mem =>
1426 :     let val code = if n = 0 then code else xch n::code
1427 :     in ST.pop stack; DONE(FSTP(fsize, mem)::code) end
1428 :    
1429 :     (* Floating point unary operator *)
1430 :     fun funop{fsize,unOp,src,dst} =
1431 :     let val code = movetotop(fsize, src, code)
1432 :     val code = mark(I.FUNARY unOp,an)::code
1433 :    
1434 :     (* Moronic hack to deal with partial tangent! *)
1435 :     val code =
1436 :     case unOp of
1437 :     I.FPTAN =>
1438 :     (if ST.depth stack >= 7 then error "FPTAN"
1439 :     else ();
1440 :     POP_ST::code (* pop the useless 1.0 *)
1441 :     )
1442 :     | _ => code
1443 :     in storeResult(fsize, dst, 0, code)
1444 :     end
1445 :    
1446 :     (* Floating point binary operator.
1447 :     * Note:
1448 :     * binop src, dst
1449 :     * means dst := dst binop src
1450 :     * (lsrc := lsrc binop rsrc)
1451 :     * on the x86
1452 :     *)
1453 :     fun fbinop{fsize,binOp,lsrc,rsrc,dst} =
1454 :     let (* generate code and set %st(n) = fd *)
1455 :     (* op2 := op1 - op2 *)
1456 :     fun oper(binOp,op1,op2,n,code) =
1457 :     let val code =
1458 :     mark(I.FBINARY{binOp=binOp,src=op1,dst=op2},an)
1459 :     ::code
1460 :     in storeResult(I.FP64, dst, n, code)
1461 :     end
1462 :    
1463 :     fun operR(binOp,op1,op2,n,code) =
1464 :     oper(invert binOp,op1,op2,n,code)
1465 :    
1466 :     fun operP(binOp,op1,op2,n,code) =
1467 :     (ST.pop stack; oper(pop binOp,op1,op2,n-1,code))
1468 :    
1469 :     fun operRP(binOp,op1,op2,n,code) =
1470 :     (ST.pop stack; operR(pop binOp,op1,op2,n-1,code))
1471 :    
1472 :     (* Many special cases to consider.
1473 :     * Basically, try to reuse stack space as
1474 :     * much as possible by taking advantage of last uses.
1475 :     *
1476 :     * Stack=[st(0)=3.0 st(1)=2.0]
1477 :     * fsub %st(1), %st [1,2.0]
1478 :     * fsubr %st(1), %st [-1,2.0]
1479 :     * fsub %st, %st(1) [3.0,1.0]
1480 :     * fsubr %st, %st(1) [3.0,-1.0]
1481 :     *
1482 :     * fsubp %st, %st(1) [1]
1483 :     * fsubrp %st, %st(1) [-1]
1484 :     * So,
1485 :     * fsub %st(n), %st (means %st - %st(n) -> %st)
1486 :     * fsub %st, %st(n) (means %st - %st(n) -> %st(n))
1487 :     * fsubr %st(n), %st (means %st(n) - %st -> %st)
1488 :     * fsubr %st, %st(n) (means %st(n) - %st -> %st(n))
1489 :     *)
1490 :     fun reg2(fx, fy) =
1491 :     let val (dx, sx) = getfs fx
1492 :     val (dy, sy) = getfs fy
1493 :     fun loop(dx, sx, dy, sy, code) =
1494 :     (* op1, op2 (dst) *)
1495 :     case (dx, sx, dy, sy) of
1496 :     (true, 0, false, n) => oper(binOp,ST n,ST0,0,code)
1497 :     | (false, n, true, 0) => operR(binOp,ST n,ST0,0,code)
1498 :     | (true, n, true, 0) => operRP(binOp,ST0,ST n,n,code)
1499 :     | (true, 0, true, n) => operP(binOp,ST0,ST n,n,code)
1500 :     | (false, 0, true, n) => oper(binOp,ST0,ST n,n,code)
1501 :     | (true, n, false, 0) => operR(binOp,ST0,ST n,n,code)
1502 :     | (true, sx, dy, sy) =>
1503 :     loop(true, 0, dy, sy, xch sx::code)
1504 :     | (dx, sx, true, sy) =>
1505 :     loop(dx, sx, true, 0, xch sy::code)
1506 :     | (false, sx, false, sy) =>
1507 :     loop(true, 0, false, sy+1, push sx::code)
1508 :     in if sx = sy then (* same register *)
1509 :     let val code =
1510 :     case (dx, sx) of
1511 :     (true, 0) => code
1512 :     | (true, n) => xch n::code
1513 :     | (false, n) => push n::code
1514 :     in oper(binOp,ST0,ST0,0,code)
1515 :     end
1516 :     else loop(dx, sx, dy, sy, code)
1517 :     end
1518 :    
1519 :     (* reg/mem operands *)
1520 :     fun regmem(binOp, fx, mem) =
1521 :     case getfs fx of
1522 :     (true, 0) => oper(binOp,mem,ST0,0,code)
1523 :     | (true, n) => oper(binOp,mem,ST0,0,xch n::code)
1524 :     | (false, n) => oper(binOp,mem,ST0,0,push n::code)
1525 :    
1526 :     (* Two memory operands. Optimize the case when
1527 :     * the two operands are identical.
1528 :     *)
1529 :     fun mem2(lsrc, rsrc) =
1530 :     let val _ = ST.push(stack,~2)
1531 :     val code = FLD(fsize,lsrc)::code
1532 :     val rsrc = if P.eqOpn(lsrc, rsrc) then ST0 else rsrc
1533 :     in oper(binOp,rsrc,ST0,0,code)
1534 :     end
1535 :    
1536 :     fun process(I.FPR fx, I.FPR fy) = reg2(fx, fy)
1537 :     | process(I.FPR fx, mem) = regmem(binOp, fx, mem)
1538 :     | process(mem, I.FPR fy) = regmem(invert binOp, fy, mem)
1539 :     | process(lsrc, rsrc) = mem2(lsrc, rsrc)
1540 :    
1541 :     in process(lsrc, rsrc)
1542 :     end
1543 :    
1544 :     (* Floating point binary operator with integer conversion *)
1545 :     fun fibinop{isize,binOp,lsrc,rsrc,dst} =
1546 :     let fun oper(binOp,src,code) =
1547 :     let val code = mark(I.FIBINARY{binOp=binOp,src=src},an)
1548 :     ::code
1549 :     in storeResult(I.FP64, dst, 0, code)
1550 :     end
1551 :    
1552 :     fun regmem(binOp, fx, mem) =
1553 :     case getfs fx of
1554 :     (true, 0) => oper(binOp, mem, code)
1555 :     | (true, n) => oper(binOp, mem, xch n::code)
1556 :     | (false, n) => oper(binOp, mem, push n::code)
1557 :    
1558 :     in case (lsrc, rsrc) of
1559 :     (I.FPR fx, mem) => regmem(binOp, fx, mem)
1560 :     | (lsrc, rsrc) => oper(binOp, rsrc, pushmem lsrc::code)
1561 :     end
1562 :    
1563 :     (* Floating point comparison
1564 :     * We have to make sure there are enough registers.
1565 :     * The trick is that tmp is always a physical register.
1566 :     * So we can always use it as temporary space if we
1567 :     * have run out.
1568 :     *)
1569 :     fun fcmp{fsize,lsrc,rsrc} =
1570 :     let fun fucompp() =
1571 :     (ST.pop stack; ST.pop stack; mark(I.FUCOMPP,an))
1572 :     fun fucomp(n) =
1573 :     (ST.pop stack; mark(I.FUCOMP(ST n),an))
1574 :     fun fucom(n) = mark(I.FUCOM(ST n),an)
1575 :    
1576 :     fun genmemcmp() =
1577 :     let val code = movememtotop(fsize, rsrc, code)
1578 :     val code = movememtotop(fsize, lsrc, code)
1579 :     in FINISH(fucompp()::code)
1580 :     end
1581 :    
1582 :     fun genmemregcmp(lsrc, fy) =
1583 :     case getfs fy of
1584 :     (false, n) =>
1585 :     let val code = movememtotop(fsize, lsrc, code)
1586 :     in FINISH(fucomp(n+1)::code) end
1587 :     | (true, n) =>
1588 :     let val code = if n = 0 then code else xch n::code
1589 :     val code = movememtotop(fsize, lsrc, code)
1590 :     in FINISH(fucompp()::code)
1591 :     end
1592 :    
1593 :     fun genregmemcmp(fx, rsrc) =
1594 :     let val code =
1595 :     case getfs fx of
1596 :     (true, n) =>
1597 :     let val code = if n = 0 then code
1598 :     else xch n::code
1599 :     val code = movememtotop(fsize, rsrc, code)
1600 :     in xch 1::code end
1601 :     | (false, n) =>
1602 :     let val code = movememtotop(fsize, rsrc, code)
1603 :     in push(n+1)::code
1604 :     end
1605 :     in FINISH(fucompp()::code)
1606 :     end
1607 :    
1608 :     (* Deal with the special case when both sources are
1609 :     * in the same register
1610 :     *)
1611 :     fun regsame(dx, sx) =
1612 :     let val (code, cmp) =
1613 :     case (dx, sx) of
1614 :     (true, 0) => (code, fucomp 0) (* pop once! *)
1615 :     | (false, 0) => (code, fucom 0) (* don't pop! *)
1616 :     | (true, n) => (xch n::code, fucomp 0)
1617 :     | (false, n) => (xch n::code, fucom 0)
1618 :     in FINISH(cmp::code) end
1619 :    
1620 :     fun reg2(fx, fy) =
1621 :     (* special case is when things are already in place.
1622 :     * Note: should also generate FUCOM and FUCOMP!!!
1623 :     *)
1624 :     let val (dx, sx) = getfs fx
1625 :     val (dy, sy) = getfs fy
1626 :     fun fstp(n) =
1627 :     (ST.xch(stack,n,0); ST.pop stack; I.FSTPL(ST n))
1628 :     in if sx = sy then regsame(dx, sx) (* same register!*)
1629 :     else
1630 :     (* first, move sx to %st(0) *)
1631 :     let val (sy, code) =
1632 :     if sx = 0 then (sy, code) (* there already *)
1633 :     else (if sy = 0 then sx else sy,
1634 :     xch sx::code)
1635 :    
1636 :     (* Generate the appropriate comparison op *)
1637 :     val (sy, cmp, popY) =
1638 :     case (dx, dy, sy) of
1639 :     (true, true, 0) => (~1, fucompp(), false)
1640 :     | (true, _, _) => (sy-1, fucomp sy, dy)
1641 :     | (false, _, _) => (sy, fucom sy, dy)
1642 :    
1643 :     val code = cmp::code
1644 :    
1645 :     (* Pop fy if it is dead and hasn't already
1646 :     * been popped.
1647 :     *)
1648 :     val code = if popY then fstp sy::code else code
1649 :     in FINISH code
1650 :     end
1651 :     end
1652 :    
1653 :     in case (lsrc, rsrc) of
1654 :     (I.FPR x, I.FPR y) => reg2(x, y)
1655 :     | (I.FPR x, mem) => genregmemcmp(x, mem)
1656 :     | (mem, I.FPR y) => genmemregcmp(mem, y)
1657 :     | _ => genmemcmp()
1658 :     end
1659 :    
1660 :    
1661 :     fun prCopy(dst, src) =
1662 :     ListPair.app(fn (fd, fs) =>
1663 : leunga 744 pr(fregToString(fd)^"<-"^fregToString fs^" "))
1664 : leunga 731 (dst, src)
1665 :    
1666 :     (* Parallel copy magic.
1667 :     * For each src registers, we find out
1668 :     * 1. whether it is the last use, and if so,
1669 :     * 2. whether it is used more than once.
1670 :     * If a source is a last and unique use, then we
1671 :     * can simply rename it to appropriate destination register.
1672 :     *)
1673 :     fun fcopy{dst,src,tmp} =
1674 :     let fun loop([], [], copies, renames) = (copies, renames)
1675 :     | loop(fd::fds, fs::fss, copies, renames) =
1676 : george 889 let val fsx = CB.registerNum fs
1677 : leunga 744 in if isLastUse fsx then
1678 :     if A.sub(useTbl,fsx) <> stamp
1679 :     (* unused *)
1680 :     then (A.update(useTbl,fsx,stamp);
1681 : leunga 731 loop(fds, fss, copies,
1682 : george 889 if CB.sameColor(fd,fs) then renames
1683 : leunga 731 else (fd, fs)::renames)
1684 : leunga 744 )
1685 : leunga 731 else loop(fds, fss, (fd, fs)::copies, renames)
1686 :     else loop(fds, fss, (fd, fs)::copies, renames)
1687 :     end
1688 :     | loop _ = error "fcopy.loop"
1689 :    
1690 :     (* generate code for the copies *)
1691 :     fun genCopy([], code) = code
1692 :     | genCopy((fd, fs)::copies, code) =
1693 : george 889 let val ss = ST.fp(stack, CB.registerNum fs)
1694 :     val _ = ST.push(stack, CB.registerNum fd)
1695 : leunga 731 val code = I.FLDL(ST ss)::code
1696 :     in genCopy(copies, code) end
1697 :    
1698 :     (* perform the renaming; it must be done in parallel! *)
1699 :     fun renaming(renames) =
1700 : leunga 744 let val ss = map (fn (_,fs) =>
1701 : george 889 ST.fp(stack,CB.registerNum fs)) renames
1702 : leunga 744 in ListPair.app (fn ((fd,_),ss) =>
1703 : george 889 ST.set(stack,ss,CB.registerNum fd))
1704 : leunga 731 (renames, ss)
1705 :     end
1706 :    
1707 :     (* val _ = if debug then
1708 :     (ListPair.app (fn (fd, fs) =>
1709 :     pr(fregToString(regmap fd)^"<-"^
1710 :     fregToString(regmap fs)^" ")
1711 :     ) (dst, src);
1712 :     pr "\n")
1713 :     else () *)
1714 :    
1715 :     val (copies, renames) = loop(dst, src, [], [])
1716 :     val code = genCopy(copies, code)
1717 :     in renaming renames;
1718 :     case tmp of
1719 :     SOME(I.FPR f) =>
1720 : leunga 744 (if debug andalso debugDead
1721 :     then pr("KILLING tmp "^fregToString f^"\n")
1722 :     else ();
1723 :     ST.kill(stack, f)
1724 :     )
1725 : leunga 731 | _ => ();
1726 :     DONE code
1727 :     end
1728 :    
1729 : george 895 fun call(instr, return) = let
1730 :     val code = mark(instr, an)::code
1731 :     val returnSet = SL.return(SL.uniq(getCell return))
1732 :     in
1733 :     case returnSet of
1734 : leunga 815 [] => ()
1735 : george 889 | [r] => ST.push(stack, CB.registerNum r)
1736 : leunga 815 | _ =>
1737 :     error "can't return more than one fp argument (yet)";
1738 :     DONE code
1739 :     end
1740 :    
1741 : leunga 731 in case instr of
1742 :     (* annotation handling *)
1743 :     I.ANNOTATION{i,a} =>
1744 :     trans(stamp, i, a::an, rest, dead, lastUses, code)
1745 :    
1746 :     (* handle floating point stuff *)
1747 :     | I.FMOVE x => (log(); fmove x)
1748 :     | I.FBINOP x => (log(); fbinop x)
1749 :     | I.FIBINOP x => (log(); fibinop x)
1750 :     | I.FUNOP x => (log(); funop x)
1751 :     | I.FILOAD x => (log(); fiload x)
1752 :     | I.FCMP x => (log(); fcmp x)
1753 :     | I.FCOPY x => (log(); fcopy x)
1754 :    
1755 : leunga 815 (* handle calling convention *)
1756 : george 895 | I.CALL{return, ...} => (log(); call(instr,return))
1757 : leunga 815
1758 : leunga 731 (*
1759 :     * Catch instructions that absolutely
1760 :     * should not have been generated at this point.
1761 :     *)
1762 :     | (I.FLD1 | I.FLDL2E | I.FLDLG2 | I.FLDLN2 | I.FLDPI |
1763 :     I.FLDZ | I.FLDL _ | I.FLDS _ | I.FLDT _ |
1764 :     I.FILD _ | I.FILDL _ | I.FILDLL _ |
1765 :     I.FENV _ | I.FBINARY _ | I.FIBINARY _ | I.FUNARY _ |
1766 :     I.FUCOMPP | I.FUCOM _ | I.FUCOMP _ | I.FCOMPP | I.FXCH _ |
1767 :     I.FSTPL _ | I.FSTPS _ | I.FSTPT _ | I.FSTL _ | I.FSTS _
1768 :     ) => bug("Illegal FP instructions")
1769 :    
1770 :     (* Other instructions are untouched *)
1771 :     | instr => FINISH(mark(instr, an)::code)
1772 :     end (* trans *)
1773 :    
1774 :     (*
1775 :     * Check the translation result to see if it matches the original
1776 :     * code.
1777 :     *)
1778 :     fun checkTranslation(stackIn, stackOut, insns) =
1779 :     let val n = ref(ST.depth stackIn)
1780 :     fun push() = n := !n + 1
1781 :     fun pop() = n := !n - 1
1782 :     fun scan(I.FBINARY{binOp, ...}) =
1783 :     (case binOp of
1784 :     ( I.FADDP | I.FSUBP | I.FSUBRP | I.FMULP
1785 :     | I.FDIVP | I.FDIVRP) => pop()
1786 :     | _ => ()
1787 :     )
1788 :     | scan(I.FIBINARY{binOp, ...}) = ()
1789 :     | scan(I.FUNARY I.FPTAN) = push()
1790 :     | scan(I.FUNARY _) = ()
1791 :     | scan(I.FLDL(I.ST n)) = push()
1792 :     | scan(I.FLDL mem) = push()
1793 :     | scan(I.FLDS mem) = push()
1794 :     | scan(I.FLDT mem) = push()
1795 :     | scan(I.FSTL(I.ST n)) = ()
1796 :     | scan(I.FSTPL(I.ST n)) = pop()
1797 :     | scan(I.FSTL mem) = ()
1798 :     | scan(I.FSTS mem) = ()
1799 :     | scan(I.FSTPL mem) = pop()
1800 :     | scan(I.FSTPS mem) = pop()
1801 :     | scan(I.FSTPT mem) = pop()
1802 :     | scan(I.FXCH{opnd=i,...}) = ()
1803 :     | scan(I.FUCOM _) = ()
1804 :     | scan(I.FUCOMP _) = pop()
1805 :     | scan(I.FUCOMPP) = (pop(); pop())
1806 :     | scan(I.FILD mem) = push()
1807 :     | scan(I.FILDL mem) = push()
1808 :     | scan(I.FILDLL mem) = push()
1809 :     | scan _ = ()
1810 :     val _ = app scan (rev insns);
1811 :     val n = !n
1812 :     val m = ST.depth stackOut
1813 :     in if n <> m then
1814 :     (dump(insns);
1815 :     bug("Bad translation n="^i2s n^ " expected="^i2s m^"\n")
1816 :     )
1817 :     else ()
1818 :     end
1819 :    
1820 :    
1821 :     (* Dump the initial code *)
1822 :     val _ = if debug andalso !debugOn then
1823 :     (pr("-------- block "^i2s blknum^" ----"^
1824 : leunga 744 C.CellSet.toString (!liveIn)^" "^
1825 : leunga 731 ST.stackToString stackIn^"\n");
1826 :     dump (!insns)
1827 :     )
1828 :     else ()
1829 :    
1830 :     (* Compute the last uses *)
1831 :     val lastUse = computeLastUse(blknum, insns, liveOut)
1832 :    
1833 :     (* Rewrite the code *)
1834 :     val (stamp, insns') = loop(stamp, rev(!insns), lastUse, code)
1835 :    
1836 :     (* Insert shuffle code at the end if necessary *)
1837 :     val insns' = shuffleOut(stack, insns', blknum, block, succ, liveOut)
1838 :    
1839 :     (* Dump translation *)
1840 :     val _ = if debug andalso !debugOn then
1841 :     (pr("-------- translation "^i2s blknum^"----"^
1842 : leunga 744 C.CellSet.toString (!liveIn)^" "^
1843 : leunga 731 ST.stackToString stackIn^"\n");
1844 :     dump insns';
1845 :     pr("-------- done "^i2s blknum^"----"^
1846 : leunga 744 C.CellSet.toString (!liveOut)^" "^
1847 : leunga 731 ST.stackToString stack^"\n")
1848 :     )
1849 :     else ()
1850 :    
1851 :     (* Check if things are okay *)
1852 :     val _ = if debug andalso sanityCheck then
1853 :     checkTranslation(stackIn, stack, insns')
1854 :     else ()
1855 :    
1856 :     in insns := insns'; (* update the instructions *)
1857 :     stamp
1858 :     end (* process *)
1859 :    
1860 :     in (* Translate all blocks *)
1861 :     rewriteAllBlocks(C.firstPseudo, blocks);
1862 :     (* If we found critical edges, then we have to split them... *)
1863 : blume 733 if IntHashTable.numItems edgesToSplit = 0 then cluster
1864 : leunga 731 else repairCriticalEdges(cluster)
1865 : leunga 744 end
1866 : leunga 731
1867 :     end (* functor *)
1868 :    
1869 :     end (* local *)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0