Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /MLRISC/trunk/SSA/ssa-gcm.sml
ViewVC logotype

Annotation of /MLRISC/trunk/SSA/ssa-gcm.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 695 - (view) (download)
Original Path: sml/trunk/src/MLRISC/SSA/ssa-gcm.sml

1 : leunga 695 (*
2 :     * Global code motion algorithm in SSA form.
3 :     * This is based on Cliff Click's algorithm.
4 :     * I've generalized it to
5 :     *
6 :     * 1. Take into account of execution frequencies to find the ``best''
7 :     * position to move an instruction.
8 :     * 2. Perform partial redundancy elimination. (not finished)
9 :     *
10 :     * -- Allen (leunga@cs.nyu.edu)
11 :     *)
12 :    
13 :     signature SSA_GLOBAL_CODE_MOTION =
14 :     sig
15 :     include SSA_OPTIMIZATION
16 :     val computeEarliest : SSA.ssa -> SSA.block Array.array
17 :     val computeBest : SSA.ssa * SSA.block Array.array -> SSA.ssa
18 :     end
19 :    
20 :     functor SSAGCM(SSA : SSA) : SSA_GLOBAL_CODE_MOTION =
21 :     struct
22 :     structure SSA = SSA
23 :     structure CFG = SSA.CFG
24 :     structure Dom = SSA.Dom
25 :     structure SP = SSA.SP
26 :     structure RTL = SSA.RTL
27 :     structure T = RTL.T
28 :     structure W = CFG.W
29 :     structure G = Graph
30 :     structure A = Array
31 :    
32 :     type flowgraph = SSA.ssa
33 :    
34 :     val debug = MLRiscControl.getFlag "ssa-debug-gcm";
35 :    
36 :     val name = "Global Code Motion"
37 :    
38 :     fun error msg = MLRiscErrorMsg.error("SSAGCM",msg)
39 :    
40 :     val i2s = Int.toString
41 :     val b2s = Bool.toString
42 :    
43 :     (* Compute the a mapping from ssa_id to earliest block that can be moved. *)
44 :     fun computeEarliest(SSA as G.GRAPH ssa) =
45 :     let val Dom = SSA.dom SSA
46 :     val M = #capacity ssa ()
47 :    
48 :     val rtlTbl = SSA.rtlTbl SSA
49 :     val blockTbl = SSA.blockTbl SSA
50 :     val usesTbl = SSA.usesTbl SSA
51 :     val defSiteTbl = SSA.defSiteTbl SSA
52 :    
53 :     (* Tables *)
54 :     val earliestTbl = A.array(M,~1) (* position for an instruction *)
55 :     val entryPos = Dom.entryPos Dom (* block_id -> entry position *)
56 :     val levels = Dom.levelsMap Dom (* block id -> dominator level *)
57 :    
58 :     (*
59 :     * For each value, try to schedule it as early as possible.
60 :     * This is constrained by the position of an instruction's operand.
61 :     *)
62 :     fun scheduleEarly(i, i') =
63 :     if A.sub(earliestTbl,i) >= 0 then ()
64 :     else if RTL.can'tMoveUp(i') then
65 :     (A.update(earliestTbl, i, A.sub(blockTbl,i)))
66 :     else
67 :     let val b = A.sub(blockTbl, i)
68 :     val _ = A.update(earliestTbl,i,b) (* the initial position *)
69 :     fun scan([],pos_i,depth_i) = pos_i
70 :     | scan(v::uses,pos_i,depth_i) =
71 :     if v < 0 then scan(uses, pos_i, depth_i)
72 :     else
73 :     let val j = A.sub(defSiteTbl, v)
74 :     val _ = scheduleEarly(j, A.sub(rtlTbl, j))
75 :     val pos_j = A.sub(earliestTbl,j)
76 :     val depth_j = A.sub(levels,pos_j)
77 :     in (* print("\tearliest("^showVal v^")="^i2s pos_j^"\n"); *)
78 :     if depth_j > depth_i then
79 :     scan(uses,pos_j,depth_j)
80 :     else
81 :     scan(uses,pos_i,depth_i)
82 :     end
83 :     val pos_i = A.sub(entryPos,b)
84 :     val depth_i = A.sub(levels,pos_i)
85 :     val earliest = scan(A.sub(usesTbl, i), pos_i, depth_i)
86 :     in (* if RTL.isLooker(A.sub(rtlTbl,i)) then *)
87 :     (* print("Pass 1 "^showOp i^" orig="^i2s b^
88 :     " earliest="^i2s earliest^
89 :     "\n"); *)
90 :     (* if dominates(earliest, b) then ()
91 :     else error "WTF! earliest > b"; *)
92 :     A.update(earliestTbl,i,earliest)
93 :     end
94 :    
95 :     (* Visit all the pinned instructions first *)
96 :     fun pass1 i =
97 :     if RTL.can'tMoveUp(A.sub(rtlTbl, i)) then
98 :     (A.update(earliestTbl,i,A.sub(blockTbl, i));
99 :     app (fn (j,_,_) => scheduleEarly(j, A.sub(rtlTbl,j)))
100 :     (#in_edges ssa i)
101 :     )
102 :     else ()
103 :     in SSA.forallNodes SSA pass1; (* schedule earliest *)
104 :     earliestTbl
105 :     end
106 :    
107 :    
108 :     (* Find the best position and move the instructions *)
109 :     fun computeBest(SSA as G.GRAPH ssa, earliestTbl) =
110 :     let val Dom = SSA.dom SSA
111 :     val M = #capacity ssa ()
112 :    
113 :     val rtlTbl = SSA.rtlTbl SSA
114 :     val blockTbl = SSA.blockTbl SSA
115 :     val usesTbl = SSA.usesTbl SSA
116 :     val defSiteTbl = SSA.defSiteTbl SSA
117 :    
118 :     val levels = Dom.levelsMap Dom (* block id -> dominator level *)
119 :     val succTbl = SSA.succTbl SSA
120 :     val freqTbl = SSA.freqTbl SSA (* frequencies indexed by block id *)
121 :     val idoms = Dom.idomsMap Dom (* block id -> immediate dominator *)
122 :     val moveOp = SSA.moveOp SSA
123 :    
124 :     val showOp = SSA.showOp SSA
125 :     val showVal = SSA.showVal SSA
126 :    
127 :     fun dump(i,b,earliest,latest,best) =
128 :     print(showOp i^" orig="^i2s b^
129 :     " earliest="^i2s earliest^
130 :     " latest="^i2s latest^
131 :     " best="^i2s best^
132 :     "\n")
133 :    
134 :     (*
135 :     * Schedule an instruction as late as possible.
136 :     * Visited nodes are indicated by earliest = ~1
137 :     *)
138 :     fun scheduleLate i = scheduleLate'(i,A.sub(rtlTbl,i))
139 :     and scheduleLate'(i,T.PHI _) = A.sub(blockTbl,i)
140 :     | scheduleLate'(i,T.SINK _) = A.sub(blockTbl,i)
141 :     | scheduleLate'(i,T.SOURCE _) = A.sub(blockTbl,i)
142 :     | scheduleLate'(i,i') =
143 :     if A.sub(earliestTbl,i) < 0 then A.sub(blockTbl, i)
144 :     else
145 :     let val earliest = A.sub(earliestTbl,i)
146 :     val _ = A.update(earliestTbl,i,~1) (* mark node as visited *)
147 :     val b = A.sub(blockTbl, i)
148 :    
149 :     (* Find the least common ancestor of two nodes in the dominator*)
150 :     fun LCA(~1,b) = b
151 :     | LCA(a,~1) = a
152 :     | LCA(a,b) =
153 :     let val la = A.sub(levels,a)
154 :     val lb = A.sub(levels,b)
155 :     in if la > lb then LCA'(a,b,la-lb) else LCA'(b,a,lb-la) end
156 :     and LCA'(a,b,0) = LCA''(a,b)
157 :     | LCA'(a,b,l) = LCA'(A.sub(idoms,a),b,l-1)
158 :     and LCA''(a,b) =
159 :     if a = b then a else LCA''(A.sub(idoms,a),A.sub(idoms,b))
160 :    
161 :     fun scan([],~1,trivialUsesOnly) = (b,trivialUsesOnly)
162 :     | scan([],latest,trivialUsesOnly) = (latest,trivialUsesOnly)
163 :     | scan((_,j,r)::es,latest,trivialUsesOnly) =
164 :     let val j' = A.sub(rtlTbl, j)
165 :     val pos_j = scheduleLate'(j, j')
166 :     val (latest,trivialUsesOnly) =
167 :     case j' of
168 :     T.PHI{preds,...} =>
169 :     let val s = A.sub(usesTbl,j)
170 :     fun loop(b::bs,s::ss,lca) =
171 :     if s = r then loop(bs,ss,LCA(lca,b))
172 :     else loop(bs,ss,lca)
173 :     | loop(_,_,bs') = bs'
174 :     in (loop(preds,s,latest), trivialUsesOnly) end
175 :     | T.SINK _ => (LCA(latest,pos_j), trivialUsesOnly)
176 :     | _ => (LCA(latest,pos_j), false)
177 :     in scan(es,latest,trivialUsesOnly) end
178 :     val (latest,trivialUsesOnly) =
179 :     scan(A.sub(succTbl, i),
180 :     if RTL.can'tMoveDown i' then b else ~1,
181 :     true)
182 :    
183 :     (* if latest = ~1 then the instruction is dead *)
184 :     (* Performance hack: if the instruction is a constant
185 :     * computation and if it is only used in a phi function,
186 :     * then DO NOT hoist them up because that will just increase
187 :     * register pressure!
188 :     *)
189 :     fun find_best(pos,best_pos,best_freq) =
190 :     let val w = A.sub(freqTbl,pos) handle _ =>
191 :     error(showOp i^
192 :     " pos "^i2s pos^" earliest="^i2s earliest^
193 :     " latest="^i2s latest^" orig="^i2s b^
194 :     " pinned="^b2s(RTL.can'tMoveUp i'))
195 :     val (best_pos,best_freq) =
196 :     if w < best_freq then (pos,w)
197 :     else (best_pos,best_freq)
198 :     in if pos = earliest then best_pos
199 :     else find_best(A.sub(idoms,pos),best_pos,best_freq)
200 :     end
201 :    
202 :     fun isConstant i =
203 :     (case A.sub(usesTbl, i) of
204 :     [v] => v < 0
205 :     | _ => false
206 :     )
207 :     val best = if earliest = ~1 then ~1
208 :     else if latest = ~1 then ~1
209 :     else if trivialUsesOnly andalso isConstant(i)
210 :     then latest
211 :     else find_best(latest,latest,A.sub(freqTbl,latest))
212 :     in
213 :     (* A.update(pos,i,best); *)
214 :     if best = ~1 then ()
215 :     else if best <> b then
216 :     (if !debug then dump(i,b,earliest,latest,best) else ();
217 :     (* dump(i,b,earliest,latest,best); *)
218 :     (*if dominates(best, latest) then () else error "best>latest";
219 :     if dominates(earliest, best) then () else error "early>best";
220 :     *)
221 :     moveOp{id=i,block=best}
222 :     )
223 :     else ();
224 :     (* dump(i,b,earliest,latest,best); *)
225 :     best
226 :     end
227 :    
228 :     fun pass2 i =
229 :     (* Warning: this must be pinned instead of can'tMoveDown
230 :     * because an op that can't be moved down doesn't mean it
231 :     * can't be moved up! If this is not done the pos of i would
232 :     * be wrong!
233 :     *)
234 :     if RTL.pinned(A.sub(rtlTbl, i)) then
235 :     (A.update(earliestTbl,i,~1);
236 :     app (fn (_,j,_) => (scheduleLate j;()))
237 :     (A.sub(succTbl,i))
238 :     )
239 :     else ()
240 :    
241 :     fun pass3 i = (scheduleLate i; ())
242 :    
243 :     (*
244 :     * Now, try to perform partial redundancy elimination.
245 :     * Given an instruction i, find its earliest position not constrained
246 :     * by the closest dominating uses that are phi-nodes.
247 :     *)
248 :     (*
249 :     val defsTbl = SSA.defsTbl SSA
250 :     fun pre i =
251 :     if W8A.sub(visited,i) = 0w3 then ()
252 :     else
253 :     let val _ = W8A.update(visited, i, 0w3)
254 :     val rtl_i = A.sub(rtlTbl,i)
255 :     val b_i = A.sub(blockTbl, i)
256 :     in if RTL.can'tMoveUp rtl_i then () else
257 :     let val earliest_i = A.sub(earliestTbl, i)
258 :     val depth_i = A.sub(levels, earliest_i)
259 :     val uses_i = A.sub(usesTbl, i)
260 :    
261 :     fun moveable([], phis, preds, earliest, depth) =
262 :     (phis, preds, earliest)
263 :     | moveable(v::uses, phis, preds, earliest, depth) =
264 :     if v < 0 then
265 :     moveable(uses, phis, preds, earliest, depth)
266 :     else
267 :     let val j = A.sub(defSiteTbl,v)
268 :     val b_j = A.sub(blockTbl, j)
269 :     in if b_j = earliest_i then
270 :     case A.sub(rtlTbl, j) of
271 :     T.PHI{preds, ...} =>
272 :     moveable(uses, j::phis, preds,
273 :     earliest, depth)
274 :     | _ => ([], [], earliest)
275 :     (* not hoistable *)
276 :     else
277 :     let val d_j = A.sub(levels, b_j)
278 :     in if d_j < depth_i then
279 :     if d_j > depth then
280 :     moveable(uses, phis, preds, b_j, d_j)
281 :     else
282 :     moveable(uses, phis, preds,
283 :     earliest, depth)
284 :     else
285 :     ([], [], earliest)
286 :     end
287 :     end
288 :    
289 :     (* Is the phi-node partially redundant?
290 :     * It is if is of the form t <- phi(...,t,...)
291 :     *)
292 :     fun replicationCost(newEarliest, preds, phi) =
293 :     let val [t] = A.sub(defsTbl, phi)
294 :     val uses = A.sub(usesTbl, phi)
295 :    
296 :     fun search(~1, best, bestCost) = (best, bestCost)
297 :     | search(X, best, bestCost) =
298 :     let val costX = A.sub(freqTbl, X)
299 :     val (best, bestCost) =
300 :     if costX < bestCost then
301 :     (X, costX)
302 :     else
303 :     (best, bestCost)
304 :     in if X = newEarliest then (best, bestCost)
305 :     else search(A.sub(idoms, X), best, bestCost)
306 :     end
307 :    
308 :     fun cost([], [], c) = c
309 :     | cost(p::ps, v::vs, c) =
310 :     if v = t then
311 :     cost(ps, vs, c)
312 :     else
313 :     let val (best, bestCost) =
314 :     search(p, p, A.sub(freqTbl, p))
315 :     in cost(ps, vs, c + bestCost) end
316 :     in cost(preds, uses, 0) end
317 :    
318 :    
319 :     val start = A.sub(earliestTbl,i)
320 :     val d = A.sub(levels,start)
321 :     val (phis, preds, newEarliest) =
322 :     moveable(uses_i, [], [], start, d)
323 :     in case phis of
324 :     [] => ()
325 :     | [phi] =>
326 :     let val cost = A.sub(freqTbl,b_i)
327 :     val cost' = replicationCost(newEarliest, preds, phi)
328 :     in if cost' < cost then
329 :     (print("PARTITIALLY REDUNDANT ["^i2s b_i^"] "^
330 :     showOp i^" depth="^i2s depth_i^"\n");
331 :     print("Cost="^W.toString cost^
332 :     " new cost="^W.toString cost'^"\n");
333 :     print("USES:\n");
334 :     app (fn v =>
335 :     if v < 0 then ()
336 :     else let val j = A.sub(defSiteTbl, v)
337 :     val b_j = A.sub(blockTbl, j)
338 :     val d_j = A.sub(levels, b_j)
339 :     in print("\t["^i2s b_j^"] "^showOp j^
340 :     " depth="^i2s d_j^"\n")
341 :     end) uses_i
342 :     )
343 :     else ()
344 :     end
345 :     | _ => ()
346 :     end
347 :     end
348 :    
349 :     fun pass4 i =
350 :     if RTL.can'tMoveUp(A.sub(rtlTbl, i)) then
351 :     (W8A.update(visited,i,0w3);
352 :     app (fn (j,_,_) => pre j) (#in_edges ssa i)
353 :     )
354 :     else ()
355 :     *)
356 :    
357 :     in
358 :     SSA.forallNodes SSA pass2; (* schedule latest *)
359 :     SSA.forallNodes SSA pass3; (* schedule remaining source instructions *)
360 :     (* SSA.forallNodes SSA pass4; *)
361 :     SSA.changed SSA;
362 :     SSA
363 :     end
364 :    
365 :     (* Run the optimization *)
366 :     fun run SSA = computeBest(SSA, computeEarliest SSA)
367 :    
368 :     end

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0