Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /MLRISC/trunk/mltree/mltree-mult.sml
ViewVC logotype

Annotation of /MLRISC/trunk/mltree/mltree-mult.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 651 - (view) (download)
Original Path: sml/trunk/src/MLRISC/mltree/mltree-mult.sml

1 : monnier 409 (*
2 :     * Generate multiplication/division by a constant.
3 :     * This module is mainly used for architectures without fast integer multiply.
4 :     *
5 :     * -- Allen
6 :     *)
7 :     functor MLTreeMult
8 :     (structure T : MLTREE
9 :     structure I : INSTRUCTIONS
10 :     sharing I.Constant = T.Constant
11 :    
12 :     val intTy : int (* width of integer type *)
13 :    
14 : monnier 429 type argi = {r:I.C.cell, i:int, d:I.C.cell}
15 :     type arg = {r1:I.C.cell, r2:I.C.cell, d:I.C.cell}
16 : monnier 409
17 :     (* these are always non-overflow trapping *)
18 : monnier 429 val mov : {r:I.C.cell,d:I.C.cell} -> I.instruction
19 : monnier 409 val add : arg -> I.instruction
20 :     val slli : argi -> I.instruction list
21 :     val srli : argi -> I.instruction list
22 :     val srai : argi -> I.instruction list
23 :     )
24 :     (val trapping : bool (* trap on overflow? *)
25 :     val multCost : int ref (* cost of multiplication *)
26 :    
27 :     (* basic ops; these have to implemented by the architecture *)
28 :    
29 :     (* if trapping = true, then the following MUST trap on overflow *)
30 :     val addv : arg -> I.instruction list
31 :     val subv : arg -> I.instruction list
32 :    
33 :     (* some architectures, like the PA-RISC and the Alpha,
34 :     * have these types of special ops
35 :     * if trapping = true, then the following MUST also trap on overflow
36 :     *)
37 :     val sh1addv : (arg -> I.instruction list) option (* a*2 + b *)
38 :     val sh2addv : (arg -> I.instruction list) option (* a*4 + b *)
39 :     val sh3addv : (arg -> I.instruction list) option (* a*8 + b *)
40 : monnier 429 )
41 :     (val signed : bool (* signed? *)
42 :     ) : MLTREE_MULT_DIV =
43 : monnier 409 struct
44 :     structure T = T
45 :     structure I = I
46 :     structure C = I.C
47 :     structure W = Word
48 :     structure A = Array
49 :    
50 :     type arg = argi
51 :    
52 :     infix << >> ~>> || &&
53 :     val itow = W.fromInt
54 :     val wtoi = W.toIntX
55 :     val op << = W.<<
56 :     val op >> = W.>>
57 :     val op ~>> = W.~>>
58 :     val op || = W.orb
59 :     val op && = W.andb
60 :    
61 :     exception TooComplex
62 :    
63 :     fun error msg = MLRiscErrorMsg.error("MLTreeMult",msg)
64 :    
65 :     val zeroR = C.zeroReg C.GP
66 :     val shiftri = if signed then srai else srli
67 :    
68 :     fun isPowerOf2 w = ((w - 0w1) && w) = 0w0
69 :    
70 :     fun log2 n = (* n must be > 0!!! *)
71 :     let fun loop(0w1,pow) = pow
72 :     | loop(w,pow) = loop(w >> 0w1,pow+1)
73 :     in loop(n,0) end
74 :    
75 :     fun zeroBits(w,lowZeroBits) =
76 :     if (w && 0w1) = 0w1 then (w,lowZeroBits)
77 :     else zeroBits(w >> 0w1,lowZeroBits+0w1)
78 :    
79 :     (* Non overflow trapping version of multiply:
80 :     * We can use add, shadd, shift, sub to perform the multiplication
81 :     *)
82 :     fun multiplyNonTrap{r,i,d} =
83 :     let fun mult(r,w,maxCost,d) =
84 :     if maxCost <= 0 then raise TooComplex
85 :     else if isPowerOf2 w then slli{r=r,i=log2 w,d=d}
86 :     else
87 :     (case (w,sh1addv,sh2addv,sh3addv) of
88 :     (* some base cases *)
89 :     (0w3,SOME f,_,_) => f{r1=r,r2=r,d=d}
90 :     | (0w5,_,SOME f,_) => f{r1=r,r2=r,d=d}
91 :     | (0w9,_,_,SOME f) => f{r1=r,r2=r,d=d}
92 :     | _ => (* recurse on the bit patterns of w *)
93 :     let val tmp = C.newReg()
94 :     in if (w && 0w1) = 0w1 then (* low order bit is 1 *)
95 :     if (w && 0w2) = 0w2 then (* second bit is 1 *)
96 :     mult(r,w+0w1,maxCost-1,tmp) @
97 :     subv{r1=tmp,r2=r,d=d}
98 :     else (* second bit is 0 *)
99 :     mult(r,w-0w1,maxCost-1,tmp) @
100 :     addv{r1=tmp,r2=r,d=d}
101 :     else (* low order bit is 0 *)
102 :     let val (w,lowZeroBits) = zeroBits(w,0w0)
103 :     in mult(r,w,maxCost-1,tmp) @
104 :     slli{r=tmp,i=wtoi lowZeroBits,d=d}
105 :     end
106 :     end
107 :     )
108 :     in if i <= 0 then raise TooComplex
109 :     else if i = 1 then [mov{r=r,d=d}]
110 :     else mult(r,itow i,!multCost,d)
111 :     end
112 :    
113 : george 545 (* The semantics of roundToZero{r,i,d} is:
114 :     * if r >= 0 then d <- r
115 :     * else d <- r + i
116 :     *)
117 :     fun roundToZero stm {ty,r,i,d} =
118 :     let val reg = T.REG(ty,r)
119 :     in stm(T.MV(ty,d,
120 :     T.COND(ty,T.CMP(ty,T.GE,reg,T.LI 0),reg,
121 :     T.ADD(ty,reg,T.LI i))))
122 :     end
123 :    
124 :    
125 : monnier 409 (*
126 :     * Simulate rounding towards zero for signed division
127 :     *)
128 :     fun roundDiv{mode=T.TO_NEGINF,r,...} = ([],r) (* no rounding necessary *)
129 : george 545 | roundDiv{mode=T.TO_ZERO,stm,r,i} =
130 : monnier 409 if signed then
131 :     let val d = C.newReg()
132 :     in if i = 2 then (* special case for division by 2 *)
133 :     let val tmpR = C.newReg()
134 :     in (srli{r=r,i=intTy - 1,d=tmpR}@[add{r1=r,r2=tmpR,d=d}], d)
135 :     end
136 :     else
137 :     (* invoke rounding callback *)
138 : george 545 let val () = roundToZero stm {ty=intTy,r=r,i=i-1,d=d}
139 : monnier 409 in ([],d) end
140 :     end
141 :     else ([],r) (* no rounding for unsigned division *)
142 :     | roundDiv{mode,...} =
143 :     error("Integer rounding mode "^
144 : george 545 T.Basis.roundingModeToString mode^" is not supported")
145 : monnier 409
146 : george 545 fun divideNonTrap{mode,stm}{r,i,d} =
147 : monnier 409 if i > 0 andalso isPowerOf2(itow i)
148 :     then
149 : george 545 let val (code,r) = roundDiv{mode=mode,stm=stm,r=r,i=i}
150 : monnier 409 in code@shiftri{r=r,i=log2(itow i),d=d} end (* won't overflow *)
151 :     else raise TooComplex
152 :    
153 :     (* Overflow trapping version of multiply:
154 :     * We can use only add and shadd to perform the multiplication,
155 :     * because of overflow trapping problem.
156 :     *)
157 :     fun multiplyTrap{r,i,d} =
158 :     let fun mult(r,w,maxCost,d) =
159 :     if maxCost <= 0 then raise TooComplex
160 :     else
161 :     (case (w,sh1addv,sh2addv,sh3addv,zeroR) of
162 :     (* some simple base cases *)
163 :     (0w2,_,_,_,_) => addv{r1=r,r2=r,d=d}
164 :     | (0w3,SOME f,_,_,_) => f{r1=r,r2=r,d=d}
165 :     | (0w4,_,SOME f,_,SOME z) => f{r1=r,r2=z,d=d}
166 :     | (0w5,_,SOME f,_,_) => f{r1=r,r2=r,d=d}
167 :     | (0w8,_,_,SOME f,SOME z) => f{r1=r,r2=z,d=d}
168 :     | (0w9,_,_,SOME f,_) => f{r1=r,r2=r,d=d}
169 :     | _ => (* recurse on the bit patterns of w *)
170 :     let val tmp = C.newReg()
171 :     in if (w && 0w1) = 0w1 then
172 :     mult(r,w - 0w1,maxCost-1,tmp) @ addv{r1=tmp,r2=r,d=d}
173 :     else
174 :     case (w && 0w7, sh3addv, zeroR) of
175 :     (0w0, SOME f, SOME z) => (* times 8 *)
176 :     mult(r,w >> 0w3,maxCost-1,tmp) @ f{r1=tmp,r2=z,d=d}
177 :     | _ =>
178 :     case (w && 0w3, sh2addv, zeroR) of
179 :     (0w0, SOME f, SOME z) => (* times 4 *)
180 :     mult(r,w >> 0w2,maxCost-1,tmp) @ f{r1=tmp,r2=z,d=d}
181 :     | _ =>
182 :     mult(r,w >> 0w1,maxCost-1,tmp) @ addv{r1=tmp,r2=tmp,d=d}
183 :     end
184 :     )
185 :     in if i <= 0 then raise TooComplex
186 :     else if i = 1 then [mov{r=r,d=d}]
187 :     else mult(r,itow i,!multCost,d)
188 :     end
189 :    
190 : george 545 fun divideTrap{mode,stm}{r,i,d} =
191 : monnier 409 if i > 0 andalso isPowerOf2(itow i)
192 :     then
193 : george 545 let val (code,r) = roundDiv{mode=mode,stm=stm,r=r,i=i}
194 : monnier 409 in code@shiftri{r=r,i=log2(itow i),d=d} end (* won't overflow *)
195 :     else raise TooComplex
196 :    
197 : george 545 fun multiply x = if trapping then multiplyTrap x else multiplyNonTrap x
198 :     fun divide x = if trapping then divideTrap x else divideNonTrap x
199 : monnier 409
200 :     end

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0