Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /MLRISC/trunk/amd64/amd64MC.sml
ViewVC logotype

Annotation of /MLRISC/trunk/amd64/amd64MC.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5564 - (view) (download)

1 : jhr 5558 (* amd64MC.sml
2 :     *
3 :     * COPYRIGHT (c) 2019 The Fellowship of SML/NJ (http://www.smlnj.org)
4 :     * All rights reserved.
5 :     *
6 :     * Machine code emitter for AMD64 architecture.
7 :     *)
8 : mrainey 2619
9 : jhr 5558 functor AMD64MCEmitter (
10 : mrainey 2619
11 : jhr 5558 structure Instr : AMD64INSTR
12 :     structure Shuffle : AMD64SHUFFLE where I = Instr
13 :     structure MLTreeEval : MLTREE_EVAL where T = Instr.T
14 :     structure AsmEmitter : INSTRUCTION_EMITTER where I = Instr
15 : mrainey 2619
16 : jhr 5558 ) : MC_EMIT = struct
17 :     structure I = Instr
18 :     structure C = I.C
19 :     structure W32 = Word32
20 :     structure W8 = Word8
21 :     structure W = LargeWord
22 :     structure CB = CellsBasis
23 : mrainey 2619
24 : jhr 5558 val println = print o (fn s => s^"\n")
25 :     val i2s = Int.toString
26 :     val print_int = println o i2s
27 :    
28 :     val itow = Word.fromInt
29 :     val wtoi = Word.toInt
30 :    
31 : jhr 5560 fun error msg = MLRiscErrorMsg.impossible ("AMD64MCEmitter: " ^ msg)
32 :     fun unimplemented instr = MLRiscErrorMsg.impossible (concat[
33 :     "AMD64MCEmitter: ", instr, " unimplemented"
34 :     ])
35 : jhr 5558
36 : mrainey 2619 (*
37 :     * Numeric encodings of the eight legacy general-purpose registers, as used in the ModRM and SIB fields below.
38 :     *)
39 : jhr 5558 val eax = 0 val esp = 4
40 :     val ecx = 1 val ebp = 5
41 :     val edx = 2 val esi = 6
42 :     val ebx = 3 val edi = 7
43 : mrainey 2619
44 : jhr 5560 val lockPrefix : Word8.word = 0wxF0
45 : mrainey 2619
46 : jhr 5560 fun lexp le = Int32.fromInt (MLTreeEval.valueOf le)
47 : jhr 5159
48 : jhr 5558 val toWord8 = Word8.fromLargeWord o LargeWord.fromLargeInt o Int32.toLarge
49 :     val eBytes = Word8Vector.fromList
50 :     fun eByte i = eBytes [W8.fromInt i]
51 : jhr 5159
(* Little-endian serialisation of 16- and 32-bit immediates.
 * Both functions take an Int32.int and return a Word8.word list with the
 * least-significant byte first.
 *)
local
  (* the byte of `bits` that starts `offset` bits above the bottom *)
  fun byteAt bits offset = Word8.fromLargeWord (LargeWord.>> (bits, offset))
  (* convert `n` once, then extract one byte per requested offset *)
  fun littleEndian (offsets, n) =
        map (byteAt (LargeWord.fromLargeInt (Int32.toLarge n))) offsets
in
fun eShort i16 = littleEndian ([0w0, 0w8], i16)
fun eLong i32 = littleEndian ([0w0, 0w8, 0w16, 0w24], i32)
end (* local *)
67 : jhr 3801
(* eLongLong i64: the eight bytes of a 64-bit immediate as a Word8.word list,
 * least-significant byte first.
 *)
fun eLongLong i64 = let
      val bits = Word64.fromLargeInt (Int64.toLarge i64)
      (* isolate the byte that starts `offset` bits up, then narrow to Word8 *)
      fun byteAt offset =
            Word8.fromInt (Word64.toInt (Word64.andb (0w255, Word64.>> (bits, offset))))
      in
        List.tabulate (8, fn k => byteAt (Word.fromInt (8 * k)))
      end
84 : jhr 5159
(* eLongLongCut i64: only the low four bytes of a 64-bit immediate,
 * least-significant byte first; the upper 32 bits are discarded.
 *)
fun eLongLongCut i64 = let
      val bits = Word64.fromLargeInt (Int64.toLarge i64)
      (* keep the bottom eight bits of a 64-bit word as a Word8 *)
      fun lowByte w = Word8.fromInt (Word64.toInt (Word64.andb (0w255, w)))
      in
        map (fn offset => lowByte (Word64.>> (bits, offset))) [0w0, 0w8, 0w16, 0w24]
      end
95 : jhr 5159
96 : jhr 5558 fun emitInstrs instrs = Word8Vector.concat(map emitInstr instrs)
97 : mrainey 2619
98 : jhr 5558 and emitAMD64Instr instr = let
99 :     val error = fn msg => let
100 :     val AsmEmitter.S.STREAM{emit,...} = AsmEmitter.makeStream []
101 :     in
102 :     emit (I.INSTR instr); error msg
103 :     end
104 :    
105 : jhr 5560 datatype reg_or_opc = REG of int | OPCODE of int
106 : mrainey 2619 fun rMask r = r mod 8
107 :     fun getRO (REG r) = rMask r
108 : jhr 5560 | getRO (OPCODE oc) = oc
109 : jhr 5159 val rNum' = rMask o CB.physicalRegisterNum
110 :     val rNum = CB.physicalRegisterNum
111 :     val fNum = CB.physicalRegisterNum
112 : mrainey 2619 val isExtReg = (fn x => x > 7) o rNum
113 :     fun isExtReg' (REG r) = r > 7
114 :     | isExtReg' _ = false
115 : jhr 5159
(* Width classes for immediate and displacement operands. *)
datatype size = Zero | Bits8 | Bits32

(* size i: Zero when i is 0, Bits8 when i lies in [~128, 127], Bits32 otherwise. *)
fun size (0 : Int32.int) = Zero
  | size i =
      if Int32.>=(i, 128) orelse Int32.>(~128, i)
        then Bits32
        else Bits8
122 : jhr 5159
123 : mrainey 2619 fun immedOpnd (I.Immed i32) = i32
124 :     | immedOpnd (I.ImmedLabel le) = lexp le
125 :     | immedOpnd (I.LabelEA le) = lexp le
126 :     | immedOpnd _ = error "immedOpnd"
127 : jhr 5159
(* `mod` is bound as a record-field variable below, so drop its infix status *)
nonfix mod

(* scale (n, m): n shifted left by m bits, returned as an int *)
fun scale (n, m) = let
      val shifted = Word.<< (Word.fromInt n, Word.fromInt m)
      in
        Word.toIntX shifted
      end

(* pack three fields as (hi << 6) + (mid << 3) + lo into one byte *)
fun packFields (hi, mid, lo) = Word8.fromInt (scale (hi, 6) + scale (mid, 3) + lo)

(* ModRM byte from its mod / reg / rm fields *)
fun modrm {mod, reg, rm} = packFields (mod, reg, rm)

(* SIB byte from its scale / index / base fields *)
fun sib {ss, index, base} = packFields (ss, index, base)
(* eREXRegs (r, x, b): low three bits of a REX prefix; r contributes 0x4,
 * x contributes 0x2 and b contributes 0x1.
 *)
fun eREXRegs (r, x, b) =
      (if r then 0wx4 else 0wx0) + (if x then 0wx2 else 0wx0) + (if b then 0wx1 else 0wx0)

(* eREX rb: REX prefix byte, 0x40 plus the register-extension bits *)
fun eREX rb = rb + 0wx40

(* eREX64 rb: as eREX, with 0x8 added to set REX.W *)
fun eREX64 rb = 0wx8 + eREX rb
142 : jhr 5159
143 : jhr 5560 fun eImmedExt (_, I.Immed _) = error "eImmedExt: Immed"
144 :     | eImmedExt (_, I.Immed64 _) = error "eImmedExt: Immed64"
145 :     | eImmedExt (_, I.ImmedLabel _) = error "eImmedExt: ImmedLabel"
146 :     | eImmedExt (_, I.Relative _) = error "eImmedExt: Relative"
147 :     | eImmedExt (_, I.LabelEA _) = error "eImmedExt: LabelEA"
148 :     | eImmedExt (r', I.Direct (_, r)) =
149 : jhr 5159 ( (isExtReg' r', false, isExtReg r),
150 : mrainey 2619 [modrm{mod=3, reg=getRO r', rm=rNum' r}] )
151 : jhr 5159 | eImmedExt (r', I.FDirect r) =
152 :     ( (isExtReg' r', false, isExtReg r),
153 : jhr 3801 [modrm{mod=3, reg=getRO r', rm=rNum' r}] )
154 : jhr 5558 | eImmedExt (r', I.Displace{base=base', disp, ...}) = let
155 :     val base = rNum' base'
156 :     val immed = immedOpnd disp
157 :     val rex = (isExtReg' r', false, isExtReg base')
158 :     val r' = getRO r'
159 :     fun displace(mod, eDisp) = if base = esp
160 :     then modrm{mod=mod, reg=r', rm=4}::
161 :     sib{ss=0, index=4, base=esp}::eDisp immed
162 :     else modrm{mod=mod, reg=r', rm=base} :: eDisp immed
163 :     in
164 : mrainey 2619 (rex,
165 :     (case size immed
166 : jhr 5558 of Zero => if base = esp
167 :     then [modrm{mod=0, reg=r', rm=4}, sib{ss=0,index=4,base=esp}]
168 :     else if base = ebp
169 :     then [modrm{mod=1, reg=r', rm=ebp}, 0w0]
170 :     else [modrm{mod=0, reg=r', rm=base}]
171 : mrainey 2619 | Bits8 => displace (1, fn i => [toWord8 i])
172 :     | Bits32 => displace (2, eLong)
173 :     (*esac*)) )
174 : jhr 5558 end
175 : jhr 5560 | eImmedExt (r', I.Indexed{base=NONE, index, scale, disp, ...}) = let
176 : jhr 5558 val rex = (isExtReg' r', isExtReg index, false)
177 :     val r' = getRO r'
178 :     in
179 : mrainey 2619 (rex,
180 :     (modrm{mod=0, reg=r', rm=4} ::
181 : jhr 5159 sib{base=5, ss=scale, index=rNum' index} ::
182 : mrainey 2619 eLong (immedOpnd disp)) )
183 : jhr 5558 end
184 : jhr 5560 | eImmedExt (r', I.Indexed{base=SOME b, index, scale, disp, ...}) = let
185 : jhr 5558 val rex = (isExtReg' r', isExtReg index, isExtReg b)
186 :     val r' = getRO r'
187 :     val index = rNum' index
188 :     val base = rNum' b
189 :     val immed = immedOpnd disp
190 :     fun indexed (mod, eDisp) =
191 : mrainey 2619 modrm{mod=mod, reg=r', rm=4} ::
192 :     sib{ss=scale, index=index, base=base} :: eDisp immed
193 : jhr 5558 in
194 : mrainey 2619 (rex,
195 :     (case size immed
196 : jhr 5159 of Zero =>
197 :     if base=ebp then
198 : mrainey 2619 [modrm{mod=1, reg=r', rm=4},
199 :     sib{ss=scale, index=index, base=5}, 0w0]
200 :     else
201 : jhr 5159 [modrm{mod=0, reg=r', rm=4},
202 : mrainey 2619 sib{ss=scale, index=index, base=base}]
203 :     | Bits8 => indexed(1, fn i => [toWord8 i])
204 :     | Bits32 => indexed(2, eLong)
205 :     (*esac*)) )
206 : jhr 5558 end
207 : mrainey 2619
208 : jhr 5558 fun encode32' (bytes, r', opnd) = let
209 :     val (rex, e) = eImmedExt (r', opnd)
210 :     in
211 :     case eREXRegs rex
212 : jhr 5561 of 0w0 => bytes @ e
213 :     | rexByte => (eREX rexByte) :: bytes @ e
214 : jhr 5558 (* esac *)
215 :     end (* encode32' *)
216 :     fun encode64' (bytes, r', opnd) = let
217 :     val (rex, e) = eImmedExt (r', opnd)
218 :     in
219 : jhr 5561 (eREX64 (eREXRegs rex)) :: bytes @ e
220 : jhr 5558 end (* encode64' *)
221 : jhr 5564 fun encode' 32 = encode32'
222 :     | encode' _ = encode64'
223 : mrainey 2619 fun encode32 (byte1, r', opnd) = eBytes (encode32' ([byte1], r', opnd))
224 :     fun encode64 (byte1, r', opnd) = eBytes (encode64' ([byte1], r', opnd))
225 :     fun encode sz = if sz = 64 then encode64 else encode32
226 : jhr 5560 fun encodeReg32 (byte1, r, opnd) = encode32 (byte1, REG(rNum r), opnd)
227 :     fun encodeReg64 (byte1, r, opnd) = encode64 (byte1, REG(rNum r), opnd)
228 : mrainey 2619 fun encodeReg sz = if sz = 64 then encodeReg64 else encodeReg32
229 :     fun encodeLongImm32 (byte1, r', opnd, i) =
230 : jhr 5558 eBytes ((encode32' ([byte1], r', opnd)) @ eLong i)
231 : mrainey 2619 fun encodeLongImm64 (byte1, r', opnd, i) =
232 : jhr 5558 eBytes ((encode64' ([byte1], r', opnd)) @ eLong i)
233 : mrainey 2619 fun encodeLongImm sz = if sz = 64 then encodeLongImm64 else encodeLongImm32
234 :     fun encodeShortImm32 (byte1, r', opnd, w) =
235 : jhr 5558 eBytes ((encode32' ([byte1], r', opnd)) @ eShort w)
236 : mrainey 2619 fun encodeShortImm64 (byte1, r', opnd, w) =
237 : jhr 5558 eBytes ((encode64' ([byte1], r', opnd)) @ eShort w)
238 : mrainey 2619 fun encodeShortImm sz = if sz = 64 then encodeShortImm64 else encodeShortImm32
239 :     fun encodeByteImm32 (byte1, r', opnd, b) =
240 : jhr 5558 eBytes ((encode32' ([byte1], r', opnd)) @ [toWord8 b])
241 : mrainey 2619 fun encodeByteImm64 (byte1, r', opnd, b) =
242 : jhr 5558 eBytes ((encode64' ([byte1], r', opnd)) @ [toWord8 b])
243 :     fun encodeByteImm sz = if sz = 64
244 :     then encodeByteImm64
245 :     else encodeByteImm32
246 :     fun encodeST (byte1, opc, STn) = let
247 : jhr 5560 fun reg {opc, reg} = W8.fromInt (scale (opc,3) + reg)
248 : jhr 5558 in
249 :     eBytes [byte1, reg{opc=opc,reg=fNum STn}]
250 :     end
251 : mrainey 2619
252 : jhr 5560 (* arith: only 5 cases need be considered for each size:
253 :     * dst, src op/en
254 :     * --------------------------
255 :     * EAX, imm32 I
256 :     * r/m32, imm32 MI
257 :     * r/m32, imm8 MI
258 :     * r/m32, r32 MR
259 :     * r32, r/m32 RM
260 : mrainey 2619 *)
261 : jhr 5561 fun arith (sz : int, opc1 : Word8.word, opc2 : reg_or_opc) = let
262 : jhr 5558 fun f (I.ImmedLabel le, dst) = f(I.Immed(lexp le), dst)
263 :     | f (I.LabelEA le, dst) = f(I.Immed(lexp le), dst)
264 : jhr 5560 | f (I.Immed i, dst) = (case size i
265 : jhr 5558 of Bits32 => (case dst
266 :     of I.Direct (_, r) => if CB.physicalRegisterNum r = eax
267 :     then if sz = 32
268 :     then eBytes (W8.fromInt (8 * (getRO opc2) + 5) :: eLong i)
269 :     else eBytes (eREX64 0w0 :: W8.fromInt(8 * (getRO opc2) + 5)
270 :     :: eLong i)
271 :     else encodeLongImm sz (0wx81, opc2, dst, i)
272 :     | _ => encodeLongImm sz (0wx81, opc2, dst, i)
273 :     (*esac*))
274 :     | _ => encodeByteImm sz (0wx83, opc2, dst, i) (* 83 /digit ib *)
275 :     (*esac*))
276 : jhr 5560 | f(src, I.Direct(_, r)) = encodeReg sz (opc1+0w3, r, src)
277 :     | f(I.Direct(_, r), dst) = encodeReg sz (opc1+0w1, r, dst)
278 :     | f _ = error "arith"
279 : jhr 5558 in
280 :     f
281 :     end (* arith *)
282 : mrainey 2619
283 : jhr 5558 fun condCode cond = (case cond
284 :     of I.EQ => 0w4 | I.NE => 0w5
285 :     | I.LT => 0w12 | I.LE => 0w14
286 :     | I.GT => 0w15 | I.GE => 0w13
287 :     | I.A => 0w7 | I.AE => 0w3
288 :     | I.B => 0w2 | I.BE => 0w6
289 :     | I.C => 0w2 | I.NC => 0w3
290 :     | I.P => 0wxa | I.NP => 0wxb
291 :     | I.O => 0w0 | I.NO => 0w1
292 :     (*esac*))
293 : mrainey 2619
294 :     (* test: the following cases need be considered:
295 :     * lsrc, rsrc
296 :     * -----------
297 :     * AL, imm8 opc1 A8
298 :     * EAX, imm32 opc1 A9
299 :     * r/m8, imm8 opc2 F6/0 ib
300 :     * r/m32, imm32 opc2 F7/0 id
301 :     * r/m8, r8 opc3 84/r
302 :     * r/m32, r32 opc3 85/r
303 :     *)
(* test (sz, rsrc, lsrc): encode a TEST of lsrc against rsrc, where sz is the
 * operand width in bits. Label operands are evaluated to immediates first.
 * An immediate in [0, 255) selects the byte forms (A8 / F6 /0 ib); any other
 * immediate selects the 32-bit forms (A9 / F7 /0 id). A register rsrc selects
 * 84 /r for sz = 8 and 85 /r otherwise. Any other rsrc is an error.
 *)
fun test (sz, I.ImmedLabel le, lsrc) = test (sz, I.Immed (lexp le), lsrc)
  | test (sz, I.LabelEA le, lsrc) = test (sz, I.Immed (lexp le), lsrc)
  | test (sz, I.Immed i, lsrc) = (case (lsrc, i >= 0 andalso i < 255)
       of (I.Direct (_, r), false) =>
            if CB.physicalRegisterNum r = eax
              then if sz = 64
                (* A9 on its own tests EAX only; the 64-bit form needs REX.W,
                 * exactly as arith emits it for its accumulator short form.
                 *)
                then eBytes (eREX64 0w0 :: 0wxA9 :: eLong i)
                else eBytes (0wxA9 :: eLong i)
              else encodeLongImm sz (0wxF7, OPCODE 0, lsrc, i)
        | (_, false) => encodeLongImm sz (0wxF7, OPCODE 0, lsrc, i)
        | (I.Direct (_, r), true) => let (* 8 bit *)
            val r = CB.physicalRegisterNum r
            in
              if r = eax
                then eBytes [0wxA8, toWord8 i]
              else if r < 4
                (* unfortunately, only CL, DL, BL can be encoded *)
                then encodeByteImm sz (0wxF6, OPCODE 0, lsrc, i)
              else if sz = 8
                then error "test.8"
                else encodeLongImm sz (0wxF7, OPCODE 0, lsrc, i)
            end
        | (_, true) => encodeByteImm sz (0wxF6, OPCODE 0, lsrc, i)
      (* end case *))
  | test (8, rsrc as I.Direct (_, r), lsrc) =
      if rNum r < 4
        then encodeReg32 (0wx84, r, lsrc)
        else error "test.8"
  | test (sz, I.Direct (_, r), lsrc) = encodeReg sz (0wx85, r, lsrc)
  | test _ = error "test"
330 :    
331 : jhr 5560 fun movsd (byte3, r, opnd) =
332 :     eBytes (0wxf2::encode32'([0wxf, byte3], REG (rNum r), opnd))
333 : jhr 3801
334 :     (* DEBUG print instructions in stdout
335 :     fun makestream s = let
336 :     fun write f slice = let
337 :     val t = f slice
338 :     val _ = s := (!s)^t
339 :     in
340 :     String.size t
341 :     end
342 :     val writer =
343 :     TextPrimIO.WR
344 :     {
345 :     name = "stringout",
346 :     chunkSize = 512,
347 :     writeVec = SOME (write CharVectorSlice.vector),
348 :     writeArr = SOME (write CharArraySlice.vector),
349 :     writeVecNB = NONE,
350 :     writeArrNB = NONE,
351 :     block = NONE,
352 :     canOutput = NONE,
353 :     getPos = NONE,
354 :     setPos = NONE,
355 :     endPos = NONE,
356 :     verifyPos = NONE,
357 :     close = (fn () => ()),
358 :     ioDesc = NONE
359 :     }
360 :     val stream =
361 :     TextIO.StreamIO.mkOutstream (writer, IO.NO_BUF)
362 :     in
363 :     TextIO.mkOutstream stream
364 :     end
365 :    
366 :     fun instrstring () =
367 :     let
368 :     val s = ref ""
369 :     val stream = makestream s
370 :     val _ =
371 :     AsmStream.withStream stream
372 :     (fn _ => let
373 :     val AsmEmitter.S.STREAM{emit,...} =
374 :     AsmEmitter.makeStream []
375 :     in
376 :     emit (I.INSTR instr) end) ()
377 :     val _ = TextIO.closeOut stream
378 :     in !s end
379 :    
380 :     val _ = print (instrstring ())
381 :     *)
382 :    
383 : jhr 5558 in
384 :     case instr
385 : jhr 5560 of I.NOP => eByte 0x90
386 :     | I.JMP(I.Relative i, _) => (
387 :     case size (Int32.fromInt(i-2))
388 :     of Bits32 => eBytes (0wxe9 :: eLong (Int32.fromInt (i-5)))
389 :     | _ => eBytes [0wxeb, Word8.fromInt(i-2)]
390 :     (*esac*))
391 :     | I.JMP(opnd, _) => let
392 :     val ty = (case opnd of I.Direct(ty,_) => ty | _ => ~1)
393 :     in
394 :     if ty = 64
395 :     then let
396 :     fun encodejmp (bytes, r', opnd) = let
397 :     val (rex, e) = eImmedExt (r', opnd)
398 :     in
399 :     case eREXRegs rex
400 : jhr 5561 of 0w0 => bytes @ e
401 :     | rexByte => (eREX rexByte) :: bytes @ e
402 : jhr 5560 (* end case *)
403 :     end
404 :     in
405 :     eBytes(encodejmp([0wxff], OPCODE 4, opnd))
406 :     end
407 :     else encode32(0wxff, OPCODE 4, opnd)
408 :     end
409 :     | I.JCC{cond, opnd=I.Relative i} => let
410 :     val code = condCode cond
411 :     val i' = Int32.fromInt i
412 :     in
413 :     case size (i'-2)
414 :     of Bits32 => eBytes(0wx0f :: Word8.+(0wx80, code) :: eLong(i'-6))
415 :     | _ => eBytes[Word8.+(0wx70,code), Word8.fromInt(i-2)]
416 : jhr 5558 (* end case *)
417 :     end
418 : jhr 5560 | I.CALL{opnd=I.Relative i,...} =>
419 :     eBytes (0wxe8 :: eLong (Int32.fromInt (i-5)))
420 :     | I.CALL{opnd, ...} => encode32 (0wxff, OPCODE 2, opnd)
421 :     | I.ENTER{src1, src2} => unimplemented "ENTER"
422 : jhr 5562 | I.LEAVE => eByte 0xc9
423 : jhr 5558 | I.RET NONE => eByte 0xc3
424 : jhr 5562 | I.MOVE{mvOp, src, dst} => let
(* mov sz: emit a basic MOV of `src` into `dst` for operand size sz (32 or 64).
 *   - Immed into a register: B8+r id for 32 bits, REX.W C7 /0 id for 64 bits;
 *     any other sz raises Fail "impossible".
 *   - Immed into anything else, and ImmedLabel: C7 /0 id via encodeLongImm.
 *   - Immed64 into a register: B8+r with the low four bytes (sz = 32) or all
 *     eight bytes with REX.W (otherwise); Immed64 elsewhere is an error.
 *   - LabelEA is an error; all remaining operand pairs go through arith with
 *     base opcode 0x88.
 *)
fun mov sz = (case (src, dst)
       of (I.Immed i, I.Direct (_, r)) => (case sz
             of 32 => let
                  (* B8+r has only three register bits, so r8-r15 need REX.B
                   * and the reduced register number (as in the Immed64 case)
                   *)
                  val (start, reg) = if rNum r < 8
                        then ([], rNum r)
                        else ([0wx41], rNum r - 8)
                  in
                    eBytes (start @ Word8.+(0wxb8, Word8.fromInt reg) :: eLong i)
                  end
              | 64 => let
                  val (start, reg) = if rNum r < 8
                        then (0wx48, rNum r)
                        else (0wx49, rNum r - 8)
                  in
                    eBytes (start :: 0wxc7 :: Word8.+(0wxc0, Word8.fromInt reg) :: eLong i)
                  end
              | _ => raise Fail "impossible"
            (* end case *))
        | (I.Immed i, _) => encodeLongImm sz (0wxc7, OPCODE 0, dst, i)
        | (I.Immed64 i, I.Direct (_, r)) => if sz = 32
            then let
              val (start, reg) = if rNum r < 8
                    then ([], rNum r)
                    else ([0wx41], rNum r - 8)
              in
                eBytes (start @ Word8.+(0wxb8, Word8.fromInt reg) :: eLongLongCut i)
              end
            else let
              val (start, reg) = if rNum r < 8
                    then (0wx48, rNum r)
                    else (0wx49, rNum r - 8)
              in
                eBytes (start :: Word8.+(0wxb8, Word8.fromInt reg) :: eLongLong i)
              end
        | (I.Immed64 i, _) => error " Immed64 _"
        | (I.ImmedLabel le, dst) =>
            encodeLongImm sz (0wxc7, OPCODE 0, dst, lexp le)
        | (I.LabelEA le, dst) => error "MOVL: LabelEA"
        | (src, dst) => arith (sz, 0wx88, OPCODE 0) (src, dst)
      (* end case *))
460 : jhr 5562 (* zero and sign-extension moves for 16/32-bit results *)
461 :     fun extend (opc, r) = eBytes (encode32' ([0wx0f, opc], REG(rNum r), src))
(* rexReg r: for physical register number r, return the REX prefix byte and
 * the register number reduced to three bits. Registers 0-7 get (0x48, r);
 * registers 8 and up get (0x49, r - 8). The comparison used to be inverted,
 * which gave low registers a negative number and left high ones unreduced.
 * NOTE(review): 0x49 sets the bit that eREXRegs assigns to the r/m (b)
 * position; extend64 places this register in the ModRM reg field, so confirm
 * whether that caller needs the 0x4 (r) bit instead.
 *)
fun rexReg r = if r < 8
      then (0wx48, r)
      else (0wx49, r - 8)
466 :     (* zero and sign-extension moves for 64-bit results *)
467 :     fun extend64 (opc, r) = let
468 :     val (rex, r) = rexReg(rNum r)
469 :     in
470 :     eBytes (rex :: encode32' ([0wx0f, opc], REG r, src))
471 :     end
472 :     in
473 :     case (mvOp, src, dst)
474 :     of (I.MOVQ, _, _) => mov 64
475 :     | (I.MOVL, _, _) => mov 32
476 :     | (I.MOVB, I.Immed i, _) => (case size i
477 :     of Bits32 => error "MOVE: MOVB: imm8"
478 :     | _ => encodeByteImm32 (0wxc6, OPCODE 0, dst, i)
479 :     (* end case *))
480 :     | (I.MOVB, I.Direct(_, r), _) => encodeReg32 (0wx88, r, dst)
481 :     | (I.MOVB, _, I.Direct(_, r)) => encodeReg32 (0wx8a, r, src)
482 :     | (I.MOVW, _, I.Direct(_, r)) =>
483 :     eBytes (0wx66 :: encode32' ([0wx89], REG (rNum r), dst))
484 :     | (I.MOVW, _, _) => unimplemented "MOVW"
485 :     | (I.MOVABSQ, I.Immed64 i, I.Direct(_, r)) => let
486 :     val (rex, reg) = rexReg (rNum r)
487 : jhr 5558 in
488 : jhr 5562 eBytes(rex :: (0wxb8+Word8.fromInt reg) :: eLongLong i)
489 : jhr 5558 end
490 : jhr 5562 | (I.MOVABSQ, I.ImmedLabel labexp, I.Direct(_ ,r)) => let
491 :     val p = rNum r
492 :     val byte1 = if p < 8 then 0wx48 else 0wx49
493 :     val byte2 = 0wxb8 + Word8.fromInt (if p < 8 then p else p - 8)
494 :     val byten = eLong (lexp labexp) (* FIXME: should be 64 bits *)
495 :     val hilong = if (lexp labexp) < 0
496 :     then [0wxff, 0wxff, 0wxff, 0wxff]
497 :     else eLong 0
498 : jhr 5558 in
499 : jhr 5562 eBytes ([byte1, byte2] @ byten @ hilong)
500 : jhr 5558 end
501 : jhr 5562 | (I.MOVABSQ, _, _) => unimplemented "MOVABSQ"
502 :     | (I.MOVSWQ, _, I.Direct(_, r)) => extend64 (0wxbf, r)
503 :     | (I.MOVZWQ, _, I.Direct(_, r)) => extend64 (0wxb7, r)
504 :     | (I.MOVSWL, _, I.Direct(_, r)) => extend (0wxbf, r)
505 :     | (I.MOVZWL, _, I.Direct(_, r)) => extend (0wxb7, r)
506 :     | (I.MOVSBQ, _, I.Direct(_, r)) => extend64 (0wxbe, r)
507 :     | (I.MOVZBQ, _, I.Direct(_, r)) => extend64 (0wxb6, r)
508 :     | (I.MOVSBL, _, I.Direct(_, r)) => extend (0wxbe, r)
509 :     | (I.MOVZBL, _, I.Direct(_, r)) => extend (0wxb6, r)
510 :     | (I.MOVSLQ, _, I.Direct(_, r)) => extend64 (0wx63, r)
511 :     | (I.CVTSD2SI, _, I.Direct(_, r)) => unimplemented "CVTSD2SI"
512 :     | (I.CVTSS2SI, _, I.Direct(_, r)) => unimplemented "CVTSS2SI"
513 :     | (I.CVTSD2SIQ, _, I.Direct(_, r)) => unimplemented "CVTSD2SIQ"
514 :     | (I.CVTSS2SIQ, _, I.Direct(_, r)) => unimplemented "CVTSS2SIQ"
515 :     | _ => error "MOVE"
516 :     (* end case *)
517 : jhr 5558 end
518 : jhr 5560 | I.LEAL{r32, addr} => encodeReg32(0wx8d, r32, addr)
519 :     | I.LEAQ{r64, addr} => encodeReg64(0wx8d, r64, addr)
520 :     | I.CMPQ{lsrc, rsrc} => arith(64, 0wx38, OPCODE 7) (rsrc, lsrc)
521 :     | I.CMPL{lsrc, rsrc} => arith(32, 0wx38, OPCODE 7) (rsrc, lsrc)
522 :     | I.CMPW _ => unimplemented "CMPW"
523 :     | I.CMPB _ => unimplemented "CMP"
524 : jhr 5558 | I.TESTQ{lsrc, rsrc} => test(64, rsrc, lsrc)
525 :     | I.TESTL{lsrc, rsrc} => test(32, rsrc, lsrc)
526 : jhr 5560 | I.TESTW _ => unimplemented "TESTW"
527 : jhr 5558 | I.TESTB{lsrc, rsrc} => test(8, rsrc, lsrc)
| I.BITOP{bitOp, lsrc, rsrc} => let
    (* encode sz: bytes of a bit test of `rsrc` at bit position `lsrc`, for
     * operand size sz (32 or 64).
     *   immediate position: 0F BA /4 ib, with the position required to lie
     *     in [0, sz)
     *   register position:  0F A3 /r (0F BA is the immediate-only opcode)
     * Encoding goes through encode32'/encode64' so that memory operands
     * (ModRM + SIB + displacement) and REX prefixes for extended registers
     * are emitted; the immediate byte follows the addressing bytes.
     *)
    fun encode sz = let
          val enc = if (sz = 64) then encode64' else encode32'
          in
            case lsrc
             of I.Immed n =>
                  if ((0 <= n) andalso (n < sz))
                    then enc ([0wx0f, 0wxba], OPCODE 4, rsrc)
                           @ [Word8.fromLargeInt (Int32.toLarge n)]
                    else error "BITOP: invalid bit position"
              | I.Direct(_, r) => enc ([0wx0f, 0wxa3], REG(rNum r), rsrc)
              | _ => error "BITOP: invalid operand"
            (* end case *)
          end
    in
      case bitOp
       of I.BTW => unimplemented "BTW"
        | I.BTL => eBytes (encode 32)
        | I.BTQ => eBytes (encode 64)
        (* NOTE(review): the locked variants use a wider encoding than their
         * names suggest (BTW -> 32, BTL -> 64), unlike the unlocked cases
         * above; left unchanged, confirm the intended sizes.
         *)
        | I.LOCK_BTW => eBytes (lockPrefix :: encode 32)
        | I.LOCK_BTL => eBytes (lockPrefix :: encode 64)
      (* end case *)
    end
558 : jhr 5560 | I.BINARY{binOp, src, dst} => let
559 :     fun shift (sz, code) = (case src
560 : jhr 5562 of I.Immed 1 => encode sz (0wxd1, OPCODE code, dst)
561 :     | I.Immed n => encodeByteImm sz (0wxc1, OPCODE code, dst, n)
562 :     | I.Direct(_, r) => if rNum r <> ecx
563 : jhr 5561 then error "shift: Direct"
564 :     else encode sz (0wxd3, OPCODE code, dst)
565 :     (* | I.MemReg _ => shift(code, memReg src) *)
566 : jhr 5560 | _ => error "shift"
567 :     (*esac*))
568 : jhr 5564 (* signed integer multiplication
569 :     * dst <- src * dst [REX.W] 0F AF /r
570 :     * dst <- src * immed8 [REX.W] 6B /r ib
571 :     * dst <- src * immed32 [REX.W] 69 /r id
572 :     *)
573 :     fun imul sz = (case (src, dst)
574 : jhr 5562 of (I.Immed i, I.Direct(_, dstR)) => (case size i
575 : jhr 5560 of Bits32 => encodeLongImm sz (0wx69, REG (rNum dstR), dst, i)
576 : jhr 5564 | _ => encodeByteImm sz (0wx6b, REG (rNum dstR), dst, i)
577 :     (* end case *))
578 : jhr 5562 | (_, I.Direct(_, dstR)) =>
579 : jhr 5564 eBytes (encode' sz ([0wx0f, 0wxaf], REG (rNum dstR), src))
580 : jhr 5560 | _ => error "imul"
581 : jhr 5564 (* end case *))
582 : jhr 5560 in
583 :     case binOp
584 :     of I.ADDQ => arith(64, 0w0, OPCODE 0) (src, dst)
585 :     | I.SUBQ => arith(64, 0wx28, OPCODE 5) (src, dst)
586 :     | I.ANDQ => arith(64, 0wx20, OPCODE 4) (src, dst)
587 :     | I.ORQ => arith(64, 0w8, OPCODE 1) (src, dst)
588 :     | I.XORQ => arith(64, 0wx30, OPCODE 6) (src, dst)
589 :     | I.SHLQ => shift(64, 4)
590 :     | I.SARQ => shift(64, 7)
591 :     | I.SHRQ => shift(64, 5)
592 :     | I.IMULQ => imul 64
593 :     | I.ADCQ => unimplemented "ADCQ"
594 :     | I.SBBQ => unimplemented "SBBQ"
595 :     | I.ADDL => arith(32, 0w0, OPCODE 0) (src, dst)
596 :     | I.SUBL => arith(32, 0wx28, OPCODE 5) (src, dst)
597 :     | I.ANDL => arith(32, 0wx20, OPCODE 4) (src, dst)
598 :     | I.ORL => arith(32, 0w8, OPCODE 1) (src, dst)
599 :     | I.XORL => arith(32, 0wx30, OPCODE 6) (src, dst)
600 :     | I.SHLL => shift(32, 4)
601 :     | I.SARL => shift(32, 7)
602 :     | I.SHRL => shift(32, 5)
603 : jhr 5564 | I.IMULL => imul 32
604 : jhr 5560 | I.ADCL => unimplemented "ADCL"
605 :     | I.SBBL => unimplemented "SBBL"
606 :     | I.ADDW => unimplemented "ADDW"
607 :     | I.SUBW => unimplemented "SUBW"
608 :     | I.ANDW => unimplemented "ANDW"
609 :     | I.ORW => unimplemented "ORW"
610 :     | I.XORW => unimplemented "XORW"
611 :     | I.SHLW => unimplemented "SHLW"
612 :     | I.SARW => unimplemented "SARW"
613 :     | I.SHRW => unimplemented "SHRW"
614 :     | I.IMULW => unimplemented "IMULW"
615 :     | I.ADDB => unimplemented "ADDB"
616 :     | I.SUBB => unimplemented "SUBB"
617 :     | I.ANDB => unimplemented "ANDB"
618 :     | I.ORB => unimplemented "ORB"
619 :     | I.XORB => unimplemented "XORB"
620 :     | I.SHLB => unimplemented "SHLB"
621 :     | I.SARB => unimplemented "SARB"
622 :     | I.SHRB => unimplemented "SHRB"
623 :     | I.IMULB => unimplemented "IMULB"
624 :     | I.BTSW => unimplemented "BTSW"
625 :     | I.BTCW => unimplemented "BTCW"
626 :     | I.BTRW => unimplemented "BTRW"
627 :     | I.BTSL => unimplemented "BTSL"
628 :     | I.BTCL => unimplemented "BTCL"
629 :     | I.BTRL => unimplemented "BTRL"
630 :     | I.ROLW => unimplemented "ROLW"
631 :     | I.RORW => unimplemented "RORW"
632 :     | I.ROLL => unimplemented "ROLL"
633 :     | I.RORL => unimplemented "RORL"
634 :     | I.XCHGB => unimplemented "XCHGB"
635 :     | I.XCHGW => unimplemented "XCHGW"
636 :     | I.XCHGL => unimplemented "XCHGL"
637 :     | I.LOCK_ADCW => unimplemented "LOCK_ADCW"
638 :     | I.LOCK_ADCL => unimplemented "LOCK_ADCL"
639 :     | I.LOCK_ADDW => unimplemented "LOCK_ADDW"
640 :     | I.LOCK_ADDL => unimplemented "LOCK_ADDL"
641 :     | I.LOCK_ANDW => unimplemented "LOCK_ANDW"
642 :     | I.LOCK_ANDL => unimplemented "LOCK_ANDL"
643 :     | I.LOCK_BTSW => unimplemented "LOCK_BTSW"
644 :     | I.LOCK_BTSL => unimplemented "LOCK_BTSL"
645 :     | I.LOCK_BTRW => unimplemented "LOCK_BTRW"
646 :     | I.LOCK_BTRL => unimplemented "LOCK_BTRL"
647 :     | I.LOCK_BTCW => unimplemented "LOCK_BTCW"
648 :     | I.LOCK_BTCL => unimplemented "LOCK_BTCL"
649 :     | I.LOCK_ORW => unimplemented "LOCK_ORW"
650 :     | I.LOCK_ORL => unimplemented "LOCK_ORL"
651 :     | I.LOCK_SBBW => unimplemented "LOCK_SBBW"
652 :     | I.LOCK_SBBL => unimplemented "LOCK_SBBL"
653 :     | I.LOCK_SUBW => unimplemented "LOCK_SUBW"
654 :     | I.LOCK_SUBL => unimplemented "LOCK_SUBL"
655 :     | I.LOCK_XORW => unimplemented "LOCK_XORW"
656 :     | I.LOCK_XORL => unimplemented "LOCK_XORL"
657 :     | I.LOCK_XADDB => unimplemented "LOCK_XADDB"
658 :     | I.LOCK_XADDW => unimplemented "LOCK_XADDW"
659 :     | I.LOCK_XADDL => unimplemented "LOCK_XADDL"
660 :     (* end case *)
661 :     end
662 :     | I.SHIFT{shiftOp, src, dst, count} => (case shiftOp
663 :     of I.SHLDL => unimplemented "SHLDL"
664 :     | I.SHRDL => unimplemented "SHRDL"
665 :     (* end case *))
666 :     | I.MULTDIV{multDivOp, src} => let
667 :     val (mulOp, sz) = (case multDivOp
668 :     of I.MULL1 => (4, 32) | I.IDIVL1 => (7, 32) | I.DIVL1 => (6, 32)
669 :     | I.MULQ1 => (4, 64) | I.IDIVQ1 => (7, 64) | I.DIVQ1 => (6, 64)
670 :     | I.IMULL1 => error "imull1"
671 :     | I.IMULQ1 => error "imulq1"
672 :     (* esac *))
673 :     in
674 :     encode sz (0wxf7, OPCODE mulOp, src)
675 :     end
676 :     | I.MUL3{dst, src1, src2=i} => (case src1
677 :     of I.Immed _ => error "mul3: Immed"
678 :     | I.ImmedLabel _ => error "mul3: ImmedLabel"
679 :     | _ => (case size i
680 :     of Bits32 => encodeLongImm32(0wx69, REG (rNum dst), src1, i)
681 :     | _ => encodeByteImm32(0wx6b, REG (rNum dst), src1, i)
682 :     (*esac*))
683 :     (*esac*))
684 :     | I.MULQ3{dst, src1, src2=i} => (case src1
685 :     of I.Immed _ => error "mul3: Immed"
686 :     | I.ImmedLabel _ => error "mul3: ImmedLabel"
687 :     | _ => (case size i
688 :     of Bits32 => encodeLongImm64(0wx69, REG (rNum dst), src1, i)
689 :     | _ => encodeByteImm64(0wx6b, REG (rNum dst), src1, i)
690 :     (*esac*))
691 :     (*esac*))
| I.UNARY{unOp, opnd} => let
    (* Opcode map used below:
     *   INC = FF /0    DEC = FF /1    NOT = F7 /2    NEG = F7 /3
     * FF /2 and FF /3 are indirect calls, so NOT and NEG must use F7.
     * The Q forms go through encode64 (REX.W), the L forms through encode32;
     * the LOCK_ forms put the lock prefix in front of the 64-bit encoding.
     *)
    fun lock code = eBytes(lockPrefix :: code)
    in
      case unOp
       of I.DECQ => encode64 (0wxff, OPCODE 1, opnd)
        | I.INCQ => encode64 (0wxff, OPCODE 0, opnd)
        | I.NEGQ => encode64 (0wxf7, OPCODE 3, opnd)
        | I.NOTQ => encode64 (0wxf7, OPCODE 2, opnd)
        | I.DECL => encode32 (0wxff, OPCODE 1, opnd)
        | I.NEGL => encode32 (0wxf7, OPCODE 3, opnd)
        | I.INCL => encode32 (0wxff, OPCODE 0, opnd)
        | I.NOTL => encode32 (0wxf7, OPCODE 2, opnd)
        | I.DECW => unimplemented "DECW"
        | I.NEGW => unimplemented "NEGW"
        | I.INCW => unimplemented "INCW"
        | I.NOTW => unimplemented "NOTW"
        | I.DECB => unimplemented "DECB"
        | I.NEGB => unimplemented "NEGB"
        | I.INCB => unimplemented "INCB"
        | I.NOTB => unimplemented "NOTB"
        | I.LOCK_DECQ => lock (encode64' ([0wxff], OPCODE 1, opnd))
        | I.LOCK_INCQ => lock (encode64' ([0wxff], OPCODE 0, opnd))
        | I.LOCK_NEGQ => lock (encode64' ([0wxf7], OPCODE 3, opnd))
        | I.LOCK_NOTQ => lock (encode64' ([0wxf7], OPCODE 2, opnd))
      (* esac *)
    end
718 : jhr 5558 | I.SET{cond,opnd} =>
719 : jhr 5560 eBytes (encode32' ([0wx0f, Word8.+(0wx90,condCode cond)], REG 0, opnd))
720 :     | I.CMOV{cond,src,dst} =>
721 :     eBytes (encode32' ([0wx0f, Word8.+(condCode cond,0wx40)], REG (rNum dst), src))
722 : jhr 5562 | I.PUSH(I.Immed i) => (case size i
723 :     of Bits32 => eBytes(0wx68 :: eLong i)
724 : jhr 5560 | _ => eBytes [0wx6a, toWord8 i]
725 :     (* esac *))
726 : jhr 5562 | I.PUSH(I.Direct(_, r)) => let
727 :     val r = rNum r
728 :     in
729 :     if (r < 8)
730 :     then eByte (0x50 + r)
731 :     else eBytes [0wx41, 0wx50 + Word8.fromInt(r - 8)]
732 :     end
733 : jhr 5564 (* TODO: check that PUSH is correct for other operands *)
734 : jhr 5562 | I.PUSH opnd => encode32 (0wxff, OPCODE 6, opnd)
735 :     | I.PUSHFQ => eByte 0x9c
736 :     | I.POPFQ => eByte 0x9d
737 :     | I.POP(I.Direct(_, r)) => let
738 :     val r = rNum r
739 :     in
740 :     if (r < 8)
741 :     then eByte (0x58 + r)
742 :     else eBytes [0wx41, 0wx58 + Word8.fromInt(r - 8)]
743 :     end
744 : jhr 5564 (* TODO: check that POP is correct for other operands *)
745 : jhr 5560 | I.POP opnd => encode32 (0wx8f, OPCODE 0, opnd)
746 :     | I.CDQ => eByte 0x99
747 :     | I.CDO => eBytes [0wx48, 0wx99]
748 :     | I.INT b => eBytes [0wxcd, b]
749 : jhr 5558 | I.FMOVE {fmvOp=I.MOVSD, dst=I.FDirect r, src=src} => movsd(0wx10, r, src)
750 :     | I.FMOVE {fmvOp=I.MOVSD, dst=dst, src=I.FDirect r} => movsd(0wx11, r, dst)
751 :     | I.FMOVE {fmvOp, dst, src} => (case fmvOp
752 :     of I.CVTSI2SD => error "CVTSI2SD not implemented"
753 :     | I.CVTSI2SDQ => let
754 :     val I.FDirect r = dst
755 :     in
756 : jhr 5560 eBytes([0wxf2] @ encode64'([0wxf,0wx2a],REG (fNum r),src))
757 : jhr 5558 end
758 :     | I.CVTSD2SS => (case dst
759 :     of (I.FDirect r) =>
760 : jhr 5560 eBytes([0wxf2] @ encode32'([0wxf,0wx5a],REG (fNum r),src))
761 : jhr 5558 | _ => error "CVTSD2SS"
762 :     (* end case *))
763 :     | I.CVTSS2SD => (case dst
764 :     of (I.FDirect r) =>
765 : jhr 5560 eBytes([0wxf3] @ encode32'([0wxf,0wx5a],REG (fNum r),src))
766 : jhr 5558 | _ => error "CVTSD2SD"
767 :     (* end case *))
768 :     | I.MOVSS => (case src
769 :     of (I.FDirect r) =>
770 : jhr 5560 eBytes([0wxf3] @ encode32'([0wxf,0wx11],REG (fNum r),dst))
771 : jhr 5558 | _ => error "MOVSS"
772 :     (* end case *))
773 :     | _ => error "FMOVE"
774 :     (* end case *))
775 : jhr 5561 | I.FBINOP {binOp, src, dst} => let
776 :     fun encode2 (op1, op2) = eBytes(encode32'([op1, op2], REG(fNum dst), src))
777 :     fun encode (op1, op2, op3) =
778 :     eBytes(op1 :: encode32'([op2, op3], REG(fNum dst), src))
779 :     in
780 :     case binOp
781 :     of I.ADDSS => encode (0wxf3, 0wx0f, 0wx58)
782 :     | I.ADDSD => encode (0wxf2, 0wx0f, 0wx58)
783 :     | I.SUBSS => encode (0wxf3, 0wx0f, 0wx5c)
784 :     | I.SUBSD => encode (0wxf2, 0wx0f, 0wx5c)
785 :     | I.MULSS => encode (0wxf3, 0wx0f, 0wx59)
786 :     | I.MULSD => encode (0wxf2, 0wx0f, 0wx59)
787 :     | I.DIVSS => encode (0wxf3, 0wx0f, 0wx5e)
788 :     | I.DIVSD => encode (0wxf2, 0wx0f, 0wx5e)
789 :     | I.XORPS => encode2 (0wx0f, 0wx57)
790 :     | I.XORPD => encode (0wx66, 0wx0f, 0wx57)
791 :     | I.ANDPS => encode2 (0wx0f, 0wx54)
792 :     | I.ANDPD => encode (0wx66, 0wx0f, 0wx54)
793 :     | I.ORPS => encode2 (0wx0f, 0wx56)
794 :     | I.ORPD => encode (0wx66, 0wx0f, 0wx56)
795 :     (* end case *)
796 :     end
797 : jhr 5558 | I.FCOM {comOp, src, dst} => (case comOp
798 : jhr 5561 of I.COMISS => unimplemented "COMISS"
799 :     | I.COMISD => unimplemented "COMISD"
800 :     | I.UCOMISS => unimplemented "UCOMISS"
801 :     | I.UCOMISD => eBytes([0wx66] @ encode32'([0wxf,0wx2e],REG (fNum dst),src))
802 : jhr 5558 (* end case *))
803 :     | I.FSQRTS {dst, src} => let
804 :     val I.FDirect r = dst
805 :     in
806 : jhr 5560 eBytes([0wxf3] @ encode32'([0wxf,0wx51],REG (fNum r),src))
807 : jhr 5558 end
808 :     | I.FSQRTD {dst, src} => let
809 :     val I.FDirect r = dst
810 :     in
811 : jhr 5560 eBytes([0wxf2] @ encode32'([0wxf,0wx51],REG (fNum r),src))
812 : jhr 5558 end
813 : jhr 5560 | I.SAHF => eByte 0x9e
814 :     | I.LFENCE => eBytes [0wx0f, 0wxae, 0wxe8]
815 :     | I.MFENCE => eBytes [0wx0f, 0wxae, 0wxf0]
816 :     | I.SFENCE => eBytes [0wx0f, 0wxae, 0wxf8]
817 :     | I.PAUSE => eBytes [0wxf3, 0wx90]
818 :     | I.XCHG _ => unimplemented "XCHG"
819 :     | I.CMPXCHG _ => unimplemented "CMPXCHG"
820 :     | I.XADD _ => unimplemented "XADD"
821 :     | I.RDTSC => unimplemented "RDTSC"
822 :     | I.RDTSCP => unimplemented "RDTSCP"
823 :     | I.LAHF => eByte 0x9f
824 : jhr 5558 | _ => error "emitInstr"
825 :     (* esac *)
826 :     end (* emitAMD64Instr *)
827 : jhr 5159
828 : jhr 5558 and emitInstr (I.LIVE _) = Word8Vector.fromList [] (* emitInstr: encode one instruction; LIVE yields an empty byte vector *)
829 :     | emitInstr (I.KILL _) = Word8Vector.fromList [] (* KILL likewise yields an empty byte vector *)
830 :     | emitInstr(I.COPY{k, dst, src, tmp, ...}) = (case k (* COPY: dispatch on the cell kind k *)
831 :     of CB.GP => emitInstrs (Shuffle.shuffle {tmp=tmp, dst=dst, src=src}) (* GP copy: emit the instructions returned by Shuffle.shuffle *)
832 :     | CB.FP => emitInstrs (Shuffle.shufflefp {tmp=tmp, dst=dst, src=src}) (* FP copy: same, via Shuffle.shufflefp *)
833 :     | _ => error "COPY" (* any other cell kind is reported through error *)
834 :     (*esac*))
835 :     | emitInstr (I.INSTR instr) = emitAMD64Instr instr (* real machine instruction: delegate to emitAMD64Instr *)
836 : jhr 5560 (* NOTE: the general consensus on the internet is that Intel hardware ignores the
837 :     * branch hint prefixes and just relies on dynamic techniques. Therefore, we
838 :     * comment this code out for now. NOTE(review): Word8Vector.prepend, used below, does not appear to be a Basis Library function -- confirm before re-enabling.
839 :     | emitInstr (I.ANNOTATION{i, a}) = (case #peek MLRiscAnnotations.BRANCH_PROB a
840 :     of SOME prob => let
841 :     val prob = Probability.toReal prob
842 :     fun emit (I.ANNOTATION{i, ...}) = emit i
843 :     | emit (I.INSTR(instr as I.JCC _)) = let
844 :     val code = emitInstr i
845 :     in
846 :     if prob < 0.5
847 :     then Word8Vector.prepend (0wx2e, code) (* add not-taken hint *)
848 :     else if prob > 0.5
849 :     then Word8Vector.prepend (0wx3e, code) (* add taken hint *)
850 :     else code
851 :     end
852 :     | emit _ = error "bogus BRANCH_PROB annotation"
853 :     in
854 :     emit i
855 :     end
856 :     | NONE => emitInstr i
857 :     (* end case *))
858 :     *)
859 :     | emitInstr (I.ANNOTATION{i, a}) = emitInstr i (* annotation a is ignored; encode the wrapped instruction i *)
860 : mrainey 2619
861 : jhr 5558 end (* AMD64MCEmitter *)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0