Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /sml/trunk/src/compiler/TopLevel/batch/binfile.sml
ViewVC logotype

Annotation of /sml/trunk/src/compiler/TopLevel/batch/binfile.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 440 - (view) (download)

1 : monnier 249 (* COPYRIGHT (c) 1997 Bell Labs, Lucent Technologies *)
2 :     (* binfile.sml *)
3 :    
4 :     functor BinfileFun (C : COMPILE) : BINFILE = struct
5 :     structure Pid = PersStamps
6 :     structure Env = CMEnv.Env
7 :     structure Err = ErrorMsg
8 :     structure CB = CompBasic
9 :    
10 :     exception FormatError = CodeObj.FormatError
11 :    
12 :     exception Compile = C.Compile
13 :     exception TopLevelException = C.TopLevelException
14 :     exception SilentException = C.SilentException
15 :    
16 :     fun error msg = (
17 :     Control.Print.say(concat["Bin file format error: ", msg, "\n"]);
18 :     raise FormatError)
19 :    
20 :     type pid = Pid.persstamp
21 :     type senv = Env.staticEnv
22 :     type symenv = Env.symenv
23 :     type denv = Env.dynenv
24 :     type env = Env.environment
25 :     type lambda = CB.flint
26 :    
27 :     type csegments = C.csegments
28 :     type executable = C.executable
29 :    
30 : monnier 429 fun delay thunk = let
31 :     val f = ref (fn () => raise Fail "Binfile.delay not initialized")
32 :     fun first () = let
33 :     val r = thunk ()
34 :     fun later () = r
35 :     in
36 :     f := later; r
37 :     end
38 :     in
39 :     f := first;
40 :     fn () => !f ()
41 :     end
42 : monnier 249
43 :     (* pickled data: the data, the pid, and the pickle *)
44 :     type 'a pData =
45 : monnier 429 { unpickled: unit -> 'a, pid: pid, pickled: Word8Vector.vector }
46 : monnier 249
47 :     datatype bfContent =
48 :     BFC of {
49 :     imports: C.import list,
50 : monnier 439 exportPid: pid option,
51 : monnier 249 cmData: pid list,
52 :     senv: senv pData,
53 :     lambda: lambda option pData,
54 : monnier 429 csegments: csegments,
55 :     executable: executable option ref
56 : monnier 249 }
57 :    
58 :     fun staticPidOf (BFC { senv = { pid, ... }, ... }) = pid
59 : monnier 439 fun exportPidOf (bfc as BFC { exportPid, ... }) = exportPid
60 : monnier 249 fun lambdaPidOf (BFC { lambda = { pid, ... }, ... }) = pid
61 :     fun cmDataOf (BFC { cmData, ... }) = cmData
62 : monnier 429 fun senvOf (BFC { senv = { unpickled, ... }, ... }) = unpickled ()
63 : monnier 439 fun symenvOf (bfc as BFC { senv, lambda, ... }) =
64 : monnier 429 C.mksymenv (exportPidOf bfc, #unpickled lambda ())
65 : monnier 249
66 :     local
67 :     val arch = C.architecture
68 :    
69 : monnier 429 fun executableOf (BFC { executable = ref (SOME e), ... }) = e
70 :     | executableOf (BFC { executable, csegments, ... }) = let
71 :     val e = C.isolate (C.mkexec csegments)
72 : monnier 249 in
73 : monnier 429 executable := SOME e;
74 :     e
75 : monnier 249 end
76 :    
77 : monnier 429 fun codeSegments (BFC { csegments, ... }) = csegments
78 : monnier 249
79 :     val fromInt = Word32.fromInt
80 :     val fromByte = Word32.fromLargeWord o Word8.toLargeWord
81 :     val toByte = Word8.fromLargeWord o Word32.toLargeWord
82 :     val >> = Word32.>>
83 :     infix >>
84 :    
85 :     (*
86 :     * BINFILE FORMAT description:
87 :     *
88 :     * Every 4-byte integer field is stored in big-endian format.
89 :     *
90 :     * Start Size Purpose
91 :     * ----BEGIN OF HEADER----
92 :     * 0 16 magic string
93 :     * 16 4 number of import values (importCnt)
94 :     * 20 4 number of exports (exportCnt = currently always 0 or 1)
95 :     * 24 4 size of import tree area in bytes (importSzB)
96 :     * 28 4 size of CM-specific info in bytes (cmInfoSzB)
97 :     * 32 4 size of pickled lambda-expression in bytes (lambdaSzB)
98 :     * 36 4 size of reserved area 1 in bytes (reserved1)
99 :     * 40 4 size of reserved area 2 in bytes (reserved2)
100 :     * 44 4 size of code area in bytes (codeSzB)
101 :     * 48 4 size of pickled environment in bytes (envSzB)
102 :     * 52 i import trees [This area contains pickled import trees --
103 :     * see below. The total number of leaves in these trees is
104 :     * importCnt and the size is equal to importSzB.]
105 :     * i+52 ex export pids [Each export pid occupies 16 bytes. Thus, the
106 :     * size ex of this area is 16*exportCnt (0 or 16).]
107 :     * ex+i+52 cm CM info [Currently a list of pid-pairs.] (cm = cmInfoSzB)
108 :     * ----END OF HEADER----
109 :     * 0 h HEADER (h = 52+cm+ex+i)
110 :     * h l pickle of exported lambda-expr. (l = lambdaSzB)
111 :     * l+h r reserved areas (r = reserved1+reserved2)
112 :     * r+l+h c code area (c = codeSzB) [Structured into several
113 :     * segments -- see below.]
114 :     * c+r+l+h e pickle of static environment (e = envSzB)
115 :     * e+c+r+l+h - END OF BINFILE
116 :     *
117 :     * IMPORT TREE FORMAT description:
118 :     *
119 :     * The import tree area contains a list of (pid * tree) pairs.
120 :     * The pids are stored directly as 16-byte strings.
121 :     * Trees are constructed according to the following ML-datatype:
122 :     * datatype tree = NODE of (int * tree) list
123 :     * Leaves in this tree have the form (NODE []).
124 :     * Trees are written recursively -- (NODE l) is represented by n (= the
125 :     * length of l) followed by n (int * node) subcomponents. Each component
126 :     * consists of the integer selector followed by the corresponding tree.
127 :     *
128 :     * The size of the import tree area is only given implicitly. When reading
129 :     * this area, the reader must count the number of leaves and compare it
130 :     * with importCnt.
131 :     *
132 :     * Integer values in the import tree area (lengths and selectors) are
133 :     * written in "packed" integer format. In particular, this means that
134 :     * Values in the range 0..127 are represented by only 1 byte.
135 :     * Conceptually, the following pickling routine is used:
136 :     *
137 :     * void recur_write_ul (unsigned long l, FILE *file)
138 :     * {
139 :     * if (l != 0) {
140 :     * recur_write_ul (l >> 7, file);
141 :     * putc ((l & 0x7f) | 0x80, file);
142 :     * }
143 :     * }
144 :     *
145 :     * void write_ul (unsigned long l, FILE *file)
146 :     * {
147 :     * recur_write_ul (l >> 7, file);
148 :     * putc (l & 0x7f, file);
149 :     * }
150 :     *
151 :     * CODE AREA FORMAT description:
152 :     *
153 :     * The code area contains multiple code segements. There will be at least
154 :     * two. The very first segment is the "data" segment -- responsible for
155 :     * creating literal constants on the heap. The idea is that code in the
156 :     * data segment will be executed only once at link-time. Thus, it can
157 :     * then be garbage-collected immediatly. (In the future it is possible that
158 :     * the data segment will not contain executable code at all but some form
159 :     * of bytecode that is to be interpreted separately.)
160 :     *
161 :     * In the binfile, each code segment is represented by its size s (in
162 :     * bytes -- written as a 4-byte big-endian integer) followed by s bytes of
163 :     * machine- (or byte-) code. The total length of all code segments
164 :     * (including the bytes spent on representing individual sizes) is codeSzB.
165 :     *
166 :     * LINKING CONVENTIONS:
167 :     *
168 :     * Linking is achieved by executing all code segments in sequential order.
169 :     *
170 :     * The first code segment (i.e., the "data" segment) receives unit as
171 :     * its single argument.
172 :     *
173 :     * The second code segment receives a record as its single argument.
174 :     * This record has (importCnt+1) components. The first importCnt
175 :     * components correspond to the leaves of the import trees. The final
176 :     * component is the result from executing the data segment.
177 :     *
178 :     * All other code segments receive a single argument which is the result
179 :     * of the preceding segment.
180 :     *
181 :     * The result of the last segment represents the exports of the compilation
182 :     * unit. It is to be paired up with the export pid and stored in the
183 :     * dynamic environment. If there is no export pid, then the final result
184 :     * will be thrown away.
185 :     *
186 :     * The import trees are used for constructing the argument record for the
187 :     * second code segment. The pid at the root of each tree is the key for
188 :     * looking up a value in the existing dynamic environment. In general,
189 :     * that value will be a record. The selector fields of the import tree
190 :     * associated with the pid are used to recursively fetch components of that
191 :     * record.
192 :     *)
193 :    
194 :     val MAGIC = let
195 :     fun fit (i, s) = let
196 :     val s = StringCvt.padRight #" " i s
197 :     in
198 :     if (size s = i) then s
199 :     else substring (s, 0, i)
200 :     end
201 :     fun version [] = []
202 :     | version [x : int] = [Int.toString x]
203 :     | version (x :: r) = (Int.toString x) :: "." :: (version r)
204 :     val v = fit (8, concat (version (#version_id Version.version)))
205 :     val a =
206 :     if String.sub (arch, 0) = #"." then
207 :     fit (7, substring (arch, 1, (String.size arch) - 1))
208 :     else fit (7, arch)
209 :     in
210 :     Byte.stringToBytes (concat [v, a, "\n"])
211 :     end
212 :    
213 :     val magicBytes = Word8Vector.length MAGIC
214 :     val bytesPerPid = 16
215 :    
216 :     fun bytesIn (s, n) = let
217 :     val bv = BinIO.inputN (s, n)
218 :     in
219 :     if n = Word8Vector.length bv then bv
220 :     else error (concat["expected ", Int.toString n,
221 :     " bytes, but found ",
222 :     Int.toString(Word8Vector.length bv)])
223 :     end
224 :    
225 :     fun readInt32 s = LargeWord.toIntX(Pack32Big.subVec(bytesIn(s, 4), 0))
226 :    
227 :     fun readPackedInt32 s = let
228 :     fun loop n =
229 :     case BinIO.input1 s of
230 :     NONE => error "unable to read a packed int32"
231 :     | SOME w8 => let
232 :     val n' =
233 :     n * 0w128
234 :     + Word8.toLargeWord (Word8.andb (w8, 0w127))
235 :     in
236 :     if Word8.andb (w8, 0w128) = 0w0 then n' else loop n'
237 :     end
238 :     in
239 :     LargeWord.toIntX (loop 0w0)
240 :     end
241 :    
242 :     fun readPid s = Pid.fromBytes (bytesIn (s, bytesPerPid))
243 :     fun readPidList (s, n) = List.tabulate (n, fn _ => readPid s)
244 :    
245 :     fun readImportTree s =
246 :     case readPackedInt32 s of
247 :     0 => (CB.ITNODE [], 1)
248 :     | cnt => let
249 :     fun readImportList 0 = ([], 0)
250 :     | readImportList cnt = let
251 :     val selector = readPackedInt32 s
252 :     val (tree, n) = readImportTree s
253 :     val (rest, n') = readImportList (cnt - 1)
254 :     in
255 :     ((selector, tree) :: rest, n + n')
256 :     end
257 :     val (l, n) = readImportList cnt
258 :     in
259 :     (CB.ITNODE l, n)
260 :     end
261 :    
262 :     fun readImports (s, n) =
263 :     if n <= 0 then []
264 :     else let
265 :     val pid = readPid s
266 :     val (tree, n') = readImportTree s
267 :     val rest = readImports (s, n - n')
268 :     in
269 :     (pid, tree) :: rest
270 :     end
271 :    
272 :     fun pickleInt32 i = let
273 :     val w = fromInt i
274 :     fun out w = toByte w
275 :     in
276 :     Word8Vector.fromList [toByte (w >> 0w24), toByte (w >> 0w16),
277 :     toByte (w >> 0w8), toByte w]
278 :     end
279 :     fun writeInt32 s i = BinIO.output (s, pickleInt32 i)
280 :    
281 :     fun picklePackedInt32 i = let
282 :     val n = fromInt i
283 :     val // = LargeWord.div
284 :     val %% = LargeWord.mod
285 :     val !! = LargeWord.orb
286 :     infix // %% !!
287 :     val toW8 = Word8.fromLargeWord
288 :     fun r (0w0, l) = Word8Vector.fromList l
289 :     | r (n, l) = r (n // 0w128, toW8 ((n %% 0w128) !! 0w128) :: l)
290 :     in
291 :     r (n // 0w128, [toW8 (n %% 0w128)])
292 :     end
293 :    
294 :     fun writePid (s, pid) = BinIO.output (s, Pid.toBytes pid)
295 :     fun writePidList (s, l) = app (fn p => writePid (s, p)) l
296 :    
297 :     local
298 :     fun pickleImportSpec ((selector, tree), (n, p)) = let
299 :     val sp = picklePackedInt32 selector
300 :     val (n', p') = pickleImportTree (tree, (n, p))
301 :     in
302 :     (n', sp :: p')
303 :     end
304 :     and pickleImportTree (CB.ITNODE [], (n, p)) =
305 :     (n + 1, picklePackedInt32 0 :: p)
306 :     | pickleImportTree (CB.ITNODE l, (n, p)) = let
307 :     val (n', p') = foldr pickleImportSpec (n, p) l
308 :     in
309 :     (n', picklePackedInt32 (length l) :: p')
310 :     end
311 :    
312 :     fun pickleImport ((pid, tree), (n, p)) = let
313 :     val (n', p') = pickleImportTree (tree, (n, p))
314 :     in
315 :     (n', Pid.toBytes pid :: p')
316 :     end
317 :     in
318 :     fun pickleImports l = let
319 :     val (n, p) = foldr pickleImport (0, []) l
320 :     in
321 :     (n, Word8Vector.concat p)
322 :     end
323 :     end
324 :    
325 :     fun checkMagic s =
326 :     if (bytesIn (s, magicBytes)) = MAGIC then ()
327 :     else error "bad magic number"
328 :    
329 :     fun readHeader s = let
330 :     val _ = checkMagic s
331 :     val leni = readInt32 s
332 :     val ne = readInt32 s
333 :     val importSzB = readInt32 s
334 :     val cmInfoSzB = readInt32 s
335 :     val nei = cmInfoSzB div bytesPerPid
336 :     val sLam = readInt32 s
337 :     val sa1 = readInt32 s
338 :     val sa2 = readInt32 s
339 :     val cs = readInt32 s
340 :     val es = readInt32 s
341 :     val imports = readImports (s, leni)
342 :     val exportPid =
343 :     (case ne of
344 :     0 => NONE
345 :     | 1 => SOME(readPid s)
346 :     | _ => error "too many export PIDs")
347 :     val envPids = readPidList (s, nei)
348 :     in
349 :     case envPids of
350 :     st :: lm :: cmData =>
351 :     { nExports = ne,
352 :     lambdaSz = sLam,
353 :     res1Sz = sa1, res2Sz = sa2, codeSz = cs, envSz = es,
354 :     imports = imports, exportPid = exportPid,
355 :     cmData = cmData, staticPid = st, lambdaPid = lm }
356 :     | _ => error "env PID list"
357 :     end
358 :    
359 :     (* must be called with second arg >= 0 *)
360 :     fun readCodeList (strm, name, nbytes) = let
361 :     fun readCode 0 = []
362 :     | readCode n = let
363 :     val sz = readInt32 strm
364 :     val n' = n - sz - 4
365 :     in
366 :     if n' < 0
367 :     then error "code size"
368 :     else CodeObj.input(strm, sz, SOME name) :: readCode n'
369 :     end
370 :     val dataSz = readInt32 strm
371 :     val n' = nbytes - dataSz - 4
372 :     val data = if n' < 0 then error "data size" else bytesIn (strm, dataSz)
373 :     in
374 :     case readCode n'
375 :     of (c0 :: cn) => {data = data, c0 = c0, cn = cn}
376 :     | [] => error "missing code objects"
377 :     (* end case *)
378 :     end
379 :     in
380 :     fun read args = let
381 : monnier 429 val { name, stream = s, senv = context } = args
382 : monnier 249 val { nExports = ne, lambdaSz = sa2,
383 :     res1Sz, res2Sz, codeSz = cs, envSz = es,
384 :     imports, exportPid, cmData,
385 :     staticPid, lambdaPid } = readHeader s
386 : monnier 429 val (lambda_i, plambda) =
387 :     if sa2 = 0 then (fn () => NONE,
388 :     Word8Vector.fromList [])
389 : monnier 249 else let
390 :     val bytes = bytesIn (s, sa2)
391 :     in
392 : monnier 429 (delay (fn () => UnpickMod.unpickleFLINT bytes),
393 :     bytes)
394 : monnier 249 end
395 :     val _ = if res1Sz = 0 andalso res2Sz = 0
396 :     then () else error "non-zero reserved size"
397 :     val code = readCodeList (s, name, cs)
398 : monnier 429 val (senv, penv) =
399 :     if es = 0 then (fn () => CMStaticEnv.empty,
400 :     Word8Vector.fromList [])
401 :     else let
402 :     val penv = bytesIn (s, es)
403 :     val _ =
404 :     if Word8Vector.length penv = es then ()
405 :     else error "missing bytes in bin file"
406 :     val mkSenv =
407 :     delay (fn () =>
408 :     CMStaticEnv.CM
409 :     (UnpickMod.unpickleEnv { context = context,
410 :     hash = staticPid,
411 :     pickle = penv }))
412 :     in
413 :     (mkSenv, penv)
414 :     end
415 :     fun pd (u, p, pk) = { unpickled = u, pid = p, pickled = pk }
416 : monnier 249 in
417 :     BFC { imports = imports,
418 : monnier 439 exportPid = exportPid,
419 : monnier 249 cmData = cmData,
420 : monnier 429 senv = pd (senv, staticPid, penv),
421 :     lambda = pd (lambda_i, lambdaPid, plambda),
422 :     csegments = code,
423 :     executable = ref NONE }
424 : monnier 249 end
425 :    
426 : monnier 429 (* compute size of code objects (including length fields) *)
427 :     fun codeSize (csegs: csegments) =
428 :     List.foldl
429 :     (fn (co, n) => n + CodeObj.size co + 4)
430 :     (CodeObj.size(#c0 csegs) + Word8Vector.length(#data csegs) + 8)
431 :     (#cn csegs)
432 :    
433 :     (* This function must be kept in sync with the "write" function below.
434 :     * It calculates the number of bytes written by a corresponding
435 :     * call to "write". *)
436 :     fun size { content = bfc, nopickle } = let
437 : monnier 439 val BFC { imports, exportPid, senv, cmData, lambda, ... } = bfc
438 : monnier 429 val { unpickled = lut, pickled = lambdaP, ... } = lambda
439 :     val pidSz = Word8Vector.length (Pid.toBytes (#pid senv))
440 :     val (_, picki) = pickleImports imports
441 :     val csegs = codeSegments bfc
442 : monnier 439 val hasExports = isSome exportPid
443 : monnier 429 in
444 :     magicBytes +
445 :     9 * 4 +
446 :     Word8Vector.length picki +
447 :     (if hasExports then pidSz else 0) +
448 :     pidSz * (length cmData + 2) +
449 :     (if nopickle then 0
450 :     else case lut () of NONE => 0 | _ => Word8Vector.length lambdaP) +
451 :     codeSize csegs +
452 :     (if nopickle then 0 else Word8Vector.length (#pickled senv))
453 :     end
454 :    
455 :     (* Keep this in sync with "size" (see above). *)
456 :     fun write { stream = s, content = bfc, nopickle } = let
457 : monnier 439 val BFC { imports, exportPid, cmData, senv, lambda, ... } = bfc
458 : monnier 429 val { pickled = senvP, pid = staticPid, ... } = senv
459 :     val { pickled = lambdaP, pid = lambdaPid, unpickled = lut } =
460 : monnier 249 lambda
461 :     val staticPid = staticPid
462 :     val envPids = staticPid :: lambdaPid :: cmData
463 :     val (leni, picki) = pickleImports imports
464 :     val importSzB = Word8Vector.length picki
465 : monnier 439 val (ne, epl) =
466 :     case exportPid of
467 :     NONE => (0, [])
468 :     | SOME p => (1, [p])
469 : monnier 249 val nei = length envPids
470 :     val cmInfoSzB = nei * bytesPerPid
471 :     val sa2 =
472 : monnier 429 (if nopickle then 0
473 :     else case lut () of
474 : monnier 249 NONE => 0
475 :     | _ => Word8Vector.length lambdaP)
476 :     val res1Sz = 0
477 :     val res2Sz = 0
478 :     val csegs = codeSegments bfc
479 : monnier 429 val cs = codeSize csegs
480 : monnier 249 fun codeOut c = (
481 :     writeInt32 s (CodeObj.size c);
482 :     CodeObj.output (s, c))
483 : monnier 429 val (es, writeEnv) =
484 :     if nopickle then (0, fn () => ())
485 :     else (Word8Vector.length senvP,
486 :     fn () => BinIO.output (s, senvP))
487 : monnier 249 in
488 :     BinIO.output (s, MAGIC);
489 : monnier 429 app (writeInt32 s) [leni, ne, importSzB, cmInfoSzB,
490 :     sa2, res1Sz, res2Sz, cs, es];
491 : monnier 249 BinIO.output (s, picki);
492 :     writePidList (s, epl);
493 :     (* arena1 *)
494 :     writePidList (s, envPids);
495 : monnier 429 (* arena2 -- pickled flint stuff *)
496 :     if sa2 = 0 then () else BinIO.output (s, lambdaP);
497 : monnier 249 (* arena3 is empty *)
498 :     (* arena4 is empty *)
499 :     (* code objects *)
500 : monnier 429 writeInt32 s (Word8Vector.length (#data csegs));
501 :     BinIO.output(s, #data csegs);
502 : monnier 249 codeOut (#c0 csegs);
503 :     app codeOut (#cn csegs);
504 : monnier 429 writeEnv ()
505 : monnier 249 end
506 :    
507 :     fun create args = let
508 :     val { runtimePid, splitting, cmData,
509 :     ast, source, corenv, senv, symenv } = args
510 :     val errors = Err.errors source
511 :     fun check phase =
512 :     if Err.anyErrors errors then raise Compile (phase ^ " failed")
513 :     else ()
514 :     val cinfo = C.mkCompInfo (source, corenv, fn x => x)
515 :    
516 :     val { csegments=code, newstatenv, exportPid, staticPid, imports,
517 :     pickle=envPickle, inlineExp, ...} =
518 :     C.compile { source=source, ast=ast, statenv=senv,
519 :     symenv=symenv, compInfo=cinfo, checkErr=check,
520 :     runtimePid=runtimePid, splitting=splitting}
521 :     val {hash = lambdaPid, pickle} = PickMod.pickleFLINT inlineExp
522 :     fun pd (u, p, x) =
523 : monnier 429 { unpickled = fn () => u, pid = p, pickled = x }
524 : monnier 249 in
525 :     BFC { imports = imports,
526 : monnier 439 exportPid = exportPid,
527 : monnier 249 cmData = cmData,
528 :     senv = pd (newstatenv, staticPid, envPickle),
529 :     lambda = pd (inlineExp, lambdaPid, pickle),
530 : monnier 429 csegments = code,
531 :     executable = ref NONE }
532 : monnier 249 end
533 :    
534 : monnier 429 fun exec (bfc as BFC { imports, ... }, denv) =
535 :     C.execute { executable = executableOf bfc,
536 :     imports = imports,
537 :     exportPid = exportPidOf bfc,
538 :     dynenv = denv }
539 : monnier 249 end
540 :     end

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0