Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /sml/trunk/src/compiler/TopLevel/batch/binfile.sml
ViewVC logotype

Annotation of /sml/trunk/src/compiler/TopLevel/batch/binfile.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 104 - (view) (download)

1 : monnier 104 (* COPYRIGHT (c) 1997 Bell Labs, Lucent Technologies *)
2 :     (* binfile.sml *)
3 :    
4 :     functor BinfileFun (C : COMPILE) : BINFILE = struct
5 :     structure Pid = PersStamps
6 :     structure Env = CMEnv.Env
7 :     structure Err = ErrorMsg
8 :     structure CB = CompBasic
9 :    
10 :     exception FormatError
11 :     exception NoCodeBug
12 :     exception NoPickleBug
13 :    
14 :     exception Compile = C.Compile
15 :     exception TopLevelException = C.TopLevelException
16 :     exception SilentException = C.SilentException
17 :    
18 :     fun error msg = (
19 :     Control.Print.say(concat["Bin file format error: ", msg, "\n"]);
20 :     raise FormatError)
21 :    
22 :     type pid = Pid.persstamp
23 :     type senv = Env.staticEnv
24 :     type symenv = Env.symenv
25 :     type denv = Env.dynenv
26 :     type env = Env.environment
27 :     type lambda = CB.flint
28 :    
29 :     type csegments = C.csegments
30 :     type executable = C.executable
31 :    
32 :     datatype code
33 :     = DISCARDED
34 :     | CSEGS of csegments
35 :     | CLOSURE of executable
36 :    
37 :     (* pickled data: the data, the pid, and the pickle *)
38 :     type 'a pData =
39 :     { unpickled: 'a,
40 :     pid: pid,
41 :     pickled: Word8Vector.vector option ref }
42 :    
43 :     fun dropPickle { pickled, pid, unpickled } = pickled := NONE
44 :    
45 :     datatype bfContent =
46 :     BFC of {
47 :     imports: C.import list,
48 :     hasExports: bool,
49 :     cmData: pid list,
50 :     senv: senv pData,
51 :     lambda: lambda option pData,
52 :     code: code ref
53 :     }
54 :    
55 :     fun staticPidOf (BFC { senv = { pid, ... }, ... }) = pid
56 :     fun exportPidOf (bfc as BFC { hasExports, ... }) =
57 :     if hasExports then SOME (staticPidOf bfc) else NONE
58 :     fun lambdaPidOf (BFC { lambda = { pid, ... }, ... }) = pid
59 :     fun cmDataOf (BFC { cmData, ... }) = cmData
60 :     fun senvOf (BFC { senv = { unpickled, ... }, ... }) = unpickled
61 :     fun symenvOf (bfc as BFC { senv, lambda, hasExports, ... }) =
62 :     C.mksymenv (exportPidOf bfc, #unpickled lambda)
63 :    
64 :     fun discardCode (BFC { code, ... }) = code := DISCARDED
65 :     fun noCode (BFC { code = ref DISCARDED, ... }) = true
66 :     | noCode _ = false
67 :    
68 :     local
69 :     val arch = C.architecture
70 :    
71 :     fun executableOf (BFC { code = ref (CLOSURE e), ... }) = e
72 :     | executableOf (BFC { code = ref DISCARDED, ... }) = raise NoCodeBug
73 :     | executableOf (BFC { code = cref as ref (CSEGS cs), ... }) = let
74 :     val e = C.isolate (C.mkexec cs)
75 :     in
76 :     cref := CLOSURE e; e
77 :     end
78 :    
79 :     fun codeSegments (BFC {code = ref (CSEGS s), ... }) = s
80 :     | codeSegments _ = raise NoCodeBug
81 :    
82 :     val fromInt = Word32.fromInt
83 :     val fromByte = Word32.fromLargeWord o Word8.toLargeWord
84 :     val toByte = Word8.fromLargeWord o Word32.toLargeWord
85 :     val >> = Word32.>>
86 :     infix >>
87 :    
88 :     (*
89 :     * BINFILE FORMAT description:
90 :     *
91 :     * Every 4-byte integer field is stored in big-endian format.
92 :     *
93 :     * Start Size Purpose
94 :     * ----BEGIN OF HEADER----
95 :     * 0 16 magic string
96 :     * 16 4 number of import values (importCnt)
97 :     * 20 4 number of exports (exportCnt = currently always 0 or 1)
98 :     * 24 4 size of import tree area in bytes (importSzB)
99 :     * 28 4 size of CM-specific info in bytes (cmInfoSzB)
100 :     * 32 4 size of pickled lambda-expression in bytes (lambdaSzB)
101 :     * 36 4 size of reserved area 1 in bytes (reserved1)
102 :     * 40 4 size of reserved area 2 in bytes (reserved2)
103 :     * 44 4 size of code area in bytes (codeSzB)
104 :     * 48 4 size of pickled environment in bytes (envSzB)
105 :     * 52 i import trees [This area contains pickled import trees --
106 :     * see below. The total number of leaves in these trees is
107 :     * importCnt and the size is equal to importSzB.]
108 :     * i+52 ex export pids [Each export pid occupies 16 bytes. Thus, the
109 :     * size ex of this area is 16*exportCnt (0 or 16).]
110 :     * ex+i+52 cm CM info [Currently a list of pid-pairs.] (cm = cmInfoSzB)
111 :     * ----END OF HEADER----
112 :     * 0 h HEADER (h = 52+cm+ex+i)
113 :     * h l pickle of exported lambda-expr. (l = lambdaSzB)
114 :     * l+h r reserved areas (r = reserved1+reserved2)
115 :     * r+l+h c code area (c = codeSzB) [Structured into several
116 :     * segments -- see below.]
117 :     * c+r+l+h e pickle of static environment (e = envSzB)
118 :     * e+c+r+l+h - END OF BINFILE
119 :     *
120 :     * IMPORT TREE FORMAT description:
121 :     *
122 :     * The import tree area contains a list of (pid * tree) pairs.
123 :     * The pids are stored directly as 16-byte strings.
124 :     * Trees are constructed according to the following ML-datatype:
125 :     * datatype tree = NODE of (int * tree) list
126 :     * Leaves in this tree have the form (NODE []).
127 :     * Trees are written recursively -- (NODE l) is represented by n (= the
128 :     * length of l) followed by n (int * node) subcomponents. Each component
129 :     * consists of the integer selector followed by the corresponding tree.
130 :     *
131 :     * The size of the import tree area is only given implicitly. When reading
132 :     * this area, the reader must count the number of leaves and compare it
133 :     * with importCnt.
134 :     *
135 :     * Integer values in the import tree area (lengths and selectors) are
136 :     * written in "packed" integer format. In particular, this means that
137 :     * Values in the range 0..127 are represented by only 1 byte.
138 :     * Conceptually, the following pickling routine is used:
139 :     *
140 :     * void recur_write_ul (unsigned long l, FILE *file)
141 :     * {
142 :     * if (l != 0) {
143 :     * recur_write_ul (l >> 7, file);
144 :     * putc ((l & 0x7f) | 0x80, file);
145 :     * }
146 :     * }
147 :     *
148 :     * void write_ul (unsigned long l, FILE *file)
149 :     * {
150 :     * recur_write_ul (l >> 7, file);
151 :     * putc (l & 0x7f, file);
152 :     * }
153 :     *
154 :     * CODE AREA FORMAT description:
155 :     *
156 :     * The code area contains multiple code segements. There will be at least
157 :     * two. The very first segment is the "data" segment -- responsible for
158 :     * creating literal constants on the heap. The idea is that code in the
159 :     * data segment will be executed only once at link-time. Thus, it can
160 :     * then be garbage-collected immediatly. (In the future it is possible that
161 :     * the data segment will not contain executable code at all but some form
162 :     * of bytecode that is to be interpreted separately.)
163 :     *
164 :     * In the binfile, each code segment is represented by its size s (in
165 :     * bytes -- written as a 4-byte big-endian integer) followed by s bytes of
166 :     * machine- (or byte-) code. The total length of all code segments
167 :     * (including the bytes spent on representing individual sizes) is codeSzB.
168 :     *
169 :     * LINKING CONVENTIONS:
170 :     *
171 :     * Linking is achieved by executing all code segments in sequential order.
172 :     *
173 :     * The first code segment (i.e., the "data" segment) receives unit as
174 :     * its single argument.
175 :     *
176 :     * The second code segment receives a record as its single argument.
177 :     * This record has (importCnt+1) components. The first importCnt
178 :     * components correspond to the leaves of the import trees. The final
179 :     * component is the result from executing the data segment.
180 :     *
181 :     * All other code segments receive a single argument which is the result
182 :     * of the preceding segment.
183 :     *
184 :     * The result of the last segment represents the exports of the compilation
185 :     * unit. It is to be paired up with the export pid and stored in the
186 :     * dynamic environment. If there is no export pid, then the final result
187 :     * will be thrown away.
188 :     *
189 :     * The import trees are used for constructing the argument record for the
190 :     * second code segment. The pid at the root of each tree is the key for
191 :     * looking up a value in the existing dynamic environment. In general,
192 :     * that value will be a record. The selector fields of the import tree
193 :     * associated with the pid are used to recursively fetch components of that
194 :     * record.
195 :     *)
196 :    
197 :     val MAGIC = let
198 :     fun fit (i, s) = let
199 :     val s = StringCvt.padRight #" " i s
200 :     in
201 :     if (size s = i) then s
202 :     else substring (s, 0, i)
203 :     end
204 :     fun version [] = []
205 :     | version [x : int] = [Int.toString x]
206 :     | version (x :: r) = (Int.toString x) :: "." :: (version r)
207 :     val v = fit (8, concat (version (#version_id Version.version)))
208 :     val a =
209 :     if String.sub (arch, 0) = #"." then
210 :     fit (7, substring (arch, 1, (String.size arch) - 1))
211 :     else fit (7, arch)
212 :     in
213 :     Byte.stringToBytes (concat [v, a, "\n"])
214 :     end
215 :    
216 :     val magicBytes = Word8Vector.length MAGIC
217 :     val bytesPerPid = 16
218 :    
219 :     fun bytesIn (s, n) = let
220 :     val bv = BinIO.inputN (s, n)
221 :     in
222 :     if n = Word8Vector.length bv then bv
223 :     else error (concat["expected ", Int.toString n,
224 :     " byte, but found ",
225 :     Int.toString(Word8Vector.length bv)])
226 :     end
227 :    
228 :     fun readInt32 s = LargeWord.toIntX(Pack32Big.subVec(bytesIn(s, 4), 0))
229 :    
230 :     fun readPackedInt32 s = let
231 :     fun loop n =
232 :     case BinIO.input1 s of
233 :     NONE => error "unable to read a packed int32"
234 :     | SOME w8 => let
235 :     val n' =
236 :     n * 0w128
237 :     + Word8.toLargeWord (Word8.andb (w8, 0w127))
238 :     in
239 :     if Word8.andb (w8, 0w128) = 0w0 then n' else loop n'
240 :     end
241 :     in
242 :     LargeWord.toIntX (loop 0w0)
243 :     end
244 :    
245 :     fun readPid s = Pid.fromBytes (bytesIn (s, bytesPerPid))
246 :     fun readPidList (s, n) = List.tabulate (n, fn _ => readPid s)
247 :    
248 :     fun readImportTree s =
249 :     case readPackedInt32 s of
250 :     0 => (CB.ITNODE [], 1)
251 :     | cnt => let
252 :     fun readImportList 0 = ([], 0)
253 :     | readImportList cnt = let
254 :     val selector = readPackedInt32 s
255 :     val (tree, n) = readImportTree s
256 :     val (rest, n') = readImportList (cnt - 1)
257 :     in
258 :     ((selector, tree) :: rest, n + n')
259 :     end
260 :     val (l, n) = readImportList cnt
261 :     in
262 :     (CB.ITNODE l, n)
263 :     end
264 :    
265 :     fun readImports (s, n) =
266 :     if n <= 0 then []
267 :     else let
268 :     val pid = readPid s
269 :     val (tree, n') = readImportTree s
270 :     val rest = readImports (s, n - n')
271 :     in
272 :     (pid, tree) :: rest
273 :     end
274 :    
275 :     fun pickleInt32 i = let
276 :     val w = fromInt i
277 :     fun out w = toByte w
278 :     in
279 :     Word8Vector.fromList [toByte (w >> 0w24), toByte (w >> 0w16),
280 :     toByte (w >> 0w8), toByte w]
281 :     end
282 :     fun writeInt32 s i = BinIO.output (s, pickleInt32 i)
283 :    
284 :     fun picklePackedInt32 i = let
285 :     val n = fromInt i
286 :     val // = LargeWord.div
287 :     val %% = LargeWord.mod
288 :     val !! = LargeWord.orb
289 :     infix // %% !!
290 :     val toW8 = Word8.fromLargeWord
291 :     fun r (0w0, l) = Word8Vector.fromList l
292 :     | r (n, l) = r (n // 0w128, toW8 ((n %% 0w128) !! 0w128) :: l)
293 :     in
294 :     r (n // 0w128, [toW8 (n %% 0w128)])
295 :     end
296 :    
297 :     fun writePid (s, pid) = BinIO.output (s, Pid.toBytes pid)
298 :     fun writePidList (s, l) = app (fn p => writePid (s, p)) l
299 :    
300 :     local
301 :     fun pickleImportSpec ((selector, tree), (n, p)) = let
302 :     val sp = picklePackedInt32 selector
303 :     val (n', p') = pickleImportTree (tree, (n, p))
304 :     in
305 :     (n', sp :: p')
306 :     end
307 :     and pickleImportTree (CB.ITNODE [], (n, p)) =
308 :     (n + 1, picklePackedInt32 0 :: p)
309 :     | pickleImportTree (CB.ITNODE l, (n, p)) = let
310 :     val (n', p') = foldr pickleImportSpec (n, p) l
311 :     in
312 :     (n', picklePackedInt32 (length l) :: p')
313 :     end
314 :    
315 :     fun pickleImport ((pid, tree), (n, p)) = let
316 :     val (n', p') = pickleImportTree (tree, (n, p))
317 :     in
318 :     (n', Pid.toBytes pid :: p')
319 :     end
320 :     in
321 :     fun pickleImports l = let
322 :     val (n, p) = foldr pickleImport (0, []) l
323 :     in
324 :     (n, Word8Vector.concat p)
325 :     end
326 :     end
327 :    
328 :     fun checkMagic s =
329 :     if (bytesIn (s, magicBytes)) = MAGIC then ()
330 :     else error "bad magic number"
331 :    
332 :     fun readHeader s = let
333 :     val _ = checkMagic s
334 :     val leni = readInt32 s
335 :     val ne = readInt32 s
336 :     val importSzB = readInt32 s
337 :     val cmInfoSzB = readInt32 s
338 :     val nei = cmInfoSzB div bytesPerPid
339 :     val sLam = readInt32 s
340 :     val sa1 = readInt32 s
341 :     val sa2 = readInt32 s
342 :     val cs = readInt32 s
343 :     val es = readInt32 s
344 :     val imports = readImports (s, leni)
345 :     val exportPid =
346 :     (case ne of
347 :     0 => NONE
348 :     | 1 => SOME(readPid s)
349 :     | _ => error "too many export PIDs")
350 :     val envPids = readPidList (s, nei)
351 :     in
352 :     case envPids of
353 :     st :: lm :: cmData =>
354 :     { nExports = ne,
355 :     lambdaSz = sLam,
356 :     res1Sz = sa1, res2Sz = sa2, codeSz = cs, envSz = es,
357 :     imports = imports, exportPid = exportPid,
358 :     cmData = cmData, staticPid = st, lambdaPid = lm }
359 :     | _ => error "env PID list"
360 :     end
361 :    
362 :     (* must be called with second arg >= 0 *)
363 :     fun readCodeList (_, 0) = []
364 :     | readCodeList (s, n) = let
365 :     val sz = readInt32 s
366 :     val n' = n - sz - 4
367 :     val c = if n' < 0 then error "code size" else bytesIn (s, sz)
368 :     in
369 :     c :: readCodeList (s, n')
370 :     end
371 :     in
372 :     fun read args = let
373 :     val { name = n, stream = s, senv = context, keep_code} = args
374 :     val { nExports = ne, lambdaSz = sa2,
375 :     res1Sz, res2Sz, codeSz = cs, envSz = es,
376 :     imports, exportPid, cmData,
377 :     staticPid, lambdaPid } = readHeader s
378 :     val lambda_i =
379 :     if sa2 = 0 then NONE
380 :     else let
381 :     val bytes = bytesIn (s, sa2)
382 :     in
383 :     UnpickMod.unpickleFLINT { hash = staticPid, pickle = bytes}
384 :     end
385 :     val _ = if res1Sz = 0 andalso res2Sz = 0
386 :     then () else error "non-zero reserved size"
387 :     val code = (case readCodeList (s, cs) of
388 :     data :: c0 :: cn =>
389 :     { data = data, c0 = c0, cn = cn,
390 :     name = ref (SOME n) }
391 :     | _ => error "missing code objects")
392 :     val penv = bytesIn (s, es)
393 :     val _ =
394 :     if Word8Vector.length penv = es
395 :     andalso BinIO.endOfStream s then ()
396 :     else error "missing/excess bytes in bin file"
397 :     val b'senv = UnpickMod.unpickleEnv (context, { hash = staticPid,
398 :     pickle = penv })
399 :     val senv = CMStaticEnv.CM b'senv
400 :     val hasExports = isSome exportPid
401 :     fun pd (u, p) = { unpickled = u, pid = p, pickled = ref NONE }
402 :     in
403 :     BFC { imports = imports,
404 :     hasExports = hasExports,
405 :     cmData = cmData,
406 :     senv = pd (senv, staticPid),
407 :     lambda = pd (lambda_i, lambdaPid),
408 :     code = ref (if keep_code then CSEGS code else DISCARDED) }
409 :     end
410 :    
411 :     fun write {stream = s, content = bfc, keep_code } = let
412 :     val BFC { imports, hasExports, cmData, senv, lambda, ... } = bfc
413 :     val { pickled = ref senvP, pid = staticPid, ... } = senv
414 :     val { pickled = ref lambdaP, pid = lambdaPid, unpickled = lu } =
415 :     lambda
416 :     val senvP = valOf senvP handle Option => raise NoPickleBug
417 :     val lambdaP = valOf lambdaP handle Option => raise NoPickleBug
418 :     val staticPid = staticPid
419 :     val envPids = staticPid :: lambdaPid :: cmData
420 :     val (leni, picki) = pickleImports imports
421 :     val importSzB = Word8Vector.length picki
422 :     val (ne, epl) = if hasExports then (1, [staticPid]) else (0, [])
423 :     val nei = length envPids
424 :     val cmInfoSzB = nei * bytesPerPid
425 :     val sa2 =
426 :     (case lu of
427 :     NONE => 0
428 :     | _ => Word8Vector.length lambdaP)
429 :     val res1Sz = 0
430 :     val res2Sz = 0
431 :     val { data, c0, cn , ...} = codeSegments bfc
432 :     fun csize c = (Word8Vector.length c) + 4 (* including size field *)
433 :     val cs =
434 :     foldl (fn (c, a) => (csize c) + a) (csize c0 +csize data) cn
435 :     fun codeOut c = (writeInt32 s (Word8Vector.length c);
436 :     BinIO.output (s, c))
437 :     in
438 :     BinIO.output (s, MAGIC);
439 :     app (writeInt32 s) [leni, ne, importSzB, cmInfoSzB];
440 :     app (writeInt32 s) [sa2, res1Sz, res2Sz, cs];
441 :     writeInt32 s (Word8Vector.length senvP);
442 :     BinIO.output (s, picki);
443 :     writePidList (s, epl);
444 :     (* arena1 *)
445 :     writePidList (s, envPids);
446 :     (* arena2 *)
447 :     case lu of
448 :     NONE => ()
449 :     | _ => BinIO.output (s, lambdaP);
450 :     (* arena3 is empty *)
451 :     (* arena4 is empty *)
452 :     (* code objects *)
453 :     codeOut data;
454 :     codeOut c0;
455 :     app codeOut cn;
456 :     BinIO.output (s, senvP);
457 :     if keep_code then () else discardCode bfc;
458 :     dropPickle lambda;
459 :     dropPickle senv
460 :     end
461 :    
462 :     fun create args = let
463 :     val { runtimePid, splitting, cmData,
464 :     ast, source, corenv, senv, symenv } = args
465 :     val errors = Err.errors source
466 :     fun check phase =
467 :     if Err.anyErrors errors then raise Compile (phase ^ " failed")
468 :     else ()
469 :     val cinfo = C.mkCompInfo (source, corenv, fn x => x)
470 :    
471 :     val { csegments=code, newstatenv, exportPid, staticPid, imports,
472 :     pickle=envPickle, inlineExp, ...} =
473 :     C.compile { source=source, ast=ast, statenv=senv,
474 :     symenv=symenv, compInfo=cinfo, checkErr=check,
475 :     runtimePid=runtimePid, splitting=splitting}
476 :     val {hash = lambdaPid, pickle} = PickMod.pickleFLINT inlineExp
477 :     val hasExports = isSome exportPid
478 :     fun pd (u, p, x) =
479 :     { unpickled = u, pid = p, pickled = ref (SOME x) }
480 :     in
481 :     BFC { imports = imports,
482 :     hasExports = hasExports,
483 :     cmData = cmData,
484 :     senv = pd (newstatenv, staticPid, envPickle),
485 :     lambda = pd (inlineExp, lambdaPid, pickle),
486 :     code = ref (CSEGS code) }
487 :     end
488 :    
489 :     fun exec (bfc as BFC { imports, ... }, denv) = let
490 :     val ndenv =
491 :     C.execute { executable = executableOf bfc,
492 :     imports = imports,
493 :     exportPid = exportPidOf bfc,
494 :     dynenv = denv }
495 :     in
496 :     Env.mkenv { static = senvOf bfc,
497 :     dynamic = ndenv,
498 :     symbolic = symenvOf bfc }
499 :     end
500 :    
501 :     val parse = C.parse
502 :     val makePid = C.makePid
503 :     end
504 :     end
505 :    
506 :     (*
507 :     * $Log: binfile.sml,v $
508 :     * Revision 1.1 1998/05/22 15:14:58 jhr
509 :     * Changed name of batchutil.{sig,sml} to binfile.{sig,sml}
510 :     *
511 :     * Revision 1.1.1.1 1998/04/08 18:39:15 george
512 :     * Version 110.5
513 :     *
514 :     *)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0