Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /sml/branches/SMLNJ/src/compiler/TopLevel/batch/batchutil.sml
ViewVC logotype

Annotation of /sml/branches/SMLNJ/src/compiler/TopLevel/batch/batchutil.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 100 - (view) (download)

1 : monnier 16 (* COPYRIGHT (c) 1997 Bell Labs, Lucent Technologies *)
2 :     (* batchutil.sml *)
3 :    
4 :     functor BatchUtilFun(C : COMPILE) : BATCHUTIL =
5 :     struct
6 :     structure Pid = PersStamps
7 : monnier 93 structure Env = CMEnv.Env
8 : monnier 16 structure Err = ErrorMsg
9 : monnier 100 structure CB = CompBasic
10 : monnier 16
11 :     exception FormatError
12 :     exception NoCodeBug
13 :    
14 :     fun error msg = (
15 :     Control.Print.say(concat["Bin file format error: ", msg, "\n"]);
16 :     raise FormatError)
17 :    
18 :     (***************************************************************************
19 :     * UTILITY FUNCTIONS FOR MANIPULATING CUNIT *
20 :     ***************************************************************************)
21 :    
22 :     type pid = Pid.persstamp
23 :     type senv = Env.staticEnv
24 :     type symenv = Env.symenv
25 :     type denv = Env.dynenv
26 :     type env = Env.environment
27 : monnier 100 type lambda = CB.flint
28 : monnier 16
29 :     type csegments = C.csegments
30 :     type executable = C.executable
31 :    
32 :     datatype code
33 :     = DISCARDED
34 :     | CSEGS of csegments
35 :     | CLOSURE of executable
36 :    
37 :     datatype 'iid cunit = CU of {
38 : monnier 100 imports: C.import list,
39 : monnier 16 exportPid: pid option,
40 :     references: 'iid,
41 :     staticPid: pid,
42 :     senv: senv,
43 :     penv: Word8Vector.vector,
44 :     lambdaPid: pid,
45 :     lambda: lambda option,
46 :     plambda: Word8Vector.vector,
47 :     env: env option ref,
48 :     code: code ref
49 :     }
50 :    
51 :     val arch = C.architecture
52 :     fun importsCU (CU{ imports, ...}) = imports
53 :     fun exportCU (CU{exportPid, ...}) = exportPid
54 :     fun referencesCU (CU{references, ...}) = references
55 :     fun staticPidCU (CU{ staticPid, ... }) = staticPid
56 :     fun lambdaPidCU (CU{ lambdaPid, ... }) = lambdaPid
57 :     fun senvCU (CU{senv, ... }) = senv
58 :     fun symenvCU (CU{exportPid, lambda, ... }) = C.mksymenv (exportPid, lambda)
59 :     fun penvCU (CU{penv = pe, ... }) = pe
60 :     fun envCU (CU{env = e, ... }) = e
61 :    
62 :     fun nocodeCU (CU{ code = ref DISCARDED, ... }) = true
63 :     | nocodeCU _ = false
64 :    
65 :     fun codeSegments (CU{code = ref (CSEGS s), ... }) = s
66 :     | codeSegments _ = raise NoCodeBug
67 :    
68 :     fun discardCode (CU{code, ... }) = code := DISCARDED
69 :    
70 :     val fromInt = Word32.fromInt
71 :     val fromByte = Word32.fromLargeWord o Word8.toLargeWord
72 :     val toByte = Word8.fromLargeWord o Word32.toLargeWord
73 :     val >> = Word32.>>
74 :     infix >>
75 :    
76 : monnier 100 (*
77 :     * BINFILE FORMAT description:
78 :     *
79 :     * Every 4-byte integer field is stored in big-endian format.
80 :     *
81 :     * Start Size Purpose
82 :     * ----BEGIN OF HEADER----
83 :     * 0 16 magic string
84 :     * 16 4 number of import values (importCnt)
85 :     * 20 4 number of exports (exportCnt = currently always 0 or 1)
86 :     * 24 4 size of CM-specific info in bytes (cmInfoSzB)
87 :     * 28 4 size of pickled lambda-expression in bytes (lambdaSzB)
88 :     * 32 4 size of reserved area 1 in bytes (reserved1)
89 :     * 36 4 size of reserved area 2 in bytes (reserved2)
90 :     * 40 4 size of code area in bytes (codeSzB)
91 :     * 44 4 size of pickled environment in bytes (envSzB)
92 :     * 48 i import trees [This area contains pickled import trees --
93 :     * see below. The total number of leaves in these trees is
94 :     * importCnt. The size impSzB of this area depends on the
95 :     * shape of the trees.]
96 :     * i+48 ex export pids [Each export pid occupies 16 bytes. Thus, the
97 :     * size ex of this area is 16*exportCnt (0 or 16).]
98 :     * ex+i+48 cm CM info [Currently a list of pid-pairs.] (cm = cmInfoSzB)
99 :     * ----END OF HEADER----
100 :     * 0 h HEADER (h = 48+cm+ex+i)
101 :     * h l pickle of exported lambda-expr. (l = lambdaSzB)
102 :     * l+h r reserved areas (r = reserved1+reserved2)
103 :     * r+l+h c code area (c = codeSzB) [Structured into several
104 :     * segments -- see below.]
105 :     * c+r+l+h e pickle of static environment (e = envSzB)
106 :     * e+c+r+l+h - END OF BINFILE
107 :     *
108 :     * IMPORT TREE FORMAT description:
109 :     *
110 :     * The import tree area contains a list of (pid * tree) pairs.
111 :     * The pids are stored directly as 16-byte strings.
112 :     * Trees are constructed according to the following ML-datatype:
113 :     * datatype tree = NODE of (int * tree) list
114 :     * Leaves in this tree have the form (NODE []).
115 :     * Trees are written recursively -- (NODE l) is represented by n (= the
116 :     * length of l) followed by n (int * node) subcomponents. Each component
117 :     * consists of the integer selector followed by the corresponding tree.
118 :     *
119 :     * The size of the import tree area is only given implicitly. When reading
120 :     * this area, the reader must count the number of leaves and compare it
121 :     * with importCnt.
122 :     *
123 :     * Integer values in the import tree area (lengths and selectors) are
124 :     * written in "packed" integer format. In particular, this means that
125 :     * Values in the range 0..127 are represented by only 1 byte.
126 :     * Conceptually, the following pickling routine is used:
127 :     *
128 :     * void recur_write_ul (unsigned long l, FILE *file)
129 :     * {
130 :     * if (l != 0) {
131 :     * recur_write_ul (l / 0200, file);
132 :     * putc ((l % 0200) | 0200, file);
133 :     * }
134 :     * }
135 :     *
136 :     * void write_ul (unsigned long l, FILE *file)
137 :     * {
138 :     * recur_write_ul (l / 0200, file);
139 :     * putc (l % 0200, file);
140 :     * }
141 :     *
142 :     * CODE AREA FORMAT description:
143 :     *
144 :     * The code area contains multiple code segements. There will be at least
145 :     * two. The very first segment is the "data" segment -- responsible for
146 :     * creating literal constants on the heap. The idea is that code in the
147 :     * data segment will be executed only once at link-time. Thus, it can
148 :     * then be garbage-collected immediatly. (In the future it is possible that
149 :     * the data segment will not contain executable code at all but some form
150 :     * of bytecode that is to be interpreted separately.)
151 :     *
152 :     * In the binfile, each code segment is represented by its size s (in
153 :     * bytes -- written as a 4-byte big-endian integer) followed by s bytes of
154 :     * machine- (or byte-) code. The total length of all code segments
155 :     * (including the bytes spent on representing individual sizes) is codeSzB.
156 :     *
157 :     * LINKING CONVENTIONS:
158 :     *
159 :     * Linking is achieved by executing all code segments in sequential order.
160 :     *
161 :     * The first code segment (i.e., the "data" segment) receives unit as
162 :     * its single argument.
163 :     *
164 :     * The second code segment receives a record as its single argument.
165 :     * This record has (importCnt+1) components. The first importCnt
166 :     * components correspond to the leaves of the import trees. The final
167 :     * component is the result from executing the data segment.
168 :     *
169 :     * All other code segments receive a single argument which is the result
170 :     * of the preceding segment.
171 :     *
172 :     * The result of the last segment represents the exports of the compilation
173 :     * unit. It is to be paired up with the export pid and stored in the
174 :     * dynamic environment. If there is no export pid, then the final result
175 :     * will be thrown away.
176 :     *
177 :     * The import trees are used for constructing the argument record for the
178 :     * second code segment. The pid at the root of each tree is the key for
179 :     * looking up a value in the existing dynamic environment. In general,
180 :     * that value will be a record. The selector fields of the import tree
181 :     * associated with the pid are used to recursively fetch components of that
182 :     * record.
183 :     *)
184 : monnier 16
185 :     val MAGIC = let
186 :     fun fit (i, s) = let
187 :     val s = StringCvt.padRight #" " i s
188 :     in
189 :     if (size s = i) then s
190 :     else substring (s, 0, i)
191 :     end
192 :     fun version [] = []
193 :     | version [x : int] = [Int.toString x]
194 :     | version (x :: r) = (Int.toString x) :: "." :: (version r)
195 :     val v = fit (8, concat (version (#version_id Version.version)))
196 :     val a = if String.sub (arch, 0) = #"."
197 :     then fit (7, substring (arch, 1, (String.size arch) - 1))
198 :     else fit (7, arch)
199 :     in
200 :     Byte.stringToBytes (concat [v, a, "\n"])
201 :     end
202 :    
203 :     val magicBytes = Word8Vector.length MAGIC
204 :     val bytesPerPid = 16
205 :    
206 :     fun bytesIn (s, n) = let
207 :     val bv = BinIO.inputN (s, n)
208 :     in
209 :     if (n = Word8Vector.length bv)
210 :     then bv
211 :     else error(concat[
212 :     "expected ", Int.toString n, " byte, but found ",
213 :     Int.toString(Word8Vector.length bv)
214 :     ])
215 :     end
216 :    
217 :     fun readInt32 s = LargeWord.toIntX(Pack32Big.subVec(bytesIn(s, 4), 0))
218 :    
219 : monnier 100 fun readPackedInt32 s = let
220 :     fun loop n =
221 :     case BinIO.input1 s of
222 :     NONE => error "unable to read a packed int32"
223 :     | SOME w8 => let
224 :     val n' =
225 :     n * 0w128 + Word8.toLargeWord (Word8.andb (w8, 0w127))
226 :     in
227 :     if Word8.andb (w8, 0w128) = 0w0 then n' else loop n'
228 :     end
229 :     in
230 :     LargeWord.toIntX (loop 0w0)
231 :     end
232 :    
233 : monnier 16 fun readPid s = Pid.fromBytes (bytesIn (s, bytesPerPid))
234 :     fun readPidList (s, n) = List.tabulate (n, fn _ => readPid s)
235 :    
236 : monnier 100 fun readImportTree s =
237 :     case readPackedInt32 s of
238 :     0 => (CB.ITNODE [], 1)
239 :     | cnt => let
240 :     fun readImportList 0 = ([], 0)
241 :     | readImportList cnt = let
242 :     val selector = readPackedInt32 s
243 :     val (tree, n) = readImportTree s
244 :     val (rest, n') = readImportList (cnt - 1)
245 :     in
246 :     ((selector, tree) :: rest, n + n')
247 :     end
248 :     val (l, n) = readImportList cnt
249 :     in
250 :     (CB.ITNODE l, n)
251 :     end
252 : monnier 16
253 : monnier 100 fun readImports (s, n) =
254 :     if n <= 0 then []
255 :     else let
256 :     val pid = readPid s
257 :     val (tree, n') = readImportTree s
258 :     val rest = readImports (s, n - n')
259 :     in
260 :     (pid, tree) :: rest
261 :     end
262 :    
263 :     fun pickleInt32 i = let
264 :     val w = fromInt i
265 :     fun out w = toByte w
266 :     in
267 :     Word8Vector.fromList [toByte (w >> 0w24), toByte (w >> 0w16),
268 :     toByte (w >> 0w8), toByte w]
269 :     end
270 :     fun writeInt32 s i = BinIO.output (s, pickleInt32 i)
271 :    
272 :     fun picklePackedInt32 i = let
273 :     val n = fromInt i
274 :     val // = LargeWord.div
275 :     val %% = LargeWord.mod
276 :     val !! = LargeWord.orb
277 :     infix // %% !!
278 :     val toW8 = Word8.fromLargeWord
279 :     fun r (0w0, l) = Word8Vector.fromList l
280 :     | r (n, l) = r (n // 0w128, toW8 ((n %% 0w128) !! 0w128) :: l)
281 :     in
282 :     r (n // 0w128, [toW8 (n %% 0w128)])
283 :     end
284 :    
285 : monnier 16 fun writePid (s, pid) = BinIO.output (s, Pid.toBytes pid)
286 :     fun writePidList (s, l) = app (fn p => writePid (s, p)) l
287 :    
288 : monnier 100 local
289 :     fun pickleImportSpec ((selector, tree), (n, p)) = let
290 :     val sp = picklePackedInt32 selector
291 :     val (n', p') = pickleImportTree (tree, (n, p))
292 :     in
293 :     (n', sp :: p')
294 :     end
295 :     and pickleImportTree (CB.ITNODE [], (n, p)) =
296 :     (n + 1, picklePackedInt32 0 :: p)
297 :     | pickleImportTree (CB.ITNODE l, (n, p)) = let
298 :     val (n', p') = foldr pickleImportSpec (n, p) l
299 :     in
300 :     (n', picklePackedInt32 (length l) :: p')
301 :     end
302 :    
303 :     fun pickleImport ((pid, tree), (n, p)) = let
304 :     val (n', p') = pickleImportTree (tree, (n, p))
305 :     in
306 :     (n', Pid.toBytes pid :: p')
307 :     end
308 :     in
309 :     fun pickleImports l = let
310 :     val (n, p) = foldr pickleImport (0, []) l
311 :     in
312 :     (n, Word8Vector.concat p)
313 :     end
314 :     end
315 :    
316 : monnier 16 fun checkMagic s =
317 : monnier 100 if (bytesIn (s, magicBytes)) = MAGIC then ()
318 :     else error "bad magic number"
319 : monnier 16
320 :     fun readHeader s = let
321 :     val _ = checkMagic s
322 : monnier 100 val leni = readInt32 s
323 : monnier 16 val ne = readInt32 s
324 :     val cmInfoSzB = readInt32 s
325 :     val nei = cmInfoSzB div bytesPerPid
326 :     val sLam = readInt32 s
327 :     val sa1 = readInt32 s
328 :     val sa2 = readInt32 s
329 :     val cs = readInt32 s
330 :     val es = readInt32 s
331 : monnier 100 val imports = readImports (s, leni)
332 : monnier 16 val exportPid = (case ne
333 :     of 0 => NONE
334 :     | 1 => SOME(readPid s)
335 :     | _ => error "too many export PIDs"
336 :     (* end case *))
337 :     val envPids = readPidList (s, nei)
338 :     in
339 :     case envPids
340 :     of (st :: lm :: references) => {
341 : monnier 100 nExports = ne,
342 : monnier 16 lambdaSz = sLam,
343 :     res1Sz = sa1, res2Sz = sa2, codeSz = cs, envSz = es,
344 :     imports = imports, exportPid = exportPid,
345 :     references = references, staticPid = st, lambdaPid = lm
346 :     }
347 :     | _ => error "env PID list"
348 :     (* end case *)
349 :     end
350 :    
351 :     (* must be called with second arg >= 0 *)
352 :     fun readCodeList (_, 0) = []
353 :     | readCodeList (s, n) = let
354 :     val sz = readInt32 s
355 :     val n' = n - sz - 4
356 :     val c = if n' < 0 then error "code size" else bytesIn (s, sz)
357 :     in
358 :     c :: readCodeList (s, n')
359 :     end
360 :    
361 :     fun readUnit {name=n, stream = s, pids2iid, senv = context, keep_code} = let
362 : monnier 100 val { nExports = ne, lambdaSz = sa2,
363 : monnier 16 res1Sz, res2Sz, codeSz = cs, envSz = es,
364 :     imports, exportPid, references,
365 :     staticPid, lambdaPid
366 :     } = readHeader s
367 :     val iid = pids2iid references
368 :     val (plambda, lambda_i) = if sa2 = 0
369 :     then (Word8Vector.fromList [], NONE)
370 :     else let
371 :     val bytes = bytesIn (s, sa2)
372 :     in
373 :     (bytes,
374 :     UnpickMod.unpickleFLINT{hash=staticPid,pickle=bytes})
375 :     end
376 :     val _ = if res1Sz = 0 andalso res2Sz = 0
377 :     then () else error "non-zero reserved size"
378 : monnier 100 val code = (case readCodeList (s, cs) of
379 :     data :: c0 :: cn => { data = data, c0 = c0, cn = cn,
380 :     name = ref (SOME n) }
381 :     | _ => error "missing code objects")
382 : monnier 16 val penv = bytesIn (s, es)
383 :     val _ = if Word8Vector.length penv = es andalso BinIO.endOfStream s
384 :     then ()
385 :     else error "missing/excess bytes in bin file"
386 :     val b'senv = UnpickMod.unpickleEnv (context, { hash = staticPid,
387 :     pickle = penv })
388 : monnier 93 val senv = CMStaticEnv.CM b'senv
389 : monnier 16 in
390 : monnier 100 CU {
391 : monnier 16 imports = imports, exportPid = exportPid, references = iid,
392 :     staticPid = staticPid, senv = senv, penv = penv,
393 :     lambdaPid = lambdaPid, lambda = lambda_i, plambda = plambda,
394 :     env = ref NONE,
395 :     code = ref (if keep_code then CSEGS code else DISCARDED)
396 :     }
397 :     end
398 :    
399 :     fun writeUnit {stream = s, cunit = u, keep_code, iid2pids} = let
400 : monnier 100 val CU { imports, exportPid, references,
401 :     staticPid, penv,
402 :     lambdaPid, lambda, plambda, ...
403 :     } = u
404 :     val envPids = staticPid :: lambdaPid :: iid2pids references
405 :     val (leni, picki) = pickleImports imports
406 :     val (ne, epl) =
407 :     (case exportPid of
408 :     NONE => (0, [])
409 :     | SOME p => (1, [p]))
410 :     val nei = length envPids
411 :     val cmInfoSzB = nei * bytesPerPid
412 :     val sa2 =
413 :     (case lambda of
414 :     NONE => 0
415 :     | _ => Word8Vector.length plambda)
416 :     val res1Sz = 0
417 :     val res2Sz = 0
418 :     val { data, c0, cn , ...} = codeSegments u
419 :     fun csize c = (Word8Vector.length c) + 4 (* including size field *)
420 :     val cs = foldl (fn (c, a) => (csize c) + a) (csize c0 +csize data) cn
421 :     fun codeOut c = (writeInt32 s (Word8Vector.length c);
422 :     BinIO.output (s, c))
423 :     in
424 :     BinIO.output (s, MAGIC);
425 :     app (writeInt32 s) [leni, ne, cmInfoSzB];
426 :     app (writeInt32 s) [sa2, res1Sz, res2Sz, cs];
427 :     writeInt32 s (Word8Vector.length penv);
428 :     BinIO.output (s, picki);
429 :     writePidList (s, epl);
430 : monnier 16 (* arena1 *)
431 : monnier 100 writePidList (s, envPids);
432 : monnier 16 (* arena2 *)
433 : monnier 100 case lambda of
434 :     NONE => ()
435 :     | _ => BinIO.output (s, plambda);
436 : monnier 16 (* arena3 is empty *)
437 :     (* arena4 is empty *)
438 :     (* code objects *)
439 : monnier 100 codeOut data;
440 :     codeOut c0;
441 :     app codeOut cn;
442 :     BinIO.output (s, penv);
443 :     if keep_code then () else discardCode u
444 :     end
445 : monnier 16
446 :    
447 :     (***************************************************************************
448 :     * UTILITY FUNCTIONS THAT SUPPORTS BATCH COMPILATION *
449 :     ***************************************************************************)
450 :     exception Compile = C.Compile
451 :     exception TopLevelException = C.TopLevelException
452 :     exception SilentException = C.SilentException
453 :    
454 :     val parse = C.parse
455 :     val makePid = C.makePid
456 :    
457 :     fun makeUnit {runtimePid : pid option, splitting, references,
458 :     ast, source, corenv, senv : senv, symenv : symenv} = let
459 :     val errors = Err.errors source
460 :     fun check phase =
461 :     if Err.anyErrors errors then raise Compile (phase ^ " failed")
462 :     else ()
463 :     val cinfo = C.mkCompInfo (source, corenv, fn x => x)
464 :    
465 :     val {csegments=code, newstatenv, exportPid, staticPid, imports,
466 :     pickle=envPickle, inlineExp, ...} =
467 :     C.compile {source=source, ast=ast, statenv=senv,
468 :     symenv=symenv, compInfo=cinfo, checkErr=check,
469 :     runtimePid=runtimePid, splitting=splitting}
470 :     val {hash = lambdaPid, pickle} = PickMod.pickleFLINT inlineExp
471 :     in
472 :     CU{
473 :     imports = imports,
474 :     exportPid = exportPid,
475 :     references = references,
476 :     staticPid = staticPid,
477 :     senv = newstatenv,
478 :     penv = envPickle,
479 :     lambdaPid = lambdaPid,
480 :     lambda = inlineExp,
481 :     plambda = pickle,
482 :     env = ref NONE,
483 :     code = ref (CSEGS code)
484 :     }
485 :     end (* function makeUnit *)
486 :    
487 :     fun codeClosure (CU{code, ... }) = (case !code
488 :     of DISCARDED => raise NoCodeBug
489 :     | CLOSURE obj => obj
490 :     | CSEGS s => let
491 :     val obj = C.mkexec s
492 :     in
493 :     code := CLOSURE obj; obj
494 :     end
495 :     (* end case *))
496 :    
497 :     fun execUnit (u, denv) = let
498 :     val ndenv =
499 : monnier 100 C.execute { executable = C.isolate(codeClosure u),
500 : monnier 16 imports = importsCU u,
501 :     exportPid = exportCU u,
502 :     dynenv = denv }
503 :     in
504 :     Env.mkenv {static = senvCU u,
505 :     dynamic = ndenv,
506 :     symbolic = symenvCU u}
507 :     end
508 :    
509 :     end (* functor BatchUtilFun *)
510 : monnier 93
511 :     (*
512 :     * $Log: batchutil.sml,v $
513 :     * Revision 1.1.1.1 1998/04/08 18:39:15 george
514 :     * Version 110.5
515 :     *
516 :     *)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0