SCM Repository
Annotation of /sml/trunk/src/compiler/Execution/binfile/binfile.sml
Parent Directory
|
Revision Log
Revision 902 - (view) (download)
1 : | blume | 902 | (* binfile-new.sml |
2 : | * | ||
3 : | * (C) 2001 Lucent Technologies, Bell Labs | ||
4 : | * | ||
5 : | * author: Matthias Blume (blume@research.bell-labs.com | ||
6 : | *) | ||
7 : | |||
8 : | (* | ||
9 : | * This revised version of structure Binfile is now machine-independent. | ||
10 : | * Moreover, it deals with the file format only and does not know how to | ||
11 : | * create new binfile contents (aka "compile") or how to interpret the | ||
12 : | * pickles. As a result, it does not statically depend on the compiler. | ||
13 : | * (Eventually we might want to support a light-weight binfile loader.) | ||
14 : | * | ||
15 : | * ---------------------------------------------------------------------------- | ||
16 : | * BINFILE FORMAT description: | ||
17 : | * | ||
18 : | * Every 4-byte integer field is stored in big-endian format. | ||
19 : | * | ||
20 : | * Start Size Purpose | ||
21 : | * ----BEGIN OF HEADER---- | ||
22 : | * 0 16 magic string | ||
23 : | * 16 4 number of import values (importCnt) | ||
24 : | * 20 4 number of exports (exportCnt = currently always 0 or 1) | ||
25 : | * 24 4 size of import tree area in bytes (importSzB) | ||
26 : | * 28 4 size of CM-specific info in bytes (cmInfoSzB) | ||
27 : | * 32 4 size of pickled lambda-expression in bytes (lambdaSzB) | ||
28 : | * 36 4 size of reserved area in bytes (reserved) | ||
29 : | * 40 4 size of padding in bytes (pad) | ||
30 : | * 44 4 size of code area in bytes (codeSzB) | ||
31 : | * 48 4 size of pickled environment in bytes (envSzB) | ||
32 : | * 52 i import trees [This area contains pickled import trees -- | ||
33 : | * see below. The total number of leaves in these trees is | ||
34 : | * importCnt and the size is equal to importSzB.] | ||
35 : | * i+52 ex export pids [Each export pid occupies 16 bytes. Thus, the | ||
36 : | * size ex of this area is 16*exportCnt (0 or 16).] | ||
37 : | * ex+i+52 cm CM info [Currently a list of pid-pairs.] (cm = cmInfoSzB) | ||
38 : | * ----END OF HEADER---- | ||
39 : | * 0 h HEADER (h = 52+cm+ex+i) | ||
40 : | * h l pickle of exported lambda-expr. (l = lambdaSzB) | ||
41 : | * l+h r reserved area (r = reserved) | ||
42 : | * r+l+h p padding (p = pad) | ||
43 : | * p+r+l+h c code area (c = codeSzB) [Structured into several | ||
44 : | * segments -- see below.] | ||
45 : | * c+p+r+l+h e pickle of static environment (e = envSzB) | ||
46 : | * e+c+p+r+l+h - END OF BINFILE | ||
47 : | * | ||
48 : | * IMPORT TREE FORMAT description: | ||
49 : | * | ||
50 : | * The import tree area contains a list of (pid * tree) pairs. | ||
51 : | * The pids are stored directly as 16-byte strings. | ||
52 : | * Trees are constructed according to the following ML-datatype: | ||
53 : | * datatype tree = NODE of (int * tree) list | ||
54 : | * Leaves in this tree have the form (NODE []). | ||
55 : | * Trees are written recursively -- (NODE l) is represented by n (= the | ||
56 : | * length of l) followed by n (int * node) subcomponents. Each component | ||
57 : | * consists of the integer selector followed by the corresponding tree. | ||
58 : | * | ||
59 : | * The size of the import tree area is only given implicitly. When reading | ||
60 : | * this area, the reader must count the number of leaves and compare it | ||
61 : | * with importCnt. | ||
62 : | * | ||
63 : | * Integer values in the import tree area (lengths and selectors) are | ||
64 : | * written in "packed" integer format. In particular, this means that | ||
65 : | * Values in the range 0..127 are represented by only 1 byte. | ||
66 : | * Conceptually, the following pickling routine is used: | ||
67 : | * | ||
68 : | * void recur_write_ul (unsigned long l, FILE *file) | ||
69 : | * { | ||
70 : | * if (l != 0) { | ||
71 : | * recur_write_ul (l >> 7, file); | ||
72 : | * putc ((l & 0x7f) | 0x80, file); | ||
73 : | * } | ||
74 : | * } | ||
75 : | * | ||
76 : | * void write_ul (unsigned long l, FILE *file) | ||
77 : | * { | ||
78 : | * recur_write_ul (l >> 7, file); | ||
79 : | * putc (l & 0x7f, file); | ||
80 : | * } | ||
81 : | * | ||
82 : | * CODE AREA FORMAT description: | ||
83 : | * | ||
84 : | * The code area contains multiple code segements. There will be at least | ||
85 : | * two. The very first segment is the "data" segment -- responsible for | ||
86 : | * creating literal constants on the heap. The idea is that code in the | ||
87 : | * data segment will be executed only once at link-time. Thus, it can | ||
88 : | * then be garbage-collected immediatly. (In the future it is possible that | ||
89 : | * the data segment will not contain executable code at all but some form | ||
90 : | * of bytecode that is to be interpreted separately.) | ||
91 : | * | ||
92 : | * In the binfile, each code segment is represented by its size s (in | ||
93 : | * bytes -- written as a 4-byte big-endian integer) followed by s bytes of | ||
94 : | * machine- (or byte-) code. The total length of all code segments | ||
95 : | * (including the bytes spent on representing individual sizes) is codeSzB. | ||
96 : | * | ||
97 : | * LINKING CONVENTIONS: | ||
98 : | * | ||
99 : | * Linking is achieved by executing all code segments in sequential order. | ||
100 : | * | ||
101 : | * The first code segment (i.e., the "data" segment) receives unit as | ||
102 : | * its single argument. | ||
103 : | * | ||
104 : | * The second code segment receives a record as its single argument. | ||
105 : | * This record has (importCnt+1) components. The first importCnt | ||
106 : | * components correspond to the leaves of the import trees. The final | ||
107 : | * component is the result from executing the data segment. | ||
108 : | * | ||
109 : | * All other code segments receive a single argument which is the result | ||
110 : | * of the preceding segment. | ||
111 : | * | ||
112 : | * The result of the last segment represents the exports of the compilation | ||
113 : | * unit. It is to be paired up with the export pid and stored in the | ||
114 : | * dynamic environment. If there is no export pid, then the final result | ||
115 : | * will be thrown away. | ||
116 : | * | ||
117 : | * The import trees are used for constructing the argument record for the | ||
118 : | * second code segment. The pid at the root of each tree is the key for | ||
119 : | * looking up a value in the existing dynamic environment. In general, | ||
120 : | * that value will be a record. The selector fields of the import tree | ||
121 : | * associated with the pid are used to recursively fetch components of that | ||
122 : | * record. | ||
123 : | *) | ||
124 : | structure Binfile :> BINFILE = struct | ||
125 : | |||
126 : | structure Pid = PersStamps | ||
127 : | |||
128 : | exception FormatError = CodeObj.FormatError | ||
129 : | |||
130 : | type pid = Pid.persstamp | ||
131 : | |||
132 : | type csegments = CodeObj.csegments | ||
133 : | |||
134 : | type executable = CodeObj.executable | ||
135 : | |||
136 : | type stats = { env: int, inlinfo: int, data: int, code: int } | ||
137 : | |||
138 : | type pickle = { pid: pid, pickle: Word8Vector.vector } | ||
139 : | |||
140 : | datatype bfContents = | ||
141 : | BF of { imports: ImportTree.import list, | ||
142 : | exportPid: pid option, | ||
143 : | cmData: pid list, | ||
144 : | senv: pickle, | ||
145 : | lambda: pickle, | ||
146 : | csegments: csegments, | ||
147 : | executable: executable option ref } | ||
148 : | fun unBF (BF x) = x | ||
149 : | |||
150 : | val bytesPerPid = Pid.persStampSize | ||
151 : | val magicBytes = 16 | ||
152 : | |||
153 : | val exportPidOf = #exportPid o unBF | ||
154 : | val cmDataOf = #cmData o unBF | ||
155 : | val senvPickleOf = #senv o unBF | ||
156 : | val staticPidOf = #pid o senvPickleOf | ||
157 : | val lambdaPickleOf = #lambda o unBF | ||
158 : | val lambdaPidOf = #pid o lambdaPickleOf | ||
159 : | |||
160 : | fun error msg = | ||
161 : | (Control_Print.say (concat ["binfile format error: ", msg, "\n"]); | ||
162 : | raise FormatError) | ||
163 : | |||
164 : | val fromInt = Word32.fromInt | ||
165 : | val fromByte = Word32.fromLargeWord o Word8.toLargeWord | ||
166 : | val toByte = Word8.fromLargeWord o Word32.toLargeWord | ||
167 : | val >> = Word32.>> | ||
168 : | infix >> | ||
169 : | |||
170 : | fun bytesIn (s, 0) = Byte.stringToBytes "" | ||
171 : | | bytesIn (s, n) = let | ||
172 : | val bv = BinIO.inputN (s, n) | ||
173 : | in | ||
174 : | if n = Word8Vector.length bv then bv | ||
175 : | else error (concat["expected ", Int.toString n, | ||
176 : | " bytes, but found ", | ||
177 : | Int.toString(Word8Vector.length bv)]) | ||
178 : | end | ||
179 : | |||
180 : | fun readInt32 s = LargeWord.toIntX(Pack32Big.subVec(bytesIn(s, 4), 0)) | ||
181 : | |||
182 : | fun readPackedInt32 s = let | ||
183 : | fun loop n = | ||
184 : | case BinIO.input1 s of | ||
185 : | NONE => error "unable to read a packed int32" | ||
186 : | | SOME w8 => let | ||
187 : | val n' = | ||
188 : | n * 0w128 | ||
189 : | + Word8.toLargeWord (Word8.andb (w8, 0w127)) | ||
190 : | in | ||
191 : | if Word8.andb (w8, 0w128) = 0w0 then n' else loop n' | ||
192 : | end | ||
193 : | in | ||
194 : | LargeWord.toIntX (loop 0w0) | ||
195 : | end | ||
196 : | |||
197 : | fun readPid s = Pid.fromBytes (bytesIn (s, bytesPerPid)) | ||
198 : | fun readPidList (s, n) = List.tabulate (n, fn _ => readPid s) | ||
199 : | |||
200 : | fun readImportTree s = | ||
201 : | case readPackedInt32 s of | ||
202 : | 0 => (ImportTree.ITNODE [], 1) | ||
203 : | | cnt => let | ||
204 : | fun readImportList 0 = ([], 0) | ||
205 : | | readImportList cnt = let | ||
206 : | val selector = readPackedInt32 s | ||
207 : | val (tree, n) = readImportTree s | ||
208 : | val (rest, n') = readImportList (cnt - 1) | ||
209 : | in | ||
210 : | ((selector, tree) :: rest, n + n') | ||
211 : | end | ||
212 : | val (l, n) = readImportList cnt | ||
213 : | in | ||
214 : | (ImportTree.ITNODE l, n) | ||
215 : | end | ||
216 : | |||
217 : | fun readImports (s, n) = | ||
218 : | if n <= 0 then [] | ||
219 : | else let | ||
220 : | val pid = readPid s | ||
221 : | val (tree, n') = readImportTree s | ||
222 : | val rest = readImports (s, n - n') | ||
223 : | in | ||
224 : | (pid, tree) :: rest | ||
225 : | end | ||
226 : | |||
227 : | fun pickleInt32 i = let | ||
228 : | val w = fromInt i | ||
229 : | fun out w = toByte w | ||
230 : | in | ||
231 : | Word8Vector.fromList [toByte (w >> 0w24), toByte (w >> 0w16), | ||
232 : | toByte (w >> 0w8), toByte w] | ||
233 : | end | ||
234 : | fun writeInt32 s i = BinIO.output (s, pickleInt32 i) | ||
235 : | |||
236 : | fun picklePackedInt32 i = let | ||
237 : | val n = fromInt i | ||
238 : | val // = LargeWord.div | ||
239 : | val %% = LargeWord.mod | ||
240 : | val !! = LargeWord.orb | ||
241 : | infix // %% !! | ||
242 : | val toW8 = Word8.fromLargeWord | ||
243 : | fun r (0w0, l) = Word8Vector.fromList l | ||
244 : | | r (n, l) = r (n // 0w128, toW8 ((n %% 0w128) !! 0w128) :: l) | ||
245 : | in | ||
246 : | r (n // 0w128, [toW8 (n %% 0w128)]) | ||
247 : | end | ||
248 : | |||
249 : | fun writePid (s, pid) = BinIO.output (s, Pid.toBytes pid) | ||
250 : | fun writePidList (s, l) = app (fn p => writePid (s, p)) l | ||
251 : | |||
252 : | local | ||
253 : | fun pickleImportSpec ((selector, tree), (n, p)) = let | ||
254 : | val sp = picklePackedInt32 selector | ||
255 : | val (n', p') = pickleImportTree (tree, (n, p)) | ||
256 : | in | ||
257 : | (n', sp :: p') | ||
258 : | end | ||
259 : | and pickleImportTree (ImportTree.ITNODE [], (n, p)) = | ||
260 : | (n + 1, picklePackedInt32 0 :: p) | ||
261 : | | pickleImportTree (ImportTree.ITNODE l, (n, p)) = let | ||
262 : | val (n', p') = foldr pickleImportSpec (n, p) l | ||
263 : | in | ||
264 : | (n', picklePackedInt32 (length l) :: p') | ||
265 : | end | ||
266 : | |||
267 : | fun pickleImport ((pid, tree), (n, p)) = let | ||
268 : | val (n', p') = pickleImportTree (tree, (n, p)) | ||
269 : | in | ||
270 : | (n', Pid.toBytes pid :: p') | ||
271 : | end | ||
272 : | in | ||
273 : | fun pickleImports l = let | ||
274 : | val (n, p) = foldr pickleImport (0, []) l | ||
275 : | in | ||
276 : | (n, Word8Vector.concat p) | ||
277 : | end | ||
278 : | end | ||
279 : | |||
280 : | fun mkMAGIC (arch, version_id) = let | ||
281 : | val vbytes = 8 (* version part *) | ||
282 : | val abytes = magicBytes - vbytes - 1 (* arch part *) | ||
283 : | fun fit (i, s) = let | ||
284 : | val s = StringCvt.padRight #" " i s | ||
285 : | in | ||
286 : | if (size s = i) then s | ||
287 : | else substring (s, 0, i) | ||
288 : | end | ||
289 : | fun version [] = [] | ||
290 : | | version [x : int] = [Int.toString x] | ||
291 : | | version (x :: r) = (Int.toString x) :: "." :: (version r) | ||
292 : | val v = fit (vbytes, concat (version version_id)) | ||
293 : | val a = fit (abytes, arch) | ||
294 : | in | ||
295 : | Byte.stringToBytes (concat [v, a, "\n"]) | ||
296 : | (* assert (Word8Vector.length (MAGIC <arch>) = magicBytes *) | ||
297 : | end | ||
298 : | |||
299 : | (* calculate size of code objects (including length fields) *) | ||
300 : | fun codeSize (csegs: csegments) = | ||
301 : | List.foldl | ||
302 : | (fn (co, n) => n + CodeObj.size co + 4) | ||
303 : | (CodeObj.size(#c0 csegs) + Word8Vector.length(#data csegs) + 8) | ||
304 : | (#cn csegs) | ||
305 : | |||
306 : | (* This function must be kept in sync with the "write" function below. | ||
307 : | * It calculates the number of bytes written by a corresponding | ||
308 : | * call to "write". *) | ||
309 : | fun size { contents, nopickle } = let | ||
310 : | val { imports, exportPid, senv, cmData, lambda, csegments, ... } = | ||
311 : | unBF contents | ||
312 : | val (_, picki) = pickleImports imports | ||
313 : | val hasExports = isSome exportPid | ||
314 : | fun pickleSize { pid, pickle } = | ||
315 : | if nopickle then 0 else Word8Vector.length pickle | ||
316 : | in | ||
317 : | magicBytes + | ||
318 : | 9 * 4 + | ||
319 : | Word8Vector.length picki + | ||
320 : | (if hasExports then bytesPerPid else 0) + | ||
321 : | bytesPerPid * (length cmData + 2) + | ||
322 : | pickleSize lambda + | ||
323 : | codeSize csegments + | ||
324 : | pickleSize senv | ||
325 : | end | ||
326 : | |||
327 : | fun create { imports, exportPid, cmData, senv, lambda, csegments } = | ||
328 : | BF { imports = imports, | ||
329 : | exportPid = exportPid, | ||
330 : | cmData = cmData, | ||
331 : | senv = senv, | ||
332 : | lambda = lambda, | ||
333 : | csegments = csegments, | ||
334 : | executable = ref NONE } | ||
335 : | |||
336 : | (* must be called with second arg >= 0 *) | ||
337 : | fun readCodeList (strm, name, nbytes) = let | ||
338 : | fun readCode 0 = [] | ||
339 : | | readCode n = let | ||
340 : | val sz = readInt32 strm | ||
341 : | val n' = n - sz - 4 | ||
342 : | in | ||
343 : | if n' < 0 then | ||
344 : | error "code size" | ||
345 : | else CodeObj.input(strm, sz, SOME name) :: readCode n' | ||
346 : | end | ||
347 : | val dataSz = readInt32 strm | ||
348 : | val n' = nbytes - dataSz - 4 | ||
349 : | val data = if n' < 0 then error "data size" else bytesIn (strm, dataSz) | ||
350 : | in | ||
351 : | case readCode n' of | ||
352 : | (c0 :: cn) => { data = data, c0 = c0, cn = cn } | ||
353 : | | [] => error "missing code objects" | ||
354 : | end | ||
355 : | |||
356 : | fun read { arch, version, name, stream = s } = let | ||
357 : | val MAGIC = mkMAGIC (arch, version) | ||
358 : | val magic = bytesIn (s, magicBytes) | ||
359 : | val _ = if magic = MAGIC then () else error "bad magic number" | ||
360 : | val leni = readInt32 s | ||
361 : | val ne = readInt32 s | ||
362 : | val importSzB = readInt32 s | ||
363 : | val cmInfoSzB = readInt32 s | ||
364 : | val nei = cmInfoSzB div bytesPerPid | ||
365 : | val lambdaSz = readInt32 s | ||
366 : | val reserved = readInt32 s | ||
367 : | val pad = readInt32 s | ||
368 : | val cs = readInt32 s | ||
369 : | val es = readInt32 s | ||
370 : | val imports = readImports (s, leni) | ||
371 : | val exportPid = | ||
372 : | (case ne of | ||
373 : | 0 => NONE | ||
374 : | | 1 => SOME(readPid s) | ||
375 : | | _ => error "too many export PIDs") | ||
376 : | val envPids = readPidList (s, nei) | ||
377 : | val (staticPid, lambdaPid, cmData) = | ||
378 : | case envPids of | ||
379 : | st :: lm :: cmData => (st, lm, cmData) | ||
380 : | | _ => error "env PID list" | ||
381 : | val plambda = bytesIn (s, lambdaSz) | ||
382 : | (* We could simply skip the reserved area if there is one, | ||
383 : | * but in that case there probably is something else seriously | ||
384 : | * wrong (wrong version, etc.), so we may as well complain... *) | ||
385 : | val _ = if reserved = 0 then () else error "non-zero reserved size" | ||
386 : | (* skip padding *) | ||
387 : | val _ = if pad <> 0 then ignore (bytesIn (s, pad)) else () | ||
388 : | (* now get the code *) | ||
389 : | val code = readCodeList (s, name, cs) | ||
390 : | val penv = bytesIn (s, es) | ||
391 : | in | ||
392 : | { contents = create { imports = imports, | ||
393 : | exportPid = exportPid, | ||
394 : | cmData = cmData, | ||
395 : | senv = { pid = staticPid, pickle = penv }, | ||
396 : | lambda = { pid = lambdaPid, pickle = plambda }, | ||
397 : | csegments = code }, | ||
398 : | stats = { env = es, inlinfo = lambdaSz, code = cs, | ||
399 : | data = Word8Vector.length (#data code) } } | ||
400 : | end | ||
401 : | |||
402 : | fun write { arch, version, stream = s, contents, nopickle } = let | ||
403 : | (* Keep this in sync with "size" (see above). *) | ||
404 : | val { imports, exportPid, cmData, senv, lambda, csegments, ... } = | ||
405 : | unBF contents | ||
406 : | val { pickle = senvP, pid = staticPid } = senv | ||
407 : | val { pickle = lambdaP, pid = lambdaPid } = lambda | ||
408 : | val envPids = staticPid :: lambdaPid :: cmData | ||
409 : | val (leni, picki) = pickleImports imports | ||
410 : | val importSzB = Word8Vector.length picki | ||
411 : | val (ne, epl) = | ||
412 : | case exportPid of | ||
413 : | NONE => (0, []) | ||
414 : | | SOME p => (1, [p]) | ||
415 : | val nei = length envPids | ||
416 : | val cmInfoSzB = nei * bytesPerPid | ||
417 : | fun pickleSize { pid, pickle } = | ||
418 : | if nopickle then 0 else Word8Vector.length pickle | ||
419 : | val lambdaSz = pickleSize lambda | ||
420 : | val reserved = 0 (* currently no reserved area *) | ||
421 : | val pad = 0 (* currently no padding *) | ||
422 : | val cs = codeSize csegments | ||
423 : | fun codeOut c = (writeInt32 s (CodeObj.size c); CodeObj.output (s, c)) | ||
424 : | val es = pickleSize senv | ||
425 : | val writeEnv = if nopickle then fn () => () | ||
426 : | else fn () => BinIO.output (s, senvP) | ||
427 : | val datasz = Word8Vector.length (#data csegments) | ||
428 : | val MAGIC = mkMAGIC (arch, version) | ||
429 : | in | ||
430 : | BinIO.output (s, MAGIC); | ||
431 : | app (writeInt32 s) [leni, ne, importSzB, cmInfoSzB, | ||
432 : | lambdaSz, reserved, pad, cs, es]; | ||
433 : | BinIO.output (s, picki); | ||
434 : | writePidList (s, epl); | ||
435 : | (* arena1 *) | ||
436 : | writePidList (s, envPids); | ||
437 : | (* arena2 -- pickled flint stuff *) | ||
438 : | if lambdaSz = 0 then () else BinIO.output (s, lambdaP); | ||
439 : | (* arena3 is empty *) | ||
440 : | (* arena4 is empty *) | ||
441 : | (* code objects *) | ||
442 : | writeInt32 s datasz; | ||
443 : | BinIO.output(s, #data csegments); | ||
444 : | codeOut (#c0 csegments); | ||
445 : | app codeOut (#cn csegments); | ||
446 : | writeEnv (); | ||
447 : | { env = es, inlinfo = lambdaSz, data = datasz, code = cs } | ||
448 : | end | ||
449 : | |||
450 : | fun exec (BF { imports, exportPid, executable, csegments, ... }, dynenv) = | ||
451 : | let val executable = | ||
452 : | case !executable of | ||
453 : | SOME e => e | ||
454 : | | NONE => let | ||
455 : | val e = Isolate.isolate (Execute.mkexec csegments) | ||
456 : | in executable := SOME e; e | ||
457 : | end | ||
458 : | in | ||
459 : | Execute.execute { executable = executable, | ||
460 : | imports = imports, | ||
461 : | exportPid = exportPid, | ||
462 : | dynenv = dynenv } | ||
463 : | end | ||
464 : | end |
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |