Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /sml/trunk/src/comp-lib/pickle-util.sml
ViewVC logotype

Annotation of /sml/trunk/src/comp-lib/pickle-util.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 427 - (view) (download)

1 : monnier 427 (*
2 :     * This is the new "generic" pickle utility which replaces Andrew Appel's
3 :     * original "sharewrite" module. Aside from formal differences, this
4 :     * new module ended up not being any different from Andrew's. However,
5 :     * it ties in with its "unpickle" counterpart which is a *lot* better than
6 :     * its predecessor.
7 :     *
8 :     * Generated pickles tend to be a little bit smaller, which can
9 :     * probably be explained by the slightly more compact (in the common case,
10 :     * i.e. for small absolute values) integer representation.
11 :     *
12 :     * July 1999, Matthias Blume
13 :     *)
14 :    
15 :     (*
16 :     * By the way, there is no point in trying to internally use
17 :     * Word8Vector.vector instead of string for now.
18 :     * These strings participate in order comparisons (which makes
19 :     * Word8Vector.vector unsuitable). Moreover, conversion between
20 :     * string and Word8Vector.vector is currently just a cast, so it
21 :     * does not cost anything in the end.
22 :     *)
23 :     signature PICKLE_UTIL = sig
24 :    
25 :     type id
26 :    
27 :     (* Type info. Use a different number for each type constructor. *)
28 :     type tinfo = int (* negative numbers are reserved! *)
29 :    
30 :     type 'ahm pickle
31 :     type ('ahm, 'v) pickler = 'v -> 'ahm pickle
32 :    
33 :     (* Combining pickles into one. The resulting compound pickle will not
34 :     * automatically be subject to hash-consing. Wrap it with $ to get
35 :     * that effect. *)
36 :     val & : 'ahm pickle * 'ahm pickle -> 'ahm pickle
37 :    
38 :     (* $ produces the pickle for one case (constructor) of a datatype.
39 :     * The string must be one character long and the argument pickle
40 :     * should be the pickle for the constructor's arguments. If there
41 :     * are no arguments, then use % instead of $.
42 :     * Use the same tinfo for all constructors of the same datatype
43 :     * and different tinfos for constructors of different types.
44 :     *
45 :     * The latter is really only important if there are constructors
46 :     * of different type who have identical argument types and use the
47 :     * same $ identificaton string. In this case the pickler might
48 :     * identify two values of different types, and as a result the
49 :     * unpickler will be very unhappy.
50 :     *
51 :     * On the other hand, if you use different tinfos for the same type,
52 :     * then nothing terrible will happen. You might lose some sharing,
53 :     * though.
54 :     *
55 :     * The string argument could theoretically be more than one character
56 :     * long. In this case the corresponding unpickling function must
57 :     * be sure to get all those characters out of the input stream.
58 :     * We actually do exploit this "feature" internally. *)
59 :     val $ : tinfo -> string * 'ahm pickle -> 'ahm pickle
60 :     val % : tinfo -> string -> 'ahm pickle
61 :    
62 :     (* "ah_share" is used to specify potential for "ad-hoc" sharing
63 :     * using the user-supplied map. It is important that ah_share is
64 :     * applied to pickles constructed by $ or % but never to those
65 :     * constructed by &. Ad-hoc sharing is used to break structural
66 :     * cycles, to identify parts of the value that the hash-conser cannot
67 :     * automatically identify but which should be identified nevertheless,
68 :     * or to identify those parts that would be too expensive to be left
69 :     * to the hash-conser. *)
70 :     val ah_share : { find : 'ahm * 'v -> id option,
71 :     insert : 'ahm * 'v * id -> 'ahm } ->
72 :     ('ahm, 'v) pickler -> ('ahm, 'v) pickler
73 :    
74 :     (* generating pickles for values of some basic types *)
75 :     val w_bool : ('ahm, bool) pickler
76 :     val w_int : ('ahm, int) pickler
77 :     val w_word : ('ahm, word) pickler
78 :     val w_int32 : ('ahm, Int32.int) pickler
79 :     val w_word32 : ('ahm, Word32.word) pickler
80 :     val w_string : ('ahm, string) pickler
81 :    
82 :     (* generating pickles for some parameterized types (given a pickler
83 :     * for the parameter) *)
84 :     val w_list : ('ahm, 'a) pickler -> ('ahm, 'a list) pickler
85 :     val w_option : ('ahm, 'a) pickler -> ('ahm, 'a option) pickler
86 :    
87 :     (* this doesn't automatically identify (i.e., hash-cons) pairs *)
88 :     val w_pair :
89 :     ('ahm, 'a) pickler * ('ahm, 'b) pickler -> ('ahm, 'a * 'b) pickler
90 :    
91 :     (* Pickling a "lazy" value (i.e., a thunk); the thunk will be forced
92 :     * by the pickler. Unpickling is lazy again; but, of course, that
93 :     * laziness is unrelated to the laziness of the original value. *)
94 :     val w_lazy : ('ahm, 'a) pickler -> ('ahm, unit -> 'a) pickler
95 :    
96 :     (* run the pickle, i.e., turn it into a string *)
97 :     val pickle : 'ahm -> 'ahm pickle -> string
98 :    
99 :     (* The xxx_lifter stuff is here to allow picklers to be "patched
100 :     * together". If you already have a pickler that uses a sharing map
101 :     * of type B and you want to use it as part of a bigger pickler that
102 :     * uses a sharing map of type A, then you must write a (B, A) map_lifter
103 :     * which then lets you lift the existing pickler to one that uses
104 :     * type A maps instead of its own type B maps.
105 :     *
106 :     * The idea is that B maps are really part of A maps. They can be
107 :     * extracted for the duration of using the existing pickler. Then,
108 :     * when that pickler is done, we can patch the resulting new B map
109 :     * back into the original A map to obtain a new A map. *)
110 :     type ('b_ahm, 'a_ahm) map_lifter =
111 :     { extract: 'a_ahm -> 'b_ahm, patchback: 'a_ahm * 'b_ahm -> 'a_ahm }
112 :    
113 :     val lift_pickler: ('b_ahm, 'a_ahm) map_lifter ->
114 :     ('b_ahm, 'v) pickler -> ('a_ahm, 'v) pickler
115 :     end
116 :    
117 :     structure PickleUtil :> PICKLE_UTIL = struct
118 :    
119 :     type pos = int
120 :     type id = pos
121 :     type tinfo = int
122 :     type codes = id list
123 :    
124 :     structure HCM = BinaryMapFn
125 :     (struct
126 :     type ord_key = string * tinfo * codes
127 :     fun compare ((c, t, l), (c', t', l')) = let
128 :     val tinfoCmp = Int.compare
129 :     fun codesCmp ([], []) = EQUAL
130 :     | codesCmp (_ :: _, []) = GREATER
131 :     | codesCmp ([], _ :: _) = LESS
132 :     | codesCmp (h :: t, h' :: t') =
133 :     case Int.compare (h, h') of
134 :     EQUAL => codesCmp (t, t')
135 :     | unequal => unequal
136 :     in
137 :     case String.compare (c, c') of
138 :     EQUAL => (case tinfoCmp (t, t') of
139 :     EQUAL => codesCmp (l, l')
140 :     | unequal => unequal)
141 :     | unequal => unequal
142 :     end
143 :     end)
144 :    
145 :     datatype pre_result =
146 :     STRING of string
147 :     | CONCAT of pre_result * pre_result
148 :    
149 :     fun pre_size (STRING s) = size s
150 :     | pre_size (CONCAT (p, p')) = pre_size p + pre_size p'
151 :    
152 :     val backref = STRING "\255"
153 :     val size_backref = 1
154 :     val nullbytes = STRING ""
155 :    
156 :     type hcm = id HCM.map
157 :     type 'ahm state = hcm * 'ahm * pos
158 :    
159 :     type 'ahm pickle = 'ahm state -> codes * pre_result * 'ahm state
160 :     type ('ahm, 'v) pickler = 'v -> 'ahm pickle
161 :    
162 :     infix 3 $
163 :     infixr 4 &
164 :    
165 :     fun (f & g) state = let
166 :     val (fc, fpr, state') = f state
167 :     val (gc, gpr, state'') = g state'
168 :     in
169 :     (fc @ gc, CONCAT (fpr, gpr), state'')
170 :     end
171 :    
172 :     fun anyint_encode (n, negative) = let
173 :     (* this is essentially the same mechanism that's also used in
174 :     * TopLevel/batch/binfile.sml (maybe we should share it) *)
175 :     val // = LargeWord.div
176 :     val %% = LargeWord.mod
177 :     val !! = LargeWord.orb
178 :     infix // %% !!
179 :     val toW8 = Word8.fromLargeWord
180 :     fun r (0w0, l) = Word8Vector.fromList l
181 :     | r (n, l) =
182 :     r (n // 0w128, toW8 ((n %% 0w128) !! 0w128) :: l)
183 :     val lastDigit = n %% 0w64
184 :     val lastByte = if negative then lastDigit !! 0w64 else lastDigit
185 :     in
186 :     Byte.bytesToString (r (n // 0w64, [toW8 lastByte]))
187 :     end
188 :    
189 :     fun largeword_encode n = anyint_encode (n, false)
190 :     fun largeint_encode i =
191 :     if i >= 0 then anyint_encode (LargeWord.fromLargeInt i, false)
192 :     (* careful to do the negation in word domain... *)
193 :     else anyint_encode (0w0 - LargeWord.fromLargeInt i, true)
194 :    
195 :     val word32_encode = largeword_encode o Word32.toLargeWord
196 :     val word_encode = largeword_encode o Word.toLargeWord
197 :    
198 :     val int32_encode = largeint_encode o Int32.toLarge
199 :     val int_encode = largeint_encode o Int.toLarge
200 :    
201 :     fun % ti c (hcm, ahm, next) = let
202 :     val key = (c, ti, [])
203 :     in
204 :     case HCM.find (hcm, key) of
205 :     SOME i => ([i], STRING c, (hcm, ahm, next + size c))
206 :     | NONE => ([next], STRING c,
207 :     (HCM.insert (hcm, key, next), ahm, next + size c))
208 :     end
209 :    
210 :     fun dollar ti (c, p) (hcm, ahm, next) = let
211 :     val (codes, pr, (hcm', ahm', next')) = p (hcm, ahm, next + size c)
212 :     val key = (c, ti, codes)
213 :     in
214 :     case HCM.find (hcm, key) of
215 :     SOME i => let
216 :     val brnum = int_encode i
217 :     in
218 :     ([i], CONCAT (backref, STRING brnum),
219 :     (hcm, ahm, next + size_backref + size brnum))
220 :     end
221 :     | NONE =>
222 :     ([next], CONCAT (STRING c, pr),
223 :     (HCM.insert (hcm', key, next), ahm', next'))
224 :     end
225 :    
226 :     fun ah_share { find, insert } w v (hcm, ahm, next) =
227 :     case find (ahm, v) of
228 :     SOME i => let
229 :     val brnum = int_encode i
230 :     in
231 :     ([i], CONCAT (backref, STRING brnum),
232 :     (hcm, ahm, next + size_backref + size brnum))
233 :     end
234 :     | NONE => w v (hcm, insert (ahm, v, next), next)
235 :    
236 :     fun w_lazy w thunk (hcm, ahm, next) = let
237 :     val v = thunk ()
238 :     (* The larger the value of trialStart, the smaller the chance that
239 :     * the loop (see below) will run more than once. However, some
240 :     * space may be wasted. 2 sounds like a good compromise to me. *)
241 :     val trialStart = 2
242 :     (* This loop is ugly, but we don't expect it to run very often.
243 :     * It is needed because we must first write the length of the
244 :     * encoding of the thunk's value, but that encoding depends
245 :     * on the length (or rather: on the length of the length). *)
246 :     fun loop (nxt, ilen) = let
247 :     val (codes, pr, state) = w v (hcm, ahm, nxt)
248 :     val sz = pre_size pr
249 :     val ie = int_encode sz
250 :     val iesz = size ie
251 :     (* Padding in front is better because the unpickler can
252 :     * simply discard all leading 0s and does not need to know
253 :     * about the pickler's setting of "trialStart". *)
254 :     val null = STRING "\000"
255 :     fun pad (pr, n) =
256 :     if n = 0 then pr
257 :     else pad (CONCAT (null, pr), n - 1)
258 :     in
259 :     if ilen < iesz then loop (nxt + 1, ilen + 1)
260 :     else (codes, CONCAT (pad (STRING ie, ilen - iesz), pr), state)
261 :     end
262 :     in
263 :     loop (next + trialStart, trialStart)
264 :     end
265 :    
266 :     local
267 :     val I = ~1
268 :     val W = ~2
269 :     val I32 = ~3
270 :     val W32 = ~4
271 :     in
272 :     (* Even though the encoding could start with the
273 :     * backref character, we know that it isn't actually a backref
274 :     * because % suppresses back-references.
275 :     * Of course, this must be taken care of by unpickle-util! *)
276 :     fun w_int i = % I (int_encode i)
277 :     fun w_word w = % W (word_encode w)
278 :     fun w_int32 i32 = % I32 (int32_encode i32)
279 :     fun w_word32 w32 = % W32 (word32_encode w32)
280 :     end
281 :    
282 :     local
283 :     val L = ~5
284 :     fun chop5 l = let
285 :     fun ch (a :: b :: c :: d :: e :: r, cl) =
286 :     ch (r, (e, d, c, b, a) :: cl)
287 :     | ch (r, cl) = (rev r, cl)
288 :     in
289 :     ch (rev l, [])
290 :     end
291 :     in
292 :     fun w_list w l = let
293 :     val op $ = dollar L
294 :     fun wc [] = % L "N"
295 :     | wc ((a, b, c, d, e) :: r) =
296 :     "C" $ w a & w b & w c & w d & w e & wc r
297 :     in
298 :     case chop5 l of
299 :     ([], []) => % L "0"
300 :     | ([a], []) => "1" $ w a
301 :     | ([a, b], []) => "2" $ w a & w b
302 :     | ([a, b, c], []) => "3" $ w a & w b & w c
303 :     | ([a, b, c, d], []) => "4" $ w a & w b & w c & w d
304 :     | ([], r) => "5" $ wc r
305 :     | ([a], r) => "6" $ w a & wc r
306 :     | ([a, b], r) => "7" $ w a & w b & wc r
307 :     | ([a, b, c], r) => "8" $ w a & w b & w c & wc r
308 :     | ([a, b, c, d], r) => "9" $ w a & w b & w c & w d & wc r
309 :     | _ => raise Fail "PickleUtil.w_list: impossible chop"
310 :     end
311 :     end
312 :    
313 :     local
314 :     val O = ~6
315 :     in
316 :     fun w_option arg = let
317 :     val op $ = dollar O
318 :     fun wo w NONE = % O "n"
319 :     | wo w (SOME i) = "s" $ w i
320 :     in
321 :     wo arg
322 :     end
323 :     end
324 :    
325 :     fun w_pair (wa, wb) (a, b) = wa a & wb b
326 :    
327 :     local
328 :     val S = ~7
329 :     in
330 :     fun w_string s = let
331 :     val op $ = dollar S
332 :     (* The dummy_pickle is a hack to get strings to be identified
333 :     * automatically. They don't have "natural" children, so normally
334 :     * % would suppress the backref. The dummy pickle produces no
335 :     * codes and no output, but it is there to make $ believe that
336 :     * there are children. *)
337 :     fun dummy_pickle state = ([], nullbytes, state)
338 :     fun esc #"\\" = "\\\\"
339 :     | esc #"\"" = "\\\""
340 :     | esc #"\255" = "\\\255" (* need to escape backref char *)
341 :     | esc c = String.str c
342 :     in
343 :     (String.translate esc s ^ "\"") $ dummy_pickle
344 :     end
345 :     end
346 :    
347 :     local
348 :     val B = ~8
349 :     in
350 :     fun w_bool true = % B "t"
351 :     | w_bool false = % B "f"
352 :     end
353 :    
354 :     local
355 :     fun pr2s pr = let
356 :     fun flat (STRING s, l) = s :: l
357 :     | flat (CONCAT (x, STRING s), l) = flat (x, s :: l)
358 :     | flat (CONCAT (x, CONCAT (y, z)), l) =
359 :     flat (CONCAT (CONCAT (x, y), z), l)
360 :     in
361 :     concat (flat (pr, []))
362 :     end
363 :     in
364 :     fun pickle emptyMap p = let
365 :     val (_, pr, _) = p (HCM.empty, emptyMap, 0)
366 :     in
367 :     pr2s pr
368 :     end
369 :     end
370 :    
371 :     type ('b_ahm, 'a_ahm) map_lifter =
372 :     { extract: 'a_ahm -> 'b_ahm, patchback: 'a_ahm * 'b_ahm -> 'a_ahm }
373 :    
374 :     fun lift_pickler { extract, patchback } wb b (hcm, a_ahm, next) = let
375 :     val b_ahm = extract a_ahm
376 :     val (codes, pr, (hcm', b_ahm', next')) = wb b (hcm, b_ahm, next)
377 :     val a_ahm' = patchback (a_ahm, b_ahm')
378 :     in
379 :     (codes, pr, (hcm', a_ahm', next'))
380 :     end
381 :    
382 :     (* for export *)
383 :     nonfix $
384 :     val $ = dollar
385 :     end

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0