Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /sml/branches/SMLNJ/src/comp-lib/unpickle-util.sml
ViewVC logotype

Annotation of /sml/branches/SMLNJ/src/comp-lib/unpickle-util.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 428 - (view) (download)

1 : monnier 427 (*
2 :     * This is the new "generic" unpickle utility. It replaces Andrew Appel's
3 :     * original "shareread" module. The main difference is that instead of
4 :     * using a "universal" type together with numerous injections and projections
5 :     * it uses separate maps. This approach proves to be a lot more light-weight.
6 :     *
7 :     * The benefits are:
8 :     * - no projections, hence no "match nonexhaustive" warnings and...
9 :     * - ...no additional run-time overhead (all checking is done during
10 :     * the map membership test which is common to both implementations)
11 :     * - no necessity for those many tiny "fn"-functions that litter the
12 :     * original code, resulting in...
13 :     * - ...a more "natural" style for writing the actual unpickling code
14 :     * that makes for shorter source code
15 :     * - ...a lot less generated machine code (less than 50% of the original
16 :     * version)
17 :     * - ...slightly faster operation (around 15% speedup)
18 :     * (My guess is that it is a combination of fewer projections and
19 :     * fewer generated thunks that make the code run faster.)
20 :     *
21 :     * July 1999, Matthias Blume
22 :     *)
23 :     signature UNPICKLE_UTIL = sig
24 :    
25 :     exception Format
26 :    
27 :     type 'v map (* one for each type *)
28 :     type session (* encapsulates unpickling state *)
29 :    
30 :     (* Make a type-specific sharing map using "mkMap".
31 :     *
32 :     * Be sure to create such maps only locally, otherwise you have a
33 :     * space leak.
34 :     *
35 :     * The ML type system will prevent you from accidentially using the
36 :     * same map for different types, so don't worry. But using TOO MANY
37 :     * maps (i.e., more than one map for the same type) will likely
38 :     * cause problems because the unpickler might try to look for a
39 :     * back reference that is in a different map than the one where the
40 :     * value is actually registered.
41 :     *
42 :     * By the way, this warning is not unique to the many-maps approach.
43 :     * The same thing would happen with the original "universal domain"
44 :     * unpickler if you declare two different constructors for the
45 :     * same type. Given that there are about 100 types (and thus
46 :     * 100 constructors or maps) in the SML/NJ environment pickler,
47 :     * the possibility for such a mistake is not to be dismissed. *)
48 :     val mkMap : unit -> 'v map
49 :    
50 :     type 'v reader = unit -> 'v
51 :    
52 :     (* A "charGetter" is the mechanism that gets actual characters from
53 :     * the pickle. For ordinary pickles, the unpickler will never call
54 :     * "seek". Moreover, the same is true if you read the pickles created
55 :     * by pickleN sequentially from the first to the last (i.e., not
56 :     * "out-of-order"). "cur" determines the current position and must be
57 :     * implemented. *)
58 :     type charGetter =
59 :     { read: char reader, seek: int -> unit, cur: unit -> int }
60 :    
61 :     (* the string is the pickle string *)
62 :     val stringGetter : string -> charGetter
63 :    
64 :     (* open the unpickling session - everything is parameterized by this;
65 :     * the charGetter provides the bytes of the pickle *)
66 :     val mkSession : charGetter -> session
67 :    
68 :     (* The typical style is to write a "reader" function for each type
69 :     * The reader function uses a local helper function which takes the
70 :     * first character of a pickle (this is usually the discriminator that
71 :     * was given to $ or % in the pickler) and returns the unpickled
72 :     * value. The function recursively calls other "reader" functions.
73 :     * To actually get the value from the pickle, pass the helper
74 :     * to "share" -- together with the current session and the
75 :     * type-specific map. "share" will take care of back-references
76 :     * (using the map) and pass the first character to your helper
77 :     * when necessary. The standard pattern for writing a "t reader"
78 :     * therefore is:
79 :     *
80 :     * val session = UnpickleUtil.mkSession pickle
81 :     * fun share m f = UnpickleUtil.share session m f
82 :     * ...
83 :     * val t_map = Unpickleutil.mkMap ()
84 :     * ...
85 :     * fun r_t () = let
86 :     * fun t #"a" = ... (* case a *)
87 :     * | t #"b" = ... (* case b *)
88 :     * ...
89 :     * | _ = raise UnpickleUtil.Format
90 :     * in
91 :     * share t_map t
92 :     * end
93 :     *)
94 :     val share : session -> 'v map -> (char -> 'v) -> 'v
95 :    
96 :     (* If you know that you don't need a map because there can be no
97 :     * sharing (typically if you didn't use any $ but only % for pickling
98 :     * your type), then you can use "nonshare" instead of "share". *)
99 :     val nonshare : session -> (char -> 'v) -> 'v
100 :    
101 :     (* making readers for some common types *)
102 :     val r_int : session -> int reader
103 :     val r_int32 : session -> Int32.int reader
104 :     val r_word : session -> word reader
105 :     val r_word32 : session -> Word32.word reader
106 :     val r_bool : session -> bool reader
107 :     val r_string : session -> string reader
108 :    
109 :     (* readers for parametric types need their own map *)
110 :     val r_list : session -> 'v list map -> 'v reader -> 'v list reader
111 :     val r_option : session -> 'v option map -> 'v reader -> 'v option reader
112 :    
113 :     (* pairs are not shared, so we don't need a map here *)
114 :     val r_pair : 'a reader * 'b reader -> ('a * 'b) reader
115 :    
116 :     (* The laziness generated here is in the unpickling. In other words
117 :     * unpickling state is not discarded until the last lazy value has been
118 :     * forced. *)
119 :     val r_lazy : session -> 'a reader -> (unit -> 'a) reader
120 :     end
121 :    
122 :     structure UnpickleUtil :> UNPICKLE_UTIL = struct
123 :    
124 :     exception Format
125 :    
126 :     type 'v map = ('v * int) IntBinaryMap.map ref
127 :     type state = string map
128 :    
129 :     type 'v reader = unit -> 'v
130 :    
131 :     type charGetter =
132 :     { read: char reader, seek: int -> unit, cur: unit -> int }
133 :    
134 :     type session = { state: state, getter: charGetter }
135 :    
136 :     fun mkMap () = ref IntBinaryMap.empty
137 :    
138 :     fun stringGetter pstring = let
139 :     val pos = ref 0
140 :     fun rd () = let
141 :     val p = !pos
142 :     in
143 :     pos := p + 1;
144 :     String.sub (pstring, p) handle Subscript => raise Format
145 :     end
146 :     fun sk p = pos := p
147 :     fun cur () = !pos
148 :     in
149 :     { read = rd, seek = sk, cur = cur }
150 :     end
151 :    
152 :     local
153 :     fun f_anyint rd () = let
154 :     val & = Word8.andb
155 :     infix &
156 :     val large = Word8.toLargeWord
157 :     fun loop n = let
158 :     val w8 = Byte.charToByte (rd ())
159 :     in
160 :     if (w8 & 0w128) = 0w0 then
161 :     (n * 0w64 + large (w8 & 0w63), (w8 & 0w64) <> 0w0)
162 :     else loop (n * 0w128 + large (w8 & 0w127))
163 :     end
164 :     in
165 :     loop 0w0
166 :     end
167 :    
168 :     fun f_largeword cvt rd () =
169 :     case f_anyint rd () of
170 :     (w, false) => (cvt w handle _ => raise Format)
171 :     | _ => raise Format
172 :    
173 :     fun f_largeint cvt rd () = let
174 :     val (w, negative) = f_anyint rd ()
175 :     val i = LargeWord.toLargeInt w handle _ => raise Format
176 :     in
177 :     (if negative then cvt (~i) else cvt i)
178 :     handle _ => raise Format
179 :     end
180 :     in
181 :     val f_int = f_largeint Int.fromLarge
182 :     val f_int32 = f_largeint Int32.fromLarge
183 :     val f_word = f_largeword Word.fromLargeWord
184 :     val f_word32 = f_largeword Word32.fromLargeWord
185 :     end
186 :    
187 :     fun mkSession charGetter =
188 :     ({ state = mkMap (), getter = charGetter }: session)
189 :    
190 :     fun share { state, getter = { read, seek, cur } } m r = let
191 :     fun firsttime (pos, c) = let
192 :     val v = r c
193 :     val pos' = cur ()
194 :     in
195 :     m := IntBinaryMap.insert (!m, pos, (v, pos'));
196 :     v
197 :     end
198 :     in
199 :     case read () of
200 :     #"\255" => let
201 :     val pos = f_int read ()
202 :     in
203 :     case IntBinaryMap.find (!m, pos) of
204 :     SOME (v, _) => v
205 :     | NONE => let
206 :     val here = cur ()
207 :     in
208 :     seek pos;
209 :     (* It is ok to use "read" here because
210 :     * there won't be back-references that jump
211 :     * to other back-references. *)
212 :     firsttime (pos, read())
213 :     before seek here
214 :     end
215 :     end
216 :     | c => let
217 :     (* Must subtract one to get back in front of c. *)
218 :     val pos = cur () - 1
219 :     in
220 :     case IntBinaryMap.find (!m, pos) of
221 :     SOME (v, pos') => (seek pos'; v)
222 :     | NONE => firsttime (pos, c)
223 :     end
224 :     end
225 :    
226 :     (* "nonshare" gets around backref detection. Certain integer
227 :     * encodings may otherwise be mis-identified as back references.
228 :     * Moreover, unlike in the case of "share" we don't need a map
229 :     * for "nonshare". This could be used as an optimization for
230 :     * types that are known to never be shared anyway (bool, etc.). *)
231 :     fun nonshare (s: session) f = f (#read (#getter s) ())
232 :    
233 :     local
234 :     fun f2r f_x (s: session) = f_x (#read (#getter s))
235 :     in
236 :     val r_int = f2r f_int
237 :     val r_int32 = f2r f_int32
238 :     val r_word = f2r f_word
239 :     val r_word32 = f2r f_word32
240 :     end
241 :    
242 :     fun r_lazy session r () = let
243 :     val memo = ref (fn () => raise Fail "UnpickleUtil.r_lazy")
244 :     val { getter = { cur, seek, ... }, ... } = session
245 :     (* the size may have leading 0s because of padding *)
246 :     fun getSize () = let
247 :     val sz = r_int session ()
248 :     in
249 :     if sz = 0 then getSize () else sz
250 :     end
251 :     val sz = getSize () (* size of v *)
252 :     val start = cur () (* start of v *)
253 :     fun thunk () = let
254 :     val wherever = cur () (* remember where we are now *)
255 :     val _ = seek start (* go to start of v *)
256 :     val v = r () (* read v *)
257 :     in
258 :     seek wherever; (* go back to where we were *)
259 :     memo := (fn () => v); (* memoize *)
260 :     v
261 :     end
262 :     in
263 :     memo := thunk;
264 :     seek (start + sz); (* as if we had read the value *)
265 :     (fn () => !memo ())
266 :     end
267 :    
268 :     fun r_list session m r () = let
269 :     fun r_chops () = let
270 :     fun rcl #"N" = []
271 :     | rcl #"C" = r () :: r () :: r () :: r () :: r () :: r_chops ()
272 :     | rcl _ = raise Format
273 :     in
274 :     share session m rcl
275 :     end
276 :     fun rl #"0" = []
277 :     | rl #"1" = [r ()]
278 :     | rl #"2" = [r (), r ()]
279 :     | rl #"3" = [r (), r (), r ()]
280 :     | rl #"4" = [r (), r (), r (), r ()]
281 :     | rl #"5" = r_chops ()
282 :     | rl #"6" = r () :: r_chops ()
283 :     | rl #"7" = r () :: r () :: r_chops ()
284 :     | rl #"8" = r () :: r () :: r () :: r_chops ()
285 :     | rl #"9" = r () :: r () :: r () :: r () :: r_chops ()
286 :     | rl _ = raise Format
287 :     in
288 :     share session m rl
289 :     end
290 :    
291 :     fun r_option session m r () = let
292 :     fun ro #"n" = NONE
293 :     | ro #"s" = SOME (r ())
294 :     | ro _ = raise Format
295 :     in
296 :     share session m ro
297 :     end
298 :    
299 :     fun r_pair (r_a, r_b) () = (r_a (), r_b ())
300 :    
301 :     fun r_bool session () = let
302 :     fun rb #"t" = true
303 :     | rb #"f" = false
304 :     | rb _ = raise Format
305 :     in
306 :     nonshare session rb
307 :     end
308 :    
309 :     fun r_string session () = let
310 :     val { state = m, getter } = session
311 :     val { read, ... } = getter
312 :     fun rs c = let
313 :     fun loop (l, #"\"") = String.implode (rev l)
314 :     | loop (l, #"\\") = loop (read () :: l, read ())
315 :     | loop (l, c) = loop (c :: l, read ())
316 :     in
317 :     loop ([], c)
318 :     end
319 :     in
320 :     share session m rs
321 :     end
322 :     end

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0