Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Diff of /sml/trunk/src/compiler/MiscUtil/library/pickle-util.sml
ViewVC logotype

Diff of /sml/trunk/src/compiler/MiscUtil/library/pickle-util.sml

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 709, Sat Oct 7 03:35:08 2000 UTC revision 710, Tue Oct 17 08:21:09 2000 UTC
# Line 10  Line 10 
10   * i.e. for small absolute values) integer representation.   * i.e. for small absolute values) integer representation.
11   *   *
12   * July 1999, Matthias Blume   * July 1999, Matthias Blume
13     *
14     * Addendum: This module now also marks as "actually being shared" those
15     * nodes where actual sharing has been detected.  Marking is done by
16     * setting the high bit in the char code of the node.  This means that
17     * char codes must be in the range [0,126] to avoid conflicts. (127
18     * cannot be used because setting the high bit there results in 255 --
19     * which is the backref code.)
20     * This improves unpickling time by about 25% and also reduces memory
21     * usage because much fewer sharing map entries have to be made during
22     * unpickling.
23     *
24     * October 2000, Matthias Blume
25   *)   *)
26    
27  (*  (*
# Line 54  Line 66 
66    
67      (* "ah_share" is used to specify potential for "ad-hoc" sharing      (* "ah_share" is used to specify potential for "ad-hoc" sharing
68       * using the user-supplied map.       * using the user-supplied map.
69       * Ad-hoc sharing is used to break structural       * Ad-hoc sharing is used to identify parts of the value that the
70       * cycles, to identify parts of the value that the hash-conser cannot       * hash-conser cannot automatically identify but which should be
71       * automatically identify but which should be identified nevertheless,       * identified nevertheless, or to identify those parts that would be
72       * or to identify those parts that would be too expensive to be left       * too expensive to be left to the hash-conser. *)
      * to the hash-conser. *)  
73      val ah_share : { find : 'ahm * 'v -> id option,      val ah_share : { find : 'ahm * 'v -> id option,
74                       insert : 'ahm * 'v * id -> 'ahm } ->                       insert : 'ahm * 'v * id -> 'ahm } ->
75          ('ahm, 'v) pickler -> ('ahm, 'v) pickler          ('ahm, 'v) pickler -> ('ahm, 'v) pickler
# Line 75  Line 86 
86       * for the parameter) *)       * for the parameter) *)
87      val w_list : ('ahm, 'a) pickler -> ('ahm, 'a list) pickler      val w_list : ('ahm, 'a) pickler -> ('ahm, 'a list) pickler
88      val w_option : ('ahm, 'a) pickler -> ('ahm, 'a option) pickler      val w_option : ('ahm, 'a) pickler -> ('ahm, 'a option) pickler
   
     (* this doesn't automatically identify (i.e., hash-cons) pairs *)  
89      val w_pair :      val w_pair :
90          ('ahm, 'a) pickler * ('ahm, 'b) pickler -> ('ahm, 'a * 'b) pickler          ('ahm, 'a) pickler * ('ahm, 'b) pickler -> ('ahm, 'a * 'b) pickler
91    
# Line 91  Line 100 
100      (* The xxx_lifter stuff is here to allow picklers to be "patched      (* The xxx_lifter stuff is here to allow picklers to be "patched
101       * together".  If you already have a pickler that uses a sharing map       * together".  If you already have a pickler that uses a sharing map
102       * of type B and you want to use it as part of a bigger pickler that       * of type B and you want to use it as part of a bigger pickler that
103       * uses a sharing map of type A, then you must write a (B, A) map_lifter       * uses a sharing map of type A, you must write a (B, A) map_lifter
104       * which then lets you lift the existing pickler to one that uses       * which then lets you lift the existing pickler to one that uses
105       * type A maps instead of its own type B maps.       * type A maps instead of its own type B maps.
106       *       *
# Line 110  Line 119 
119    
120      type pos = int      type pos = int
121      type id = pos      type id = pos
     type tinfo = int  
122      type codes = id list      type codes = id list
123        type tinfo = int
124    
125        type shareinfo = IntRedBlackSet.set
126        val si_empty = IntRedBlackSet.empty
127        val si_add = IntRedBlackSet.add
128        val si_list = IntRedBlackSet.listItems
129    
130      structure HCM = RedBlackMapFn      structure HCM = RedBlackMapFn
131          (struct          (struct
# Line 146  Line 160 
160    
161      type hcm = id HCM.map      type hcm = id HCM.map
162      type fwdm = id PM.map               (* forwarding map *)      type fwdm = id PM.map               (* forwarding map *)
163      type 'ahm state = hcm * fwdm * 'ahm * pos      type 'ahm state = hcm * fwdm * 'ahm * pos * shareinfo
164    
165      type 'ahm pickle = 'ahm state -> codes * pre_result * 'ahm state      type 'ahm pickle = 'ahm state -> codes * pre_result * 'ahm state
166      type ('ahm, 'v) pickler = 'v -> 'ahm pickle      type ('ahm, 'v) pickler = 'v -> 'ahm pickle
# Line 194  Line 208 
208      val int32_encode = largeint_encode o Int32.toLarge      val int32_encode = largeint_encode o Int32.toLarge
209      val int_encode = largeint_encode o Int.toLarge      val int_encode = largeint_encode o Int.toLarge
210    
211      fun % ti c (hcm, fwdm, ahm, next) = let      fun % ti c (hcm, fwdm, ahm, next, si) = let
212          val key = (c, ti, [])          val key = (c, ti, [])
213      in      in
214          case HCM.find (hcm, key) of          case HCM.find (hcm, key) of
215              SOME i => ([i], STRING c, (hcm, PM.insert (fwdm, next, i),              SOME i =>
216                                         ahm, next + size c))                  ([i], STRING c, (hcm, PM.insert (fwdm, next, i),
217            | NONE => ([next], STRING c,                                   ahm, next + size c, si))
218                       (HCM.insert (hcm, key, next), fwdm, ahm, next + size c))            | NONE =>
219                    ([next], STRING c, (HCM.insert (hcm, key, next), fwdm,
220                                        ahm, next + size c, si))
221      end      end
222    
223      fun dollar ti (c, []) state = % ti c state      fun dollar ti (c, []) state = % ti c state
224        | dollar ti (c, plh :: plt) (hcm, fwdm, ahm, next) = let        | dollar ti (c, plh :: plt) (hcm, fwdm, ahm, next, si) = let
225              val p = collapse (plh, plt)              val p = collapse (plh, plt)
226              val (codes, pr, (hcm', fwdm', ahm', next')) =              val (codes, pr, (hcm', fwdm', ahm', next', si')) =
227                  p (hcm, fwdm, ahm, next + size c)                  p (hcm, fwdm, ahm, next + size c, si)
228              val key = (c, ti, codes)              val key = (c, ti, codes)
229          in          in
230              case HCM.find (hcm, key) of              case HCM.find (hcm, key) of
# Line 217  Line 233 
233                  in                  in
234                      ([i], CONCAT (backref, STRING brnum),                      ([i], CONCAT (backref, STRING brnum),
235                       (hcm, PM.insert (fwdm, next, i),                       (hcm, PM.insert (fwdm, next, i),
236                        ahm, next + size_backref + size brnum))                        ahm, next + size_backref + size brnum,
237                          si_add (si', i)))
238                  end                  end
239                | NONE =>                | NONE =>
240                  ([next], CONCAT (STRING c, pr),                  ([next], CONCAT (STRING c, pr),
241                   (HCM.insert (hcm', key, next), fwdm', ahm', next'))                   (HCM.insert (hcm', key, next), fwdm', ahm', next', si'))
242          end          end
243    
244      fun ah_share { find, insert } w v (hcm, fwdm, ahm, next) =      fun ah_share { find, insert } w v (hcm, fwdm, ahm, next, si) =
245          case find (ahm, v) of          case find (ahm, v) of
246              SOME i0 => let              SOME i0 => let
247                  val i = getOpt (PM.find (fwdm, i0), i0)                  val i = getOpt (PM.find (fwdm, i0), i0)
248                  val brnum = int_encode i                  val brnum = int_encode i
249              in              in
250                  ([i], CONCAT (backref, STRING brnum),                  ([i], CONCAT (backref, STRING brnum),
251                   (hcm, fwdm, ahm, next + size_backref + size brnum))                   (hcm, fwdm, ahm, next + size_backref + size brnum,
252                      si_add (si, i)))
253              end              end
254            | NONE => w v (hcm, fwdm, insert (ahm, v, next), next)            | NONE => w v (hcm, fwdm, insert (ahm, v, next), next, si)
255    
256      fun w_lazy w thunk (hcm, fwdm, ahm, next) = let      fun w_lazy w thunk (hcm, fwdm, ahm, next, si) = let
257          val v = thunk ()          val v = thunk ()
258          (* The larger the value of trialStart, the smaller the chance that          (* The larger the value of trialStart, the smaller the chance that
259           * the loop (see below) will run more than once.  However, some           * the loop (see below) will run more than once.  However, some
# Line 247  Line 265 
265           * encoding of the thunk's value, but that encoding depends           * encoding of the thunk's value, but that encoding depends
266           * on the length (or rather: on the length of the length). *)           * on the length (or rather: on the length of the length). *)
267          fun loop (nxt, ilen) = let          fun loop (nxt, ilen) = let
268              val (codes, pr, state) = w v (hcm, fwdm, ahm, nxt)              val (codes, pr, state) = w v (hcm, fwdm, ahm, nxt, si)
269              val sz = pre_size pr              val sz = pre_size pr
270              val ie = int_encode sz              val ie = int_encode sz
271              val iesz = size ie              val iesz = size ie
272    
             val _ = if iesz > 2 then ignore (w v (hcm, fwdm, ahm, nxt)) else ()  
   
273              (* Padding in front is better because the unpickler can              (* Padding in front is better because the unpickler can
274               * simply discard all leading 0s and does not need to know               * simply discard all leading 0s and does not need to know
275               * about the pickler's setting of "trialStart". *)               * about the pickler's setting of "trialStart". *)
# Line 354  Line 370 
370                | esc #"\255" = "\\\255"  (* need to escape backref char *)                | esc #"\255" = "\\\255"  (* need to escape backref char *)
371                | esc c = String.str c                | esc c = String.str c
372          in          in
373              (String.translate esc s ^ "\"") $ [dummy_pickle]              (concat ["\"", String.translate esc s, "\""]) $ [dummy_pickle]
374          end          end
375      end      end
376    
# Line 366  Line 382 
382      end      end
383    
384      local      local
385          fun pr2s pr = let          fun pr2s (pr, next, si) = let
386              fun flat (STRING s, l) = s :: l  
387                | flat (CONCAT (x, STRING s), l) = flat (x, s :: l)              (* This puts a code string in front of the list of
388                | flat (CONCAT (x, CONCAT (y, z)), l) =               * code strings that follow.  It also takes care of
389                  flat (CONCAT (CONCAT (x, y), z), l)               * setting the high bit where necessary (see below),
390                 * updates the current position and the list of remaining
391                 * shared positions. *)
392                fun add ("", p, h, t, l) = (p, h :: t, l)
393                  | add (s, p, h, t, l) = let
394                        val len = size s
395                        val p' = p - len
396                    in
397                        if p' = h then let
398                            val fst =
399                                String.str
400                                  (Char.chr
401                                    (Char.ord (String.sub (s, 0)) + 128))
402                            fun ret x = (p', t, x)
403                        in
404                            if len > 1 then
405                                ret (fst :: String.extract (s, 1, NONE) :: l)
406                            else ret (fst :: l)
407                        end
408                        else (p', h :: t, s :: l)
409                    end
410    
411                (* Fast flattening -- when we are out of shared codes. *)
412                fun fflat (STRING s, l) = s :: l
413                  | fflat (CONCAT (x, STRING s), l) = fflat (x, s :: l)
414                  | fflat (CONCAT (x, CONCAT (y, z)), l) =
415                    fflat (CONCAT (CONCAT (x, y), z), l)
416    
417                (* Flattening runs in linear time.
418                 * We simultaneously use this loop to set the high bits in
419                 * codes that correspond to shared nodes.  The positions of
420                 * these codes are given by high-to-low sorted list of
421                 * integers. *)
422                fun flat (x, (_, [], l)) = fflat (x, l)
423                  | flat (STRING s, (p, h :: t, l)) =
424                    #3 (add (s, p, h, t, l))
425                  | flat (CONCAT (x, STRING s), (p, h :: t, l)) =
426                    flat (x, add (s, p, h, t, l))
427                  | flat (CONCAT (x, CONCAT (y, z)), phtl) =
428                    flat (CONCAT (CONCAT (x, y), z), phtl)
429          in          in
430               concat (flat (pr, []))              concat (flat (pr, (next, rev (si_list si), [])))
431          end          end
432      in      in
433          fun pickle emptyMap p = let          fun pickle emptyMap p = let
434              val (_, pr, _) = p (HCM.empty, PM.empty, emptyMap, 0)              val (_, pr, (_, _, _, next, si)) =
435                     p (HCM.empty, PM.empty, emptyMap, 0, si_empty)
436          in          in
437              pr2s pr              pr2s (pr, next, si)
438          end          end
439      end      end
440    
441      type ('b_ahm, 'a_ahm) map_lifter =      type ('b_ahm, 'a_ahm) map_lifter =
442          { extract: 'a_ahm -> 'b_ahm, patchback: 'a_ahm * 'b_ahm -> 'a_ahm }          { extract: 'a_ahm -> 'b_ahm, patchback: 'a_ahm * 'b_ahm -> 'a_ahm }
443    
444      fun lift_pickler { extract, patchback } wb b (hcm, fwdm, a_ahm, next) = let      fun lift_pickler { extract, patchback } wb b (hcm, fwdm, a_ahm, next, si) =
445          val b_ahm = extract a_ahm          let val b_ahm = extract a_ahm
446          val (codes, pr, (hcm', fwdm', b_ahm', next')) =              val (codes, pr, (hcm', fwdm', b_ahm', next', si')) =
447              wb b (hcm, fwdm, b_ahm, next)                  wb b (hcm, fwdm, b_ahm, next, si)
448          val a_ahm' = patchback (a_ahm, b_ahm')          val a_ahm' = patchback (a_ahm, b_ahm')
449      in      in
450          (codes, pr, (hcm', fwdm', a_ahm', next'))              (codes, pr, (hcm', fwdm', a_ahm', next', si'))
451      end      end
452    
453      (* for export *)      (* for export *)

Legend:
Removed from v.709  
changed lines
  Added in v.710

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0