Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /sml/trunk/src/smlnj-lib/Util/int-binary-set.sml
ViewVC logotype

Annotation of /sml/trunk/src/smlnj-lib/Util/int-binary-set.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 651 - (view) (download)

1 : monnier 2 (* int-binary-set.sml
2 :     *
3 :     * COPYRIGHT (c) 1993 by AT&T Bell Laboratories. See COPYRIGHT file for details.
4 :     *
5 :     * This code was adapted from Stephen Adams' binary tree implementation
6 :     * of applicative integer sets.
7 :     *
8 :     * Copyright 1992 Stephen Adams.
9 :     *
10 :     * This software may be used freely provided that:
11 :     * 1. This copyright notice is attached to any copy, derived work,
12 :     * or work including all or part of this software.
13 :     * 2. Any derived work must contain a prominent notice stating that
14 :     * it has been altered from the original.
15 :     *
16 :     * Altered to conform to SML library interface - Emden Gansner
17 :     *
18 :     *
19 :     * Name(s): Stephen Adams.
20 :     * Department, Institution: Electronics & Computer Science,
21 :     * University of Southampton
22 :     * Address: Electronics & Computer Science
23 :     * University of Southampton
24 :     * Southampton SO9 5NH
25 :     * Great Britian
26 :     * E-mail: sra@ecs.soton.ac.uk
27 :     *
28 :     * Comments:
29 :     *
30 :     * 1. The implementation is based on Binary search trees of Bounded
31 :     * Balance, similar to Nievergelt & Reingold, SIAM J. Computing
32 :     * 2(1), March 1973. The main advantage of these trees is that
33 :     * they keep the size of the tree in the node, giving a constant
34 :     * time size operation.
35 :     *
36 :     * 2. The bounded balance criterion is simpler than N&R's alpha.
37 :     * Simply, one subtree must not have more than `weight' times as
38 :     * many elements as the opposite subtree. Rebalancing is
39 :     * guaranteed to reinstate the criterion for weight>2.23, but
40 :     * the occasional incorrect behaviour for weight=2 is not
41 :     * detrimental to performance.
42 :     *
43 :     * 3. There are two implementations of union. The default,
44 :     * hedge_union, is much more complex and usually 20% faster. I
45 :     * am not sure that the performance increase warrants the
46 :     * complexity (and time it took to write), but I am leaving it
47 :     * in for the competition. It is derived from the original
48 :     * union by replacing the split_lt(gt) operations with a lazy
49 :     * version. The `obvious' version is called old_union.
50 :     *
51 :     * 4. Most time is spent in T', the rebalancing constructor. If my
52 :     * understanding of the output of *<file> in the sml batch
53 :     * compiler is correct then the code produced by NJSML 0.75
54 :     * (sparc) for the final case is very disappointing. Most
55 :     * invocations fall through to this case and most of these cases
56 :     * fall to the else part, i.e. the plain contructor,
57 :     * T(v,ln+rn+1,l,r). The poor code allocates a 16 word vector
58 :     * and saves lots of registers into it. In the common case it
59 :     * then retrieves a few of the registers and allocates the 5
60 :     * word T node. The values that it retrieves were live in
61 :     * registers before the massive save.
62 :     *)
63 :    
64 :     structure IntBinarySet :> ORD_SET where type Key.ord_key = Int.int =
65 :     struct
66 :    
67 :     structure Key =
68 :     struct
69 :     type ord_key = Int.int
70 :     val compare = Int.compare
71 :     end
72 :    
73 :     type item = Key.ord_key
74 :    
75 :     datatype set
76 :     = E
77 :     | T of {
78 :     elt : item,
79 :     cnt : int,
80 :     left : set,
81 :     right : set
82 :     }
83 :    
84 :     fun numItems E = 0
85 :     | numItems (T{cnt,...}) = cnt
86 :    
87 :     fun isEmpty E = true
88 :     | isEmpty _ = false
89 :    
90 :     fun mkT(v,n,l,r) = T{elt=v,cnt=n,left=l,right=r}
91 :    
92 :     (* N(v,l,r) = T(v,1+numItems(l)+numItems(r),l,r) *)
93 :     fun N(v,E,E) = mkT(v,1,E,E)
94 :     | N(v,E,r as T{cnt=n,...}) = mkT(v,n+1,E,r)
95 :     | N(v,l as T{cnt=n,...}, E) = mkT(v,n+1,l,E)
96 :     | N(v,l as T{cnt=n,...}, r as T{cnt=m,...}) = mkT(v,n+m+1,l,r)
97 :    
98 :     fun single_L (a,x,T{elt=b,left=y,right=z,...}) = N(b,N(a,x,y),z)
99 :     | single_L _ = raise Match
100 :     fun single_R (b,T{elt=a,left=x,right=y,...},z) = N(a,x,N(b,y,z))
101 :     | single_R _ = raise Match
102 :     fun double_L (a,w,T{elt=c,left=T{elt=b,left=x,right=y,...},right=z,...}) =
103 :     N(b,N(a,w,x),N(c,y,z))
104 :     | double_L _ = raise Match
105 :     fun double_R (c,T{elt=a,left=w,right=T{elt=b,left=x,right=y,...},...},z) =
106 :     N(b,N(a,w,x),N(c,y,z))
107 :     | double_R _ = raise Match
108 :    
109 :     (*
110 :     ** val weight = 3
111 :     ** fun wt i = weight * i
112 :     *)
113 :     fun wt (i : int) = i + i + i
114 :    
115 :     fun T' (v,E,E) = mkT(v,1,E,E)
116 :     | T' (v,E,r as T{left=E,right=E,...}) = mkT(v,2,E,r)
117 :     | T' (v,l as T{left=E,right=E,...},E) = mkT(v,2,l,E)
118 :    
119 :     | T' (p as (_,E,T{left=T _,right=E,...})) = double_L p
120 :     | T' (p as (_,T{left=E,right=T _,...},E)) = double_R p
121 :    
122 :     (* these cases almost never happen with small weight*)
123 :     | T' (p as (_,E,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...})) =
124 :     if ln<rn then single_L p else double_L p
125 :     | T' (p as (_,T{left=T{cnt=ln,...},right=T{cnt=rn,...},...},E)) =
126 :     if ln>rn then single_R p else double_R p
127 :    
128 :     | T' (p as (_,E,T{left=E,...})) = single_L p
129 :     | T' (p as (_,T{right=E,...},E)) = single_R p
130 :    
131 :     | T' (p as (v,l as T{elt=lv,cnt=ln,left=ll,right=lr},
132 :     r as T{elt=rv,cnt=rn,left=rl,right=rr})) =
133 :     if rn >= wt ln (*right is too big*)
134 :     then
135 :     let val rln = numItems rl
136 :     val rrn = numItems rr
137 :     in
138 :     if rln < rrn then single_L p else double_L p
139 :     end
140 :     else if ln >= wt rn (*left is too big*)
141 :     then
142 :     let val lln = numItems ll
143 :     val lrn = numItems lr
144 :     in
145 :     if lrn < lln then single_R p else double_R p
146 :     end
147 :     else mkT(v,ln+rn+1,l,r)
148 :    
149 :     fun add (E,x) = mkT(x,1,E,E)
150 :     | add (set as T{elt=v,left=l,right=r,cnt},x) = (
151 :     case Key.compare(x,v)
152 :     of LESS => T'(v,add(l,x),r)
153 :     | GREATER => T'(v,l,add(r,x))
154 :     | EQUAL => mkT(x,cnt,l,r)
155 :     (* end case *))
156 : monnier 29 fun add' (s, x) = add(x, s)
157 : monnier 2
158 :     fun concat3 (E,v,r) = add(r,v)
159 :     | concat3 (l,v,E) = add(l,v)
160 :     | concat3 (l as T{elt=v1,cnt=n1,left=l1,right=r1}, v,
161 :     r as T{elt=v2,cnt=n2,left=l2,right=r2}) =
162 :     if wt n1 < n2 then T'(v2,concat3(l,v,l2),r2)
163 :     else if wt n2 < n1 then T'(v1,l1,concat3(r1,v,r))
164 :     else N(v,l,r)
165 :    
166 :     fun split_lt (E,x) = E
167 :     | split_lt (T{elt=v,left=l,right=r,...},x) =
168 :     case Key.compare(v,x) of
169 :     GREATER => split_lt(l,x)
170 :     | LESS => concat3(l,v,split_lt(r,x))
171 :     | _ => l
172 :    
173 :     fun split_gt (E,x) = E
174 :     | split_gt (T{elt=v,left=l,right=r,...},x) =
175 :     case Key.compare(v,x) of
176 :     LESS => split_gt(r,x)
177 :     | GREATER => concat3(split_gt(l,x),v,r)
178 :     | _ => r
179 :    
180 :     fun min (T{elt=v,left=E,...}) = v
181 :     | min (T{left=l,...}) = min l
182 :     | min _ = raise Match
183 :    
184 :     fun delmin (T{left=E,right=r,...}) = r
185 :     | delmin (T{elt=v,left=l,right=r,...}) = T'(v,delmin l,r)
186 :     | delmin _ = raise Match
187 :    
188 :     fun delete' (E,r) = r
189 :     | delete' (l,E) = l
190 :     | delete' (l,r) = T'(min r,l,delmin r)
191 :    
192 :     fun concat (E, s) = s
193 :     | concat (s, E) = s
194 :     | concat (t1 as T{elt=v1,cnt=n1,left=l1,right=r1},
195 :     t2 as T{elt=v2,cnt=n2,left=l2,right=r2}) =
196 :     if wt n1 < n2 then T'(v2,concat(t1,l2),r2)
197 :     else if wt n2 < n1 then T'(v1,l1,concat(r1,t2))
198 :     else T'(min t2,t1, delmin t2)
199 :    
200 :    
201 :     local
202 :     fun trim (lo,hi,E) = E
203 :     | trim (lo,hi,s as T{elt=v,left=l,right=r,...}) =
204 :     if (v > lo)
205 :     then if (v < hi) then s else trim(lo,hi,l)
206 :     else trim(lo,hi,r)
207 :    
208 :     fun uni_bd (s,E,_,_) = s
209 :     | uni_bd (E,T{elt=v,left=l,right=r,...},lo,hi) =
210 :     concat3(split_gt(l,lo),v,split_lt(r,hi))
211 :     | uni_bd (T{elt=v,left=l1,right=r1,...},
212 :     s2 as T{elt=v2,left=l2,right=r2,...},lo,hi) =
213 :     concat3(uni_bd(l1,trim(lo,v,s2),lo,v),
214 :     v,
215 :     uni_bd(r1,trim(v,hi,s2),v,hi))
216 :     (* inv: lo < v < hi *)
217 :    
218 :     (* all the other versions of uni and trim are
219 :     * specializations of the above two functions with
220 :     * lo=-infinity and/or hi=+infinity
221 :     *)
222 :    
223 :     fun trim_lo (_, E) = E
224 :     | trim_lo (lo,s as T{elt=v,right=r,...}) =
225 :     case Key.compare(v,lo) of
226 :     GREATER => s
227 :     | _ => trim_lo(lo,r)
228 :    
229 :     fun trim_hi (_, E) = E
230 :     | trim_hi (hi,s as T{elt=v,left=l,...}) =
231 :     case Key.compare(v,hi) of
232 :     LESS => s
233 :     | _ => trim_hi(hi,l)
234 :    
235 :     fun uni_hi (s,E,_) = s
236 :     | uni_hi (E,T{elt=v,left=l,right=r,...},hi) =
237 :     concat3(l,v,split_lt(r,hi))
238 :     | uni_hi (T{elt=v,left=l1,right=r1,...},
239 :     s2 as T{elt=v2,left=l2,right=r2,...},hi) =
240 :     concat3(uni_hi(l1,trim_hi(v,s2),v),v,uni_bd(r1,trim(v,hi,s2),v,hi))
241 :    
242 :     fun uni_lo (s,E,_) = s
243 :     | uni_lo (E,T{elt=v,left=l,right=r,...},lo) =
244 :     concat3(split_gt(l,lo),v,r)
245 :     | uni_lo (T{elt=v,left=l1,right=r1,...},
246 :     s2 as T{elt=v2,left=l2,right=r2,...},lo) =
247 :     concat3(uni_bd(l1,trim(lo,v,s2),lo,v),v,uni_lo(r1,trim_lo(v,s2),v))
248 :    
249 :     fun uni (s,E) = s
250 :     | uni (E,s) = s
251 :     | uni (T{elt=v,left=l1,right=r1,...},
252 :     s2 as T{elt=v2,left=l2,right=r2,...}) =
253 :     concat3(uni_hi(l1,trim_hi(v,s2),v), v, uni_lo(r1,trim_lo(v,s2),v))
254 :    
255 :     in
256 :     val hedge_union = uni
257 :     end
258 :    
259 :     (* The old_union version is about 20% slower than
260 :     * hedge_union in most cases
261 :     *)
262 :     fun old_union (E,s2) = s2
263 :     | old_union (s1,E) = s1
264 :     | old_union (T{elt=v,left=l,right=r,...},s2) =
265 :     let val l2 = split_lt(s2,v)
266 :     val r2 = split_gt(s2,v)
267 :     in
268 :     concat3(old_union(l,l2),v,old_union(r,r2))
269 :     end
270 :    
271 :     val empty = E
272 :     fun singleton x = T{elt=x,cnt=1,left=E,right=E}
273 :    
274 :     fun addList (s,l) = List.foldl (fn (i,s) => add(s,i)) s l
275 :    
276 :     val add = add
277 :    
278 :     fun member (set, x) = let
279 :     fun pk E = false
280 :     | pk (T{elt=v, left=l, right=r, ...}) = (
281 :     case Key.compare(x,v)
282 :     of LESS => pk l
283 :     | EQUAL => true
284 :     | GREATER => pk r
285 :     (* end case *))
286 :     in
287 :     pk set
288 :     end
289 :    
290 :     local
291 :     (* true if every item in t is in t' *)
292 :     fun treeIn (t,t') = let
293 :     fun isIn E = true
294 :     | isIn (T{elt,left=E,right=E,...}) = member(t',elt)
295 :     | isIn (T{elt,left,right=E,...}) =
296 :     member(t',elt) andalso isIn left
297 :     | isIn (T{elt,left=E,right,...}) =
298 :     member(t',elt) andalso isIn right
299 :     | isIn (T{elt,left,right,...}) =
300 :     member(t',elt) andalso isIn left andalso isIn right
301 :     in
302 :     isIn t
303 :     end
304 :     in
305 :     fun isSubset (E,_) = true
306 :     | isSubset (_,E) = false
307 :     | isSubset (t as T{cnt=n,...},t' as T{cnt=n',...}) =
308 :     (n<=n') andalso treeIn (t,t')
309 :    
310 :     fun equal (E,E) = true
311 :     | equal (t as T{cnt=n,...},t' as T{cnt=n',...}) =
312 :     (n=n') andalso treeIn (t,t')
313 :     | equal _ = false
314 :     end
315 :    
316 :     local
317 :     fun next ((t as T{right, ...})::rest) = (t, left(right, rest))
318 :     | next _ = (E, [])
319 :     and left (E, rest) = rest
320 :     | left (t as T{left=l, ...}, rest) = left(l, t::rest)
321 :     in
322 :     fun compare (s1, s2) = let
323 :     fun cmp (t1, t2) = (case (next t1, next t2)
324 :     of ((E, _), (E, _)) => EQUAL
325 :     | ((E, _), _) => LESS
326 :     | (_, (E, _)) => GREATER
327 :     | ((T{elt=e1, ...}, r1), (T{elt=e2, ...}, r2)) => (
328 :     case Key.compare(e1, e2)
329 :     of EQUAL => cmp (r1, r2)
330 :     | order => order
331 :     (* end case *))
332 :     (* end case *))
333 :     in
334 :     cmp (left(s1, []), left(s2, []))
335 :     end
336 :     end
337 :    
338 :     fun delete (E,x) = raise LibBase.NotFound
339 :     | delete (set as T{elt=v,left=l,right=r,...},x) =
340 :     case Key.compare(x,v) of
341 :     LESS => T'(v,delete(l,x),r)
342 :     | GREATER => T'(v,l,delete(r,x))
343 :     | _ => delete'(l,r)
344 :    
345 :     val union = hedge_union
346 :    
347 :     fun intersection (E, _) = E
348 :     | intersection (_, E) = E
349 :     | intersection (s, T{elt=v,left=l,right=r,...}) = let
350 :     val l2 = split_lt(s,v)
351 :     val r2 = split_gt(s,v)
352 :     in
353 :     if member(s,v)
354 :     then concat3(intersection(l2,l),v,intersection(r2,r))
355 :     else concat(intersection(l2,l),intersection(r2,r))
356 :     end
357 :    
358 :     fun difference (E,s) = E
359 :     | difference (s,E) = s
360 :     | difference (s, T{elt=v,left=l,right=r,...}) =
361 :     let val l2 = split_lt(s,v)
362 :     val r2 = split_gt(s,v)
363 :     in
364 :     concat(difference(l2,l),difference(r2,r))
365 :     end
366 :    
367 :     fun map f set = let
368 :     fun map'(acc, E) = acc
369 :     | map'(acc, T{elt,left,right,...}) =
370 :     map' (add (map' (acc, left), f elt), right)
371 :     in
372 :     map' (E, set)
373 :     end
374 :    
375 :     fun app apf =
376 :     let fun apply E = ()
377 :     | apply (T{elt,left,right,...}) =
378 :     (apply left;apf elt; apply right)
379 :     in
380 :     apply
381 :     end
382 :    
383 :     fun foldl f b set = let
384 :     fun foldf (E, b) = b
385 :     | foldf (T{elt,left,right,...}, b) =
386 :     foldf (right, f(elt, foldf (left, b)))
387 :     in
388 :     foldf (set, b)
389 :     end
390 :    
391 :     fun foldr f b set = let
392 :     fun foldf (E, b) = b
393 :     | foldf (T{elt,left,right,...}, b) =
394 :     foldf (left, f(elt, foldf (right, b)))
395 :     in
396 :     foldf (set, b)
397 :     end
398 :    
399 :     fun listItems set = foldr (op::) [] set
400 :    
401 :     fun filter pred set =
402 :     foldl (fn (item, s) => if (pred item) then add(s, item) else s)
403 :     empty set
404 :    
405 :     fun find p E = NONE
406 :     | find p (T{elt,left,right,...}) = (case find p left
407 :     of NONE => if (p elt)
408 :     then SOME elt
409 :     else find p right
410 :     | a => a
411 :     (* end case *))
412 :    
413 :     fun exists p E = false
414 :     | exists p (T{elt, left, right,...}) =
415 :     (exists p left) orelse (p elt) orelse (exists p right)
416 :    
417 :     end (* IntBinarySet *)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0