Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /smlnj-lib/trunk/HashCons/hash-cons-set.sml
ViewVC logotype

Annotation of /smlnj-lib/trunk/HashCons/hash-cons-set.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6159 - (view) (download)

1 : jhr 967 (* hash-cons-set.sml
2 :     *
3 : jhr 3580 * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org)
4 :     * All rights reserved.
5 : jhr 967 *
6 : jhr 6159 * This is an implementation of the HASH_CONS_SET signature that uses Red-Black
7 :     * trees. Eventually, it should be replaced by an implmementation that uses
8 :     * Patricia trees.
9 : jhr 967 *)
10 :    
11 :     structure HashConsSet : HASH_CONS_SET =
12 :     struct
13 :    
14 :     structure HC = HashCons
15 :    
16 : jhr 6159 datatype color = R | B
17 :    
18 : jhr 967 type 'a obj = 'a HC.obj
19 :    
20 : jhr 6159 datatype 'a tree
21 :     = E
22 :     | T of (color * 'a tree * 'a obj * 'a tree)
23 : jhr 967
24 : jhr 6159 datatype 'a set = SET of (int * 'a tree)
25 : jhr 967
26 : jhr 6159 (* NOTE: we should use the Word.< and = operations instead of Word.compare *)
27 :     fun cmpObj (a : 'a obj, b : 'a obj) = Word.compare(#tag a, #tag b)
28 : jhr 967
29 : jhr 6159 fun isEmpty (SET(_, E)) = true
30 :     | isEmpty _ = false
31 : jhr 967
32 : jhr 6159 val empty = SET(0, E)
33 : jhr 967
34 : jhr 6159 fun singleton x = SET(1, T(B, E, x, E))
35 : jhr 967
36 : jhr 6159 fun add (SET(nItems, m), x) = let
37 :     val nItems' = ref nItems
38 :     fun ins E = (nItems' := nItems+1; T(R, E, x, E))
39 :     | ins (s as T(color, a, y, b)) = (case cmpObj(x, y)
40 :     of LESS => (case a
41 :     of T(R, c, z, d) => (case cmpObj(x, z)
42 :     of LESS => (case ins c
43 :     of T(R, e, w, f) => T(R, T(B,e,w,f), z, T(B,d,y,b))
44 :     | c => T(B, T(R,c,z,d), y, b)
45 :     (* end case *))
46 :     | EQUAL => T(color, T(R, c, x, d), y, b)
47 :     | GREATER => (case ins d
48 :     of T(R, e, w, f) => T(R, T(B,c,z,e), w, T(B,f,y,b))
49 :     | d => T(B, T(R,c,z,d), y, b)
50 :     (* end case *))
51 :     (* end case *))
52 :     | _ => T(B, ins a, y, b)
53 :     (* end case *))
54 :     | EQUAL => T(color, a, x, b)
55 :     | GREATER => (case b
56 :     of T(R, c, z, d) => (case cmpObj(x, z)
57 :     of LESS => (case ins c
58 :     of T(R, e, w, f) => T(R, T(B,a,y,e), w, T(B,f,z,d))
59 :     | c => T(B, a, y, T(R,c,z,d))
60 :     (* end case *))
61 :     | EQUAL => T(color, a, y, T(R, c, x, d))
62 :     | GREATER => (case ins d
63 :     of T(R, e, w, f) => T(R, T(B,a,y,c), z, T(B,e,w,f))
64 :     | d => T(B, a, y, T(R,c,z,d))
65 :     (* end case *))
66 :     (* end case *))
67 :     | _ => T(B, a, y, ins b)
68 :     (* end case *))
69 :     (* end case *))
70 :     val T(_, a, y, b) = ins m
71 :     in
72 :     SET(!nItems', T(B, a, y, b))
73 :     end
74 :     fun add' (x, m) = add (m, x)
75 : jhr 967
76 : jhr 6159 fun addList (s, []) = s
77 :     | addList (s, x::r) = addList(add(s, x), r)
78 :    
79 :     (* Remove an item. Raises LibBase.NotFound if not found. *)
80 :     local
81 :     datatype 'a zipper
82 :     = TOP
83 :     | LEFT of (color * 'a obj * 'a tree * 'a zipper)
84 :     | RIGHT of (color * 'a tree * 'a obj * 'a zipper)
85 :     in
86 :     fun delete (SET(nItems, t), k) = let
87 :     (* zip the zipper *)
88 :     fun zip (TOP, t) = t
89 :     | zip (LEFT(color, x, b, p), a) = zip(p, T(color, a, x, b))
90 :     | zip (RIGHT(color, a, x, p), b) = zip(p, T(color, a, x, b))
91 :     (* zip the zipper while resolving a black deficit *)
92 :     fun fixupZip (TOP, t) = (true, t)
93 :     (* case 1 from CLR *)
94 :     | fixupZip (LEFT(B, x, T(R, a, y, b), p), t) = (case a
95 :     of T(_, T(R, a11, w, a12), z, a2) => (* case 1L ==> case 3L ==> case 4L *)
96 :     (false, zip (p, T(B, T(R, T(B, t, x, a11), w, T(B, a12, z, a2)), y, b)))
97 :     | T(_, a1, z, T(R, a21, w, t22)) => (* case 1L ==> case 4L *)
98 :     (false, zip (p, T(B, T(R, T(B, t, x, a1), z, T(B, a21, w, t22)), y, b)))
99 :     | T(_, a1, z, a2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *)
100 :     (false, zip (p, T(B, T(B, t, x, T(R, a1, z, a2)), y, b)))
101 :     | _ => fixupZip (LEFT(R, x, a, LEFT(B, y, b, p)), t)
102 :     (* end case *))
103 :     | fixupZip (RIGHT(B, T(R, a, x, b), y, p), t) = (case b
104 :     of T(_, b1, z, T(R, b21, w, b22)) => (* case 1R ==> case 3R ==> case 4R *)
105 :     (false, zip (p, T(B, a, x, T(R, T(B, b1, z, b21), w, T(B, b22, y, t)))))
106 :     | T(_, T(R, b11, w, b12), z, b2) => (* case 1R ==> case 4R *)
107 :     (false, zip (p, T(B, a, x, T(R, T(B, b11, w, b12), z, T(B, b2, y, t)))))
108 :     | T(_, b1, z, b2) => (* case 1L ==> case 2L; rotate + recolor fixes deficit *)
109 :     (false, zip (p, T(B, a, x, T(B, T(R, b1, z, b2), y, t))))
110 :     | _ => fixupZip (RIGHT(R, b, y, RIGHT(B, a, x, p)), t)
111 :     (* end case *))
112 :     (* case 3 from CLR *)
113 :     | fixupZip (LEFT(color, x, T(B, T(R, a1, y, a2), z, b), p), t) =
114 :     (* case 3L ==> case 4L *)
115 :     (false, zip (p, T(color, T(B, t, x, a1), y, T(B, a2, z, b))))
116 :     | fixupZip (RIGHT(color, T(B, a, x, T(R, b1, y, b2)), z, p), t) =
117 :     (* case 3R ==> case 4R; rotate, recolor, plus rotate fixes deficit *)
118 :     (false, zip (p, T(color, T(B, a, x, b1), y, T(B, b2, z, t))))
119 :     (* case 4 from CLR *)
120 :     | fixupZip (LEFT(color, x, T(B, a, y, T(R, b1, z, b2)), p), t) =
121 :     (false, zip (p, T(color, T(B, t, x, a), y, T(B, b1, z, b2))))
122 :     | fixupZip (RIGHT(color, T(B, T(R, a1, z, a2), x, b), y, p), t) =
123 :     (false, zip (p, T(color, T(B, a1, z, a2), x, T(B, b, y, t))))
124 :     (* case 2 from CLR; note that "a" and "b" are guaranteed to be black, since we did
125 :     * not match cases 3 or 4.
126 :     *)
127 :     | fixupZip (LEFT(R, x, T(B, a, y, b), p), t) =
128 :     (false, zip (p, T(B, t, x, T(R, a, y, b))))
129 :     | fixupZip (LEFT(B, x, T(B, a, y, b), p), t) =
130 :     fixupZip (p, T(B, t, x, T(R, a, y, b)))
131 :     | fixupZip (RIGHT(R, T(B, a, x, b), y, p), t) =
132 :     (false, zip (p, T(B, T(R, a, x, b), y, t)))
133 :     | fixupZip (RIGHT(B, T(B, a, x, b), y, p), t) =
134 :     fixupZip (p, T(B, T(R, a, x, b), y, t))
135 :     (* push deficit up the tree by recoloring a black node as red *)
136 :     | fixupZip (LEFT(_, y, E, p), t) = fixupZip (p, T(R, t, y, E))
137 :     | fixupZip (RIGHT(_, E, y, p), t) = fixupZip (p, T(R, E, y, t))
138 :     (* impossible cases that violate the red invariant *)
139 :     | fixupZip _ = raise Fail "Red invariant violation"
140 :     (* delete the minimum value from a non-empty tree, returning a triple
141 :     * (elem, bd, tr), where elem is the minimum element, tr is the residual
142 :     * tree with elem removed, and bd is true if tr has a black-depth that is
143 :     * less than the original tree.
144 :     *)
145 :     fun delMin (T(R, E, y, b), p) =
146 :     (* replace the node by its right subtree (which must be E) *)
147 :     (y, false, zip(p, b))
148 :     | delMin (T(B, E, y, T(R, a', y', b')), p) =
149 :     (* replace the node with its right child, while recoloring the child black to
150 :     * preserve the black invariant.
151 :     *)
152 :     (y, false, zip (p, T(B, a', y', b')))
153 :     | delMin (T(B, E, y, E), p) = let
154 :     (* delete the node, which reduces the black-depth by one, so we attempt to fix
155 :     * the deficit on the path back.
156 :     *)
157 :     val (blkDeficit, t) = fixupZip (p, E)
158 :     in
159 :     (y, blkDeficit, t)
160 :     end
161 :     | delMin (T(color, a, y, b), z) = delMin(a, LEFT(color, y, b, z))
162 :     | delMin (E, _) = raise Match
163 :     fun del (E, z) = raise LibBase.NotFound
164 :     | del (T(color, a, y, b), p) = (case cmpObj(k, y)
165 :     of LESS => del (a, LEFT(color, y, b, p))
166 :     | EQUAL => (case (color, a, b)
167 :     of (R, E, E) => zip(p, E)
168 :     | (B, E, E) => #2 (fixupZip (p, E))
169 :     | (_, T(_, a', y', b'), E) =>
170 :     (* node is black and left child is red; we replace the node with its
171 :     * left child recolored to black.
172 :     *)
173 :     zip(p, T(B, a', y', b'))
174 :     | (_, E, T(_, a', y', b')) =>
175 :     (* node is black and right child is red; we replace the node with its
176 :     * right child recolored to black.
177 :     *)
178 :     zip(p, T(B, a', y', b'))
179 :     | _ => let
180 :     val (minSucc, blkDeficit, b) = delMin (b, TOP)
181 :     in
182 :     if blkDeficit
183 :     then #2 (fixupZip (RIGHT(color, a, minSucc, p), b))
184 :     else zip (p, T(color, a, minSucc, b))
185 :     end
186 :     (* end case *))
187 :     | GREATER => del (b, RIGHT(color, a, y, p))
188 :     (* end case *))
189 :     in
190 :     case del(t, TOP)
191 :     of T(R, a, x, b) => SET(nItems-1, T(B, a, x, b))
192 :     | t => SET(nItems-1, t)
193 :     (* end case *)
194 :     end
195 :     end (* local *)
196 :    
197 :     (* Return true if and only if item is an element in the set *)
198 :     fun member (SET(_, t), k) = let
199 :     fun find' E = false
200 :     | find' (T(_, a, y, b)) = (case cmpObj(k, y)
201 :     of LESS => find' a
202 :     | EQUAL => true
203 :     | GREATER => find' b
204 :     (* end case *))
205 :     in
206 :     find' t
207 :     end
208 :    
209 :     (* Return the number of items in the map *)
210 :     fun numItems (SET(n, _)) = n
211 :    
212 :     fun fold f = let
213 :     fun foldf (E, accum) = accum
214 :     | foldf (T(_, a, x, b), accum) =
215 :     foldf(b, f(x, foldf(a, accum)))
216 :     in
217 :     fn init => fn (SET(_, m)) => foldf(m, init)
218 :     end
219 :    
220 :     val foldl = fold (* DEPRECATED *)
221 :     val foldr = fold (* DEPRECATED *)
222 :    
223 :     (* return an ordered list of the items in the set. *)
224 :     fun toList s = foldr (fn (x, l) => x::l) [] s
225 :    
226 :     (* functions for walking the tree while keeping a stack of parents
227 :     * to be visited.
228 :     *)
229 :     fun next ((t as T(_, _, _, b))::rest) = (t, left(b, rest))
230 :     | next _ = (E, [])
231 :     and left (E, rest) = rest
232 :     | left (t as T(_, a, _, _), rest) = left(a, t::rest)
233 :     fun start m = left(m, [])
234 :    
235 :     (* Return true if and only if the two sets are equal *)
236 :     fun equal (SET(_, s1), SET(_, s2)) = let
237 :     fun cmp (t1, t2) = (case (next t1, next t2)
238 :     of ((E, _), (E, _)) => true
239 :     | ((E, _), _) => false
240 :     | (_, (E, _)) => false
241 :     | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => (
242 :     case cmpObj(x, y)
243 :     of EQUAL => cmp (r1, r2)
244 :     | _ => false
245 :     (* end case *))
246 :     (* end case *))
247 :     in
248 :     cmp (start s1, start s2)
249 :     end
250 :    
251 :     (* Return the lexical order of two sets *)
252 :     fun compare (SET(_, s1), SET(_, s2)) = let
253 :     fun cmp (t1, t2) = (case (next t1, next t2)
254 :     of ((E, _), (E, _)) => EQUAL
255 :     | ((E, _), _) => LESS
256 :     | (_, (E, _)) => GREATER
257 :     | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => (
258 :     case cmpObj(x, y)
259 :     of EQUAL => cmp (r1, r2)
260 :     | order => order
261 :     (* end case *))
262 :     (* end case *))
263 :     in
264 :     cmp (start s1, start s2)
265 :     end
266 :    
267 :     (* Return true if and only if the first set is a subset of the second *)
268 :     fun isSubset (SET(_, s1), SET(_, s2)) = let
269 :     fun cmp (t1, t2) = (case (next t1, next t2)
270 :     of ((E, _), (E, _)) => true
271 :     | ((E, _), _) => true
272 :     | (_, (E, _)) => false
273 :     | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => (
274 :     case cmpObj(x, y)
275 :     of LESS => false
276 :     | EQUAL => cmp (r1, r2)
277 :     | GREATER => cmp (t1, r2)
278 :     (* end case *))
279 :     (* end case *))
280 :     in
281 :     cmp (start s1, start s2)
282 :     end
283 :    
284 :     (* Return true if the two sets are disjoint *)
285 :     fun disjoint (SET(0, _), _) = true
286 :     | disjoint (_, SET(0, _)) = true
287 :     | disjoint (SET(_, s1), SET(_, s2)) = let
288 :     fun walk ((E, _), _) = true
289 :     | walk (_, (E, _)) = true
290 :     | walk (t1 as (T(_, _, x, _), r1), t2 as (T(_, _, y, _), r2)) = (
291 :     case cmpObj(x, y)
292 :     of LESS => walk (next r1, t2)
293 :     | EQUAL => false
294 :     | GREATER => walk (t1, next r2)
295 :     (* end case *))
296 :     in
297 :     walk (next (start s1), next (start s2))
298 :     end
299 :    
300 :     (* support for constructing red-black trees in linear time from increasing
301 :     * ordered sequences (based on a description by R. Hinze). Note that the
302 :     * elements in the digits are ordered with the largest on the left, whereas
303 :     * the elements of the trees are ordered with the largest on the right.
304 :     *)
305 :     datatype 'a digit
306 :     = ZERO
307 :     | ONE of ('a obj * 'a tree * 'a digit)
308 :     | TWO of ('a obj * 'a tree * 'a obj * 'a tree * 'a digit)
309 :     (* add an item that is guaranteed to be larger than any in l *)
310 :     fun addItem (a, l) = let
311 :     fun incr (a, t, ZERO) = ONE(a, t, ZERO)
312 :     | incr (a1, t1, ONE(a2, t2, r)) = TWO(a1, t1, a2, t2, r)
313 :     | incr (a1, t1, TWO(a2, t2, a3, t3, r)) =
314 :     ONE(a1, t1, incr(a2, T(B, t3, a3, t2), r))
315 :     in
316 :     incr(a, E, l)
317 :     end
318 :     (* link the digits into a tree *)
319 :     fun linkAll t = let
320 :     fun link (t, ZERO) = t
321 :     | link (t1, ONE(a, t2, r)) = link(T(B, t2, a, t1), r)
322 :     | link (t, TWO(a1, t1, a2, t2, r)) =
323 :     link(T(B, T(R, t2, a2, t1), a1, t), r)
324 :     in
325 :     link (E, t)
326 :     end
327 :    
328 :     (* create a set from a list of items; this function works in linear time if the list
329 :     * is in increasing order.
330 :     *)
331 :     fun fromList [] = empty
332 :     | fromList (first::rest) = let
333 :     fun add (prev, x::xs, n, accum) = (case cmpObj(prev, x)
334 :     of LESS => add(x, xs, n+1, addItem(x, accum))
335 :     | _ => (* list not in order, so fall back to addList code *)
336 :     addList(SET(n, linkAll accum), x::xs)
337 :     (* end case *))
338 :     | add (_, [], n, accum) = SET(n, linkAll accum)
339 :     in
340 :     add (first, rest, 1, addItem(first, ZERO))
341 :     end
342 :    
343 :     (* return the union of the two sets *)
344 :     fun union (SET(_, s1), SET(_, s2)) = let
345 :     fun ins ((E, _), n, result) = (n, result)
346 :     | ins ((T(_, _, x, _), r), n, result) =
347 :     ins(next r, n+1, addItem(x, result))
348 :     fun union' (t1, t2, n, result) = (case (next t1, next t2)
349 :     of ((E, _), (E, _)) => (n, result)
350 :     | ((E, _), t2) => ins(t2, n, result)
351 :     | (t1, (E, _)) => ins(t1, n, result)
352 :     | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => (
353 :     case cmpObj(x, y)
354 :     of LESS => union' (r1, t2, n+1, addItem(x, result))
355 :     | EQUAL => union' (r1, r2, n+1, addItem(x, result))
356 :     | GREATER => union' (t1, r2, n+1, addItem(y, result))
357 :     (* end case *))
358 :     (* end case *))
359 :     val (n, result) = union' (start s1, start s2, 0, ZERO)
360 :     in
361 :     SET(n, linkAll result)
362 :     end
363 :    
364 :     (* return the intersection of the two sets *)
365 :     fun intersection (SET(_, s1), SET(_, s2)) = let
366 :     fun intersect (t1, t2, n, result) = (case (next t1, next t2)
367 :     of ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => (
368 :     case cmpObj(x, y)
369 :     of LESS => intersect (r1, t2, n, result)
370 :     | EQUAL => intersect (r1, r2, n+1, addItem(x, result))
371 :     | GREATER => intersect (t1, r2, n, result)
372 :     (* end case *))
373 :     | _ => (n, result)
374 :     (* end case *))
375 :     val (n, result) = intersect (start s1, start s2, 0, ZERO)
376 :     in
377 :     SET(n, linkAll result)
378 :     end
379 :    
380 :     (* return the set difference *)
381 :     fun difference (SET(_, s1), SET(_, s2)) = let
382 :     fun ins ((E, _), n, result) = (n, result)
383 :     | ins ((T(_, _, x, _), r), n, result) =
384 :     ins(next r, n+1, addItem(x, result))
385 :     fun diff (t1, t2, n, result) = (case (next t1, next t2)
386 :     of ((E, _), _) => (n, result)
387 :     | (t1, (E, _)) => ins(t1, n, result)
388 :     | ((T(_, _, x, _), r1), (T(_, _, y, _), r2)) => (
389 :     case cmpObj(x, y)
390 :     of LESS => diff (r1, t2, n+1, addItem(x, result))
391 :     | EQUAL => diff (r1, r2, n, result)
392 :     | GREATER => diff (t1, r2, n, result)
393 :     (* end case *))
394 :     (* end case *))
395 :     val (n, result) = diff (start s1, start s2, 0, ZERO)
396 :     in
397 :     SET(n, linkAll result)
398 :     end
399 :    
400 :     fun subtract (s, item) = difference (s, singleton item)
401 :     fun subtract' (item, s) = subtract (s, item)
402 :    
403 :     fun subtractList (l, items) = let
404 :     val items' = List.foldl (fn (x, set) => add(set, x)) (SET(0, E)) items
405 :     in
406 :     difference (l, items')
407 :     end
408 :    
409 :     fun app f = let
410 :     fun appf E = ()
411 :     | appf (T(_, a, x, b)) = (appf a; f x; appf b)
412 :     in
413 :     fn (SET(_, m)) => appf m
414 :     end
415 :    
416 :     fun map f = let
417 :     fun addf (x, m) = add(m, f x)
418 :     in
419 :     foldl addf empty
420 :     end
421 :    
422 :     fun mapPartial f (SET(_, m)) = let
423 :     fun mapf (E, acc) = acc
424 :     | mapf (T(_, a, x, b), acc) = (case f x
425 :     of SOME y => mapf (b, mapf (a, add (acc, y)))
426 :     | NONE => mapf (b, mapf (a, acc))
427 :     (* end case *))
428 :     in
429 :     mapf (m, empty)
430 :     end
431 :    
432 :     (* Filter out those elements of the set that do not satisfy the
433 :     * predicate. The filtering is done in increasing map order.
434 :     *)
435 :     fun filter pred (SET(_, t)) = let
436 :     fun walk (E, n, result) = (n, result)
437 :     | walk (T(_, a, x, b), n, result) = let
438 :     val (n, result) = walk(a, n, result)
439 :     in
440 :     if (pred x)
441 :     then walk(b, n+1, addItem(x, result))
442 :     else walk(b, n, result)
443 :     end
444 :     val (n, result) = walk (t, 0, ZERO)
445 :     in
446 :     SET(n, linkAll result)
447 :     end
448 :    
449 :     fun partition pred (SET(_, t)) = let
450 :     fun walk (E, n1, result1, n2, result2) = (n1, result1, n2, result2)
451 :     | walk (T(_, a, x, b), n1, result1, n2, result2) = let
452 :     val (n1, result1, n2, result2) = walk(a, n1, result1, n2, result2)
453 :     in
454 :     if (pred x)
455 :     then walk(b, n1+1, addItem(x, result1), n2, result2)
456 :     else walk(b, n1, result1, n2+1, addItem(x, result2))
457 :     end
458 :     val (n1, result1, n2, result2) = walk (t, 0, ZERO, 0, ZERO)
459 :     in
460 :     (SET(n1, linkAll result1), SET(n2, linkAll result2))
461 :     end
462 :    
463 :     fun exists pred = let
464 :     fun test E = false
465 :     | test (T(_, a, x, b)) = test a orelse pred x orelse test b
466 :     in
467 :     fn (SET(_, t)) => test t
468 :     end
469 :    
470 :     fun all pred = let
471 :     fun test E = true
472 :     | test (T(_, a, x, b)) = test a andalso pred x andalso test b
473 :     in
474 :     fn (SET(_, t)) => test t
475 :     end
476 :    
477 :     fun find pred = let
478 :     fun test E = NONE
479 :     | test (T(_, a, x, b)) = (case test a
480 :     of NONE => if pred x then SOME x else test b
481 :     | someItem => someItem
482 :     (* end case *))
483 :     in
484 :     fn (SET(_, t)) => test t
485 :     end
486 :    
487 :     (* deprecated *)
488 :     val listItems = toList
489 :    
490 : jhr 967 end

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0