1 : |
mblume |
1902 |
(* reg-exp-fn.sml
|
2 : |
|
|
*
|
3 : |
|
|
* COPYRIGHT (c) 2005
|
4 : |
|
|
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
5 : |
|
|
* Aaron Turon (adrassi@gmail.com)
|
6 : |
|
|
* All rights reserved.
|
7 : |
|
|
*
|
8 : |
|
|
* Regular expression representation and manipulation.
|
9 : |
|
|
*
|
10 : |
|
|
* The main points here are to:
|
11 : |
|
|
* (1) make it easy for an RE parser to construct
|
12 : |
|
|
* RE expressions
|
13 : |
|
|
* (2) canonicalize REs for effective comparison
|
14 : |
|
|
* (3) implement the RE derivatives algorithm
|
15 : |
|
|
*
|
16 : |
|
|
* See the implementation notes for details on the derivatives
|
17 : |
|
|
* algorithm and the canonicalization strategy.
|
18 : |
|
|
*)
|
19 : |
|
|
|
20 : |
|
|
structure RegExp : REG_EXP =
|
21 : |
|
|
struct
|
22 : |
|
|
|
23 : |
|
|
(* symbols (i.e., words) *)
|
24 : |
|
|
structure Sym =
|
25 : |
|
|
struct
|
26 : |
|
|
|
27 : |
|
|
structure W32 = Word32
|
28 : |
|
|
type point = W32.word
|
29 : |
|
|
|
30 : |
|
|
val compare = W32.compare
|
31 : |
|
|
val minPt : W32.word = 0w0
|
32 : |
|
|
val maxPt = W32.notb 0w0
|
33 : |
|
|
|
34 : |
|
|
fun succ (w : W32.word) =
|
35 : |
|
|
if w = W32.notb 0w0 then w
|
36 : |
|
|
else w + 0w1
|
37 : |
|
|
fun pred (w : W32.word) =
|
38 : |
|
|
if w = 0w0 then w
|
39 : |
|
|
else w - 0w1
|
40 : |
|
|
|
41 : |
|
|
fun isSucc (w1, w2) = (succ w1 = w2)
|
42 : |
|
|
|
43 : |
|
|
end
|
44 : |
|
|
|
45 : |
|
|
structure SymSet = IntervalSetFn(Sym)
|
46 : |
|
|
|
47 : |
|
|
type symbol = Sym.point
|
48 : |
|
|
type sym_set = SymSet.set
|
49 : |
|
|
|
50 : |
|
|
structure SIS = SymSet
|
51 : |
|
|
|
52 : |
|
|
(* REs *)
|
53 : |
|
|
datatype re
|
54 : |
|
|
= Epsilon (* matches the empty string *)
|
55 : |
|
|
| Any (* matches any single symbol *)
|
56 : |
|
|
| None (* matches nothing (i.e. the empty language) *)
|
57 : |
|
|
| SymSet of sym_set
|
58 : |
|
|
| Concat of re list
|
59 : |
|
|
| Closure of re
|
60 : |
|
|
| Op of (rator * re list) (* list length <> 1 and in sorted order *)
|
61 : |
|
|
| Not of re
|
62 : |
|
|
and rator = OR | AND | XOR
|
63 : |
|
|
|
64 : |
|
|
(* we give a total order to REs; this is useful for canonicalization *)
|
65 : |
|
|
fun compare (re1, re2) = let
|
66 : |
|
|
fun cmpOp (OR, OR) = EQUAL
|
67 : |
|
|
| cmpOp (OR, _) = LESS
|
68 : |
|
|
| cmpOp (_, OR) = GREATER
|
69 : |
|
|
| cmpOp (AND, AND) = EQUAL
|
70 : |
|
|
| cmpOp (AND, _) = LESS
|
71 : |
|
|
| cmpOp (_, AND) = GREATER
|
72 : |
|
|
| cmpOp (XOR, XOR) = EQUAL
|
73 : |
|
|
fun compareList (res1, res2) =
|
74 : |
|
|
List.collate compare (res1, res2)
|
75 : |
|
|
in
|
76 : |
|
|
case (re1, re2)
|
77 : |
|
|
of (Epsilon, Epsilon) => EQUAL
|
78 : |
|
|
| (Epsilon, _) => LESS
|
79 : |
|
|
| (_, Epsilon) => GREATER
|
80 : |
|
|
| (Any, Any) => EQUAL
|
81 : |
|
|
| (Any, _) => LESS
|
82 : |
|
|
| (_, Any) => GREATER
|
83 : |
|
|
| (None, None) => EQUAL
|
84 : |
|
|
| (None, _) => LESS
|
85 : |
|
|
| (_, None) => GREATER
|
86 : |
|
|
| (SymSet a, SymSet b) => SIS.compare(a, b)
|
87 : |
|
|
| (SymSet a, _) => LESS
|
88 : |
|
|
| (_, SymSet b) => GREATER
|
89 : |
|
|
| (Concat a, Concat b) => compareList(a, b)
|
90 : |
|
|
| (Concat a, _) => LESS
|
91 : |
|
|
| (_, Concat b) => GREATER
|
92 : |
|
|
| (Closure a, Closure b) => compare(a, b)
|
93 : |
|
|
| (Closure a, _) => LESS
|
94 : |
|
|
| (_, Closure b) => GREATER
|
95 : |
|
|
| (Op(op1, res1), Op(op2, res2)) => (case cmpOp (op1, op2)
|
96 : |
|
|
of EQUAL => compareList(res1, res2)
|
97 : |
|
|
| order => order
|
98 : |
|
|
(* end case *))
|
99 : |
|
|
| (Op _, _) => LESS
|
100 : |
|
|
| (_, Op _) => GREATER
|
101 : |
|
|
| (Not a, Not b) => compare(a, b)
|
102 : |
|
|
(* end case *)
|
103 : |
|
|
end
|
104 : |
|
|
(* val sort = ListMergeSort.sort (fn (re1, re2) => compare(re1, re2) = LESS) *)
|
105 : |
|
|
|
106 : |
|
|
(* primitive REs *)
|
107 : |
|
|
|
108 : |
|
|
val any = Any
|
109 : |
|
|
val none = None
|
110 : |
|
|
val epsilon = Epsilon
|
111 : |
|
|
|
112 : |
|
|
(* canonical constructors *)
|
113 : |
|
|
|
114 : |
|
|
fun mkSymSet c =
|
115 : |
|
|
if SIS.isEmpty c then None
|
116 : |
|
|
else if SIS.isUniverse c then Any
|
117 : |
|
|
else SymSet c
|
118 : |
|
|
|
119 : |
|
|
fun mkSym sym = mkSymSet (SIS.singleton sym)
|
120 : |
|
|
|
121 : |
|
|
fun mkConcat (re1, re2) = (case (re1, re2)
|
122 : |
|
|
of (Epsilon, re2) => re2
|
123 : |
|
|
| (re1, Epsilon) => re1
|
124 : |
|
|
| (None, _) => None
|
125 : |
|
|
| (_, None) => None
|
126 : |
|
|
| (Concat res1, Concat res2) => Concat(res1@res2)
|
127 : |
|
|
| (re1, Concat res2) => Concat(re1::res2)
|
128 : |
|
|
| (Concat res1, re2) => Concat(res1@[re2])
|
129 : |
|
|
| _ => Concat[re1, re2]
|
130 : |
|
|
(* end case *))
|
131 : |
|
|
|
132 : |
|
|
fun mkConcatList [] = Epsilon
|
133 : |
|
|
| mkConcatList (re::res) = mkConcat(re, mkConcatList res)
|
134 : |
|
|
|
135 : |
|
|
fun mkClosure (Epsilon) = Epsilon
|
136 : |
|
|
| mkClosure (None) = Epsilon
|
137 : |
|
|
| mkClosure (re as Closure _) = re
|
138 : |
|
|
| mkClosure re = Closure re
|
139 : |
|
|
|
140 : |
mblume |
1904 |
fun mergeSIS (inRes, mop) = let
|
141 : |
mblume |
1902 |
fun isSIS (SymSet _) = true
|
142 : |
|
|
| isSIS _ = false
|
143 : |
mblume |
1904 |
val (siss, res) = List.partition isSIS inRes
|
144 : |
mblume |
1902 |
in case siss
|
145 : |
mblume |
1904 |
of [] => inRes
|
146 : |
|
|
| [re] => inRes
|
147 : |
mblume |
1902 |
| sis::siss' => let
|
148 : |
|
|
fun wrapmop (SymSet s1, SymSet s2) =
|
149 : |
|
|
SymSet (mop (s1, s2))
|
150 : |
|
|
| wrapmop _ = raise Fail "BUG: wrapmop: SymSet expected"
|
151 : |
|
|
val merged = List.foldl wrapmop sis siss'
|
152 : |
|
|
fun reinsert (re1, []) = [re1]
|
153 : |
|
|
| reinsert (re1, re::res) = (case compare (re1, re)
|
154 : |
|
|
of LESS => re1::re::res
|
155 : |
|
|
| EQUAL => raise Fail "BUG: mergeSIS: only one SymSet expected"
|
156 : |
|
|
| GREATER => re::(reinsert (re1, res))
|
157 : |
|
|
(* end case *))
|
158 : |
|
|
in reinsert (merged, res)
|
159 : |
|
|
end
|
160 : |
|
|
end
|
161 : |
|
|
|
162 : |
|
|
fun mkOr (re1, re2) = let
|
163 : |
|
|
fun merge ([], res2) = res2
|
164 : |
|
|
| merge (res1, []) = res1
|
165 : |
|
|
| merge (re1::r1, re2::r2) = (case compare(re1, re2)
|
166 : |
|
|
of LESS => re1::merge(r1, re2::r2)
|
167 : |
|
|
| EQUAL => merge (re1::r1, r2)
|
168 : |
|
|
| GREATER => re2 :: merge(re1::r1, r2)
|
169 : |
|
|
(* end case *))
|
170 : |
|
|
fun mk (a, b) = (case mergeSIS(merge(a, b), SIS.union)
|
171 : |
|
|
of [] => None
|
172 : |
|
|
| [re] => re
|
173 : |
|
|
| res => Op(OR, res)
|
174 : |
|
|
(* end case *))
|
175 : |
|
|
in
|
176 : |
|
|
case (re1, re2)
|
177 : |
|
|
of (None, _) => re2
|
178 : |
|
|
| (_, None) => re1
|
179 : |
|
|
| (SymSet s1, SymSet s2) => mkSymSet (SIS.union (s1, s2))
|
180 : |
|
|
| (Op(OR, res1), Op(OR, res2)) => mk(res1, res2)
|
181 : |
|
|
| (Op(OR, res1), _) => mk(res1, [re2])
|
182 : |
|
|
| (_, Op(OR, res2)) => mk([re1], res2)
|
183 : |
|
|
| (re1, re2) => (case compare(re1, re2)
|
184 : |
|
|
of LESS => Op(OR, [re1, re2])
|
185 : |
|
|
| EQUAL => re1
|
186 : |
|
|
| GREATER => Op(OR, [re2, re1])
|
187 : |
|
|
(* end case *))
|
188 : |
|
|
(* end case *)
|
189 : |
|
|
end
|
190 : |
|
|
|
191 : |
|
|
fun mkAnd (re1, re2) = let
|
192 : |
|
|
fun merge ([], res2) = res2
|
193 : |
|
|
| merge (res1, []) = res1
|
194 : |
|
|
| merge (re1::r1, re2::r2) = (case compare(re1, re2)
|
195 : |
|
|
of LESS => re1::merge(r1, re2::r2)
|
196 : |
|
|
| EQUAL => merge (re1::r1, r2)
|
197 : |
|
|
| GREATER => re2 :: merge(re1::r1, r2)
|
198 : |
|
|
(* end case *))
|
199 : |
|
|
fun mk (a, b) = (case mergeSIS(merge(a, b), SIS.intersect)
|
200 : |
|
|
of [] => None
|
201 : |
|
|
| [re] => re
|
202 : |
|
|
| res => Op(AND, res)
|
203 : |
|
|
(* end case *))
|
204 : |
|
|
in
|
205 : |
|
|
case (re1, re2)
|
206 : |
|
|
of (None, _) => None
|
207 : |
|
|
| (_, None) => None
|
208 : |
|
|
| (SymSet s1, SymSet s2) => mkSymSet (SIS.intersect (s1, s2))
|
209 : |
|
|
| (Op(AND, res1), Op(AND, res2)) => mk(res1, res2)
|
210 : |
|
|
| (Op(AND, res1), _) => mk(res1, [re2])
|
211 : |
|
|
| (_, Op(AND, res2)) => mk([re1], res2)
|
212 : |
|
|
| (re1, re2) => (case compare(re1, re2)
|
213 : |
|
|
of LESS => Op(AND, [re1, re2])
|
214 : |
|
|
| EQUAL => re1
|
215 : |
|
|
| GREATER => Op(AND, [re2, re1])
|
216 : |
|
|
(* end case *))
|
217 : |
|
|
(* end case *)
|
218 : |
|
|
end
|
219 : |
|
|
|
220 : |
|
|
fun mkXor (re1, re2) = let
|
221 : |
|
|
fun merge ([], res2) = res2
|
222 : |
|
|
| merge (res1, []) = res1
|
223 : |
|
|
| merge (re1::r1, re2::r2) = (case compare(re1, re2)
|
224 : |
|
|
of LESS => re1::merge(r1, re2::r2)
|
225 : |
|
|
| EQUAL => merge (r1, r2)
|
226 : |
|
|
| GREATER => re2 :: merge(re1::r1, r2)
|
227 : |
|
|
(* end case *))
|
228 : |
|
|
fun mk (a, b) = (case merge(a, b)
|
229 : |
|
|
of [] => None
|
230 : |
|
|
| [re] => re
|
231 : |
|
|
| res => Op(XOR, res)
|
232 : |
|
|
(* end case *))
|
233 : |
|
|
in
|
234 : |
|
|
case (re1, re2)
|
235 : |
|
|
of (None, _) => re2
|
236 : |
|
|
| (_, None) => re1
|
237 : |
|
|
| (SymSet s1, SymSet s2) =>
|
238 : |
|
|
mkSymSet (SIS.intersect (
|
239 : |
|
|
SIS.union (s1, s2),
|
240 : |
|
|
SIS.complement (SIS.intersect (s1, s2))
|
241 : |
|
|
))
|
242 : |
|
|
| (Op(XOR, res1), Op(XOR, res2)) => mk(res1, res2)
|
243 : |
|
|
| (Op(XOR, res1), _) => mk(res1, [re2])
|
244 : |
|
|
| (_, Op(XOR, res2)) => mk([re1], res2)
|
245 : |
|
|
| (re1, re2) => (case compare(re1, re2)
|
246 : |
|
|
of LESS => Op(XOR, [re1, re2])
|
247 : |
|
|
| EQUAL => None (* FIXME is this right? *)
|
248 : |
|
|
| GREATER => Op(XOR, [re2, re1])
|
249 : |
|
|
(* end case *))
|
250 : |
|
|
(* end case *)
|
251 : |
|
|
end
|
252 : |
|
|
|
253 : |
|
|
fun mkOp (OR, re1, re2) = mkOr(re1, re2)
|
254 : |
|
|
| mkOp (AND, re1, re2) = mkAnd(re1, re2)
|
255 : |
|
|
| mkOp (XOR, re1, re2) = mkXor(re1, re2)
|
256 : |
|
|
|
257 : |
|
|
fun mkNot (Not re) = re
|
258 : |
|
|
| mkNot (None) = mkClosure(Any)
|
259 : |
|
|
| mkNot re = Not re
|
260 : |
|
|
|
261 : |
|
|
fun mkOpt re = mkOr(Epsilon, re)
|
262 : |
|
|
|
263 : |
|
|
fun mkRep (re, low, high) = let
|
264 : |
|
|
fun lowReps 0 = Epsilon
|
265 : |
|
|
| lowReps 1 = re
|
266 : |
|
|
| lowReps n = mkConcat (re, lowReps (n-1))
|
267 : |
|
|
fun highReps 0 = Epsilon
|
268 : |
|
|
| highReps 1 = mkOpt re
|
269 : |
|
|
| highReps n = mkConcat (mkOpt re, highReps (n-1))
|
270 : |
|
|
in
|
271 : |
|
|
if high < low then raise Subscript
|
272 : |
|
|
else mkConcat (lowReps low, highReps (high - low))
|
273 : |
|
|
end
|
274 : |
|
|
|
275 : |
|
|
fun mkAtLeast (re, 0) = mkClosure re
|
276 : |
|
|
| mkAtLeast (re, n) = mkConcat (re, mkAtLeast (re, n-1))
|
277 : |
|
|
|
278 : |
|
|
fun isNone None = true
|
279 : |
|
|
| isNone _ = false
|
280 : |
|
|
|
281 : |
|
|
fun symToString w = "#\"" ^ (Char.toString (Char.chr (Word32.toInt w))) ^ "\""
|
282 : |
|
|
handle Overflow => raise Fail "(BUG) RegExp: symToString on a nonascii character"
|
283 : |
|
|
|
284 : |
|
|
fun SISToString s = let
|
285 : |
|
|
fun c2s c =
|
286 : |
|
|
if (c < 0w128) then
|
287 : |
|
|
Char.toString (Char.chr (Word32.toInt c))
|
288 : |
|
|
else
|
289 : |
|
|
String.concat ["\\u", Word32.toString c]
|
290 : |
|
|
fun f (a, b) =
|
291 : |
|
|
if a=b then c2s a
|
292 : |
|
|
else concat[c2s a, "-", c2s b]
|
293 : |
|
|
(* we want to describe the interval set as concisely as possible,
|
294 : |
|
|
* so we compare the number of intervals in the set to the number
|
295 : |
|
|
* of intervals in its complement, and use the smaller of the two.
|
296 : |
|
|
*)
|
297 : |
|
|
val intervals = SIS.intervals s
|
298 : |
|
|
val intervals' = SIS.intervals (SIS.complement s)
|
299 : |
|
|
val (neg, rngs) =
|
300 : |
|
|
if List.length intervals < List.length intervals'
|
301 : |
|
|
then ("", intervals)
|
302 : |
|
|
else ("^", intervals')
|
303 : |
|
|
val str = neg ^ (String.concat (List.map f (rngs)))
|
304 : |
|
|
in
|
305 : |
|
|
if String.size str <= 1
|
306 : |
|
|
then str
|
307 : |
|
|
else "[" ^ str ^ "]"
|
308 : |
|
|
end
|
309 : |
|
|
|
310 : |
|
|
fun toString re = let
|
311 : |
|
|
fun opToString OR = "|"
|
312 : |
|
|
| opToString AND = "&"
|
313 : |
|
|
| opToString XOR = "^"
|
314 : |
|
|
fun opPrec OR = 0
|
315 : |
|
|
| opPrec AND = 2
|
316 : |
|
|
| opPrec XOR = 1
|
317 : |
|
|
fun prec Any = 6
|
318 : |
|
|
| prec None = 6
|
319 : |
|
|
| prec Epsilon = 6
|
320 : |
|
|
| prec (SymSet _) = 6
|
321 : |
|
|
| prec (Concat[]) = 6
|
322 : |
|
|
| prec (Concat _) = 3
|
323 : |
|
|
| prec (Closure _) = 5
|
324 : |
|
|
| prec (Op(_, [])) = 6
|
325 : |
|
|
| prec (Op(_, [re])) = prec re
|
326 : |
|
|
| prec (Op(rator, _)) = opPrec rator
|
327 : |
|
|
| prec (Not _) = 4
|
328 : |
|
|
fun toS (Any, l) = "{any}" :: l
|
329 : |
|
|
| toS (None, l) = "{none}" :: l
|
330 : |
|
|
| toS (Epsilon, l) = "{epsilon}" :: l
|
331 : |
|
|
| toS (SymSet s, l) = SISToString s :: l
|
332 : |
|
|
| toS (Concat[], l) = "" :: l
|
333 : |
|
|
| toS (Concat[re], l) = toS(re, l)
|
334 : |
|
|
| toS (Concat res, l) = toS'(res, 3, "", l)
|
335 : |
|
|
| toS (Closure re, l) = paren(5, re, "*" :: l)
|
336 : |
|
|
| toS (Op(_, []), l) = "{}" :: l
|
337 : |
|
|
| toS (Op(rator, [re]), l) = toS(re, l)
|
338 : |
|
|
| toS (Op(rator, res), l) = toS'(res, opPrec rator, opToString rator, l)
|
339 : |
|
|
| toS (Not re, l) = "!" :: paren(4, re, l)
|
340 : |
|
|
and toS' ([], p, rator, l) = raise Fail "empty"
|
341 : |
|
|
| toS' (re::r, p, rator, l) =
|
342 : |
|
|
paren(p, re, List.foldr
|
343 : |
|
|
(fn (re, l) => rator :: paren(p, re, l))
|
344 : |
|
|
l r)
|
345 : |
|
|
and paren (p, re, l) = if (p <= prec re)
|
346 : |
|
|
then toS (re, l)
|
347 : |
|
|
else "(" :: toS(re, ")" :: l)
|
348 : |
|
|
in
|
349 : |
|
|
String.concat(toS(re, []))
|
350 : |
|
|
end
|
351 : |
|
|
|
352 : |
|
|
(* true iff epsilon is in the language recognized by the RE *)
|
353 : |
|
|
fun nullable Any = false
|
354 : |
|
|
| nullable None = false
|
355 : |
|
|
| nullable Epsilon = true
|
356 : |
|
|
| nullable (SymSet _) = false
|
357 : |
|
|
| nullable (Closure _) = true
|
358 : |
|
|
| nullable (Concat res) = List.all nullable res
|
359 : |
|
|
| nullable (Op(OR, res)) = List.exists nullable res
|
360 : |
|
|
| nullable (Op(AND, res)) = List.all nullable res
|
361 : |
|
|
| nullable (Op(XOR, re::r)) =
|
362 : |
|
|
(nullable re andalso not(List.exists nullable r))
|
363 : |
|
|
orelse nullable(Op(XOR, r))
|
364 : |
|
|
| nullable (Op(XOR, [])) = raise Fail "(BUG) RegExp: RE operator has no operands"
|
365 : |
|
|
| nullable (Not re) = not(nullable re)
|
366 : |
|
|
|
367 : |
|
|
fun delta re = if (nullable re) then Epsilon else None
|
368 : |
|
|
|
369 : |
|
|
(* compute derivative w.r.t. a symbol *)
|
370 : |
|
|
fun derivative a = let
|
371 : |
|
|
fun da Any = Epsilon
|
372 : |
|
|
| da None = None
|
373 : |
|
|
| da Epsilon = None
|
374 : |
|
|
| da (SymSet s) = if SIS.member(s, a) then Epsilon else None
|
375 : |
|
|
| da (re as Closure re') = mkConcat(da re', re)
|
376 : |
|
|
| da (Concat[]) = None
|
377 : |
|
|
| da (Concat[re]) = da re
|
378 : |
|
|
| da (Concat(re::res)) =
|
379 : |
|
|
mkOr(
|
380 : |
|
|
mkConcatList((da re)::res),
|
381 : |
|
|
mkConcat(delta re, da(Concat res)))
|
382 : |
|
|
| da (Op(_, [])) = raise Fail "(BUG) RegExp: RE operator has no operands"
|
383 : |
|
|
| da (Op(rator, [re])) = da re
|
384 : |
|
|
| da (Op(rator, re::res)) = mkOp(rator, da re, da(Op(rator, res)))
|
385 : |
|
|
| da (Not re) = mkNot(da re)
|
386 : |
|
|
in
|
387 : |
|
|
da
|
388 : |
|
|
end
|
389 : |
|
|
|
390 : |
|
|
structure Map = RedBlackMapFn (
|
391 : |
|
|
struct
|
392 : |
|
|
type ord_key = re Vector.vector
|
393 : |
|
|
val compare = Vector.collate compare
|
394 : |
|
|
end)
|
395 : |
|
|
|
396 : |
|
|
(* yields the smallest partitioning of the alphabet that
|
397 : |
|
|
* "respects" the given sets. if S is one of the sets
|
398 : |
|
|
* returned by compress, then it must be either disjoint
|
399 : |
|
|
* with or a subset of each of the sets in the sets
|
400 : |
|
|
* parameter. see the implementation notes for more detail.
|
401 : |
|
|
*)
|
402 : |
|
|
fun compress sets = let
|
403 : |
|
|
(* performs partition of a set againt a list of sets,
|
404 : |
|
|
* assuming the list of sets is pairwise disjoint.
|
405 : |
|
|
*)
|
406 : |
|
|
fun part1 (set, []) =
|
407 : |
|
|
if SIS.isEmpty set then []
|
408 : |
|
|
else [set]
|
409 : |
|
|
| part1 (set1, set2 :: ss) =
|
410 : |
|
|
if SIS.isEmpty set1 then
|
411 : |
|
|
set2 :: ss
|
412 : |
|
|
else let
|
413 : |
|
|
val i = SIS.intersect (set1, set2)
|
414 : |
|
|
in if SIS.isEmpty i then
|
415 : |
|
|
(set2 :: (part1 (set1, ss)))
|
416 : |
|
|
else let
|
417 : |
|
|
val s1 = SIS.difference (set1, i)
|
418 : |
|
|
val s2 = SIS.difference (set2, i)
|
419 : |
|
|
val ss' = if SIS.isEmpty s1 then ss
|
420 : |
|
|
else part1 (s1, ss)
|
421 : |
|
|
in if SIS.isEmpty s2 then
|
422 : |
|
|
(i :: ss')
|
423 : |
|
|
else
|
424 : |
|
|
(i :: s2 :: ss')
|
425 : |
|
|
end
|
426 : |
|
|
end
|
427 : |
|
|
in
|
428 : |
|
|
List.foldl part1 [] (SIS.universe::sets)
|
429 : |
|
|
end
|
430 : |
|
|
|
431 : |
|
|
fun derivatives (res : re Vector.vector) = let
|
432 : |
|
|
(* ds is the "factoring function" *)
|
433 : |
|
|
fun ds Any = [SIS.universe]
|
434 : |
|
|
| ds None = []
|
435 : |
|
|
| ds Epsilon = []
|
436 : |
|
|
| ds (SymSet s) = [s]
|
437 : |
|
|
| ds (Closure re) = ds re
|
438 : |
|
|
| ds (Concat []) = []
|
439 : |
|
|
| ds (Concat [re]) = ds re
|
440 : |
|
|
| ds (Concat (re::res)) =
|
441 : |
|
|
if nullable re then
|
442 : |
|
|
(ds re) @ (ds (Concat res))
|
443 : |
|
|
else ds re
|
444 : |
|
|
| ds (Op(rator, res)) = List.concat (map ds res)
|
445 : |
|
|
| ds (Not re) = ds re
|
446 : |
|
|
val sets = Vector.foldl
|
447 : |
|
|
(fn (re, sets) => (ds re) @ sets)
|
448 : |
|
|
[] res
|
449 : |
|
|
val sets' = compress sets
|
450 : |
|
|
fun classes ([], classMap) = Map.listItemsi classMap
|
451 : |
|
|
| classes (set::sets, classMap) = let
|
452 : |
|
|
(* use first element as representative of the equiv class *)
|
453 : |
|
|
val (rep, _) = List.hd (SIS.intervals set)
|
454 : |
|
|
val derivs = Vector.map (derivative rep) res
|
455 : |
|
|
in case Map.find (classMap, derivs)
|
456 : |
|
|
of NONE =>
|
457 : |
|
|
classes (sets, Map.insert(classMap, derivs, set))
|
458 : |
|
|
| SOME set' => let
|
459 : |
|
|
val map' = Map.insert(classMap,
|
460 : |
|
|
derivs,
|
461 : |
|
|
SIS.union (set, set'))
|
462 : |
|
|
in classes (sets, map')
|
463 : |
|
|
end
|
464 : |
|
|
end
|
465 : |
|
|
in
|
466 : |
|
|
classes (sets', Map.empty)
|
467 : |
|
|
end
|
468 : |
|
|
|
469 : |
|
|
end
|
470 : |
mblume |
1904 |
|
471 : |
|
|
|