SCM Repository
Annotation of /sml/trunk/src/smlnj-lib/RegExp/BackEnd/fsm.sml
Parent Directory
|
Revision Log
Revision 104 - (view) (download)
1 : | monnier | 104 | (* fsm.sml |
2 : | * | ||
3 : | * COPYRIGHT (c) 1998 Bell Labs, Lucent Technologies. | ||
4 : | * | ||
5 : | * Non-deterministic and deterministic finite-state machines. | ||
6 : | *) | ||
7 : | |||
8 : | |||
9 : | signature NFA = | ||
10 : | sig | ||
11 : | |||
12 : | exception SyntaxNotHandled | ||
13 : | |||
14 : | structure IntSet : ORD_SET where type Key.ord_key = int | ||
15 : | |||
16 : | type nfa | ||
17 : | |||
18 : | val build : RegExpSyntax.syntax * int -> nfa | ||
19 : | val buildPattern : RegExpSyntax.syntax list -> nfa | ||
20 : | val start : nfa -> IntSet.set | ||
21 : | val move : nfa -> int * char -> IntSet.set | ||
22 : | val chars : nfa -> int -> char list | ||
23 : | val accepting : nfa -> int -> int option | ||
24 : | |||
25 : | val print : nfa -> unit | ||
26 : | end | ||
27 : | |||
28 : | structure Nfa : NFA = | ||
29 : | struct | ||
30 : | |||
31 : | exception SyntaxNotHandled | ||
32 : | |||
33 : | datatype move = Move of int * char option * int | ||
34 : | |||
35 : | fun compareCharOption (NONE,NONE) = EQUAL | ||
36 : | | compareCharOption (NONE,SOME (c)) = LESS | ||
37 : | | compareCharOption (SOME(c),NONE) = GREATER | ||
38 : | | compareCharOption (SOME(c),SOME(c')) = Char.compare (c,c') | ||
39 : | |||
40 : | structure S = RegExpSyntax | ||
41 : | structure IntSet = | ||
42 : | ListSetFn (struct | ||
43 : | type ord_key = int | ||
44 : | val compare = Int.compare | ||
45 : | end) | ||
46 : | structure Int2Set = | ||
47 : | ListSetFn (struct | ||
48 : | type ord_key = int * int | ||
49 : | fun compare ((i1,i2),(j1,j2)) = | ||
50 : | case (Int.compare (i1,j1)) | ||
51 : | of EQUAL => Int.compare (i2,j2) | ||
52 : | | v => v | ||
53 : | end) | ||
54 : | structure MoveSet = | ||
55 : | ListSetFn (struct | ||
56 : | type ord_key = move | ||
57 : | fun compare (Move (i,c,j),Move (i',c',j')) = | ||
58 : | (case (Int.compare (i,i')) | ||
59 : | of EQUAL => | ||
60 : | (case (compareCharOption (c,c')) | ||
61 : | of EQUAL => Int.compare (j,j') | ||
62 : | | v => v) | ||
63 : | | v => v) | ||
64 : | end) | ||
65 : | structure CharSet = | ||
66 : | ListSetFn (struct | ||
67 : | type ord_key = char | ||
68 : | val compare = Char.compare | ||
69 : | end) | ||
70 : | |||
71 : | structure I = IntSet | ||
72 : | structure I2 = Int2Set | ||
73 : | structure M = MoveSet | ||
74 : | structure C = CharSet | ||
75 : | |||
76 : | (* create sets from lists *) | ||
77 : | fun iList l = I.addList (I.empty,l) | ||
78 : | fun mList l = M.addList (M.empty,l) | ||
79 : | |||
80 : | datatype nfa = Nfa of {states : I.set, | ||
81 : | moves : M.set, | ||
82 : | accepting : I2.set} | ||
83 : | |||
84 : | fun print (Nfa {states,moves,accepting}) = | ||
85 : | let val pr = TextIO.print | ||
86 : | val prI = TextIO.print o Int.toString | ||
87 : | val prI2 = TextIO.print o (fn (i1,i2) => (Int.toString i1)) | ||
88 : | val prC = TextIO.print o Char.toString | ||
89 : | in | ||
90 : | pr ("States: 0 -> "); | ||
91 : | prI (I.numItems (states)-1); | ||
92 : | pr "\nAccepting:"; | ||
93 : | I2.app (fn k => (pr " "; prI2 k)) accepting; | ||
94 : | pr "\nMoves\n"; | ||
95 : | M.app (fn (Move (i,NONE,d)) => (pr " "; | ||
96 : | prI i; | ||
97 : | pr " --@--> "; | ||
98 : | prI d; | ||
99 : | pr "\n") | ||
100 : | | (Move (i,SOME c,d)) => (pr " "; | ||
101 : | prI i; | ||
102 : | pr " --"; | ||
103 : | prC c; | ||
104 : | pr "--> "; | ||
105 : | prI d; | ||
106 : | pr "\n")) moves | ||
107 : | end | ||
108 : | |||
109 : | fun nullAccept n = Nfa {states=iList [0,1], moves=M.add (M.empty, Move (0,NONE,1)), | ||
110 : | accepting=I2.singleton (1,n)} | ||
111 : | fun nullRefuse n = Nfa {states=iList [0,1], moves=M.empty, | ||
112 : | accepting=I2.singleton (1,n)} | ||
113 : | |||
114 : | fun renumber n st = n + st | ||
115 : | fun renumberMove n (Move (s,c,s')) = Move (renumber n s, c, renumber n s') | ||
116 : | fun renumberAcc n (st,n') = (n+st,n') | ||
117 : | |||
118 : | fun build' n (S.Group e) = build' n e | ||
119 : | | build' n (S.Alt l) = | ||
120 : | foldr (fn (Nfa {states=s1, | ||
121 : | moves=m1,...}, | ||
122 : | Nfa {states=s2, | ||
123 : | moves=m2,...}) => | ||
124 : | let val k1 = I.numItems s1 | ||
125 : | val k2 = I.numItems s2 | ||
126 : | val s1' = I.map (renumber 1) s1 | ||
127 : | val s2' = I.map (renumber (k1+1)) s2 | ||
128 : | val m1' = M.map (renumberMove 1) m1 | ||
129 : | val m2' = M.map (renumberMove (k1+1)) m2 | ||
130 : | in | ||
131 : | Nfa {states=I.addList (I.union (s1',s2'), | ||
132 : | [0,k1+k2+1]), | ||
133 : | moves=M.addList (M.union (m1',m2'), | ||
134 : | [Move (0,NONE,1), | ||
135 : | Move (0,NONE,k1+1), | ||
136 : | Move (k1,NONE,k1+k2+1), | ||
137 : | Move (k1+k2,NONE,k1+k2+1)]), | ||
138 : | accepting=I2.singleton (k1+k2+1,n)} | ||
139 : | end) | ||
140 : | (nullRefuse n) (map (build' n) l) | ||
141 : | | build' n (S.Concat l) = | ||
142 : | foldr (fn (Nfa {states=s1,moves=m1,...}, | ||
143 : | Nfa {states=s2,moves=m2,accepting}) => | ||
144 : | let val k = I.numItems s1 - 1 | ||
145 : | val s2' = I.map (renumber k) s2 | ||
146 : | val m2' = M.map (renumberMove k) m2 | ||
147 : | val accepting' = I2.map (renumberAcc k) accepting | ||
148 : | in | ||
149 : | Nfa {states=I.union (s1,s2'), | ||
150 : | moves=M.union (m1,m2'), | ||
151 : | accepting=accepting'} | ||
152 : | end) | ||
153 : | (nullAccept n) (map (build' n) l) | ||
154 : | | build' n (S.Interval (e,n1,n2)) = raise SyntaxNotHandled | ||
155 : | | build' n (S.Option e) = build' n (S.Alt [S.Concat [], e]) | ||
156 : | | build' n (S.Plus e) = | ||
157 : | let val (Nfa {states,moves,...}) = build' n e | ||
158 : | val m = I.numItems states | ||
159 : | in | ||
160 : | Nfa {states=I.add (states,m), | ||
161 : | moves=M.addList (moves, [Move (m-1,NONE,m), | ||
162 : | Move (m-1,NONE,0)]), | ||
163 : | accepting=I2.singleton (m,n)} | ||
164 : | end | ||
165 : | | build' n (S.Star e) = build' n (S.Alt [S.Concat [], S.Plus e]) | ||
166 : | | build' n (S.MatchSet s) = | ||
167 : | if (S.CharSet.isEmpty s) then nullAccept (n) | ||
168 : | else | ||
169 : | let val moves = S.CharSet.foldl (fn (c,moveSet) => M.add (moveSet,Move (0,SOME c,1))) | ||
170 : | M.empty s | ||
171 : | in | ||
172 : | Nfa {states=iList [0,1], | ||
173 : | moves=moves, | ||
174 : | accepting=I2.singleton (1,n)} | ||
175 : | end | ||
176 : | | build' n (S.NonmatchSet s) = | ||
177 : | let val moves = S.CharSet.foldl (fn (c,moveSet) => M.add (moveSet,Move (0,SOME c,1))) | ||
178 : | M.empty (S.CharSet.difference (S.allChars,s)) | ||
179 : | in | ||
180 : | Nfa {states=iList [0,1], | ||
181 : | moves=moves, | ||
182 : | accepting=I2.singleton (1,n)} | ||
183 : | end | ||
184 : | | build' n (S.Char c) = Nfa {states=iList [0,1], | ||
185 : | moves=M.singleton (Move (0,SOME c,1)), | ||
186 : | accepting=I2.singleton (1,n)} | ||
187 : | | build' n (S.Begin) = raise SyntaxNotHandled | ||
188 : | | build' n (S.End) = raise SyntaxNotHandled | ||
189 : | |||
190 : | |||
191 : | fun build (r,n) = let val (Nfa {states,moves,accepting}) = build' n r | ||
192 : | (* Clean up the nfa to remove epsilon moves. | ||
193 : | * A simple way to do this: | ||
194 : | * 1. states={0}, moves={} | ||
195 : | * 2. for every s in states, | ||
196 : | * 3. compute closure(s) | ||
197 : | * 4. for any move (i,c,o) with i in closure (s) | ||
198 : | * 5. add move (0,c,o) to moves | ||
199 : | * 6. add state o to states | ||
200 : | * 7. repeat until no modifications to states and moves | ||
201 : | *) | ||
202 : | in | ||
203 : | Nfa {states=states, moves=moves, accepting=accepting} | ||
204 : | end | ||
205 : | |||
206 : | fun buildPattern rs = | ||
207 : | let fun loop ([],_) = [] | ||
208 : | | loop (r::rs,n) = (build (r,n))::(loop (rs,n+1)) | ||
209 : | val rs' = loop (rs,0) | ||
210 : | val renums = foldr (fn (Nfa {states,...},acc) => 1::(map (fn k=>k+I.numItems states) | ||
211 : | acc)) [] rs' | ||
212 : | val news = ListPair.map (fn (Nfa {states,moves,accepting},renum) => | ||
213 : | let val newStates=I.map (renumber renum) states | ||
214 : | val newMoves=M.map (renumberMove renum) moves | ||
215 : | val newAcc=I2.map (renumberAcc renum) accepting | ||
216 : | in | ||
217 : | Nfa{states=newStates, | ||
218 : | moves=newMoves, | ||
219 : | accepting=newAcc} | ||
220 : | end) (rs',renums) | ||
221 : | val (states,moves,accepting) = foldl (fn (Nfa{states,moves,accepting},(accS,accM,accA))=> | ||
222 : | (I.union (states,accS), | ||
223 : | M.union (moves,accM), | ||
224 : | I2.union (accepting,accA))) | ||
225 : | (I.singleton 0, | ||
226 : | M.addList (M.empty, | ||
227 : | map (fn k => Move (0,NONE,k)) renums), | ||
228 : | I2.empty) news | ||
229 : | in | ||
230 : | Nfa {states=states,moves=moves,accepting=accepting} | ||
231 : | |||
232 : | end | ||
233 : | |||
234 : | fun accepting (Nfa {accepting,...}) state = | ||
235 : | let val item = I2.find (fn (i,_) => (i=state)) accepting | ||
236 : | in | ||
237 : | case item | ||
238 : | of NONE => NONE | ||
239 : | | SOME (s,n) => SOME (n) | ||
240 : | end | ||
241 : | |||
242 : | (* Compute possible next states from orig with character c *) | ||
243 : | fun oneMove (Nfa {moves,...}) (orig,char) = | ||
244 : | M.foldr (fn (Move (_,NONE,_),set) => set | ||
245 : | | (Move (or,SOME c,d),set) => | ||
246 : | if (c=char) andalso (or=orig) | ||
247 : | then I.add (set,d) | ||
248 : | else set) | ||
249 : | I.empty moves | ||
250 : | |||
251 : | fun closure (Nfa {moves,...}) origSet = | ||
252 : | let fun addState (Move (orig,NONE,dest),(b,states)) = | ||
253 : | if (I.member (states,orig) andalso | ||
254 : | not (I.member (states,dest))) | ||
255 : | then (true,I.add (states,dest)) | ||
256 : | else (b,states) | ||
257 : | | addState (_,bs) = bs | ||
258 : | fun loop (states) = | ||
259 : | let val (modified,new) = M.foldr addState | ||
260 : | (false,states) moves | ||
261 : | in | ||
262 : | if modified | ||
263 : | then loop (new) | ||
264 : | else new | ||
265 : | end | ||
266 : | in | ||
267 : | loop (origSet) | ||
268 : | end | ||
269 : | |||
270 : | fun move nfa = | ||
271 : | let val closure = closure nfa | ||
272 : | val oneMove = oneMove nfa | ||
273 : | in | ||
274 : | closure o oneMove | ||
275 : | end | ||
276 : | |||
277 : | fun start nfa = closure nfa (I.singleton 0) | ||
278 : | |||
279 : | fun chars (Nfa {moves,...}) state = | ||
280 : | C.listItems | ||
281 : | (foldl (fn (Move (s1,SOME c,s2),s) => if (s1=state) | ||
282 : | then C.add (s,c) | ||
283 : | else s | ||
284 : | | (_,s) => s) | ||
285 : | C.empty moves) | ||
286 : | |||
287 : | end | ||
288 : | |||
289 : | signature DFA = | ||
290 : | sig | ||
291 : | |||
292 : | exception SyntaxNotHandled | ||
293 : | |||
294 : | type dfa | ||
295 : | |||
296 : | val build : RegExpSyntax.syntax -> dfa | ||
297 : | val buildPattern : RegExpSyntax.syntax list -> dfa | ||
298 : | val move : dfa -> int * char -> int option | ||
299 : | val accepting : dfa -> int -> int option | ||
300 : | val canStart : dfa -> char -> bool | ||
301 : | |||
302 : | end | ||
303 : | |||
304 : | structure Dfa : DFA = | ||
305 : | struct | ||
306 : | |||
307 : | exception SyntaxNotHandled | ||
308 : | |||
309 : | datatype move = Move of int * char option * int | ||
310 : | |||
311 : | fun compareCharOption (NONE,NONE) = EQUAL | ||
312 : | | compareCharOption (NONE,SOME (c)) = LESS | ||
313 : | | compareCharOption (SOME(c),NONE) = GREATER | ||
314 : | | compareCharOption (SOME(c),SOME(c')) = Char.compare (c,c') | ||
315 : | |||
316 : | structure N = Nfa | ||
317 : | structure IntSet = N.IntSet | ||
318 : | structure IntSetSet = | ||
319 : | ListSetFn (struct | ||
320 : | type ord_key = IntSet.set | ||
321 : | val compare = IntSet.compare | ||
322 : | end) | ||
323 : | structure Int2Set = | ||
324 : | ListSetFn (struct | ||
325 : | type ord_key = int * int | ||
326 : | fun compare ((i1,i2),(j1,j2)) = | ||
327 : | case (Int.compare (i1,j1)) | ||
328 : | of EQUAL => Int.compare (i2,j2) | ||
329 : | | v => v | ||
330 : | end) | ||
331 : | structure MoveSet = | ||
332 : | ListSetFn (struct | ||
333 : | type ord_key = move | ||
334 : | fun compare (Move (i,c,j),Move (i',c',j')) = | ||
335 : | (case (Int.compare (i,i')) | ||
336 : | of EQUAL => | ||
337 : | (case (compareCharOption (c,c')) | ||
338 : | of EQUAL => Int.compare (j,j') | ||
339 : | | v => v) | ||
340 : | | v => v) | ||
341 : | end) | ||
342 : | structure CharSet = | ||
343 : | ListSetFn (struct | ||
344 : | type ord_key = char | ||
345 : | val compare = Char.compare | ||
346 : | end) | ||
347 : | |||
348 : | structure IS = IntSetSet | ||
349 : | structure I = IntSet | ||
350 : | structure I2 = Int2Set | ||
351 : | structure M = MoveSet | ||
352 : | structure C = CharSet | ||
353 : | structure A2 = Array2 | ||
354 : | structure A = Array | ||
355 : | structure Map = ListMapFn (struct | ||
356 : | type ord_key = IntSet.set | ||
357 : | val compare = IntSet.compare | ||
358 : | end) | ||
359 : | |||
360 : | (* create sets from lists *) | ||
361 : | fun iList l = I.addList (I.empty,l) | ||
362 : | fun mList l = M.addList (M.empty,l) | ||
363 : | |||
364 : | datatype dfa = Dfa of {states : I.set, | ||
365 : | moves : M.set, | ||
366 : | accepting : I2.set, | ||
367 : | table : int option A2.array, | ||
368 : | accTable : (int option) A.array, | ||
369 : | startTable : bool A.array} | ||
370 : | |||
371 : | fun print (Dfa {states,moves,accepting,...}) = | ||
372 : | let val pr = TextIO.print | ||
373 : | val prI = TextIO.print o Int.toString | ||
374 : | val prI2 = TextIO.print o (fn (i1,i2) => Int.toString i1) | ||
375 : | val prC = TextIO.print o Char.toString | ||
376 : | in | ||
377 : | pr ("States: 0 -> "); | ||
378 : | prI (I.numItems (states)-1); | ||
379 : | pr "\nAccepting:"; | ||
380 : | I2.app (fn k => (pr " "; prI2 k)) accepting; | ||
381 : | pr "\nMoves\n"; | ||
382 : | M.app (fn (Move (i,NONE,d)) => (pr " "; | ||
383 : | prI i; | ||
384 : | pr " --@--> "; | ||
385 : | prI d; | ||
386 : | pr "\n") | ||
387 : | | (Move (i,SOME c,d)) => (pr " "; | ||
388 : | prI i; | ||
389 : | pr " --"; | ||
390 : | prC c; | ||
391 : | pr "--> "; | ||
392 : | prI d; | ||
393 : | pr "\n")) moves | ||
394 : | end | ||
395 : | |||
396 : | |||
397 : | fun move' moves (i,c) = | ||
398 : | (case (M.find (fn (Move (s1,SOME c',s2)) => | ||
399 : | (s1=i andalso c=c')) | ||
400 : | moves) | ||
401 : | of NONE => NONE | ||
402 : | | SOME (Move (s1,SOME c',s2)) => SOME s2) | ||
403 : | (* fun move (Dfa {moves,...}) (i,c) = move' moves (i,c) *) | ||
404 : | fun move (Dfa {table,...}) (i,c) = A2.sub (table,i,ord(c)-ord(Char.minChar)) | ||
405 : | |||
406 : | fun accepting' accepting i = I2.foldr (fn ((s,n),NONE) => if (s=i) | ||
407 : | then SOME(n) | ||
408 : | else NONE | ||
409 : | | ((s,n),SOME(n')) => if (s=i) | ||
410 : | then SOME(n) | ||
411 : | else SOME(n')) | ||
412 : | NONE accepting | ||
413 : | (* fun accepting (Dfa {accepting,...}) i = accepting' accepting i *) | ||
414 : | fun accepting (Dfa {accTable,...}) i = A.sub (accTable,i) | ||
415 : | |||
416 : | fun canStart (Dfa {startTable,...}) c = A.sub (startTable,ord(c)) | ||
417 : | |||
418 : | fun build' nfa = | ||
419 : | let val move = N.move nfa | ||
420 : | val accepting = N.accepting nfa | ||
421 : | val start = N.start nfa | ||
422 : | val chars = N.chars nfa | ||
423 : | fun getAllChars (ps) = | ||
424 : | I.foldl | ||
425 : | (fn (s,cs) => C.addList (cs,chars s)) | ||
426 : | C.empty ps | ||
427 : | val initChars = getAllChars (start) | ||
428 : | fun getAllStates (ps,c) = | ||
429 : | I.foldl | ||
430 : | (fn (s,ss) => I.union (ss,move (s,c))) | ||
431 : | I.empty ps | ||
432 : | fun loop ([],set,moves) = (set,moves) | ||
433 : | | loop (x::xs,set,moves) = | ||
434 : | let val cl = getAllChars (x) | ||
435 : | val (nstack,sdu,ml) = | ||
436 : | C.foldl | ||
437 : | (fn (c,(ns,sd,ml)) => | ||
438 : | let val u = getAllStates (x,c) | ||
439 : | in | ||
440 : | if (not (IS.member (set,u)) | ||
441 : | andalso (not (IS.member (sd,u)))) | ||
442 : | then (u::ns, | ||
443 : | IS.add (sd,u), | ||
444 : | (x,c,u)::ml) | ||
445 : | else (ns,sd,(x,c,u)::ml) | ||
446 : | end) ([],IS.empty,[]) cl | ||
447 : | in | ||
448 : | loop (nstack@xs,IS.union(set,sdu),ml@moves) | ||
449 : | end | ||
450 : | val (sSet,mList) = loop ([start],IS.singleton (start), []) | ||
451 : | val num = ref 1 | ||
452 : | fun new () = let val n = !num | ||
453 : | in | ||
454 : | num := n+1 ; n | ||
455 : | end | ||
456 : | val sMap = Map.insert (Map.empty, start, 0) | ||
457 : | val sSet' = IS.delete (sSet,start) | ||
458 : | val sMap = IS.foldl (fn (is,map) => Map.insert (map,is,new ())) | ||
459 : | sMap sSet' | ||
460 : | val states = I.addList (I.empty,List.tabulate(!num,fn x => x)) | ||
461 : | val moves = M.addList (M.empty, | ||
462 : | map (fn (is1,c,is2) => | ||
463 : | Move (valOf (Map.find (sMap,is1)), | ||
464 : | SOME c, | ||
465 : | valOf (Map.find (sMap,is2)))) | ||
466 : | mList) | ||
467 : | (* Given a set of accepting states, look for a given state, | ||
468 : | * with the minimal corresponding pattern number | ||
469 : | *) | ||
470 : | fun minPattern accSet = let val l = map (valOf o accepting) (I.listItems accSet) | ||
471 : | fun loop ([],min) = min | ||
472 : | | loop (n::ns,min) = | ||
473 : | if (n<min) then loop (ns,n) | ||
474 : | else loop (ns,min) | ||
475 : | in | ||
476 : | loop (tl(l),hd(l)) | ||
477 : | end | ||
478 : | val accept = IS.foldl (fn (is,cis) => | ||
479 : | let val items = I.filter (fn k => | ||
480 : | case (accepting k) | ||
481 : | of SOME _ => true | ||
482 : | | NONE => false) is | ||
483 : | in | ||
484 : | if (I.isEmpty items) | ||
485 : | then cis | ||
486 : | else | ||
487 : | I2.add (cis,(valOf (Map.find (sMap,is)), | ||
488 : | minPattern items)) | ||
489 : | end) I2.empty sSet | ||
490 : | val table = A2.tabulate A2.RowMajor (!num, | ||
491 : | ord(Char.maxChar)-ord(Char.minChar)+1, | ||
492 : | fn (s,c) => move' moves (s,chr(c+ord(Char.minChar)))) | ||
493 : | val accTable = A.tabulate (!num, | ||
494 : | fn (s) => accepting' accept s) | ||
495 : | val startTable = A.tabulate (ord(Char.maxChar)- | ||
496 : | ord(Char.minChar)+1, | ||
497 : | fn (c) => C.member (initChars, | ||
498 : | chr(c+ord(Char.minChar)))) | ||
499 : | in | ||
500 : | Dfa {states=states,moves=moves,accepting=accept, | ||
501 : | table=table,accTable=accTable,startTable=startTable} | ||
502 : | end | ||
503 : | |||
504 : | fun build r = build' (N.build (r,0)) | ||
505 : | |||
506 : | fun buildPattern rs = build' (N.buildPattern rs) | ||
507 : | |||
508 : | |||
509 : | end |
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |