SCM Repository
View of /sml/trunk/src/smlnj-lib/RegExp/BackEnd/bt-engine.sml
Parent Directory
|
Revision Log
Revision 104 -
(download)
(annotate)
Thu May 28 21:25:35 1998 UTC (22 years, 10 months ago) by monnier
File size: 8501 byte(s)
Thu May 28 21:25:35 1998 UTC (22 years, 10 months ago) by monnier
File size: 8501 byte(s)
Initial revision
(* bt-engine.sml * * COPYRIGHT (c) 1998 Bell Labs, Lucent Technologies. * * Implements a regular expressions matcher based on a backtracking search. *) structure BackTrackEngine : REGEXP_ENGINE = struct exception Error structure S = RegExpSyntax structure M = MatchTree type regexp = S.syntax fun compile r = r fun scan (regexp,getc,pos,stream) = let fun getc' (s) = (case (getc (s)) of SOME v => v | NONE => raise Subscript) (* This function gets an empty match structure, for when the appropriate * alternative is not followed at all *) fun getMatchStructure (S.Group e) = [M.Match (NONE,getMatchStructure e)] | getMatchStructure (S.Alt l) = List.concat (map getMatchStructure l) | getMatchStructure (S.Concat l) = List.concat (map getMatchStructure l) | getMatchStructure (S.Interval (e,_,_)) = getMatchStructure e | getMatchStructure (S.Option e) = getMatchStructure e | getMatchStructure (S.Star e) = getMatchStructure e | getMatchStructure (S.Plus e) = getMatchStructure e | getMatchStructure (_) = [] (* Walk a regular expression in continuation-passing style * The continuation is simply a list of all this is left to do * Continuations only seem to arise when concatenation are considered * * Walk returns the boolean status of the beast, and a match_tree * containing the match information. * Also: the last position scanned and the remainder stream * MODIFICATION: walk returns a list of matches * (because we need to extract the longest match) *) fun max [] sel = raise Error | max (x::xs) sel = let fun max' [] curr currSel = curr | max' (x::xs) curr currSel = let val xSel = sel(x) in if (xSel>currSel) then max' xs x xSel else max' xs curr currSel end in max' xs x (sel x) end fun longest l = max l (#3: 'a * 'b * int * 'c -> int) fun optMinus1 (SOME i) = SOME (i-1) | optMinus1 NONE = NONE fun walk (S.Group e,cont,p,inits) = (case walk (e,[],p,inits) of [] => [(false,[],p,inits)] | ((b,matches,last,s)::ls) => let fun loop [] cLast 1 cCont cList = let val [(b,matches,last,s)] = cList val [(b',matches',last',s')] = cCont in [(b', (M.Match (SOME {pos=inits, len=last-p}, matches))::matches',last',s')] end | loop [] cLast n cCont cList = raise Error | loop ((b,matches,last,s)::es) cLen cNum cCont cList = let val v as (_,_,last',_) = longest (walk (S.Concat [], cont,last,s)) in if (last' > cLen) then loop es last' 1 [v] [(b,matches,last,s)] else if (last' = cLen) then loop es cLen (cNum+1) (v::cCont) ((b,matches,last,s)::cList) else loop es cLen cNum cCont cList end in loop ls last 1 [longest(walk (S.Concat [],cont,last,s))] [(b,matches,last,s)] end) | walk (S.Alt [],[],p,inits) = [(true,[],p,inits)] | walk (S.Alt [], (c::cs),p,inits) = walk (c,cs,p,inits) | walk (S.Alt l, cont,p,inits) = let fun loop [] = [] | loop (e::es) = let val g = longest (walk (e,cont,p,inits)) in if (#1 g) then g::(loop es) else loop es end in loop l end | walk (S.Concat [],[],p,inits) = [(true,[],p,inits)] | walk (S.Concat [], (c::cs),p,inits) = walk (c,cs,p,inits) | walk (S.Concat (e::es),cont,p,inits) = walk (e,(es@cont),p,inits) | walk (S.Interval (e,0,SOME 0),[],p,inits) = [(true,[],p,inits)] | walk (S.Interval (e,0,SOME 0),(c::cs),p,inits) = walk (c,cs,p,inits) | walk (S.Interval (e,0,k),cont,p,inits) = let val (b',matches',last',s') = longest (walk (S.Concat [],cont,p,inits)) val (b,matches,last,s) = longest (walk (S.Interval (e,1,k),cont,p,inits)) in if ((b andalso b' andalso last >= last') orelse (b andalso (not b'))) then [(b,matches,last,s)] else if ((b' andalso b andalso last' > last) orelse (b' andalso (not b))) then [(b',(getMatchStructure e)@matches',last',s')] else [(false,[],p,inits)] end | walk (S.Interval (e,1,SOME 1),cont,p,inits) = walk (e,cont,p,inits) | walk (S.Interval (e,1,k),cont,p,inits) = let val (b',matches',last',s') = longest (walk (e,[],p,inits)) (* need to match 1 *) in if (not b') then [(false, [], p, inits)] else let val (b,matches,last,s) = longest (walk (S.Interval (e,1,optMinus1 k), cont,last',s')) val (b'',matches'',last'',s'') = longest (walk (S.Concat [], cont,last',s')) in if (b andalso b'' andalso last'' >= last) then [(b'',matches'@matches'',last'',s'')] else if (b) then [(b,matches,last,s)] else [(b'',matches'@matches'',last'',s'')] end end | walk (S.Interval (e,n1,k),cont,p,inits) = walk (S.Concat [e,S.Interval (e,n1-1,optMinus1 k)],cont,p,inits) | walk (S.Option e,cont,p,inits) = walk (S.Interval (e,0,SOME 1),cont,p,inits) | walk (S.Star e,cont,p,inits) = walk (S.Interval (e,0,NONE),cont,p,inits) | walk (S.Plus e,cont,p,inits) = walk (S.Interval (e,1,NONE),cont,p,inits) | walk (S.MatchSet set,[],p,inits) = if (S.CharSet.isEmpty set) then [(true,[],p,inits)] else (case (getc (inits)) of SOME (chr,s) => let val b = S.CharSet.member (set,chr) in [(b,[],p+(if b then 1 else 0),(if b then s else inits))] end | NONE => [(false,[],p,inits)]) | walk (S.MatchSet set,(c::cs),p,inits) = if (S.CharSet.isEmpty set) then walk (c,cs,p,inits) else (case (getc (inits)) of SOME (chr,s) => if (S.CharSet.member (set,chr)) then walk (c,cs,(p+1),s) else [(false,[],p,inits)] | NONE => [(false,[],p,inits)]) | walk (S.NonmatchSet set,[],p,inits) = (case (getc (inits)) of SOME (chr,s) => let val b = not (S.CharSet.member (set,chr)) in [(b, [], p+(if b then 1 else 0),(if b then s else inits))] end | NONE => [(false,[],p,inits)]) | walk (S.NonmatchSet set,(c::cs),p,inits) = (case (getc (inits)) of SOME (chr,s) => if (S.CharSet.member (set,chr)) then [(false,[],p,inits)] else walk (c,cs,(p+1),s) | NONE => [(false,[],p,inits)]) | walk (S.Char ch,[],p,inits) = (case (getc (inits)) of SOME (chr,s) => let val b = (chr = ch) in [(b, [],p+(if b then 1 else 0),(if b then s else inits))] end | NONE => [(false,[],p,inits)]) | walk (S.Char ch,(c::cs),p,inits) = (case (getc (inits)) of SOME (chr,s) => if (chr = ch) then walk (c,cs,(p+1),s) else [(false,[],p,inits)] | NONE => [(false,[],p,inits)]) | walk (S.Begin,[],p,inits) = [(p=0,[],p,inits)] | walk (S.Begin,(c::cs),p,inits) = if (p=0) then walk (c,cs,p,inits) else [(false,[],p,inits)] | walk (S.End,[],p,inits) = [(not (Option.isSome (getc (inits))),[],p,inits)] | walk (S.End,(c::cs),p,inits) = if (Option.isSome (getc (inits))) then [(false,[],p,inits)] else walk (c,cs,p,inits) val l = walk (regexp,[],pos,stream) handle Subscript => [(false,[],pos,stream)] val v as (result,matches,last,s') = longest l handle _ => (false,[],pos,stream) in if result then SOME (M.Match (SOME {pos=stream,len=last-pos}, matches),s') else NONE end fun prefix regexp getc stream = scan (regexp,getc,0,stream) fun find regexp getc stream = let fun loop (p,s) = (case (scan (regexp,getc,p,s)) of NONE => (case (getc (s)) of SOME (_,s') => loop (p+1,s') | NONE => NONE) | SOME v => SOME v) in loop (0,stream) end fun match [] getc stream = NONE | match l getc stream = let val m = map (fn (r,f) => (prefix r getc stream,f)) l (* find the longest SOME *) fun loop ([],max,len) = max | loop ((NONE,_)::xs,max,maxlen) = loop (xs,max,maxlen) | loop ((SOME(m,cs),f)::xs,max,maxlen) = let val (SOME {pos,len}) = MatchTree.nth (m,0) in if (len>maxlen) then loop (xs,(SOME(m,cs),f),len) else loop (xs,max,maxlen) end val (max,f) = loop (tl(m),hd(m),~1) in case max of NONE => NONE | SOME (m,cs) => SOME (f m,cs) end end
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |