Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /lexgen/releases/release-110.61/src/lex-gen.sml
ViewVC logotype

Annotation of /lexgen/releases/release-110.61/src/lex-gen.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2246 - (view) (download)

1 : mblume 1902 (* lex-gen.sml
2 :     *
3 :     * COPYRIGHT (c) 2005
4 :     * John Reppy (http://www.cs.uchicago.edu/~jhr)
5 :     * Aaron Turon (adrassi@gmail.com)
6 :     * All rights reserved.
7 :     *
8 :     * DFA generation using RE derivatives
9 :     *)
10 :    
11 :     structure LexGen :
12 :     sig
13 :    
14 :     val gen : LexSpec.spec -> LexOutputSpec.spec
15 :    
16 :     end = struct
17 :    
18 :     structure RE = RegExp
19 :     structure SIS = RegExp.SymSet
20 :     structure LO = LexOutputSpec
21 :    
22 :     structure Map = RedBlackMapFn (
23 :     struct
24 :     type ord_key = RE.re Vector.vector
25 :     val compare = Vector.collate RE.compare
26 :     end)
27 :    
28 :     (* given a list of RE vectors (start states), produce a DFA recognizer
29 :     * NOTE: invoked once per start state (each start state has a DFA)
30 :     *)
31 :     fun mkDFA startVecs = let
32 :     val n = ref 0 (* next state id *)
33 :     val states = ref []
34 :     (* return the state that the re vector maps to and
35 :     * a flag set to true if the state is new.
36 :     *)
37 : mblume 1903 fun mkState (stateMap, res, asSS) = (case Map.find(stateMap, res)
38 : mblume 1902 of NONE => let
39 :     val id = !n
40 :     fun addFinal (idx, re, finals) =
41 :     if RE.nullable re
42 :     then idx :: finals
43 :     else finals
44 :     val q = LO.State {
45 : mblume 1903 id = id, startState = asSS, label = res,
46 : mblume 1902 final = Vector.foldri addFinal [] res,
47 :     next = ref []
48 :     }
49 :     in
50 :     n := id+1;
51 :     states := q :: !states;
52 :     (true, q, Map.insert(stateMap, res, q))
53 :     end
54 :     | SOME q => (false, q, stateMap)
55 :     (* end case *))
56 :     fun initIter (states, stateMap, []) = (List.rev states, stateMap)
57 :     | initIter (states, stateMap, vec::vecs) = let
58 : mblume 1903 val (_, q, stateMap') = mkState (stateMap, vec, true)
59 : mblume 1902 in initIter (q :: states, stateMap', vecs)
60 :     end
61 :     val (initStates, initStatemap) = initIter ([], Map.empty, startVecs)
62 :     fun f (stateMap, []) = stateMap
63 :     | f (stateMap, LO.State{next, label, ...}::workList) = let
64 :     fun move ((res, edge), (stateMap, workList)) =
65 :     if Vector.all RE.isNone res (* if error transition *)
66 :     then (stateMap, workList)
67 :     else let
68 : mblume 1903 val (isNew, q, stateMap) = mkState (stateMap, res, false)
69 : mblume 1902 in
70 :     next := (edge, q) :: !next;
71 :     if isNew
72 :     then (stateMap, q::workList)
73 :     else (stateMap, workList)
74 :     end
75 :     val edges = RE.derivatives label
76 :     in
77 :     f (List.foldl move (stateMap, workList) edges)
78 :     end
79 :     in
80 :     ignore (f (initStatemap, initStates));
81 :     (initStates, List.rev(!states))
82 :     end
83 :    
84 :     (* clamp a machine to the right character set *)
85 :     fun clamp clampTo states = let
86 :     val ascii127 = SIS.interval (0w0, 0w127)
87 :     fun clampTrans (edge, q) =
88 :     (SIS.intersect (ascii127, edge), q)
89 :     fun clampState (LO.State{next, ...}) =
90 :     next := List.map clampTrans (!next)
91 :     in
92 :     (List.app clampState states;
93 :     states)
94 :     end
95 :    
96 :     fun gen spec = let
97 :     (* TODO: check for invalid start states on rules *)
98 :     val LexSpec.Spec {decls, conf, rules} = spec
99 :     val LexSpec.Conf {structName, header,
100 :     arg, startStates, ...} = conf
101 :     val startStates' = AtomSet.add (startStates, Atom.atom "INITIAL")
102 :     (*
103 :     (* split out actions and associate each ruleSpec to an action ID
104 :     *
105 :     * Note: matchActions tries to find textually idential actions and map
106 :     * them to the same entry in the action vector
107 :     *)
108 :     fun matchActions rules = let
109 :     fun iter ((ruleSpec, action)::rules,
110 :     ruleSpecs, actions, actionMap, n) = let
111 :     val key = Atom.atom action
112 :     val (i, actions', actionMap', n') =
113 :     case AtomMap.find (actionMap, key)
114 :     of NONE => (n, action::actions,
115 :     AtomMap.insert (actionMap, key, n),
116 :     n+1)
117 :     | SOME i => (i, actions, actionMap, n)
118 :     in
119 :     iter (rules, (i, ruleSpec)::ruleSpecs,
120 :     actions', actionMap', n')
121 :     end
122 :     | iter ([], ruleSpecs, actions, _, _) =
123 :     (List.rev ruleSpecs, List.rev actions)
124 :     in
125 :     iter (rules, [], [], AtomMap.empty, 0)
126 :     end
127 :     val (ruleSpecs, actions) = matchActions rules
128 :     *)
129 :     val (ruleSpecs, actions) = ListPair.unzip rules
130 :     val actionsVec = Vector.fromList actions
131 :     val startStates = AtomSet.listItems startStates'
132 :     fun SSVec label = let
133 :     fun hasRule (NONE, re) = re
134 :     | hasRule (SOME ss, re) =
135 :     if AtomSet.member (ss, label)
136 :     then re
137 :     else RegExp.none
138 :     val rules = List.map hasRule ruleSpecs
139 :     in Vector.fromList rules
140 :     end
141 :     val (initStates, states) = mkDFA (List.map SSVec startStates)
142 :     in LO.Spec {
143 :     decls = decls,
144 :     header = (if String.size header = 0
145 :     then "structure " ^
146 :     (if String.size structName = 0
147 :     then "Mlex"
148 :     else structName)
149 :     else header),
150 :     arg = arg,
151 :     actions = actionsVec,
152 : jhr 1919 dfa = states,
153 : mblume 1902 startStates = ListPair.zip
154 :     (List.map Atom.toString startStates,
155 :     initStates)
156 :     }
157 :     end
158 :    
159 :     end

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0