1 : |
mblume |
1902 |
(* lex-gen.sml
|
2 : |
|
|
*
|
3 : |
|
|
* COPYRIGHT (c) 2005
|
4 : |
|
|
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
5 : |
|
|
* Aaron Turon (adrassi@gmail.com)
|
6 : |
|
|
* All rights reserved.
|
7 : |
|
|
*
|
8 : |
|
|
* DFA generation using RE derivatives
|
9 : |
|
|
*)
|
10 : |
|
|
|
11 : |
|
|
structure LexGen :
|
12 : |
|
|
sig
|
13 : |
|
|
|
14 : |
|
|
val gen : LexSpec.spec -> LexOutputSpec.spec
|
15 : |
|
|
|
16 : |
|
|
end = struct
|
17 : |
|
|
|
18 : |
|
|
structure RE = RegExp
|
19 : |
|
|
structure SIS = RegExp.SymSet
|
20 : |
|
|
structure LO = LexOutputSpec
|
21 : |
|
|
|
22 : |
|
|
structure Map = RedBlackMapFn (
|
23 : |
|
|
struct
|
24 : |
|
|
type ord_key = RE.re Vector.vector
|
25 : |
|
|
val compare = Vector.collate RE.compare
|
26 : |
|
|
end)
|
27 : |
|
|
|
28 : |
|
|
(* given a list of RE vectors (start states), produce a DFA recognizer
|
29 : |
|
|
* NOTE: invoked once per start state (each start state has a DFA)
|
30 : |
|
|
*)
|
31 : |
|
|
fun mkDFA startVecs = let
|
32 : |
|
|
val n = ref 0 (* next state id *)
|
33 : |
|
|
val states = ref []
|
34 : |
|
|
(* return the state that the re vector maps to and
|
35 : |
|
|
* a flag set to true if the state is new.
|
36 : |
|
|
*)
|
37 : |
mblume |
1903 |
fun mkState (stateMap, res, asSS) = (case Map.find(stateMap, res)
|
38 : |
mblume |
1902 |
of NONE => let
|
39 : |
|
|
val id = !n
|
40 : |
|
|
fun addFinal (idx, re, finals) =
|
41 : |
|
|
if RE.nullable re
|
42 : |
|
|
then idx :: finals
|
43 : |
|
|
else finals
|
44 : |
|
|
val q = LO.State {
|
45 : |
mblume |
1903 |
id = id, startState = asSS, label = res,
|
46 : |
mblume |
1902 |
final = Vector.foldri addFinal [] res,
|
47 : |
|
|
next = ref []
|
48 : |
|
|
}
|
49 : |
|
|
in
|
50 : |
|
|
n := id+1;
|
51 : |
|
|
states := q :: !states;
|
52 : |
|
|
(true, q, Map.insert(stateMap, res, q))
|
53 : |
|
|
end
|
54 : |
|
|
| SOME q => (false, q, stateMap)
|
55 : |
|
|
(* end case *))
|
56 : |
|
|
fun initIter (states, stateMap, []) = (List.rev states, stateMap)
|
57 : |
|
|
| initIter (states, stateMap, vec::vecs) = let
|
58 : |
mblume |
1903 |
val (_, q, stateMap') = mkState (stateMap, vec, true)
|
59 : |
mblume |
1902 |
in initIter (q :: states, stateMap', vecs)
|
60 : |
|
|
end
|
61 : |
|
|
val (initStates, initStatemap) = initIter ([], Map.empty, startVecs)
|
62 : |
|
|
fun f (stateMap, []) = stateMap
|
63 : |
|
|
| f (stateMap, LO.State{next, label, ...}::workList) = let
|
64 : |
|
|
fun move ((res, edge), (stateMap, workList)) =
|
65 : |
|
|
if Vector.all RE.isNone res (* if error transition *)
|
66 : |
|
|
then (stateMap, workList)
|
67 : |
|
|
else let
|
68 : |
mblume |
1903 |
val (isNew, q, stateMap) = mkState (stateMap, res, false)
|
69 : |
mblume |
1902 |
in
|
70 : |
|
|
next := (edge, q) :: !next;
|
71 : |
|
|
if isNew
|
72 : |
|
|
then (stateMap, q::workList)
|
73 : |
|
|
else (stateMap, workList)
|
74 : |
|
|
end
|
75 : |
|
|
val edges = RE.derivatives label
|
76 : |
|
|
in
|
77 : |
|
|
f (List.foldl move (stateMap, workList) edges)
|
78 : |
|
|
end
|
79 : |
|
|
in
|
80 : |
|
|
ignore (f (initStatemap, initStates));
|
81 : |
|
|
(initStates, List.rev(!states))
|
82 : |
|
|
end
|
83 : |
|
|
|
84 : |
|
|
(* clamp a machine to the right character set *)
|
85 : |
|
|
fun clamp clampTo states = let
|
86 : |
|
|
val ascii127 = SIS.interval (0w0, 0w127)
|
87 : |
|
|
fun clampTrans (edge, q) =
|
88 : |
|
|
(SIS.intersect (ascii127, edge), q)
|
89 : |
|
|
fun clampState (LO.State{next, ...}) =
|
90 : |
|
|
next := List.map clampTrans (!next)
|
91 : |
|
|
in
|
92 : |
|
|
(List.app clampState states;
|
93 : |
|
|
states)
|
94 : |
|
|
end
|
95 : |
|
|
|
96 : |
|
|
fun gen spec = let
|
97 : |
|
|
(* TODO: check for invalid start states on rules *)
|
98 : |
|
|
val LexSpec.Spec {decls, conf, rules} = spec
|
99 : |
|
|
val LexSpec.Conf {structName, header,
|
100 : |
|
|
arg, startStates, ...} = conf
|
101 : |
|
|
val startStates' = AtomSet.add (startStates, Atom.atom "INITIAL")
|
102 : |
|
|
(*
|
103 : |
|
|
(* split out actions and associate each ruleSpec to an action ID
|
104 : |
|
|
*
|
105 : |
|
|
* Note: matchActions tries to find textually idential actions and map
|
106 : |
|
|
* them to the same entry in the action vector
|
107 : |
|
|
*)
|
108 : |
|
|
fun matchActions rules = let
|
109 : |
|
|
fun iter ((ruleSpec, action)::rules,
|
110 : |
|
|
ruleSpecs, actions, actionMap, n) = let
|
111 : |
|
|
val key = Atom.atom action
|
112 : |
|
|
val (i, actions', actionMap', n') =
|
113 : |
|
|
case AtomMap.find (actionMap, key)
|
114 : |
|
|
of NONE => (n, action::actions,
|
115 : |
|
|
AtomMap.insert (actionMap, key, n),
|
116 : |
|
|
n+1)
|
117 : |
|
|
| SOME i => (i, actions, actionMap, n)
|
118 : |
|
|
in
|
119 : |
|
|
iter (rules, (i, ruleSpec)::ruleSpecs,
|
120 : |
|
|
actions', actionMap', n')
|
121 : |
|
|
end
|
122 : |
|
|
| iter ([], ruleSpecs, actions, _, _) =
|
123 : |
|
|
(List.rev ruleSpecs, List.rev actions)
|
124 : |
|
|
in
|
125 : |
|
|
iter (rules, [], [], AtomMap.empty, 0)
|
126 : |
|
|
end
|
127 : |
|
|
val (ruleSpecs, actions) = matchActions rules
|
128 : |
|
|
*)
|
129 : |
|
|
val (ruleSpecs, actions) = ListPair.unzip rules
|
130 : |
|
|
val actionsVec = Vector.fromList actions
|
131 : |
|
|
val startStates = AtomSet.listItems startStates'
|
132 : |
|
|
fun SSVec label = let
|
133 : |
|
|
fun hasRule (NONE, re) = re
|
134 : |
|
|
| hasRule (SOME ss, re) =
|
135 : |
|
|
if AtomSet.member (ss, label)
|
136 : |
|
|
then re
|
137 : |
|
|
else RegExp.none
|
138 : |
|
|
val rules = List.map hasRule ruleSpecs
|
139 : |
|
|
in Vector.fromList rules
|
140 : |
|
|
end
|
141 : |
|
|
val (initStates, states) = mkDFA (List.map SSVec startStates)
|
142 : |
|
|
in LO.Spec {
|
143 : |
|
|
decls = decls,
|
144 : |
|
|
header = (if String.size header = 0
|
145 : |
|
|
then "structure " ^
|
146 : |
|
|
(if String.size structName = 0
|
147 : |
|
|
then "Mlex"
|
148 : |
|
|
else structName)
|
149 : |
|
|
else header),
|
150 : |
|
|
arg = arg,
|
151 : |
|
|
actions = actionsVec,
|
152 : |
jhr |
1919 |
dfa = states,
|
153 : |
mblume |
1902 |
startStates = ListPair.zip
|
154 : |
|
|
(List.map Atom.toString startStates,
|
155 : |
|
|
initStates)
|
156 : |
|
|
}
|
157 : |
|
|
end
|
158 : |
|
|
|
159 : |
|
|
end
|