2 |
|
|
3 |
type svalue = Tokens.svalue |
type svalue = Tokens.svalue |
4 |
type pos = int |
type pos = int |
5 |
|
|
6 |
type ('a, 'b) token = ('a, 'b) Tokens.token |
type ('a, 'b) token = ('a, 'b) Tokens.token |
7 |
type lexresult = (svalue, pos) token |
type lexresult = (svalue, pos) token |
8 |
|
|
9 |
fun err (p1, p2) = ErrorMsg.error p1 |
type lexarg = { |
10 |
|
enterC: unit -> unit, |
11 |
|
leaveC: unit -> bool, |
12 |
|
newS: pos -> unit, |
13 |
|
addS: char -> unit, |
14 |
|
addSC: string * int -> unit, |
15 |
|
addSN: string * pos -> unit, |
16 |
|
getS: pos -> lexresult, |
17 |
|
handleEof: unit -> lexresult, |
18 |
|
newline: pos -> unit, |
19 |
|
error: pos -> string -> unit |
20 |
|
} |
21 |
|
|
22 |
|
type arg = lexarg |
23 |
|
|
24 |
|
fun eof (arg: lexarg) = (#handleEof arg ()) |
25 |
|
|
26 |
|
(* |
27 |
local |
local |
28 |
val depth = ref 0 |
val depth = ref 0 |
29 |
val curstring = ref ([]: char list) |
val curstring = ref ([]: char list) |
30 |
val startpos = ref 0 |
val startpos = ref 0 |
31 |
val instring = ref false |
val instring = ref false |
32 |
in |
in |
33 |
|
|
34 |
|
|
35 |
fun resetAll () = (depth := 0; startpos := 0; instring := false) |
fun resetAll () = (depth := 0; startpos := 0; instring := false) |
36 |
|
|
37 |
(* comment stuff *) |
(* comment stuff *) |
60 |
Tokens.STRING (implode (rev (!curstring)), !startpos, endpos + 1)) |
Tokens.STRING (implode (rev (!curstring)), !startpos, endpos + 1)) |
61 |
|
|
62 |
(* handling EOF *) |
(* handling EOF *) |
63 |
fun eof() = let |
fun eof (arg: ) = let |
64 |
val pos = ErrorMsg.lastLinePos () |
val pos = ErrorMsg.lastLinePos () |
65 |
in |
in |
66 |
if !depth > 0 then |
if !depth > 0 then |
72 |
Tokens.EOF(pos,pos) |
Tokens.EOF(pos,pos) |
73 |
end |
end |
74 |
end |
end |
75 |
|
*) |
76 |
|
|
77 |
local |
local |
78 |
val idlist = [("Alias", Tokens.ALIAS), |
val idlist = [("Alias", Tokens.ALIAS), |
92 |
in |
in |
93 |
fun idToken (t, p) = |
fun idToken (t, p) = |
94 |
case List.find (fn (id, _) => id = t) idlist of |
case List.find (fn (id, _) => id = t) idlist of |
95 |
NONE => Tokens.ID (t, p, p + size t) |
NONE => Tokens.FILE_STANDARD (t, p, p + size t) |
96 |
| SOME (_, tok) => tok (p, p + size t) |
| SOME (_, tok) => tok (p, p + size t) |
97 |
end |
end |
98 |
|
|
99 |
fun newLine p = ErrorMsg.newLine p |
(* states: |
100 |
|
|
101 |
|
INITIAL -> C |
102 |
|
| |
103 |
|
+------> P -> PC |
104 |
|
| | |
105 |
|
| +--> PM -> PMC |
106 |
|
| |
107 |
|
+------> M -> MC |
108 |
|
| |
109 |
|
+------> S -> SS |
110 |
|
|
111 |
|
"C" -- COMMENT |
112 |
|
"P" -- PREPROC |
113 |
|
"M" -- MLSYMBOL |
114 |
|
"S" -- STRING |
115 |
|
"SS" -- STRINGSKIP |
116 |
|
*) |
117 |
|
|
118 |
%% |
%% |
119 |
%s COMMENT STRING STRINGSKIP; |
|
120 |
|
%s C P PC PM PMC M MC S SS; |
121 |
|
|
122 |
%header(functor CMLexFun (structure Tokens: CM_TOKENS)); |
%header(functor CMLexFun (structure Tokens: CM_TOKENS)); |
123 |
|
|
124 |
|
%arg ({ enterC, leaveC, |
125 |
|
newS, addS, addSC, addSN, getS, |
126 |
|
handleEof, |
127 |
|
newline, |
128 |
|
error }); |
129 |
|
|
130 |
idchars=[A-Za-z'_0-9]; |
idchars=[A-Za-z'_0-9]; |
131 |
id=[A-Za-z]{idchars}*; |
id=[A-Za-z]{idchars}*; |
132 |
|
cmextrachars=[!%&$+/<=>?@~|#*]|\-|\^; |
133 |
|
cmidchars={idchars}|{cmextrachars}; |
134 |
|
cmid={cmextrachars}+; |
135 |
ws=("\012"|[\t\ ]); |
ws=("\012"|[\t\ ]); |
136 |
eol=("\013\010"|"\013"|"\010"); |
eol=("\013\010"|"\013"|"\010"); |
137 |
sym=[!%&$+/:<=>?@~|#*]|\-|\^|"\\"; |
sym=[!%&$+/:<=>?@~|#*]|\-|\^|"\\"; |
139 |
sharp="#"; |
sharp="#"; |
140 |
%% |
%% |
141 |
|
|
142 |
<COMMENT>"(*" => (enterC (); continue ()); |
<INITIAL>"(*" => (enterC (); YYBEGIN C; continue ()); |
143 |
<COMMENT>"*)" => (if leaveC () then YYBEGIN INITIAL else (); |
<P>"(*" => (enterC (); YYBEGIN PC; continue ()); |
144 |
|
<PM>"(*" => (enterC (); YYBEGIN PMC; continue ()); |
145 |
|
<M>"(*" => (enterC (); YYBEGIN MC; continue ()); |
146 |
|
|
147 |
|
<C,PC,PMC,MC>"(*" => (enterC (); continue ()); |
148 |
|
|
149 |
|
<C>"*)" => (if leaveC () then YYBEGIN INITIAL else (); |
150 |
|
continue ()); |
151 |
|
<PC>"*)" => (if leaveC () then YYBEGIN P else (); |
152 |
|
continue ()); |
153 |
|
<PMC>"*)" => (if leaveC () then YYBEGIN PM else (); |
154 |
|
continue ()); |
155 |
|
<MC>"*)" => (if leaveC () then YYBEGIN M else (); |
156 |
|
continue ()); |
157 |
|
<C,PC,PMC,MC>{eol} => (newline yypos; continue ()); |
158 |
|
<C,PC,PMC,MC>. => (continue ()); |
159 |
|
|
160 |
|
<INITIAL,P,PM,M>"*)" => (error yypos "unmatched comment delimiter"; |
161 |
continue ()); |
continue ()); |
|
<COMMENT>{eol} => (newLine yypos; continue ()); |
|
|
<COMMENT>. => (continue ()); |
|
162 |
|
|
163 |
<STRING>"\\a" => (addS #"\a"; continue ()); |
<INITIAL>"\"" => (YYBEGIN S; newS yypos; continue ()); |
|
<STRING>"\\b" => (addS #"\b"; continue ()); |
|
|
<STRING>"\\f" => (addS #"\f"; continue ()); |
|
|
<STRING>"\\n" => (addS #"\n"; continue ()); |
|
|
<STRING>"\\r" => (addS #"\r"; continue ()); |
|
|
<STRING>"\\t" => (addS #"\t"; continue ()); |
|
|
<STRING>"\\v" => (addS #"\v"; continue ()); |
|
|
|
|
|
<STRING>"\\^"@ => (addS (chr 0); continue ()); |
|
|
<STRING>"\\^"[a-z] => (addSC (yytext, yypos, ord #"a"); continue ()); |
|
|
<STRING>"\\^"[A-Z] => (addSC (yytext, yypos, ord #"A"); continue ()); |
|
|
<STRING>"\\^[" => (addS (chr 27); continue ()); |
|
|
<STRING>"\\^\\" => (addS (chr 28); continue ()); |
|
|
<STRING>"\\^]" => (addS (chr 29); continue ()); |
|
|
<STRING>"\\^^" => (addS (chr 30); continue ()); |
|
|
<STRING>"\\^_" => (addS (chr 31); continue ()); |
|
|
|
|
|
<STRING>"\\"[0-9][0-9][0-9] => (addSN (yytext, yypos); continue ()); |
|
164 |
|
|
165 |
<STRING>"\\\"" => (addS #"\""; continue ()); |
<S>"\\a" => (addS #"\a"; continue ()); |
166 |
<STRING>"\\\\" => (addS #"\\"; continue ()); |
<S>"\\b" => (addS #"\b"; continue ()); |
167 |
|
<S>"\\f" => (addS #"\f"; continue ()); |
168 |
|
<S>"\\n" => (addS #"\n"; continue ()); |
169 |
|
<S>"\\r" => (addS #"\r"; continue ()); |
170 |
|
<S>"\\t" => (addS #"\t"; continue ()); |
171 |
|
<S>"\\v" => (addS #"\v"; continue ()); |
172 |
|
|
173 |
|
<S>"\\^"@ => (addS (chr 0); continue ()); |
174 |
|
<S>"\\^"[a-z] => (addSC (yytext, ord #"a"); continue ()); |
175 |
|
<S>"\\^"[A-Z] => (addSC (yytext, ord #"A"); continue ()); |
176 |
|
<S>"\\^[" => (addS (chr 27); continue ()); |
177 |
|
<S>"\\^\\" => (addS (chr 28); continue ()); |
178 |
|
<S>"\\^]" => (addS (chr 29); continue ()); |
179 |
|
<S>"\\^^" => (addS (chr 30); continue ()); |
180 |
|
<S>"\\^_" => (addS (chr 31); continue ()); |
181 |
|
|
182 |
|
<S>"\\"[0-9][0-9][0-9] => (addSN (yytext, yypos); continue ()); |
183 |
|
|
184 |
<STRING>"\\"{eol} => (YYBEGIN STRINGSKIP; newLine yypos; continue ()); |
<S>"\\\"" => (addS #"\""; continue ()); |
185 |
<STRING>"\\"{ws}+ => (YYBEGIN STRINGSKIP; continue ()); |
<S>"\\\\" => (addS #"\\"; continue ()); |
186 |
|
|
187 |
<STRING>"\\". => (ErrorMsg.error yypos |
<S>"\\"{eol} => (YYBEGIN SS; newline (yypos + 1); continue ()); |
188 |
|
<S>"\\"{ws}+ => (YYBEGIN SS; continue ()); |
189 |
|
|
190 |
|
<S>"\\". => (error yypos |
191 |
("illegal escape character in string " ^ yytext); |
("illegal escape character in string " ^ yytext); |
192 |
continue ()); |
continue ()); |
193 |
|
|
194 |
<STRING>"\"" => (YYBEGIN INITIAL; getS yypos); |
<S>"\"" => (YYBEGIN INITIAL; getS yypos); |
195 |
<STRING>{eol} => (ErrorMsg.error yypos "illegal linebreak in string"; |
<S>{eol} => (newline yypos; |
196 |
|
error yypos "illegal linebreak in string"; |
197 |
continue ()); |
continue ()); |
|
<STRING>. => (addS (String.sub (yytext, 0)); continue ()); |
|
198 |
|
|
199 |
<STRINGSKIP>{eol} => (newLine yypos; continue ()); |
<S>. => (addS (String.sub (yytext, 0)); continue ()); |
|
<STRINGSKIP>{ws}+ => (continue ()); |
|
|
<STRINGSKIP>"\\" => (YYBEGIN STRING; continue ()); |
|
|
<STRINGSKIP>. => (ErrorMsg.error yypos |
|
|
("illegal character in stringskip " ^ yytext); |
|
|
continue ()); |
|
200 |
|
|
201 |
<INITIAL>"(*" => (YYBEGIN COMMENT; enterC (); continue ()); |
<SS>{eol} => (newline yypos; continue ()); |
202 |
<INITIAL>"*)" => (ErrorMsg.error yypos "unmatched comment delimiter"; |
<SS>{ws}+ => (continue ()); |
203 |
|
<SS>"\\" => (YYBEGIN S; continue ()); |
204 |
|
<SS>. => (error yypos |
205 |
|
("illegal character in stringskip " ^ yytext); |
206 |
continue ()); |
continue ()); |
|
<INITIAL>"\"" => (YYBEGIN STRING; newS yypos; continue ()); |
|
207 |
|
|
208 |
<INITIAL>"(" => (Tokens.LPAREN (yypos, yypos + 1)); |
<INITIAL,P>"(" => (Tokens.LPAREN (yypos, yypos + 1)); |
209 |
<INITIAL>")" => (Tokens.RPAREN (yypos, yypos + 1)); |
<INITIAL,P>")" => (Tokens.RPAREN (yypos, yypos + 1)); |
|
<INITIAL>"," => (Tokens.COMMA (yypos, yypos + 1)); |
|
210 |
<INITIAL>":" => (Tokens.COLON (yypos, yypos + 1)); |
<INITIAL>":" => (Tokens.COLON (yypos, yypos + 1)); |
211 |
<INITIAL>"+" => (Tokens.PLUS (yypos, yypos + 1)); |
<P>"+" => (Tokens.PLUS (yypos, yypos + 1)); |
212 |
<INITIAL>"-" => (Tokens.MINUS (yypos, yypos + 1)); |
<P>"-" => (Tokens.MINUS (yypos, yypos + 1)); |
213 |
<INITIAL>"*" => (Tokens.TIMES (yypos, yypos + 1)); |
<P>"*" => (Tokens.TIMES (yypos, yypos + 1)); |
214 |
<INITIAL>"<>" => (Tokens.NE (yypos, yypos + 2)); |
<P>"<>" => (Tokens.NE (yypos, yypos + 2)); |
215 |
<INITIAL>"<=" => (Tokens.LE (yypos, yypos + 2)); |
<P>"<=" => (Tokens.LE (yypos, yypos + 2)); |
216 |
<INITIAL>"<" => (Tokens.LT (yypos, yypos + 1)); |
<P>"<" => (Tokens.LT (yypos, yypos + 1)); |
217 |
<INITIAL>">=" => (Tokens.GE (yypos, yypos + 2)); |
<P>">=" => (Tokens.GE (yypos, yypos + 2)); |
218 |
<INITIAL>">" => (Tokens.GT (yypos, yypos + 1)); |
<P>">" => (Tokens.GT (yypos, yypos + 1)); |
219 |
<INITIAL>"=" => (Tokens.EQ (yypos, yypos + 1)); |
<P>"=" => (Tokens.EQ (yypos, yypos + 1)); |
220 |
|
<P>"~" => (Tokens.TILDE (yypos, yypos + 1)); |
221 |
|
|
222 |
<INITIAL>{digit}+ => (Tokens.NUMBER |
<P>{digit}+ => (Tokens.NUMBER |
223 |
(valOf (Int.fromString yytext) |
(valOf (Int.fromString yytext) |
224 |
handle _ => |
handle _ => |
225 |
(ErrorMsg.error yypos "number too large"; 0), |
(error yypos "number too large"; 0), |
226 |
yypos, yypos + size yytext)); |
yypos, yypos + size yytext)); |
|
<INITIAL>{sym}+ => (Tokens.ID (yytext, yypos, yypos + size yytext)); |
|
|
<INITIAL>{id} => (idToken (yytext, yypos)); |
|
227 |
|
|
228 |
<INITIAL>{eol}{sharp}{ws}*"if" => (Tokens.IF (yypos, yypos + size yytext)); |
<P>{id} => (Tokens.CM_ID (yytext, yypos, yypos + size yytext)); |
229 |
<INITIAL>{eol}{sharp}{ws}*"then" => (Tokens.THEN (yypos, yypos + size yytext)); |
|
230 |
<INITIAL>{eol}{sharp}{ws}*"elif" => (Tokens.ELIF (yypos, yypos + size yytext)); |
<M>({id}|{sym}+) => (YYBEGIN INITIAL; |
231 |
<INITIAL>{eol}{sharp}{ws}*"else" => (Tokens.ELSE (yypos, yypos + size yytext)); |
Tokens.ML_ID (yytext, yypos, yypos + size yytext)); |
232 |
<INITIAL>{eol}{sharp}{ws}*"endif" => (Tokens.ENDIF (yypos, |
<PM>({id}|{sym}+) => (YYBEGIN P; |
233 |
|
Tokens.ML_ID (yytext, yypos, yypos + size yytext)); |
234 |
|
|
235 |
|
<INITIAL>{eol}{sharp}{ws}*"if" => (YYBEGIN P; |
236 |
|
newline yypos; |
237 |
|
Tokens.IF (yypos, yypos + size yytext)); |
238 |
|
<INITIAL>{eol}{sharp}{ws}*"then" => (YYBEGIN P; |
239 |
|
newline yypos; |
240 |
|
Tokens.THEN (yypos, yypos + size yytext)); |
241 |
|
<INITIAL>{eol}{sharp}{ws}*"elif" => (YYBEGIN P; |
242 |
|
newline yypos; |
243 |
|
Tokens.ELIF (yypos, yypos + size yytext)); |
244 |
|
<INITIAL>{eol}{sharp}{ws}*"else" => (YYBEGIN P; |
245 |
|
newline yypos; |
246 |
|
Tokens.ELSE (yypos, yypos + size yytext)); |
247 |
|
<INITIAL>{eol}{sharp}{ws}*"endif" => (YYBEGIN P; |
248 |
|
newline yypos; |
249 |
|
Tokens.ENDIF (yypos, |
250 |
yypos + size yytext)); |
yypos + size yytext)); |
251 |
|
|
252 |
<INITIAL>{eol} => (newLine yypos; continue ()); |
<INITIAL,M,PM>{eol} => (newline yypos; continue ()); |
253 |
<INITIAL>{ws}+ => (continue ()); |
<P>{eol} => (YYBEGIN INITIAL; newline yypos; continue ()); |
254 |
<INITIAL>. => (ErrorMsg.error yypos |
|
255 |
("illegal character " ^ yytext); |
<INITIAL,M,PM,P>{ws}+ => (continue ()); |
256 |
|
|
257 |
|
<M,PM>. => (error yypos |
258 |
|
("illegal character at start of ML symbol: " ^ |
259 |
|
yytext); |
260 |
|
continue ()); |
261 |
|
|
262 |
|
<INITIAL>{cmid} => (idToken (yytext, yypos)); |
263 |
|
|
264 |
|
|
265 |
|
<INITIAL>. => (error yypos |
266 |
|
("illegal character: " ^ yytext); |
267 |
continue ()); |
continue ()); |