SCM Repository
Annotation of /sml/trunk/src/cm/parse/cm.lex
Parent Directory
|
Revision Log
Revision 313 - (view) (download)
1 : | blume | 267 | (* -*- sml -*- |
2 : | * | ||
3 : | * lexical analysis (ML-Lex specification) for CM description files | ||
4 : | * | ||
5 : | * (C) 1999 Lucent Technologies, Bell Laboratories | ||
6 : | * | ||
7 : | * Author: Matthias Blume (blume@kurims.kyoto-u.ac.jp) | ||
8 : | *) | ||
9 : | blume | 262 | |
10 : | type svalue = Tokens.svalue | ||
11 : | type pos = int | ||
12 : | blume | 265 | |
13 : | blume | 262 | type ('a, 'b) token = ('a, 'b) Tokens.token |
14 : | type lexresult = (svalue, pos) token | ||
15 : | |||
16 : | blume | 265 | type lexarg = { |
17 : | enterC: unit -> unit, | ||
18 : | leaveC: unit -> bool, | ||
19 : | blume | 294 | newS: pos -> unit, |
20 : | blume | 265 | addS: char -> unit, |
21 : | addSC: string * int -> unit, | ||
22 : | addSN: string * pos -> unit, | ||
23 : | blume | 266 | getS: pos * (string * pos * pos -> lexresult) -> lexresult, |
24 : | blume | 268 | handleEof: unit -> pos, |
25 : | blume | 265 | newline: pos -> unit, |
26 : | blume | 313 | error: pos * pos -> string -> unit, |
27 : | sync: pos * string -> unit | ||
28 : | blume | 265 | } |
29 : | blume | 262 | |
30 : | blume | 265 | type arg = lexarg |
31 : | |||
32 : | blume | 268 | fun eof (arg: lexarg) = let |
33 : | val pos = #handleEof arg () | ||
34 : | blume | 262 | in |
35 : | blume | 268 | Tokens.EOF (pos, pos) |
36 : | blume | 262 | end |
37 : | |||
38 : | blume | 274 | fun errorTok (t, p) = let |
39 : | fun findGraph i = | ||
40 : | if Char.isGraph (String.sub (t, i)) then i | ||
41 : | else findGraph (i + 1) | ||
42 : | fun findError i = | ||
43 : | if String.sub (t, i) = #"e" then i | ||
44 : | else findError (i + 1) | ||
45 : | val start = findGraph (5 + findError 0) | ||
46 : | val msg = String.extract (t, start, NONE) | ||
47 : | blume | 262 | in |
48 : | blume | 275 | Tokens.ERROR (msg, p + 1, p + size t) |
49 : | blume | 262 | end |
50 : | |||
51 : | blume | 274 | val cm_ids = [("Alias", Tokens.ALIAS), |
52 : | blume | 276 | ("ALIAS", Tokens.ALIAS), |
53 : | ("alias", Tokens.ALIAS), | ||
54 : | blume | 274 | ("Group", Tokens.GROUP), |
55 : | blume | 276 | ("GROUP", Tokens.GROUP), |
56 : | ("group", Tokens.GROUP), | ||
57 : | blume | 274 | ("Library", Tokens.LIBRARY), |
58 : | blume | 276 | ("LIBRARY", Tokens.LIBRARY), |
59 : | ("library", Tokens.LIBRARY), | ||
60 : | ("IS", Tokens.IS), | ||
61 : | blume | 274 | ("is", Tokens.IS)] |
62 : | |||
63 : | val ml_ids = [("structure", Tokens.STRUCTURE), | ||
64 : | ("signature", Tokens.SIGNATURE), | ||
65 : | ("functor", Tokens.FUNCTOR), | ||
66 : | ("funsig", Tokens.FUNSIG)] | ||
67 : | |||
68 : | val pp_ids = [("defined", Tokens.DEFINED), | ||
69 : | ("div", Tokens.DIV), | ||
70 : | ("mod", Tokens.MOD), | ||
71 : | ("andalso", Tokens.ANDALSO), | ||
72 : | ("orelse", Tokens.ORELSE), | ||
73 : | ("not", Tokens.NOT)] | ||
74 : | |||
75 : | fun idToken (t, p, idlist, default, chstate) = | ||
76 : | case List.find (fn (id, _) => id = t) ml_ids of | ||
77 : | SOME (_, tok) => (chstate (); tok (p, p + size t)) | ||
78 : | | NONE => | ||
79 : | (case List.find (fn (id, _) => id = t) idlist of | ||
80 : | SOME (_, tok) => tok (p, p + size t) | ||
81 : | | NONE => default (t, p, p + size t)) | ||
82 : | |||
83 : | blume | 265 | (* states: |
84 : | blume | 262 | |
85 : | blume | 265 | INITIAL -> C |
86 : | | | ||
87 : | +------> P -> PC | ||
88 : | | | | ||
89 : | | +--> PM -> PMC | ||
90 : | | | ||
91 : | +------> M -> MC | ||
92 : | | | ||
93 : | +------> S -> SS | ||
94 : | |||
95 : | "C" -- COMMENT | ||
96 : | "P" -- PREPROC | ||
97 : | "M" -- MLSYMBOL | ||
98 : | "S" -- STRING | ||
99 : | "SS" -- STRINGSKIP | ||
100 : | *) | ||
101 : | |||
102 : | blume | 262 | %% |
103 : | |||
104 : | blume | 274 | %s C P PC PM PMC M MC S SS; |
105 : | blume | 265 | |
106 : | blume | 262 | %header(functor CMLexFun (structure Tokens: CM_TOKENS)); |
107 : | |||
108 : | blume | 265 | %arg ({ enterC, leaveC, |
109 : | newS, addS, addSC, addSN, getS, | ||
110 : | handleEof, | ||
111 : | newline, | ||
112 : | blume | 313 | error, |
113 : | sync }); | ||
114 : | blume | 265 | |
115 : | blume | 262 | idchars=[A-Za-z'_0-9]; |
116 : | id=[A-Za-z]{idchars}*; | ||
117 : | blume | 274 | cmextrachars=[.;,!%&$+/<=>?@~|#*]|\-|\^; |
118 : | blume | 265 | cmidchars={idchars}|{cmextrachars}; |
119 : | blume | 274 | cmid={cmidchars}+; |
120 : | blume | 262 | ws=("\012"|[\t\ ]); |
121 : | eol=("\013\010"|"\013"|"\010"); | ||
122 : | blume | 274 | neol=[^\013\010]; |
123 : | blume | 262 | sym=[!%&$+/:<=>?@~|#*]|\-|\^|"\\"; |
124 : | digit=[0-9]; | ||
125 : | sharp="#"; | ||
126 : | %% | ||
127 : | |||
128 : | blume | 265 | <INITIAL>"(*" => (enterC (); YYBEGIN C; continue ()); |
129 : | <P>"(*" => (enterC (); YYBEGIN PC; continue ()); | ||
130 : | <PM>"(*" => (enterC (); YYBEGIN PMC; continue ()); | ||
131 : | <M>"(*" => (enterC (); YYBEGIN MC; continue ()); | ||
132 : | |||
133 : | <C,PC,PMC,MC>"(*" => (enterC (); continue ()); | ||
134 : | |||
135 : | <C>"*)" => (if leaveC () then YYBEGIN INITIAL else (); | ||
136 : | blume | 262 | continue ()); |
137 : | blume | 265 | <PC>"*)" => (if leaveC () then YYBEGIN P else (); |
138 : | continue ()); | ||
139 : | <PMC>"*)" => (if leaveC () then YYBEGIN PM else (); | ||
140 : | continue ()); | ||
141 : | <MC>"*)" => (if leaveC () then YYBEGIN M else (); | ||
142 : | continue ()); | ||
143 : | <C,PC,PMC,MC>{eol} => (newline yypos; continue ()); | ||
144 : | <C,PC,PMC,MC>. => (continue ()); | ||
145 : | blume | 262 | |
146 : | blume | 268 | <INITIAL,P,PM,M>"*)" => (error (yypos, yypos+2) |
147 : | "unmatched comment delimiter"; | ||
148 : | blume | 265 | continue ()); |
149 : | blume | 262 | |
150 : | blume | 294 | <INITIAL>"\"" => (YYBEGIN S; newS yypos; continue ()); |
151 : | blume | 262 | |
152 : | blume | 265 | <S>"\\a" => (addS #"\a"; continue ()); |
153 : | <S>"\\b" => (addS #"\b"; continue ()); | ||
154 : | <S>"\\f" => (addS #"\f"; continue ()); | ||
155 : | <S>"\\n" => (addS #"\n"; continue ()); | ||
156 : | <S>"\\r" => (addS #"\r"; continue ()); | ||
157 : | <S>"\\t" => (addS #"\t"; continue ()); | ||
158 : | <S>"\\v" => (addS #"\v"; continue ()); | ||
159 : | blume | 262 | |
160 : | blume | 265 | <S>"\\^"@ => (addS (chr 0); continue ()); |
161 : | <S>"\\^"[a-z] => (addSC (yytext, ord #"a"); continue ()); | ||
162 : | <S>"\\^"[A-Z] => (addSC (yytext, ord #"A"); continue ()); | ||
163 : | <S>"\\^[" => (addS (chr 27); continue ()); | ||
164 : | <S>"\\^\\" => (addS (chr 28); continue ()); | ||
165 : | <S>"\\^]" => (addS (chr 29); continue ()); | ||
166 : | <S>"\\^^" => (addS (chr 30); continue ()); | ||
167 : | <S>"\\^_" => (addS (chr 31); continue ()); | ||
168 : | blume | 262 | |
169 : | blume | 265 | <S>"\\"[0-9][0-9][0-9] => (addSN (yytext, yypos); continue ()); |
170 : | blume | 262 | |
171 : | blume | 265 | <S>"\\\"" => (addS #"\""; continue ()); |
172 : | <S>"\\\\" => (addS #"\\"; continue ()); | ||
173 : | |||
174 : | <S>"\\"{eol} => (YYBEGIN SS; newline (yypos + 1); continue ()); | ||
175 : | <S>"\\"{ws}+ => (YYBEGIN SS; continue ()); | ||
176 : | |||
177 : | blume | 268 | <S>"\\". => (error (yypos, yypos+2) |
178 : | blume | 262 | ("illegal escape character in string " ^ yytext); |
179 : | continue ()); | ||
180 : | |||
181 : | blume | 266 | <S>"\"" => (YYBEGIN INITIAL; getS (yypos, Tokens.FILE_NATIVE)); |
182 : | blume | 265 | <S>{eol} => (newline yypos; |
183 : | blume | 268 | error (yypos, yypos + size yytext) |
184 : | "illegal linebreak in string"; | ||
185 : | blume | 262 | continue ()); |
186 : | |||
187 : | blume | 265 | <S>. => (addS (String.sub (yytext, 0)); continue ()); |
188 : | |||
189 : | <SS>{eol} => (newline yypos; continue ()); | ||
190 : | <SS>{ws}+ => (continue ()); | ||
191 : | <SS>"\\" => (YYBEGIN S; continue ()); | ||
192 : | blume | 268 | <SS>. => (error (yypos, yypos+1) |
193 : | blume | 262 | ("illegal character in stringskip " ^ yytext); |
194 : | continue ()); | ||
195 : | |||
196 : | blume | 265 | <INITIAL,P>"(" => (Tokens.LPAREN (yypos, yypos + 1)); |
197 : | <INITIAL,P>")" => (Tokens.RPAREN (yypos, yypos + 1)); | ||
198 : | blume | 262 | <INITIAL>":" => (Tokens.COLON (yypos, yypos + 1)); |
199 : | blume | 265 | <P>"+" => (Tokens.PLUS (yypos, yypos + 1)); |
200 : | <P>"-" => (Tokens.MINUS (yypos, yypos + 1)); | ||
201 : | <P>"*" => (Tokens.TIMES (yypos, yypos + 1)); | ||
202 : | <P>"<>" => (Tokens.NE (yypos, yypos + 2)); | ||
203 : | <P>"<=" => (Tokens.LE (yypos, yypos + 2)); | ||
204 : | <P>"<" => (Tokens.LT (yypos, yypos + 1)); | ||
205 : | <P>">=" => (Tokens.GE (yypos, yypos + 2)); | ||
206 : | <P>">" => (Tokens.GT (yypos, yypos + 1)); | ||
207 : | <P>"=" => (Tokens.EQ (yypos, yypos + 1)); | ||
208 : | <P>"~" => (Tokens.TILDE (yypos, yypos + 1)); | ||
209 : | blume | 262 | |
210 : | blume | 265 | <P>{digit}+ => (Tokens.NUMBER |
211 : | blume | 262 | (valOf (Int.fromString yytext) |
212 : | handle _ => | ||
213 : | blume | 268 | (error (yypos, yypos + size yytext) |
214 : | "number too large"; | ||
215 : | 0), | ||
216 : | blume | 262 | yypos, yypos + size yytext)); |
217 : | |||
218 : | blume | 274 | <P>{id} => (idToken (yytext, yypos, pp_ids, Tokens.CM_ID, |
219 : | fn () => YYBEGIN PM)); | ||
220 : | blume | 265 | |
221 : | <M>({id}|{sym}+) => (YYBEGIN INITIAL; | ||
222 : | Tokens.ML_ID (yytext, yypos, yypos + size yytext)); | ||
223 : | <PM>({id}|{sym}+) => (YYBEGIN P; | ||
224 : | Tokens.ML_ID (yytext, yypos, yypos + size yytext)); | ||
225 : | |||
226 : | blume | 274 | <INITIAL,P>{eol}{sharp}{ws}*"if" => (YYBEGIN P; |
227 : | blume | 265 | newline yypos; |
228 : | Tokens.IF (yypos, yypos + size yytext)); | ||
229 : | blume | 274 | <INITIAL,P>{eol}{sharp}{ws}*"elif" => (YYBEGIN P; |
230 : | blume | 265 | newline yypos; |
231 : | Tokens.ELIF (yypos, yypos + size yytext)); | ||
232 : | blume | 274 | <INITIAL,P>{eol}{sharp}{ws}*"else" => (YYBEGIN P; |
233 : | blume | 265 | newline yypos; |
234 : | Tokens.ELSE (yypos, yypos + size yytext)); | ||
235 : | blume | 274 | <INITIAL,P>{eol}{sharp}{ws}*"endif" => (YYBEGIN P; |
236 : | blume | 265 | newline yypos; |
237 : | Tokens.ENDIF (yypos, | ||
238 : | blume | 262 | yypos + size yytext)); |
239 : | blume | 274 | <INITIAL,P>{eol}{sharp}{ws}*"error"{ws}+{neol}* => (newline yypos; |
240 : | errorTok (yytext, yypos)); | ||
241 : | blume | 265 | <INITIAL,M,PM>{eol} => (newline yypos; continue ()); |
242 : | <P>{eol} => (YYBEGIN INITIAL; newline yypos; continue ()); | ||
243 : | |||
244 : | <INITIAL,M,PM,P>{ws}+ => (continue ()); | ||
245 : | |||
246 : | blume | 268 | <M,PM>. => (error (yypos, yypos+1) |
247 : | blume | 265 | ("illegal character at start of ML symbol: " ^ |
248 : | yytext); | ||
249 : | blume | 262 | continue ()); |
250 : | blume | 265 | |
251 : | blume | 274 | <INITIAL>{cmid} => (idToken (yytext, yypos, cm_ids, |
252 : | Tokens.FILE_STANDARD, | ||
253 : | fn () => YYBEGIN M)); | ||
254 : | blume | 265 | |
255 : | |||
256 : | blume | 268 | <INITIAL>. => (error (yypos, yypos+1) |
257 : | blume | 265 | ("illegal character: " ^ yytext); |
258 : | continue ()); | ||
259 : | blume | 313 | |
260 : | {eol}{sharp}{ws}*"line"{ws}+{neol}* => (newline yypos; | ||
261 : | sync (yypos, yytext); | ||
262 : | continue ()); |
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |