SCM Repository
Annotation of /sml/trunk/src/compiler/Parse/lex/foo.lex
Parent Directory
|
Revision Log
Revision 114 - (view) (download)
1 : | monnier | 66 | (* ml.lex |
2 : | * | ||
3 : | * Copyright 1989 by AT&T Bell Laboratories | ||
4 : | * | ||
5 : | monnier | 113 | * $Log: ml.lex,v $ |
6 : | monnier | 66 | * Revision 1.6 1998/02/10 21:10:20 jhr |
7 : | * Changes in the way that syntatic extensions (overload, lazy and quotation) | ||
8 : | * are controlled. | ||
9 : | * | ||
10 : | * Revision 1.5 1997/11/13 19:52:45 jhr | ||
11 : | * Modified lexer to recognize Windows and MacOS end-of-line disciplines. | ||
12 : | * | ||
13 : | * Revision 1.4 1997/08/26 19:25:26 jhr | ||
14 : | * Keyword clean-up: abstraction is gone; overload is _overload; lazy is _lazy. | ||
15 : | * | ||
16 : | * Revision 1.3 1997/05/22 20:17:22 jhr | ||
17 : | * Changed lexer to accept "1e1" style floating-point literals. | ||
18 : | * | ||
19 : | * Revision 1.2 1997/01/28 23:20:40 jhr | ||
20 : | * Integer and word literals are now represented by IntInf.int (instead of | ||
21 : | * as strings). | ||
22 : | * | ||
23 : | *) | ||
24 : | |||
25 : | open ErrorMsg; | ||
26 : | |||
27 : | structure TokTable = TokenTable(Tokens); | ||
28 : | type svalue = Tokens.svalue | ||
29 : | type pos = int | ||
30 : | type lexresult = (svalue,pos) Tokens.token | ||
31 : | type lexarg = { | ||
32 : | comLevel : int ref, | ||
33 : | sourceMap : SourceMap.sourcemap, | ||
34 : | charlist : string list ref, | ||
35 : | stringtype : bool ref, | ||
36 : | stringstart : int ref, (* start of current string or comment*) | ||
37 : | brack_stack : int ref list ref, (* for frags *) | ||
38 : | err : pos*pos -> ErrorMsg.complainer | ||
39 : | } | ||
40 : | type arg = lexarg | ||
41 : | type ('a,'b) token = ('a,'b) Tokens.token | ||
42 : | fun eof ({comLevel,err,charlist,stringstart,sourceMap, ...} : lexarg) = let | ||
43 : | val pos = Int.max(!stringstart+2, SourceMap.lastChange sourceMap) | ||
44 : | in | ||
45 : | if !comLevel>0 | ||
46 : | then err (!stringstart,pos) COMPLAIN "unclosed comment" nullErrorBody | ||
47 : | else if !charlist <> [] | ||
48 : | then err (!stringstart,pos) COMPLAIN | ||
49 : | "unclosed string, character, or quotation" nullErrorBody | ||
50 : | |||
51 : | else (); | ||
52 : | Tokens.EOF(pos,pos) | ||
53 : | end | ||
54 : | fun addString (charlist,s:string) = charlist := s :: (!charlist) | ||
55 : | fun addChar (charlist, c:char) = addString(charlist, String.str c) | ||
56 : | fun makeString charlist = (concat(rev(!charlist)) before charlist := nil) | ||
57 : | |||
58 : | local | ||
59 : | fun cvt radix (s, i) = | ||
60 : | #1(valOf(IntInf.scan radix Substring.getc (Substring.triml i (Substring.all s)))) | ||
61 : | in | ||
62 : | val atoi = cvt StringCvt.DEC | ||
63 : | val xtoi = cvt StringCvt.HEX | ||
64 : | end (* local *) | ||
65 : | |||
66 : | fun mysynch (src, pos, parts) = | ||
67 : | let fun digit d = Char.ord d - Char.ord #"0" | ||
68 : | fun cvt digits = foldl (fn(d, n) => 10*n + digit d) 0 (explode digits) | ||
69 : | val r = SourceMap.resynch src | ||
70 : | in case parts | ||
71 : | of [col, line] => | ||
72 : | r (pos, {fileName=NONE, line=cvt line, column=SOME(cvt col)}) | ||
73 : | | [file, col, line] => | ||
74 : | r (pos, {fileName=SOME file, line=cvt line, column=SOME(cvt col)}) | ||
75 : | | _ => impossible "text in (*#line...*)" | ||
76 : | end | ||
77 : | |||
78 : | fun has_quote s = let | ||
79 : | fun loop i = ((String.sub(s,i) = #"`") orelse loop (i+1)) | ||
80 : | handle _ => false | ||
81 : | in | ||
82 : | loop 0 | ||
83 : | end | ||
84 : | |||
85 : | fun inc (ri as ref i) = (ri := i+1) | ||
86 : | fun dec (ri as ref i) = (ri := i-1) | ||
87 : | %% | ||
88 : | %reject | ||
89 : | %s A S F Q AQ L LL LLC LLCQ; | ||
90 : | %header (functor MLLexFun(structure Tokens : ML_TOKENS)); | ||
91 : | %arg ({ | ||
92 : | comLevel, | ||
93 : | sourceMap, | ||
94 : | err, | ||
95 : | charlist, | ||
96 : | stringstart, | ||
97 : | stringtype, | ||
98 : | brack_stack}); | ||
99 : | idchars=[A-Za-z'_0-9]; | ||
100 : | id=[A-Za-z]{idchars}*; | ||
101 : | ws=("\012"|[\t\ ])*; | ||
102 : | nrws=("\012"|[\t\ ])+; | ||
103 : | eol=("\013\010"|"\010"|"\013"); | ||
104 : | some_sym=[!%&$+/:<=>?@~|#*]|\-|\^; | ||
105 : | sym={some_sym}|"\\"; | ||
106 : | quote="`"; | ||
107 : | full_sym={sym}|{quote}; | ||
108 : | num=[0-9]+; | ||
109 : | frac="."{num}; | ||
110 : | exp=[eE](~?){num}; | ||
111 : | real=(~?)(({num}{frac}?{exp})|({num}{frac}{exp}?)); | ||
112 : | hexnum=[0-9a-fA-F]+; | ||
113 : | %% | ||
114 : | <INITIAL>{ws} => (continue()); | ||
115 : | <INITIAL>{eol} => (SourceMap.newline sourceMap yypos; continue()); | ||
116 : | <INITIAL>"_" => (Tokens.WILD(yypos,yypos+1)); | ||
117 : | <INITIAL>"," => (Tokens.COMMA(yypos,yypos+1)); | ||
118 : | <INITIAL>"{" => (Tokens.LBRACE(yypos,yypos+1)); | ||
119 : | <INITIAL>"}" => (Tokens.RBRACE(yypos,yypos+1)); | ||
120 : | <INITIAL>"[" => (Tokens.LBRACKET(yypos,yypos+1)); | ||
121 : | <INITIAL>"#[" => (Tokens.VECTORSTART(yypos,yypos+1)); | ||
122 : | <INITIAL>"]" => (Tokens.RBRACKET(yypos,yypos+1)); | ||
123 : | <INITIAL>";" => (Tokens.SEMICOLON(yypos,yypos+1)); | ||
124 : | <INITIAL>"(" => (if (null(!brack_stack)) | ||
125 : | then () | ||
126 : | else inc (hd (!brack_stack)); | ||
127 : | Tokens.LPAREN(yypos,yypos+1)); | ||
128 : | <INITIAL>")" => (if (null(!brack_stack)) | ||
129 : | then () | ||
130 : | else if (!(hd (!brack_stack)) = 1) | ||
131 : | then ( brack_stack := tl (!brack_stack); | ||
132 : | charlist := []; | ||
133 : | YYBEGIN Q) | ||
134 : | else dec (hd (!brack_stack)); | ||
135 : | Tokens.RPAREN(yypos,yypos+1)); | ||
136 : | <INITIAL>"." => (Tokens.DOT(yypos,yypos+1)); | ||
137 : | <INITIAL>"..." => (Tokens.DOTDOTDOT(yypos,yypos+3)); | ||
138 : | <INITIAL>"'"("'"?)("_"|{num})?{id} | ||
139 : | => (TokTable.checkTyvar(yytext,yypos)); | ||
140 : | <INITIAL>{id} => (TokTable.checkId(yytext, yypos)); | ||
141 : | <INITIAL>{full_sym}+ => (if !Control.quotation | ||
142 : | then if (has_quote yytext) | ||
143 : | then REJECT() | ||
144 : | else TokTable.checkSymId(yytext,yypos) | ||
145 : | else TokTable.checkSymId(yytext,yypos)); | ||
146 : | <INITIAL>{sym}+ => (TokTable.checkSymId(yytext,yypos)); | ||
147 : | <INITIAL>{quote} => (if !Control.quotation | ||
148 : | then (YYBEGIN Q; | ||
149 : | charlist := []; | ||
150 : | Tokens.BEGINQ(yypos,yypos+1)) | ||
151 : | else (err(yypos, yypos+1) | ||
152 : | COMPLAIN "quotation implementation error" | ||
153 : | nullErrorBody; | ||
154 : | Tokens.BEGINQ(yypos,yypos+1))); | ||
155 : | <INITIAL>{real} => (Tokens.REAL(yytext,yypos,yypos+size yytext)); | ||
156 : | <INITIAL>[1-9][0-9]* => (Tokens.INT(atoi(yytext, 0),yypos,yypos+size yytext)); | ||
157 : | <INITIAL>{num} => (Tokens.INT0(atoi(yytext, 0),yypos,yypos+size yytext)); | ||
158 : | <INITIAL>~{num} => (Tokens.INT0(atoi(yytext, 0),yypos,yypos+size yytext)); | ||
159 : | <INITIAL>"0x"{hexnum} => (Tokens.INT0(xtoi(yytext, 2),yypos,yypos+size yytext)); | ||
160 : | <INITIAL>"~0x"{hexnum} => (Tokens.INT0(IntInf.~(xtoi(yytext, 3)),yypos,yypos+size yytext)); | ||
161 : | <INITIAL>"0w"{num} => (Tokens.WORD(atoi(yytext, 2),yypos,yypos+size yytext)); | ||
162 : | <INITIAL>"0wx"{hexnum} => (Tokens.WORD(xtoi(yytext, 3),yypos,yypos+size yytext)); | ||
163 : | <INITIAL>\" => (charlist := [""]; stringstart := yypos; | ||
164 : | stringtype := true; YYBEGIN S; continue()); | ||
165 : | <INITIAL>\#\" => (charlist := [""]; stringstart := yypos; | ||
166 : | stringtype := false; YYBEGIN S; continue()); | ||
167 : | <INITIAL>"(*#line"{nrws} => | ||
168 : | (YYBEGIN L; stringstart := yypos; comLevel := 1; continue()); | ||
169 : | <INITIAL>"(*" => (YYBEGIN A; stringstart := yypos; comLevel := 1; continue()); | ||
170 : | <INITIAL>"*)" => (err (yypos,yypos+1) COMPLAIN "unmatched close comment" | ||
171 : | nullErrorBody; | ||
172 : | continue()); | ||
173 : | <INITIAL>\h => (err (yypos,yypos) COMPLAIN "non-Ascii character" | ||
174 : | nullErrorBody; | ||
175 : | continue()); | ||
176 : | <INITIAL>. => (err (yypos,yypos) COMPLAIN "illegal token" nullErrorBody; | ||
177 : | continue()); | ||
178 : | <L>[0-9]+ => (YYBEGIN LL; charlist := [yytext]; continue()); | ||
179 : | <LL>\. => ((* cheat: take n > 0 dots *) continue()); | ||
180 : | <LL>[0-9]+ => (YYBEGIN LLC; addString(charlist, yytext); continue()); | ||
181 : | <LL>0* => (YYBEGIN LLC; addString(charlist, "1"); continue() | ||
182 : | (* note hack, since ml-lex chokes on the empty string for 0* *)); | ||
183 : | <LLC>"*)" => (YYBEGIN INITIAL; mysynch(sourceMap, yypos+2, !charlist); | ||
184 : | comLevel := 0; charlist := []; continue()); | ||
185 : | <LLC>{ws}\" => (YYBEGIN LLCQ; continue()); | ||
186 : | <LLCQ>[^\"]* => (addString(charlist, yytext); continue()); | ||
187 : | <LLCQ>\""*)" => (YYBEGIN INITIAL; mysynch(sourceMap, yypos+3, !charlist); | ||
188 : | comLevel := 0; charlist := []; continue()); | ||
189 : | <L,LLC,LLCQ>"*)" => (err (!stringstart, yypos+1) WARN | ||
190 : | "ill-formed (*#line...*) taken as comment" nullErrorBody; | ||
191 : | YYBEGIN INITIAL; comLevel := 0; charlist := []; continue()); | ||
192 : | <L,LLC,LLCQ>. => (err (!stringstart, yypos+1) WARN | ||
193 : | "ill-formed (*#line...*) taken as comment" nullErrorBody; | ||
194 : | YYBEGIN A; continue()); | ||
195 : | <A>"(*" => (inc comLevel; continue()); | ||
196 : | <A>{eol} => (SourceMap.newline sourceMap yypos; continue()); | ||
197 : | <A>"*)" => (dec comLevel; if !comLevel=0 then YYBEGIN INITIAL else (); continue()); | ||
198 : | <A>. => (continue()); | ||
199 : | <S>\" => (let val s = makeString charlist | ||
200 : | val s = if size s <> 1 andalso not(!stringtype) | ||
201 : | then (err(!stringstart,yypos) COMPLAIN | ||
202 : | "character constant not length 1" | ||
203 : | nullErrorBody; | ||
204 : | substring(s^"x",0,1)) | ||
205 : | else s | ||
206 : | val t = (s,!stringstart,yypos+1) | ||
207 : | in YYBEGIN INITIAL; | ||
208 : | if !stringtype then Tokens.STRING t else Tokens.CHAR t | ||
209 : | end); | ||
210 : | <S>{eol} => (err (!stringstart,yypos) COMPLAIN "unclosed string" | ||
211 : | nullErrorBody; | ||
212 : | SourceMap.newline sourceMap yypos; | ||
213 : | YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos)); | ||
214 : | <S>\\{eol} => (SourceMap.newline sourceMap (yypos+1); | ||
215 : | YYBEGIN F; continue()); | ||
216 : | <S>\\{ws} => (YYBEGIN F; continue()); | ||
217 : | <S>\\a => (addString(charlist, "\007"); continue()); | ||
218 : | <S>\\b => (addString(charlist, "\008"); continue()); | ||
219 : | <S>\\f => (addString(charlist, "\012"); continue()); | ||
220 : | <S>\\n => (addString(charlist, "\010"); continue()); | ||
221 : | <S>\\r => (addString(charlist, "\013"); continue()); | ||
222 : | <S>\\t => (addString(charlist, "\009"); continue()); | ||
223 : | <S>\\v => (addString(charlist, "\011"); continue()); | ||
224 : | <S>\\\\ => (addString(charlist, "\\"); continue()); | ||
225 : | <S>\\\" => (addString(charlist, "\""); continue()); | ||
226 : | <S>\\\^[@-_] => (addChar(charlist, | ||
227 : | Char.chr(Char.ord(String.sub(yytext,2))-Char.ord #"@")); | ||
228 : | continue()); | ||
229 : | <S>\\\^. => | ||
230 : | (err(yypos,yypos+2) COMPLAIN "illegal control escape; must be one of \ | ||
231 : | \@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_" nullErrorBody; | ||
232 : | continue()); | ||
233 : | <S>\\[0-9]{3} => | ||
234 : | (let val x = Char.ord(String.sub(yytext,1))*100 | ||
235 : | +Char.ord(String.sub(yytext,2))*10 | ||
236 : | +Char.ord(String.sub(yytext,3)) | ||
237 : | -((Char.ord #"0")*111) | ||
238 : | in (if x>255 | ||
239 : | then err (yypos,yypos+4) COMPLAIN "illegal ascii escape" nullErrorBody | ||
240 : | else addChar(charlist, Char.chr x); | ||
241 : | continue()) | ||
242 : | end); | ||
243 : | <S>\\ => (err (yypos,yypos+1) COMPLAIN "illegal string escape" | ||
244 : | nullErrorBody; | ||
245 : | continue()); | ||
246 : | |||
247 : | |||
248 : | <S>[\000-\031] => (err (yypos,yypos+1) COMPLAIN "illegal non-printing character in string" nullErrorBody; | ||
249 : | continue()); | ||
250 : | <S>({idchars}|{some_sym}|\[|\]|\(|\)|{quote}|[,.;^{}])+|. => (addString(charlist,yytext); continue()); | ||
251 : | <F>{eol} => (SourceMap.newline sourceMap yypos; continue()); | ||
252 : | <F>{ws} => (continue()); | ||
253 : | <F>\\ => (YYBEGIN S; stringstart := yypos; continue()); | ||
254 : | <F>. => (err (!stringstart,yypos) COMPLAIN "unclosed string" | ||
255 : | nullErrorBody; | ||
256 : | YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos+1)); | ||
257 : | <Q>"^`" => (addString(charlist, "`"); continue()); | ||
258 : | <Q>"^^" => (addString(charlist, "^"); continue()); | ||
259 : | <Q>"^" => (YYBEGIN AQ; | ||
260 : | let val x = makeString charlist | ||
261 : | in | ||
262 : | Tokens.OBJL(x,yypos,yypos+(size x)) | ||
263 : | end); | ||
264 : | <Q>"`" => ((* a closing quote *) | ||
265 : | YYBEGIN INITIAL; | ||
266 : | let val x = makeString charlist | ||
267 : | in | ||
268 : | Tokens.ENDQ(x,yypos,yypos+(size x)) | ||
269 : | end); | ||
270 : | <Q>{eol} => (SourceMap.newline sourceMap yypos; addString(charlist,"\n"); continue()); | ||
271 : | <Q>. => (addString(charlist,yytext); continue()); | ||
272 : | |||
273 : | <AQ>{eol} => (SourceMap.newline sourceMap yypos; continue()); | ||
274 : | <AQ>{ws} => (continue()); | ||
275 : | <AQ>{id} => (YYBEGIN Q; | ||
276 : | let val hash = StrgHash.hashString yytext | ||
277 : | in | ||
278 : | Tokens.AQID(FastSymbol.rawSymbol(hash,yytext), | ||
279 : | yypos,yypos+(size yytext)) | ||
280 : | end); | ||
281 : | <AQ>{sym}+ => (YYBEGIN Q; | ||
282 : | let val hash = StrgHash.hashString yytext | ||
283 : | in | ||
284 : | Tokens.AQID(FastSymbol.rawSymbol(hash,yytext), | ||
285 : | yypos,yypos+(size yytext)) | ||
286 : | end); | ||
287 : | <AQ>"(" => (YYBEGIN INITIAL; | ||
288 : | brack_stack := ((ref 1)::(!brack_stack)); | ||
289 : | Tokens.LPAREN(yypos,yypos+1)); | ||
290 : | <AQ>. => (err (yypos,yypos+1) COMPLAIN | ||
291 : | ("ml lexer: bad character after antiquote "^yytext) | ||
292 : | nullErrorBody; | ||
293 : | Tokens.AQID(FastSymbol.rawSymbol(0,""),yypos,yypos)); | ||
294 : |
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |