SCM Repository
View of /sml/branches/SMLNJ/src/compiler/Parse/lex/ml.lex.sml
Parent Directory
|
Revision Log
Revision 206 -
(download)
(annotate)
Fri Jan 15 15:54:19 1999 UTC (23 years, 4 months ago)
File size: 62702 byte(s)
This commit was manufactured by cvs2svn to create branch 'SMLNJ'.
(* MLLexFun: lexer functor, parameterised over a Tokens structure matching ML_TOKENS
 * (presumably ml-lex output; the embedded comments refer to ml.lex and ml-lex).
 * Contents, in order:
 *   - UserDeclarations: the lexresult and lexarg types, the eof handler, and the helpers
 *     addString, addChar, makeString, atoi, xtoi, mysynch, has_quote, inc and dec;
 *   - Internal: the state table tab and the start-state constants
 *     INITIAL, A, S, F, Q, AQ, L, LL, LLC and LLCQ;
 *   - makeLexer: takes the input function yyinput and returns lex; lex takes a lexarg
 *     record and returns the function continue, which produces the next token.
 * Two distinct exceptions exist: the top-level LexError (raised by action when no
 * accepting leaf is left) and Internal.LexerError (raised for an unknown action number).
 *)
functor MLLexFun(structure Tokens : ML_TOKENS)= struct structure UserDeclarations = struct (* ml.lex * * Copyright 1989 by AT&T Bell Laboratories *) (* * $Log$ *) open ErrorMsg; structure TokTable = TokenTable(Tokens); type svalue = Tokens.svalue type pos = int type lexresult = (svalue,pos) Tokens.token type lexarg = { comLevel : int ref, sourceMap : SourceMap.sourcemap, charlist : string list ref, stringtype : bool ref, stringstart : int ref, (* start of current string or comment*) brack_stack : int ref list ref, (* for frags *) err : pos*pos -> ErrorMsg.complainer } type arg = lexarg type ('a,'b) token = ('a,'b) Tokens.token (* eof: complains about an unclosed comment when comLevel > 0, otherwise about an unclosed string, character, or quotation when charlist is non-empty, then returns Tokens.EOF(pos,pos) *) fun eof ({comLevel,err,charlist,stringstart,sourceMap, ...} : lexarg) = let val pos = Int.max(!stringstart+2, SourceMap.lastChange sourceMap) in if !comLevel>0 then err (!stringstart,pos) COMPLAIN "unclosed comment" nullErrorBody else if !charlist <> [] then err (!stringstart,pos) COMPLAIN "unclosed string, character, or quotation" nullErrorBody else (); Tokens.EOF(pos,pos) end (* addString pushes s onto the front of charlist, so the list is kept in reverse order *) fun addString (charlist,s:string) = charlist := s :: (!charlist) fun addChar (charlist, c:char) = addString(charlist, String.str c) (* makeString reverses and concatenates the accumulated pieces, then resets charlist to nil *) fun makeString charlist = (concat(rev(!charlist)) before charlist := nil) (* cvt radix (s, i): drops the first i characters of s and scans the remainder as an IntInf in the given radix; valOf raises if nothing can be scanned *) local fun cvt radix (s, i) = #1(valOf(IntInf.scan radix Substring.getc (Substring.triml i (Substring.all s)))) in val atoi = cvt StringCvt.DEC val xtoi = cvt StringCvt.HEX end (* local *) (* mysynch: calls SourceMap.resynch with the line, column and optional file name taken from parts, which must be [col, line] or [file, col, line]; any other shape is reported via impossible *) fun mysynch (src, pos, parts) = let fun digit d = Char.ord d - Char.ord #"0" fun cvt digits = foldl (fn(d, n) => 10*n + digit d) 0 (explode digits) val r = SourceMap.resynch src in case parts of [col, line] => r (pos, {fileName=NONE, line=cvt line, column=SOME(cvt col)}) | [file, col, line] => r (pos, {fileName=SOME file, line=cvt line, column=SOME(cvt col)}) | _ => impossible "text in (*#line...*)" end (* has_quote s: true iff s contains a backquote character; the wildcard handler turns the exception raised by indexing past the end of s into false *) fun has_quote s = let fun loop i = ((String.sub(s,i) = #"`") orelse loop (i+1)) handle _ => false in loop 0 end fun inc (ri as ref i) = (ri := i+1) fun dec (ri as ref i) = (ri := 
i-1) end (* end of user routines *) exception LexError (* raised if illegal leaf action tried *) structure Internal = struct datatype yyfinstate = N of int type statedata = {fin : yyfinstate list, trans: string} (* transition & final state table *) (* NOTE(review): the binding "val sfun f x = x" below does not look like valid SML, and no list s of index/transition-string pairs is visible before it is used; the table data appears to be missing from this copy - confirm against the generated original before building *) val tab = let val sfun f x = x val s = map f (rev (tl (rev s))) exception LexHackingError (* look returns the value paired with index i in an association list and raises LexHackingError when i is absent *) fun look ((j,x)::r, i) = if i = j then x else look(r, i) | look ([], i) = raise LexHackingError (* g replaces the integer trans index of a state record with the value look finds for it in s *) fun g {fin=x, trans=i} = {fin=x, trans=look(s,i)} in Vector.fromList(map g [{fin = [], trans = 0}, {fin = [(N 2)], trans = 1}, {fin = [(N 2)], trans = 1}, {fin = [], trans = 3}, {fin = [], trans = 3}, {fin = [], trans = 5}, {fin = [], trans = 5}, {fin = [(N 278)], trans = 7}, {fin = [(N 278)], trans = 7}, {fin = [], trans = 9}, {fin = [], trans = 9}, {fin = [(N 307)], trans = 11}, {fin = [(N 307)], trans = 11}, {fin = [], trans = 13}, {fin = [], trans = 13}, {fin = [(N 156)], trans = 15}, {fin = [(N 156)], trans = 15}, {fin = [], trans = 17}, {fin = [], trans = 17}, {fin = [(N 165)], trans = 19}, {fin = [(N 165)], trans = 19}, {fin = [(N 144),(N 146)], trans = 0}, {fin = [(N 146)], trans = 0}, {fin = [(N 56),(N 65),(N 146)], trans = 23}, {fin = [(N 56),(N 65)], trans = 24}, {fin = [(N 56)], trans = 25}, {fin = [(N 97)], trans = 26}, {fin = [], trans = 27}, {fin = [], trans = 28}, {fin = [(N 87)], trans = 28}, {fin = [], trans = 30}, {fin = [(N 87)], trans = 31}, {fin = [], trans = 32}, {fin = [], trans = 33}, {fin = [(N 87)], trans = 33}, {fin = [(N 97)], trans = 35}, {fin = [], trans = 36}, {fin = [(N 108)], trans = 36}, {fin = [(N 15),(N 146)], trans = 0}, {fin = [(N 56),(N 65),(N 146)], trans = 24}, {fin = [(N 13),(N 146)], trans = 0}, {fin = [(N 45),(N 146)], trans = 41}, {fin = [(N 45)], trans = 41}, {fin = [(N 56),(N 67),(N 146)], trans = 25}, {fin = [(N 9),(N 146)], trans = 0}, {fin = [(N 22),(N 146)], trans = 0}, {fin = [(N 17),(N 146)], trans = 0}, {fin = [(N 24),(N 146)], trans = 0}, {fin = [(N 90),(N 93),(N 146)], trans = 48}, 
{fin = [(N 90),(N 93)], trans = 48}, {fin = [(N 93),(N 146)], trans = 50}, {fin = [], trans = 51}, {fin = [(N 102)], trans = 51}, {fin = [], trans = 53}, {fin = [], trans = 54}, {fin = [(N 119)], trans = 54}, {fin = [(N 113)], trans = 56}, {fin = [(N 93)], trans = 57}, {fin = [(N 30),(N 146)], trans = 58}, {fin = [], trans = 59}, {fin = [(N 34)], trans = 0}, {fin = [(N 11),(N 146)], trans = 0}, {fin = [(N 56),(N 65),(N 146)], trans = 62}, {fin = [(N 142)], trans = 0}, {fin = [(N 28),(N 146)], trans = 0}, {fin = [(N 26),(N 146)], trans = 65}, {fin = [(N 139)], trans = 66}, {fin = [], trans = 67}, {fin = [], trans = 68}, {fin = [], trans = 69}, {fin = [], trans = 70}, {fin = [], trans = 71}, {fin = [(N 136)], trans = 71}, {fin = [(N 146)], trans = 73}, {fin = [(N 42)], trans = 74}, {fin = [], trans = 75}, {fin = [], trans = 76}, {fin = [], trans = 77}, {fin = [(N 56),(N 65),(N 146)], trans = 78}, {fin = [(N 20)], trans = 0}, {fin = [(N 124)], trans = 0}, {fin = [(N 121),(N 146)], trans = 0}, {fin = [(N 2),(N 146)], trans = 82}, {fin = [(N 2)], trans = 82}, {fin = [(N 7),(N 146)], trans = 84}, {fin = [(N 7)], trans = 0}, {fin = [(N 187)], trans = 0}, {fin = [(N 187)], trans = 87}, {fin = [(N 185)], trans = 0}, {fin = [(N 187)], trans = 89}, {fin = [(N 177)], trans = 0}, {fin = [(N 182),(N 187)], trans = 91}, {fin = [(N 182)], trans = 0}, {fin = [(N 270)], trans = 0}, {fin = [(N 270)], trans = 94}, {fin = [(N 204),(N 246),(N 270)], trans = 95}, {fin = [(N 225)], trans = 0}, {fin = [(N 222)], trans = 0}, {fin = [(N 219)], trans = 0}, {fin = [(N 216)], trans = 0}, {fin = [(N 213)], trans = 0}, {fin = [(N 210)], trans = 0}, {fin = [(N 207)], trans = 0}, {fin = [], trans = 103}, {fin = [(N 239)], trans = 0}, {fin = [(N 235),(N 239)], trans = 0}, {fin = [(N 228)], trans = 0}, {fin = [], trans = 107}, {fin = [], trans = 108}, {fin = [(N 244)], trans = 0}, {fin = [(N 231)], trans = 0}, {fin = [(N 204)], trans = 111}, {fin = [(N 200)], trans = 112}, {fin = [(N 200)], trans = 
0}, {fin = [(N 189),(N 270)], trans = 0}, {fin = [(N 248),(N 270)], trans = 0}, {fin = [(N 194),(N 248),(N 270)], trans = 116}, {fin = [(N 194)], trans = 0}, {fin = [(N 194),(N 248)], trans = 0}, {fin = [(N 282)], trans = 0}, {fin = [(N 280),(N 282)], trans = 0}, {fin = [(N 278),(N 282)], trans = 121}, {fin = [(N 278)], trans = 121}, {fin = [(N 275),(N 282)], trans = 123}, {fin = [(N 275)], trans = 0}, {fin = [(N 299)], trans = 0}, {fin = [(N 292),(N 299)], trans = 0}, {fin = [(N 290),(N 299)], trans = 127}, {fin = [(N 285)], trans = 0}, {fin = [(N 288)], trans = 0}, {fin = [(N 297),(N 299)], trans = 130}, {fin = [(N 297)], trans = 0}, {fin = [(N 323)], trans = 0}, {fin = [(N 319),(N 323)], trans = 133}, {fin = [(N 319)], trans = 133}, {fin = [(N 310),(N 323)], trans = 135}, {fin = [(N 310)], trans = 135}, {fin = [(N 321),(N 323)], trans = 0}, {fin = [(N 307),(N 323)], trans = 138}, {fin = [(N 307)], trans = 138}, {fin = [(N 304),(N 323)], trans = 140}, {fin = [(N 304)], trans = 0}, {fin = [(N 174)], trans = 0}, {fin = [(N 149),(N 174)], trans = 143}, {fin = [(N 149)], trans = 143}, {fin = [(N 174)], trans = 145}, {fin = [(N 172)], trans = 0}, {fin = [(N 154)], trans = 147}, {fin = [(N 154),(N 156)], trans = 148}, {fin = [(N 151)], trans = 0}, {fin = [(N 174)], trans = 150}, {fin = [(N 159),(N 172)], trans = 0}, {fin = [(N 163),(N 174)], trans = 0}, {fin = [(N 174)], trans = 153}, {fin = [(N 163)], trans = 0}, {fin = [], trans = 153}, {fin = [(N 165),(N 174)], trans = 156}, {fin = [(N 165)], trans = 156}, {fin = [(N 165),(N 174)], trans = 158}, {fin = [(N 165),(N 172)], trans = 156}, {fin = [(N 174)], trans = 160}, {fin = [], trans = 161}, {fin = [(N 169)], trans = 0}]) end structure StartStates = struct datatype yystartstate = STARTSTATE of int (* start state definitions *) val A = STARTSTATE 3; val AQ = STARTSTATE 11; val F = STARTSTATE 7; val INITIAL = STARTSTATE 1; val L = STARTSTATE 13; val LL = STARTSTATE 15; val LLC = STARTSTATE 17; val LLCQ = STARTSTATE 19; 
val Q = STARTSTATE 9; val S = STARTSTATE 5; end type result = UserDeclarations.lexresult exception LexerError (* raised if illegal leaf action tried *) end (* makeLexer yyinput: sets up the mutable lexer state (buffer yyb, its length and read position, file offset yygone, eof flag, current start state yybegin) and returns lex; yyinput is called with 1024 whenever more input is needed, and an empty result is treated as end of input *) fun makeLexer yyinput = let val yygone0=1 val yyb = ref "\n" (* buffer *) val yybl = ref 1 (*buffer length *) val yybufpos = ref 1 (* location of next character to use *) val yygone = ref yygone0 (* position in file of beginning of buffer *) val yydone = ref false (* eof found yet? *) val yybegin = ref 1 (*Current 'start state' for lexer *) val YYBEGIN = fn (Internal.StartStates.STARTSTATE x) => yybegin := x fun lex (yyarg as ({ comLevel, sourceMap, err, charlist, stringstart, stringtype, brack_stack})) = let fun continue() : Internal.result = let (* scan (s, AcceptingLeaves, l, i0): s is the current state, l the index in yyb of the next character, i0 the index where the current token starts; AcceptingLeaves stacks the fin lists of the states visited so far, most recent first *) fun scan (s,AcceptingLeaves : Internal.yyfinstate list list,l,i0) = let (* action (i, leaves): runs the first leaf of the most recent non-empty fin list, stepping i back by one for each empty list skipped; raises LexError when no leaf is left *) fun action (i,nil) = raise LexError | action (i,nil::l) = action (i-1,l) | action (i,(node::acts)::l) = case node of Internal.N yyk => (let fun yymktext() = substring(!yyb,i0,i-i0) val yypos = i0+ !yygone (* REJECT retries with the remaining leaves of the same state *) fun REJECT() = action(i,acts::l) open UserDeclarations Internal.StartStates in (yybufpos := i; case yyk of (* Application actions *) 102 => let val yytext=yymktext() in Tokens.INT0(xtoi(yytext, 2),yypos,yypos+size yytext) end | 108 => let val yytext=yymktext() in Tokens.INT0(IntInf.~(xtoi(yytext, 3)),yypos,yypos+size yytext) end | 11 => (Tokens.COMMA(yypos,yypos+1)) | 113 => let val yytext=yymktext() in Tokens.WORD(atoi(yytext, 2),yypos,yypos+size yytext) end | 119 => let val yytext=yymktext() in Tokens.WORD(xtoi(yytext, 3),yypos,yypos+size yytext) end | 121 => (charlist := [""]; stringstart := yypos; stringtype := true; YYBEGIN S; continue()) | 124 => (charlist := [""]; stringstart := yypos; stringtype := false; YYBEGIN S; continue()) | 13 => (Tokens.LBRACE(yypos,yypos+1)) | 136 => (YYBEGIN L; stringstart := yypos; comLevel := 1; continue()) | 139 => (YYBEGIN A; stringstart := yypos; comLevel := 1; continue()) | 142 => (err (yypos,yypos+1) COMPLAIN "unmatched close comment" nullErrorBody; 
continue()) | 144 => (err (yypos,yypos) COMPLAIN "non-Ascii character" nullErrorBody; continue()) | 146 => (err (yypos,yypos) COMPLAIN "illegal token" nullErrorBody; continue()) | 149 => let val yytext=yymktext() in YYBEGIN LL; charlist := [yytext]; continue() end | 15 => (Tokens.RBRACE(yypos,yypos+1)) | 151 => ((* cheat: take n > 0 dots *) continue()) | 154 => let val yytext=yymktext() in YYBEGIN LLC; addString(charlist, yytext); continue() end | 156 => (YYBEGIN LLC; addString(charlist, "1"); continue() (* note hack, since ml-lex chokes on the empty string for 0* *)) | 159 => (YYBEGIN INITIAL; mysynch(sourceMap, yypos+2, !charlist); comLevel := 0; charlist := []; continue()) | 163 => (YYBEGIN LLCQ; continue()) | 165 => let val yytext=yymktext() in addString(charlist, yytext); continue() end | 169 => (YYBEGIN INITIAL; mysynch(sourceMap, yypos+3, !charlist); comLevel := 0; charlist := []; continue()) | 17 => (Tokens.LBRACKET(yypos,yypos+1)) | 172 => (err (!stringstart, yypos+1) WARN "ill-formed (*#line...*) taken as comment" nullErrorBody; YYBEGIN INITIAL; comLevel := 0; charlist := []; continue()) | 174 => (err (!stringstart, yypos+1) WARN "ill-formed (*#line...*) taken as comment" nullErrorBody; YYBEGIN A; continue()) | 177 => (inc comLevel; continue()) | 182 => (SourceMap.newline sourceMap yypos; continue()) | 185 => (dec comLevel; if !comLevel=0 then YYBEGIN INITIAL else (); continue()) | 187 => (continue()) | 189 => (let val s = makeString charlist val s = if size s <> 1 andalso not(!stringtype) then (err(!stringstart,yypos) COMPLAIN "character constant not length 1" nullErrorBody; substring(s^"x",0,1)) else s val t = (s,!stringstart,yypos+1) in YYBEGIN INITIAL; if !stringtype then Tokens.STRING t else Tokens.CHAR t end) | 194 => (err (!stringstart,yypos) COMPLAIN "unclosed string" nullErrorBody; SourceMap.newline sourceMap yypos; YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos)) | 2 => (continue()) | 20 => 
(Tokens.VECTORSTART(yypos,yypos+1)) | 200 => (SourceMap.newline sourceMap (yypos+1); YYBEGIN F; continue()) | 204 => (YYBEGIN F; continue()) | 207 => (addString(charlist, "\007"); continue()) | 210 => (addString(charlist, "\008"); continue()) | 213 => (addString(charlist, "\012"); continue()) | 216 => (addString(charlist, "\010"); continue()) | 219 => (addString(charlist, "\013"); continue()) | 22 => (Tokens.RBRACKET(yypos,yypos+1)) | 222 => (addString(charlist, "\009"); continue()) | 225 => (addString(charlist, "\011"); continue()) | 228 => (addString(charlist, "\\"); continue()) | 231 => (addString(charlist, "\""); continue()) | 235 => let val yytext=yymktext() in addChar(charlist, Char.chr(Char.ord(String.sub(yytext,2))-Char.ord #"@")); continue() end | 239 => (err(yypos,yypos+2) COMPLAIN "illegal control escape; must be one of \ \@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_" nullErrorBody; continue()) | 24 => (Tokens.SEMICOLON(yypos,yypos+1)) | 244 => let val yytext=yymktext() in let val x = Char.ord(String.sub(yytext,1))*100 +Char.ord(String.sub(yytext,2))*10 +Char.ord(String.sub(yytext,3)) -((Char.ord #"0")*111) in (if x>255 then err (yypos,yypos+4) COMPLAIN "illegal ascii escape" nullErrorBody else addChar(charlist, Char.chr x); continue()) end end | 246 => (err (yypos,yypos+1) COMPLAIN "illegal string escape" nullErrorBody; continue()) | 248 => (err (yypos,yypos+1) COMPLAIN "illegal non-printing character in string" nullErrorBody; continue()) | 26 => (if (null(!brack_stack)) then () else inc (hd (!brack_stack)); Tokens.LPAREN(yypos,yypos+1)) | 270 => let val yytext=yymktext() in addString(charlist,yytext); continue() end | 275 => (SourceMap.newline sourceMap yypos; continue()) | 278 => (continue()) | 28 => (if (null(!brack_stack)) then () else if (!(hd (!brack_stack)) = 1) then ( brack_stack := tl (!brack_stack); charlist := []; YYBEGIN Q) else dec (hd (!brack_stack)); Tokens.RPAREN(yypos,yypos+1)) | 280 => (YYBEGIN S; stringstart := yypos; continue()) | 282 => (err 
(!stringstart,yypos) COMPLAIN "unclosed string" nullErrorBody; YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos+1)) | 285 => (addString(charlist, "`"); continue()) | 288 => (addString(charlist, "^"); continue()) | 290 => (YYBEGIN AQ; let val x = makeString charlist in Tokens.OBJL(x,yypos,yypos+(size x)) end) | 292 => ((* a closing quote *) YYBEGIN INITIAL; let val x = makeString charlist in Tokens.ENDQ(x,yypos,yypos+(size x)) end) | 297 => (SourceMap.newline sourceMap yypos; addString(charlist,"\n"); continue()) | 299 => let val yytext=yymktext() in addString(charlist,yytext); continue() end | 30 => (Tokens.DOT(yypos,yypos+1)) | 304 => (SourceMap.newline sourceMap yypos; continue()) | 307 => (continue()) | 310 => let val yytext=yymktext() in YYBEGIN Q; let val hash = StrgHash.hashString yytext in Tokens.AQID(FastSymbol.rawSymbol(hash,yytext), yypos,yypos+(size yytext)) end end | 319 => let val yytext=yymktext() in YYBEGIN Q; let val hash = StrgHash.hashString yytext in Tokens.AQID(FastSymbol.rawSymbol(hash,yytext), yypos,yypos+(size yytext)) end end | 321 => (YYBEGIN INITIAL; brack_stack := ((ref 1)::(!brack_stack)); Tokens.LPAREN(yypos,yypos+1)) | 323 => let val yytext=yymktext() in err (yypos,yypos+1) COMPLAIN ("ml lexer: bad character after antiquote "^yytext) nullErrorBody; Tokens.AQID(FastSymbol.rawSymbol(0,""),yypos,yypos) end | 34 => (Tokens.DOTDOTDOT(yypos,yypos+3)) | 42 => let val yytext=yymktext() in TokTable.checkTyvar(yytext,yypos) end | 45 => let val yytext=yymktext() in TokTable.checkId(yytext, yypos) end | 56 => let val yytext=yymktext() in if !Control.quotation then if (has_quote yytext) then REJECT() else TokTable.checkSymId(yytext,yypos) else TokTable.checkSymId(yytext,yypos) end | 65 => let val yytext=yymktext() in TokTable.checkSymId(yytext,yypos) end | 67 => (if !Control.quotation then (YYBEGIN Q; charlist := []; Tokens.BEGINQ(yypos,yypos+1)) else (err(yypos, yypos+1) COMPLAIN "quotation implementation error" nullErrorBody; 
Tokens.BEGINQ(yypos,yypos+1))) | 7 => (SourceMap.newline sourceMap yypos; continue()) | 87 => let val yytext=yymktext() in Tokens.REAL(yytext,yypos,yypos+size yytext) end | 9 => (Tokens.WILD(yypos,yypos+1)) | 90 => let val yytext=yymktext() in Tokens.INT(atoi(yytext, 0),yypos,yypos+size yytext) end | 93 => let val yytext=yymktext() in Tokens.INT0(atoi(yytext, 0),yypos,yypos+size yytext) end | 97 => let val yytext=yymktext() in Tokens.INT0(atoi(yytext, 0),yypos,yypos+size yytext) end | _ => raise Internal.LexerError ) end ) val {fin,trans} = Unsafe.Vector.sub(Internal.tab, s) val NewAcceptingLeaves = fin::AcceptingLeaves in (* when the buffer is exhausted: a state whose trans equals that of table entry 0 runs its action at once; otherwise more input is requested, and on refill the unread tail starting at i0 is kept in front of the new characters while yygone advances by i0 *) if l = !yybl then if trans = #trans(Vector.sub(Internal.tab,0)) then action(l,NewAcceptingLeaves ) else let val newchars= if !yydone then "" else yyinput 1024 in if (size newchars)=0 then (yydone := true; if (l=i0) then UserDeclarations.eof yyarg else action(l,NewAcceptingLeaves)) else (if i0=l then yyb := newchars else yyb := substring(!yyb,i0,l-i0)^newchars; yygone := !yygone+i0; yybl := size (!yyb); scan (s,AcceptingLeaves,l-i0,0)) end else let val NewChar = Char.ord(Unsafe.CharVector.sub(!yyb,l)) (* every character code of 128 or above is looked up at index 128 of the transition string *) val NewChar = if NewChar<128 then NewChar else 128 val NewState = Char.ord(Unsafe.CharVector.sub(trans,NewChar)) in if NewState=0 then action(l,NewAcceptingLeaves) else scan(NewState,NewAcceptingLeaves,l+1,i0) end end (* val start= if substring(!yyb,!yybufpos-1,1)="\n" then !yybegin+1 else !yybegin *) in scan(!yybegin (* start *),nil,!yybufpos,!yybufpos) end in continue end in lex end end
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |