(* ml.lex * * Copyright 1989 by AT&T Bell Laboratories * * $Log: ml.lex,v $ * Revision 1.6 1998/02/10 21:10:20 jhr * Changes in the way that syntatic extensions (overload, lazy and quotation) * are controlled. * * Revision 1.5 1997/11/13 19:52:45 jhr * Modified lexer to recognize Windows and MacOS end-of-line disciplines. * * Revision 1.4 1997/08/26 19:25:26 jhr * Keyword clean-up: abstraction is gone; overload is _overload; lazy is _lazy. * * Revision 1.3 1997/05/22 20:17:22 jhr * Changed lexer to accept "1e1" style floating-point literals. * * Revision 1.2 1997/01/28 23:20:40 jhr * Integer and word literals are now represented by IntInf.int (instead of * as strings). * *) open ErrorMsg; structure TokTable = TokenTable(Tokens); type svalue = Tokens.svalue type pos = int type lexresult = (svalue,pos) Tokens.token type lexarg = { comLevel : int ref, sourceMap : SourceMap.sourcemap, charlist : string list ref, stringtype : bool ref, stringstart : int ref, (* start of current string or comment*) brack_stack : int ref list ref, (* for frags *) err : pos*pos -> ErrorMsg.complainer } type arg = lexarg type ('a,'b) token = ('a,'b) Tokens.token fun eof ({comLevel,err,charlist,stringstart,sourceMap, ...} : lexarg) = let val pos = Int.max(!stringstart+2, SourceMap.lastChange sourceMap) in if !comLevel>0 then err (!stringstart,pos) COMPLAIN "unclosed comment" nullErrorBody else if !charlist <> [] then err (!stringstart,pos) COMPLAIN "unclosed string, character, or quotation" nullErrorBody else (); Tokens.EOF(pos,pos) end fun addString (charlist,s:string) = charlist := s :: (!charlist) fun addChar (charlist, c:char) = addString(charlist, String.str c) fun makeString charlist = (concat(rev(!charlist)) before charlist := nil) local fun cvt radix (s, i) = #1(valOf(IntInf.scan radix Substring.getc (Substring.triml i (Substring.all s)))) in val atoi = cvt StringCvt.DEC val xtoi = cvt StringCvt.HEX end (* local *) fun mysynch (src, pos, parts) = let fun digit d = Char.ord d - Char.ord #"0" fun cvt digits = foldl (fn(d, n) => 10*n + digit d) 0 (explode digits) val r = SourceMap.resynch src in case parts of [col, line] => r (pos, {fileName=NONE, line=cvt line, column=SOME(cvt col)}) | [file, col, line] => r (pos, {fileName=SOME file, line=cvt line, column=SOME(cvt col)}) | _ => impossible "text in (*#line...*)" end fun has_quote s = let fun loop i = ((String.sub(s,i) = #"`") orelse loop (i+1)) handle _ => false in loop 0 end fun inc (ri as ref i) = (ri := i+1) fun dec (ri as ref i) = (ri := i-1) %% %reject %s A S F Q AQ L LL LLC LLCQ; %header (functor MLLexFun(structure Tokens : ML_TOKENS)); %arg ({ comLevel, sourceMap, err, charlist, stringstart, stringtype, brack_stack}); idchars=[A-Za-z'_0-9]; id=[A-Za-z]{idchars}*; ws=("\012"|[\t\ ])*; nrws=("\012"|[\t\ ])+; eol=("\013\010"|"\010"|"\013"); some_sym=[!%&$+/:<=>?@~|#*]|\-|\^; sym={some_sym}|"\\"; quote="`"; full_sym={sym}|{quote}; num=[0-9]+; frac="."{num}; exp=[eE](~?){num}; real=(~?)(({num}{frac}?{exp})|({num}{frac}{exp}?)); hexnum=[0-9a-fA-F]+; %% {ws} => (continue()); {eol} => (SourceMap.newline sourceMap yypos; continue()); "_" => (Tokens.WILD(yypos,yypos+1)); "," => (Tokens.COMMA(yypos,yypos+1)); "{" => (Tokens.LBRACE(yypos,yypos+1)); "}" => (Tokens.RBRACE(yypos,yypos+1)); "[" => (Tokens.LBRACKET(yypos,yypos+1)); "#[" => (Tokens.VECTORSTART(yypos,yypos+1)); "]" => (Tokens.RBRACKET(yypos,yypos+1)); ";" => (Tokens.SEMICOLON(yypos,yypos+1)); "(" => (if (null(!brack_stack)) then () else inc (hd (!brack_stack)); Tokens.LPAREN(yypos,yypos+1)); ")" => (if (null(!brack_stack)) then () else if (!(hd (!brack_stack)) = 1) then ( brack_stack := tl (!brack_stack); charlist := []; YYBEGIN Q) else dec (hd (!brack_stack)); Tokens.RPAREN(yypos,yypos+1)); "." => (Tokens.DOT(yypos,yypos+1)); "..." => (Tokens.DOTDOTDOT(yypos,yypos+3)); "'"("'"?)("_"|{num})?{id} => (TokTable.checkTyvar(yytext,yypos)); {id} => (TokTable.checkId(yytext, yypos)); {full_sym}+ => (if !Control.quotation then if (has_quote yytext) then REJECT() else TokTable.checkSymId(yytext,yypos) else TokTable.checkSymId(yytext,yypos)); {sym}+ => (TokTable.checkSymId(yytext,yypos)); {quote} => (if !Control.quotation then (YYBEGIN Q; charlist := []; Tokens.BEGINQ(yypos,yypos+1)) else (err(yypos, yypos+1) COMPLAIN "quotation implementation error" nullErrorBody; Tokens.BEGINQ(yypos,yypos+1))); {real} => (Tokens.REAL(yytext,yypos,yypos+size yytext)); [1-9][0-9]* => (Tokens.INT(atoi(yytext, 0),yypos,yypos+size yytext)); {num} => (Tokens.INT0(atoi(yytext, 0),yypos,yypos+size yytext)); ~{num} => (Tokens.INT0(atoi(yytext, 0),yypos,yypos+size yytext)); "0x"{hexnum} => (Tokens.INT0(xtoi(yytext, 2),yypos,yypos+size yytext)); "~0x"{hexnum} => (Tokens.INT0(IntInf.~(xtoi(yytext, 3)),yypos,yypos+size yytext)); "0w"{num} => (Tokens.WORD(atoi(yytext, 2),yypos,yypos+size yytext)); "0wx"{hexnum} => (Tokens.WORD(xtoi(yytext, 3),yypos,yypos+size yytext)); \" => (charlist := [""]; stringstart := yypos; stringtype := true; YYBEGIN S; continue()); \#\" => (charlist := [""]; stringstart := yypos; stringtype := false; YYBEGIN S; continue()); "(*#line"{nrws} => (YYBEGIN L; stringstart := yypos; comLevel := 1; continue()); "(*" => (YYBEGIN A; stringstart := yypos; comLevel := 1; continue()); "*)" => (err (yypos,yypos+1) COMPLAIN "unmatched close comment" nullErrorBody; continue()); \h => (err (yypos,yypos) COMPLAIN "non-Ascii character" nullErrorBody; continue()); . => (err (yypos,yypos) COMPLAIN "illegal token" nullErrorBody; continue()); [0-9]+ => (YYBEGIN LL; charlist := [yytext]; continue()); \. => ((* cheat: take n > 0 dots *) continue()); [0-9]+ => (YYBEGIN LLC; addString(charlist, yytext); continue()); 0* => (YYBEGIN LLC; addString(charlist, "1"); continue() (* note hack, since ml-lex chokes on the empty string for 0* *)); "*)" => (YYBEGIN INITIAL; mysynch(sourceMap, yypos+2, !charlist); comLevel := 0; charlist := []; continue()); {ws}\" => (YYBEGIN LLCQ; continue()); [^\"]* => (addString(charlist, yytext); continue()); \""*)" => (YYBEGIN INITIAL; mysynch(sourceMap, yypos+3, !charlist); comLevel := 0; charlist := []; continue()); "*)" => (err (!stringstart, yypos+1) WARN "ill-formed (*#line...*) taken as comment" nullErrorBody; YYBEGIN INITIAL; comLevel := 0; charlist := []; continue()); . => (err (!stringstart, yypos+1) WARN "ill-formed (*#line...*) taken as comment" nullErrorBody; YYBEGIN A; continue()); "(*" => (inc comLevel; continue()); {eol} => (SourceMap.newline sourceMap yypos; continue()); "*)" => (dec comLevel; if !comLevel=0 then YYBEGIN INITIAL else (); continue()); . => (continue()); \" => (let val s = makeString charlist val s = if size s <> 1 andalso not(!stringtype) then (err(!stringstart,yypos) COMPLAIN "character constant not length 1" nullErrorBody; substring(s^"x",0,1)) else s val t = (s,!stringstart,yypos+1) in YYBEGIN INITIAL; if !stringtype then Tokens.STRING t else Tokens.CHAR t end); {eol} => (err (!stringstart,yypos) COMPLAIN "unclosed string" nullErrorBody; SourceMap.newline sourceMap yypos; YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos)); \\{eol} => (SourceMap.newline sourceMap (yypos+1); YYBEGIN F; continue()); \\{ws} => (YYBEGIN F; continue()); \\a => (addString(charlist, "\007"); continue()); \\b => (addString(charlist, "\008"); continue()); \\f => (addString(charlist, "\012"); continue()); \\n => (addString(charlist, "\010"); continue()); \\r => (addString(charlist, "\013"); continue()); \\t => (addString(charlist, "\009"); continue()); \\v => (addString(charlist, "\011"); continue()); \\\\ => (addString(charlist, "\\"); continue()); \\\" => (addString(charlist, "\""); continue()); \\\^[@-_] => (addChar(charlist, Char.chr(Char.ord(String.sub(yytext,2))-Char.ord #"@")); continue()); \\\^. => (err(yypos,yypos+2) COMPLAIN "illegal control escape; must be one of \ \@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_" nullErrorBody; continue()); \\[0-9]{3} => (let val x = Char.ord(String.sub(yytext,1))*100 +Char.ord(String.sub(yytext,2))*10 +Char.ord(String.sub(yytext,3)) -((Char.ord #"0")*111) in (if x>255 then err (yypos,yypos+4) COMPLAIN "illegal ascii escape" nullErrorBody else addChar(charlist, Char.chr x); continue()) end); \\ => (err (yypos,yypos+1) COMPLAIN "illegal string escape" nullErrorBody; continue()); [\000-\031] => (err (yypos,yypos+1) COMPLAIN "illegal non-printing character in string" nullErrorBody; continue()); ({idchars}|{some_sym}|\[|\]|\(|\)|{quote}|[,.;^{}])+|. => (addString(charlist,yytext); continue()); {eol} => (SourceMap.newline sourceMap yypos; continue()); {ws} => (continue()); \\ => (YYBEGIN S; stringstart := yypos; continue()); . => (err (!stringstart,yypos) COMPLAIN "unclosed string" nullErrorBody; YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos+1)); "^`" => (addString(charlist, "`"); continue()); "^^" => (addString(charlist, "^"); continue()); "^" => (YYBEGIN AQ; let val x = makeString charlist in Tokens.OBJL(x,yypos,yypos+(size x)) end); "`" => ((* a closing quote *) YYBEGIN INITIAL; let val x = makeString charlist in Tokens.ENDQ(x,yypos,yypos+(size x)) end); {eol} => (SourceMap.newline sourceMap yypos; addString(charlist,"\n"); continue()); . => (addString(charlist,yytext); continue()); {eol} => (SourceMap.newline sourceMap yypos; continue()); {ws} => (continue()); {id} => (YYBEGIN Q; let val hash = StrgHash.hashString yytext in Tokens.AQID(FastSymbol.rawSymbol(hash,yytext), yypos,yypos+(size yytext)) end); {sym}+ => (YYBEGIN Q; let val hash = StrgHash.hashString yytext in Tokens.AQID(FastSymbol.rawSymbol(hash,yytext), yypos,yypos+(size yytext)) end); "(" => (YYBEGIN INITIAL; brack_stack := ((ref 1)::(!brack_stack)); Tokens.LPAREN(yypos,yypos+1)); . => (err (yypos,yypos+1) COMPLAIN ("ml lexer: bad character after antiquote "^yytext) nullErrorBody; Tokens.AQID(FastSymbol.rawSymbol(0,""),yypos,yypos));
Click to toggle
does not end with </html> tag
does not end with </body> tag
The output has ended thus: t) nullErrorBody; Tokens.AQID(FastSymbol.rawSymbol(0,""),yypos,yypos));