(* -*- sml -*- *)

(*
 * ML-Lex specification of the lexer for CM description files.
 *
 * This first section holds the SML declarations that ML-Lex copies into
 * the generated lexer: the token/position types, the record of callbacks
 * the rules use, the end-of-input hook, and the keyword lookup helper.
 *)

type svalue = Tokens.svalue
type pos = int

type ('a, 'b) token = ('a, 'b) Tokens.token
type lexresult = (svalue, pos) token

(*
 * Callbacks handed to the lexer by its client.  The descriptions below
 * state only how the rules in this file use each field; the
 * implementations live elsewhere.
 *)
type lexarg = {
    (* called each time a comment opener is seen *)
    enterC: unit -> unit,
    (* called on each comment closer; a true result makes the rule switch
       back to the state that was active outside the comment *)
    leaveC: unit -> bool,
    (* starts collecting text; called with the start position and either
       "string" or "error" *)
    newS: pos * string -> unit,
    (* appends one character to the text being collected *)
    addS: char -> unit,
    (* called for a control escape with the matched text and the ordinal
       of the first letter of the matched range *)
    addSC: string * int -> unit,
    (* called for a three-digit escape with the matched text and its
       position *)
    addSN: string * pos -> unit,
    (* finishes collecting; receives the end position and the token
       constructor to apply *)
    getS: pos * (string * pos * pos -> lexresult) -> lexresult,
    (* produces the token returned at end of input *)
    handleEof: unit -> lexresult,
    (* called with the position of every line break that is matched *)
    newline: pos -> unit,
    (* reports a lexical error at a position *)
    error: pos -> string -> unit
}

type arg = lexarg

(* End-of-input hook required by ML-Lex: defers to the handleEof callback. *)
fun eof (arg: lexarg) = (#handleEof arg ())

(* Commented-out implementation of the callbacks on top of file-level
   refs; kept unchanged apart from layout. *)
(*
local
    val depth = ref 0
    val curstring = ref ([]: char list)
    val startpos = ref 0
    val instring = ref false
in
    fun resetAll () = (depth := 0; startpos := 0; instring := false)

    (* comment stuff *)
    fun enterC () = depth := !depth + 1
    fun leaveC () = let
        val d = !depth - 1
        val _ = depth := d
    in
        d = 0
    end

    (* string stuff *)
    fun newS sp = (curstring := []; startpos := sp; instring := true)
    fun addS c = curstring := c :: (!curstring)
    fun addSC (t, p, b) = addS (chr (ord (String.sub (t, 2)) - b))
    fun addSN (t, p) = let
        val ns = substring (t, 1, 3)
        val n = Int.fromString ns
    in
        addS (chr (valOf n))
        handle _ =>
            ErrorMsg.error p ("illegal decimal char spec " ^ ns)
    end
    fun getS endpos =
        (instring := false;
         Tokens.STRING (implode (rev (!curstring)), !startpos, endpos + 1))

    (* handling EOF *)
    fun eof (arg: ) = let
        val pos = ErrorMsg.lastLinePos ()
    in
        if !depth > 0 then
            ErrorMsg.error pos "unexpected EOF in COMMENT"
        else if !instring then
            ErrorMsg.error pos "unexpected EOF in STRING"
        else ();
        resetAll ();
        Tokens.EOF(pos,pos)
    end
end
*)

local
    (* Reserved words and the token constructor each one maps to. *)
    val idlist = [("Alias", Tokens.ALIAS),
                  ("Group", Tokens.GROUP),
                  ("Library", Tokens.LIBRARY),
                  ("is", Tokens.IS),
                  ("structure", Tokens.STRUCTURE),
                  ("signature", Tokens.SIGNATURE),
                  ("functor", Tokens.FUNCTOR),
                  ("funsig", Tokens.FUNSIG),
                  ("defined", Tokens.DEFINED),
                  ("div", Tokens.DIV),
                  ("mod", Tokens.MOD),
                  ("andalso", Tokens.ANDALSO),
                  ("orelse", Tokens.ORELSE),
                  ("not", Tokens.NOT)]
in
    (*
     * idToken (t, p): token for the word t that starts at position p.
     * A word listed in idlist yields its keyword token spanning
     * p .. p + size t; any other word yields FILE_STANDARD carrying t.
     *
     * The rules apply this function to matches of the cmid pattern, so
     * cmid has to accept ordinary identifier characters.  It is therefore
     * defined from cmidchars below; defining it from cmextrachars alone
     * would make every entry of idlist unreachable.
     *)
    fun idToken (t, p) =
        case List.find (fn (id, _) => id = t) idlist of
            NONE => Tokens.FILE_STANDARD (t, p, p + size t)
          | SOME (_, tok) => tok (p, p + size t)
end

(* states:

     INITIAL -> C
       |
       +------> P -> PC
       |        |
       |        +--> PM -> PMC
       |
       +------> M -> MC
       |
       +------> S -> SS
       |
       +------> ES -> E

   "C"  -- COMMENT
   "P"  -- PREPROC
   "M"  -- MLSYMBOL
   "S"  -- STRING
   "SS" -- STRINGSKIP
   "ES" -- ERRORSTART
   "E"  -- ERROR
*)

%%

%s C P PC PM PMC M MC S SS E ES;

%header(functor CMLexFun (structure Tokens: CM_TOKENS));

%arg ({ enterC, leaveC,
        newS, addS, addSC, addSN, getS,
        handleEof,
        newline,
        error });

idchars=[A-Za-z'_0-9];
id=[A-Za-z]{idchars}*;
cmextrachars=[!%&$+/<=>?@~|#*]|\-|\^;
cmidchars={idchars}|{cmextrachars};
cmid={cmidchars}+;
ws=("\012"|[\t\ ]);
eol=("\013\010"|"\013"|"\010");
sym=[!%&$+/:<=>?@~|#*]|\-|\^|"\\";
digit=[0-9];
sharp="#";
%%

<INITIAL>"(*"           => ((* top-level comment opener: switch to state C *)
                            enterC (); YYBEGIN C; continue ());

<P>"(*"                 => ((* comment opened in state P: remember that by
                               moving to PC *)
                            enterC (); YYBEGIN PC; continue ());
<PM>"(*"                => (enterC (); YYBEGIN PMC; continue ());
<M>"(*"                 => (enterC (); YYBEGIN MC; continue ());

<C,PC,PMC,MC>"(*"       => ((* nested opener: stay in the comment state *)
                            enterC (); continue ());

<C>"*)"                 => ((* leaveC tells whether the outermost comment
                               was just closed; only then resume the state
                               that was active before the comment *)
                            if leaveC () then YYBEGIN INITIAL else ();
                            continue ());
<PC>"*)"                => (if leaveC () then YYBEGIN P else ();
                            continue ());
<PMC>"*)"               => (if leaveC () then YYBEGIN PM else ();
                            continue ());
<MC>"*)"                => (if leaveC () then YYBEGIN M else ();
                            continue ());
<C,PC,PMC,MC>{eol}      => (newline yypos; continue ());
<C,PC,PMC,MC>.          => ((* comment text is discarded *)
                            continue ());

<INITIAL,P,PM,M>"*)"    => (error yypos "unmatched comment delimiter";
                            continue ());

<INITIAL>"\""           => ((* opening quote: start collecting a string *)
                            YYBEGIN S; newS (yypos, "string"); continue ());

<S>"\\a"                => (addS #"\a"; continue ());
<S>"\\b"                => (addS #"\b"; continue ());
<S>"\\f"                => (addS #"\f"; continue ());
<S>"\\n"                => (addS #"\n"; continue ());
<S>"\\r"                => (addS #"\r"; continue ());
<S>"\\t"                => (addS #"\t"; continue ());
<S>"\\v"                => (addS #"\v"; continue ());

<S>"\\^"@               => (addS (chr 0); continue ());
<S>"\\^"[a-z]           => ((* NOTE review: the base passed is the ordinal of
                               the first letter of the range, so if addSC
                               subtracts it directly then control-a yields
                               code 0 rather than 1 - confirm against the
                               addSC implementation *)
                            addSC (yytext, ord #"a"); continue ());
<S>"\\^"[A-Z]           => (addSC (yytext, ord #"A"); continue ());
<S>"\\^["               => (addS (chr 27); continue ());
<S>"\\^\\"              => (addS (chr 28); continue ());
<S>"\\^]"               => (addS (chr 29); continue ());
<S>"\\^^"               => (addS (chr 30); continue ());
<S>"\\^_"               => (addS (chr 31); continue ());

<S>"\\"[0-9][0-9][0-9]  => (addSN (yytext, yypos); continue ());

<S>"\\\""               => (addS #"\""; continue ());
<S>"\\\\"               => (addS #"\\"; continue ());

<S>"\\"{eol}            => ((* backslash then line break: the break itself
                               starts one character after yypos *)
                            YYBEGIN SS; newline (yypos + 1); continue ());
<S>"\\"{ws}+            => (YYBEGIN SS; continue ());

<S>"\\".                => (error yypos
                             ("illegal escape character in string " ^ yytext);
                            continue ());

<S>"\""                 => ((* closing quote: hand the collected text to
                               getS, which builds the FILE_NATIVE token *)
                            YYBEGIN INITIAL; getS (yypos, Tokens.FILE_NATIVE));
<S>{eol}                => (newline yypos;
                            error yypos "illegal linebreak in string";
                            continue ());
<S>.                    => (addS (String.sub (yytext, 0)); continue ());

<SS>{eol}               => (newline yypos; continue ());
<SS>{ws}+               => (continue ());
<SS>"\\"                => ((* closing backslash of the skipped gap *)
                            YYBEGIN S; continue ());
<SS>.                   => (error yypos
                             ("illegal character in stringskip " ^ yytext);
                            continue ());

<INITIAL,P>"("          => (Tokens.LPAREN (yypos, yypos + 1));
<INITIAL,P>")"          => (Tokens.RPAREN (yypos, yypos + 1));
<INITIAL>":"            => (Tokens.COLON (yypos, yypos + 1));

<P>"+"                  => ((* operator tokens are recognised only in state P;
                               the same characters are ordinary cmid
                               characters elsewhere *)
                            Tokens.PLUS (yypos, yypos + 1));
<P>"-"                  => (Tokens.MINUS (yypos, yypos + 1));
<P>"*"                  => (Tokens.TIMES (yypos, yypos + 1));
<P>"<>"                 => (Tokens.NE (yypos, yypos + 2));
<P>"<="                 => (Tokens.LE (yypos, yypos + 2));
<P>"<"                  => (Tokens.LT (yypos, yypos + 1));
<P>">="                 => (Tokens.GE (yypos, yypos + 2));
<P>">"                  => (Tokens.GT (yypos, yypos + 1));
<P>"="                  => (Tokens.EQ (yypos, yypos + 1));
<P>"~"                  => (Tokens.TILDE (yypos, yypos + 1));

<P>{digit}+             => ((* a literal that cannot be converted is
                               reported and replaced by 0 so that lexing
                               can go on *)
                            Tokens.NUMBER
                             (valOf (Int.fromString yytext)
                              handle _ =>
                                  (error yypos "number too large"; 0),
                              yypos, yypos + size yytext));

<P>{id}                 => (Tokens.CM_ID (yytext, yypos, yypos + size yytext));

<M>({id}|{sym}+)        => ((* one ML symbol, then back to the enclosing
                               state *)
                            YYBEGIN INITIAL;
                            Tokens.ML_ID (yytext, yypos, yypos + size yytext));
<PM>({id}|{sym}+)       => (YYBEGIN P;
                            Tokens.ML_ID (yytext, yypos, yypos + size yytext));

<INITIAL,P>{eol}{sharp}{ws}*"if" => ((* the match starts with the line
                                        break, so report it before
                                        returning the directive token *)
                                     YYBEGIN P;
                                     newline yypos;
                                     Tokens.IF (yypos, yypos + size yytext));
<INITIAL,P>{eol}{sharp}{ws}*"then" => (YYBEGIN P;
                                       newline yypos;
                                       Tokens.THEN (yypos, yypos + size yytext));
<INITIAL,P>{eol}{sharp}{ws}*"elif" => (YYBEGIN P;
                                       newline yypos;
                                       Tokens.ELIF (yypos, yypos + size yytext));
<INITIAL,P>{eol}{sharp}{ws}*"else" => (YYBEGIN P;
                                       newline yypos;
                                       Tokens.ELSE (yypos, yypos + size yytext));
<INITIAL,P>{eol}{sharp}{ws}*"endif" => (YYBEGIN P;
                                        newline yypos;
                                        Tokens.ENDIF (yypos, yypos + size yytext));
<INITIAL,P>{eol}{sharp}{ws}*"error" => ((* the rest of the line is collected
                                           as the message text *)
                                        YYBEGIN ES;
                                        newline yypos;
                                        newS (yypos, "error");
                                        continue ());

<ES>{ws}+               => ((* blanks before the message are dropped *)
                            continue ());
<ES>{eol}               => (YYBEGIN INITIAL; newline yypos;
                            getS (yypos, Tokens.ERROR));
<ES>.                   => ((* first message character: from here on blanks
                               are kept, see state E *)
                            YYBEGIN E;
                            addS (String.sub (yytext, 0)); continue ());

<E>{eol}                => (YYBEGIN INITIAL; newline yypos;
                            getS (yypos, Tokens.ERROR));
<E>.                    => (addS (String.sub (yytext, 0)); continue ());

<INITIAL,M,PM>{eol}     => (newline yypos; continue ());

<P>{eol}                => ((* a line break ends state P *)
                            YYBEGIN INITIAL; newline yypos; continue ());

<INITIAL,M,PM,P>{ws}+   => (continue ());

<M,PM>.                 => (error yypos
                             ("illegal character at start of ML symbol: " ^
                              yytext);
                            continue ());

<INITIAL>{cmid}         => ((* idToken separates reserved words from other
                               words *)
                            idToken (yytext, yypos));

<INITIAL>.              => (error yypos
                             ("illegal character: " ^ yytext);
                            continue ());

Click to toggle
does not end with </html> tag
does not end with </body> tag
The output has ended thus: yypos)); <INITIAL>. => (error yypos ("illegal character: " ^ yytext); continue ());