1 : |
jhr |
14 |
(* diderot.lex
|
2 : |
|
|
*
|
3 : |
|
|
* COPYRIGHT (c) 2010 The Diderot Project (http://diderot.cs.uchicago.edu)
|
4 : |
|
|
* All rights reserved.
|
5 : |
|
|
*)
|
6 : |
|
|
|
7 : |
|
|
%name DiderotLex;
|
8 : |
|
|
|
9 : |
|
|
%arg (lexErr);
|
10 : |
|
|
|
11 : |
|
|
%defs(
|
12 : |
|
|
|
13 : |
jhr |
30 |
structure T = DiderotTokens
|
14 : |
jhr |
14 |
|
15 : |
|
|
(* ml-ulex requires the %defs section to define a type named lex_result *)
|
16 : |
|
|
type lex_result = T.token
|
17 : |
|
|
|
18 : |
|
|
(* the depth int ref will be used for keeping track of comment depth *)
|
19 : |
|
|
val depth = ref 0
|
20 : |
|
|
|
21 : |
|
|
(* list of string fragments to concatenate *)
|
22 : |
|
|
val buf : string list ref = ref []
|
23 : |
|
|
|
24 : |
|
|
(* push a string fragment onto the front of buf, so fragments are held newest-first *)
fun addStr frag = buf := frag :: (!buf)
|
26 : |
|
|
|
27 : |
|
|
(* build a STRING token from the fragments accumulated in buf and reset buf to empty;
 * the fragments are reversed first because addStr stores them newest-first.
 *)
fun mkString () = let
      val frags = List.rev (!buf)
      in
	buf := [];
	T.STRING(String.concat frags)
      end
|
34 : |
|
|
|
35 : |
|
|
(* make a FLOAT token from a substring of the form
 *	<sign>? <digits> "." <digits> ( <e-or-E> <signed-int> )?
 * A leading "-" sets isNeg; a leading "+" is accepted and dropped.  The exponent is 0 when
 * nothing follows the fraction digits.
 *)
fun mkFloat ss = let
    (* peel off the optional sign *)
      val (isNeg, unsigned) = (case Substring.getc ss
	     of SOME(#"-", r) => (true, r)
	      | SOME(#"+", r) => (false, r)
	      | _ => (false, ss)
	    (* end case *))
    (* digits before the "." *)
      val (whole, afterWhole) = Substring.splitl Char.isDigit unsigned
    (* drop the "." and take the digits that follow it *)
      val (frac, afterFrac) = Substring.splitl Char.isDigit (Substring.triml 1 afterWhole)
    (* whatever remains is the exponent marker ("e" or "E") followed by the exponent *)
      val exp = (case Substring.getc afterFrac
	     of NONE => 0
	      | SOME(_, expDigits) =>
		  #1(valOf(Int.scan StringCvt.DEC Substring.getc expDigits))
	    (* end case *))
      in
	T.FLOAT(FloatLit.float{
	    isNeg = isNeg,
	    whole = Substring.string whole,
	    frac = Substring.string frac,
	    exp = exp
	  })
      end
|
60 : |
|
|
|
61 : |
|
|
(* scan a number from a hexadecimal string of the form "0x<hexdigits>".
 * The first two characters (the "0x" prefix) are dropped without being checked and the
 * remainder is scanned as hexadecimal digits; handing the prefixed text straight to
 * IntInf.scan did not work in SML/NJ.
 * Raises Fail if no hexadecimal digits follow the prefix.
 *)
fun fromHexString s = let
      val digits = Substring.triml 2 (Substring.full s)
      in
	case IntInf.scan StringCvt.HEX Substring.getc digits
	 of SOME(n, _) => n
	  | NONE => raise Fail(concat[
		"fromHexString: bad hexadecimal literal \"", String.toString s, "\""
	      ])
	(* end case *)
      end
|
70 : |
|
|
|
71 : |
|
|
(* eof : unit -> lex_result *)
|
72 : |
|
|
(* ml-ulex requires this as well *)
|
73 : |
|
|
(* returns the EOF token; takes no input and does not inspect the lexer state *)
fun eof () = T.EOF
|
74 : |
|
|
);
|
75 : |
|
|
|
76 : |
|
|
%states INITIAL STRING COM1 COM2;
|
77 : |
|
|
|
78 : |
|
|
%let letter = [a-zA-Z];
|
79 : |
|
|
%let dig = [0-9];
|
80 : |
|
|
%let num = {dig}+;
|
81 : |
|
|
%let hexdigit = [0-9a-fA-F];
|
82 : |
|
|
%let hexnum = "0x"{hexdigit}+;
|
83 : |
|
|
%let idchar = {letter}|{dig}|"_"|"'";
|
84 : |
|
|
%let id = {letter}{idchar}*;
|
85 : |
|
|
%let ws = " "|[\t\n\v\f\r];
|
86 : |
jhr |
35 |
(* escape sequences recognized inside string literals: a backslash followed by one of
 * a b f n r t v \ " or by exactly three decimal digits
 *)
%let esc = "\\"[abfnrtv\\\"]|"\\"{dig}{dig}{dig};
|
87 : |
|
|
%let sgood = [\032-\126]&[^\"\\]; (* sgood means "characters good inside strings" *)
|
88 : |
|
|
%let eol = "\n";
|
89 : |
jhr |
14 |
|
90 : |
|
|
(***** Keywords and operators *****)
|
91 : |
|
|
|
92 : |
jhr |
26 |
(* logical connectives *)
<INITIAL> "||"		=> (T.OP_orelse);
<INITIAL> "&&"		=> (T.OP_andalso);

(* comparisons *)
<INITIAL> "<"		=> (T.OP_lt);
<INITIAL> "<="		=> (T.OP_lte);
<INITIAL> "=="		=> (T.OP_eqeq);
<INITIAL> "!="		=> (T.OP_neq);
<INITIAL> ">="		=> (T.OP_gte);
<INITIAL> ">"		=> (T.OP_gt);

(* arithmetic and other infix operators *)
<INITIAL> "+"		=> (T.OP_plus);
<INITIAL> "-"		=> (T.OP_minus);
<INITIAL> "*"		=> (T.OP_star);
<INITIAL> "/"		=> (T.OP_slash);
<INITIAL> "@"		=> (T.OP_at);

(* brackets *)
<INITIAL> "("		=> (T.LP);
<INITIAL> ")"		=> (T.RP);
<INITIAL> "["		=> (T.LB);
<INITIAL> "]"		=> (T.RB);
<INITIAL> "{"		=> (T.LCB);
<INITIAL> "}"		=> (T.RCB);

(* punctuation *)
<INITIAL> ","		=> (T.COMMA);
<INITIAL> ";"		=> (T.SEMI);
<INITIAL> "#"		=> (T.HASH);
<INITIAL> "!"		=> (T.BANG);
<INITIAL> "="		=> (T.OP_eq);
|
116 : |
jhr |
14 |
|
117 : |
jhr |
33 |
<INITIAL> {id} => (Keywords.idToken yytext);
|
118 : |
|
|
|
119 : |
|
|
<INITIAL> {num} => (T.POSINT(valOf (IntInf.fromString yytext)));
|
120 : |
|
|
(* the matched text, including its leading "-", is handed to IntInf.fromString.
 * NOTE(review): this pattern is longer than the "-" operator rule, so input such as `a-1`
 * presumably lexes as an identifier followed by NEGINT rather than as a subtraction --
 * confirm that the parser accounts for this.
 *)
<INITIAL> "-"{num} => (T.NEGINT(valOf (IntInf.fromString yytext)));
|
121 : |
|
|
<INITIAL> "-"?{num}"."{num}([eE][+-]?{num})?
|
122 : |
|
|
=> (mkFloat yysubstr);
|
123 : |
|
|
<INITIAL> {ws} => (skip ());
|
124 : |
|
|
|
125 : |
jhr |
35 |
(* The string-opening rule must be listed ahead of the catch-all rule: both match exactly
 * one character, and when two rules match the same text ml-ulex selects the one that
 * appears first.  With the catch-all first, a double quote would be reported as a bad
 * character and the STRING state would never be entered.
 *)
<INITIAL> "\""		=> (YYBEGIN STRING; continue());

(* any character not claimed by another INITIAL rule is reported and then skipped *)
<INITIAL> .		=> (lexErr(yypos, ["bad character `", String.toString yytext, "'"]);
			    continue());

(***** Strings *****)
|
131 : |
|
|
(* String.fromString yields NONE for an escape it cannot convert (for example a three-digit
 * code that is not a valid character, such as \999), so that case is reported as a lexical
 * error instead of letting valOf raise Option.  A valid escape is appended to the buffer as
 * the character it denotes.
 *)
<STRING>{esc}		=> ((case String.fromString yytext
			       of SOME s => addStr s
				| NONE => lexErr(yypos, [
				      "bad escape sequence `", yytext, "' in string literal"
				    ])
			      (* end case *));
			    continue());
|
132 : |
|
|
<STRING>{sgood}+ => (addStr yytext; continue());
|
133 : |
|
|
<STRING> "\"" => (YYBEGIN INITIAL; mkString());
|
134 : |
|
|
|
135 : |
|
|
<STRING> . => (lexErr(yypos, [
|
136 : |
|
|
"bad character `", String.toString yytext,
|
137 : |
|
|
"' in string literal"
|
138 : |
|
|
]);
|
139 : |
|
|
continue());
|
140 : |
|
|
|
141 : |
jhr |
14 |
(***** Comments *****)
|
142 : |
jhr |
35 |
(* line comments: "//" enters COM1, which discards characters until an end of line
 * returns the lexer to INITIAL
 *)
<INITIAL> "//"		=> (YYBEGIN COM1; skip());
<COM1> {eol}		=> (YYBEGIN INITIAL; skip());
<COM1> .		=> (skip());

(* block comments: "/*" enters COM2 and the first "*/" returns to INITIAL, so block
 * comments do not nest
 *)
<INITIAL> "/*"		=> (YYBEGIN COM2; skip());
<COM2> "*/"		=> (YYBEGIN INITIAL; skip());
<COM2> .		=> (skip());
|