SCM Repository
Annotation of /trunk/src/compiler/parser/diderot.lex
Parent Directory
|
Revision Log
Revision 110 - (view) (download)
1 : | jhr | 14 | (* diderot.lex |
2 : | * | ||
3 : | * COPYRIGHT (c) 2010 The Diderot Project (http://diderot.cs.uchicago.edu) | ||
4 : | * All rights reserved. | ||
5 : | *) | ||
6 : | |||
7 : | %name DiderotLex; | ||
8 : | |||
9 : | %arg (lexErr); | ||
10 : | |||
11 : | %defs( | ||
12 : | |||
structure T = DiderotTokens

(* ml-ulex requires the specification to define the type lex_result; here the
 * rule actions produce parser tokens.
 *)
type lex_result = T.token

(* counter intended for keeping track of comment depth.
 * NOTE(review): nothing in the visible part of this file reads or updates
 * it -- confirm whether it is still needed.
 *)
val depth = ref 0
20 : | |||
(* accumulator for the pieces of the string literal being scanned; the most
 * recently added piece is at the head of the list
 *)
val buf : string list ref = ref []

(* push one more piece onto the accumulator *)
fun addStr s = let
      val soFar = !buf
      in
        buf := s :: soFar
      end

(* build the STRING token from the accumulated pieces (in the order they were
 * added) and reset the accumulator for the next literal
 *)
fun mkString () = let
      val pieces = !buf
      in
        buf := [];
        T.STRING(String.concat(List.rev pieces))
      end
34 : | |||
(* make a FLOAT token from the substring matched by the float rule.  The text
 * is taken apart as: optional sign, whole digits, one separator character
 * ("."), fraction digits, and an optional exponent introduced by a single
 * character ("e" or "E").
 *)
fun mkFloat ss = let
    (* peel off an optional leading sign *)
      val (negative, unsigned) = (case Substring.first ss
             of SOME #"-" => (true, Substring.triml 1 ss)
              | SOME #"+" => (false, Substring.triml 1 ss)
              | _ => (false, ss)
            (* end case *))
      val (wholeDigits, afterWhole) = Substring.splitl Char.isDigit unsigned
    (* drop the "." before collecting the fraction digits *)
      val (fracDigits, afterFrac) =
            Substring.splitl Char.isDigit (Substring.triml 1 afterWhole)
    (* anything left over is the exponent marker followed by a signed integer *)
      val exponent = (case Substring.getc afterFrac
             of NONE => 0
              | SOME(_, expText) =>
                  #1(valOf(Int.scan StringCvt.DEC Substring.getc expText))
            (* end case *))
      in
        T.FLOAT(FloatLit.float{
            isNeg = negative,
            whole = Substring.string wholeDigits,
            frac = Substring.string fracDigits,
            exp = exponent
          })
      end
60 : | |||
(* scan a number from a hexadecimal literal of the form "0x<hexdigits>".
 * The two-character prefix is stripped by hand before scanning, because
 * handing the whole string to StringCvt.scanString (IntInf.scan StringCvt.HEX)
 * does not work in SML/NJ.
 * Raises Fail if no hexadecimal digits follow the prefix.
 *)
fun fromHexString s = (
      case IntInf.scan StringCvt.HEX Substring.getc (Substring.triml 2 (Substring.full s))
       of SOME(n, _) => n
        | NONE => raise Fail(String.concat[
              "fromHexString: bad hexadecimal literal \"", String.toString s, "\""
            ])
      (* end case *))
70 : | |||
(* eof : unit -> lex_result
 * ml-ulex requires the specification to supply this function as well.
 * It returns T.EOF unconditionally.
 *)
fun eof () = T.EOF
74 : | ); | ||
75 : | |||
(* start states: INITIAL for ordinary tokens, STRING while inside a string
 * literal, COM1 while inside a line comment, and COM2 while inside a block
 * comment
 *)
%states INITIAL STRING COM1 COM2;

(* named regular expressions used by the rules *)
%let letter = [a-zA-Z];
%let dig = [0-9];
%let num = {dig}+;
%let hexdigit = [0-9a-fA-F];
(* NOTE(review): no rule in the visible part of this file uses hexnum *)
%let hexnum = "0x"{hexdigit}+;
%let idchar = {letter}|{dig}|"_"|"'";
(* identifiers start with a letter *)
%let id = {letter}{idchar}*;
%let ws = " "|[\t\n\v\f\r];
(* an escape is a backslash followed by one of the listed characters or by
 * exactly three decimal digits
 *)
%let esc = "\\"[abfnrtv\\\"]|"\\"{dig}{dig}{dig};
%let sgood = [\032-\126]&[^\"\\]; (* sgood means "characters good inside strings" *)
%let eol = "\n";
89 : | jhr | 14 | |
(***** Keywords and operators *****)
(* each rule below maps one fixed lexeme to its token; word-like keywords are
 * not listed here -- identifiers go through Keywords.idToken in the {id} rule
 *)

<INITIAL> "||" => (T.OP_orelse);
<INITIAL> "&&" => (T.OP_andalso);
<INITIAL> "<" => (T.OP_lt);
<INITIAL> "<=" => (T.OP_lte);
<INITIAL> "==" => (T.OP_eqeq);
<INITIAL> "!=" => (T.OP_neq);
<INITIAL> ">=" => (T.OP_gte);
<INITIAL> ">" => (T.OP_gt);
<INITIAL> "+" => (T.OP_plus);
<INITIAL> "-" => (T.OP_minus);
<INITIAL> "*" => (T.OP_star);
<INITIAL> "/" => (T.OP_slash);
<INITIAL> "@" => (T.OP_at);
<INITIAL> "(" => (T.LP);
<INITIAL> ")" => (T.RP);
<INITIAL> "[" => (T.LB);
<INITIAL> "]" => (T.RB);
<INITIAL> "{" => (T.LCB);
<INITIAL> "}" => (T.RCB);
<INITIAL> "," => (T.COMMA);
<INITIAL> ";" => (T.SEMI);
<INITIAL> "#" => (T.HASH);
<INITIAL> "!" => (T.BANG);
<INITIAL> "=" => (T.OP_eq);
<INITIAL> "|" => (T.BAR);
<INITIAL> ".." => (T.DOTDOT);
118 : | jhr | 14 | |
(* identifiers: the token is chosen by Keywords.idToken *)
<INITIAL> {id} => (Keywords.idToken yytext);

(* integer and floating-point literals; the float rule hands the matched
 * substring to mkFloat
 *)
<INITIAL> {num} => (T.INT(valOf (IntInf.fromString yytext)));
<INITIAL> {num}"."{num}([eE][+-]?{num})?
	=> (mkFloat yysubstr);
<INITIAL> {ws} => (skip ());

(* an opening double quote switches to the STRING state; no token is produced
 * until the closing quote is seen
 *)
<INITIAL> "\"" => (YYBEGIN STRING; continue());

(* any other character is reported and lexing continues; the closing quote in
 * the message pairs with the opening backquote
 *)
<INITIAL> . => (lexErr(yypos, ["bad character `", String.toString yytext, "'"]);
	continue());
130 : | |||
(***** Strings *****)
(* escape sequences are translated with String.fromString.  The {esc} pattern
 * admits any three decimal digits after the backslash, so the conversion can
 * fail for sequences that String.fromString rejects; those are reported
 * through lexErr instead of raising Option out of the lexer.
 *)
<STRING>{esc} => ((case String.fromString yytext
	     of SOME s => addStr s
	      | NONE => lexErr(yypos, [
		    "illegal escape sequence `", String.toString yytext,
		    "' in string literal"
		  ])
	    (* end case *));
	continue());
(* runs of ordinary characters are added to the buffer as they are *)
<STRING>{sgood}+ => (addStr yytext; continue());
(* the closing quote returns to INITIAL and yields the STRING token *)
<STRING> "\"" => (YYBEGIN INITIAL; mkString());

(* anything else inside a literal is reported; lexing stays in the STRING state *)
<STRING> . => (lexErr(yypos, [
	"bad character `", String.toString yytext,
	"' in string literal"
      ]);
	continue());
141 : | |||
(***** Comments *****)
(* line comments: enter COM1 on two slashes, discard characters, and return to
 * INITIAL at the end of the line
 *)
<INITIAL> "//" => (YYBEGIN COM1; skip());
<COM1> {eol} => (YYBEGIN INITIAL; skip());
<COM1> . => (skip());

(* block comments: enter COM2 on the opening delimiter and return to INITIAL on
 * the first closing delimiter, so block comments do not nest.
 * NOTE(review): no rule here reports a block comment that is still open at
 * end of input -- confirm whether that should be an error.
 *)
<INITIAL> "/*"
	=> (YYBEGIN COM2; skip());
<COM2> "*/"
	=> (YYBEGIN INITIAL; skip());
<COM2> .
	=> (skip());
root@smlnj-gforge.cs.uchicago.edu | ViewVC Help |
Powered by ViewVC 1.0.0 |