Home My Page Projects Code Snippets Project Openings diderot
Summary Activity Tracker Tasks SCM

SCM Repository

[diderot] Annotation of /trunk/src/compiler/parser/diderot.lex
ViewVC logotype

Annotation of /trunk/src/compiler/parser/diderot.lex

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3349 - (view) (download)

1 : jhr 14 (* diderot.lex
2 :     *
3 : jhr 3349 * This code is part of the Diderot Project (http://diderot-language.cs.uchicago.edu)
4 :     *
5 :     * COPYRIGHT (c) 2015 The University of Chicago
6 : jhr 14 * All rights reserved.
7 :     *)
8 :    
9 :     %name DiderotLex;
10 :    
11 :     %arg (lexErr);
12 :    
13 :     %defs(
14 :    
15 : jhr 30 structure T = DiderotTokens
16 : jhr 14
17 :     (* ml-ulex requires the user definitions to provide a lex_result type *)
18 :     type lex_result = T.token
19 :    
20 :     (* depth counter intended for tracking comment nesting; NOTE: no rule below currently reads or updates it *)
21 :     val depth = ref 0
22 :    
23 :     (* list of string fragments to concatenate *)
24 :     val buf : string list ref = ref []
25 :    
26 :     (* add a string to the buffer *)
27 :     fun addStr s = (buf := s :: !buf)
28 :    
29 :     (* make a string from buf *)
30 :     fun mkString () = let
31 : jhr 2356 val s = String.concat(List.rev(!buf))
32 :     in
33 :     buf := [];
34 :     T.STRING s
35 :     end
36 : jhr 14
37 :     (* make a FLOAT token from a substring *)
38 :     fun mkFloat ss = let
39 : jhr 2356 val (isNeg, rest) = (case Substring.getc ss
40 :     of SOME(#"-", r) => (true, r)
41 :     | SOME(#"+", r) => (false, r)
42 :     | _ => (false, ss)
43 :     (* end case *))
44 :     val (whole, rest) = Substring.splitl Char.isDigit rest
45 :     val rest = Substring.triml 1 rest (* remove "." *)
46 :     val (frac, rest) = Substring.splitl Char.isDigit rest
47 :     val exp = if Substring.isEmpty rest
48 :     then 0
49 :     else let
50 :     val rest = Substring.triml 1 rest (* remove "e" or "E" *)
51 :     in
52 :     #1(valOf(Int.scan StringCvt.DEC Substring.getc rest))
53 :     end
54 :     in
55 :     T.FLOAT(FloatLit.float{
56 :     isNeg = isNeg,
57 :     whole = Substring.string whole,
58 :     frac = Substring.string frac,
59 :     exp = exp
60 :     })
61 :     end
62 : jhr 14
63 :     (* scan a number from a hexadecimal string *)
64 :     val fromHexString = valOf o (StringCvt.scanString (IntInf.scan StringCvt.HEX))
65 :     (* FIXME: the above definition doesn't work in SML/NJ; the definition below is a workaround that shadows it *)
66 :     fun fromHexString s = let
67 :     val SOME(n, _) = IntInf.scan StringCvt.HEX Substring.getc
68 : jhr 2356 (Substring.triml 2 (Substring.full s))
69 : jhr 14 in
70 : jhr 2356 n
71 : jhr 14 end
72 :    
73 :     (* eof : unit -> lex_result *)
74 :     (* ml-ulex requires this as well *)
75 :     fun eof () = T.EOF
76 :     );
77 :    
78 :     %states INITIAL STRING COM1 COM2;
79 :    
80 :     %let dig = [0-9];
81 :     %let num = {dig}+;
82 :     %let hexdigit = [0-9a-fA-F];
83 :     %let hexnum = "0x"{hexdigit}+;
84 : jhr 2356 %let greek = [αβγζηθλμξπρστφψω];
85 :     %let letter = [a-zA-Z]|{greek};
86 : jhr 14 %let idchar = {letter}|{dig}|"_"|"'";
87 :     %let id = {letter}{idchar}*;
88 :     %let ws = " "|[\t\n\v\f\r];
89 : jhr 35 %let esc = "\\"[abfnrtv\\\"]|"\\"{dig}{dig}{dig};
90 :     %let sgood = [\032-\126]&[^\"\\]; (* sgood means "characters good inside strings" *)
91 :     %let eol = "\n";
92 : jhr 14
93 :     (***** Keywords and operators *****)
94 :    
95 : jhr 2356 <INITIAL> "||" => (T.OP_orelse);
96 :     <INITIAL> "&&" => (T.OP_andalso);
97 :     <INITIAL> "<" => (T.OP_lt);
98 :     <INITIAL> "<=" => (T.OP_lte);
99 :     <INITIAL> "==" => (T.OP_eqeq);
100 :     <INITIAL> "!=" => (T.OP_neq);
101 :     <INITIAL> ">=" => (T.OP_gte);
102 :     <INITIAL> ">" => (T.OP_gt);
103 :     <INITIAL> "+" => (T.OP_plus);
104 :     <INITIAL> "-" => (T.OP_minus);
105 :     <INITIAL> "*" => (T.OP_star);
106 :     <INITIAL> "/" => (T.OP_slash);
107 :     <INITIAL> "^" => (T.OP_exp);
108 :     <INITIAL> "@" => (T.OP_at);
109 :     <INITIAL> "⊛" => (T.OP_convolve); (* u229b *)
110 :     <INITIAL> "(*)" => (T.OP_convolve);
111 :     <INITIAL> "∇•" => (T.OP_Ddot); (* u2207, u2022 *)
112 :     <INITIAL> "∇⋅" => (T.OP_Ddot); (* u2207, u22c5 *)
113 :     <INITIAL> "∇⊗" => (T.OP_Dotimes); (* u2207, u2297 *)
114 :     <INITIAL> "∇×" => (T.OP_curl); (* u2207, u00d7 *)
115 :     <INITIAL> "∇" => (T.OP_D); (* u2207 *)
116 :     <INITIAL> "•" => (T.OP_dot); (* u2022 *)
117 :     <INITIAL> "⋅" => (T.OP_dot); (* u22c5 *)
118 :     <INITIAL> "×" => (T.OP_cross); (* u00d7 *)
119 :     <INITIAL> "⊗" => (T.OP_outer); (* u2297 *)
120 :     <INITIAL> "(" => (T.LP);
121 :     <INITIAL> ")" => (T.RP);
122 :     <INITIAL> "[" => (T.LB);
123 :     <INITIAL> "]" => (T.RB);
124 :     <INITIAL> "{" => (T.LCB);
125 :     <INITIAL> "}" => (T.RCB);
126 :     <INITIAL> "," => (T.COMMA);
127 :     <INITIAL> ";" => (T.SEMI);
128 :     <INITIAL> ":" => (T.COLON);
129 :     <INITIAL> "#" => (T.HASH);
130 :     <INITIAL> "!" => (T.BANG);
131 :     <INITIAL> "=" => (T.OP_eq);
132 :     <INITIAL> "+=" => (T.OP_pluseq);
133 :     <INITIAL> "-=" => (T.OP_minuseq);
134 :     <INITIAL> "*=" => (T.OP_stareq);
135 :     <INITIAL> "/=" => (T.OP_slasheq);
136 :     <INITIAL> "|" => (T.BAR);
137 :     <INITIAL> ".." => (T.DOTDOT);
138 : jhr 14
139 : jhr 2356 <INITIAL> "∞" => (T.FLOAT FloatLit.posInf); (* u221e *)
140 :     <INITIAL> "π" => (T.FLOAT FloatLit.pi); (* u03c0 *)
141 : jhr 250
142 : jhr 2356 <INITIAL> {id} => (Keywords.idToken yytext);
143 : jhr 33
144 : jhr 2356 <INITIAL> {num} => (T.INT(valOf (IntInf.fromString yytext)));
145 : jhr 40 <INITIAL> {num}"."{num}([eE][+-]?{num})?
146 : jhr 2356 => (mkFloat yysubstr);
147 :     <INITIAL> {ws} => (skip ());
148 : jhr 33
149 : jhr 2356 <INITIAL> "\"" => (YYBEGIN STRING; continue());
150 : jhr 50
151 : jhr 3001 <INITIAL> . => (lexErr(yypos, ["bad character '", String.toString yytext, "'"]);
152 : jhr 2356 continue());
153 : jhr 35
154 :     (***** Strings *****)
155 : jhr 2356 <STRING>{esc} => (addStr(valOf(String.fromString yytext)); continue());
156 :     <STRING>{sgood}+ => (addStr yytext; continue());
157 :     <STRING> "\"" => (YYBEGIN INITIAL; mkString());
158 : jhr 35
159 : jhr 2356 <STRING> . => (lexErr(yypos, [
160 :     "bad character `", String.toString yytext,
161 :     "' in string literal"
162 :     ]);
163 :     continue());
164 : jhr 35
165 : jhr 14 (***** Comments *****)
166 : jhr 2356 <INITIAL> "//" => (YYBEGIN COM1; skip());
167 :     <COM1> {eol} => (YYBEGIN INITIAL; skip());
168 :     <COM1> . => (skip());
169 : jhr 14
170 :     <INITIAL> "/*"
171 : jhr 2356 => (YYBEGIN COM2; skip());
172 : jhr 14 <COM2> "*/"
173 : jhr 2356 => (YYBEGIN INITIAL; skip());
174 : jhr 14 <COM2> .
175 : jhr 2356 => (skip());

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0