Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /smlnj-lib/trunk/SExp/sexp.lex
ViewVC logotype

Annotation of /smlnj-lib/trunk/SExp/sexp.lex

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3635 - (view) (download)

1 : jhr 3635 (* sexp.lex
2 :     *
3 :     * COPYRIGHT (c) 2011 The Fellowship of SML/NJ (http://www.smlnj.org)
4 :     * All rights reserved.
5 :     *
6 :     * Author: Damon Wang (with modifications by John Reppy)
7 :     *
8 :     * Lexer for Sexp files.
9 :     *
10 :     * TODO:
11 :     * EOF rules for strings
12 :     * newlines in strings
13 :     * error messages for unknown characters
14 :     *)
15 :    
16 :     %name SExpLexer;
17 :     (* User declarations: token constructors' helpers and the string-literal buffer used by the rule actions. *)
18 :     %defs (
19 :       structure T = SExpTokens
20 :       type lex_result = T.token
21 :       fun eof () = T.EOF
22 :       fun int s = T.INT(valOf(IntInf.fromString s)) (* valOf raises Option if s does not parse *)
23 :       fun float s = T.FLOAT(valOf(LargeReal.fromString s)) (* valOf raises Option if s does not parse *)
24 :       (* support for incremental construction of strings *)
25 :       val sbuf : string list ref = ref [] (* fragments of the current string, newest first *)
26 :       fun addStr s = sbuf := s :: !sbuf
27 :       fun addUChr lit = let (* lit is the whole escape text, including its two-character prefix *)
28 :         (* trim the "\u" prefix *)
29 :         val digits = Substring.triml 2 (Substring.full lit)
30 :         in case Word.scan StringCvt.HEX Substring.getc digits
31 :           of SOME(w, _) => addStr(UTF8.encode w)
32 :           | NONE => raise Fail ("SExpLexer: malformed \\u escape: " ^ lit) (* explicit failure instead of a Bind exception *)
33 :         end
34 :       fun finishString () = (T.STRING(String.concat(List.rev(!sbuf))) before sbuf := []) (* fragments in insertion order; then reset sbuf *)
35 :     );
36 :    
37 :     %let digit1_9 = [1-9];
38 :     %let digit = [0-9];
39 :     %let digits = {digit}+; (* one or more decimal digits *)
40 :     (* TODO check if JSON should allow "+1" as a valid encoding of positive one. *)
41 :     %let int = [+-]?({digit} | {digit1_9}{digits}); (* optional sign; a single digit, or two or more digits with no leading zero *)
42 :     %let frac = "."{digits};
43 :     %let exp = [eE][+-]?{digits};
44 :     %let xdigit = [0-9a-fA-F]; (* hexadecimal digit, either case *)
45 :     %let alpha = [a-zA-Z];
46 :     %let punct = [-\^_/~!@$%&*\\:?.<>|+='];
47 :     %let symbol = ({alpha} | {punct})({alpha} | {punct} | {digit})*; (* may not start with a digit *)
48 :    
49 :     %states S;
50 :     (* S is entered on an opening double quote and left on the closing one.  Commas and semicolons are lexed as whitespace. *)
51 :     <INITIAL>[,;\ \t\n\r]+ => ( T.WHITE );
52 :     (* quoted symbol: the leading quote character is dropped from the token text.  NOTE(review): the pattern takes every following non-whitespace character, so a delimiter written directly after the symbol becomes part of it -- confirm this is intended. *)
53 :     <INITIAL>"'"([^\ \t\n\r]+) => ( T.SYMBOL (String.extract(yytext, 1, NONE)) );
54 :     (* the three delimiter pairs, then the boolean keywords *)
55 :     <INITIAL>"(" => ( T.DELIM (T.PAREN, T.OPEN) );
56 :     <INITIAL>")" => ( T.DELIM (T.PAREN, T.CLOSE) );
57 :     <INITIAL>"[" => ( T.DELIM (T.BRACKET, T.OPEN) );
58 :     <INITIAL>"]" => ( T.DELIM (T.BRACKET, T.CLOSE) );
59 :     <INITIAL>"{" => ( T.DELIM (T.BRACE, T.OPEN) );
60 :     <INITIAL>"}" => ( T.DELIM (T.BRACE, T.CLOSE) );
61 :     <INITIAL>"#t" => ( T.KW_true );
62 :     <INITIAL>"#f" => ( T.KW_false );
63 :    
64 :     (* takes a string of form "0xdeadbeef", strips the leading "0x", and returns
65 :     * an IntInf with hex value deadbeef. Note that the hex value is unsigned; to
66 :     * get negatives, write "-0xdeadbeef". This means that the string from C's
67 :     * `printf("%x", -1)` will be parsed as UINT_MAX, not as -1. TODO is this a good idea? *)
68 :    
69 :     <INITIAL>[+-]?"0x"{xdigit}+ => (
70 :       let (* digits = the literal with "0x" and any "+" removed; a leading "-" is kept *)
71 :       (* TODO Doesn't StringCvt.HEX handle stripping the "0x" prefix? *)
72 :       val digits = if String.isPrefix "+" yytext (* "+0xdeadbeef" *)
73 :             then String.extract(yytext, 3, NONE)
74 :             else if String.isPrefix "-" yytext (* "-0xdeadbeef" *)
75 :               then "-" ^ String.extract(yytext, 3, NONE)
76 :               else String.extract(yytext, 2, NONE) (* "0xdeadbeef" *)
77 :       in case StringCvt.scanString (IntInf.scan StringCvt.HEX) digits
78 :         of SOME value => T.INT value
79 :         | NONE => raise Fail ("SExpLexer: malformed hex literal: " ^ yytext) (* not expected: the pattern requires a hex digit *)
80 :       end
81 :     );
82 :    
83 :     <INITIAL>{int} => ( int yytext ); (* decimal integer, converted by the int helper from %defs *)
84 :     (* reals: an integer part followed by a fraction, an exponent, or both; converted by the float helper *)
85 :     <INITIAL>{int}{frac} => ( float yytext );
86 :     <INITIAL>{int}{exp} => ( float yytext );
87 :     <INITIAL>{int}{frac}{exp} => ( float yytext );
88 :    
89 :     <INITIAL>"\"" => ( YYBEGIN S; continue() ); (* opening quote: switch to state S; no token is produced by this rule itself *)
90 :     (* bare symbol.  NOTE(review): presumably relies on the numeric rules above winning equal-length matches such as "+1" -- confirm against ml-ulex rule priority *)
91 :     <INITIAL>{symbol} => ( T.SYMBOL yytext );
92 :     (* Rules for state S (inside a string literal): each one appends a fragment to sbuf and continues.  TODO backport the "\\\\" rule to the JSON parser, which hangs if it sees a \\ in a
93 :     * string.  NOTE(review): no rule here covers a backslash followed by any other character -- confirm what the generated lexer does in that case. *)
94 :     <S>"\\\\" => ( addStr "\\"; continue() ); (* escaped backslash *)
95 :     <S>"\\\"" => ( addStr "\""; continue() ); (* escaped double quote *)
96 :     <S>"\\/" => ( addStr "/"; continue() );
97 :     <S>"\\b" => ( addStr "\b"; continue() );
98 :     <S>"\\f" => ( addStr "\f"; continue() );
99 :     <S>"\\n" => ( addStr "\n"; continue() );
100 :     <S>"\\r" => ( addStr "\r"; continue() );
101 :     <S>"\\t" => ( addStr "\t"; continue() );
102 :     <S>"\\u"{xdigit}{4} => ( addUChr yytext; continue() ); (* four hex digits; addUChr appends the UTF-8 encoding *)
103 :     <S>[^\\"]+ => ( addStr yytext; continue() ); (* run of characters other than backslash and double quote, copied as is *)
104 :     <S>"\"" => ( YYBEGIN INITIAL; finishString() ); (* closing quote: return to INITIAL and emit the accumulated STRING *)
105 :    
106 :     <INITIAL>"/*"(~(.*"*/".*))"*/" => ( skip() ); (* C-style block comment whose body does not contain the closing marker; skipped *)
107 :    
108 :     (* FIXME: add some error reporting *)
109 :     <INITIAL>. => ( skip() ); (* fallback: any character not matched by a longer or earlier rule is skipped silently *)

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0