Home My Page Projects Code Snippets Project Openings SML/NJ
Summary Activity Forums Tracker Lists Tasks Docs Surveys News SCM Files

SCM Repository

[smlnj] Annotation of /smlnj-lib/trunk/HTML/html-elements-fn.sml
ViewVC logotype

Annotation of /smlnj-lib/trunk/HTML/html-elements-fn.sml

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3 - (view) (download)
Original Path: sml/branches/SMLNJ/src/smlnj-lib/HTML/html-elements-fn.sml

1 : monnier 2 (* html-elements-fn.sml
2 :     *
3 :     * COPYRIGHT (c) 1996 AT&T REsearch.
4 :     *
5 :     * This module builds element tags for the lexer.
6 :     *)
7 :    
8 :     functor HTMLElementsFn (
9 :     structure Tokens : HTML_TOKENS
10 :     structure Err : HTML_ERROR
11 :     structure HTMLAttrs : HTML_ATTRS
12 :     ) : sig
13 :    
14 :     structure T : HTML_TOKENS
15 :    
16 :     type pos = int
17 :    
18 :     val startTag : string option
19 :     -> (string * pos * pos) -> (T.svalue, pos) T.token option
20 :     val endTag : string option
21 :     -> (string * pos * pos) -> (T.svalue, pos) T.token option
22 :    
23 :     end = struct
24 :    
25 :     structure T = Tokens
26 :     structure A = HTMLAttrs
27 :    
28 :     type pos = int
29 :    
30 :     datatype start_tag
31 :     = WAttrs of ((A.attrs * pos * pos) -> (T.svalue, pos) T.token)
32 :     | WOAttrs of ((pos * pos) -> (T.svalue, pos) T.token)
33 :     datatype end_tag
34 :     = End of ((pos * pos) -> (T.svalue, pos) T.token)
35 :     | Empty
36 :    
37 :     val tokenData = [
38 :     ("A", WAttrs T.START_A, End T.END_A),
39 :     ("ADDRESS", WOAttrs T.START_ADDRESS, End T.END_ADDRESS),
40 :     ("APPLET", WAttrs T.START_APPLET, End T.END_APPLET),
41 :     ("AREA", WAttrs T.TAG_AREA, Empty),
42 :     ("B", WOAttrs T.START_B, End T.END_B),
43 :     ("BASE", WAttrs T.TAG_BASE, Empty),
44 :     ("BASEFONT", WAttrs T.START_BASEFONT, End T.END_BASEFONT),
45 :     ("BIG", WOAttrs T.START_BIG, End T.END_BIG),
46 :     ("BLOCKQUOTE", WOAttrs T.START_BLOCKQUOTE, End T.END_BLOCKQUOTE),
47 :     ("BODY", WAttrs T.START_BODY, End T.END_BODY),
48 :     ("BR", WAttrs T.TAG_BR, Empty),
49 :     ("CAPTION", WAttrs T.START_CAPTION, End T.END_CAPTION),
50 :     ("CENTER", WOAttrs T.START_CENTER, End T.END_CENTER),
51 :     ("CITE", WOAttrs T.START_CITE, End T.END_CITE),
52 :     ("CODE", WOAttrs T.START_CODE, End T.END_CODE),
53 :     ("DD", WOAttrs T.START_DD, End T.END_DD),
54 :     ("DFN", WOAttrs T.START_DFN, End T.END_DFN),
55 :     ("DIR", WAttrs T.START_DIR, End T.END_DIR),
56 :     ("DIV", WAttrs T.START_DIV, End T.END_DIV),
57 :     ("DL", WAttrs T.START_DL, End T.END_DL),
58 :     ("DT", WOAttrs T.START_DT, End T.END_DT),
59 :     ("EM", WOAttrs T.START_EM, End T.END_EM),
60 :     ("FONT", WAttrs T.START_FONT, End T.END_FONT),
61 :     ("FORM", WAttrs T.START_FORM, End T.END_FORM),
62 :     ("H1", WAttrs T.START_H1, End T.END_H1),
63 :     ("H2", WAttrs T.START_H2, End T.END_H2),
64 :     ("H3", WAttrs T.START_H3, End T.END_H3),
65 :     ("H4", WAttrs T.START_H4, End T.END_H4),
66 :     ("H5", WAttrs T.START_H5, End T.END_H5),
67 :     ("H6", WAttrs T.START_H6, End T.END_H6),
68 :     ("HEAD", WOAttrs T.START_HEAD, End T.END_HEAD),
69 :     ("HR", WAttrs T.TAG_HR, Empty),
70 :     ("HTML", WOAttrs T.START_HTML, End T.END_HTML),
71 :     ("I", WOAttrs T.START_I, End T.END_I),
72 :     ("IMG", WAttrs T.TAG_IMG, Empty),
73 :     ("INPUT", WAttrs T.TAG_INPUT, Empty),
74 :     ("ISINDEX", WAttrs T.TAG_ISINDEX, Empty),
75 :     ("KBD", WOAttrs T.START_KBD, End T.END_KBD),
76 :     ("LI", WAttrs T.START_LI, End T.END_LI),
77 :     ("LINK", WAttrs T.TAG_LINK, Empty),
78 :     ("MAP", WAttrs T.START_MAP, End T.END_MAP),
79 :     ("MENU", WAttrs T.START_MENU, End T.END_MENU),
80 :     ("META", WAttrs T.TAG_META, Empty),
81 :     ("OL", WAttrs T.START_OL, End T.END_OL),
82 :     ("OPTION", WAttrs T.START_OPTION, End T.END_OPTION),
83 :     ("P", WAttrs T.START_P, End T.END_P),
84 :     ("PARAM", WAttrs T.TAG_PARAM, Empty),
85 :     ("PRE", WAttrs T.START_PRE, End T.END_PRE),
86 :     ("SAMP", WOAttrs T.START_SAMP, End T.END_SAMP),
87 :     ("SCRIPT", WOAttrs T.START_SCRIPT, End T.END_SCRIPT),
88 :     ("SELECT", WAttrs T.START_SELECT, End T.END_SELECT),
89 :     ("SMALL", WOAttrs T.START_SMALL, End T.END_SMALL),
90 :     ("STRIKE", WOAttrs T.START_STRIKE, End T.END_STRIKE),
91 :     ("STRONG", WOAttrs T.START_STRONG, End T.END_STRONG),
92 :     ("STYLE", WOAttrs T.START_STYLE, End T.END_STYLE),
93 :     ("SUB", WOAttrs T.START_SUB, End T.END_SUB),
94 :     ("SUP", WOAttrs T.START_SUP, End T.END_SUP),
95 :     ("TABLE", WAttrs T.START_TABLE, End T.END_TABLE),
96 :     ("TD", WAttrs T.START_TD, End T.END_TD),
97 :     ("TEXTAREA", WAttrs T.START_TEXTAREA, End T.END_TEXTAREA),
98 :     ("TH", WAttrs T.START_TH, End T.END_TH),
99 :     ("TITLE", WOAttrs T.START_TITLE, End T.END_TITLE),
100 :     ("TR", WAttrs T.START_TR, End T.END_TR),
101 :     ("TT", WOAttrs T.START_TT, End T.END_TT),
102 :     ("U", WOAttrs T.START_U, End T.END_U),
103 :     ("UL", WAttrs T.START_UL, End T.END_UL),
104 :     ("VAR", WOAttrs T.START_VAR, End T.END_VAR)
105 :     ]
106 :    
107 :     structure HTbl = HashTableFn (struct
108 :     type hash_key = string
109 :     val hashVal = HashString.hashString
110 :     val sameKey = (op = : (string * string) -> bool)
111 :     end)
112 :    
113 :     val elemTbl = let
114 :     val tbl = HTbl.mkTable (length tokenData, Fail "HTMLElements")
115 :     fun ins (tag, startTok, endTok) =
116 :     HTbl.insert tbl (tag, {startT=startTok, endT=endTok})
117 :     in
118 :     List.app ins tokenData; tbl
119 :     end
120 :    
121 :     structure SS = Substring
122 :    
123 :     fun canonName name = SS.translate (String.str o Char.toUpper) name
124 :    
125 :     fun find name = (HTbl.find elemTbl (canonName name))
126 :    
127 :     val skipWS = SS.dropl Char.isSpace
128 :    
129 :     fun scanStr (ctx, quoteChar, ss) = let
130 :     val (str, rest) = SS.splitl (fn c => (c <> quoteChar)) ss
131 :     in
132 :     if (SS.isEmpty rest)
133 :     then (
134 :     Err.lexError ctx "missing close quote for string";
135 :     (A.STRING(SS.string str), rest))
136 :     else (A.STRING(SS.string str), SS.triml 1 rest)
137 :     end
138 :    
139 :     (* scan an attribute value from a substring, returning the value, and
140 :     * the rest of the substring. Attribute values have one of the following
141 :     * forms:
142 :     * 1) a name token (a sequence of letters, digits, periods, or hyphens).
143 :     * 2) a string literal enclosed in ""
144 :     * 3) a string literal enclosed in ''
145 :     *)
146 :     fun scanAttrVal (ctx, attrName, ss) = let
147 :     fun isNameChar (#"." | #"-") = true
148 :     | isNameChar c = (Char.isAlphaNum c)
149 :     in
150 :     case SS.getc ss
151 :     of NONE => (A.IMPLICIT, ss)
152 :     | (SOME(#"\"", rest)) => scanStr (ctx, #"\"", rest)
153 :     | (SOME(#"'", rest)) => scanStr (ctx, #"'", rest)
154 :     | (SOME(c, _)) => let
155 :     (**
156 :     * Unquoted attributes should be Names, but this is often not
157 :     * the case, so we terminate them on whitespace or ">".
158 :     *)
159 :     val notNameChar = ref false
160 :     fun isAttrChar c =
161 :     if ((Char.isSpace c) orelse (c = #">"))
162 :     then false
163 :     else (
164 :     if isNameChar c then () else notNameChar := true;
165 :     true)
166 :     val (value, rest) = SS.splitl isAttrChar ss
167 :     in
168 :     if (SS.isEmpty value)
169 :     then (
170 :     Err.badAttrVal ctx (SS.string attrName, "");
171 :     (A.IMPLICIT, ss))
172 :     else if (! notNameChar)
173 :     then (
174 :     Err.unquotedAttrVal ctx (SS.string attrName);
175 :     (A.STRING(SS.string value), rest))
176 :     else (A.NAME(SS.string value), rest)
177 :     end
178 :     (* end case *)
179 :     end
180 :    
181 :     fun scanStartTag (ctx, ss) = let
182 :     val (name, rest) = SS.splitl (not o Char.isSpace) ss
183 :     fun scanAttrs (rest, attrs) = let
184 :     val rest = skipWS rest
185 :     in
186 :     case SS.getc rest
187 :     of NONE => (name, List.rev attrs)
188 :     | (SOME(#"\"", rest)) => (
189 :     Err.lexError ctx "bogus text in element";
190 :     scanAttrs (#2(scanStr (ctx, #"\"", rest)), attrs))
191 :     | (SOME(#"'", rest)) => (
192 :     Err.lexError ctx "bogus text in element";
193 :     scanAttrs (#2(scanStr (ctx, #"'", rest)), attrs))
194 :     | (SOME(c, rest')) =>
195 :     if Char.isAlpha c
196 :     then let
197 :     val (aName, rest) = SS.splitl Char.isAlphaNum rest
198 :     val rest = skipWS rest
199 :     in
200 :     case (SS.getc rest)
201 :     of (SOME(#"=", rest)) => let
202 :     (* get the attribute value *)
203 :     val (aVal, rest) =
204 :     scanAttrVal (ctx, aName, skipWS rest)
205 :     in
206 :     scanAttrs (rest, (canonName aName, aVal)::attrs)
207 :     end
208 :     | _ => scanAttrs (rest,
209 :     (canonName aName, A.IMPLICIT)::attrs)
210 :     (* end case *)
211 :     end
212 :     else (
213 :     Err.lexError ctx "bogus character in element";
214 :     scanAttrs (rest', attrs))
215 :     (* end case *)
216 :     end
217 :     in
218 :     TextIO.print "scanStartTag\n";
219 :     scanAttrs(rest, [])
220 :     end
221 :    
222 :     fun startTag file (tag, p1, p2) = let
223 :     val ctx = {file=file, line=p1}
224 :     val tag' = SS.triml 1 (SS.trimr 1 (SS.all tag))
225 :     val (name, attrs) = scanStartTag (ctx, tag')
226 :     in
227 :     TextIO.print(concat["startTag: ", SS.string name, "\n"]);
228 :     case (find name, attrs)
229 :     of (NONE, _) => (Err.badStartTag ctx (SS.string name); NONE)
230 :     | (SOME{startT=WOAttrs _, ...}, _::_) => (
231 :     List.app (Err.unknownAttr ctx o #1) attrs; NONE)
232 :     | (SOME{startT=WOAttrs tag, ...}, []) =>
233 :     SOME(tag (p1, p2))
234 :     | (SOME{startT=WAttrs tag, ...}, attrs) =>
235 :     (TextIO.print " with attributes\n";
236 :     SOME(tag (attrs, p1, p2))
237 :     )
238 :     (* end case *)
239 :     end
240 :    
241 :     fun endTag file (tag, p1, p2) = let
242 :     val ctx = {file=file, line=p1}
243 :     val name = SS.triml 2 (SS.trimr 1 (SS.all tag))
244 :     in
245 :     TextIO.print(concat["endTag: ", SS.string name, "\n"]);
246 :     case (find name)
247 :     of NONE => (Err.badEndTag ctx (SS.string name); NONE)
248 :     | (SOME{endT=Empty, ...}) => (Err.badEndTag ctx (SS.string name); NONE)
249 :     | (SOME{endT=End endTok, ...}) => SOME(endTok (p1, p2))
250 :     (* end case *)
251 :     end
252 :    
253 :     end
254 :    

root@smlnj-gforge.cs.uchicago.edu
ViewVC Help
Powered by ViewVC 1.0.0