Template language overhaul & misc. improvements
[bpt/mlt.git] / src / mlt.lex
1 (*
2 * Dynamic web page generation with Standard ML
3 * Copyright (C) 2003 Adam Chlipala
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 *)
19
20 (* Lexing info for ML template language *)
21
22 type pos = int
23 type svalue = Tokens.svalue
24 type ('a,'b) token = ('a,'b) Tokens.token
25 type lexresult = (svalue,pos) Tokens.token
26
27 val lineNum = ErrorMsg.lineNum
28 val linePos = ErrorMsg.linePos
29
30 fun strip s = String.extract (s, 1, SOME (String.size s - 2))
31
32 local
33 val commentLevel = ref 0
34 val commentPos = ref 0
35 val linCom = ref false
36 in
37 fun enterComment yypos = (commentLevel := !commentLevel + 1; commentPos := yypos)
38
39 fun linComStart yypos = (linCom := true; commentPos := yypos)
40 fun isLinCom () = !linCom
41 fun linComEnd () = linCom := false
42
43 fun exitComment () =
44 let val _ = commentLevel := !commentLevel - 1 in
45 !commentLevel = 0
46 end
47
48 fun eof () =
49 let
50 val pos = hd (!linePos)
51 in
52 if (!commentLevel > 0) then
53 (ErrorMsg.error (SOME (!commentPos,!commentPos)) "Unterminated comment")
54 else ();
55 Tokens.EOF (pos,pos)
56 end
57 end
58
59 val str = ref ""
60 val strStart = ref 0
61
62 %%
63 %header (functor MltLexFn(structure Tokens : Mlt_TOKENS));
64 %full
65 %s COMMENT STRING CHAR CODE;
66
67 id = ([A-Za-z_][A-Za-z0-9_]*)|([:]+);
68 intconst = [0-9]+;
69 realconst = [0-9]+\.[0-9]*;
70 ws = [\ \t\012];
71 bo = [^<]+;
72
73 %%
74
75 \n => (if isLinCom () then (linComEnd (); YYBEGIN INITIAL) else ();
76 lineNum := !lineNum + 1;
77 linePos := yypos :: ! linePos;
78 continue ());
79
80 <INITIAL> {ws}+ => (Tokens.HTML (" ", yypos, yypos + size yytext); lex ());
81
82 <INITIAL> "<%" => (YYBEGIN CODE; Tokens.SEMI(yypos, yypos + size yytext));
83 <CODE> "%>" => (YYBEGIN INITIAL; Tokens.SEMI(yypos, yypos + size yytext));
84
85 <CODE> "(*" => (YYBEGIN COMMENT; enterComment yypos; continue());
86 <CODE> "*)" => (ErrorMsg.error (SOME (yypos, yypos)) "Unbalanced comments";
87 continue());
88
89 <COMMENT> "(*" => (if not (isLinCom ()) then enterComment yypos else (); continue());
90 <COMMENT> "*)" => (if not (isLinCom ()) andalso exitComment () then YYBEGIN INITIAL else ();
91 continue());
92
93 <CODE> "//" => (YYBEGIN COMMENT; linComStart yypos; continue());
94
95 <CODE> {ws}+ => (lex ());
96
97 <CODE> "\"" => (YYBEGIN STRING; strStart := yypos; str := ""; continue());
98 <STRING> "\\\"" => (str := !str ^ "\\\""; continue());
99 <STRING> "\"" => (YYBEGIN CODE; Tokens.STRING (!str, !strStart, yypos + 1));
100 <STRING> . => (str := !str ^ yytext; continue());
101
102 <CODE> "#\"" => (YYBEGIN CHAR; strStart := yypos; str := ""; continue());
103 <CHAR> "\\\"" => (str := !str ^ "\\\""; continue());
104 <CHAR> "\"" => (YYBEGIN CODE; if size (!str) = 1 then
105 Tokens.CHAR (!str, !strStart, yypos + 1)
106 else
107 (ErrorMsg.error (SOME (yypos, yypos)) "Invalid character constant";
108 continue()));
109 <CHAR> . => (str := !str ^ yytext; continue());
110
111 <CODE> "{" => (Tokens.LBRACE (yypos, yypos + size yytext));
112 <CODE> "}" => (Tokens.RBRACE (yypos, yypos + size yytext));
113 <CODE> "(" => (Tokens.LPAREN (yypos, yypos + size yytext));
114 <CODE> ")" => (Tokens.RPAREN (yypos, yypos + size yytext));
115 <CODE> "[" => (Tokens.LBRACK (yypos, yypos + size yytext));
116 <CODE> "]" => (Tokens.RBRACK (yypos, yypos + size yytext));
117
118 <CODE> "=" => (Tokens.EQ (yypos, yypos + size yytext));
119 <CODE> "<>" => (Tokens.NEQ (yypos, yypos + size yytext));
120 <CODE> "<" => (Tokens.LT (yypos, yypos + size yytext));
121 <CODE> "<=" => (Tokens.LTE (yypos, yypos + size yytext));
122 <CODE> ">" => (Tokens.GT (yypos, yypos + size yytext));
123 <CODE> ">=" => (Tokens.GTE (yypos, yypos + size yytext));
124
125 <CODE> ":=" => (Tokens.ASN (yypos, yypos + size yytext));
126
127 <CODE> "/" => (Tokens.DIVIDE (yypos, yypos + size yytext));
128 <CODE> "*" => (Tokens.TIMES (yypos, yypos + size yytext));
129 <CODE> "+" => (Tokens.PLUS (yypos, yypos + size yytext));
130 <CODE> "-" => (Tokens.MINUS (yypos, yypos + size yytext));
131 <CODE> "%" => (Tokens.MOD (yypos, yypos + size yytext));
132 <CODE> "^" => (Tokens.STRCAT (yypos, yypos + size yytext));
133
134 <CODE> "~" => (Tokens.NEG (yypos, yypos + size yytext));
135 <CODE> "," => (Tokens.COMMA (yypos, yypos + size yytext));
136 <CODE> ":" => (Tokens.COLON (yypos, yypos + size yytext));
137 <CODE> "..." => (Tokens.DOTDOTDOT (yypos, yypos + 3));
138 <CODE> ".." => (Tokens.DOTDOT (yypos, yypos + 2));
139 <CODE> "." => (Tokens.DOT (yypos, yypos + 1));
140 <CODE> "_" => (Tokens.UNDER (yypos, yypos + 1));
141 <CODE> "#" => (Tokens.HASH (yypos, yypos + 1));
142 <CODE> ";" => (Tokens.SEMI (yypos, yypos + 1));
143 <CODE> "$" => (Tokens.DOLLAR (yypos, yypos + size yytext));
144 <CODE> "@" => (Tokens.AT (yypos, yypos + size yytext));
145
146 <CODE> "if" => (Tokens.IF (yypos, yypos + 2));
147 <CODE> "then" => (Tokens.THEN (yypos, yypos + 4));
148 <CODE> "else" => (Tokens.ELSE (yypos, yypos + 4));
149 <CODE> "foreach" => (Tokens.FOREACH (yypos, yypos + 7));
150 <CODE> "in" => (Tokens.IN (yypos, yypos + 2));
151 <CODE> "case" => (Tokens.CASE (yypos, yypos + 4));
152 <CODE> "as" => (Tokens.AS (yypos, yypos + 2));
153 <CODE> "fn" => (Tokens.FN (yypos, yypos + 2));
154 <CODE> "with" => (Tokens.WITH (yypos, yypos + 4));
155 <CODE> "open" => (Tokens.OPEN (yypos, yypos + 4));
156 <CODE> "val" => (Tokens.VAL (yypos, yypos + 3));
157 <CODE> "ref" => (Tokens.REF (yypos, yypos + 3));
158 <CODE> "try" => (Tokens.TRY (yypos, yypos + 3));
159 <CODE> "catch" => (Tokens.CATCH (yypos, yypos + 5));
160 <CODE> "or" => (Tokens.ORELSE (yypos, yypos + 2));
161 <CODE> "and" => (Tokens.ANDALSO (yypos, yypos + 3));
162 <CODE> "switch" => (Tokens.SWITCH (yypos, yypos + 6));
163 <CODE> "of" => (Tokens.OF (yypos, yypos + 2));
164 <CODE> "=>" => (Tokens.ARROW (yypos, yypos + 2));
165 <CODE> "|" => (Tokens.BAR (yypos, yypos + 1));
166 <CODE> "do" => (Tokens.DO (yypos, yypos + 2));
167 <CODE> "end" => (Tokens.END (yypos, yypos + 3));
168 <CODE> "raise" => (Tokens.RAISE (yypos, yypos + 5));
169
170 <CODE> "::" => (Tokens.CONS (yypos, yypos + 2));
171 <CODE> {id} => (Tokens.IDENT (yytext, yypos, yypos + size yytext));
172 <CODE> {intconst} => (case Int.fromString yytext of
173 SOME x => Tokens.INT (x, yypos, yypos + size yytext)
174 | NONE => (ErrorMsg.error (SOME (yypos, yypos))
175 ("Expected int, received: " ^ yytext);
176 continue ()));
177 <CODE> {realconst} => (case Real.fromString yytext of
178 SOME x => Tokens.REAL (x, yypos, yypos + size yytext)
179 | NONE => (ErrorMsg.error (SOME (yypos, yypos))
180 ("Expected real, received: " ^ yytext);
181 continue ()));
182
183 <CODE> "\"" {id} "\"" => (Tokens.STRING (String.substring(yytext, 1, String.size yytext - 2), yypos, yypos + size yytext));
184
185 <COMMENT> . => (continue());
186
187 <INITIAL> {bo} => (Tokens.HTML (yytext, yypos, yypos + size yytext));
188 <INITIAL> . => (Tokens.HTML (yytext, yypos, yypos + 1));
189
190 <CODE> . => (ErrorMsg.error (SOME (yypos,yypos))
191 ("illegal character: \"" ^ yytext ^ "\"");
192 continue ());