Initial revision
[bpt/mlt.git] / src / mlt.lex
1 (*
2 * Dynamic web page generation with Standard ML
3 * Copyright (C) 2003 Adam Chlipala
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 *)
19
20 (* Lexing info for ML template language *)
21
(* Type abbreviations used by the lexer; the token types come from the
 * Tokens structure passed to the functor. *)
type pos = int
type svalue = Tokens.svalue
type ('v, 'p) token = ('v, 'p) Tokens.token
type lexresult = (svalue, pos) token
26
(* Local names for the line-tracking references held by ErrorMsg. *)
val (lineNum, linePos) = (ErrorMsg.lineNum, ErrorMsg.linePos)
29
(* Drop the first and last characters of s.
 * Raises Subscript when s has fewer than two characters. *)
fun strip s = String.substring (s, 1, size s - 2)
31
(* Comment-tracking state shared by the lexer rules.
 *   commentLevel  nesting depth of the block comments currently open
 *   commentPos    position at which the most recent comment was opened
 *   linCom        true while a line comment is being skipped
 *)
local
    val commentLevel = ref 0
    val commentPos = ref 0
    val linCom = ref false
in
    (* Record one more level of block comment, opened at yypos. *)
    fun enterComment yypos =
        (commentLevel := !commentLevel + 1;
         commentPos := yypos)

    (* Line-comment bookkeeping: start at yypos, query, and end. *)
    fun linComStart yypos = (linCom := true; commentPos := yypos)
    fun isLinCom () = !linCom
    fun linComEnd () = linCom := false

    (* Close one level of block comment.
     * Returns true when the outermost comment has just been closed. *)
    fun exitComment () =
        (commentLevel := !commentLevel - 1;
         !commentLevel = 0)

    (* End-of-input handler: reports a block comment that is still open and
     * returns the EOF token positioned at the head of linePos.
     *
     * The comment state is reset here.  Without the reset, an input that ends
     * inside a block comment or a line comment leaves commentLevel/linCom set,
     * and the next input lexed by the same lexer instance can never leave the
     * COMMENT state again. *)
    fun eof () =
        let
            (* Fall back to 0 rather than raising Empty if no position was
             * recorded. *)
            val pos = (case !linePos of p :: _ => p | [] => 0)
            val unterminated = !commentLevel > 0
        in
            commentLevel := 0;
            linCom := false;
            if unterminated then
                ErrorMsg.error (SOME (!commentPos, !commentPos)) "Unterminated comment"
            else ();
            Tokens.EOF (pos, pos)
        end
end
58
(* Text of the string/character literal being collected, and the position
 * at which that literal started. *)
val str : string ref = ref ""
val strStart : int ref = ref 0
61
62 %%
%header (functor MltLexFn(structure Tokens : Mlt_TOKENS));
%full
%s COMMENT STRING CHAR CODE;

alpha = [A-Za-z_];
digit = [0-9];
id = ({alpha}({alpha}|{digit})*)|([:]+);
intconst = {digit}+;
ws = [\ \t\012];
bo = [^<]+;
71
72 %%
73
\n => (if isLinCom () then
           (* a line comment is only ever opened from CODE, so its closing
              newline must resume CODE rather than drop back to INITIAL *)
           (linComEnd (); YYBEGIN CODE)
       else ();
       lineNum := !lineNum + 1;
       linePos := yypos :: !linePos;
       continue ());
78
<INITIAL> {ws}+ => (ignore (Tokens.HTML (" ", yypos, yypos + size yytext));
                    (* the token built above is discarded: a whitespace-only
                       match yields no token and lexing simply resumes *)
                    continue ());
80
<INITIAL> "<%" => (YYBEGIN CODE;
                   (* the opening tag is two characters wide and yields SEMI *)
                   Tokens.SEMI (yypos, yypos + 2));
<CODE> "%>" => (YYBEGIN INITIAL;
                (* the closing tag likewise yields SEMI *)
                Tokens.SEMI (yypos, yypos + 2));
83
<CODE> "(*" => (YYBEGIN COMMENT; enterComment yypos; continue ());
<CODE> "*)" => (ErrorMsg.error (SOME (yypos, yypos)) "Unbalanced comments";
                continue ());

<COMMENT> "(*" => (if isLinCom () then () else enterComment yypos;
                   (* block-comment openers inside a line comment are not counted *)
                   continue ());
<COMMENT> "*)" => (if not (isLinCom ()) andalso exitComment () then YYBEGIN CODE else ();
                   (* comments are only opened from CODE, so closing the outermost
                      one resumes CODE instead of INITIAL *)
                   continue ());

<CODE> "//" => (YYBEGIN COMMENT; linComStart yypos; continue ());
93
<CODE> {ws}+ => ((* whitespace between code tokens is skipped *)
                 continue ());
95
<CODE> "\"" => (YYBEGIN STRING; strStart := yypos; str := ""; continue ());
<STRING> "\\\\" => (str := !str ^ yytext;
                    (* an escaped backslash is consumed as one unit, so a quote
                       that follows it still terminates the literal *)
                    continue ());
<STRING> "\\\"" => (str := !str ^ "\\\""; continue ());
<STRING> "\"" => (YYBEGIN CODE; Tokens.STRING (!str, !strStart, yypos + 1));
<STRING> . => (str := !str ^ yytext; continue ());
100
<CODE> "#\"" => (YYBEGIN CHAR; strStart := yypos; str := ""; continue ());
<CHAR> "\\\"" => (str := !str ^ yytext; continue ());
<CHAR> "\"" => (YYBEGIN CODE;
                (* NOTE review: the collected text keeps its backslashes, so an
                   escaped character has size two and is rejected below *)
                case size (!str) of
                    1 => Tokens.CHAR (!str, !strStart, yypos + 1)
                  | _ => (ErrorMsg.error (SOME (yypos, yypos)) "Invalid character constant";
                          continue ()));
<CHAR> . => (str := !str ^ yytext; continue ());
109
<CODE> "{" => (Tokens.LBRACE (yypos, yypos + 1));
<CODE> "}" => (Tokens.RBRACE (yypos, yypos + 1));
<CODE> "(" => (Tokens.LPAREN (yypos, yypos + 1));
<CODE> ")" => (Tokens.RPAREN (yypos, yypos + 1));
<CODE> "[" => (Tokens.LBRACK (yypos, yypos + 1));
<CODE> "]" => (Tokens.RBRACK (yypos, yypos + 1));
116
<CODE> "=" => (Tokens.EQ (yypos, yypos + 1));
<CODE> "<>" => (Tokens.NEQ (yypos, yypos + 2));
<CODE> "<" => (Tokens.LT (yypos, yypos + 1));
<CODE> "<=" => (Tokens.LTE (yypos, yypos + 2));
<CODE> ">" => (Tokens.GT (yypos, yypos + 1));
<CODE> ">=" => (Tokens.GTE (yypos, yypos + 2));

<CODE> ":=" => (Tokens.ASN (yypos, yypos + 2));
125
<CODE> "/" => (Tokens.DIVIDE (yypos, yypos + 1));
<CODE> "*" => (Tokens.TIMES (yypos, yypos + 1));
<CODE> "+" => (Tokens.PLUS (yypos, yypos + 1));
<CODE> "-" => (Tokens.MINUS (yypos, yypos + 1));
<CODE> "%" => (Tokens.MOD (yypos, yypos + 1));
<CODE> "^" => (Tokens.STRCAT (yypos, yypos + 1));

<CODE> "~" => (Tokens.NEG (yypos, yypos + 1));
<CODE> "," => (Tokens.COMMA (yypos, yypos + 1));
<CODE> ":" => (Tokens.COLON (yypos, yypos + 1));
<CODE> "..." => (Tokens.DOTDOTDOT (yypos, yypos + size yytext));
<CODE> ".." => (Tokens.DOTDOT (yypos, yypos + size yytext));
<CODE> "." => (Tokens.DOT (yypos, yypos + size yytext));
<CODE> "_" => (Tokens.UNDER (yypos, yypos + size yytext));
<CODE> "#" => (Tokens.HASH (yypos, yypos + size yytext));
<CODE> ";" => (Tokens.SEMI (yypos, yypos + size yytext));
<CODE> "$" => (Tokens.DOLLAR (yypos, yypos + 1));
<CODE> "@" => (Tokens.AT (yypos, yypos + 1));
144
<CODE> "if" => (Tokens.IF (yypos, yypos + size yytext));
<CODE> "else" => (Tokens.ELSE (yypos, yypos + size yytext));
<CODE> "foreach" => (Tokens.FOREACH (yypos, yypos + size yytext));
<CODE> "in" => (Tokens.IN (yypos, yypos + size yytext));
<CODE> "case" => (Tokens.CASE (yypos, yypos + size yytext));
<CODE> "as" => (Tokens.AS (yypos, yypos + size yytext));
<CODE> "with" => (Tokens.WITH (yypos, yypos + size yytext));
<CODE> "open" => (Tokens.OPEN (yypos, yypos + size yytext));
<CODE> "val" => (Tokens.VAL (yypos, yypos + size yytext));
<CODE> "ref" => (Tokens.REF (yypos, yypos + size yytext));
<CODE> "try" => (Tokens.TRY (yypos, yypos + size yytext));
<CODE> "catch" => (Tokens.CATCH (yypos, yypos + size yytext));
<CODE> "or" => ((* the end position follows the matched text, which is two
                   characters here, not the five that were hard-coded *)
                Tokens.ORELSE (yypos, yypos + size yytext));
<CODE> "and" => ((* three characters matched, not five *)
                 Tokens.ANDALSO (yypos, yypos + size yytext));
159
<CODE> "::" => (Tokens.CONS (yypos, yypos + 2));
<CODE> {id} => (Tokens.IDENT (yytext, yypos, yypos + size yytext));
<CODE> {intconst} => (case (Int.fromString yytext handle Overflow => NONE) of
                          SOME x => Tokens.INT (x, yypos, yypos + size yytext)
                        | NONE =>
                          (* Int.fromString raises Overflow for a digit string too
                             large for int; that is reported here as a lexical
                             error instead of escaping from the lexer *)
                          (ErrorMsg.error (SOME (yypos, yypos))
                               ("Expected number, received: " ^ yytext);
                           continue ()));
167
<CODE> "\"" {id} "\"" => ((* strip removes the surrounding quote characters *)
                          Tokens.STRING (strip yytext, yypos, yypos + size yytext));
169
<COMMENT> . => ((* any other character inside a comment is skipped *)
                lex ());
171
<INITIAL> {bo} => (let
                       val stop = yypos + size yytext
                   in
                       (* a run of text containing no tag-opening character *)
                       Tokens.HTML (yytext, yypos, stop)
                   end);
<INITIAL> . => ((* a single character not covered by the rules above *)
                Tokens.HTML (yytext, yypos, yypos + size yytext));
174
<CODE> . => (let
                 val msg = "illegal character: \"" ^ yytext ^ "\""
             in
                 (* report the offending character and keep lexing *)
                 ErrorMsg.error (SOME (yypos, yypos)) msg;
                 continue ()
             end);