String quoting for SQL queries
[bpt/mlt.git] / src / mlt.lex
CommitLineData
c0a3b488
AC
1(*
2 * Dynamic web page generation with Standard ML
3 * Copyright (C) 2003 Adam Chlipala
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 *)
19
20(* Lexing info for ML template language *)
21
22type pos = int
23type svalue = Tokens.svalue
24type ('a,'b) token = ('a,'b) Tokens.token
25type lexresult = (svalue,pos) Tokens.token
26
27val lineNum = ErrorMsg.lineNum
28val linePos = ErrorMsg.linePos
29
30fun strip s = String.extract (s, 1, SOME (String.size s - 2))
31
32local
33 val commentLevel = ref 0
34 val commentPos = ref 0
35 val linCom = ref false
36in
37 fun enterComment yypos = (commentLevel := !commentLevel + 1; commentPos := yypos)
38
39 fun linComStart yypos = (linCom := true; commentPos := yypos)
40 fun isLinCom () = !linCom
41 fun linComEnd () = linCom := false
42
43 fun exitComment () =
44 let val _ = commentLevel := !commentLevel - 1 in
45 !commentLevel = 0
46 end
47
48 fun eof () =
49 let
50 val pos = hd (!linePos)
51 in
52 if (!commentLevel > 0) then
53 (ErrorMsg.error (SOME (!commentPos,!commentPos)) "Unterminated comment")
54 else ();
55 Tokens.EOF (pos,pos)
56 end
57end
58
59val str = ref ""
60val strStart = ref 0
61
62%%
63%header (functor MltLexFn(structure Tokens : Mlt_TOKENS));
64%full
65%s COMMENT STRING CHAR CODE;
66
67id = ([A-Za-z_][A-Za-z0-9_]*)|([:]+);
68intconst = [0-9]+;
8291a2b9 69realconst = [0-9]+\.[0-9]*;
c0a3b488
AC
70ws = [\ \t\012];
71bo = [^<]+;
72
73%%
74
75\n => (if isLinCom () then (linComEnd (); YYBEGIN INITIAL) else ();
76 lineNum := !lineNum + 1;
77 linePos := yypos :: ! linePos;
78 continue ());
79
80<INITIAL> {ws}+ => (Tokens.HTML (" ", yypos, yypos + size yytext); lex ());
81
82<INITIAL> "<%" => (YYBEGIN CODE; Tokens.SEMI(yypos, yypos + size yytext));
83<CODE> "%>" => (YYBEGIN INITIAL; Tokens.SEMI(yypos, yypos + size yytext));
84
85<CODE> "(*" => (YYBEGIN COMMENT; enterComment yypos; continue());
86<CODE> "*)" => (ErrorMsg.error (SOME (yypos, yypos)) "Unbalanced comments";
87 continue());
88
89<COMMENT> "(*" => (if not (isLinCom ()) then enterComment yypos else (); continue());
90<COMMENT> "*)" => (if not (isLinCom ()) andalso exitComment () then YYBEGIN INITIAL else ();
91 continue());
92
93<CODE> "//" => (YYBEGIN COMMENT; linComStart yypos; continue());
94
95<CODE> {ws}+ => (lex ());
96
97<CODE> "\"" => (YYBEGIN STRING; strStart := yypos; str := ""; continue());
98<STRING> "\\\"" => (str := !str ^ "\\\""; continue());
99<STRING> "\"" => (YYBEGIN CODE; Tokens.STRING (!str, !strStart, yypos + 1));
100<STRING> . => (str := !str ^ yytext; continue());
101
102<CODE> "#\"" => (YYBEGIN CHAR; strStart := yypos; str := ""; continue());
103<CHAR> "\\\"" => (str := !str ^ "\\\""; continue());
104<CHAR> "\"" => (YYBEGIN CODE; if size (!str) = 1 then
105 Tokens.CHAR (!str, !strStart, yypos + 1)
106 else
107 (ErrorMsg.error (SOME (yypos, yypos)) "Invalid character constant";
108 continue()));
109<CHAR> . => (str := !str ^ yytext; continue());
110
111<CODE> "{" => (Tokens.LBRACE (yypos, yypos + size yytext));
112<CODE> "}" => (Tokens.RBRACE (yypos, yypos + size yytext));
113<CODE> "(" => (Tokens.LPAREN (yypos, yypos + size yytext));
114<CODE> ")" => (Tokens.RPAREN (yypos, yypos + size yytext));
115<CODE> "[" => (Tokens.LBRACK (yypos, yypos + size yytext));
116<CODE> "]" => (Tokens.RBRACK (yypos, yypos + size yytext));
117
118<CODE> "=" => (Tokens.EQ (yypos, yypos + size yytext));
119<CODE> "<>" => (Tokens.NEQ (yypos, yypos + size yytext));
120<CODE> "<" => (Tokens.LT (yypos, yypos + size yytext));
121<CODE> "<=" => (Tokens.LTE (yypos, yypos + size yytext));
122<CODE> ">" => (Tokens.GT (yypos, yypos + size yytext));
123<CODE> ">=" => (Tokens.GTE (yypos, yypos + size yytext));
124
125<CODE> ":=" => (Tokens.ASN (yypos, yypos + size yytext));
126
127<CODE> "/" => (Tokens.DIVIDE (yypos, yypos + size yytext));
128<CODE> "*" => (Tokens.TIMES (yypos, yypos + size yytext));
129<CODE> "+" => (Tokens.PLUS (yypos, yypos + size yytext));
130<CODE> "-" => (Tokens.MINUS (yypos, yypos + size yytext));
131<CODE> "%" => (Tokens.MOD (yypos, yypos + size yytext));
132<CODE> "^" => (Tokens.STRCAT (yypos, yypos + size yytext));
133
134<CODE> "~" => (Tokens.NEG (yypos, yypos + size yytext));
135<CODE> "," => (Tokens.COMMA (yypos, yypos + size yytext));
136<CODE> ":" => (Tokens.COLON (yypos, yypos + size yytext));
137<CODE> "..." => (Tokens.DOTDOTDOT (yypos, yypos + 3));
138<CODE> ".." => (Tokens.DOTDOT (yypos, yypos + 2));
139<CODE> "." => (Tokens.DOT (yypos, yypos + 1));
140<CODE> "_" => (Tokens.UNDER (yypos, yypos + 1));
141<CODE> "#" => (Tokens.HASH (yypos, yypos + 1));
142<CODE> ";" => (Tokens.SEMI (yypos, yypos + 1));
143<CODE> "$" => (Tokens.DOLLAR (yypos, yypos + size yytext));
144<CODE> "@" => (Tokens.AT (yypos, yypos + size yytext));
145
146<CODE> "if" => (Tokens.IF (yypos, yypos + 2));
8291a2b9 147<CODE> "then" => (Tokens.THEN (yypos, yypos + 4));
c0a3b488
AC
148<CODE> "else" => (Tokens.ELSE (yypos, yypos + 4));
149<CODE> "foreach" => (Tokens.FOREACH (yypos, yypos + 7));
150<CODE> "in" => (Tokens.IN (yypos, yypos + 2));
151<CODE> "case" => (Tokens.CASE (yypos, yypos + 4));
152<CODE> "as" => (Tokens.AS (yypos, yypos + 2));
8291a2b9 153<CODE> "fn" => (Tokens.FN (yypos, yypos + 2));
c0a3b488
AC
154<CODE> "with" => (Tokens.WITH (yypos, yypos + 4));
155<CODE> "open" => (Tokens.OPEN (yypos, yypos + 4));
156<CODE> "val" => (Tokens.VAL (yypos, yypos + 3));
157<CODE> "ref" => (Tokens.REF (yypos, yypos + 3));
158<CODE> "try" => (Tokens.TRY (yypos, yypos + 3));
159<CODE> "catch" => (Tokens.CATCH (yypos, yypos + 5));
8291a2b9
AC
160<CODE> "or" => (Tokens.ORELSE (yypos, yypos + 2));
161<CODE> "and" => (Tokens.ANDALSO (yypos, yypos + 3));
162<CODE> "switch" => (Tokens.SWITCH (yypos, yypos + 6));
163<CODE> "of" => (Tokens.OF (yypos, yypos + 2));
164<CODE> "=>" => (Tokens.ARROW (yypos, yypos + 2));
165<CODE> "|" => (Tokens.BAR (yypos, yypos + 1));
166<CODE> "do" => (Tokens.DO (yypos, yypos + 2));
167<CODE> "end" => (Tokens.END (yypos, yypos + 3));
168<CODE> "raise" => (Tokens.RAISE (yypos, yypos + 5));
c0a3b488
AC
169
170<CODE> "::" => (Tokens.CONS (yypos, yypos + 2));
171<CODE> {id} => (Tokens.IDENT (yytext, yypos, yypos + size yytext));
172<CODE> {intconst} => (case Int.fromString yytext of
8291a2b9
AC
173 SOME x => Tokens.INT (x, yypos, yypos + size yytext)
174 | NONE => (ErrorMsg.error (SOME (yypos, yypos))
175 ("Expected int, received: " ^ yytext);
176 continue ()));
177<CODE> {realconst} => (case Real.fromString yytext of
178 SOME x => Tokens.REAL (x, yypos, yypos + size yytext)
179 | NONE => (ErrorMsg.error (SOME (yypos, yypos))
180 ("Expected real, received: " ^ yytext);
181 continue ()));
c0a3b488
AC
182
183<CODE> "\"" {id} "\"" => (Tokens.STRING (String.substring(yytext, 1, String.size yytext - 2), yypos, yypos + size yytext));
184
185<COMMENT> . => (continue());
186
187<INITIAL> {bo} => (Tokens.HTML (yytext, yypos, yypos + size yytext));
188<INITIAL> . => (Tokens.HTML (yytext, yypos, yypos + 1));
189
190<CODE> . => (ErrorMsg.error (SOME (yypos,yypos))
191 ("illegal character: \"" ^ yytext ^ "\"");
192 continue ());