Various improvements made while working on relwiki
[bpt/mlt.git] / src / mlt.lex
CommitLineData
c0a3b488
AC
1(*
2 * Dynamic web page generation with Standard ML
3 * Copyright (C) 2003 Adam Chlipala
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 *)
19
20(* Lexing info for ML template language *)
21
22type pos = int
23type svalue = Tokens.svalue
24type ('a,'b) token = ('a,'b) Tokens.token
25type lexresult = (svalue,pos) Tokens.token
26
27val lineNum = ErrorMsg.lineNum
28val linePos = ErrorMsg.linePos
29
30fun strip s = String.extract (s, 1, SOME (String.size s - 2))
31
32local
33 val commentLevel = ref 0
34 val commentPos = ref 0
35 val linCom = ref false
36in
37 fun enterComment yypos = (commentLevel := !commentLevel + 1; commentPos := yypos)
38
39 fun linComStart yypos = (linCom := true; commentPos := yypos)
40 fun isLinCom () = !linCom
41 fun linComEnd () = linCom := false
42
43 fun exitComment () =
44 let val _ = commentLevel := !commentLevel - 1 in
45 !commentLevel = 0
46 end
47
48 fun eof () =
49 let
50 val pos = hd (!linePos)
51 in
52 if (!commentLevel > 0) then
53 (ErrorMsg.error (SOME (!commentPos,!commentPos)) "Unterminated comment")
54 else ();
55 Tokens.EOF (pos,pos)
56 end
57end
58
59val str = ref ""
60val strStart = ref 0
61
62%%
63%header (functor MltLexFn(structure Tokens : Mlt_TOKENS));
64%full
65%s COMMENT STRING CHAR CODE;
66
67id = ([A-Za-z_][A-Za-z0-9_]*)|([:]+);
68intconst = [0-9]+;
8291a2b9 69realconst = [0-9]+\.[0-9]*;
c0a3b488 70ws = [\ \t\012];
16abb0f9 71bo = [^<\n]+;
c0a3b488
AC
72
73%%
74
16abb0f9 75<CODE> \n => (if isLinCom () then (linComEnd (); YYBEGIN INITIAL) else ();
c0a3b488
AC
76 lineNum := !lineNum + 1;
77 linePos := yypos :: ! linePos;
78 continue ());
16abb0f9
AC
79<INITIAL> \n => (lineNum := !lineNum + 1;
80 linePos := yypos :: ! linePos;
81 Tokens.HTML (yytext, yypos, yypos + size yytext));
c0a3b488
AC
82
83<INITIAL> {ws}+ => (Tokens.HTML (" ", yypos, yypos + size yytext); lex ());
84
85<INITIAL> "<%" => (YYBEGIN CODE; Tokens.SEMI(yypos, yypos + size yytext));
86<CODE> "%>" => (YYBEGIN INITIAL; Tokens.SEMI(yypos, yypos + size yytext));
87
88<CODE> "(*" => (YYBEGIN COMMENT; enterComment yypos; continue());
89<CODE> "*)" => (ErrorMsg.error (SOME (yypos, yypos)) "Unbalanced comments";
90 continue());
91
92<COMMENT> "(*" => (if not (isLinCom ()) then enterComment yypos else (); continue());
93<COMMENT> "*)" => (if not (isLinCom ()) andalso exitComment () then YYBEGIN INITIAL else ();
94 continue());
95
96<CODE> "//" => (YYBEGIN COMMENT; linComStart yypos; continue());
97
98<CODE> {ws}+ => (lex ());
99
100<CODE> "\"" => (YYBEGIN STRING; strStart := yypos; str := ""; continue());
101<STRING> "\\\"" => (str := !str ^ "\\\""; continue());
102<STRING> "\"" => (YYBEGIN CODE; Tokens.STRING (!str, !strStart, yypos + 1));
103<STRING> . => (str := !str ^ yytext; continue());
104
105<CODE> "#\"" => (YYBEGIN CHAR; strStart := yypos; str := ""; continue());
106<CHAR> "\\\"" => (str := !str ^ "\\\""; continue());
107<CHAR> "\"" => (YYBEGIN CODE; if size (!str) = 1 then
108 Tokens.CHAR (!str, !strStart, yypos + 1)
109 else
110 (ErrorMsg.error (SOME (yypos, yypos)) "Invalid character constant";
111 continue()));
112<CHAR> . => (str := !str ^ yytext; continue());
113
114<CODE> "{" => (Tokens.LBRACE (yypos, yypos + size yytext));
115<CODE> "}" => (Tokens.RBRACE (yypos, yypos + size yytext));
116<CODE> "(" => (Tokens.LPAREN (yypos, yypos + size yytext));
117<CODE> ")" => (Tokens.RPAREN (yypos, yypos + size yytext));
118<CODE> "[" => (Tokens.LBRACK (yypos, yypos + size yytext));
119<CODE> "]" => (Tokens.RBRACK (yypos, yypos + size yytext));
120
121<CODE> "=" => (Tokens.EQ (yypos, yypos + size yytext));
122<CODE> "<>" => (Tokens.NEQ (yypos, yypos + size yytext));
123<CODE> "<" => (Tokens.LT (yypos, yypos + size yytext));
124<CODE> "<=" => (Tokens.LTE (yypos, yypos + size yytext));
125<CODE> ">" => (Tokens.GT (yypos, yypos + size yytext));
126<CODE> ">=" => (Tokens.GTE (yypos, yypos + size yytext));
127
128<CODE> ":=" => (Tokens.ASN (yypos, yypos + size yytext));
129
130<CODE> "/" => (Tokens.DIVIDE (yypos, yypos + size yytext));
131<CODE> "*" => (Tokens.TIMES (yypos, yypos + size yytext));
132<CODE> "+" => (Tokens.PLUS (yypos, yypos + size yytext));
133<CODE> "-" => (Tokens.MINUS (yypos, yypos + size yytext));
134<CODE> "%" => (Tokens.MOD (yypos, yypos + size yytext));
135<CODE> "^" => (Tokens.STRCAT (yypos, yypos + size yytext));
136
137<CODE> "~" => (Tokens.NEG (yypos, yypos + size yytext));
138<CODE> "," => (Tokens.COMMA (yypos, yypos + size yytext));
139<CODE> ":" => (Tokens.COLON (yypos, yypos + size yytext));
140<CODE> "..." => (Tokens.DOTDOTDOT (yypos, yypos + 3));
141<CODE> ".." => (Tokens.DOTDOT (yypos, yypos + 2));
142<CODE> "." => (Tokens.DOT (yypos, yypos + 1));
143<CODE> "_" => (Tokens.UNDER (yypos, yypos + 1));
144<CODE> "#" => (Tokens.HASH (yypos, yypos + 1));
145<CODE> ";" => (Tokens.SEMI (yypos, yypos + 1));
146<CODE> "$" => (Tokens.DOLLAR (yypos, yypos + size yytext));
147<CODE> "@" => (Tokens.AT (yypos, yypos + size yytext));
148
149<CODE> "if" => (Tokens.IF (yypos, yypos + 2));
16abb0f9 150<CODE> "iff" => (Tokens.IFF (yypos, yypos + 3));
8291a2b9 151<CODE> "then" => (Tokens.THEN (yypos, yypos + 4));
c0a3b488 152<CODE> "else" => (Tokens.ELSE (yypos, yypos + 4));
16abb0f9 153<CODE> "elseif" => (Tokens.ELSEIF (yypos, yypos + 6));
c0a3b488 154<CODE> "foreach" => (Tokens.FOREACH (yypos, yypos + 7));
16abb0f9 155<CODE> "for" => (Tokens.FOR (yypos, yypos + 3));
c0a3b488
AC
156<CODE> "in" => (Tokens.IN (yypos, yypos + 2));
157<CODE> "case" => (Tokens.CASE (yypos, yypos + 4));
158<CODE> "as" => (Tokens.AS (yypos, yypos + 2));
8291a2b9 159<CODE> "fn" => (Tokens.FN (yypos, yypos + 2));
c0a3b488
AC
160<CODE> "with" => (Tokens.WITH (yypos, yypos + 4));
161<CODE> "open" => (Tokens.OPEN (yypos, yypos + 4));
162<CODE> "val" => (Tokens.VAL (yypos, yypos + 3));
163<CODE> "ref" => (Tokens.REF (yypos, yypos + 3));
164<CODE> "try" => (Tokens.TRY (yypos, yypos + 3));
165<CODE> "catch" => (Tokens.CATCH (yypos, yypos + 5));
8291a2b9
AC
166<CODE> "or" => (Tokens.ORELSE (yypos, yypos + 2));
167<CODE> "and" => (Tokens.ANDALSO (yypos, yypos + 3));
168<CODE> "switch" => (Tokens.SWITCH (yypos, yypos + 6));
169<CODE> "of" => (Tokens.OF (yypos, yypos + 2));
170<CODE> "=>" => (Tokens.ARROW (yypos, yypos + 2));
171<CODE> "|" => (Tokens.BAR (yypos, yypos + 1));
172<CODE> "do" => (Tokens.DO (yypos, yypos + 2));
173<CODE> "end" => (Tokens.END (yypos, yypos + 3));
174<CODE> "raise" => (Tokens.RAISE (yypos, yypos + 5));
16abb0f9
AC
175<CODE> "let" => (Tokens.LET (yypos, yypos + 3));
176<CODE> "in" => (Tokens.IN (yypos, yypos + 2));
c0a3b488
AC
177
178<CODE> "::" => (Tokens.CONS (yypos, yypos + 2));
16abb0f9 179<CODE> "o" => (Tokens.O (yypos, yypos + 1));
c0a3b488
AC
180<CODE> {id} => (Tokens.IDENT (yytext, yypos, yypos + size yytext));
181<CODE> {intconst} => (case Int.fromString yytext of
8291a2b9
AC
182 SOME x => Tokens.INT (x, yypos, yypos + size yytext)
183 | NONE => (ErrorMsg.error (SOME (yypos, yypos))
184 ("Expected int, received: " ^ yytext);
185 continue ()));
186<CODE> {realconst} => (case Real.fromString yytext of
187 SOME x => Tokens.REAL (x, yypos, yypos + size yytext)
188 | NONE => (ErrorMsg.error (SOME (yypos, yypos))
189 ("Expected real, received: " ^ yytext);
190 continue ()));
c0a3b488
AC
191
192<CODE> "\"" {id} "\"" => (Tokens.STRING (String.substring(yytext, 1, String.size yytext - 2), yypos, yypos + size yytext));
193
194<COMMENT> . => (continue());
195
196<INITIAL> {bo} => (Tokens.HTML (yytext, yypos, yypos + size yytext));
197<INITIAL> . => (Tokens.HTML (yytext, yypos, yypos + 1));
198
199<CODE> . => (ErrorMsg.error (SOME (yypos,yypos))
200 ("illegal character: \"" ^ yytext ^ "\"");
201 continue ());