| 1 | (* |
| 2 | * Dynamic web page generation with Standard ML |
| 3 | * Copyright (C) 2003 Adam Chlipala |
| 4 | * |
| 5 | * This library is free software; you can redistribute it and/or |
| 6 | * modify it under the terms of the GNU Lesser General Public |
| 7 | * License as published by the Free Software Foundation; either |
| 8 | * version 2.1 of the License, or (at your option) any later version. |
| 9 | * |
| 10 | * This library is distributed in the hope that it will be useful, |
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | * Lesser General Public License for more details. |
| 14 | * |
| 15 | * You should have received a copy of the GNU Lesser General Public |
| 16 | * License along with this library; if not, write to the Free Software |
| 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 18 | *) |
| 19 | |
| 20 | (* Lexing info for ML template language *) |
| 21 | |
| 22 | type pos = int |
| 23 | type svalue = Tokens.svalue |
| 24 | type ('a,'b) token = ('a,'b) Tokens.token |
| 25 | type lexresult = (svalue,pos) Tokens.token |
| 26 | |
| 27 | val lineNum = ErrorMsg.lineNum |
| 28 | val linePos = ErrorMsg.linePos |
| 29 | |
| 30 | fun strip s = String.extract (s, 1, SOME (String.size s - 2)) |
| 31 | |
| 32 | local |
| 33 | val commentLevel = ref 0 |
| 34 | val commentPos = ref 0 |
| 35 | val linCom = ref false |
| 36 | in |
| 37 | fun enterComment yypos = (commentLevel := !commentLevel + 1; commentPos := yypos) |
| 38 | |
| 39 | fun linComStart yypos = (linCom := true; commentPos := yypos) |
| 40 | fun isLinCom () = !linCom |
| 41 | fun linComEnd () = linCom := false |
| 42 | |
| 43 | fun exitComment () = |
| 44 | let val _ = commentLevel := !commentLevel - 1 in |
| 45 | !commentLevel = 0 |
| 46 | end |
| 47 | |
| 48 | fun eof () = |
| 49 | let |
| 50 | val pos = hd (!linePos) |
| 51 | in |
| 52 | if (!commentLevel > 0) then |
| 53 | (ErrorMsg.error (SOME (!commentPos,!commentPos)) "Unterminated comment") |
| 54 | else (); |
| 55 | Tokens.EOF (pos,pos) |
| 56 | end |
| 57 | end |
| 58 | |
| 59 | val str = ref "" |
| 60 | val strStart = ref 0 |
| 61 | |
| 62 | %% |
| 63 | %header (functor MltLexFn(structure Tokens : Mlt_TOKENS)); |
| 64 | %full |
| 65 | %s COMMENT STRING CHAR CODE; |
| 66 | |
| 67 | id = ([A-Za-z_][A-Za-z0-9_]*)|([:]+); |
| 68 | intconst = [0-9]+; |
| 69 | realconst = [0-9]+\.[0-9]*; |
| 70 | ws = [\ \t\012]; |
| 71 | bo = [^<]+; |
| 72 | |
| 73 | %% |
| 74 | |
| 75 | \n => (if isLinCom () then (linComEnd (); YYBEGIN INITIAL) else (); |
| 76 | lineNum := !lineNum + 1; |
| 77 | linePos := yypos :: ! linePos; |
| 78 | continue ()); |
| 79 | |
| 80 | <INITIAL> {ws}+ => (Tokens.HTML (" ", yypos, yypos + size yytext); lex ()); |
| 81 | |
| 82 | <INITIAL> "<%" => (YYBEGIN CODE; Tokens.SEMI(yypos, yypos + size yytext)); |
| 83 | <CODE> "%>" => (YYBEGIN INITIAL; Tokens.SEMI(yypos, yypos + size yytext)); |
| 84 | |
| 85 | <CODE> "(*" => (YYBEGIN COMMENT; enterComment yypos; continue()); |
| 86 | <CODE> "*)" => (ErrorMsg.error (SOME (yypos, yypos)) "Unbalanced comments"; |
| 87 | continue()); |
| 88 | |
| 89 | <COMMENT> "(*" => (if not (isLinCom ()) then enterComment yypos else (); continue()); |
| 90 | <COMMENT> "*)" => (if not (isLinCom ()) andalso exitComment () then YYBEGIN INITIAL else (); |
| 91 | continue()); |
| 92 | |
| 93 | <CODE> "//" => (YYBEGIN COMMENT; linComStart yypos; continue()); |
| 94 | |
| 95 | <CODE> {ws}+ => (lex ()); |
| 96 | |
| 97 | <CODE> "\"" => (YYBEGIN STRING; strStart := yypos; str := ""; continue()); |
| 98 | <STRING> "\\\"" => (str := !str ^ "\\\""; continue()); |
| 99 | <STRING> "\"" => (YYBEGIN CODE; Tokens.STRING (!str, !strStart, yypos + 1)); |
| 100 | <STRING> . => (str := !str ^ yytext; continue()); |
| 101 | |
| 102 | <CODE> "#\"" => (YYBEGIN CHAR; strStart := yypos; str := ""; continue()); |
| 103 | <CHAR> "\\\"" => (str := !str ^ "\\\""; continue()); |
| 104 | <CHAR> "\"" => (YYBEGIN CODE; if size (!str) = 1 then |
| 105 | Tokens.CHAR (!str, !strStart, yypos + 1) |
| 106 | else |
| 107 | (ErrorMsg.error (SOME (yypos, yypos)) "Invalid character constant"; |
| 108 | continue())); |
| 109 | <CHAR> . => (str := !str ^ yytext; continue()); |
| 110 | |
| 111 | <CODE> "{" => (Tokens.LBRACE (yypos, yypos + size yytext)); |
| 112 | <CODE> "}" => (Tokens.RBRACE (yypos, yypos + size yytext)); |
| 113 | <CODE> "(" => (Tokens.LPAREN (yypos, yypos + size yytext)); |
| 114 | <CODE> ")" => (Tokens.RPAREN (yypos, yypos + size yytext)); |
| 115 | <CODE> "[" => (Tokens.LBRACK (yypos, yypos + size yytext)); |
| 116 | <CODE> "]" => (Tokens.RBRACK (yypos, yypos + size yytext)); |
| 117 | |
| 118 | <CODE> "=" => (Tokens.EQ (yypos, yypos + size yytext)); |
| 119 | <CODE> "<>" => (Tokens.NEQ (yypos, yypos + size yytext)); |
| 120 | <CODE> "<" => (Tokens.LT (yypos, yypos + size yytext)); |
| 121 | <CODE> "<=" => (Tokens.LTE (yypos, yypos + size yytext)); |
| 122 | <CODE> ">" => (Tokens.GT (yypos, yypos + size yytext)); |
| 123 | <CODE> ">=" => (Tokens.GTE (yypos, yypos + size yytext)); |
| 124 | |
| 125 | <CODE> ":=" => (Tokens.ASN (yypos, yypos + size yytext)); |
| 126 | |
| 127 | <CODE> "/" => (Tokens.DIVIDE (yypos, yypos + size yytext)); |
| 128 | <CODE> "*" => (Tokens.TIMES (yypos, yypos + size yytext)); |
| 129 | <CODE> "+" => (Tokens.PLUS (yypos, yypos + size yytext)); |
| 130 | <CODE> "-" => (Tokens.MINUS (yypos, yypos + size yytext)); |
| 131 | <CODE> "%" => (Tokens.MOD (yypos, yypos + size yytext)); |
| 132 | <CODE> "^" => (Tokens.STRCAT (yypos, yypos + size yytext)); |
| 133 | |
| 134 | <CODE> "~" => (Tokens.NEG (yypos, yypos + size yytext)); |
| 135 | <CODE> "," => (Tokens.COMMA (yypos, yypos + size yytext)); |
| 136 | <CODE> ":" => (Tokens.COLON (yypos, yypos + size yytext)); |
| 137 | <CODE> "..." => (Tokens.DOTDOTDOT (yypos, yypos + 3)); |
| 138 | <CODE> ".." => (Tokens.DOTDOT (yypos, yypos + 2)); |
| 139 | <CODE> "." => (Tokens.DOT (yypos, yypos + 1)); |
| 140 | <CODE> "_" => (Tokens.UNDER (yypos, yypos + 1)); |
| 141 | <CODE> "#" => (Tokens.HASH (yypos, yypos + 1)); |
| 142 | <CODE> ";" => (Tokens.SEMI (yypos, yypos + 1)); |
| 143 | <CODE> "$" => (Tokens.DOLLAR (yypos, yypos + size yytext)); |
| 144 | <CODE> "@" => (Tokens.AT (yypos, yypos + size yytext)); |
| 145 | |
| 146 | <CODE> "if" => (Tokens.IF (yypos, yypos + 2)); |
| 147 | <CODE> "then" => (Tokens.THEN (yypos, yypos + 4)); |
| 148 | <CODE> "else" => (Tokens.ELSE (yypos, yypos + 4)); |
| 149 | <CODE> "foreach" => (Tokens.FOREACH (yypos, yypos + 7)); |
| 150 | <CODE> "in" => (Tokens.IN (yypos, yypos + 2)); |
| 151 | <CODE> "case" => (Tokens.CASE (yypos, yypos + 4)); |
| 152 | <CODE> "as" => (Tokens.AS (yypos, yypos + 2)); |
| 153 | <CODE> "fn" => (Tokens.FN (yypos, yypos + 2)); |
| 154 | <CODE> "with" => (Tokens.WITH (yypos, yypos + 4)); |
| 155 | <CODE> "open" => (Tokens.OPEN (yypos, yypos + 4)); |
| 156 | <CODE> "val" => (Tokens.VAL (yypos, yypos + 3)); |
| 157 | <CODE> "ref" => (Tokens.REF (yypos, yypos + 3)); |
| 158 | <CODE> "try" => (Tokens.TRY (yypos, yypos + 3)); |
| 159 | <CODE> "catch" => (Tokens.CATCH (yypos, yypos + 5)); |
| 160 | <CODE> "or" => (Tokens.ORELSE (yypos, yypos + 2)); |
| 161 | <CODE> "and" => (Tokens.ANDALSO (yypos, yypos + 3)); |
| 162 | <CODE> "switch" => (Tokens.SWITCH (yypos, yypos + 6)); |
| 163 | <CODE> "of" => (Tokens.OF (yypos, yypos + 2)); |
| 164 | <CODE> "=>" => (Tokens.ARROW (yypos, yypos + 2)); |
| 165 | <CODE> "|" => (Tokens.BAR (yypos, yypos + 1)); |
| 166 | <CODE> "do" => (Tokens.DO (yypos, yypos + 2)); |
| 167 | <CODE> "end" => (Tokens.END (yypos, yypos + 3)); |
| 168 | <CODE> "raise" => (Tokens.RAISE (yypos, yypos + 5)); |
| 169 | |
| 170 | <CODE> "::" => (Tokens.CONS (yypos, yypos + 2)); |
| 171 | <CODE> {id} => (Tokens.IDENT (yytext, yypos, yypos + size yytext)); |
| 172 | <CODE> {intconst} => (case Int.fromString yytext of |
| 173 | SOME x => Tokens.INT (x, yypos, yypos + size yytext) |
| 174 | | NONE => (ErrorMsg.error (SOME (yypos, yypos)) |
| 175 | ("Expected int, received: " ^ yytext); |
| 176 | continue ())); |
| 177 | <CODE> {realconst} => (case Real.fromString yytext of |
| 178 | SOME x => Tokens.REAL (x, yypos, yypos + size yytext) |
| 179 | | NONE => (ErrorMsg.error (SOME (yypos, yypos)) |
| 180 | ("Expected real, received: " ^ yytext); |
| 181 | continue ())); |
| 182 | |
| 183 | <CODE> "\"" {id} "\"" => (Tokens.STRING (String.substring(yytext, 1, String.size yytext - 2), yypos, yypos + size yytext)); |
| 184 | |
| 185 | <COMMENT> . => (continue()); |
| 186 | |
| 187 | <INITIAL> {bo} => (Tokens.HTML (yytext, yypos, yypos + size yytext)); |
| 188 | <INITIAL> . => (Tokens.HTML (yytext, yypos, yypos + 1)); |
| 189 | |
| 190 | <CODE> . => (ErrorMsg.error (SOME (yypos,yypos)) |
| 191 | ("illegal character: \"" ^ yytext ^ "\""); |
| 192 | continue ()); |