Import Upstream version 20180207
[hcoop/debian/mlton.git] / benchmark / tests / DATA / ml.lex
CommitLineData
7f918cf1
CE
1(* Copyright 1989 by AT&T Bell Laboratories *)
2open ErrorMsg
3type svalue = Tokens.svalue
4type pos = int
5type lexresult = (svalue,pos) Tokens.token
6type lexarg = {comLevel : int ref,
7 lineNum : int ref,
8 linePos : int list ref, (* offsets of lines in file *)
9 charlist : string list ref,
10 stringstart : int ref, (* start of current string or comment*)
11 err : pos*pos -> ErrorMsg.severity -> string->unit}
12type arg = lexarg
13type ('a,'b) token = ('a,'b) Tokens.token
14val eof = fn ({comLevel,err,linePos,stringstart,lineNum,charlist}:lexarg) =>
15 let val pos = Integer.max(!stringstart+2, hd(!linePos))
16 in if !comLevel>0 then err (!stringstart,pos) COMPLAIN
17 "unclosed comment"
18 else ();
19 Tokens.EOF(pos,pos)
20 end
21fun addString (charlist,s:string) = charlist := s :: (!charlist)
22fun makeString charlist = (implode(rev(!charlist)) before charlist := nil)
23fun makeHexInt sign s = let
24 fun digit d = if (d < Ascii.uc_a) then (d - Ascii.zero)
25 else (10 + (if (d < Ascii.lc_a) then (d - Ascii.uc_a) else (d - Ascii.lc_a)))
26 in
27 revfold (fn (c,a) => sign(a*16, digit(ord c))) (explode s) 0
28 end
29fun makeInt sign s =
30 revfold (fn (c,a) => sign(a*10, ord c - Ascii.zero)) (explode s) 0
31%%
32%s A S F;
33%header (functor MLLexFun(structure Tokens : ML_TOKENS));
34%arg ({comLevel,lineNum,err,linePos,charlist,stringstart});
35idchars=[A-Za-z'_0-9];
36id=[A-Za-z'_]{idchars}*;
37ws=("\012"|[\t\ ])*;
38sym=[!%&$+/:<=>?@~|#*`]|\\|\-|\^;
39num=[0-9]+;
40frac="."{num};
41exp="E"(~?){num};
42real=(~?)(({num}{frac}?{exp})|({num}{frac}{exp}?));
43hexnum=[0-9a-fA-F]+;
44%%
45<INITIAL>{ws} => (continue());
46<INITIAL>\n => (inc lineNum; linePos := yypos :: !linePos; continue());
47<INITIAL>"*" => (Tokens.ASTERISK(yypos,yypos+1));
48<INITIAL>"|" => (Tokens.BAR(yypos,yypos+1));
49<INITIAL>":" => (Tokens.COLON(yypos,yypos+1));
50<INITIAL>"=" => (Tokens.EQUAL(yypos,yypos+1));
51<INITIAL>"_" => (Tokens.WILD(yypos,yypos+1));
52<INITIAL>"#" => (Tokens.HASH(yypos,yypos+1));
53<INITIAL>"," => (Tokens.COMMA(yypos,yypos+1));
54<INITIAL>"{" => (Tokens.LBRACE(yypos,yypos+1));
55<INITIAL>"}" => (Tokens.RBRACE(yypos,yypos+1));
56<INITIAL>"[" => (Tokens.LBRACKET(yypos,yypos+1));
57<INITIAL>"]" => (Tokens.RBRACKET(yypos,yypos+1));
58<INITIAL>";" => (Tokens.SEMICOLON(yypos,yypos+1));
59<INITIAL>"(" => (Tokens.LPAREN(yypos,yypos+1));
60<INITIAL>")" => (Tokens.RPAREN(yypos,yypos+1));
61<INITIAL>"and" => (Tokens.AND(yypos,yypos+3));
62<INITIAL>"abstraction" => (Tokens.ABSTRACTION(yypos,yypos+11));
63<INITIAL>"abstype" => (Tokens.ABSTYPE(yypos,yypos+7));
64<INITIAL>"->" => (Tokens.ARROW(yypos,yypos+2));
65<INITIAL>"as" => (Tokens.AS(yypos,yypos+2));
66<INITIAL>"case" => (Tokens.CASE(yypos,yypos+4));
67<INITIAL>"datatype" => (Tokens.DATATYPE(yypos,yypos+8));
68<INITIAL>"." => (Tokens.DOT(yypos,yypos+1));
69<INITIAL>"..." => (Tokens.DOTDOTDOT(yypos,yypos+3));
70<INITIAL>"else" => (Tokens.ELSE(yypos,yypos+4));
71<INITIAL>"end" => (Tokens.END(yypos,yypos+3));
72<INITIAL>"eqtype" => (Tokens.EQTYPE(yypos,yypos+6));
73<INITIAL>"exception" => (Tokens.EXCEPTION(yypos,yypos+9));
74<INITIAL>"do" => (Tokens.DO(yypos,yypos+2));
75<INITIAL>"=>" => (Tokens.DARROW(yypos,yypos+2));
76<INITIAL>"fn" => (Tokens.FN(yypos,yypos+2));
77<INITIAL>"fun" => (Tokens.FUN(yypos,yypos+3));
78<INITIAL>"functor" => (Tokens.FUNCTOR(yypos,yypos+7));
79<INITIAL>"handle" => (Tokens.HANDLE(yypos,yypos+6));
80<INITIAL>"if" => (Tokens.IF(yypos,yypos+2));
81<INITIAL>"in" => (Tokens.IN(yypos,yypos+2));
82<INITIAL>"include" => (Tokens.INCLUDE(yypos,yypos+7));
83<INITIAL>"infix" => (Tokens.INFIX(yypos,yypos+5));
84<INITIAL>"infixr" => (Tokens.INFIXR(yypos,yypos+6));
85<INITIAL>"let" => (Tokens.LET(yypos,yypos+3));
86<INITIAL>"local" => (Tokens.LOCAL(yypos,yypos+5));
87<INITIAL>"nonfix" => (Tokens.NONFIX(yypos,yypos+6));
88<INITIAL>"of" => (Tokens.OF(yypos,yypos+2));
89<INITIAL>"op" => (Tokens.OP(yypos,yypos+2));
90<INITIAL>"open" => (Tokens.OPEN(yypos,yypos+4));
91<INITIAL>"overload" => (Tokens.OVERLOAD(yypos,yypos+8));
92<INITIAL>"raise" => (Tokens.RAISE(yypos,yypos+5));
93<INITIAL>"rec" => (Tokens.REC(yypos,yypos+3));
94<INITIAL>"sharing" => (Tokens.SHARING(yypos,yypos+7));
95<INITIAL>"sig" => (Tokens.SIG(yypos,yypos+3));
96<INITIAL>"signature" => (Tokens.SIGNATURE(yypos,yypos+9));
97<INITIAL>"struct" => (Tokens.STRUCT(yypos,yypos+6));
98<INITIAL>"structure" => (Tokens.STRUCTURE(yypos,yypos+9));
99<INITIAL>"then" => (Tokens.THEN(yypos,yypos+4));
100<INITIAL>"type" => (Tokens.TYPE(yypos,yypos+4));
101<INITIAL>"val" => (Tokens.VAL(yypos,yypos+3));
102<INITIAL>"while" => (Tokens.WHILE(yypos,yypos+5));
103<INITIAL>"with" => (Tokens.WITH(yypos,yypos+4));
104<INITIAL>"withtype" => (Tokens.WITHTYPE(yypos,yypos+8));
105<INITIAL>"orelse" => (Tokens.ORELSE(yypos,yypos+6));
106<INITIAL>"andalso" => (Tokens.ANDALSO(yypos,yypos+7));
107<INITIAL>"import" => (Tokens.IMPORT(yypos,yypos+6));
108<INITIAL>"'"{idchars}* => (Tokens.TYVAR(yytext, yypos, yypos+size yytext));
109<INITIAL>({sym}+|{id}) => (Tokens.ID(yytext, yypos, yypos+size yytext));
110<INITIAL>{real} => (Tokens.REAL(yytext,yypos,yypos+size yytext));
111<INITIAL>[1-9][0-9]* => (Tokens.INT(makeInt (op +) yytext
112 handle Overflow => (err (yypos,yypos+size yytext)
113 COMPLAIN "integer too large"; 1),
114 yypos,yypos+size yytext));
115<INITIAL>{num} => (Tokens.INT0(makeInt (op +) yytext
116 handle Overflow => (err (yypos,yypos+size yytext)
117 COMPLAIN "integer too large"; 0),
118 yypos,yypos+size yytext));
119<INITIAL>~{num} => (Tokens.INT0(makeInt (op -)
120 (substring(yytext,1,size(yytext)-1))
121 handle Overflow => (err (yypos,yypos+size yytext)
122 COMPLAIN "integer too large"; 0),
123 yypos,yypos+size yytext));
124<INITIAL>"0x"{hexnum} => (
125 Tokens.INT0(makeHexInt (op +) (substring(yytext, 2, size(yytext)-2))
126 handle Overflow => (err (yypos,yypos+size yytext)
127 COMPLAIN "integer too large"; 0),
128 yypos, yypos+size yytext));
129<INITIAL>"~0x"{hexnum} => (
130 Tokens.INT0(makeHexInt (op -) (substring(yytext, 3, size(yytext)-3))
131 handle Overflow => (err (yypos,yypos+size yytext)
132 COMPLAIN "integer too large"; 0),
133 yypos, yypos+size yytext));
134<INITIAL>\" => (charlist := [""]; stringstart := yypos;
135 YYBEGIN S; continue());
136<INITIAL>"(*" => (YYBEGIN A; stringstart := yypos; comLevel := 1; continue());
137<INITIAL>\h => (err (yypos,yypos) COMPLAIN "non-Ascii character"; continue());
138<INITIAL>. => (err (yypos,yypos) COMPLAIN "illegal token"; continue());
139<A>"(*" => (inc comLevel; continue());
140<A>\n => (inc lineNum; linePos := yypos :: !linePos; continue());
141<A>"*)" => (dec comLevel; if !comLevel=0 then YYBEGIN INITIAL else (); continue());
142<A>. => (continue());
143<S>\" => (YYBEGIN INITIAL; Tokens.STRING(makeString charlist,
144 !stringstart,yypos+1));
145<S>\n => (err (!stringstart,yypos) COMPLAIN "unclosed string";
146 inc lineNum; linePos := yypos :: !linePos;
147 YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos));
148<S>[^"\\\n]* => (addString(charlist,yytext); continue());
149<S>\\\n => (inc lineNum; linePos := yypos :: !linePos;
150 YYBEGIN F; continue());
151<S>\\[\ \t] => (YYBEGIN F; continue());
152<F>\n => (inc lineNum; linePos := yypos :: !linePos; continue());
153<F>{ws} => (continue());
154<F>\\ => (YYBEGIN S; stringstart := yypos; continue());
155<F>. => (err (!stringstart,yypos) COMPLAIN "unclosed string";
156 YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos+1));
157<S>\\t => (addString(charlist,"\t"); continue());
158<S>\\n => (addString(charlist,"\n"); continue());
159<S>\\\\ => (addString(charlist,"\\"); continue());
160<S>\\\" => (addString(charlist,chr(Ascii.dquote)); continue());
161<S>\\\^[@-_] => (addString(charlist,chr(ordof(yytext,2)-ord("@"))); continue());
162<S>\\[0-9]{3} =>
163 (let val x = ordof(yytext,1)*100
164 +ordof(yytext,2)*10
165 +ordof(yytext,3)
166 -(Ascii.zero*111)
167 in (if x>255
168 then err (yypos,yypos+4) COMPLAIN "illegal ascii escape"
169 else addString(charlist,chr x);
170 continue())
171 end);
172<S>\\ => (err (yypos,yypos+1) COMPLAIN "illegal string escape";
173 continue());