Import Upstream version 20180207
[hcoop/debian/mlton.git] / benchmark / tests / DATA / ml.lex
1 (* Copyright 1989 by AT&T Bell Laboratories *)
2 open ErrorMsg
3 type svalue = Tokens.svalue
4 type pos = int
5 type lexresult = (svalue,pos) Tokens.token
6 type lexarg = {comLevel : int ref,
7 lineNum : int ref,
8 linePos : int list ref, (* offsets of lines in file *)
9 charlist : string list ref,
10 stringstart : int ref, (* start of current string or comment*)
11 err : pos*pos -> ErrorMsg.severity -> string->unit}
12 type arg = lexarg
13 type ('a,'b) token = ('a,'b) Tokens.token
14 val eof = fn ({comLevel,err,linePos,stringstart,lineNum,charlist}:lexarg) =>
15 let val pos = Integer.max(!stringstart+2, hd(!linePos))
16 in if !comLevel>0 then err (!stringstart,pos) COMPLAIN
17 "unclosed comment"
18 else ();
19 Tokens.EOF(pos,pos)
20 end
21 fun addString (charlist,s:string) = charlist := s :: (!charlist)
22 fun makeString charlist = (implode(rev(!charlist)) before charlist := nil)
23 fun makeHexInt sign s = let
24 fun digit d = if (d < Ascii.uc_a) then (d - Ascii.zero)
25 else (10 + (if (d < Ascii.lc_a) then (d - Ascii.uc_a) else (d - Ascii.lc_a)))
26 in
27 revfold (fn (c,a) => sign(a*16, digit(ord c))) (explode s) 0
28 end
29 fun makeInt sign s =
30 revfold (fn (c,a) => sign(a*10, ord c - Ascii.zero)) (explode s) 0
31 %%
32 %s A S F;
33 %header (functor MLLexFun(structure Tokens : ML_TOKENS));
34 %arg ({comLevel,lineNum,err,linePos,charlist,stringstart});
35 idchars=[A-Za-z'_0-9];
36 id=[A-Za-z'_]{idchars}*;
37 ws=("\012"|[\t\ ])*;
38 sym=[!%&$+/:<=>?@~|#*`]|\\|\-|\^;
39 num=[0-9]+;
40 frac="."{num};
41 exp="E"(~?){num};
42 real=(~?)(({num}{frac}?{exp})|({num}{frac}{exp}?));
43 hexnum=[0-9a-fA-F]+;
44 %%
45 <INITIAL>{ws} => (continue());
46 <INITIAL>\n => (inc lineNum; linePos := yypos :: !linePos; continue());
47 <INITIAL>"*" => (Tokens.ASTERISK(yypos,yypos+1));
48 <INITIAL>"|" => (Tokens.BAR(yypos,yypos+1));
49 <INITIAL>":" => (Tokens.COLON(yypos,yypos+1));
50 <INITIAL>"=" => (Tokens.EQUAL(yypos,yypos+1));
51 <INITIAL>"_" => (Tokens.WILD(yypos,yypos+1));
52 <INITIAL>"#" => (Tokens.HASH(yypos,yypos+1));
53 <INITIAL>"," => (Tokens.COMMA(yypos,yypos+1));
54 <INITIAL>"{" => (Tokens.LBRACE(yypos,yypos+1));
55 <INITIAL>"}" => (Tokens.RBRACE(yypos,yypos+1));
56 <INITIAL>"[" => (Tokens.LBRACKET(yypos,yypos+1));
57 <INITIAL>"]" => (Tokens.RBRACKET(yypos,yypos+1));
58 <INITIAL>";" => (Tokens.SEMICOLON(yypos,yypos+1));
59 <INITIAL>"(" => (Tokens.LPAREN(yypos,yypos+1));
60 <INITIAL>")" => (Tokens.RPAREN(yypos,yypos+1));
61 <INITIAL>"and" => (Tokens.AND(yypos,yypos+3));
62 <INITIAL>"abstraction" => (Tokens.ABSTRACTION(yypos,yypos+11));
63 <INITIAL>"abstype" => (Tokens.ABSTYPE(yypos,yypos+7));
64 <INITIAL>"->" => (Tokens.ARROW(yypos,yypos+2));
65 <INITIAL>"as" => (Tokens.AS(yypos,yypos+2));
66 <INITIAL>"case" => (Tokens.CASE(yypos,yypos+4));
67 <INITIAL>"datatype" => (Tokens.DATATYPE(yypos,yypos+8));
68 <INITIAL>"." => (Tokens.DOT(yypos,yypos+1));
69 <INITIAL>"..." => (Tokens.DOTDOTDOT(yypos,yypos+3));
70 <INITIAL>"else" => (Tokens.ELSE(yypos,yypos+4));
71 <INITIAL>"end" => (Tokens.END(yypos,yypos+3));
72 <INITIAL>"eqtype" => (Tokens.EQTYPE(yypos,yypos+6));
73 <INITIAL>"exception" => (Tokens.EXCEPTION(yypos,yypos+9));
74 <INITIAL>"do" => (Tokens.DO(yypos,yypos+2));
75 <INITIAL>"=>" => (Tokens.DARROW(yypos,yypos+2));
76 <INITIAL>"fn" => (Tokens.FN(yypos,yypos+2));
77 <INITIAL>"fun" => (Tokens.FUN(yypos,yypos+3));
78 <INITIAL>"functor" => (Tokens.FUNCTOR(yypos,yypos+7));
79 <INITIAL>"handle" => (Tokens.HANDLE(yypos,yypos+6));
80 <INITIAL>"if" => (Tokens.IF(yypos,yypos+2));
81 <INITIAL>"in" => (Tokens.IN(yypos,yypos+2));
82 <INITIAL>"include" => (Tokens.INCLUDE(yypos,yypos+7));
83 <INITIAL>"infix" => (Tokens.INFIX(yypos,yypos+5));
84 <INITIAL>"infixr" => (Tokens.INFIXR(yypos,yypos+6));
85 <INITIAL>"let" => (Tokens.LET(yypos,yypos+3));
86 <INITIAL>"local" => (Tokens.LOCAL(yypos,yypos+5));
87 <INITIAL>"nonfix" => (Tokens.NONFIX(yypos,yypos+6));
88 <INITIAL>"of" => (Tokens.OF(yypos,yypos+2));
89 <INITIAL>"op" => (Tokens.OP(yypos,yypos+2));
90 <INITIAL>"open" => (Tokens.OPEN(yypos,yypos+4));
91 <INITIAL>"overload" => (Tokens.OVERLOAD(yypos,yypos+8));
92 <INITIAL>"raise" => (Tokens.RAISE(yypos,yypos+5));
93 <INITIAL>"rec" => (Tokens.REC(yypos,yypos+3));
94 <INITIAL>"sharing" => (Tokens.SHARING(yypos,yypos+7));
95 <INITIAL>"sig" => (Tokens.SIG(yypos,yypos+3));
96 <INITIAL>"signature" => (Tokens.SIGNATURE(yypos,yypos+9));
97 <INITIAL>"struct" => (Tokens.STRUCT(yypos,yypos+6));
98 <INITIAL>"structure" => (Tokens.STRUCTURE(yypos,yypos+9));
99 <INITIAL>"then" => (Tokens.THEN(yypos,yypos+4));
100 <INITIAL>"type" => (Tokens.TYPE(yypos,yypos+4));
101 <INITIAL>"val" => (Tokens.VAL(yypos,yypos+3));
102 <INITIAL>"while" => (Tokens.WHILE(yypos,yypos+5));
103 <INITIAL>"with" => (Tokens.WITH(yypos,yypos+4));
104 <INITIAL>"withtype" => (Tokens.WITHTYPE(yypos,yypos+8));
105 <INITIAL>"orelse" => (Tokens.ORELSE(yypos,yypos+6));
106 <INITIAL>"andalso" => (Tokens.ANDALSO(yypos,yypos+7));
107 <INITIAL>"import" => (Tokens.IMPORT(yypos,yypos+6));
108 <INITIAL>"'"{idchars}* => (Tokens.TYVAR(yytext, yypos, yypos+size yytext));
109 <INITIAL>({sym}+|{id}) => (Tokens.ID(yytext, yypos, yypos+size yytext));
110 <INITIAL>{real} => (Tokens.REAL(yytext,yypos,yypos+size yytext));
111 <INITIAL>[1-9][0-9]* => (Tokens.INT(makeInt (op +) yytext
112 handle Overflow => (err (yypos,yypos+size yytext)
113 COMPLAIN "integer too large"; 1),
114 yypos,yypos+size yytext));
115 <INITIAL>{num} => (Tokens.INT0(makeInt (op +) yytext
116 handle Overflow => (err (yypos,yypos+size yytext)
117 COMPLAIN "integer too large"; 0),
118 yypos,yypos+size yytext));
119 <INITIAL>~{num} => (Tokens.INT0(makeInt (op -)
120 (substring(yytext,1,size(yytext)-1))
121 handle Overflow => (err (yypos,yypos+size yytext)
122 COMPLAIN "integer too large"; 0),
123 yypos,yypos+size yytext));
124 <INITIAL>"0x"{hexnum} => (
125 Tokens.INT0(makeHexInt (op +) (substring(yytext, 2, size(yytext)-2))
126 handle Overflow => (err (yypos,yypos+size yytext)
127 COMPLAIN "integer too large"; 0),
128 yypos, yypos+size yytext));
129 <INITIAL>"~0x"{hexnum} => (
130 Tokens.INT0(makeHexInt (op -) (substring(yytext, 3, size(yytext)-3))
131 handle Overflow => (err (yypos,yypos+size yytext)
132 COMPLAIN "integer too large"; 0),
133 yypos, yypos+size yytext));
134 <INITIAL>\" => (charlist := [""]; stringstart := yypos;
135 YYBEGIN S; continue());
136 <INITIAL>"(*" => (YYBEGIN A; stringstart := yypos; comLevel := 1; continue());
137 <INITIAL>\h => (err (yypos,yypos) COMPLAIN "non-Ascii character"; continue());
138 <INITIAL>. => (err (yypos,yypos) COMPLAIN "illegal token"; continue());
139 <A>"(*" => (inc comLevel; continue());
140 <A>\n => (inc lineNum; linePos := yypos :: !linePos; continue());
141 <A>"*)" => (dec comLevel; if !comLevel=0 then YYBEGIN INITIAL else (); continue());
142 <A>. => (continue());
143 <S>\" => (YYBEGIN INITIAL; Tokens.STRING(makeString charlist,
144 !stringstart,yypos+1));
145 <S>\n => (err (!stringstart,yypos) COMPLAIN "unclosed string";
146 inc lineNum; linePos := yypos :: !linePos;
147 YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos));
148 <S>[^"\\\n]* => (addString(charlist,yytext); continue());
149 <S>\\\n => (inc lineNum; linePos := yypos :: !linePos;
150 YYBEGIN F; continue());
151 <S>\\[\ \t] => (YYBEGIN F; continue());
152 <F>\n => (inc lineNum; linePos := yypos :: !linePos; continue());
153 <F>{ws} => (continue());
154 <F>\\ => (YYBEGIN S; stringstart := yypos; continue());
155 <F>. => (err (!stringstart,yypos) COMPLAIN "unclosed string";
156 YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos+1));
157 <S>\\t => (addString(charlist,"\t"); continue());
158 <S>\\n => (addString(charlist,"\n"); continue());
159 <S>\\\\ => (addString(charlist,"\\"); continue());
160 <S>\\\" => (addString(charlist,chr(Ascii.dquote)); continue());
161 <S>\\\^[@-_] => (addString(charlist,chr(ordof(yytext,2)-ord("@"))); continue());
162 <S>\\[0-9]{3} =>
163 (let val x = ordof(yytext,1)*100
164 +ordof(yytext,2)*10
165 +ordof(yytext,3)
166 -(Ascii.zero*111)
167 in (if x>255
168 then err (yypos,yypos+4) COMPLAIN "illegal ascii escape"
169 else addString(charlist,chr x);
170 continue())
171 end);
172 <S>\\ => (err (yypos,yypos+1) COMPLAIN "illegal string escape";
173 continue());