Commit | Line | Data |
---|---|---|
7f918cf1 CE |
1 | (* Copyright 1989 by AT&T Bell Laboratories *) |
2 | open ErrorMsg | |
3 | type svalue = Tokens.svalue | |
4 | type pos = int | |
5 | type lexresult = (svalue,pos) Tokens.token | |
6 | type lexarg = {comLevel : int ref, | |
7 | lineNum : int ref, | |
8 | linePos : int list ref, (* offsets of lines in file *) | |
9 | charlist : string list ref, | |
10 | stringstart : int ref, (* start of current string or comment*) | |
11 | err : pos*pos -> ErrorMsg.severity -> string->unit} | |
12 | type arg = lexarg | |
13 | type ('a,'b) token = ('a,'b) Tokens.token | |
14 | val eof = fn ({comLevel,err,linePos,stringstart,lineNum,charlist}:lexarg) => | |
15 | let val pos = Integer.max(!stringstart+2, hd(!linePos)) | |
16 | in if !comLevel>0 then err (!stringstart,pos) COMPLAIN | |
17 | "unclosed comment" | |
18 | else (); | |
19 | Tokens.EOF(pos,pos) | |
20 | end | |
21 | fun addString (charlist,s:string) = charlist := s :: (!charlist) | |
22 | fun makeString charlist = (implode(rev(!charlist)) before charlist := nil) | |
23 | fun makeHexInt sign s = let | |
24 | fun digit d = if (d < Ascii.uc_a) then (d - Ascii.zero) | |
25 | else (10 + (if (d < Ascii.lc_a) then (d - Ascii.uc_a) else (d - Ascii.lc_a))) | |
26 | in | |
27 | revfold (fn (c,a) => sign(a*16, digit(ord c))) (explode s) 0 | |
28 | end | |
29 | fun makeInt sign s = | |
30 | revfold (fn (c,a) => sign(a*10, ord c - Ascii.zero)) (explode s) 0 | |
31 | %% | |
32 | %s A S F; | |
33 | %header (functor MLLexFun(structure Tokens : ML_TOKENS)); | |
34 | %arg ({comLevel,lineNum,err,linePos,charlist,stringstart}); | |
35 | idchars=[A-Za-z'_0-9]; | |
36 | id=[A-Za-z'_]{idchars}*; | |
37 | ws=("\012"|[\t\ ])*; | |
38 | sym=[!%&$+/:<=>?@~|#*`]|\\|\-|\^; | |
39 | num=[0-9]+; | |
40 | frac="."{num}; | |
41 | exp="E"(~?){num}; | |
42 | real=(~?)(({num}{frac}?{exp})|({num}{frac}{exp}?)); | |
43 | hexnum=[0-9a-fA-F]+; | |
44 | %% | |
45 | <INITIAL>{ws} => (continue()); | |
46 | <INITIAL>\n => (inc lineNum; linePos := yypos :: !linePos; continue()); | |
47 | <INITIAL>"*" => (Tokens.ASTERISK(yypos,yypos+1)); | |
48 | <INITIAL>"|" => (Tokens.BAR(yypos,yypos+1)); | |
49 | <INITIAL>":" => (Tokens.COLON(yypos,yypos+1)); | |
50 | <INITIAL>"=" => (Tokens.EQUAL(yypos,yypos+1)); | |
51 | <INITIAL>"_" => (Tokens.WILD(yypos,yypos+1)); | |
52 | <INITIAL>"#" => (Tokens.HASH(yypos,yypos+1)); | |
53 | <INITIAL>"," => (Tokens.COMMA(yypos,yypos+1)); | |
54 | <INITIAL>"{" => (Tokens.LBRACE(yypos,yypos+1)); | |
55 | <INITIAL>"}" => (Tokens.RBRACE(yypos,yypos+1)); | |
56 | <INITIAL>"[" => (Tokens.LBRACKET(yypos,yypos+1)); | |
57 | <INITIAL>"]" => (Tokens.RBRACKET(yypos,yypos+1)); | |
58 | <INITIAL>";" => (Tokens.SEMICOLON(yypos,yypos+1)); | |
59 | <INITIAL>"(" => (Tokens.LPAREN(yypos,yypos+1)); | |
60 | <INITIAL>")" => (Tokens.RPAREN(yypos,yypos+1)); | |
61 | <INITIAL>"and" => (Tokens.AND(yypos,yypos+3)); | |
62 | <INITIAL>"abstraction" => (Tokens.ABSTRACTION(yypos,yypos+11)); | |
63 | <INITIAL>"abstype" => (Tokens.ABSTYPE(yypos,yypos+7)); | |
64 | <INITIAL>"->" => (Tokens.ARROW(yypos,yypos+2)); | |
65 | <INITIAL>"as" => (Tokens.AS(yypos,yypos+2)); | |
66 | <INITIAL>"case" => (Tokens.CASE(yypos,yypos+4)); | |
67 | <INITIAL>"datatype" => (Tokens.DATATYPE(yypos,yypos+8)); | |
68 | <INITIAL>"." => (Tokens.DOT(yypos,yypos+1)); | |
69 | <INITIAL>"..." => (Tokens.DOTDOTDOT(yypos,yypos+3)); | |
70 | <INITIAL>"else" => (Tokens.ELSE(yypos,yypos+4)); | |
71 | <INITIAL>"end" => (Tokens.END(yypos,yypos+3)); | |
72 | <INITIAL>"eqtype" => (Tokens.EQTYPE(yypos,yypos+6)); | |
73 | <INITIAL>"exception" => (Tokens.EXCEPTION(yypos,yypos+9)); | |
74 | <INITIAL>"do" => (Tokens.DO(yypos,yypos+2)); | |
75 | <INITIAL>"=>" => (Tokens.DARROW(yypos,yypos+2)); | |
76 | <INITIAL>"fn" => (Tokens.FN(yypos,yypos+2)); | |
77 | <INITIAL>"fun" => (Tokens.FUN(yypos,yypos+3)); | |
78 | <INITIAL>"functor" => (Tokens.FUNCTOR(yypos,yypos+7)); | |
79 | <INITIAL>"handle" => (Tokens.HANDLE(yypos,yypos+6)); | |
80 | <INITIAL>"if" => (Tokens.IF(yypos,yypos+2)); | |
81 | <INITIAL>"in" => (Tokens.IN(yypos,yypos+2)); | |
82 | <INITIAL>"include" => (Tokens.INCLUDE(yypos,yypos+7)); | |
83 | <INITIAL>"infix" => (Tokens.INFIX(yypos,yypos+5)); | |
84 | <INITIAL>"infixr" => (Tokens.INFIXR(yypos,yypos+6)); | |
85 | <INITIAL>"let" => (Tokens.LET(yypos,yypos+3)); | |
86 | <INITIAL>"local" => (Tokens.LOCAL(yypos,yypos+5)); | |
87 | <INITIAL>"nonfix" => (Tokens.NONFIX(yypos,yypos+6)); | |
88 | <INITIAL>"of" => (Tokens.OF(yypos,yypos+2)); | |
89 | <INITIAL>"op" => (Tokens.OP(yypos,yypos+2)); | |
90 | <INITIAL>"open" => (Tokens.OPEN(yypos,yypos+4)); | |
91 | <INITIAL>"overload" => (Tokens.OVERLOAD(yypos,yypos+8)); | |
92 | <INITIAL>"raise" => (Tokens.RAISE(yypos,yypos+5)); | |
93 | <INITIAL>"rec" => (Tokens.REC(yypos,yypos+3)); | |
94 | <INITIAL>"sharing" => (Tokens.SHARING(yypos,yypos+7)); | |
95 | <INITIAL>"sig" => (Tokens.SIG(yypos,yypos+3)); | |
96 | <INITIAL>"signature" => (Tokens.SIGNATURE(yypos,yypos+9)); | |
97 | <INITIAL>"struct" => (Tokens.STRUCT(yypos,yypos+6)); | |
98 | <INITIAL>"structure" => (Tokens.STRUCTURE(yypos,yypos+9)); | |
99 | <INITIAL>"then" => (Tokens.THEN(yypos,yypos+4)); | |
100 | <INITIAL>"type" => (Tokens.TYPE(yypos,yypos+4)); | |
101 | <INITIAL>"val" => (Tokens.VAL(yypos,yypos+3)); | |
102 | <INITIAL>"while" => (Tokens.WHILE(yypos,yypos+5)); | |
103 | <INITIAL>"with" => (Tokens.WITH(yypos,yypos+4)); | |
104 | <INITIAL>"withtype" => (Tokens.WITHTYPE(yypos,yypos+8)); | |
105 | <INITIAL>"orelse" => (Tokens.ORELSE(yypos,yypos+6)); | |
106 | <INITIAL>"andalso" => (Tokens.ANDALSO(yypos,yypos+7)); | |
107 | <INITIAL>"import" => (Tokens.IMPORT(yypos,yypos+6)); | |
108 | <INITIAL>"'"{idchars}* => (Tokens.TYVAR(yytext, yypos, yypos+size yytext)); | |
109 | <INITIAL>({sym}+|{id}) => (Tokens.ID(yytext, yypos, yypos+size yytext)); | |
110 | <INITIAL>{real} => (Tokens.REAL(yytext,yypos,yypos+size yytext)); | |
111 | <INITIAL>[1-9][0-9]* => (Tokens.INT(makeInt (op +) yytext | |
112 | handle Overflow => (err (yypos,yypos+size yytext) | |
113 | COMPLAIN "integer too large"; 1), | |
114 | yypos,yypos+size yytext)); | |
115 | <INITIAL>{num} => (Tokens.INT0(makeInt (op +) yytext | |
116 | handle Overflow => (err (yypos,yypos+size yytext) | |
117 | COMPLAIN "integer too large"; 0), | |
118 | yypos,yypos+size yytext)); | |
119 | <INITIAL>~{num} => (Tokens.INT0(makeInt (op -) | |
120 | (substring(yytext,1,size(yytext)-1)) | |
121 | handle Overflow => (err (yypos,yypos+size yytext) | |
122 | COMPLAIN "integer too large"; 0), | |
123 | yypos,yypos+size yytext)); | |
124 | <INITIAL>"0x"{hexnum} => ( | |
125 | Tokens.INT0(makeHexInt (op +) (substring(yytext, 2, size(yytext)-2)) | |
126 | handle Overflow => (err (yypos,yypos+size yytext) | |
127 | COMPLAIN "integer too large"; 0), | |
128 | yypos, yypos+size yytext)); | |
129 | <INITIAL>"~0x"{hexnum} => ( | |
130 | Tokens.INT0(makeHexInt (op -) (substring(yytext, 3, size(yytext)-3)) | |
131 | handle Overflow => (err (yypos,yypos+size yytext) | |
132 | COMPLAIN "integer too large"; 0), | |
133 | yypos, yypos+size yytext)); | |
134 | <INITIAL>\" => (charlist := [""]; stringstart := yypos; | |
135 | YYBEGIN S; continue()); | |
136 | <INITIAL>"(*" => (YYBEGIN A; stringstart := yypos; comLevel := 1; continue()); | |
137 | <INITIAL>\h => (err (yypos,yypos) COMPLAIN "non-Ascii character"; continue()); | |
138 | <INITIAL>. => (err (yypos,yypos) COMPLAIN "illegal token"; continue()); | |
139 | <A>"(*" => (inc comLevel; continue()); | |
140 | <A>\n => (inc lineNum; linePos := yypos :: !linePos; continue()); | |
141 | <A>"*)" => (dec comLevel; if !comLevel=0 then YYBEGIN INITIAL else (); continue()); | |
142 | <A>. => (continue()); | |
143 | <S>\" => (YYBEGIN INITIAL; Tokens.STRING(makeString charlist, | |
144 | !stringstart,yypos+1)); | |
145 | <S>\n => (err (!stringstart,yypos) COMPLAIN "unclosed string"; | |
146 | inc lineNum; linePos := yypos :: !linePos; | |
147 | YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos)); | |
148 | <S>[^"\\\n]* => (addString(charlist,yytext); continue()); | |
149 | <S>\\\n => (inc lineNum; linePos := yypos :: !linePos; | |
150 | YYBEGIN F; continue()); | |
151 | <S>\\[\ \t] => (YYBEGIN F; continue()); | |
152 | <F>\n => (inc lineNum; linePos := yypos :: !linePos; continue()); | |
153 | <F>{ws} => (continue()); | |
154 | <F>\\ => (YYBEGIN S; stringstart := yypos; continue()); | |
155 | <F>. => (err (!stringstart,yypos) COMPLAIN "unclosed string"; | |
156 | YYBEGIN INITIAL; Tokens.STRING(makeString charlist,!stringstart,yypos+1)); | |
157 | <S>\\t => (addString(charlist,"\t"); continue()); | |
158 | <S>\\n => (addString(charlist,"\n"); continue()); | |
159 | <S>\\\\ => (addString(charlist,"\\"); continue()); | |
160 | <S>\\\" => (addString(charlist,chr(Ascii.dquote)); continue()); | |
161 | <S>\\\^[@-_] => (addString(charlist,chr(ordof(yytext,2)-ord("@"))); continue()); | |
162 | <S>\\[0-9]{3} => | |
163 | (let val x = ordof(yytext,1)*100 | |
164 | +ordof(yytext,2)*10 | |
165 | +ordof(yytext,3) | |
166 | -(Ascii.zero*111) | |
167 | in (if x>255 | |
168 | then err (yypos,yypos+4) COMPLAIN "illegal ascii escape" | |
169 | else addString(charlist,chr x); | |
170 | continue()) | |
171 | end); | |
172 | <S>\\ => (err (yypos,yypos+1) COMPLAIN "illegal string escape"; | |
173 | continue()); |