Commit | Line | Data |
---|---|---|
7f918cf1 CE |
1 | (* Copyright (C) 2009,2016,2017 Matthew Fluet. |
2 | * Copyright (C) 2004-2006 Henry Cejtin, Matthew Fluet, Suresh | |
3 | * Jagannathan, and Stephen Weeks. | |
4 | * | |
5 | * MLton is released under a BSD-style license. | |
6 | * See the file MLton-LICENSE for details. | |
7 | *) | |
8 | ||
9 | type svalue = Tokens.svalue | |
10 | type pos = SourcePos.t | |
11 | type lexresult = (svalue, pos) Tokens.token | |
12 | type lexarg = {source: Source.t} | |
13 | type arg = lexarg | |
14 | type ('a,'b) token = ('a,'b) Tokens.token | |
15 | ||
16 | fun lastPos (yypos, yytext) = yypos + size yytext - 1 | |
17 | ||
18 | fun tok (t, x, s, l) = | |
19 | let | |
20 | val left = Source.getPos (s, l) | |
21 | val right = Source.getPos (s, lastPos (l, x)) | |
22 | in | |
23 | t (left, right) | |
24 | end | |
25 | ||
26 | fun tok' (t, x, s, l) = tok (fn (l, r) => t (x, l, r), x, s, l) | |
27 | ||
28 | fun error' (left, right, msg) = | |
29 | Control.errorStr (Region.make {left = left, right = right}, msg) | |
30 | fun error (source, left, right, msg) = | |
31 | error' (Source.getPos (source, left), Source.getPos (source, right), msg) | |
32 | ||
33 | ||
34 | (* Comments *) | |
35 | local | |
36 | val commentErrors: string list ref = ref [] | |
37 | val commentLeft = ref SourcePos.bogus | |
38 | val commentStack: (int -> unit) list ref = ref [] | |
39 | in | |
40 | fun addCommentError msg = | |
41 | List.push (commentErrors, msg) | |
42 | val inComment = fn () => not (List.isEmpty (!commentStack)) | |
43 | fun startComment (source, yypos, th) = | |
44 | if inComment () | |
45 | then List.push (commentStack, fn _ => th ()) | |
46 | else (commentErrors := [] | |
47 | ; commentLeft := Source.getPos (source, yypos) | |
48 | ; List.push (commentStack, fn yypos => | |
49 | (List.foreach (!commentErrors, fn msg => | |
50 | error' (!commentLeft, | |
51 | Source.getPos (source, yypos), | |
52 | msg)) | |
53 | ; th ()))) | |
54 | fun finishComment yypos = | |
55 | (List.pop commentStack) yypos | |
56 | end | |
57 | ||
58 | ||
59 | (* Line Directives *) | |
60 | local | |
61 | val lineDirCol: int ref = ref ~1 | |
62 | val lineDirFile: File.t option ref = ref NONE | |
63 | val lineDirLine: int ref = ref ~1 | |
64 | in | |
65 | fun startLineDir (source, yypos, th) = | |
66 | let | |
67 | val _ = lineDirCol := ~1 | |
68 | val _ = lineDirFile := NONE | |
69 | val _ = lineDirLine := ~1 | |
70 | in | |
71 | startComment (source, yypos, th) | |
72 | end | |
73 | fun addLineDirLineCol (line, col) = | |
74 | let | |
75 | val _ = lineDirLine := line | |
76 | val _ = lineDirCol := col | |
77 | in | |
78 | () | |
79 | end | |
80 | fun addLineDirFile file = | |
81 | let | |
82 | val _ = lineDirFile := SOME file | |
83 | in | |
84 | () | |
85 | end | |
86 | fun finishLineDir (source, yypos) = | |
87 | let | |
88 | val col = !lineDirCol | |
89 | val file = !lineDirFile | |
90 | val line = !lineDirLine | |
91 | val _ = lineDirCol := ~1 | |
92 | val _ = lineDirFile := NONE | |
93 | val _ = lineDirLine := ~1 | |
94 | in | |
95 | finishComment yypos | |
96 | ; Source.lineDirective (source, file, | |
97 | {lineNum = line, | |
98 | lineStart = yypos + 1 - col}) | |
99 | end | |
100 | end | |
101 | ||
102 | ||
103 | (* Text Constants *) | |
104 | local | |
105 | val chars: char list ref = ref [] | |
106 | val inText = ref false | |
107 | val textLeft = ref SourcePos.bogus | |
108 | val textFinishFn: (string * SourcePos.t * SourcePos.t -> lexresult) ref = ref (fn _ => raise Fail "textFinish") | |
109 | in | |
110 | fun startText (tl, tf) = | |
111 | let | |
112 | val _ = chars := [] | |
113 | val _ = inText := true | |
114 | val _ = textLeft := tl | |
115 | val _ = textFinishFn := tf | |
116 | in | |
117 | () | |
118 | end | |
119 | fun finishText textRight = | |
120 | let | |
121 | val cs = String.fromListRev (!chars) | |
122 | val tl = !textLeft | |
123 | val tr = textRight | |
124 | val tf = !textFinishFn | |
125 | val _ = chars := [] | |
126 | val _ = inText := false | |
127 | val _ = textLeft := SourcePos.bogus | |
128 | val _ = textFinishFn := (fn _ => raise Fail "textFinish") | |
129 | in | |
130 | tf (cs, tl, tr) | |
131 | end | |
132 | fun addTextString (s: string) = | |
133 | chars := String.fold (s, !chars, fn (c, ac) => c :: ac) | |
134 | val inText = fn () => !inText | |
135 | end | |
136 | fun addTextChar (c: char) = addTextString (String.fromChar c) | |
137 | fun addTextNumEsc (source, yypos, yytext, drop, radix): unit = | |
138 | let | |
139 | val left = yypos | |
140 | val right = lastPos (left, yytext) | |
141 | fun err () = | |
142 | error (source, left, right, "Illegal numeric escape in text constant") | |
143 | in | |
144 | case StringCvt.scanString (fn r => IntInf.scan (radix, r)) (String.dropPrefix (yytext, drop)) of | |
145 | NONE => err () | |
146 | | SOME i => if i > 255 | |
147 | then err () | |
148 | else addTextChar (Char.chr (IntInf.toInt i)) | |
149 | end | |
150 | fun addTextUTF8 (source, yypos, yytext): unit = | |
151 | addTextString yytext | |
152 | ||
153 | ||
154 | (* EOF *) | |
155 | val eof: lexarg -> lexresult = | |
156 | fn {source, ...} => | |
157 | let | |
158 | val _ = Source.newline (source, ~1) | |
159 | val pos = Source.getPos (source, ~1) | |
160 | val _ = | |
161 | if inComment () | |
162 | then error' (pos, SourcePos.bogus, "Unclosed comment at end of file") | |
163 | else () | |
164 | val _ = | |
165 | if inText () | |
166 | then error' (pos, SourcePos.bogus, "Unclosed text constant at end of file") | |
167 | else () | |
168 | in | |
169 | Tokens.EOF (pos, SourcePos.bogus) | |
170 | end | |
171 | ||
172 | ||
173 | %% | |
174 | %full | |
175 | ||
176 | %s TEXT TEXT_FMT BLOCK_COMMENT LINE_COMMENT LINE_DIR1 LINE_DIR2 LINE_DIR3 LINE_DIR4; | |
177 | ||
178 | %header (functor MLBLexFun (structure Tokens : MLB_TOKENS)); | |
179 | %arg ({source}); | |
180 | ||
181 | ws=\t|"\011"|"\012"|" "; | |
182 | cr="\013"; | |
183 | nl="\010"; | |
184 | eol=({cr}{nl}|{nl}|{cr}); | |
185 | ||
186 | alphanum=[A-Za-z0-9'_]; | |
187 | alphanumId=[A-Za-z]{alphanum}*; | |
188 | id={alphanumId}; | |
189 | ||
190 | pathvar="$("([A-Z_][A-Z0-9_]*)")"; | |
191 | filename=({pathvar}|[A-Za-z0-9_.])({pathvar}|[-A-Za-z0-9_.])*; | |
192 | arc=({pathvar}|{filename}|"."|".."); | |
193 | relpath=({arc}"/")*; | |
194 | abspath="/"{relpath}; | |
195 | path={relpath}|{abspath}; | |
196 | file={path}{filename}; | |
197 | ||
198 | decDigit=[0-9]; | |
199 | hexDigit=[0-9a-fA-F]; | |
200 | ||
201 | %% | |
202 | <INITIAL>{ws}+ => (continue ()); | |
203 | <INITIAL>{eol} => (Source.newline (source, lastPos (yypos, yytext)); continue ()); | |
204 | ||
205 | <INITIAL>"_prim" => (tok (Tokens.PRIM, yytext, source, yypos)); | |
206 | ||
207 | <INITIAL>"," => (tok (Tokens.COMMA, yytext, source, yypos)); | |
208 | <INITIAL>";" => (tok (Tokens.SEMICOLON, yytext, source, yypos)); | |
209 | <INITIAL>"=" => (tok (Tokens.EQUALOP, yytext, source, yypos)); | |
210 | ||
211 | <INITIAL>"and" => (tok (Tokens.AND, yytext, source, yypos)); | |
212 | <INITIAL>"ann" => (tok (Tokens.ANN, yytext, source, yypos)); | |
213 | <INITIAL>"bas" => (tok (Tokens.BAS, yytext, source, yypos)); | |
214 | <INITIAL>"basis" => (tok (Tokens.BASIS, yytext, source, yypos)); | |
215 | <INITIAL>"end" => (tok (Tokens.END, yytext, source, yypos)); | |
216 | <INITIAL>"functor" => (tok (Tokens.FUNCTOR, yytext, source, yypos)); | |
217 | <INITIAL>"in" => (tok (Tokens.IN, yytext, source, yypos)); | |
218 | <INITIAL>"let" => (tok (Tokens.LET, yytext, source, yypos)); | |
219 | <INITIAL>"local" => (tok (Tokens.LOCAL, yytext, source, yypos)); | |
220 | <INITIAL>"open" => (tok (Tokens.OPEN, yytext, source, yypos)); | |
221 | <INITIAL>"signature" => (tok (Tokens.SIGNATURE, yytext, source, yypos)); | |
222 | <INITIAL>"structure" => (tok (Tokens.STRUCTURE, yytext, source, yypos)); | |
223 | ||
224 | <INITIAL>{id} => (tok' (Tokens.ID, yytext, source, yypos)); | |
225 | <INITIAL>{file} => (tok' (Tokens.FILE, yytext, source, yypos)); | |
226 | ||
227 | <INITIAL>"\"" => | |
228 | (startText (Source.getPos (source, yypos), fn (s, l, r) => | |
229 | (YYBEGIN INITIAL; | |
230 | Tokens.STRING (s, l, r))) | |
231 | ; YYBEGIN TEXT | |
232 | ; continue ()); | |
233 | ||
234 | <TEXT>"\"" => (finishText (Source.getPos (source, lastPos (yypos, yytext)))); | |
235 | <TEXT>" "|!|[\035-\091]|[\093-\126] => | |
236 | (addTextString yytext; continue ()); | |
237 | <TEXT>[\192-\223][\128-\191] => | |
238 | (addTextUTF8 (source, yypos, yytext); continue()); | |
239 | <TEXT>[\224-\239][\128-\191][\128-\191] => | |
240 | (addTextUTF8 (source, yypos, yytext); continue()); | |
241 | <TEXT>[\240-\247][\128-\191][\128-\191][\128-\191] => | |
242 | (addTextUTF8 (source, yypos, yytext); continue()); | |
243 | <TEXT>\\a => (addTextChar #"\a"; continue ()); | |
244 | <TEXT>\\b => (addTextChar #"\b"; continue ()); | |
245 | <TEXT>\\t => (addTextChar #"\t"; continue ()); | |
246 | <TEXT>\\n => (addTextChar #"\n"; continue ()); | |
247 | <TEXT>\\v => (addTextChar #"\v"; continue ()); | |
248 | <TEXT>\\f => (addTextChar #"\f"; continue ()); | |
249 | <TEXT>\\r => (addTextChar #"\r"; continue ()); | |
250 | <TEXT>\\\^[@-_] => (addTextChar (Char.chr(Char.ord(String.sub(yytext, 2)) - Char.ord #"@")); | |
251 | continue ()); | |
252 | <TEXT>\\\^. => (error (source, yypos, yypos + 2, "Illegal control escape in text constant; must be one of @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"); | |
253 | continue ()); | |
254 | <TEXT>\\[0-9]{3} => (addTextNumEsc (source, yypos, yytext, 1, | |
255 | StringCvt.DEC) | |
256 | ; continue ()); | |
257 | <TEXT>\\u{hexDigit}{4} => | |
258 | (addTextNumEsc (source, yypos, yytext, 2, | |
259 | StringCvt.HEX) | |
260 | ; continue ()); | |
261 | <TEXT>\\U{hexDigit}{8} => | |
262 | (addTextNumEsc (source, yypos, yytext, 2, | |
263 | StringCvt.HEX) | |
264 | ; continue ()); | |
265 | <TEXT>"\\\"" => (addTextString "\""; continue ()); | |
266 | <TEXT>\\\\ => (addTextString "\\"; continue ()); | |
267 | <TEXT>\\{ws}+ => (YYBEGIN TEXT_FMT; continue ()); | |
268 | <TEXT>\\{eol} => (Source.newline (source, lastPos (yypos, yytext)); YYBEGIN TEXT_FMT; continue ()); | |
269 | <TEXT>\\ => (error (source, yypos, yypos + 1, "Illegal escape in text constant") | |
270 | ; continue ()); | |
271 | <TEXT>{eol} => (error (source, yypos, lastPos (yypos, yytext), "Unclosed text constant at end of line") | |
272 | ; Source.newline (source, lastPos (yypos, yytext)) | |
273 | ; continue ()); | |
274 | <TEXT>. => (error (source, yypos, yypos, "Illegal character in text constant") | |
275 | ; continue ()); | |
276 | ||
277 | <TEXT_FMT>{ws}+ => (continue ()); | |
278 | <TEXT_FMT>{eol} => (Source.newline (source, lastPos (yypos, yytext)); continue ()); | |
279 | <TEXT_FMT>\\ => (YYBEGIN TEXT; continue ()); | |
280 | <TEXT_FMT>. => (error (source, yypos, yypos, "Illegal formatting character in text continuation") | |
281 | ; continue ()); | |
282 | ||
283 | ||
284 | <INITIAL>"(*)" => | |
285 | (startComment (source, yypos, fn () => | |
286 | YYBEGIN INITIAL) | |
287 | ; YYBEGIN LINE_COMMENT | |
288 | ; continue ()); | |
289 | <INITIAL>"(*" => | |
290 | (startComment (source, yypos, fn () => | |
291 | YYBEGIN INITIAL) | |
292 | ; YYBEGIN BLOCK_COMMENT | |
293 | ; continue ()); | |
294 | ||
295 | <LINE_COMMENT>{eol} => | |
296 | (finishComment (lastPos (yypos, yytext)) | |
297 | ; Source.newline (source, lastPos (yypos, yytext)) | |
298 | ; continue ()); | |
299 | <LINE_COMMENT>. => | |
300 | (continue ()); | |
301 | ||
302 | <BLOCK_COMMENT>"(*)" => | |
303 | (startComment (source, yypos, fn () => | |
304 | YYBEGIN BLOCK_COMMENT) | |
305 | ; YYBEGIN LINE_COMMENT | |
306 | ; continue ()); | |
307 | <BLOCK_COMMENT>"(*" => | |
308 | (startComment (source, yypos, fn () => | |
309 | YYBEGIN BLOCK_COMMENT) | |
310 | ; YYBEGIN BLOCK_COMMENT | |
311 | ; continue ()); | |
312 | <BLOCK_COMMENT>"*)" => | |
313 | (finishComment (lastPos (yypos, yytext)) | |
314 | ; continue ()); | |
315 | <BLOCK_COMMENT>{eol} => | |
316 | (Source.newline (source, lastPos (yypos, yytext)) | |
317 | ; continue ()); | |
318 | <BLOCK_COMMENT>. => | |
319 | (continue ()); | |
320 | ||
321 | ||
322 | <INITIAL>"(*#line"{ws}+ => | |
323 | (startLineDir (source, yypos, fn () => | |
324 | YYBEGIN INITIAL) | |
325 | ; YYBEGIN LINE_DIR1 | |
326 | ; continue ()); | |
327 | ||
328 | <LINE_DIR1>{decDigit}+"."{decDigit}+ => | |
329 | (let | |
330 | fun err () = | |
331 | (addCommentError "Illegal line directive" | |
332 | ; YYBEGIN BLOCK_COMMENT) | |
333 | in | |
334 | case String.split (yytext, #".") of | |
335 | [line, col] => | |
336 | (YYBEGIN LINE_DIR2 | |
337 | ; addLineDirLineCol (valOf (Int.fromString line), valOf (Int.fromString col)) | |
338 | handle Overflow => err () | Option => err () | |
339 | ; continue ()) | |
340 | | _ => (err (); continue ()) | |
341 | end); | |
342 | <LINE_DIR2>{ws}+"\"" => | |
343 | (YYBEGIN LINE_DIR3 | |
344 | ; continue ()); | |
345 | <LINE_DIR3>[^"]*"\"" => | |
346 | (addLineDirFile (String.dropLast yytext) | |
347 | ; YYBEGIN LINE_DIR4 | |
348 | ; continue ()); | |
349 | <LINE_DIR2,LINE_DIR4>{ws}*"*)" => | |
350 | (finishLineDir (source, lastPos (yypos, yytext)) | |
351 | ; continue ()); | |
352 | <LINE_DIR1,LINE_DIR2,LINE_DIR3,LINE_DIR4>. => | |
353 | (addCommentError "Illegal line directive" | |
354 | ; YYBEGIN BLOCK_COMMENT | |
355 | ; continue ()); | |
356 | ||
357 | ||
358 | <INITIAL>. => | |
359 | (error (source, yypos, yypos, "Illegal character") | |
360 | ; continue ()); |