Commit | Line | Data |
---|---|---|
7f918cf1 CE |
1 | (* Heavily modified from SML/NJ sources. *) |
2 | ||
3 | (* ml.lex | |
4 | * | |
5 | * Copyright 1989 by AT&T Bell Laboratories | |
6 | * | |
7 | * SML/NJ is released under a BSD-style license. | |
8 | * See the file NJ-LICENSE for details. | |
9 | *) | |
10 | ||
11 | (* Copyright (C) 2009,2016-2017 Matthew Fluet. | |
12 | * Copyright (C) 1999-2006 Henry Cejtin, Matthew Fluet, Suresh | |
13 | * Jagannathan, and Stephen Weeks. | |
14 | * Copyright (C) 1997-2000 NEC Research Institute. | |
15 | * | |
16 | * MLton is released under a BSD-style license. | |
17 | * See the file MLton-LICENSE for details. | |
18 | *) | |
19 | ||
20 | type svalue = Tokens.svalue | |
21 | type pos = SourcePos.t | |
22 | type lexresult = (svalue, pos) Tokens.token | |
23 | type lexarg = {source: Source.t} | |
24 | type arg = lexarg | |
25 | type ('a,'b) token = ('a,'b) Tokens.token | |
26 | ||
27 | local | |
28 | open Control.Elaborate | |
29 | in | |
30 | val allowLineComments = fn () => current allowLineComments | |
31 | val allowExtendedNumConsts = fn () => current allowExtendedNumConsts | |
32 | val allowExtendedTextConsts = fn () => current allowExtendedTextConsts | |
33 | end | |
34 | ||
35 | fun lastPos (yypos, yytext) = yypos + size yytext - 1 | |
36 | ||
37 | fun tok (t, x, s, l) = | |
38 | let | |
39 | val left = Source.getPos (s, l) | |
40 | val right = Source.getPos (s, lastPos (l, x)) | |
41 | in | |
42 | t (left, right) | |
43 | end | |
44 | ||
45 | fun tok' (t, x, s, l) = tok (fn (l, r) => t (x, l, r), x, s, l) | |
46 | ||
47 | fun error' (left, right, msg) = | |
48 | Control.errorStr (Region.make {left = left, right = right}, msg) | |
49 | fun error (source, left, right, msg) = | |
50 | error' (Source.getPos (source, left), Source.getPos (source, right), msg) | |
51 | ||
52 | ||
53 | (* Comments *) | |
54 | local | |
55 | val commentErrors: string list ref = ref [] | |
56 | val commentLeft = ref SourcePos.bogus | |
57 | val commentStack: (int -> unit) list ref = ref [] | |
58 | in | |
59 | fun addCommentError msg = | |
60 | List.push (commentErrors, msg) | |
61 | val inComment = fn () => not (List.isEmpty (!commentStack)) | |
62 | fun startComment (source, yypos, th) = | |
63 | if inComment () | |
64 | then List.push (commentStack, fn _ => th ()) | |
65 | else (commentErrors := [] | |
66 | ; commentLeft := Source.getPos (source, yypos) | |
67 | ; List.push (commentStack, fn yypos => | |
68 | (List.foreach (!commentErrors, fn msg => | |
69 | error' (!commentLeft, | |
70 | Source.getPos (source, yypos), | |
71 | msg)) | |
72 | ; th ()))) | |
73 | fun finishComment yypos = | |
74 | (List.pop commentStack) yypos | |
75 | end | |
76 | ||
77 | ||
78 | (* Line Directives *) | |
79 | local | |
80 | val lineDirCol: int ref = ref ~1 | |
81 | val lineDirFile: File.t option ref = ref NONE | |
82 | val lineDirLine: int ref = ref ~1 | |
83 | in | |
84 | fun startLineDir (source, yypos, th) = | |
85 | let | |
86 | val _ = lineDirCol := ~1 | |
87 | val _ = lineDirFile := NONE | |
88 | val _ = lineDirLine := ~1 | |
89 | in | |
90 | startComment (source, yypos, th) | |
91 | end | |
92 | fun addLineDirLineCol (line, col) = | |
93 | let | |
94 | val _ = lineDirLine := line | |
95 | val _ = lineDirCol := col | |
96 | in | |
97 | () | |
98 | end | |
99 | fun addLineDirFile file = | |
100 | let | |
101 | val _ = lineDirFile := SOME file | |
102 | in | |
103 | () | |
104 | end | |
105 | fun finishLineDir (source, yypos) = | |
106 | let | |
107 | val col = !lineDirCol | |
108 | val file = !lineDirFile | |
109 | val line = !lineDirLine | |
110 | val _ = lineDirCol := ~1 | |
111 | val _ = lineDirFile := NONE | |
112 | val _ = lineDirLine := ~1 | |
113 | in | |
114 | finishComment yypos | |
115 | ; Source.lineDirective (source, file, | |
116 | {lineNum = line, | |
117 | lineStart = yypos + 1 - col}) | |
118 | end | |
119 | end | |
120 | ||
121 | ||
122 | (* Numeric Constants *) | |
123 | local | |
124 | fun doit (source, yypos, yytext, drop, {extended: string option}, mkTok) = | |
125 | let | |
126 | val left = yypos | |
127 | val right = lastPos (yypos, yytext) | |
128 | val extended = | |
129 | if String.contains (yytext, #"_") | |
130 | then SOME (Option.fold | |
131 | (extended, "'_' separators", fn (msg1, msg2) => | |
132 | msg1 ^ " and " ^ msg2)) | |
133 | else extended | |
134 | val _ = | |
135 | case extended of | |
136 | NONE => () | |
137 | | SOME msg => | |
138 | if allowExtendedNumConsts () | |
139 | then () | |
140 | else error (source, left, right, | |
141 | concat ["Extended numeric constants (using ", msg, | |
142 | ") disallowed, compile with -default-ann 'allowExtendedNumConsts true'"]) | |
143 | in | |
144 | mkTok (String.keepAll (String.dropPrefix (yytext, drop), fn c => not (c = #"_")), | |
145 | {extended = Option.isSome extended}, | |
146 | Source.getPos (source, left), Source.getPos (source, right)) | |
147 | end | |
148 | in | |
149 | fun real (source, yypos, yytext) = | |
150 | doit (source, yypos, yytext, 0, {extended = NONE}, fn (digits, {extended: bool}, l, r) => | |
151 | Tokens.REAL (digits, l, r)) | |
152 | fun int (source, yypos, yytext, drop, {extended: string option}, {negate: bool}, radix) = | |
153 | doit (source, yypos, yytext, drop, {extended = extended}, fn (digits, {extended: bool}, l, r) => | |
154 | Tokens.INT ({digits = digits, | |
155 | extended = extended, | |
156 | negate = negate, | |
157 | radix = radix}, | |
158 | l, r)) | |
159 | fun word (source, yypos, yytext, drop, {extended: string option}, radix) = | |
160 | doit (source, yypos, yytext, drop, {extended = extended}, fn (digits, {extended: bool}, l, r) => | |
161 | Tokens.WORD ({digits = digits, | |
162 | radix = radix}, | |
163 | l, r)) | |
164 | end | |
165 | ||
166 | ||
167 | (* Text Constants *) | |
168 | local | |
169 | val chars: IntInf.t list ref = ref [] | |
170 | val inText = ref false | |
171 | val textLeft = ref SourcePos.bogus | |
172 | val textFinishFn: (IntInf.t vector * SourcePos.t * SourcePos.t -> lexresult) ref = ref (fn _ => raise Fail "textFinish") | |
173 | in | |
174 | fun startText (tl, tf) = | |
175 | let | |
176 | val _ = chars := [] | |
177 | val _ = inText := true | |
178 | val _ = textLeft := tl | |
179 | val _ = textFinishFn := tf | |
180 | in | |
181 | () | |
182 | end | |
183 | fun finishText textRight = | |
184 | let | |
185 | val cs = Vector.fromListRev (!chars) | |
186 | val tl = !textLeft | |
187 | val tr = textRight | |
188 | val tf = !textFinishFn | |
189 | val _ = chars := [] | |
190 | val _ = inText := false | |
191 | val _ = textLeft := SourcePos.bogus | |
192 | val _ = textFinishFn := (fn _ => raise Fail "textFinish") | |
193 | in | |
194 | tf (cs, tl, tr) | |
195 | end | |
196 | val inText = fn () => !inText | |
197 | fun addTextString (s: string) = | |
198 | chars := String.fold (s, !chars, fn (c, ac) => Int.toIntInf (Char.ord c) :: ac) | |
199 | fun addTextCharCode (i: IntInf.int) = List.push (chars, i) | |
200 | end | |
201 | fun addTextChar (c: char) = addTextString (String.fromChar c) | |
202 | fun addTextNumEsc (source, yypos, yytext, drop, {extended: string option}, radix): unit = | |
203 | let | |
204 | val left = yypos | |
205 | val right = lastPos (yypos, yytext) | |
206 | val _ = | |
207 | case extended of | |
208 | NONE => () | |
209 | | SOME msg => | |
210 | if allowExtendedTextConsts () | |
211 | then () | |
212 | else error (source, left, right, | |
213 | concat ["Extended text constants (using ", msg, | |
214 | ") disallowed, compile with -default-ann 'allowExtendedTextConsts true'"]) | |
215 | in | |
216 | case StringCvt.scanString (fn r => IntInf.scan (radix, r)) (String.dropPrefix (yytext, drop)) of | |
217 | NONE => error (source, left, right, "Illegal numeric escape in text constant") | |
218 | | SOME i => addTextCharCode i | |
219 | end | |
220 | fun addTextUTF8 (source, yypos, yytext): unit = | |
221 | let | |
222 | val left = yypos | |
223 | val right = lastPos (yypos, yytext) | |
224 | in | |
225 | if not (allowExtendedTextConsts ()) | |
226 | then error (source, left, right, | |
227 | "Extended text constants (using UTF-8 byte sequences) disallowed, compile with -default-ann 'allowExtendedTextConsts true'") | |
228 | else addTextString yytext | |
229 | end | |
230 | ||
231 | ||
232 | (* EOF *) | |
233 | val eof: lexarg -> lexresult = | |
234 | fn {source, ...} => | |
235 | let | |
236 | val _ = Source.newline (source, ~1) | |
237 | val pos = Source.getPos (source, ~1) | |
238 | val _ = | |
239 | if inComment () | |
240 | then error' (pos, SourcePos.bogus, "Unclosed comment at end of file") | |
241 | else () | |
242 | val _ = | |
243 | if inText () | |
244 | then error' (pos, SourcePos.bogus, "Unclosed text constant at end of file") | |
245 | else () | |
246 | in | |
247 | Tokens.EOF (pos, SourcePos.bogus) | |
248 | end | |
249 | ||
250 | ||
251 | %% | |
252 | %full | |
253 | ||
254 | %s TEXT TEXT_FMT BLOCK_COMMENT LINE_COMMENT LINE_DIR1 LINE_DIR2 LINE_DIR3 LINE_DIR4; | |
255 | ||
256 | %header (functor MLLexFun (structure Tokens : ML_TOKENS)); | |
257 | %arg ({source}); | |
258 | ||
259 | ws=\t|"\011"|"\012"|" "; | |
260 | cr="\013"; | |
261 | nl="\010"; | |
262 | eol=({cr}{nl}|{nl}|{cr}); | |
263 | ||
264 | alphanum=[A-Za-z0-9'_]; | |
265 | alphanumId=[A-Za-z]{alphanum}*; | |
266 | sym="!"|"%"|"&"|"$"|"#"|"+"|"-"|"/"|":"|"<"|"="|">"|"?"|"@"|"\\"|"~"|"`"|"^"|"|"|"*"; | |
267 | symId={sym}+; | |
268 | ||
269 | tyvarId="'"{alphanum}*; | |
270 | longSymId=({alphanumId}".")+{symId}; | |
271 | longAlphanumId=({alphanumId}".")+{alphanumId}; | |
272 | ||
273 | decDigit=[0-9]; | |
274 | decnum={decDigit}("_"*{decDigit})*; | |
275 | hexDigit=[0-9a-fA-F]; | |
276 | hexnum={hexDigit}("_"*{hexDigit})*; | |
277 | binDigit=[0-1]; | |
278 | binnum={binDigit}("_"*{binDigit})*; | |
279 | frac="."{decnum}; | |
280 | exp=[eE](~?){decnum}; | |
281 | real=(~?)(({decnum}{frac}?{exp})|({decnum}{frac}{exp}?)); | |
282 | ||
283 | %% | |
284 | <INITIAL>{ws}+ => (continue ()); | |
285 | <INITIAL>{eol} => (Source.newline (source, lastPos (yypos, yytext)); continue ()); | |
286 | ||
287 | ||
288 | <INITIAL>"_address" => (tok (Tokens.ADDRESS, yytext, source, yypos)); | |
289 | <INITIAL>"_build_const" => (tok (Tokens.BUILD_CONST, yytext, source, yypos)); | |
290 | <INITIAL>"_command_line_const" => (tok (Tokens.COMMAND_LINE_CONST, yytext, source, yypos)); | |
291 | <INITIAL>"_const" => (tok (Tokens.CONST, yytext, source, yypos)); | |
292 | <INITIAL>"_export" => (tok (Tokens.EXPORT, yytext, source, yypos)); | |
293 | <INITIAL>"_import" => (tok (Tokens.IMPORT, yytext, source, yypos)); | |
294 | <INITIAL>"_overload" => (tok (Tokens.OVERLOAD, yytext, source, yypos)); | |
295 | <INITIAL>"_prim" => (tok (Tokens.PRIM, yytext, source, yypos)); | |
296 | <INITIAL>"_symbol" => (tok (Tokens.SYMBOL, yytext, source, yypos)); | |
297 | ||
298 | <INITIAL>"#" => (tok (Tokens.HASH, yytext, source, yypos)); | |
299 | <INITIAL>"#[" => (tok (Tokens.HASHLBRACKET, yytext, source, yypos)); | |
300 | <INITIAL>"(" => (tok (Tokens.LPAREN, yytext, source, yypos)); | |
301 | <INITIAL>")" => (tok (Tokens.RPAREN, yytext, source, yypos)); | |
302 | <INITIAL>"," => (tok (Tokens.COMMA, yytext, source, yypos)); | |
303 | <INITIAL>"->" => (tok (Tokens.ARROW, yytext, source, yypos)); | |
304 | <INITIAL>"..." => (tok (Tokens.DOTDOTDOT, yytext, source, yypos)); | |
305 | <INITIAL>":" => (tok (Tokens.COLON, yytext, source, yypos)); | |
306 | <INITIAL>":>" => (tok (Tokens.COLONGT, yytext, source, yypos)); | |
307 | <INITIAL>";" => (tok (Tokens.SEMICOLON, yytext, source, yypos)); | |
308 | <INITIAL>"=" => (tok (Tokens.EQUALOP, yytext, source, yypos)); | |
309 | <INITIAL>"=>" => (tok (Tokens.DARROW, yytext, source, yypos)); | |
310 | <INITIAL>"[" => (tok (Tokens.LBRACKET, yytext, source, yypos)); | |
311 | <INITIAL>"]" => (tok (Tokens.RBRACKET, yytext, source, yypos)); | |
312 | <INITIAL>"_" => (tok (Tokens.WILD, yytext, source, yypos)); | |
313 | <INITIAL>"{" => (tok (Tokens.LBRACE, yytext, source, yypos)); | |
314 | <INITIAL>"|" => (tok (Tokens.BAR, yytext, source, yypos)); | |
315 | <INITIAL>"}" => (tok (Tokens.RBRACE, yytext, source, yypos)); | |
316 | ||
317 | <INITIAL>"abstype" => (tok (Tokens.ABSTYPE, yytext, source, yypos)); | |
318 | <INITIAL>"and" => (tok (Tokens.AND, yytext, source, yypos)); | |
319 | <INITIAL>"andalso" => (tok (Tokens.ANDALSO, yytext, source, yypos)); | |
320 | <INITIAL>"as" => (tok (Tokens.AS, yytext, source, yypos)); | |
321 | <INITIAL>"case" => (tok (Tokens.CASE, yytext, source, yypos)); | |
322 | <INITIAL>"datatype" => (tok (Tokens.DATATYPE, yytext, source, yypos)); | |
323 | <INITIAL>"do" => (tok (Tokens.DO, yytext, source, yypos)); | |
324 | <INITIAL>"else" => (tok (Tokens.ELSE, yytext, source, yypos)); | |
325 | <INITIAL>"end" => (tok (Tokens.END, yytext, source, yypos)); | |
326 | <INITIAL>"eqtype" => (tok (Tokens.EQTYPE, yytext, source, yypos)); | |
327 | <INITIAL>"exception" => (tok (Tokens.EXCEPTION, yytext, source, yypos)); | |
328 | <INITIAL>"fn" => (tok (Tokens.FN, yytext, source, yypos)); | |
329 | <INITIAL>"fun" => (tok (Tokens.FUN, yytext, source, yypos)); | |
330 | <INITIAL>"functor" => (tok (Tokens.FUNCTOR, yytext, source, yypos)); | |
331 | <INITIAL>"handle" => (tok (Tokens.HANDLE, yytext, source, yypos)); | |
332 | <INITIAL>"if" => (tok (Tokens.IF, yytext, source, yypos)); | |
333 | <INITIAL>"in" => (tok (Tokens.IN, yytext, source, yypos)); | |
334 | <INITIAL>"include" => (tok (Tokens.INCLUDE, yytext, source, yypos)); | |
335 | <INITIAL>"infix" => (tok (Tokens.INFIX, yytext, source, yypos)); | |
336 | <INITIAL>"infixr" => (tok (Tokens.INFIXR, yytext, source, yypos)); | |
337 | <INITIAL>"let" => (tok (Tokens.LET, yytext, source, yypos)); | |
338 | <INITIAL>"local" => (tok (Tokens.LOCAL, yytext, source, yypos)); | |
339 | <INITIAL>"nonfix" => (tok (Tokens.NONFIX, yytext, source, yypos)); | |
340 | <INITIAL>"of" => (tok (Tokens.OF, yytext, source, yypos)); | |
341 | <INITIAL>"op" => (tok (Tokens.OP, yytext, source, yypos)); | |
342 | <INITIAL>"open" => (tok (Tokens.OPEN, yytext, source, yypos)); | |
343 | <INITIAL>"orelse" => (tok (Tokens.ORELSE, yytext, source, yypos)); | |
344 | <INITIAL>"raise" => (tok (Tokens.RAISE, yytext, source, yypos)); | |
345 | <INITIAL>"rec" => (tok (Tokens.REC, yytext, source, yypos)); | |
346 | <INITIAL>"sharing" => (tok (Tokens.SHARING, yytext, source, yypos)); | |
347 | <INITIAL>"sig" => (tok (Tokens.SIG, yytext, source, yypos)); | |
348 | <INITIAL>"signature" => (tok (Tokens.SIGNATURE, yytext, source, yypos)); | |
349 | <INITIAL>"struct" => (tok (Tokens.STRUCT, yytext, source, yypos)); | |
350 | <INITIAL>"structure" => (tok (Tokens.STRUCTURE, yytext, source, yypos)); | |
351 | <INITIAL>"then" => (tok (Tokens.THEN, yytext, source, yypos)); | |
352 | <INITIAL>"type" => (tok (Tokens.TYPE, yytext, source, yypos)); | |
353 | <INITIAL>"val" => (tok (Tokens.VAL, yytext, source, yypos)); | |
354 | <INITIAL>"where" => (tok (Tokens.WHERE, yytext, source, yypos)); | |
355 | <INITIAL>"while" => (tok (Tokens.WHILE, yytext, source, yypos)); | |
356 | <INITIAL>"with" => (tok (Tokens.WITH, yytext, source, yypos)); | |
357 | <INITIAL>"withtype" => (tok (Tokens.WITHTYPE, yytext, source, yypos)); | |
358 | ||
359 | ||
360 | <INITIAL>{alphanumId} => (tok' (Tokens.SHORTALPHANUMID, yytext, source, yypos)); | |
361 | <INITIAL>{symId} => | |
362 | (case yytext of | |
363 | "*" => tok (Tokens.ASTERISK, yytext, source, yypos) | |
364 | | _ => tok' (Tokens.SHORTSYMID, yytext, source, yypos)); | |
365 | <INITIAL>{tyvarId} => (tok' (Tokens.TYVAR, yytext, source, yypos)); | |
366 | <INITIAL>{longAlphanumId} => (tok' (Tokens.LONGALPHANUMID, yytext, source, yypos)); | |
367 | <INITIAL>{longSymId} => (tok' (Tokens.LONGSYMID, yytext, source, yypos)); | |
368 | ||
369 | ||
370 | <INITIAL>{real} => | |
371 | (real (source, yypos, yytext)); | |
372 | <INITIAL>{decnum} => | |
373 | (int (source, yypos, yytext, 0, {extended = NONE}, {negate = false}, StringCvt.DEC)); | |
374 | <INITIAL>"~"{decnum} => | |
375 | (int (source, yypos, yytext, 1, {extended = NONE}, {negate = true}, StringCvt.DEC)); | |
376 | <INITIAL>"0x"{hexnum} => | |
377 | (int (source, yypos, yytext, 2, {extended = NONE}, {negate = false}, StringCvt.HEX)); | |
378 | <INITIAL>"~0x"{hexnum} => | |
379 | (int (source, yypos, yytext, 3, {extended = NONE}, {negate = true}, StringCvt.HEX)); | |
380 | <INITIAL>"0b"{binnum} => | |
381 | (int (source, yypos, yytext, 2, {extended = SOME "binary notation"}, {negate = false}, StringCvt.BIN)); | |
382 | <INITIAL>"~0b"{binnum} => | |
383 | (int (source, yypos, yytext, 3, {extended = SOME "binary notation"}, {negate = true}, StringCvt.BIN)); | |
384 | <INITIAL>"0w"{decnum} => | |
385 | (word (source, yypos, yytext, 2, {extended = NONE}, StringCvt.DEC)); | |
386 | <INITIAL>"0wx"{hexnum} => | |
387 | (word (source, yypos, yytext, 3, {extended = NONE}, StringCvt.HEX)); | |
388 | <INITIAL>"0wb"{binnum} => | |
389 | (word (source, yypos, yytext, 3, {extended = SOME "binary notation"}, StringCvt.BIN)); | |
390 | ||
391 | <INITIAL>"\"" => | |
392 | (startText (Source.getPos (source, yypos), fn (cs, l, r) => | |
393 | (YYBEGIN INITIAL; | |
394 | Tokens.STRING (cs, l, r))) | |
395 | ; YYBEGIN TEXT | |
396 | ; continue ()); | |
397 | <INITIAL>"#\"" => | |
398 | (startText (Source.getPos (source, yypos), fn (cs, l, r) => | |
399 | let | |
400 | fun err () = | |
401 | error' (l, r, "character constant not of size 1") | |
402 | val c = | |
403 | case Int.compare (Vector.length cs, 1) of | |
404 | LESS => (err (); 0) | |
405 | | EQUAL => Vector.sub (cs, 0) | |
406 | | GREATER => (err (); Vector.sub (cs, 0)) | |
407 | in | |
408 | YYBEGIN INITIAL; | |
409 | Tokens.CHAR (c, l, r) | |
410 | end) | |
411 | ; YYBEGIN TEXT | |
412 | ; continue ()); | |
413 | ||
414 | <TEXT>"\"" => (finishText (Source.getPos (source, lastPos (yypos, yytext)))); | |
415 | <TEXT>" "|!|[\035-\091]|[\093-\126] => | |
416 | (addTextString yytext; continue ()); | |
417 | <TEXT>[\192-\223][\128-\191] => | |
418 | (addTextUTF8 (source, yypos, yytext); continue()); | |
419 | <TEXT>[\224-\239][\128-\191][\128-\191] => | |
420 | (addTextUTF8 (source, yypos, yytext); continue()); | |
421 | <TEXT>[\240-\247][\128-\191][\128-\191][\128-\191] => | |
422 | (addTextUTF8 (source, yypos, yytext); continue()); | |
423 | <TEXT>\\a => (addTextChar #"\a"; continue ()); | |
424 | <TEXT>\\b => (addTextChar #"\b"; continue ()); | |
425 | <TEXT>\\t => (addTextChar #"\t"; continue ()); | |
426 | <TEXT>\\n => (addTextChar #"\n"; continue ()); | |
427 | <TEXT>\\v => (addTextChar #"\v"; continue ()); | |
428 | <TEXT>\\f => (addTextChar #"\f"; continue ()); | |
429 | <TEXT>\\r => (addTextChar #"\r"; continue ()); | |
430 | <TEXT>\\\^[@-_] => (addTextChar (Char.chr(Char.ord(String.sub(yytext, 2)) - Char.ord #"@")); | |
431 | continue ()); | |
432 | <TEXT>\\\^. => (error (source, yypos, yypos + 2, "Illegal control escape in text constant; must be one of @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"); | |
433 | continue ()); | |
434 | <TEXT>\\[0-9]{3} => (addTextNumEsc (source, yypos, yytext, 1, | |
435 | {extended = NONE}, StringCvt.DEC) | |
436 | ; continue ()); | |
437 | <TEXT>\\u{hexDigit}{4} => | |
438 | (addTextNumEsc (source, yypos, yytext, 2, | |
439 | {extended = NONE}, StringCvt.HEX) | |
440 | ; continue ()); | |
441 | <TEXT>\\U{hexDigit}{8} => | |
442 | (addTextNumEsc (source, yypos, yytext, 2, | |
443 | {extended = SOME "\\Uxxxxxxxx numeric escapes"}, | |
444 | StringCvt.HEX) | |
445 | ; continue ()); | |
446 | <TEXT>"\\\"" => (addTextString "\""; continue ()); | |
447 | <TEXT>\\\\ => (addTextString "\\"; continue ()); | |
448 | <TEXT>\\{ws}+ => (YYBEGIN TEXT_FMT; continue ()); | |
449 | <TEXT>\\{eol} => (Source.newline (source, lastPos (yypos, yytext)); YYBEGIN TEXT_FMT; continue ()); | |
450 | <TEXT>\\ => (error (source, yypos, yypos + 1, "Illegal escape in text constant") | |
451 | ; continue ()); | |
452 | <TEXT>{eol} => (error (source, yypos, lastPos (yypos, yytext), "Unclosed text constant at end of line") | |
453 | ; Source.newline (source, lastPos (yypos, yytext)) | |
454 | ; continue ()); | |
455 | <TEXT>. => (error (source, yypos, yypos, "Illegal character in text constant") | |
456 | ; continue ()); | |
457 | ||
458 | <TEXT_FMT>{ws}+ => (continue ()); | |
459 | <TEXT_FMT>{eol} => (Source.newline (source, lastPos (yypos, yytext)); continue ()); | |
460 | <TEXT_FMT>\\ => (YYBEGIN TEXT; continue ()); | |
461 | <TEXT_FMT>. => (error (source, yypos, yypos, "Illegal formatting character in text continuation") | |
462 | ; continue ()); | |
463 | ||
464 | ||
465 | <INITIAL>"(*)" => | |
466 | (if allowLineComments () | |
467 | then () | |
468 | else error (source, yypos, lastPos (yypos, yytext), | |
469 | "Line comments disallowed, compile with -default-ann 'allowLineComments true'") | |
470 | ; startComment (source, yypos, fn () => | |
471 | YYBEGIN INITIAL) | |
472 | ; YYBEGIN LINE_COMMENT | |
473 | ; continue ()); | |
474 | <INITIAL>"(*" => | |
475 | (startComment (source, yypos, fn () => | |
476 | YYBEGIN INITIAL) | |
477 | ; YYBEGIN BLOCK_COMMENT | |
478 | ; continue ()); | |
479 | ||
480 | <LINE_COMMENT>{eol} => | |
481 | (finishComment (lastPos (yypos, yytext)) | |
482 | ; Source.newline (source, lastPos (yypos, yytext)) | |
483 | ; continue ()); | |
484 | <LINE_COMMENT>. => | |
485 | (continue ()); | |
486 | ||
487 | <BLOCK_COMMENT>"(*)" => | |
488 | (if allowLineComments () | |
489 | then () | |
490 | else error (source, yypos, lastPos (yypos, yytext), | |
491 | "Line comments disallowed, compile with -default-ann 'allowLineComments true'") | |
492 | ; startComment (source, yypos, fn () => | |
493 | YYBEGIN BLOCK_COMMENT) | |
494 | ; YYBEGIN LINE_COMMENT | |
495 | ; continue ()); | |
496 | <BLOCK_COMMENT>"(*" => | |
497 | (startComment (source, yypos, fn () => | |
498 | YYBEGIN BLOCK_COMMENT) | |
499 | ; YYBEGIN BLOCK_COMMENT | |
500 | ; continue ()); | |
501 | <BLOCK_COMMENT>"*)" => | |
502 | (finishComment (lastPos (yypos,yytext)) | |
503 | ; continue ()); | |
504 | <BLOCK_COMMENT>{eol} => | |
505 | (Source.newline (source, lastPos (yypos, yytext)) | |
506 | ; continue ()); | |
507 | <BLOCK_COMMENT>. => | |
508 | (continue ()); | |
509 | ||
510 | ||
511 | <INITIAL>"(*#line"{ws}+ => | |
512 | (startLineDir (source, yypos, fn () => | |
513 | YYBEGIN INITIAL) | |
514 | ; YYBEGIN LINE_DIR1 | |
515 | ; continue ()); | |
516 | ||
517 | <LINE_DIR1>{decDigit}+"."{decDigit}+ => | |
518 | (let | |
519 | fun err () = | |
520 | (addCommentError "Illegal line directive" | |
521 | ; YYBEGIN BLOCK_COMMENT) | |
522 | in | |
523 | case String.split (yytext, #".") of | |
524 | [line, col] => | |
525 | (YYBEGIN LINE_DIR2 | |
526 | ; addLineDirLineCol (valOf (Int.fromString line), valOf (Int.fromString col)) | |
527 | handle Overflow => err () | Option => err () | |
528 | ; continue ()) | |
529 | | _ => (err (); continue ()) | |
530 | end); | |
531 | <LINE_DIR2>{ws}+"\"" => | |
532 | (YYBEGIN LINE_DIR3 | |
533 | ; continue ()); | |
534 | <LINE_DIR3>[^"]*"\"" => | |
535 | (addLineDirFile (String.dropLast yytext) | |
536 | ; YYBEGIN LINE_DIR4 | |
537 | ; continue ()); | |
538 | <LINE_DIR2,LINE_DIR4>{ws}*"*)" => | |
539 | (finishLineDir (source, lastPos (yypos, yytext)) | |
540 | ; continue ()); | |
541 | <LINE_DIR1,LINE_DIR2,LINE_DIR3,LINE_DIR4>. => | |
542 | (addCommentError "Illegal line directive" | |
543 | ; YYBEGIN BLOCK_COMMENT | |
544 | ; continue ()); | |
545 | ||
546 | ||
547 | <INITIAL>"(*#showBasis"{ws}+"\""[^"]*"\""{ws}*"*)" => | |
548 | (let | |
549 | val file = List.nth (String.split (yytext, #"\""), 1) | |
550 | val file = | |
551 | if OS.Path.isAbsolute file | |
552 | then file | |
553 | else OS.Path.mkCanonical (OS.Path.concat (OS.Path.dir (Source.name source), file)) | |
554 | in | |
555 | tok' (fn (_, l, r) => Tokens.SHOW_BASIS (file, l, r), yytext, source, yypos) | |
556 | end); | |
557 | ||
558 | ||
559 | <INITIAL>. => | |
560 | (error (source, yypos, yypos, "Illegal token") | |
561 | ; continue ()); |