Import Upstream version 20180207
[hcoop/debian/mlton.git] / mlton / front-end / mlb.lex
CommitLineData
7f918cf1
CE
1(* Copyright (C) 2009,2016,2017 Matthew Fluet.
2 * Copyright (C) 2004-2006 Henry Cejtin, Matthew Fluet, Suresh
3 * Jagannathan, and Stephen Weeks.
4 *
5 * MLton is released under a BSD-style license.
6 * See the file MLton-LICENSE for details.
7 *)
8
9type svalue = Tokens.svalue
10type pos = SourcePos.t
11type lexresult = (svalue, pos) Tokens.token
12type lexarg = {source: Source.t}
13type arg = lexarg
14type ('a,'b) token = ('a,'b) Tokens.token
15
16fun lastPos (yypos, yytext) = yypos + size yytext - 1
17
18fun tok (t, x, s, l) =
19 let
20 val left = Source.getPos (s, l)
21 val right = Source.getPos (s, lastPos (l, x))
22 in
23 t (left, right)
24 end
25
26fun tok' (t, x, s, l) = tok (fn (l, r) => t (x, l, r), x, s, l)
27
28fun error' (left, right, msg) =
29 Control.errorStr (Region.make {left = left, right = right}, msg)
30fun error (source, left, right, msg) =
31 error' (Source.getPos (source, left), Source.getPos (source, right), msg)
32
33
34(* Comments *)
35local
36 val commentErrors: string list ref = ref []
37 val commentLeft = ref SourcePos.bogus
38 val commentStack: (int -> unit) list ref = ref []
39in
40 fun addCommentError msg =
41 List.push (commentErrors, msg)
42 val inComment = fn () => not (List.isEmpty (!commentStack))
43 fun startComment (source, yypos, th) =
44 if inComment ()
45 then List.push (commentStack, fn _ => th ())
46 else (commentErrors := []
47 ; commentLeft := Source.getPos (source, yypos)
48 ; List.push (commentStack, fn yypos =>
49 (List.foreach (!commentErrors, fn msg =>
50 error' (!commentLeft,
51 Source.getPos (source, yypos),
52 msg))
53 ; th ())))
54 fun finishComment yypos =
55 (List.pop commentStack) yypos
56end
57
58
59(* Line Directives *)
60local
61 val lineDirCol: int ref = ref ~1
62 val lineDirFile: File.t option ref = ref NONE
63 val lineDirLine: int ref = ref ~1
64in
65 fun startLineDir (source, yypos, th) =
66 let
67 val _ = lineDirCol := ~1
68 val _ = lineDirFile := NONE
69 val _ = lineDirLine := ~1
70 in
71 startComment (source, yypos, th)
72 end
73 fun addLineDirLineCol (line, col) =
74 let
75 val _ = lineDirLine := line
76 val _ = lineDirCol := col
77 in
78 ()
79 end
80 fun addLineDirFile file =
81 let
82 val _ = lineDirFile := SOME file
83 in
84 ()
85 end
86 fun finishLineDir (source, yypos) =
87 let
88 val col = !lineDirCol
89 val file = !lineDirFile
90 val line = !lineDirLine
91 val _ = lineDirCol := ~1
92 val _ = lineDirFile := NONE
93 val _ = lineDirLine := ~1
94 in
95 finishComment yypos
96 ; Source.lineDirective (source, file,
97 {lineNum = line,
98 lineStart = yypos + 1 - col})
99 end
100end
101
102
103(* Text Constants *)
104local
105 val chars: char list ref = ref []
106 val inText = ref false
107 val textLeft = ref SourcePos.bogus
108 val textFinishFn: (string * SourcePos.t * SourcePos.t -> lexresult) ref = ref (fn _ => raise Fail "textFinish")
109in
110 fun startText (tl, tf) =
111 let
112 val _ = chars := []
113 val _ = inText := true
114 val _ = textLeft := tl
115 val _ = textFinishFn := tf
116 in
117 ()
118 end
119 fun finishText textRight =
120 let
121 val cs = String.fromListRev (!chars)
122 val tl = !textLeft
123 val tr = textRight
124 val tf = !textFinishFn
125 val _ = chars := []
126 val _ = inText := false
127 val _ = textLeft := SourcePos.bogus
128 val _ = textFinishFn := (fn _ => raise Fail "textFinish")
129 in
130 tf (cs, tl, tr)
131 end
132 fun addTextString (s: string) =
133 chars := String.fold (s, !chars, fn (c, ac) => c :: ac)
134 val inText = fn () => !inText
135end
136fun addTextChar (c: char) = addTextString (String.fromChar c)
137fun addTextNumEsc (source, yypos, yytext, drop, radix): unit =
138 let
139 val left = yypos
140 val right = lastPos (left, yytext)
141 fun err () =
142 error (source, left, right, "Illegal numeric escape in text constant")
143 in
144 case StringCvt.scanString (fn r => IntInf.scan (radix, r)) (String.dropPrefix (yytext, drop)) of
145 NONE => err ()
146 | SOME i => if i > 255
147 then err ()
148 else addTextChar (Char.chr (IntInf.toInt i))
149 end
150fun addTextUTF8 (source, yypos, yytext): unit =
151 addTextString yytext
152
153
154(* EOF *)
155val eof: lexarg -> lexresult =
156 fn {source, ...} =>
157 let
158 val _ = Source.newline (source, ~1)
159 val pos = Source.getPos (source, ~1)
160 val _ =
161 if inComment ()
162 then error' (pos, SourcePos.bogus, "Unclosed comment at end of file")
163 else ()
164 val _ =
165 if inText ()
166 then error' (pos, SourcePos.bogus, "Unclosed text constant at end of file")
167 else ()
168 in
169 Tokens.EOF (pos, SourcePos.bogus)
170 end
171
172
173%%
174%full
175
176%s TEXT TEXT_FMT BLOCK_COMMENT LINE_COMMENT LINE_DIR1 LINE_DIR2 LINE_DIR3 LINE_DIR4;
177
178%header (functor MLBLexFun (structure Tokens : MLB_TOKENS));
179%arg ({source});
180
181ws=\t|"\011"|"\012"|" ";
182cr="\013";
183nl="\010";
184eol=({cr}{nl}|{nl}|{cr});
185
186alphanum=[A-Za-z0-9'_];
187alphanumId=[A-Za-z]{alphanum}*;
188id={alphanumId};
189
190pathvar="$("([A-Z_][A-Z0-9_]*)")";
191filename=({pathvar}|[A-Za-z0-9_.])({pathvar}|[-A-Za-z0-9_.])*;
192arc=({pathvar}|{filename}|"."|"..");
193relpath=({arc}"/")*;
194abspath="/"{relpath};
195path={relpath}|{abspath};
196file={path}{filename};
197
198decDigit=[0-9];
199hexDigit=[0-9a-fA-F];
200
201%%
202<INITIAL>{ws}+ => (continue ());
203<INITIAL>{eol} => (Source.newline (source, lastPos (yypos, yytext)); continue ());
204
205<INITIAL>"_prim" => (tok (Tokens.PRIM, yytext, source, yypos));
206
207<INITIAL>"," => (tok (Tokens.COMMA, yytext, source, yypos));
208<INITIAL>";" => (tok (Tokens.SEMICOLON, yytext, source, yypos));
209<INITIAL>"=" => (tok (Tokens.EQUALOP, yytext, source, yypos));
210
211<INITIAL>"and" => (tok (Tokens.AND, yytext, source, yypos));
212<INITIAL>"ann" => (tok (Tokens.ANN, yytext, source, yypos));
213<INITIAL>"bas" => (tok (Tokens.BAS, yytext, source, yypos));
214<INITIAL>"basis" => (tok (Tokens.BASIS, yytext, source, yypos));
215<INITIAL>"end" => (tok (Tokens.END, yytext, source, yypos));
216<INITIAL>"functor" => (tok (Tokens.FUNCTOR, yytext, source, yypos));
217<INITIAL>"in" => (tok (Tokens.IN, yytext, source, yypos));
218<INITIAL>"let" => (tok (Tokens.LET, yytext, source, yypos));
219<INITIAL>"local" => (tok (Tokens.LOCAL, yytext, source, yypos));
220<INITIAL>"open" => (tok (Tokens.OPEN, yytext, source, yypos));
221<INITIAL>"signature" => (tok (Tokens.SIGNATURE, yytext, source, yypos));
222<INITIAL>"structure" => (tok (Tokens.STRUCTURE, yytext, source, yypos));
223
224<INITIAL>{id} => (tok' (Tokens.ID, yytext, source, yypos));
225<INITIAL>{file} => (tok' (Tokens.FILE, yytext, source, yypos));
226
227<INITIAL>"\"" =>
228 (startText (Source.getPos (source, yypos), fn (s, l, r) =>
229 (YYBEGIN INITIAL;
230 Tokens.STRING (s, l, r)))
231 ; YYBEGIN TEXT
232 ; continue ());
233
234<TEXT>"\"" => (finishText (Source.getPos (source, lastPos (yypos, yytext))));
235<TEXT>" "|!|[\035-\091]|[\093-\126] =>
236 (addTextString yytext; continue ());
237<TEXT>[\192-\223][\128-\191] =>
238 (addTextUTF8 (source, yypos, yytext); continue());
239<TEXT>[\224-\239][\128-\191][\128-\191] =>
240 (addTextUTF8 (source, yypos, yytext); continue());
241<TEXT>[\240-\247][\128-\191][\128-\191][\128-\191] =>
242 (addTextUTF8 (source, yypos, yytext); continue());
243<TEXT>\\a => (addTextChar #"\a"; continue ());
244<TEXT>\\b => (addTextChar #"\b"; continue ());
245<TEXT>\\t => (addTextChar #"\t"; continue ());
246<TEXT>\\n => (addTextChar #"\n"; continue ());
247<TEXT>\\v => (addTextChar #"\v"; continue ());
248<TEXT>\\f => (addTextChar #"\f"; continue ());
249<TEXT>\\r => (addTextChar #"\r"; continue ());
250<TEXT>\\\^[@-_] => (addTextChar (Char.chr(Char.ord(String.sub(yytext, 2)) - Char.ord #"@"));
251 continue ());
252<TEXT>\\\^. => (error (source, yypos, yypos + 2, "Illegal control escape in text constant; must be one of @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_");
253 continue ());
254<TEXT>\\[0-9]{3} => (addTextNumEsc (source, yypos, yytext, 1,
255 StringCvt.DEC)
256 ; continue ());
257<TEXT>\\u{hexDigit}{4} =>
258 (addTextNumEsc (source, yypos, yytext, 2,
259 StringCvt.HEX)
260 ; continue ());
261<TEXT>\\U{hexDigit}{8} =>
262 (addTextNumEsc (source, yypos, yytext, 2,
263 StringCvt.HEX)
264 ; continue ());
265<TEXT>"\\\"" => (addTextString "\""; continue ());
266<TEXT>\\\\ => (addTextString "\\"; continue ());
267<TEXT>\\{ws}+ => (YYBEGIN TEXT_FMT; continue ());
268<TEXT>\\{eol} => (Source.newline (source, lastPos (yypos, yytext)); YYBEGIN TEXT_FMT; continue ());
269<TEXT>\\ => (error (source, yypos, yypos + 1, "Illegal escape in text constant")
270 ; continue ());
271<TEXT>{eol} => (error (source, yypos, lastPos (yypos, yytext), "Unclosed text constant at end of line")
272 ; Source.newline (source, lastPos (yypos, yytext))
273 ; continue ());
274<TEXT>. => (error (source, yypos, yypos, "Illegal character in text constant")
275 ; continue ());
276
277<TEXT_FMT>{ws}+ => (continue ());
278<TEXT_FMT>{eol} => (Source.newline (source, lastPos (yypos, yytext)); continue ());
279<TEXT_FMT>\\ => (YYBEGIN TEXT; continue ());
280<TEXT_FMT>. => (error (source, yypos, yypos, "Illegal formatting character in text continuation")
281 ; continue ());
282
283
284<INITIAL>"(*)" =>
285 (startComment (source, yypos, fn () =>
286 YYBEGIN INITIAL)
287 ; YYBEGIN LINE_COMMENT
288 ; continue ());
289<INITIAL>"(*" =>
290 (startComment (source, yypos, fn () =>
291 YYBEGIN INITIAL)
292 ; YYBEGIN BLOCK_COMMENT
293 ; continue ());
294
295<LINE_COMMENT>{eol} =>
296 (finishComment (lastPos (yypos, yytext))
297 ; Source.newline (source, lastPos (yypos, yytext))
298 ; continue ());
299<LINE_COMMENT>. =>
300 (continue ());
301
302<BLOCK_COMMENT>"(*)" =>
303 (startComment (source, yypos, fn () =>
304 YYBEGIN BLOCK_COMMENT)
305 ; YYBEGIN LINE_COMMENT
306 ; continue ());
307<BLOCK_COMMENT>"(*" =>
308 (startComment (source, yypos, fn () =>
309 YYBEGIN BLOCK_COMMENT)
310 ; YYBEGIN BLOCK_COMMENT
311 ; continue ());
312<BLOCK_COMMENT>"*)" =>
313 (finishComment (lastPos (yypos, yytext))
314 ; continue ());
315<BLOCK_COMMENT>{eol} =>
316 (Source.newline (source, lastPos (yypos, yytext))
317 ; continue ());
318<BLOCK_COMMENT>. =>
319 (continue ());
320
321
322<INITIAL>"(*#line"{ws}+ =>
323 (startLineDir (source, yypos, fn () =>
324 YYBEGIN INITIAL)
325 ; YYBEGIN LINE_DIR1
326 ; continue ());
327
328<LINE_DIR1>{decDigit}+"."{decDigit}+ =>
329 (let
330 fun err () =
331 (addCommentError "Illegal line directive"
332 ; YYBEGIN BLOCK_COMMENT)
333 in
334 case String.split (yytext, #".") of
335 [line, col] =>
336 (YYBEGIN LINE_DIR2
337 ; addLineDirLineCol (valOf (Int.fromString line), valOf (Int.fromString col))
338 handle Overflow => err () | Option => err ()
339 ; continue ())
340 | _ => (err (); continue ())
341 end);
342<LINE_DIR2>{ws}+"\"" =>
343 (YYBEGIN LINE_DIR3
344 ; continue ());
345<LINE_DIR3>[^"]*"\"" =>
346 (addLineDirFile (String.dropLast yytext)
347 ; YYBEGIN LINE_DIR4
348 ; continue ());
349<LINE_DIR2,LINE_DIR4>{ws}*"*)" =>
350 (finishLineDir (source, lastPos (yypos, yytext))
351 ; continue ());
352<LINE_DIR1,LINE_DIR2,LINE_DIR3,LINE_DIR4>. =>
353 (addCommentError "Illegal line directive"
354 ; YYBEGIN BLOCK_COMMENT
355 ; continue ());
356
357
358<INITIAL>. =>
359 (error (source, yypos, yypos, "Illegal character")
360 ; continue ());