Import Upstream version 20180207
[hcoop/debian/mlton.git] / benchmark / tests / lexgen.sml
1 (* From the SML/NJ benchmark suite. *)
2 signature BMARK =
3 sig
4 val doit : int -> unit
5 val testit : TextIO.outstream -> unit
6 end;
7 (* Lexical analyzer generator for Standard ML.
8 Version 1.6.0, October 1994
9
10 Copyright (c) 1989-1992 by Andrew W. Appel,
11 David R. Tarditi, James S. Mattson
12
13 This software comes with ABSOLUTELY NO WARRANTY.
14 This software is subject only to the PRINCETON STANDARD ML SOFTWARE LIBRARY
15 COPYRIGHT NOTICE, LICENSE AND DISCLAIMER, (in the file "COPYRIGHT",
16 distributed with this software). You may copy and distribute this software;
17 see the COPYRIGHT NOTICE for details and restrictions.
18
19 Changes:
20 07/25/89 (drt): added %header declaration, code to place
21 user declarations at same level as makeLexer, etc.
22 This is needed for the parser generator.
23 /10/89 (appel): added %arg declaration (see lexgen.doc).
24 /04/90 (drt): fixed following bug: couldn't use the lexer after an
25 error occurred -- NextTok and inquote weren't being reset
26 10/22/91 (drt): disabled use of lookahead
27 10/23/92 (drt): disabled use of $ operator (which involves lookahead),
28 added handlers for dictionary lookup routine
29 11/02/92 (drt): changed handler for exception Reject in generated lexer
30 to Internal.Reject
31 02/01/94 (appel): Moved the exception handler for Reject in such
32 a way as to allow tail-recursion (improves performance
33 wonderfully!).
34 02/01/94 (appel): Fixed a bug in parsing of state names.
35 05/19/94 (Mikael Pettersson, mpe@ida.liu.se):
36 Transition tables are usually represented as strings, but
37 when the range is too large, int vectors constructed by
38 code like "Vector.vector[1,2,3,...]" are used instead.
39 The problem with this isn't that the vector itself takes
40 a lot of space, but that the code generated by SML/NJ to
41 construct the intermediate list at run-time is *HUGE*. My
42 fix is to encode an int vector as a string literal (using
43 two bytes per int) and emit code to decode the string to
44 a vector at run-time. SML/NJ compiles string literals into
45 substrings in the code, so this uses much less space.
46 06/02/94 (jhr): Modified export-lex.sml to conform to new installation
47 scheme. Also removed tab characters from string literals.
48 10/05/94 (jhr): Changed generator to produce code that uses the new
49 basis style strings and characters.
50 10/06/94 (jhr) Modified code to compile under new basis style strings
51 and characters.
52 02/08/95 (jhr) Modified to use new List module interface.
53 05/18/95 (jhr) changed Vector.vector to Vector.fromList
54 *
55 * $Log: lexgen.sml,v $
56 * Revision 1.6 1996/10/03 14:57:30 jhr
57 * Qualified use of Int.quot, since it is no longer available at top-level; improved
58 * the code that prints the tables.
59 *
60 * Revision 1.5 1996/09/16 12:25:14 george
61 * here is a bug in ml-lex (109.17) when using the %count flag. The yylineno
62 * variable should get reinitialized to zero on each call to makeLexer, but
63 * instead is globally allocated and never reset.
64 *
65 * Revision 1.4 1996/08/13 13:50:36 george
66 * Fixed bugs in counting lines (from jhr)
67 *
68 * Revision 1.3 1996/07/25 20:38:52 jhr
69 * Fixed bug in ungetch that caused Subscript exceptions.
70 *
71 * Revision 1.2 1996/02/26 15:02:27 george
72 * print no longer overloaded.
73 * use of makestring has been removed and replaced with Int.toString ..
74 * use of IO replaced with TextIO
75 *
76 * Revision 1.1.1.1 1996/01/31 16:01:15 george
77 * Version 109
78 *
79 *)
80
81 (* Subject: lookahead in sml-lex
82 Reply-to: david.tarditi@CS.CMU.EDU
83 Date: Mon, 21 Oct 91 14:13:26 -0400
84
85 There is a serious bug in the implementation of lookahead,
86 as done in sml-lex, and described in Aho, Sethi, and Ullman,
87 p. 134 "Implementing the Lookahead Operator"
88
89 We have disallowed the use of lookahead for now because
90 of this bug.
91
92 As a counter-example to the implementation described in
93 ASU, consider the following specification with the
94 input string "aba" (this example is taken from
95 a comp.compilers message from Dec. 1989, I think):
96
97 type lexresult=unit
98 val linenum = ref 1
99 fun error x = TextIO.output(TextIO.stdErr, x ^ "\n")
100 val eof = fn () => ()
101 %%
102 %structure Lex
103 %%
104 (a|ab)/ba => (print yytext; print "\n"; ());
105
106 The ASU proposal works as follows. Suppose that we are
107 using NFA's to represent our regular expressions. Then to
108 build an NFA for e1 / e2, we build an NFA n1 for e1
109 and an NFA n2 for e2, and add an epsilon transition
110 from e1 to e2.
111
112 When lexing, when we encounter the end state of e1e2,
113 we take as the end of the string the position in
114 the string that was the last occurrence of the state of
115 the NFA having a transition on the epsilon introduced
116 for /.
117
118 Using the example we have above, we'll have an NFA
119 with the following states:
120
121
122 1 -- a --> 2 -- b --> 3
123 | |
124 | epsilon | epsilon
125 | |
126 |------------> 4 -- b --> 5 -- a --> 6
127
128 On our example, we get the following list of transitions:
129
130 a : 2, 4 (make an epsilon transition from 2 to 4)
131 ab : 3, 4, 5 (make an epsilon transition from 3 to 4)
132 aba : 6
133
134 If we chose the last state in which we made an epsilon transition,
135 we'll chose the transition from 3 to 4, and end up with "ab"
136 as our token, when we should have "a" as our token.
137
138 *)
139
140 functor RedBlack(B : sig type key
141 val > : key*key->bool
142 end):
143 sig type tree
144 type key
145 val empty : tree
146 val insert : key * tree -> tree
147 val lookup : key * tree -> key
148 exception notfound of key
149 end =
150 struct
151 open B
152 datatype color = RED | BLACK
153 datatype tree = empty | tree of key * color * tree * tree
154 exception notfound of key
155
156 fun insert (key,t) =
157 let fun f empty = tree(key,RED,empty,empty)
158 | f (tree(k,BLACK,l,r)) =
159 if key>k
160 then case f r
161 of r as tree(rk,RED, rl as tree(rlk,RED,rll,rlr),rr) =>
162 (case l
163 of tree(lk,RED,ll,lr) =>
164 tree(k,RED,tree(lk,BLACK,ll,lr),
165 tree(rk,BLACK,rl,rr))
166 | _ => tree(rlk,BLACK,tree(k,RED,l,rll),
167 tree(rk,RED,rlr,rr)))
168 | r as tree(rk,RED,rl, rr as tree(rrk,RED,rrl,rrr)) =>
169 (case l
170 of tree(lk,RED,ll,lr) =>
171 tree(k,RED,tree(lk,BLACK,ll,lr),
172 tree(rk,BLACK,rl,rr))
173 | _ => tree(rk,BLACK,tree(k,RED,l,rl),rr))
174 | r => tree(k,BLACK,l,r)
175 else if k>key
176 then case f l
177 of l as tree(lk,RED,ll, lr as tree(lrk,RED,lrl,lrr)) =>
178 (case r
179 of tree(rk,RED,rl,rr) =>
180 tree(k,RED,tree(lk,BLACK,ll,lr),
181 tree(rk,BLACK,rl,rr))
182 | _ => tree(lrk,BLACK,tree(lk,RED,ll,lrl),
183 tree(k,RED,lrr,r)))
184 | l as tree(lk,RED, ll as tree(llk,RED,lll,llr), lr) =>
185 (case r
186 of tree(rk,RED,rl,rr) =>
187 tree(k,RED,tree(lk,BLACK,ll,lr),
188 tree(rk,BLACK,rl,rr))
189 | _ => tree(lk,BLACK,ll,tree(k,RED,lr,r)))
190 | l => tree(k,BLACK,l,r)
191 else tree(key,BLACK,l,r)
192 | f (tree(k,RED,l,r)) =
193 if key>k then tree(k,RED,l, f r)
194 else if k>key then tree(k,RED, f l, r)
195 else tree(key,RED,l,r)
196 in case f t
197 of tree(k,RED, l as tree(_,RED,_,_), r) => tree(k,BLACK,l,r)
198 | tree(k,RED, l, r as tree(_,RED,_,_)) => tree(k,BLACK,l,r)
199 | t => t
200 end
201
202
203 fun lookup (key,t) =
204 let fun look empty = raise (notfound key)
205 | look (tree(k,_,l,r)) =
206 if k>key then look l
207 else if key>k then look r
208 else k
209 in look t
210 end
211
212 end
213
214 signature LEXGEN =
215 sig
216 val lexGen: string -> unit
217 end
218
219 structure LexGen: LEXGEN =
220 struct
221 open Array List
222 infix 9 sub
223
224 datatype token = CHARS of bool array | QMARK | STAR | PLUS | BAR
225 | LP | RP | CARAT | DOLLAR | SLASH | STATE of string list
226 | REPS of int * int | ID of string | ACTION of string
227 | BOF | EOF | ASSIGN | SEMI | ARROW | LEXMARK | LEXSTATES |
228 COUNT | REJECT | FULLCHARSET | STRUCT | HEADER | ARG
229
230 datatype exp = EPS | CLASS of bool array * int | CLOSURE of exp
231 | ALT of exp * exp | CAT of exp * exp | TRAIL of int
232 | END of int
233
234 (* flags describing input Lex spec. - unnecessary code is omitted *)
235 (* if possible *)
236
237 val CharFormat = ref false;
238 val UsesTrailingContext = ref false;
239 val UsesPrevNewLine = ref false;
240
241 (* flags for various bells & whistles that Lex has. These slow the
242 lexer down and should be omitted from production lexers (if you
243 really want speed) *)
244
245 val CountNewLines = ref false;
246 val HaveReject = ref false;
247
248 (* Can increase size of character set *)
249
250 val CharSetSize = ref 129;
251
252 (* Can name structure or declare header code *)
253
254 val StrName = ref "Mlex"
255 val HeaderCode = ref ""
256 val HeaderDecl = ref false
257 val ArgCode = ref (NONE: string option)
258 val StrDecl = ref false
259
260 val ResetFlags = fn () => (CountNewLines := false; HaveReject := false;
261 UsesTrailingContext := false;
262 CharSetSize := 129; StrName := "Mlex";
263 HeaderCode := ""; HeaderDecl:= false;
264 ArgCode := NONE;
265 StrDecl := false)
266
267 val LexOut = ref(TextIO.stdOut)
268 fun say x = TextIO.output(!LexOut, x)
269
270 (* Union: merge two sorted lists of integers *)
271
272 fun union(a,b) = let val rec merge = fn
273 (nil,nil,z) => z
274 | (nil,el::more,z) => merge(nil,more,el::z)
275 | (el::more,nil,z) => merge(more,nil,el::z)
276 | (x::morex,y::morey,z) => if (x:int)=(y:int)
277 then merge(morex,morey,x::z)
278 else if x>y then merge(morex,y::morey,x::z)
279 else merge(x::morex,morey,y::z)
280 in merge(rev a,rev b,nil)
281 end
282
283 (* Nullable: compute if a important expression parse tree node is nullable *)
284
285 val rec nullable = fn
286 EPS => true
287 | CLASS(_) => false
288 | CLOSURE(_) => true
289 | ALT(n1,n2) => nullable(n1) orelse nullable(n2)
290 | CAT(n1,n2) => nullable(n1) andalso nullable(n2)
291 | TRAIL(_) => true
292 | END(_) => false
293
294 (* FIRSTPOS: firstpos function for parse tree expressions *)
295
296 and firstpos = fn
297 EPS => nil
298 | CLASS(_,i) => [i]
299 | CLOSURE(n) => firstpos(n)
300 | ALT(n1,n2) => union(firstpos(n1),firstpos(n2))
301 | CAT(n1,n2) => if nullable(n1) then union(firstpos(n1),firstpos(n2))
302 else firstpos(n1)
303 | TRAIL(i) => [i]
304 | END(i) => [i]
305
306 (* LASTPOS: Lastpos function for parse tree expressions *)
307
308 and lastpos = fn
309 EPS => nil
310 | CLASS(_,i) => [i]
311 | CLOSURE(n) => lastpos(n)
312 | ALT(n1,n2) => union(lastpos(n1),lastpos(n2))
313 | CAT(n1,n2) => if nullable(n2) then union(lastpos(n1),lastpos(n2))
314 else lastpos(n2)
315 | TRAIL(i) => [i]
316 | END(i) => [i]
317 ;
318
319 (* ++: Increment an integer reference *)
320
321 fun ++(x) : int = (x := !x + 1; !x);
322
323 structure Dict =
324 struct
325 type 'a relation = 'a * 'a -> bool
326 abstype ('b,'a) dictionary = DATA of {Table : ('b * 'a) list,
327 Leq : 'b * 'b -> bool }
328 with
329 exception LOOKUP
330 fun create Leqfunc = DATA { Table = nil, Leq = Leqfunc }
331
332 fun lookup (DATA { Table = entrylist, Leq = leq }) key =
333 let fun search [] = raise LOOKUP
334 | search((k,item)::entries) =
335 if leq(key,k)
336 then if leq(k,key) then item else raise LOOKUP
337 else search entries
338 in search entrylist
339 end
340
341 fun enter (DATA { Table = entrylist, Leq = leq })
342 (newentry as (key : 'b,item :'a)) : ('b,'a) dictionary =
343 let val gt = fn a => fn b => not (leq(a,b))
344 val eq = fn k => fn k' => (leq(k,k')) andalso (leq(k',k))
345 fun update nil = [ newentry ]
346 | update ((entry as (k,_))::entries) =
347 if (eq key k) then newentry::entries
348 else if gt k key then newentry::(entry::entries)
349 else entry::(update entries)
350 in DATA { Table = update entrylist, Leq = leq }
351 end
352
353 fun listofdict (DATA { Table = entrylist,Leq = leq}) =
354 let fun f (nil,r) = rev r
355 | f (a::b,r) = f (b,a::r)
356 in f(entrylist,nil)
357 end
358 end
359 end (* structure Dict *)
360
361 open Dict;
362
363 (* INPUT.ML : Input w/ one character push back capability *)
364
365 val LineNum = ref 1;
366
367 abstype ibuf =
368 BUF of TextIO.instream * {b : string ref, p : int ref}
369 with
370 fun make_ibuf(s) = BUF (s, {b=ref"", p = ref 0})
371 fun close_ibuf (BUF (s,_)) = TextIO.closeIn(s)
372 exception eof
373 fun getch (a as (BUF(s,{b,p}))) =
374 if (!p = (size (!b)))
375 then (b := TextIO.inputN(s, 1024);
376 p := 0;
377 if (size (!b))=0
378 then raise eof
379 else getch a)
380 else (let val ch = String.sub(!b,!p)
381 in (if ch = #"\n"
382 then LineNum := !LineNum + 1
383 else ();
384 p := !p + 1;
385 ch)
386 end)
387
388 fun ungetch(BUF(s,{b,p})) = (
389 p := !p - 1;
390 if String.sub(!b,!p) = #"\n"
391 then LineNum := !LineNum - 1
392 else ())
393 end;
394
395 exception Error
396
397 fun prErr x = (
398 TextIO.output (TextIO.stdErr, String.concat [
399 "ml-lex: error, line ", (Int.toString (!LineNum)), ": ", x, "\n"
400 ]);
401 raise Error)
402 fun prSynErr x = (
403 TextIO.output (TextIO.stdErr, String.concat [
404 "ml-lex: syntax error, line ", (Int.toString (!LineNum)), ": ", x, "\n"
405 ]);
406 raise Error)
407
408 exception SyntaxError; (* error in user's input file *)
409
410 exception LexError; (* unexpected error in lexer *)
411
412 val LexBuf = ref(make_ibuf(TextIO.stdIn));
413 val LexState = ref 0;
414 val NextTok = ref BOF;
415 val inquote = ref false;
416
417 fun AdvanceTok () : unit = let
418 fun isLetter c =
419 ((c >= #"a") andalso (c <= #"z")) orelse
420 ((c >= #"A") andalso (c <= #"Z"))
421 fun isDigit c = (c >= #"0") andalso (c <= #"9")
422 (* check for valid (non-leading) identifier character (added by JHR) *)
423 fun isIdentChr c =
424 ((isLetter c) orelse (isDigit c) orelse (c = #"_") orelse (c = #"'"))
425 fun atoi s = let
426 fun num (c::r, n) = if isDigit c
427 then num (r, 10*n + (Char.ord c - Char.ord #"0"))
428 else n
429 | num ([], n) = n
430 in
431 num (explode s, 0)
432 end
433
434 fun skipws () = (case nextch()
435 of #" " => skipws()
436 | #"\t" => skipws()
437 | #"\n" => skipws()
438 | x => x
439 (* end case *))
440
441 and nextch () = getch(!LexBuf)
442
443 and escaped () = (case nextch()
444 of #"b" => #"\008"
445 | #"n" => #"\n"
446 | #"t" => #"\t"
447 | #"h" => #"\128"
448 | x => let
449 fun err t = prErr("illegal ascii escape '"^(implode(rev t))^"'")
450 fun cvt c = (Char.ord c - Char.ord #"0")
451 fun f (n, c, t) = if c=3
452 then if n >= (!CharSetSize)
453 then err t
454 else Char.chr n
455 else let val ch=nextch()
456 in
457 if isDigit ch
458 then f(n*10+(cvt ch), c+1, ch::t)
459 else err t
460 end
461 in
462 if isDigit x then f(cvt x, 1, [x]) else x
463 end
464 (* end case *))
465
466 and onechar x =
467 let val c = array(!CharSetSize, false)
468 in
469 update(c, Char.ord(x), true);
470 CHARS(c)
471 end
472
473 in case !LexState of 0 => let val makeTok = fn () =>
474 case skipws()
475 (* Lex % operators *)
476 of #"%" => (case nextch() of
477 #"%" => LEXMARK
478 | a => let fun f s =
479 let val a = nextch()
480 in if isLetter a then f(a::s)
481 else (ungetch(!LexBuf);
482 implode(rev s))
483 end
484 val command = f [a]
485 in if command = "reject" then REJECT
486 else if command = "count" then COUNT
487 else if command = "full" then FULLCHARSET
488 else if command = "s" then LEXSTATES
489 else if command = "S" then LEXSTATES
490 else if command = "structure" then STRUCT
491 else if command = "header" then HEADER
492 else if command = "arg" then ARG
493 else prErr "unknown % operator "
494 end
495 )
496 (* semicolon (for end of LEXSTATES) *)
497 | #";" => SEMI
498 (* anything else *)
499 | ch => if isLetter(ch) then
500 let fun getID matched =
501 let val x = nextch()
502 (**** fix by JHR
503 in if isLetter(x) orelse isDigit(x) orelse
504 x = "_" orelse x = "'"
505 ****)
506 in if (isIdentChr x)
507 then getID (x::matched)
508 else (ungetch(!LexBuf); implode(rev matched))
509 end
510 in ID(getID [ch])
511 end
512 else (prSynErr ("bad character: " ^ String.str ch))
513 in NextTok := makeTok()
514 end
515 | 1 => let val rec makeTok = fn () =>
516 if !inquote then case nextch() of
517 (* inside quoted string *)
518 #"\\" => onechar(escaped())
519 | #"\"" => (inquote := false; makeTok())
520 | x => onechar(x)
521 else case skipws() of
522 (* single character operators *)
523 #"?" => QMARK
524 | #"*" => STAR
525 | #"+" => PLUS
526 | #"|" => BAR
527 | #"(" => LP
528 | #")" => RP
529 | #"^" => CARAT
530 | #"$" => DOLLAR
531 | #"/" => SLASH
532 | #";" => SEMI
533 | #"." => let val c = array(!CharSetSize,true) in
534 update(c,10,false); CHARS(c)
535 end
536 (* assign and arrow *)
537 | #"=" => let val c = nextch() in
538 if c = #">" then ARROW else (ungetch(!LexBuf); ASSIGN)
539 end
540 (* character set *)
541 | #"[" => let val rec classch = fn () => let val x = skipws()
542 in if x = #"\\" then escaped() else x
543 end;
544 val first = classch();
545 val flag = (first <> #"^");
546 val c = array(!CharSetSize,not flag);
547 fun add NONE = ()
548 | add (SOME x) = update(c, Char.ord(x), flag)
549 and range (x, y) = if x>y
550 then (prErr "bad char. range")
551 else let
552 val i = ref(Char.ord(x)) and j = Char.ord(y)
553 in while !i<=j do (
554 add (SOME(Char.chr(!i)));
555 i := !i + 1)
556 end
557 and getClass last = (case classch()
558 of #"]" => (add(last); c)
559 | #"-" => (case last
560 of NONE => getClass(SOME #"-")
561 | (SOME last') => let val x = classch()
562 in
563 if x = #"]"
564 then (add(last); add(SOME #"-"); c)
565 else (range(last',x); getClass(NONE))
566 end
567 (* end case *))
568 | x => (add(last); getClass(SOME x))
569 (* end case *))
570 in CHARS(getClass(if first = #"^" then NONE else SOME first))
571 end
572 (* Start States specification *)
573 | #"<" => let val rec get_state = fn (prev,matched) =>
574 case nextch() of
575 #">" => matched::prev
576 | #"," => get_state(matched::prev,"")
577 | x => if isIdentChr(x)
578 then get_state(prev,matched ^ String.str x)
579 else (prSynErr "bad start state list")
580 in STATE(get_state(nil,""))
581 end
582 (* {id} or repititions *)
583 | #"{" => let val ch = nextch() in if isLetter(ch) then
584 let fun getID matched = (case nextch()
585 of #"}" => matched
586 | x => if (isIdentChr x) then
587 getID(matched ^ String.str x)
588 else (prErr "invalid char. class name")
589 (* end case *))
590 in ID(getID(String.str ch))
591 end
592 else if isDigit(ch) then
593 let fun get_r (matched, r1) = (case nextch()
594 of #"}" => let val n = atoi(matched) in
595 if r1 = ~1 then (n,n) else (r1,n)
596 end
597 | #"," => if r1 = ~1 then get_r("",atoi(matched))
598 else (prErr "invalid repetitions spec.")
599 | x => if isDigit(x)
600 then get_r(matched ^ String.str x,r1)
601 else (prErr "invalid char in repetitions spec")
602 (* end case *))
603 in REPS(get_r(String.str ch,~1))
604 end
605 else (prErr "bad repetitions spec")
606 end
607 (* Lex % operators *)
608 | #"%" => if nextch() = #"%" then LEXMARK else
609 (ungetch(!LexBuf); onechar (#"%"))
610 (* backslash escape *)
611 | #"\\" => onechar(escaped())
612 (* start quoted string *)
613 | #"\"" => (inquote := true; makeTok())
614 (* anything else *)
615 | ch => onechar(ch)
616 in NextTok := makeTok()
617 end
618 | 2 => NextTok :=
619 (case skipws()
620 of #"(" => let
621 fun GetAct (lpct,x) = (case getch(!LexBuf)
622 of #"(" => GetAct (lpct+1, #"("::x)
623 | #")" => if lpct = 0 then (implode (rev x))
624 else GetAct(lpct-1, #")"::x)
625 | y => GetAct(lpct,y::x)
626 (* end case *))
627 in ACTION (GetAct (0,nil))
628 end
629 | #";" => SEMI
630 | c => (prSynErr ("invalid character " ^ String.str c)))
631 | _ => raise LexError
632 end
633 handle eof => NextTok := EOF ;
634
635 fun GetTok (_:unit) : token =
636 let val t = !NextTok in AdvanceTok(); t
637 end;
638 val SymTab = ref (create String.<=) : (string,exp) dictionary ref
639
640 fun GetExp () : exp =
641
642 let val rec optional = fn e => ALT(EPS,e)
643
644 and lookup' = fn name =>
645 lookup(!SymTab) name
646 handle LOOKUP => prErr ("bad regular expression name: "^
647 name)
648
649 and newline = fn () => let val c = array(!CharSetSize,false) in
650 update(c,10,true); c
651 end
652
653 and endline = fn e => trail(e,CLASS(newline(),0))
654
655 and trail = fn (e1,e2) => CAT(CAT(e1,TRAIL(0)),e2)
656
657 and closure1 = fn e => CAT(e,CLOSURE(e))
658
659 and repeat = fn (min,max,e) => let val rec rep = fn
660 (0,0) => EPS
661 | (0,1) => ALT(e,EPS)
662 | (0,i) => CAT(rep(0,1),rep(0,i-1))
663 | (i,j) => CAT(e,rep(i-1,j-1))
664 in rep(min,max)
665 end
666
667 and exp0 = fn () => case GetTok() of
668 CHARS(c) => exp1(CLASS(c,0))
669 | LP => let val e = exp0() in
670 if !NextTok = RP then
671 (AdvanceTok(); exp1(e))
672 else (prSynErr "missing '('") end
673 | ID(name) => exp1(lookup' name)
674 | _ => raise SyntaxError
675
676 and exp1 = fn (e) => case !NextTok of
677 SEMI => e
678 | ARROW => e
679 | EOF => e
680 | LP => exp2(e,exp0())
681 | RP => e
682 | t => (AdvanceTok(); case t of
683 QMARK => exp1(optional(e))
684 | STAR => exp1(CLOSURE(e))
685 | PLUS => exp1(closure1(e))
686 | CHARS(c) => exp2(e,CLASS(c,0))
687 | BAR => ALT(e,exp0())
688 | DOLLAR => (UsesTrailingContext := true; endline(e))
689 | SLASH => (UsesTrailingContext := true;
690 trail(e,exp0()))
691 | REPS(i,j) => exp1(repeat(i,j,e))
692 | ID(name) => exp2(e,lookup' name)
693 | _ => raise SyntaxError)
694
695 and exp2 = fn (e1,e2) => case !NextTok of
696 SEMI => CAT(e1,e2)
697 | ARROW => CAT(e1,e2)
698 | EOF => CAT(e1,e2)
699 | LP => exp2(CAT(e1,e2),exp0())
700 | RP => CAT(e1,e2)
701 | t => (AdvanceTok(); case t of
702 QMARK => exp1(CAT(e1,optional(e2)))
703 | STAR => exp1(CAT(e1,CLOSURE(e2)))
704 | PLUS => exp1(CAT(e1,closure1(e2)))
705 | CHARS(c) => exp2(CAT(e1,e2),CLASS(c,0))
706 | BAR => ALT(CAT(e1,e2),exp0())
707 | DOLLAR => (UsesTrailingContext := true;
708 endline(CAT(e1,e2)))
709 | SLASH => (UsesTrailingContext := true;
710 trail(CAT(e1,e2),exp0()))
711 | REPS(i,j) => exp1(CAT(e1,repeat(i,j,e2)))
712 | ID(name) => exp2(CAT(e1,e2),lookup' name)
713 | _ => raise SyntaxError)
714 in exp0()
715 end;
716 val StateTab = ref(create(String.<=)) : (string,int) dictionary ref
717
718 val StateNum = ref 0;
719
720 fun GetStates () : int list =
721
722 let fun add nil sl = sl
723 | add (x::y) sl = add y (union ([lookup (!StateTab)(x)
724 handle LOOKUP =>
725 prErr ("bad state name: "^x)
726 ],sl))
727
728 fun addall i sl =
729 if i <= !StateNum then addall (i+2) (union ([i],sl))
730 else sl
731
732 fun incall (x::y) = (x+1)::incall y
733 | incall nil = nil
734
735 fun addincs nil = nil
736 | addincs (x::y) = x::(x+1)::addincs y
737
738 val state_list =
739 case !NextTok of
740 STATE s => (AdvanceTok(); LexState := 1; add s nil)
741 | _ => addall 1 nil
742
743 in case !NextTok
744 of CARAT => (LexState := 1; AdvanceTok(); UsesPrevNewLine := true;
745 incall state_list)
746 | _ => addincs state_list
747 end
748
749 val LeafNum = ref ~1;
750
751 fun renum(e : exp) : exp =
752 let val rec label = fn
753 EPS => EPS
754 | CLASS(x,_) => CLASS(x,++LeafNum)
755 | CLOSURE(e) => CLOSURE(label(e))
756 | ALT(e1,e2) => ALT(label(e1),label(e2))
757 | CAT(e1,e2) => CAT(label(e1),label(e2))
758 | TRAIL(i) => TRAIL(++LeafNum)
759 | END(i) => END(++LeafNum)
760 in label(e)
761 end;
762
763 exception ParseError;
764
765 fun parse() : (string * (int list * exp) list * ((string,string) dictionary)) =
766 let val Accept = ref (create String.<=) : (string,string) dictionary ref
767 val rec ParseRtns = fn l => case getch(!LexBuf) of
768 #"%" => let val c = getch(!LexBuf) in
769 if c = #"%" then (implode (rev l))
770 else ParseRtns(c :: #"%" :: l)
771 end
772 | c => ParseRtns(c::l)
773 and ParseDefs = fn () =>
774 (LexState:=0; AdvanceTok(); case !NextTok of
775 LEXMARK => ()
776 | LEXSTATES =>
777 let fun f () = (case !NextTok of (ID i) =>
778 (StateTab := enter(!StateTab)(i,++StateNum);
779 ++StateNum; AdvanceTok(); f())
780 | _ => ())
781 in AdvanceTok(); f ();
782 if !NextTok=SEMI then ParseDefs() else
783 (prSynErr "expected ';'")
784 end
785 | ID x => (LexState:=1; AdvanceTok(); if GetTok() = ASSIGN
786 then (SymTab := enter(!SymTab)(x,GetExp());
787 if !NextTok = SEMI then ParseDefs()
788 else (prSynErr "expected ';'"))
789 else raise SyntaxError)
790 | REJECT => (HaveReject := true; ParseDefs())
791 | COUNT => (CountNewLines := true; ParseDefs())
792 | FULLCHARSET => (CharSetSize := 256; ParseDefs())
793 | HEADER => (LexState := 2; AdvanceTok();
794 case GetTok()
795 of ACTION s =>
796 if (!StrDecl) then
797 (prErr "cannot have both %s and %header \
798 \declarations")
799 else if (!HeaderDecl) then
800 (prErr "duplicate %header declarations")
801 else
802 (HeaderCode := s; LexState := 0;
803 HeaderDecl := true; ParseDefs())
804 | _ => raise SyntaxError)
805 | ARG => (LexState := 2; AdvanceTok();
806 case GetTok()
807 of ACTION s =>
808 (case !ArgCode
809 of SOME _ => prErr "duplicate %arg declarations"
810 | NONE => ArgCode := SOME s;
811 LexState := 0;
812 ParseDefs())
813 | _ => raise SyntaxError)
814 | STRUCT => (AdvanceTok();
815 case !NextTok of
816 (ID i) =>
817 if (!HeaderDecl) then
818 (prErr "cannot have both %s and %header \
819 \declarations")
820 else if (!StrDecl) then
821 (prErr "duplicate %s declarations")
822 else StrName := i
823 | _ => (prErr "expected ID");
824 ParseDefs())
825 | _ => raise SyntaxError)
826 and ParseRules =
827 fn rules => (LexState:=1; AdvanceTok(); case !NextTok of
828 LEXMARK => rules
829 | EOF => rules
830 | _ =>
831 let val s = GetStates()
832 val e = renum(CAT(GetExp(),END(0)))
833 in
834 if !NextTok = ARROW then
835 (LexState:=2; AdvanceTok();
836 case GetTok() of ACTION(act) =>
837 if !NextTok=SEMI then
838 (Accept:=enter(!Accept) (Int.toString (!LeafNum),act);
839 ParseRules((s,e)::rules))
840 else (prSynErr "expected ';'")
841 | _ => raise SyntaxError)
842 else (prSynErr "expected '=>'")
843 end)
844 in let val usercode = ParseRtns nil
845 in (ParseDefs(); (usercode,ParseRules(nil),!Accept))
846 end
847 end handle SyntaxError => (prSynErr "")
848
849 fun makebegin () : unit =
850 let fun make nil = ()
851 | make ((x,n:int)::y)=(say "val "; say x; say " = " ;
852 say "STARTSTATE ";
853 say (Int.toString n); say ";\n"; make y)
854 in say "\n(* start state definitions *)\n\n"; make(listofdict(!StateTab))
855 end
856
857 structure L =
858 struct
859 nonfix >
860 type key = int list * string
861 fun > ((key,item:string),(key',item')) =
862 let fun f ((a:int)::a') (b::b') = if Int.> (a,b) then true
863 else if a=b then f a' b'
864 else false
865 | f _ _ = false
866 in f key key'
867 end
868 end
869
870 structure RB = RedBlack(L)
871
872 fun maketable (fins:(int * (int list)) list,
873 tcs :(int * (int list)) list,
874 tcpairs: (int * int) list,
875 trans : (int*(int list)) list) : unit =
876
877 (* Fins = (state #, list of final leaves for the state) list
878 tcs = (state #, list of trailing context leaves which begin in this state)
879 list
880 tcpairs = (trailing context leaf, end leaf) list
881 trans = (state #,list of transitions for state) list *)
882
883 let datatype elem = N of int | T of int | D of int
884 val count = ref 0
885 val _ = (if length(trans)<256 then CharFormat := true
886 else CharFormat := false;
887 if !UsesTrailingContext then
888 (say "\ndatatype yyfinstate = N of int | \
889 \ T of int | D of int\n")
890 else say "\ndatatype yyfinstate = N of int";
891 say "\ntype statedata = {fin : yyfinstate list, trans: ";
892 case !CharFormat of
893 true => say "string}"
894 | false => say "int Vector.vector}";
895 say "\n(* transition & final state table *)\nval tab = let\n";
896 case !CharFormat of
897 true => ()
898 | false =>
899 (say "fun decode s k =\n";
900 say " let val k' = k + k\n";
901 say " val hi = Char.ord(String.sub(s, k'))\n";
902 say " val lo = Char.ord(String.sub(s, k' + 1))\n";
903 say " in hi * 256 + lo end\n"))
904 val newfins =
905 let fun IsEndLeaf t =
906 let fun f ((l,e)::r) = if (e=t) then true else f r
907 | f nil = false in f tcpairs end
908
909 fun GetEndLeaf t =
910 let fun f ((tl,el)::r) = if (tl=t) then el else f r
911 in f tcpairs
912 end
913 fun GetTrConLeaves s =
914 let fun f ((s',l)::r) = if (s = s') then l else f r
915 | f nil = nil
916 in f tcs
917 end
918 fun sort_leaves s =
919 let fun insert (x:int) (a::b) =
920 if (x <= a) then x::(a::b)
921 else a::(insert x b)
922 | insert x nil = [x]
923 in List.foldr (fn (x,r) => insert x r) [] s
924 end
925 fun conv a = if (IsEndLeaf a) then (D a) else (N a)
926 fun merge (a::a',b::b') =
927 if (a <= b) then (conv a)::merge(a',b::b')
928 else (T b)::(merge(a::a',b'))
929 | merge (a::a',nil) = (conv a)::(merge (a',nil))
930 | merge (nil,b::b') = (T b)::(merge (b',nil))
931 | merge (nil,nil) = nil
932
933 in map (fn (x,l) =>
934 rev (merge (l,
935 sort_leaves (map (fn x => GetEndLeaf x) (GetTrConLeaves x)))))
936 fins
937 end
938
939 val rs =
940 let open RB
941 fun makeItems x =
942 let fun emit8(x, pos) = let
943 val s = StringCvt.padLeft #"0" 3 (Int.toString x)
944 in
945 case pos
946 of 16 => (say "\\\n\\\\"; say s; 1)
947 | _ => (say "\\"; say s; pos+1)
948 end
949 fun emit16(x, pos) =
950 let val hi8 = x div 256
951 val lo8 = x - hi8 * 256 (* x rem 256 *)
952 in
953 emit8(lo8, emit8(hi8, pos))
954 end
955 fun MakeString([], _, _) = ()
956 | MakeString(x::xs, emitter, pos) =
957 MakeString(xs, emitter, emitter(x, pos))
958 in case !CharFormat of
959 true => (say " =\n\""; MakeString(x,emit8,0); say "\"\n")
960 | false => (say " = Vector.tabulate("; say (Int.toString(length x));
961 say ", decode\n\""; MakeString(x,emit16,0); say "\")\n")
962 end
963 fun makeEntry(nil,rs,t) = rev rs
964 | makeEntry(((l:int,x)::y),rs,t) =
965 let val name = "s" ^ (Int.toString l)
966 in let val (r,n) = lookup ((x,name),t)
967 in makeEntry(y,(n::rs),t)
968 end handle notfound _ => (count := !count+1;
969 say "val "; say name; makeItems x;
970 makeEntry(y,(name::rs),(insert ((x,name),t))))
971 end
972 in (makeEntry(trans,nil,empty))
973 end
974
975 fun makeTable(nil,nil) = ()
976 | makeTable(a::a',b::b') =
977 let fun makeItems nil = ()
978 | makeItems (hd::tl) =
979 let val (t,n) =
980 case hd of
981 (N i) => ("(N ",i)
982 | (T i) => ("(T ",i)
983 | (D i) => ("(D ",i)
984 in (say t; say (Int.toString n); say ")";
985 if null tl
986 then ()
987 else (say ","; makeItems tl))
988 end
989 in (say "{fin = ["; makeItems b;
990 say "], trans = "; say a; say "}";
991 if null a'
992 then ()
993 else (say ",\n"; makeTable(a',b')))
994 end
995
996 fun msg x = () (*TextIO.output(TextIO.stdOut, x)*)
997
998 in (say "in Vector.fromList\n["; makeTable(rs,newfins); say "]\nend\n";
999 msg ("\nNumber of states = " ^ (Int.toString (length trans)));
1000 msg ("\nNumber of distinct rows = " ^ (Int.toString (!count)));
1001 msg ("\nApprox. memory size of trans. table = " ^
1002 (Int.toString (!count*(!CharSetSize)*(if !CharFormat then 1 else 8))));
1003 msg " bytes\n")
1004 end
1005
1006 (* makeaccept: Takes a (string,string) dictionary, prints case statement for
1007 accepting leaf actions. The key strings are the leaf #'s, the data strings
1008 are the actions *)
1009
1010 fun makeaccept ends =
1011 let fun startline f = if f then say " " else say "| "
1012 fun make(nil,f) = (startline f; say "_ => raise Internal.LexerError\n")
1013 | make((x,a)::y,f) = (startline f; say x; say " => (";
1014 say a; say ")\n"; make(y,false))
1015 in make (listofdict(ends),true)
1016 end
1017
1018 fun leafdata(e:(int list * exp) list) =
1019 let val fp = array(!LeafNum + 1,nil)
1020 and leaf = array(!LeafNum + 1,EPS)
1021 and tcpairs = ref nil
1022 and trailmark = ref ~1;
1023 val rec add = fn
1024 (nil,x) => ()
1025 | (hd::tl,x) => (update(fp,hd,union(fp sub hd,x));
1026 add(tl,x))
1027 and moredata = fn
1028 CLOSURE(e1) =>
1029 (moredata(e1); add(lastpos(e1),firstpos(e1)))
1030 | ALT(e1,e2) => (moredata(e1); moredata(e2))
1031 | CAT(e1,e2) => (moredata(e1); moredata(e2);
1032 add(lastpos(e1),firstpos(e2)))
1033 | CLASS(x,i) => update(leaf,i,CLASS(x,i))
1034 | TRAIL(i) => (update(leaf,i,TRAIL(i)); if !trailmark = ~1
1035 then trailmark := i else ())
1036 | END(i) => (update(leaf,i,END(i)); if !trailmark <> ~1
1037 then (tcpairs := (!trailmark,i)::(!tcpairs);
1038 trailmark := ~1) else ())
1039 | _ => ()
1040 and makedata = fn
1041 nil => ()
1042 | (_,x)::tl => (moredata(x);makedata(tl))
1043 in trailmark := ~1; makedata(e); (fp,leaf,!tcpairs)
1044 end;
1045
1046 fun makedfa(rules) =
1047 let val StateTab = ref (create(String.<=)) : (string,int) dictionary ref
1048 val fintab = ref (create(Int.<=)) : (int,(int list)) dictionary ref
1049 val transtab = ref (create(Int.<=)) : (int,int list) dictionary ref
1050 val tctab = ref (create(Int.<=)) : (int,(int list)) dictionary ref
1051 val (fp, leaf, tcpairs) = leafdata(rules);
1052
1053 fun visit (state,statenum) =
1054 let val transitions = gettrans(state) in
1055 fintab := enter(!fintab)(statenum,getfin(state));
1056 tctab := enter(!tctab)(statenum,gettc(state));
1057 transtab := enter(!transtab)(statenum,transitions)
1058 end
1059
1060 and visitstarts (states) =
1061 let fun vs nil i = ()
1062 | vs (hd::tl) i = (visit (hd,i); vs tl (i+1))
1063 in vs states 0
1064 end
1065
1066 and hashstate(s: int list) =
1067 let val rec hs =
1068 fn (nil,z) => z
1069 | ((x:int)::y,z) => hs(y,z ^ " " ^ (Int.toString x))
1070 in hs(s,"")
1071 end
1072
1073 and find(s) = lookup(!StateTab)(hashstate(s))
1074
1075 and add(s,n) = StateTab := enter(!StateTab)(hashstate(s),n)
1076
1077 and getstate (state) =
1078 find(state)
1079 handle LOOKUP => let val n = ++StateNum in
1080 add(state,n); visit(state,n); n
1081 end
1082
1083 and getfin state =
1084 let fun f nil fins = fins
1085 | f (hd::tl) fins =
1086 case (leaf sub hd)
1087 of END _ => f tl (hd::fins)
1088 | _ => f tl fins
1089 in f state nil
1090 end
1091
1092 and gettc state =
1093 let fun f nil fins = fins
1094 | f (hd::tl) fins =
1095 case (leaf sub hd)
1096 of TRAIL _ => f tl (hd::fins)
1097 | _ => f tl fins
1098 in f state nil
1099 end
1100
1101 and gettrans (state) =
1102 let fun loop c tlist =
1103 let fun cktrans nil r = r
1104 | cktrans (hd::tl) r =
1105 case (leaf sub hd) of
1106 CLASS(i,_)=>
1107 (if (i sub c) then cktrans tl (union(r,fp sub hd))
1108 else cktrans tl r handle Subscript =>
1109 cktrans tl r
1110 )
1111 | _ => cktrans tl r
1112 in if c >= 0 then
1113 let val v=cktrans state nil
1114 in loop (c-1) (if v=nil then 0::tlist else (getstate v)::tlist)
1115 end
1116 else tlist
1117 end
1118 in loop ((!CharSetSize) - 1) nil
1119 end
1120
1121 and startstates() =
1122 let val startarray = array(!StateNum + 1, nil);
1123 fun listofarray(a,n) =
1124 let fun f i l = if i >= 0 then f (i-1) ((a sub i)::l) else l
1125 in f (n-1) nil end
1126 val rec makess = fn
1127 nil => ()
1128 | (startlist,e)::tl => (fix(startlist,firstpos(e));makess(tl))
1129 and fix = fn
1130 (nil,_) => ()
1131 | (s::tl,firsts) => (update(startarray,s,
1132 union(firsts,startarray sub s));
1133 fix(tl,firsts))
1134 in makess(rules);listofarray(startarray, !StateNum + 1)
1135 end
1136
1137 in visitstarts(startstates());
1138 (listofdict(!fintab),listofdict(!transtab),listofdict(!tctab),tcpairs)
1139 end
1140
1141 val skel_hd =
1142 " struct\n\
1143 \ structure UserDeclarations =\n\
1144 \ struct\n\
1145 \"
1146
1147 val skel_mid2 =
1148 " | Internal.D k => action (i,(acts::l),k::rs)\n\
1149 \ | Internal.T k =>\n\
1150 \ let fun f (a::b,r) =\n\
1151 \ if a=k\n\
1152 \ then action(i,(((Internal.N a)::acts)::l),(b@r))\n\
1153 \ else f (b,a::r)\n\
1154 \ | f (nil,r) = action(i,(acts::l),rs)\n\
1155 \ in f (rs,nil)\n\
1156 \ end\n\
1157 \"
1158
1159 fun lexGen(infile) =
1160 let val outfile = infile ^ ".sml"
1161 fun PrintLexer (ends) =
1162 let val sayln = fn x => (say x; say "\n")
1163 in case !ArgCode
1164 of NONE => (sayln "fun lex () : Internal.result =";
1165 sayln "let fun continue() = lex() in")
1166 | SOME s => (say "fun lex "; say "(yyarg as ("; say s; sayln ")) =";
1167 sayln "let fun continue() : Internal.result = ");
1168 say " let fun scan (s,AcceptingLeaves : Internal.yyfinstate";
1169 sayln " list list,l,i0) =";
1170 if !UsesTrailingContext
1171 then say "\tlet fun action (i,nil,rs)"
1172 else say "\tlet fun action (i,nil)";
1173 sayln " = raise LexError";
1174 if !UsesTrailingContext
1175 then sayln "\t| action (i,nil::l,rs) = action(i-1,l,rs)"
1176 else sayln "\t| action (i,nil::l) = action (i-1,l)";
1177 if !UsesTrailingContext
1178 then sayln "\t| action (i,(node::acts)::l,rs) ="
1179 else sayln "\t| action (i,(node::acts)::l) =";
1180 sayln "\t\tcase node of";
1181 sayln "\t\t Internal.N yyk => ";
1182 sayln "\t\t\t(let val yytext = substring(!yyb,i0,i-i0)\n\
1183 \\t\t\t val yypos = i0+ !yygone";
1184 if !CountNewLines
1185 then (sayln "\t\t\tval _ = yylineno := CharVector.foldl";
1186 sayln "\t\t\t\t(fn (#\"\\n\", n) => n+1 | (_, n) => n) (!yylineno) yytext")
1187 else ();
1188 if !HaveReject
1189 then (say "\t\t\tfun REJECT() = action(i,acts::l";
1190 if !UsesTrailingContext
1191 then sayln ",rs)" else sayln ")")
1192 else ();
1193 sayln "\t\t\topen UserDeclarations Internal.StartStates";
1194 sayln " in (yybufpos := i; case yyk of ";
1195 sayln "";
1196 sayln "\t\t\t(* Application actions *)\n";
1197 makeaccept(ends);
1198 say "\n\t\t) end ";
1199 say ")\n\n";
1200 if (!UsesTrailingContext) then say skel_mid2 else ();
1201 sayln "\tval {fin,trans} = Vector.sub(Internal.tab, s)";
1202 sayln "\tval NewAcceptingLeaves = fin::AcceptingLeaves";
1203 sayln "\tin if l = !yybl then";
1204 sayln "\t if trans = #trans(Vector.sub(Internal.tab,0))";
1205 sayln "\t then action(l,NewAcceptingLeaves";
1206 if !UsesTrailingContext then say ",nil" else ();
1207 say ") else";
1208 sayln "\t let val newchars= if !yydone then \"\" else yyinput 1024";
1209 sayln "\t in if (size newchars)=0";
1210 sayln "\t\t then (yydone := true;";
1211 say "\t\t if (l=i0) then UserDeclarations.eof ";
1212 sayln (case !ArgCode of NONE => "()" | SOME _ => "yyarg");
1213 say "\t\t else action(l,NewAcceptingLeaves";
1214 if !UsesTrailingContext then
1215 sayln ",nil))" else sayln "))";
1216 sayln "\t\t else (if i0=l then yyb := newchars";
1217 sayln "\t\t else yyb := substring(!yyb,i0,l-i0)^newchars;";
1218 sayln "\t\t yygone := !yygone+i0;";
1219 sayln "\t\t yybl := size (!yyb);";
1220 sayln "\t\t scan (s,AcceptingLeaves,l-i0,0))";
1221 sayln "\t end";
1222 sayln "\t else let val NewChar = Char.ord(String.sub(!yyb,l))";
1223 say "\t\tval NewState = ";
1224 case (!CharFormat,!CharSetSize)
1225 of (true,129) => sayln "if NewChar<128 then Char.ord(String.sub(trans,NewChar)) else Char.ord(String.sub(trans,128))"
1226 | (true,256) => sayln "Char.ord(String.sub(trans,NewChar))"
1227 | (false,129) => sayln "if NewChar<128 then Vector.sub(trans, NewChar) else Vector.sub(trans, 128)"
1228 | (false,256) => sayln "Vector.sub(trans, NewChar)";
1229 say "\t\tin if NewState=0 then action(l,NewAcceptingLeaves";
1230 if !UsesTrailingContext then sayln ",nil)" else sayln ")";
1231 sayln "\t\telse scan(NewState,NewAcceptingLeaves,l+1,i0)";
1232 sayln "\tend";
1233 sayln "\tend";
1234 if !UsesPrevNewLine then () else sayln "(*";
1235 sayln "\tval start= if substring(!yyb,!yybufpos-1,1)=\"\\n\"";
1236 sayln "then !yybegin+1 else !yybegin";
1237 if !UsesPrevNewLine then () else sayln "*)";
1238 say "\tin scan(";
1239 if !UsesPrevNewLine then say "start"
1240 else say "!yybegin (* start *)";
1241 sayln ",nil,!yybufpos,!yybufpos)";
1242 sayln " end";
1243 sayln (case !ArgCode of NONE => "end" | SOME _ => "in continue end");
1244 sayln " in lex";
1245 sayln " end";
1246 sayln "end"
1247 end
1248
1249 in (UsesPrevNewLine := false;
1250 ResetFlags();
1251 LexBuf := make_ibuf(TextIO.openIn infile);
1252 NextTok := BOF;
1253 inquote := false;
1254 LexOut := TextIO.openOut(outfile);
1255 StateNum := 2;
1256 LineNum := 1;
1257 StateTab := enter(create(String.<=))("INITIAL",1);
1258 LeafNum := ~1;
1259 let
1260 val (user_code,rules,ends) =
1261 parse() handle x =>
1262 (close_ibuf(!LexBuf);
1263 TextIO.closeOut(!LexOut);
1264 raise x)
1265 val (fins,trans,tctab,tcpairs) = makedfa(rules)
1266 val _ = if !UsesTrailingContext then
1267 (close_ibuf(!LexBuf);
1268 TextIO.closeOut(!LexOut);
1269 prErr "lookahead is unimplemented")
1270 else ()
1271 in
1272 if (!HeaderDecl)
1273 then say (!HeaderCode)
1274 else say ("structure " ^ (!StrName));
1275 say "=\n";
1276 say skel_hd;
1277 say user_code;
1278 say "end (* end of user routines *)\n";
1279 say "exception LexError (* raised if illegal leaf ";
1280 say "action tried *)\n";
1281 say "structure Internal =\n\tstruct\n";
1282 maketable(fins,tctab,tcpairs,trans);
1283 say "structure StartStates =\n\tstruct\n";
1284 say "\tdatatype yystartstate = STARTSTATE of int\n";
1285 makebegin();
1286 say "\nend\n";
1287 say "type result = UserDeclarations.lexresult\n";
1288 say "\texception LexerError (* raised if illegal leaf ";
1289 say "action tried *)\n";
1290 say "end\n\n";
1291 say "fun makeLexer yyinput = \n";
1292 say "let \n";
1293 if !CountNewLines then say "\tval yylineno = ref 0\n\n" else ();
1294 say "\tval yyb = ref \"\\n\" \t\t(* buffer *)\n\
1295 \\tval yybl = ref 1\t\t(*buffer length *)\n\
1296 \\tval yybufpos = ref 1\t\t(* location of next character to use *)\n\
1297 \\tval yygone = ref 1\t\t(* position in file of beginning of buffer *)\n\
1298 \\tval yydone = ref false\t\t(* eof found yet? *)\n\
1299 \\tval yybegin = ref 1\t\t(*Current 'start state' for lexer *)\n\
1300 \\n\tval YYBEGIN = fn (Internal.StartStates.STARTSTATE x) =>\n\
1301 \\t\t yybegin := x\n\n";
1302 PrintLexer(ends);
1303 close_ibuf(!LexBuf);
1304 TextIO.closeOut(!LexOut)
1305 end)
1306 end
1307 end
1308
1309 structure Main : BMARK =
1310 struct
1311 val s = OS.FileSys.getDir()
1312 fun doit () = LexGen.lexGen (s^"/DATA/ml.lex");
1313 val doit =
1314 fn size =>
1315 let
1316 fun loop n =
1317 if n = 0
1318 then ()
1319 else (doit();
1320 loop(n-1))
1321 in loop size
1322 end
1323
1324 fun testit _ = LexGen.lexGen (s^"DATA/ml.lex")
1325 end (* Main *)