1 (* Modified by Matthew Fluet on
2011-06-17.
2 * Use simple file
name (rather than absolute paths
) in line directives
in output
.
4 (* Modified by Vesa Karvonen on
2007-12-19.
5 * Create line directives
in output
.
7 (* Modified by Matthew Fluet on
2007-11-07.
10 (* Modified by StephenWeeks on
2005-08-18.
11 * Fix file starting position
13 (* Modified by Stephen Weeks on
2004-10-19.
14 * Do not create references to Unsafe
structure.
16 (* Lexical analyzer generator for Standard ML
.
17 Version
1.7.0, June
1998
19 Copyright (c
) 1989-1992 by Andrew W
. Appel
,
20 David R
. Tarditi
, James S
. Mattson
22 This software comes
with ABSOLUTELY NO WARRANTY
.
23 This software is subject only to the PRINCETON STANDARD ML SOFTWARE LIBRARY
24 COPYRIGHT NOTICE
, LICENSE AND DISCLAIMER
, (in the file
"COPYRIGHT",
25 distributed
with this software
). You may copy
and distribute this software
;
26 see the COPYRIGHT NOTICE for details
and restrictions
.
29 07/25/89 (drt
): added
%header declaration
, code to place
30 user declarations at same level
as makeLexer
, etc
.
31 This is needed for the parser generator
.
32 /10/89 (appel
): added
%arg
declaration (see lexgen
.doc
).
33 /04/90 (drt
): fixed following bug
: couldn
't use the lexer after an
34 error occurred
-- NextTok
and inquote weren
't being reset
35 10/22/91 (drt
): disabled use
of lookahead
36 10/23/92 (drt
): disabled use
of $
operator (which involves lookahead
),
37 added handlers for dictionary lookup routine
38 11/02/92 (drt
): changed handler for
exception Reject
in generated lexer
40 02/01/94 (appel
): Moved the
exception handler for Reject
in such
41 a way
as to allow tail
-recursion (improves performance
43 02/01/94 (appel
): Fixed a bug
in parsing
of state names
.
44 05/19/94 (Mikael Pettersson
, mpe@ida
.liu
.se
):
45 Transition tables are usually represented
as strings
, but
46 when the range is too large
, int vectors constructed by
47 code like
"Vector.vector[1,2,3,...]" are used instead
.
48 The problem
with this isn
't that the vector itself takes
49 a lot
of space
, but that the code generated by SML
/NJ to
50 construct the intermediate list at run
-time is
*HUGE
*. My
51 fix is to encode an
int vector
as a
string literal (using
52 two bytes per
int) and emit code to decode the
string to
53 a vector at run
-time
. SML
/NJ compiles
string literals into
54 substrings
in the code
, so this uses much less space
.
55 06/02/94 (jhr
): Modified export
-lex
.sml to conform to new installation
56 scheme
. Also removed tab characters from
string literals
.
57 10/05/94 (jhr
): Changed generator to produce code that uses the new
58 basis style strings
and characters
.
59 10/06/94 (jhr
) Modified code to compile under new basis style strings
61 02/08/95 (jhr
) Modified to use new
List module interface
.
62 05/18/95 (jhr
) changed
Vector.vector to
Vector.fromList
64 * Revision
1.9 1998/01/06 19:23:53 appel
65 * added
%posarg feature to permit position
-within
-file to be passed
66 * as a parameter to makeLexer
68 # Revision
1.8 1998/01/06 19:01:48 appel
69 # repaired error messages like
"cannot have both %structure and %header"
71 # Revision
1.7 1998/01/06 18:55:49 appel
72 # permit
%% to be unescaped within regular expressions
74 # Revision
1.6 1998/01/06 18:46:13 appel
75 # removed undocumented feature that permitted extra
%% at
end of rules
77 # Revision
1.5 1998/01/06 18:29:23 appel
78 # put yylineno variable inside makeLexer function
80 # Revision
1.4 1998/01/06 18:19:59 appel
81 # check for newline inside quoted
string
83 # Revision
1.3 1997/10/04 03:52:13 dbm
84 # Fix to remove output file
if ml
-lex fails
.
86 10/17/02 (jhr
) changed bad character error message to properly
87 print the bad character
.
88 10/17/02 (jhr
) fixed skipws to use
Char.isSpace test
.
89 07/27/05 (jhr
) add
\r as a recognized escape sequence
.
92 (* Subject
: lookahead
in sml
-lex
93 Reply
-to
: david
.tarditi@CS
.CMU
.EDU
94 Date
: Mon
, 21 Oct
91 14:13:26 -0400
96 There is a serious bug
in the implementation
of lookahead
,
97 as done
in sml
-lex
, and described
in Aho
, Sethi
, and Ullman
,
98 p
. 134 "Implementing the Lookahead Operator"
100 We have disallowed the use
of lookahead for now because
103 As a counter
-example to the implementation described
in
104 ASU
, consider the following specification
with the
105 input
string "aba" (this example is taken from
106 a comp
.compilers message from Dec
. 1989, I think
):
110 fun error x
= TextIO.output(TextIO.stdErr
, x ^
"\n")
111 val eof
= fn () => ()
115 (a|ab
)/ba
=> (print yytext
; print
"\n"; ());
117 The ASU proposal works
as follows
. Suppose that we are
118 using NFA
's to represent our regular expressions
. Then to
119 build an NFA for e1
/ e2
, we build an NFA n1 for e1
120 and an NFA n2 for e2
, and add an epsilon transition
123 When lexing
, when we encounter the
end state
of e1e2
,
124 we take
as the
end of the
string the position
in
125 the
string that was the last occurrence
of the state
of
126 the NFA having a transition on the epsilon introduced
129 Using the example we have above
, we
'll have an NFA
130 with the following states
:
133 1 -- a
--> 2 -- b
--> 3
137 |
------------> 4 -- b
--> 5 -- a
--> 6
139 On our example
, we get the following list
of transitions
:
141 a
: 2, 4 (make an epsilon transition from
2 to
4)
142 ab
: 3, 4, 5 (make an epsilon transition from
3 to
4)
145 If we chose the last state
in which we made an epsilon transition
,
146 we
'll chose the transition from
3 to
4, and end up
with "ab"
147 as our token
, when we should have
"a" as our token
.
151 functor RedBlack(B
: sig type key
152 val > : key
*key
->bool
157 val insert
: key
* tree
-> tree
158 val lookup
: key
* tree
-> key
159 exception notfound
of key
163 datatype color
= RED | BLACK
164 datatype tree
= empty | tree
of key
* color
* tree
* tree
165 exception notfound
of key
168 let fun f empty
= tree(key
,RED
,empty
,empty
)
169 |
f (tree(k
,BLACK
,l
,r
)) =
172 of r
as tree(rk
,RED
, rl
as tree(rlk
,RED
,rll
,rlr
),rr
) =>
174 of tree(lk
,RED
,ll
,lr
) =>
175 tree(k
,RED
,tree(lk
,BLACK
,ll
,lr
),
176 tree(rk
,BLACK
,rl
,rr
))
177 | _
=> tree(rlk
,BLACK
,tree(k
,RED
,l
,rll
),
178 tree(rk
,RED
,rlr
,rr
)))
179 | r
as tree(rk
,RED
,rl
, rr
as tree(rrk
,RED
,rrl
,rrr
)) =>
181 of tree(lk
,RED
,ll
,lr
) =>
182 tree(k
,RED
,tree(lk
,BLACK
,ll
,lr
),
183 tree(rk
,BLACK
,rl
,rr
))
184 | _
=> tree(rk
,BLACK
,tree(k
,RED
,l
,rl
),rr
))
185 | r
=> tree(k
,BLACK
,l
,r
)
188 of l
as tree(lk
,RED
,ll
, lr
as tree(lrk
,RED
,lrl
,lrr
)) =>
190 of tree(rk
,RED
,rl
,rr
) =>
191 tree(k
,RED
,tree(lk
,BLACK
,ll
,lr
),
192 tree(rk
,BLACK
,rl
,rr
))
193 | _
=> tree(lrk
,BLACK
,tree(lk
,RED
,ll
,lrl
),
195 | l
as tree(lk
,RED
, ll
as tree(llk
,RED
,lll
,llr
), lr
) =>
197 of tree(rk
,RED
,rl
,rr
) =>
198 tree(k
,RED
,tree(lk
,BLACK
,ll
,lr
),
199 tree(rk
,BLACK
,rl
,rr
))
200 | _
=> tree(lk
,BLACK
,ll
,tree(k
,RED
,lr
,r
)))
201 | l
=> tree(k
,BLACK
,l
,r
)
202 else tree(key
,BLACK
,l
,r
)
203 |
f (tree(k
,RED
,l
,r
)) =
204 if key
>k
then tree(k
,RED
,l
, f r
)
205 else if k
>key
then tree(k
,RED
, f l
, r
)
206 else tree(key
,RED
,l
,r
)
208 of tree(k
,RED
, l
as tree(_
,RED
,_
,_
), r
) => tree(k
,BLACK
,l
,r
)
209 |
tree(k
,RED
, l
, r
as tree(_
,RED
,_
,_
)) => tree(k
,BLACK
,l
,r
)
215 let fun look empty
= raise (notfound key
)
216 |
look (tree(k
,_
,l
,r
)) =
218 else if key
>k
then look r
227 val lexGen
: string -> unit
230 structure LexGen
: LEXGEN
=
235 type pos
= {line
: int, col
: int}
237 datatype token
= CHARS
of bool array | QMARK | STAR | PLUS | BAR
238 | LP | RP | CARAT | DOLLAR | SLASH | STATE
of string list
239 | REPS
of int * int | ID
of string | ACTION
of pos
* string
240 | BOF | EOF | ASSIGN | SEMI | ARROW | LEXMARK | LEXSTATES
241 | COUNT | REJECT | FULLCHARSET | STRUCT | HEADER | ARG | POSARG
244 datatype exp
= EPS | CLASS
of bool array
* int | CLOSURE
of exp
245 | ALT
of exp
* exp | CAT
of exp
* exp | TRAIL
of int
248 (* flags describing input Lex spec
. - unnecessary code is omitted
*)
251 val CharFormat
= ref
false;
252 val UsesTrailingContext
= ref
false;
253 val UsesPrevNewLine
= ref
false;
255 (* flags for various bells
& whistles that Lex has
. These slow the
256 lexer down
and should be omitted from production
lexers (if you
257 really want speed
) *)
259 val CountNewLines
= ref
false;
260 val PosArg
= ref
false;
261 val HaveReject
= ref
false;
263 (* Can increase size
of character set
*)
265 val CharSetSize
= ref
129;
267 (* Can name
structure or declare header code
*)
269 val StrName
= ref
"Mlex"
270 val HeaderCode
= ref
""
271 val HeaderPos
= ref
{line
= 0, col
= 0}
272 val HeaderDecl
= ref
false
273 val ArgCode
= ref (NONE
: (pos
* string) option
)
274 val StrDecl
= ref
false
276 (* Can define INTEGER
structure for yypos variable
. *)
277 val PosIntName
= ref
"Int"
278 val PosIntDecl
= ref
false
280 val ResetFlags
= fn () => (CountNewLines
:= false; HaveReject
:= false;
282 UsesTrailingContext
:= false;
283 CharSetSize
:= 129; StrName
:= "Mlex";
284 HeaderCode
:= ""; HeaderDecl
:= false;
287 PosIntName
:= "Int"; PosIntDecl
:= false)
289 val LexOut
= ref(TextIO.stdOut
)
290 val LexOutLine
= ref
1
291 fun setLexOut s
= (LexOut
:= s
; LexOutLine
:= 1)
293 (TextIO.output (!LexOut
, x
)
295 (fn #
"\n" => LexOutLine
:= !LexOutLine
+ 1 | _
=> ())
299 fun fmtLineDir
{line
, col
} file
=
300 String.concat
["(*#line ", Int.toString line
, ".", Int.toString (col
+1),
303 fn SOME pos
=> say (fmtLineDir
pos (!InFile
))
304 | NONE
=> (say (fmtLineDir
{line
= !LexOutLine
, col
= 0} (!OutFile
));
307 (* Union
: merge two sorted lists
of integers
*)
309 fun union(a
,b
) = let val rec merge
= fn
311 |
(nil
,el
::more
,z
) => merge(nil
,more
,el
::z
)
312 |
(el
::more
,nil
,z
) => merge(more
,nil
,el
::z
)
313 |
(x
::morex
,y
::morey
,z
) => if (x
:int)=(y
:int)
314 then merge(morex
,morey
,x
::z
)
315 else if x
>y
then merge(morex
,y
::morey
,x
::z
)
316 else merge(x
::morex
,morey
,y
::z
)
317 in merge(rev a
,rev b
,nil
)
320 (* Nullable
: compute
if a important expression parse tree node is nullable
*)
322 val rec nullable
= fn
326 |
ALT(n1
,n2
) => nullable(n1
) orelse nullable(n2
)
327 |
CAT(n1
,n2
) => nullable(n1
) andalso nullable(n2
)
331 (* FIRSTPOS
: firstpos function for parse tree expressions
*)
336 |
CLOSURE(n
) => firstpos(n
)
337 |
ALT(n1
,n2
) => union(firstpos(n1
),firstpos(n2
))
338 |
CAT(n1
,n2
) => if nullable(n1
) then union(firstpos(n1
),firstpos(n2
))
343 (* LASTPOS
: Lastpos function for parse tree expressions
*)
348 |
CLOSURE(n
) => lastpos(n
)
349 |
ALT(n1
,n2
) => union(lastpos(n1
),lastpos(n2
))
350 |
CAT(n1
,n2
) => if nullable(n2
) then union(lastpos(n1
),lastpos(n2
))
356 (* ++: Increment an integer reference
*)
358 fun ++(x
) : int = (x
:= !x
+ 1; !x
);
362 type 'a relation
= 'a
* 'a
-> bool
363 abstype ('b
,'a
) dictionary
= DATA
of { Table
: ('b
* 'a
) list
,
364 Leq
: 'b
* 'b
-> bool }
367 fun create Leqfunc
= DATA
{ Table
= nil
, Leq
= Leqfunc
}
368 fun lookup (DATA
{ Table
= entrylist
, Leq
= leq
}) key
=
369 let fun search
[] = raise LOOKUP
370 |
search((k
,item
)::entries
) =
372 then if leq(k
,key
) then item
else raise LOOKUP
376 fun enter (DATA
{ Table
= entrylist
, Leq
= leq
})
377 (newentry
as (key
: 'b
,item
:'a
)) : ('b
,'a
) dictionary
=
378 let val gt
= fn a
=> fn b
=> not (leq(a
,b
))
379 val eq
= fn k
=> fn k
' => (leq(k
,k
')) andalso (leq(k
',k
))
380 fun update nil
= [ newentry
]
381 |
update ((entry
as (k
,_
))::entries
) =
382 if (eq key k
) then newentry
::entries
383 else if gt k key
then newentry
::(entry
::entries
)
384 else entry
::(update entries
)
385 in DATA
{ Table
= update entrylist
, Leq
= leq
}
387 fun listofdict (DATA
{ Table
= entrylist
,Leq
= leq
}) =
388 let fun f (nil
,r
) = rev r
389 |
f (a
::b
,r
) = f (b
,a
::r
)
397 (* INPUT
.ML
: Input w
/ one character push back capability
*)
402 BUF
of TextIO.instream
* {b
: string ref
, p
: int ref
}
406 val linePos
= ref
0 (* incorrect after ungetch newline
, non fatal
*)
408 fun resetLexPos () = (LineNum
:= 1; pos
:= 0; linePos
:=0)
409 fun getLexPos () = {line
= !LineNum
, col
= !pos
- !linePos
}
410 fun make_ibuf(s
) = BUF (s
, {b
=ref
"", p
= ref
0})
411 fun close_ibuf (BUF (s
,_
)) = TextIO.closeIn(s
)
413 fun getch (a
as (BUF(s
,{b
,p
}))) =
414 if (!p
= (size (!b
)))
415 then (b
:= TextIO.inputN(s
, 1024);
420 else (let val ch
= String.sub(!b
,!p
)
423 then (LineNum
:= !LineNum
+ 1;
429 fun ungetch(BUF(s
,{b
,p
})) = (
432 if String.sub(!b
,!p
) = #
"\n"
433 then LineNum
:= !LineNum
- 1
441 TextIO.output (TextIO.stdErr
, String.concat
[
442 "ml-lex: error, line ", (Int.toString (!LineNum
)), ": ", x
, "\n"
446 TextIO.output (TextIO.stdErr
, String.concat
[
447 "ml-lex: syntax error, line ", (Int.toString (!LineNum
)), ": ", x
, "\n"
451 exception SyntaxError
; (* error
in user
's input file
*)
453 exception LexError
; (* unexpected error
in lexer
*)
455 val LexBuf
= ref(make_ibuf(TextIO.stdIn
));
456 val LexState
= ref
0;
457 val NextTok
= ref BOF
;
458 val inquote
= ref
false;
460 fun AdvanceTok () : unit
= let
462 ((c
>= #
"a") andalso (c
<= #
"z")) orelse
463 ((c
>= #
"A") andalso (c
<= #
"Z"))
464 fun isDigit c
= (c
>= #
"0") andalso (c
<= #
"9")
465 (* check for
valid (non
-leading
) identifier
character (added by JHR
) *)
467 ((isLetter c
) orelse (isDigit c
) orelse (c
= #
"_") orelse (c
= #
"'"))
469 fun num (c
::r
, n
) = if isDigit c
470 then num (r
, 10*n
+ (Char.ord c
- Char.ord #
"0"))
477 fun skipws () = let val ch
= nextch()
484 and nextch () = getch(!LexBuf
)
486 and escaped () = (case nextch()
493 fun err t
= prErr("illegal ascii escape '"^
(implode(rev t
))^
"'")
494 fun cvt c
= (Char.ord c
- Char.ord #
"0")
495 fun f (n
, c
, t
) = if c
=3
496 then if n
>= (!CharSetSize
)
499 else let val ch
=nextch()
502 then f(n
*10+(cvt ch
), c
+1, ch
::t
)
506 if isDigit x
then f(cvt x
, 1, [x
]) else x
510 and onechar x
= let val c
= Array
.array(!CharSetSize
, false)
512 Array
.update(c
, Char.ord(x
), true); CHARS(c
)
515 in case !LexState
of 0 => let val makeTok
= fn () =>
517 (* Lex
% operators
*)
518 of #
"%" => (case nextch() of
522 in if isLetter a
then f(a
::s
)
523 else (ungetch(!LexBuf
);
527 of "reject" => REJECT
529 |
"full" => FULLCHARSET
532 |
"structure" => STRUCT
537 | _
=> prErr
"unknown % operator "
540 (* semicolon (for
end of LEXSTATES
) *)
543 | ch
=> if isLetter(ch
) then
544 let fun getID matched
=
547 in if isLetter(x
) orelse isDigit(x
) orelse
548 x
= "_" orelse x
= "'"
551 then getID (x
::matched
)
552 else (ungetch(!LexBuf
); implode(rev matched
))
556 else prSynErr (String.concat
[
557 "bad character: \"", Char.toString ch
, "\""
559 in NextTok
:= makeTok()
561 |
1 => let val rec makeTok
= fn () =>
562 if !inquote
then case nextch() of
563 (* inside quoted
string *)
564 #
"\\" => onechar(escaped())
565 | #
"\"" => (inquote
:= false; makeTok())
566 | #
"\n" => (prSynErr
"end-of-line inside quoted string";
567 inquote
:= false; makeTok())
569 else case skipws() of
570 (* single character operators
*)
581 | #
"." => let val c
= Array
.array(!CharSetSize
,true) in
582 Array
.update(c
,10,false); CHARS(c
)
584 (* assign
and arrow
*)
585 | #
"=" => let val c
= nextch() in
586 if c
= #
">" then ARROW
else (ungetch(!LexBuf
); ASSIGN
)
589 | #
"[" => let val rec classch
= fn () => let val x
= skipws()
590 in if x
= #
"\\" then escaped() else x
592 val first
= classch();
593 val flag
= (first
<> #
"^");
594 val c
= Array
.array(!CharSetSize
,not flag
);
596 |
add (SOME x
) = Array
.update(c
, Char.ord(x
), flag
)
597 and range (x
, y
) = if x
>y
598 then (prErr
"bad char. range")
600 val i
= ref(Char.ord(x
)) and j
= Char.ord(y
)
602 add (SOME(Char.chr(!i
)));
605 and getClass last
= (case classch()
606 of #
"]" => (add(last
); c
)
608 of NONE
=> getClass(SOME #
"-")
609 |
(SOME last
') => let val x
= classch()
612 then (add(last
); add(SOME #
"-"); c
)
613 else (range(last
',x
); getClass(NONE
))
616 | x
=> (add(last
); getClass(SOME x
))
618 in CHARS(getClass(if first
= #
"^" then NONE
else SOME first
))
620 (* Start States specification
*)
621 | #
"<" => let val rec get_state
= fn (prev
,matched
) =>
623 #
">" => matched
::prev
624 | #
"," => get_state(matched
::prev
,"")
625 | x
=> if isIdentChr(x
)
626 then get_state(prev
,matched ^
String.str x
)
627 else (prSynErr
"bad start state list")
628 in STATE(get_state(nil
,""))
630 (* {id
} or repititions
*)
631 | #
"{" => let val ch
= nextch() in if isLetter(ch
) then
632 let fun getID matched
= (case nextch()
634 | x
=> if (isIdentChr x
) then
635 getID(matched ^
String.str x
)
636 else (prErr
"invalid char. class name")
638 in ID(getID(String.str ch
))
640 else if isDigit(ch
) then
641 let fun get_r (matched
, r1
) = (case nextch()
642 of #
"}" => let val n
= atoi(matched
) in
643 if r1
= ~
1 then (n
,n
) else (r1
,n
)
645 | #
"," => if r1
= ~
1 then get_r("",atoi(matched
))
646 else (prErr
"invalid repetitions spec.")
648 then get_r(matched ^
String.str x
,r1
)
649 else (prErr
"invalid char in repetitions spec")
651 in REPS(get_r(String.str ch
,~
1))
653 else (prErr
"bad repetitions spec")
655 (* Lex
% operators
*)
656 | #
"\\" => onechar(escaped())
657 (* start quoted
string *)
658 | #
"\"" => (inquote
:= true; makeTok())
661 in NextTok
:= makeTok()
667 fun loop_to_end (backslash
, x
) =
669 val c
= getch (! LexBuf
)
670 val notb
= not backslash
674 #
"\"" => if notb
then nstr
675 else loop_to_end (false, nstr
)
676 | _
=> loop_to_end (c
= #
"\\" andalso notb
, nstr
)
678 fun GetAct (lpct
, x
) =
680 val c
= getch (! LexBuf
)
684 #
"\"" => GetAct (lpct
, loop_to_end (false, nstr
))
685 | #
"(" => GetAct (lpct
+ 1, nstr
)
686 | #
")" => if lpct
= 0 then implode (rev x
)
687 else GetAct(lpct
- 1, nstr
)
688 | _
=> GetAct(lpct
, nstr
)
691 ACTION (getLexPos (), GetAct (0,nil
))
694 | c
=> (prSynErr ("invalid character " ^
String.str c
)))
695 | _
=> raise LexError
697 handle eof
=> NextTok
:= EOF
;
699 fun GetTok (_
:unit
) : token
=
700 let val t
= !NextTok
in AdvanceTok(); t
702 val SymTab
= ref (create
String.<=) : (string,exp
) dictionary ref
704 fun GetExp () : exp
=
706 let val rec optional
= fn e
=> ALT(EPS
,e
)
708 and lookup
' = fn name
=>
710 handle LOOKUP
=> prErr ("bad regular expression name: "^
713 and newline
= fn () => let val c
= Array
.array(!CharSetSize
,false) in
714 Array
.update(c
,10,true); c
717 and endline
= fn e
=> trail(e
,CLASS(newline(),0))
719 and trail
= fn (e1
,e2
) => CAT(CAT(e1
,TRAIL(0)),e2
)
721 and closure1
= fn e
=> CAT(e
,CLOSURE(e
))
723 and repeat
= fn (min
,max
,e
) => let val rec rep
= fn
725 |
(0,1) => ALT(e
,EPS
)
726 |
(0,i
) => CAT(rep(0,1),rep(0,i
-1))
727 |
(i
,j
) => CAT(e
,rep(i
-1,j
-1))
731 and exp0
= fn () => case GetTok() of
732 CHARS(c
) => exp1(CLASS(c
,0))
733 | LP
=> let val e
= exp0() in
734 if !NextTok
= RP
then
735 (AdvanceTok(); exp1(e
))
736 else (prSynErr
"missing ')'") end
737 |
ID(name
) => exp1(lookup
' name
)
738 | _
=> raise SyntaxError
740 and exp1
= fn (e
) => case !NextTok
of
744 | LP
=> exp2(e
,exp0())
746 | t
=> (AdvanceTok(); case t
of
747 QMARK
=> exp1(optional(e
))
748 | STAR
=> exp1(CLOSURE(e
))
749 | PLUS
=> exp1(closure1(e
))
750 |
CHARS(c
) => exp2(e
,CLASS(c
,0))
751 | BAR
=> ALT(e
,exp0())
752 | DOLLAR
=> (UsesTrailingContext
:= true; endline(e
))
753 | SLASH
=> (UsesTrailingContext
:= true;
755 |
REPS(i
,j
) => exp1(repeat(i
,j
,e
))
756 |
ID(name
) => exp2(e
,lookup
' name
)
757 | _
=> raise SyntaxError
)
759 and exp2
= fn (e1
,e2
) => case !NextTok
of
761 | ARROW
=> CAT(e1
,e2
)
763 | LP
=> exp2(CAT(e1
,e2
),exp0())
765 | t
=> (AdvanceTok(); case t
of
766 QMARK
=> exp1(CAT(e1
,optional(e2
)))
767 | STAR
=> exp1(CAT(e1
,CLOSURE(e2
)))
768 | PLUS
=> exp1(CAT(e1
,closure1(e2
)))
769 |
CHARS(c
) => exp2(CAT(e1
,e2
),CLASS(c
,0))
770 | BAR
=> ALT(CAT(e1
,e2
),exp0())
771 | DOLLAR
=> (UsesTrailingContext
:= true;
773 | SLASH
=> (UsesTrailingContext
:= true;
774 trail(CAT(e1
,e2
),exp0()))
775 |
REPS(i
,j
) => exp1(CAT(e1
,repeat(i
,j
,e2
)))
776 |
ID(name
) => exp2(CAT(e1
,e2
),lookup
' name
)
777 | _
=> raise SyntaxError
)
780 val StateTab
= ref(create(String.<=)) : (string,int) dictionary ref
782 val StateNum
= ref
0;
784 fun GetStates () : int list
=
786 let fun add nil sl
= sl
787 |
add (x
::y
) sl
= add
y (union ([lookup (!StateTab
)(x
)
789 prErr ("bad state name: "^x
)
793 if i
<= !StateNum
then addall (i
+2) (union ([i
],sl
))
796 fun incall (x
::y
) = (x
+1)::incall y
799 fun addincs nil
= nil
800 |
addincs (x
::y
) = x
::(x
+1)::addincs y
804 STATE s
=> (AdvanceTok(); LexState
:= 1; add s nil
)
808 of CARAT
=> (LexState
:= 1; AdvanceTok(); UsesPrevNewLine
:= true;
810 | _
=> addincs state_list
813 val LeafNum
= ref ~
1;
815 fun renum(e
: exp
) : exp
=
816 let val rec label
= fn
818 |
CLASS(x
,_
) => CLASS(x
,++LeafNum
)
819 |
CLOSURE(e
) => CLOSURE(label(e
))
820 |
ALT(e1
,e2
) => ALT(label(e1
),label(e2
))
821 |
CAT(e1
,e2
) => CAT(label(e1
),label(e2
))
822 |
TRAIL(i
) => TRAIL(++LeafNum
)
823 |
END(i
) => END(++LeafNum
)
827 exception ParseError
;
829 fun parse() : (string * (int list
* exp
) list
* ((string,pos
*string) dictionary
)) =
830 let val Accept
= ref (create
String.<=) : (string,pos
*string) dictionary ref
831 val rec ParseRtns
= fn l
=> case getch(!LexBuf
) of
832 #
"%" => let val c
= getch(!LexBuf
) in
833 if c
= #
"%" then (implode (rev l
))
834 else ParseRtns(c
:: #
"%" :: l
)
836 | c
=> ParseRtns(c
::l
)
837 and ParseDefs
= fn () =>
838 (LexState
:=0; AdvanceTok(); case !NextTok
of
841 let fun f () = (case !NextTok
of (ID i
) =>
842 (StateTab
:= enter(!StateTab
)(i
,++StateNum
);
843 ++StateNum
; AdvanceTok(); f())
845 in AdvanceTok(); f ();
846 if !NextTok
=SEMI
then ParseDefs() else
847 (prSynErr
"expected ';'")
849 | ID x
=> (LexState
:=1; AdvanceTok(); if GetTok() = ASSIGN
850 then (SymTab
:= enter(!SymTab
)(x
,GetExp());
851 if !NextTok
= SEMI
then ParseDefs()
852 else (prSynErr
"expected ';'"))
853 else raise SyntaxError
)
854 | REJECT
=> (HaveReject
:= true; ParseDefs())
855 | COUNT
=> (CountNewLines
:= true; ParseDefs())
856 | FULLCHARSET
=> (CharSetSize
:= 256; ParseDefs())
857 | HEADER
=> (LexState
:= 2; AdvanceTok();
861 (prErr
"cannot have both %structure and %header \
863 else if (!HeaderDecl
) then
864 (prErr
"duplicate %header declarations")
866 (HeaderCode
:= s
; LexState
:= 0;
868 HeaderDecl
:= true; ParseDefs())
869 | _
=> raise SyntaxError
)
870 | POSARG
=> (PosArg
:= true; ParseDefs())
871 | POSINT
=> (AdvanceTok();
874 if (!PosIntDecl
) then
875 (prErr
"duplicate %posint declarations")
876 else (PosIntName
:= i
; PosIntDecl
:= true)
877 | _
=> (prErr
"expected ID");
879 | ARG
=> (LexState
:= 2; AdvanceTok();
883 of SOME _
=> prErr
"duplicate %arg declarations"
884 | NONE
=> ArgCode
:= SOME s
;
887 | _
=> raise SyntaxError
)
888 | STRUCT
=> (AdvanceTok();
891 if (!HeaderDecl
) then
892 (prErr
"cannot have both %structure and %header \
894 else if (!StrDecl
) then
895 (prErr
"duplicate %structure declarations")
896 else (StrName
:= i
; StrDecl
:= true)
897 | _
=> (prErr
"expected ID");
899 | _
=> raise SyntaxError
)
901 fn rules
=> (LexState
:=1; AdvanceTok(); case !NextTok
of
904 let val s
= GetStates()
905 val e
= renum(CAT(GetExp(),END(0)))
907 if !NextTok
= ARROW
then
908 (LexState
:=2; AdvanceTok();
909 case GetTok() of ACTION(act
) =>
910 if !NextTok
=SEMI
then
911 (Accept
:=enter(!Accept
) (Int.toString (!LeafNum
),act
);
912 ParseRules((s
,e
)::rules
))
913 else (prSynErr
"expected ';'")
914 | _
=> raise SyntaxError
)
915 else (prSynErr
"expected '=>'")
917 in let val usercode
= ParseRtns nil
918 in (ParseDefs(); (usercode
,ParseRules(nil
),!Accept
))
920 end handle SyntaxError
=> (prSynErr
"")
922 fun makebegin () : unit
=
923 let fun make nil
= ()
924 |
make ((x
,n
:int)::y
)=(say
"val "; say x
; say
" = " ;
926 say (Int.toString n
); say
";\n"; make y
)
927 in say
"\n(* start state definitions *)\n\n"; make(listofdict(!StateTab
))
933 type key
= int list
* string
934 fun > ((key
,item
:string),(key
',item
')) =
935 let fun f ((a
:int)::a
') (b
::b
') = if Int.> (a
,b
) then true
936 else if a
=b
then f a
' b
'
943 structure RB
= RedBlack(L
)
945 fun maketable (fins
:(int * (int list
)) list
,
946 tcs
:(int * (int list
)) list
,
947 tcpairs
: (int * int) list
,
948 trans
: (int*(int list
)) list
) : unit
=
950 (* Fins
= (state #
, list
of final leaves for the state
) list
951 tcs
= (state #
, list
of trailing context leaves which begin
in this state
)
953 tcpairs
= (trailing context leaf
, end leaf
) list
954 trans
= (state #
,list
of transitions for state
) list
*)
956 let datatype elem
= N
of int | T
of int | D
of int
958 val _
= (if length(trans
)<256 then CharFormat
:= true
959 else CharFormat
:= false;
960 if !UsesTrailingContext
then
961 (say
"\ndatatype yyfinstate = N of int | \
962 \ T of int | D of int\n")
963 else say
"\ndatatype yyfinstate = N of int";
964 say
"\ntype statedata = {fin : yyfinstate list, trans: ";
966 true => say
"string}"
967 |
false => say
"int Vector.vector}";
968 say
"\n(* transition & final state table *)\nval tab = let\n";
972 (say
"fun decode s k =\n";
973 say
" let val k' = k + k\n";
974 say
" val hi = Char.ord(String.sub(s, k'))\n";
975 say
" val lo = Char.ord(String.sub(s, k' + 1))\n";
976 say
" in hi * 256 + lo end\n"))
979 let fun IsEndLeaf t
=
980 let fun f ((l
,e
)::r
) = if (e
=t
) then true else f r
981 | f nil
= false in f tcpairs
end
984 let fun f ((tl
,el
)::r
) = if (tl
=t
) then el
else f r
988 fun GetTrConLeaves s
=
989 let fun f ((s
',l
)::r
) = if (s
= s
') then l
else f r
994 let fun insert (x
:int) (a
::b
) =
995 if (x
<= a
) then x
::(a
::b
)
998 in List.foldr (fn (x
,r
) => insert x r
) [] s
1000 fun conv a
= if (IsEndLeaf a
) then (D a
) else (N a
)
1001 fun merge (a
::a
',b
::b
') =
1002 if (a
<= b
) then (conv a
)::merge(a
',b
::b
')
1003 else (T b
)::(merge(a
::a
',b
'))
1004 |
merge (a
::a
',nil
) = (conv a
)::(merge (a
',nil
))
1005 |
merge (nil
,b
::b
') = (T b
)::(merge (b
',nil
))
1006 |
merge (nil
,nil
) = nil
1010 sort_leaves (map (fn x
=> GetEndLeaf x
) (GetTrConLeaves x
)))))
1017 let fun emit8(x
, pos
) =
1018 let val s
= StringCvt.padLeft #
"0" 3 (Int.toString x
)
1021 of 16 => (say
"\\\n\\\\"; say s
; 1)
1022 | _
=> (say
"\\"; say s
; pos
+1)
1024 fun emit16(x
, pos
) =
1025 let val hi8
= x
div 256
1026 val lo8
= x
- hi8
* 256 (* x rem
256 *)
1028 emit8(lo8
, emit8(hi8
, pos
))
1030 fun MakeString([], _
, _
) = ()
1031 |
MakeString(x
::xs
, emitter
, pos
) =
1032 MakeString(xs
, emitter
, emitter(x
, pos
))
1033 in case !CharFormat
of
1034 true => (say
" \n\""; MakeString(x
,emit8
,0); say
"\"\n")
1035 |
false => (say (Int.toString(length x
));
1036 say
", \n\""; MakeString(x
,emit16
,0); say
"\"\n")
1039 fun makeEntry(nil
,rs
,t
) = rev rs
1040 |
makeEntry(((l
:int,x
)::y
),rs
,t
) =
1041 let val name
= (Int.toString l
)
1042 in let val (r
,n
) = lookup ((x
,name
),t
)
1043 in makeEntry(y
,(n
::rs
),t
)
1044 end handle notfound _
=>
1046 say
" ("; say name
; say
",";
1047 makeItems x
; say
"),\n";
1048 makeEntry(y
,(name
::rs
),(insert ((x
,name
),t
))))
1051 val _
= say
"val s = [ \n"
1052 val res
= makeEntry(trans
,nil
,empty
)
1055 of true => (say
"(0, \"\")]\n"; say
"fun f x = x \n")
1056 |
false => (say
"(0, 0, \"\")]\n";
1057 say
"fun f(n, i, x) = (n, Vector.tabulate(i, decode x)) \n")
1059 val _
= say
"val s = List.map f (List.rev (tl (List.rev s))) \n"
1060 val _
= say
"exception LexHackingError \n"
1061 val _
= say
"fun look ((j,x)::r, i: int) = if i = j then x else look(r, i) \n"
1062 val _
= say
" | look ([], i) = raise LexHackingError\n"
1064 val _
= say
"fun g {fin=x, trans=i} = {fin=x, trans=look(s,i)} \n"
1068 fun makeTable args
= let
1069 fun makeOne (a
, b
) = let
1070 fun item (N i
) = ("N", i
)
1071 |
item (T i
) = ("T", i
)
1072 |
item (D i
) = ("D", i
)
1073 fun makeItem x
= let
1076 app say
["(", t
, " ", Int.toString n
, ")"]
1078 fun makeItems
[] = ()
1079 | makeItems
[x
] = makeItem x
1080 |
makeItems (hd
:: tl
) =
1081 (makeItem hd
; say
","; makeItems tl
)
1085 app say
["], trans = ", a
, "}"]
1087 fun mt ([], []) = ()
1088 |
mt ([a
], [b
]) = makeOne (a
, b
)
1089 |
mt (a
:: a
', b
:: b
') =
1090 (makeOne (a
, b
); say
",\n"; mt (a
', b
'))
1091 | mt _
= raise Match
1097 fun makeTable(nil
,nil
) = ()
1098 |
makeTable(a
::a
',b
::b
') =
1099 let fun makeItems nil
= ()
1100 |
makeItems (hd
::tl
) =
1104 |
(T i
) => ("(T ",i
)
1105 |
(D i
) => ("(D ",i
)
1106 in (say t
; say (Int.toString n
); say
")";
1109 else (say
","; makeItems tl
))
1111 in (say
"{fin = ["; makeItems b
;
1112 say
"], trans = "; say a
; say
"}";
1115 else (say
",\n"; makeTable(a
',b
')))
1119 fun msg x
= TextIO.output(TextIO.stdOut
, x
)
1121 in (say
"in Vector.fromList(List.map g \n["; makeTable(rs
,newfins
);
1123 msg ("\nNumber of states = " ^
(Int.toString (length trans
)));
1124 msg ("\nNumber of distinct rows = " ^
(Int.toString (!count
)));
1125 msg ("\nApprox. memory size of trans. table = " ^
1126 (Int.toString (!count
*(!CharSetSize
)*(if !CharFormat
then 1 else 8))));
1130 (* makeaccept
: Takes
a (string,string) dictionary
, prints
case statement for
1131 accepting leaf actions
. The key strings are the leaf #
's
, the data strings
1134 fun makeaccept ends
=
1135 let fun startline f
= if f
then say
" " else say
"| "
1136 fun make(nil
,f
) = (startline f
; say
"_ => raise Internal.LexerError\n")
1137 |
make((x
,(p
,a
))::y
,f
) = (startline f
; say x
; say
" => ";
1138 if Substring
.size(#
2 (Substring
.position
"yytext" (Substring
.full a
))) = 0
1140 (say
"("; sayPos (SOME p
); say a
; sayPos NONE
; say
")")
1141 else (say
"let val yytext=yymktext() in ";
1142 sayPos (SOME p
); say a
; sayPos NONE
; say
" end");
1143 say
"\n"; make(y
,false))
1144 in make (listofdict(ends
),true)
1147 fun leafdata(e
:(int list
* exp
) list
) =
1148 let val fp
= Array
.array(!LeafNum
+ 1,nil
)
1149 and leaf
= Array
.array(!LeafNum
+ 1,EPS
)
1150 and tcpairs
= ref nil
1151 and trailmark
= ref ~
1;
1154 |
(hd
::tl
,x
) => (Array
.update(fp
,hd
,union(fp sub hd
,x
));
1158 (moredata(e1
); add(lastpos(e1
),firstpos(e1
)))
1159 |
ALT(e1
,e2
) => (moredata(e1
); moredata(e2
))
1160 |
CAT(e1
,e2
) => (moredata(e1
); moredata(e2
);
1161 add(lastpos(e1
),firstpos(e2
)))
1162 |
CLASS(x
,i
) => Array
.update(leaf
,i
,CLASS(x
,i
))
1163 |
TRAIL(i
) => (Array
.update(leaf
,i
,TRAIL(i
)); if !trailmark
= ~
1
1164 then trailmark
:= i
else ())
1165 |
END(i
) => (Array
.update(leaf
,i
,END(i
)); if !trailmark
<> ~
1
1166 then (tcpairs
:= (!trailmark
,i
)::(!tcpairs
);
1167 trailmark
:= ~
1) else ())
1171 |
(_
,x
)::tl
=> (moredata(x
);makedata(tl
))
1172 in trailmark
:= ~
1; makedata(e
); (fp
,leaf
,!tcpairs
)
1175 fun makedfa(rules
) =
1176 let val StateTab
= ref (create(String.<=)) : (string,int) dictionary ref
1177 val fintab
= ref (create(Int.<=)) : (int,(int list
)) dictionary ref
1178 val transtab
= ref (create(Int.<=)) : (int,int list
) dictionary ref
1179 val tctab
= ref (create(Int.<=)) : (int,(int list
)) dictionary ref
1180 val (fp
, leaf
, tcpairs
) = leafdata(rules
);
1182 fun visit (state
,statenum
) =
1183 let val transitions
= gettrans(state
) in
1184 fintab
:= enter(!fintab
)(statenum
,getfin(state
));
1185 tctab
:= enter(!tctab
)(statenum
,gettc(state
));
1186 transtab
:= enter(!transtab
)(statenum
,transitions
)
1189 and visitstarts (states
) =
1190 let fun vs nil i
= ()
1191 |
vs (hd
::tl
) i
= (visit (hd
,i
); vs
tl (i
+1))
1195 and hashstate(s
: int list
) =
1198 |
((x
:int)::y
,z
) => hs(y
,z ^
" " ^
(Int.toString x
))
1202 and find(s
) = lookup(!StateTab
)(hashstate(s
))
1204 and add(s
,n
) = StateTab
:= enter(!StateTab
)(hashstate(s
),n
)
1206 and getstate (state
) =
1208 handle LOOKUP
=> let val n
= ++StateNum
in
1209 add(state
,n
); visit(state
,n
); n
1213 let fun f nil fins
= fins
1216 of END _
=> f
tl (hd
::fins
)
1222 let fun f nil fins
= fins
1225 of TRAIL _
=> f
tl (hd
::fins
)
1230 and gettrans (state
) =
1231 let fun loop c tlist
=
1232 let fun cktrans nil r
= r
1233 |
cktrans (hd
::tl
) r
=
1234 case (leaf sub hd
) of
1236 (if (i sub c
) then cktrans
tl (union(r
,fp sub hd
))
1237 else cktrans tl r
handle Subscript
=>
1242 let val v
=cktrans state nil
1243 in loop (c
-1) (if v
=nil
then 0::tlist
else (getstate v
)::tlist
)
1247 in loop ((!CharSetSize
) - 1) nil
1251 let val startarray
= Array
.array(!StateNum
+ 1, nil
);
1252 fun listofarray(a
,n
) =
1253 let fun f i l
= if i
>= 0 then f (i
-1) ((a sub i
)::l
) else l
1257 |
(startlist
,e
)::tl
=> (fix(startlist
,firstpos(e
));makess(tl
))
1260 |
(s
::tl
,firsts
) => (Array
.update(startarray
,s
,
1261 union(firsts
,startarray sub s
));
1263 in makess(rules
);listofarray(startarray
, !StateNum
+ 1)
1266 in visitstarts(startstates());
1267 (listofdict(!fintab
),listofdict(!transtab
),listofdict(!tctab
),tcpairs
)
1272 \ structure UserDeclarations =\n\
1277 " | Internal
.D k
=> action (i
,(acts
::l
),k
::rs
)\n\
1278 \ | Internal
.T k
=>\n\
1279 \
let fun f (a
::b
,r
) =\n\
1281 \
then action(i
,(((Internal
.N a
)::acts
)::l
),(b@r
))\n\
1282 \
else f (b
,a
::r
)\n\
1283 \ |
f (nil
,r
) = action(i
,(acts
::l
),rs
)\n\
1288 fun lexGen(infile
) =
1289 let val outfile
= infile ^
".sml"
1290 val () = (InFile
:= OS
.Path
.file infile
; OutFile
:= OS
.Path
.file outfile
)
1291 fun PrintLexer (ends
) =
1292 let val sayln
= fn x
=> (say x
; say
"\n")
1294 of NONE
=> (sayln
"fun lex () : Internal.result =";
1295 sayln
"let fun continue() = lex() in")
1297 (say
"fun lex "; say
"(yyarg as (";
1298 sayPos (SOME p
); say s
; sayPos NONE
; sayln
")) =";
1299 sayln
"let fun continue() : Internal.result = ");
1300 say
" let fun scan (s,AcceptingLeaves : Internal.yyfinstate";
1301 sayln
" list list,l,i0) =";
1302 if !UsesTrailingContext
1303 then say
"\tlet fun action (i,nil,rs)"
1304 else say
"\tlet fun action (i,nil)";
1305 sayln
" = raise LexError";
1306 if !UsesTrailingContext
1307 then sayln
"\t| action (i,nil::l,rs) = action(i-1,l,rs)"
1308 else sayln
"\t| action (i,nil::l) = action (i-1,l)";
1309 if !UsesTrailingContext
1310 then sayln
"\t| action (i,(node::acts)::l,rs) ="
1311 else sayln
"\t| action (i,(node::acts)::l) =";
1312 sayln
"\t\tcase node of";
1313 sayln
"\t\t Internal.N yyk => ";
1314 sayln
"\t\t\t(let fun yymktext() = String.substring(!yyb,i0,i-i0)\n\
1315 \\t\t\t val yypos = YYPosInt.+(YYPosInt.fromInt i0, !yygone)";
1317 then (sayln
"\t\t\tval _ = yylineno := CharVectorSlice.foldli";
1318 sayln
"\t\t\t\t(fn (_,#\"\\n\", n) => n+1 | (_,_, n) => n) (!yylineno) (CharVectorSlice.slice (!yyb,i0,SOME(i-i0)))")
1321 then (say
"\t\t\tfun REJECT() = action(i,acts::l";
1322 if !UsesTrailingContext
1323 then sayln
",rs)" else sayln
")")
1325 sayln
"\t\t\topen UserDeclarations Internal.StartStates";
1326 sayln
" in (yybufpos := i; case yyk of ";
1328 sayln
"\t\t\t(* Application actions *)\n";
1332 if (!UsesTrailingContext
) then say skel_mid2
else ();
1333 sayln
"\tval {fin,trans} = Vector.sub(Internal.tab, s)";
1334 sayln
"\tval NewAcceptingLeaves = fin::AcceptingLeaves";
1335 sayln
"\tin if l = !yybl then";
1336 sayln
"\t if trans = #trans(Vector.sub(Internal.tab,0))";
1337 sayln
"\t then action(l,NewAcceptingLeaves";
1338 if !UsesTrailingContext
then say
",nil" else ();
1340 sayln
"\t let val newchars= if !yydone then \"\" else yyinput 1024";
1341 sayln
"\t in if (String.size newchars)=0";
1342 sayln
"\t\t then (yydone := true;";
1343 say
"\t\t if (l=i0) then UserDeclarations.eof ";
1344 sayln (case !ArgCode
of NONE
=> "()" | SOME _
=> "yyarg");
1345 say
"\t\t else action(l,NewAcceptingLeaves";
1346 if !UsesTrailingContext
then
1347 sayln
",nil))" else sayln
"))";
1348 sayln
"\t\t else (if i0=l then yyb := newchars";
1349 sayln
"\t\t else yyb := String.substring(!yyb,i0,l-i0)^newchars;";
1350 sayln
"\t\t yygone := YYPosInt.+(!yygone, YYPosInt.fromInt i0);";
1351 sayln
"\t\t yybl := String.size (!yyb);";
1352 sayln
"\t\t scan (s,AcceptingLeaves,l-i0,0))";
1354 sayln
"\t else let val NewChar = Char.ord(CharVector.sub(!yyb,l))";
1356 then sayln
"\t\tval NewChar = if NewChar<128 then NewChar else 128"
1358 say
"\t\tval NewState = ";
1359 sayln (if !CharFormat
1360 then "Char.ord(CharVector.sub(trans,NewChar))"
1361 else "Vector.sub(trans, NewChar)");
1362 say
"\t\tin if NewState=0 then action(l,NewAcceptingLeaves";
1363 if !UsesTrailingContext
then sayln
",nil)" else sayln
")";
1364 sayln
"\t\telse scan(NewState,NewAcceptingLeaves,l+1,i0)";
1367 if !UsesPrevNewLine
then () else sayln
"(*";
1368 sayln
"\tval start= if String.substring(!yyb,!yybufpos-1,1)=\"\\n\"";
1369 sayln
"then !yybegin+1 else !yybegin";
1370 if !UsesPrevNewLine
then () else sayln
"*)";
1372 if !UsesPrevNewLine
then say
"start"
1373 else say
"!yybegin (* start *)";
1374 sayln
",nil,!yybufpos,!yybufpos)";
1376 sayln (case !ArgCode
of NONE
=> "end" | SOME _
=> "in continue end");
1382 in (UsesPrevNewLine
:= false;
1384 LexBuf
:= make_ibuf(TextIO.openIn infile
);
1387 setLexOut (TextIO.openOut(outfile
));
1390 StateTab
:= enter(create(String.<=))("INITIAL",1);
1393 val (user_code
,rules
,ends
) =
1395 (close_ibuf(!LexBuf
);
1396 TextIO.closeOut(!LexOut
);
1397 OS
.FileSys
.remove outfile
;
1399 val (fins
,trans
,tctab
,tcpairs
) = makedfa(rules
)
1400 val _
= if !UsesTrailingContext
then
1401 (close_ibuf(!LexBuf
);
1402 TextIO.closeOut(!LexOut
);
1403 OS
.FileSys
.remove outfile
;
1404 prErr
"lookahead is unimplemented")
1408 then (sayPos (SOME (!HeaderPos
))
1411 else say ("structure " ^
(!StrName
));
1414 sayPos (SOME
{line
= 1, col
= 0});
1417 say
"end (* end of user routines *)\n";
1418 say
"exception LexError (* raised if illegal leaf ";
1419 say
"action tried *)\n";
1420 say
"structure Internal =\n\tstruct\n";
1421 maketable(fins
,tctab
,tcpairs
,trans
);
1422 say
"structure StartStates =\n\tstruct\n";
1423 say
"\tdatatype yystartstate = STARTSTATE of int\n";
1426 say
"type result = UserDeclarations.lexresult\n";
1427 say
"\texception LexerError (* raised if illegal leaf ";
1428 say
"action tried *)\n";
1430 say ("structure YYPosInt : INTEGER = " ^
(!PosIntName
) ^
"\n");
1431 say (if (!PosArg
) then "fun makeLexer (yyinput,yygone0:YYPosInt.int) =\nlet\n"
1432 else "fun makeLexer yyinput =\nlet\tval yygone0= YYPosInt.fromInt ~1\n");
1433 if !CountNewLines
then say
"\tval yylineno = ref 0\n\n" else ();
1434 say
"\tval yyb = ref \"\\n\" \t\t(* buffer *)\n\
1435 \\tval yybl = ref 1\t\t(*buffer length *)\n\
1436 \\tval yybufpos = ref 1\t\t(* location of next character to use *)\n\
1437 \\tval yygone = ref yygone0\t(* position in file of beginning of buffer *)\n\
1438 \\tval yydone = ref false\t\t(* eof found yet? *)\n\
1439 \\tval yybegin = ref 1\t\t(*Current 'start state' for lexer *)\n\
1440 \\n\tval YYBEGIN = fn (Internal.StartStates.STARTSTATE x) =>\n\
1441 \\t\t yybegin := x\n\n";
1443 close_ibuf(!LexBuf
);
1444 TextIO.closeOut(!LexOut
)