1 (* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
3 (* base.sig: Base signature file for SML-Yacc. This file contains signatures
4 that must be loaded before any of the files produced by ML-Yacc are loaded
7 (* STREAM: signature for a lazy stream.*)
11 val streamify : (unit -> '_a) -> '_a stream
12 val cons : '_a * '_a stream -> '_a stream
13 val get : '_a stream -> '_a * '_a stream
16 (* LR_TABLE: signature for an LR Table.
18 The list of actions and gotos passed to mkLrTable must be ordered by state
19 number. The values for state 0 are the first in the list, the values for
20 state 1 are next, etc.
25 datatype ('a,'b) pairlist = EMPTY | PAIR of 'a * 'b * ('a,'b) pairlist
26 datatype state = STATE of int
27 datatype term = T of int
28 datatype nonterm = NT of int
29 datatype action = SHIFT of state
35 val numStates : table -> int
36 val numRules : table -> int
37 val describeActions : table -> state ->
38 (term,action) pairlist * action
39 val describeGoto : table -> state -> (nonterm,state) pairlist
40 val action : table -> state * term -> action
41 val goto : table -> state * nonterm -> state
42 val initialState : table -> state
43 exception Goto of state * nonterm
45 val mkLrTable : {actions : ((term,action) pairlist * action) array,
46 gotos : (nonterm,state) pairlist array,
47 numStates : int, numRules : int,
48 initialState : state} -> table
51 (* TOKEN: signature revealing the internal structure of a token. This signature
52 TOKEN is distinct from the signature {parser name}_TOKENS produced by ML-Yacc.
53 The {parser name}_TOKENS structures contain some types and functions to
54 construct tokens from values and positions.
56 The representation of token was very carefully chosen here to allow the
57 polymorphic parser to work without knowing the types of semantic values
60 This has had an impact on the TOKENS structure produced by SML-Yacc, which
61 is a structure parameter to lexer functors. We would like to have some
62 type 'a token which functions to construct tokens would create. A
63 constructor function for an integer token might be
65 INT: int * 'a * 'a -> 'a token.
67 This is not possible because we need to have tokens with the representation
68 given below for the polymorphic parser.
70 Thus our constructor functions for tokens have the form:
72 INT: int * 'a * 'a -> (svalue,'a) token
74 This in turn has had an impact on the signature that lexers for SML-Yacc
75 must match and the types that a user must declare in the user declarations
81 structure LrTable : LR_TABLE
82 datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b)
83 val sameToken : ('a,'b) token * ('a,'b) token -> bool
86 (* LR_PARSER: signature for a polymorphic LR parser *)
90 structure Stream: STREAM
91 structure LrTable : LR_TABLE
92 structure Token : TOKEN
94 sharing LrTable = Token.LrTable
98 val parse : {table : LrTable.table,
99 lexer : ('_b,'_c) Token.token Stream.stream,
103 (LrTable.state * ('_b * '_c * '_c)) list *
107 ((LrTable.state *('_b * '_c * '_c)) list),
109 ec : { is_keyword : LrTable.term -> bool,
110 noShift : LrTable.term -> bool,
111 preferred_change : (LrTable.term list * LrTable.term list) list,
112 errtermvalue : LrTable.term -> '_b,
113 showTerminal : LrTable.term -> string,
114 terms: LrTable.term list,
115 error : string * '_c * '_c -> unit
117 lookahead : int (* max amount of lookahead used in *)
118 (* error correction *)
120 (('_b,'_c) Token.token Stream.stream)
123 (* LEXER: a signature that most lexers produced for use with SML-Yacc's
124 output will match. The user is responsible for declaring type token,
125 type pos, and type svalue in the UserDeclarations section of a lexer.
127 Note that type token is abstract in the lexer. This allows SML-Yacc to
128 create a TOKENS signature for use with lexers produced by ML-Lex that
129 treats the type token abstractly. Lexers that are functors parametrized by
130 a Tokens structure matching a TOKENS signature cannot examine the structure
136 structure UserDeclarations :
142 val makeLexer : (int -> string) -> unit ->
143 (UserDeclarations.svalue,UserDeclarations.pos) UserDeclarations.token
146 (* ARG_LEXER: the %arg option of ML-Lex allows users to produce lexers which
147 also take an argument before yielding a function from unit to a token
150 signature ARG_LEXER =
152 structure UserDeclarations :
159 val makeLexer : (int -> string) -> UserDeclarations.arg -> unit ->
160 (UserDeclarations.svalue,UserDeclarations.pos) UserDeclarations.token
163 (* PARSER_DATA: the signature of ParserData structures in {parser name}LrValsFun
164 produced by SML-Yacc. All such structures match this signature.
166 The {parser name}LrValsFun produces a structure which contains all the values
167 except for the lexer needed to call the polymorphic parser mentioned
172 signature PARSER_DATA =
174 (* the type of line numbers *)
178 (* the type of semantic values *)
182 (* the type of the user-supplied argument to the parser *)
185 (* the intended type of the result of the parser. This value is
186 produced by applying extract from the structure Actions to the
187 final semantic value resulting from a parse.
192 structure LrTable : LR_TABLE
193 structure Token : TOKEN
194 sharing Token.LrTable = LrTable
196 (* structure Actions contains the functions which maintain the
197 semantic values stack in the parser. Void is used to provide
198 a default value for the semantic stack.
203 val actions : int * pos *
204 (LrTable.state * (svalue * pos * pos)) list * arg->
205 LrTable.nonterm * (svalue * pos * pos) *
206 ((LrTable.state *(svalue * pos * pos)) list)
208 val extract : svalue -> result
211 (* structure EC contains information used to improve error
212 recovery in an error-correcting parser *)
216 val is_keyword : LrTable.term -> bool
217 val noShift : LrTable.term -> bool
218 val preferred_change : (LrTable.term list * LrTable.term list) list
219 val errtermvalue : LrTable.term -> svalue
220 val showTerminal : LrTable.term -> string
221 val terms: LrTable.term list
224 (* table is the LR table for the parser *)
226 val table : LrTable.table
229 (* signature PARSER is the signature that most user parsers created by
235 structure Token : TOKEN
236 structure Stream : STREAM
239 (* type pos is the type of line numbers *)
243 (* type result is the type of the result from the parser *)
247 (* the type of the user-supplied argument to the parser *)
250 (* type svalue is the type of semantic values for the semantic value
256 (* val makeLexer is used to create a stream of tokens for the parser *)
258 val makeLexer : (int -> string) ->
259 (svalue,pos) Token.token Stream.stream
261 (* val parse takes a stream of tokens and a function to print
262 errors and returns a value of type result and a stream containing
266 val parse : int * ((svalue,pos) Token.token Stream.stream) *
267 (string * pos * pos -> unit) * arg ->
268 result * (svalue,pos) Token.token Stream.stream
270 val sameToken : (svalue,pos) Token.token * (svalue,pos) Token.token ->
274 (* signature ARG_PARSER is the signature that will be matched by parsers whose
275 lexer takes an additional argument.
278 signature ARG_PARSER =
280 structure Token : TOKEN
281 structure Stream : STREAM
290 val makeLexer : (int -> string) -> lexarg ->
291 (svalue,pos) Token.token Stream.stream
292 val parse : int * ((svalue,pos) Token.token Stream.stream) *
293 (string * pos * pos -> unit) * arg ->
294 result * (svalue,pos) Token.token Stream.stream
296 val sameToken : (svalue,pos) Token.token * (svalue,pos) Token.token ->