Import Upstream version 20180207
[hcoop/debian/mlton.git] / mlyacc / src / sigs.sml
1 (* Modified by Vesa Karvonen on 2007-12-18.
2 * Create line directives in output.
3 *)
4 (* ML-Yacc Parser Generator (c) 1989, 1991 Andrew W. Appel, David R. Tarditi *)
5
6 signature HEADER =
7 sig
8 type pos = {line : int, col : int}
9 val pos : {line : int ref, start : int ref}
10 val text : string list ref
11
12 type inputSource
13 val newSource : string * TextIO.instream * TextIO.outstream -> inputSource
14 val error : inputSource -> pos -> string -> unit
15 val warn : inputSource -> pos -> string -> unit
16 val errorOccurred : inputSource -> unit -> bool
17
18 datatype symbol = SYMBOL of string * pos
19 val symbolName : symbol -> string
20 val symbolPos : symbol -> pos
21 val symbolMake : string * pos -> symbol
22
23 type ty
24 val tyName : ty -> string
25 val tyMake : string -> ty
26
27 (* associativities: each kind of associativity is assigned a unique
28 integer *)
29
30 datatype prec = LEFT | RIGHT | NONASSOC
31 datatype control = NODEFAULT | VERBOSE | PARSER_NAME of symbol |
32 FUNCTOR of string | START_SYM of symbol |
33 NSHIFT of symbol list | POS of string | PURE |
34 PARSE_ARG of string * string |
35 TOKEN_SIG_INFO of string
36
37 datatype rule = RULE of {lhs : symbol, rhs : symbol list,
38 code : {text : string, pos : pos},
39 prec : symbol option}
40
41 datatype declData = DECL of
42 {eop : symbol list,
43 keyword : symbol list,
44 nonterm : (symbol * ty option) list option,
45 prec : (prec * (symbol list)) list,
46 change: (symbol list * symbol list) list,
47 term : (symbol * ty option) list option,
48 control : control list,
49 value : (symbol * string) list}
50
51 val join_decls : declData * declData * inputSource * pos -> declData
52
53 type parseResult
54 val getResult : parseResult -> string * declData * rule list
55 end;
56
57 signature PARSE_GEN_PARSER =
58 sig
59 structure Header : HEADER
60 val parse : string -> Header.parseResult * Header.inputSource
61 end;
62
63 signature PARSE_GEN =
64 sig
65 val parseGen : string -> unit
66 end;
67
68 signature GRAMMAR =
69 sig
70
71 datatype term = T of int
72 datatype nonterm = NT of int
73 datatype symbol = TERM of term | NONTERM of nonterm
74
75 (* grammar:
76 terminals should be numbered from 0 to terms-1,
77 nonterminals should be numbered from 0 to nonterms-1,
78 rules should be numbered between 0 and (length rules) - 1,
79 higher precedence binds tighter,
80 start nonterminal should not occur on the rhs of any rule
81 *)
82
83 datatype grammar = GRAMMAR of
84 {rules: {lhs : nonterm, rhs : symbol list,
85 precedence : int option, rulenum : int } list,
86 terms: int,
87 nonterms: int,
88 start : nonterm,
89 eop : term list,
90 noshift : term list,
91 precedence : term -> int option,
92 termToString : term -> string,
93 nontermToString : nonterm -> string}
94 end
95
96 (* signature for internal version of grammar *)
97
98 signature INTGRAMMAR =
99 sig
100 structure Grammar : GRAMMAR
101 structure SymbolAssoc : TABLE
102 structure NontermAssoc : TABLE
103
104 sharing type SymbolAssoc.key = Grammar.symbol
105 sharing type NontermAssoc.key = Grammar.nonterm
106
107 datatype rule = RULE of
108 {lhs : Grammar.nonterm,
109 rhs : Grammar.symbol list,
110
111 (* internal number of rule - convenient for producing LR graph *)
112
113 num : int,
114 rulenum : int,
115 precedence : int option}
116
117 val gtTerm : Grammar.term * Grammar.term -> bool
118 val eqTerm : Grammar.term * Grammar.term -> bool
119
120 val gtNonterm : Grammar.nonterm * Grammar.nonterm -> bool
121 val eqNonterm : Grammar.nonterm * Grammar.nonterm -> bool
122
123 val gtSymbol : Grammar.symbol * Grammar.symbol -> bool
124 val eqSymbol : Grammar.symbol * Grammar.symbol -> bool
125
126 (* Debugging information will be generated only if DEBUG is true. *)
127
128 val DEBUG : bool
129
130 val prRule : (Grammar.symbol -> string) * (Grammar.nonterm -> string) *
131 (string -> 'b) -> rule -> unit
132 val prGrammar : (Grammar.symbol -> string)*(Grammar.nonterm -> string) *
133 (string -> unit) -> Grammar.grammar -> unit
134 end
135
136 signature CORE =
137 sig
138 structure Grammar : GRAMMAR
139 structure IntGrammar : INTGRAMMAR
140 sharing Grammar = IntGrammar.Grammar
141
142 datatype item = ITEM of
143 { rule : IntGrammar.rule,
144 dot : int,
145
146 (* rhsAfter: The portion of the rhs of a rule that lies after the dot *)
147
148 rhsAfter: Grammar.symbol list }
149
150 (* eqItem and gtItem compare items *)
151
152 val eqItem : item * item -> bool
153 val gtItem : item * item -> bool
154
155 (* functions for maintaining ordered item lists *)
156
157 val insert : item * item list -> item list
158 val union : item list * item list -> item list
159
160 (* core: a set of items. It is represented by an ordered list of items.
161 The list is in ascending order The rule numbers and the positions of the
162 dots are used to order the items. *)
163
164 datatype core = CORE of item list * int (* state # *)
165
166 (* gtCore and eqCore compare the lists of items *)
167
168 val gtCore : core * core -> bool
169 val eqCore : core * core -> bool
170
171 (* functions for debugging *)
172
173 val prItem : (Grammar.symbol -> string) * (Grammar.nonterm -> string) *
174 (string -> unit) -> item -> unit
175 val prCore : (Grammar.symbol -> string) * (Grammar.nonterm -> string) *
176 (string -> unit) -> core -> unit
177 end
178
179 signature CORE_UTILS =
180 sig
181
182 structure Grammar : GRAMMAR
183 structure IntGrammar : INTGRAMMAR
184 structure Core : CORE
185
186 sharing Grammar = IntGrammar.Grammar = Core.Grammar
187 sharing IntGrammar = Core.IntGrammar
188
189 (* mkFuncs: create functions for the set of productions derived from a
190 nonterminal, the cores that result from shift/gotos from a core,
191 and return a list of rules *)
192
193 val mkFuncs : Grammar.grammar ->
194 { produces : Grammar.nonterm -> IntGrammar.rule list,
195
196 (* shifts: take a core and compute all the cores that result from shifts/gotos
197 on symbols *)
198
199 shifts : Core.core -> (Grammar.symbol*Core.item list) list,
200 rules: IntGrammar.rule list,
201
202 (* epsProds: take a core compute epsilon productions for it *)
203
204 epsProds : Core.core -> IntGrammar.rule list}
205 end
206
207 signature LRGRAPH =
208 sig
209 structure Grammar : GRAMMAR
210 structure IntGrammar : INTGRAMMAR
211 structure Core : CORE
212
213 sharing Grammar = IntGrammar.Grammar = Core.Grammar
214 sharing IntGrammar = Core.IntGrammar
215
216 type graph
217 val edges : Core.core * graph -> {edge:Grammar.symbol,to:Core.core} list
218 val nodes : graph -> Core.core list
219 val shift : graph -> int * Grammar.symbol -> int (* int = state # *)
220 val core : graph -> int -> Core.core (* get core for a state *)
221
222 (* mkGraph: compute the LR(0) sets of items *)
223
224 val mkGraph : Grammar.grammar ->
225 {graph : graph,
226 produces : Grammar.nonterm -> IntGrammar.rule list,
227 rules : IntGrammar.rule list,
228 epsProds: Core.core -> IntGrammar.rule list}
229
230 val prGraph: (Grammar.symbol -> string)*(Grammar.nonterm -> string) *
231 (string -> unit) -> graph -> unit
232 end
233
234 signature LOOK =
235 sig
236 structure Grammar : GRAMMAR
237 structure IntGrammar : INTGRAMMAR
238 sharing Grammar = IntGrammar.Grammar
239
240 val union : Grammar.term list * Grammar.term list -> Grammar.term list
241 val make_set : Grammar.term list -> Grammar.term list
242
243 val mkFuncs : {rules : IntGrammar.rule list, nonterms : int,
244 produces : Grammar.nonterm -> IntGrammar.rule list} ->
245 {nullable: Grammar.nonterm -> bool,
246 first : Grammar.symbol list -> Grammar.term list}
247
248 val prLook : (Grammar.term -> string) * (string -> unit) ->
249 Grammar.term list -> unit
250 end
251
252 signature LALR_GRAPH =
253 sig
254 structure Grammar : GRAMMAR
255 structure IntGrammar : INTGRAMMAR
256 structure Core : CORE
257 structure Graph : LRGRAPH
258
259 sharing Grammar = IntGrammar.Grammar = Core.Grammar = Graph.Grammar
260 sharing IntGrammar = Core.IntGrammar = Graph.IntGrammar
261 sharing Core = Graph.Core
262
263 datatype lcore = LCORE of (Core.item * Grammar.term list) list * int
264 val addLookahead : {graph : Graph.graph,
265 first : Grammar.symbol list -> Grammar.term list,
266 eop : Grammar.term list,
267 nonterms : int,
268 nullable: Grammar.nonterm -> bool,
269 produces : Grammar.nonterm -> IntGrammar.rule list,
270 rules : IntGrammar.rule list,
271 epsProds : Core.core -> IntGrammar.rule list,
272 print : string -> unit, (* for debugging *)
273 termToString : Grammar.term -> string,
274 nontermToString : Grammar.nonterm -> string} ->
275 lcore list
276 val prLcore : (Grammar.symbol -> string) * (Grammar.nonterm -> string) *
277 (Grammar.term -> string) * (string -> unit) ->
278 lcore -> unit
279 end
280
281 (* LR_ERRS: errors found while constructing an LR table *)
282
283 signature LR_ERRS =
284 sig
285 structure LrTable : LR_TABLE
286
287 (* RR = reduce/reduce,
288 SR = shift/reduce
289 NS: non-shiftable terminal found on the rhs of a rule
290 NOT_REDUCED n: rule number n was not reduced
291 START n : start symbol found on the rhs of rule n *)
292
293 datatype err = RR of LrTable.term * LrTable.state * int * int
294 | SR of LrTable.term * LrTable.state * int
295 | NS of LrTable.term * int
296 | NOT_REDUCED of int
297 | START of int
298
299 val summary : err list -> {rr : int, sr: int,
300 not_reduced : int, start : int,nonshift : int}
301
302 val printSummary : (string -> unit) -> err list -> unit
303
304 end
305
306 (* PRINT_STRUCT: prints a structure which includes a value 'table' and a
307 structure Table whose signature matches LR_TABLE. The table in the printed
308 structure will contain the same information as the one passed to
309 printStruct, although the representation may be different. It returns
310 the number of entries left in the table after compaction.*)
311
312 signature PRINT_STRUCT =
313 sig
314 structure LrTable : LR_TABLE
315 val makeStruct :
316 {table : LrTable.table,
317 name : string,
318 print: string -> unit,
319 verbose : bool
320 } -> int
321 end
322
323 (* VERBOSE: signature for a structure which takes a table and creates a
324 verbose description of it *)
325
326 signature VERBOSE =
327 sig
328 structure Errs : LR_ERRS
329 val printVerbose :
330 {table : Errs.LrTable.table,
331 entries : int,
332 termToString : Errs.LrTable.term -> string,
333 nontermToString : Errs.LrTable.nonterm -> string,
334 stateErrs : Errs.LrTable.state -> Errs.err list,
335 errs : Errs.err list,
336 print: string -> unit,
337 printCores : (string -> unit) -> Errs.LrTable.state -> unit,
338 printRule : (string -> unit) -> int -> unit} -> unit
339 end
340
341 (* MAKE_LR_TABLE: signature for a structure which includes a structure
342 matching the signature LR_TABLE and a function which maps grammars
343 to tables *)
344
345 signature MAKE_LR_TABLE =
346 sig
347 structure Grammar : GRAMMAR
348 structure Errs : LR_ERRS
349 structure LrTable : LR_TABLE
350 sharing Errs.LrTable = LrTable
351
352 sharing type LrTable.term = Grammar.term
353 sharing type LrTable.nonterm = Grammar.nonterm
354
355 (* boolean value determines whether default reductions will be used.
356 If it is true, reductions will be used. *)
357
358 val mkTable : Grammar.grammar * bool ->
359 LrTable.table *
360 (LrTable.state -> Errs.err list) * (* errors in a state *)
361 ((string -> unit) -> LrTable.state -> unit) *
362 Errs.err list (* list of all errors *)
363 end;
364
365 (* SHRINK_LR_TABLE: finds unique action entry rows in the action table
366 for the LR parser *)
367
368 signature SHRINK_LR_TABLE =
369 sig
370 (* Takes an action table represented as a list of action rows.
371 It returns the number of unique rows left in the action table,
372 a list of integers which maps each original row to a unique
373 row, and a list of unique rows *)
374 structure LrTable : LR_TABLE
375 val shrinkActionList : LrTable.table * bool ->
376 (int * int list *
377 ((LrTable.term,LrTable.action) LrTable.pairlist *
378 LrTable.action) list) * int
379 end