Commit | Line | Data |
---|---|---|
7f918cf1 CE |
1 | (* Modified by Vesa Karvonen on 2007-12-18. |
2 | * Create line directives in output. | |
3 | *) | |
4 | (* ML-Yacc Parser Generator (c) 1989, 1991 Andrew W. Appel, David R. Tarditi *) | |
5 | ||
6 | signature HEADER = | |
7 | sig | |
8 | type pos = {line : int, col : int} | |
9 | val pos : {line : int ref, start : int ref} | |
10 | val text : string list ref | |
11 | ||
12 | type inputSource | |
13 | val newSource : string * TextIO.instream * TextIO.outstream -> inputSource | |
14 | val error : inputSource -> pos -> string -> unit | |
15 | val warn : inputSource -> pos -> string -> unit | |
16 | val errorOccurred : inputSource -> unit -> bool | |
17 | ||
18 | datatype symbol = SYMBOL of string * pos | |
19 | val symbolName : symbol -> string | |
20 | val symbolPos : symbol -> pos | |
21 | val symbolMake : string * pos -> symbol | |
22 | ||
23 | type ty | |
24 | val tyName : ty -> string | |
25 | val tyMake : string -> ty | |
26 | ||
27 | (* associativities: each kind of associativity is assigned a unique | |
28 | integer *) | |
29 | ||
30 | datatype prec = LEFT | RIGHT | NONASSOC | |
31 | datatype control = NODEFAULT | VERBOSE | PARSER_NAME of symbol | | |
32 | FUNCTOR of string | START_SYM of symbol | | |
33 | NSHIFT of symbol list | POS of string | PURE | | |
34 | PARSE_ARG of string * string | | |
35 | TOKEN_SIG_INFO of string | |
36 | ||
37 | datatype rule = RULE of {lhs : symbol, rhs : symbol list, | |
38 | code : {text : string, pos : pos}, | |
39 | prec : symbol option} | |
40 | ||
41 | datatype declData = DECL of | |
42 | {eop : symbol list, | |
43 | keyword : symbol list, | |
44 | nonterm : (symbol * ty option) list option, | |
45 | prec : (prec * (symbol list)) list, | |
46 | change: (symbol list * symbol list) list, | |
47 | term : (symbol * ty option) list option, | |
48 | control : control list, | |
49 | value : (symbol * string) list} | |
50 | ||
51 | val join_decls : declData * declData * inputSource * pos -> declData | |
52 | ||
53 | type parseResult | |
54 | val getResult : parseResult -> string * declData * rule list | |
55 | end; | |
56 | ||
57 | signature PARSE_GEN_PARSER = | |
58 | sig | |
59 | structure Header : HEADER | |
60 | val parse : string -> Header.parseResult * Header.inputSource | |
61 | end; | |
62 | ||
63 | signature PARSE_GEN = | |
64 | sig | |
65 | val parseGen : string -> unit | |
66 | end; | |
67 | ||
68 | signature GRAMMAR = | |
69 | sig | |
70 | ||
71 | datatype term = T of int | |
72 | datatype nonterm = NT of int | |
73 | datatype symbol = TERM of term | NONTERM of nonterm | |
74 | ||
75 | (* grammar: | |
76 | terminals should be numbered from 0 to terms-1, | |
77 | nonterminals should be numbered from 0 to nonterms-1, | |
78 | rules should be numbered between 0 and (length rules) - 1, | |
79 | higher precedence binds tighter, | |
80 | start nonterminal should not occur on the rhs of any rule | |
81 | *) | |
82 | ||
83 | datatype grammar = GRAMMAR of | |
84 | {rules: {lhs : nonterm, rhs : symbol list, | |
85 | precedence : int option, rulenum : int } list, | |
86 | terms: int, | |
87 | nonterms: int, | |
88 | start : nonterm, | |
89 | eop : term list, | |
90 | noshift : term list, | |
91 | precedence : term -> int option, | |
92 | termToString : term -> string, | |
93 | nontermToString : nonterm -> string} | |
94 | end | |
95 | ||
96 | (* signature for internal version of grammar *) | |
97 | ||
98 | signature INTGRAMMAR = | |
99 | sig | |
100 | structure Grammar : GRAMMAR | |
101 | structure SymbolAssoc : TABLE | |
102 | structure NontermAssoc : TABLE | |
103 | ||
104 | sharing type SymbolAssoc.key = Grammar.symbol | |
105 | sharing type NontermAssoc.key = Grammar.nonterm | |
106 | ||
107 | datatype rule = RULE of | |
108 | {lhs : Grammar.nonterm, | |
109 | rhs : Grammar.symbol list, | |
110 | ||
111 | (* internal number of rule - convenient for producing LR graph *) | |
112 | ||
113 | num : int, | |
114 | rulenum : int, | |
115 | precedence : int option} | |
116 | ||
117 | val gtTerm : Grammar.term * Grammar.term -> bool | |
118 | val eqTerm : Grammar.term * Grammar.term -> bool | |
119 | ||
120 | val gtNonterm : Grammar.nonterm * Grammar.nonterm -> bool | |
121 | val eqNonterm : Grammar.nonterm * Grammar.nonterm -> bool | |
122 | ||
123 | val gtSymbol : Grammar.symbol * Grammar.symbol -> bool | |
124 | val eqSymbol : Grammar.symbol * Grammar.symbol -> bool | |
125 | ||
126 | (* Debugging information will be generated only if DEBUG is true. *) | |
127 | ||
128 | val DEBUG : bool | |
129 | ||
130 | val prRule : (Grammar.symbol -> string) * (Grammar.nonterm -> string) * | |
131 | (string -> 'b) -> rule -> unit | |
132 | val prGrammar : (Grammar.symbol -> string)*(Grammar.nonterm -> string) * | |
133 | (string -> unit) -> Grammar.grammar -> unit | |
134 | end | |
135 | ||
136 | signature CORE = | |
137 | sig | |
138 | structure Grammar : GRAMMAR | |
139 | structure IntGrammar : INTGRAMMAR | |
140 | sharing Grammar = IntGrammar.Grammar | |
141 | ||
142 | datatype item = ITEM of | |
143 | { rule : IntGrammar.rule, | |
144 | dot : int, | |
145 | ||
146 | (* rhsAfter: The portion of the rhs of a rule that lies after the dot *) | |
147 | ||
148 | rhsAfter: Grammar.symbol list } | |
149 | ||
150 | (* eqItem and gtItem compare items *) | |
151 | ||
152 | val eqItem : item * item -> bool | |
153 | val gtItem : item * item -> bool | |
154 | ||
155 | (* functions for maintaining ordered item lists *) | |
156 | ||
157 | val insert : item * item list -> item list | |
158 | val union : item list * item list -> item list | |
159 | ||
160 | (* core: a set of items. It is represented by an ordered list of items. | |
161 | The list is in ascending order The rule numbers and the positions of the | |
162 | dots are used to order the items. *) | |
163 | ||
164 | datatype core = CORE of item list * int (* state # *) | |
165 | ||
166 | (* gtCore and eqCore compare the lists of items *) | |
167 | ||
168 | val gtCore : core * core -> bool | |
169 | val eqCore : core * core -> bool | |
170 | ||
171 | (* functions for debugging *) | |
172 | ||
173 | val prItem : (Grammar.symbol -> string) * (Grammar.nonterm -> string) * | |
174 | (string -> unit) -> item -> unit | |
175 | val prCore : (Grammar.symbol -> string) * (Grammar.nonterm -> string) * | |
176 | (string -> unit) -> core -> unit | |
177 | end | |
178 | ||
179 | signature CORE_UTILS = | |
180 | sig | |
181 | ||
182 | structure Grammar : GRAMMAR | |
183 | structure IntGrammar : INTGRAMMAR | |
184 | structure Core : CORE | |
185 | ||
186 | sharing Grammar = IntGrammar.Grammar = Core.Grammar | |
187 | sharing IntGrammar = Core.IntGrammar | |
188 | ||
189 | (* mkFuncs: create functions for the set of productions derived from a | |
190 | nonterminal, the cores that result from shift/gotos from a core, | |
191 | and return a list of rules *) | |
192 | ||
193 | val mkFuncs : Grammar.grammar -> | |
194 | { produces : Grammar.nonterm -> IntGrammar.rule list, | |
195 | ||
196 | (* shifts: take a core and compute all the cores that result from shifts/gotos | |
197 | on symbols *) | |
198 | ||
199 | shifts : Core.core -> (Grammar.symbol*Core.item list) list, | |
200 | rules: IntGrammar.rule list, | |
201 | ||
202 | (* epsProds: take a core compute epsilon productions for it *) | |
203 | ||
204 | epsProds : Core.core -> IntGrammar.rule list} | |
205 | end | |
206 | ||
207 | signature LRGRAPH = | |
208 | sig | |
209 | structure Grammar : GRAMMAR | |
210 | structure IntGrammar : INTGRAMMAR | |
211 | structure Core : CORE | |
212 | ||
213 | sharing Grammar = IntGrammar.Grammar = Core.Grammar | |
214 | sharing IntGrammar = Core.IntGrammar | |
215 | ||
216 | type graph | |
217 | val edges : Core.core * graph -> {edge:Grammar.symbol,to:Core.core} list | |
218 | val nodes : graph -> Core.core list | |
219 | val shift : graph -> int * Grammar.symbol -> int (* int = state # *) | |
220 | val core : graph -> int -> Core.core (* get core for a state *) | |
221 | ||
222 | (* mkGraph: compute the LR(0) sets of items *) | |
223 | ||
224 | val mkGraph : Grammar.grammar -> | |
225 | {graph : graph, | |
226 | produces : Grammar.nonterm -> IntGrammar.rule list, | |
227 | rules : IntGrammar.rule list, | |
228 | epsProds: Core.core -> IntGrammar.rule list} | |
229 | ||
230 | val prGraph: (Grammar.symbol -> string)*(Grammar.nonterm -> string) * | |
231 | (string -> unit) -> graph -> unit | |
232 | end | |
233 | ||
234 | signature LOOK = | |
235 | sig | |
236 | structure Grammar : GRAMMAR | |
237 | structure IntGrammar : INTGRAMMAR | |
238 | sharing Grammar = IntGrammar.Grammar | |
239 | ||
240 | val union : Grammar.term list * Grammar.term list -> Grammar.term list | |
241 | val make_set : Grammar.term list -> Grammar.term list | |
242 | ||
243 | val mkFuncs : {rules : IntGrammar.rule list, nonterms : int, | |
244 | produces : Grammar.nonterm -> IntGrammar.rule list} -> | |
245 | {nullable: Grammar.nonterm -> bool, | |
246 | first : Grammar.symbol list -> Grammar.term list} | |
247 | ||
248 | val prLook : (Grammar.term -> string) * (string -> unit) -> | |
249 | Grammar.term list -> unit | |
250 | end | |
251 | ||
252 | signature LALR_GRAPH = | |
253 | sig | |
254 | structure Grammar : GRAMMAR | |
255 | structure IntGrammar : INTGRAMMAR | |
256 | structure Core : CORE | |
257 | structure Graph : LRGRAPH | |
258 | ||
259 | sharing Grammar = IntGrammar.Grammar = Core.Grammar = Graph.Grammar | |
260 | sharing IntGrammar = Core.IntGrammar = Graph.IntGrammar | |
261 | sharing Core = Graph.Core | |
262 | ||
263 | datatype lcore = LCORE of (Core.item * Grammar.term list) list * int | |
264 | val addLookahead : {graph : Graph.graph, | |
265 | first : Grammar.symbol list -> Grammar.term list, | |
266 | eop : Grammar.term list, | |
267 | nonterms : int, | |
268 | nullable: Grammar.nonterm -> bool, | |
269 | produces : Grammar.nonterm -> IntGrammar.rule list, | |
270 | rules : IntGrammar.rule list, | |
271 | epsProds : Core.core -> IntGrammar.rule list, | |
272 | print : string -> unit, (* for debugging *) | |
273 | termToString : Grammar.term -> string, | |
274 | nontermToString : Grammar.nonterm -> string} -> | |
275 | lcore list | |
276 | val prLcore : (Grammar.symbol -> string) * (Grammar.nonterm -> string) * | |
277 | (Grammar.term -> string) * (string -> unit) -> | |
278 | lcore -> unit | |
279 | end | |
280 | ||
281 | (* LR_ERRS: errors found while constructing an LR table *) | |
282 | ||
283 | signature LR_ERRS = | |
284 | sig | |
285 | structure LrTable : LR_TABLE | |
286 | ||
287 | (* RR = reduce/reduce, | |
288 | SR = shift/reduce | |
289 | NS: non-shiftable terminal found on the rhs of a rule | |
290 | NOT_REDUCED n: rule number n was not reduced | |
291 | START n : start symbol found on the rhs of rule n *) | |
292 | ||
293 | datatype err = RR of LrTable.term * LrTable.state * int * int | |
294 | | SR of LrTable.term * LrTable.state * int | |
295 | | NS of LrTable.term * int | |
296 | | NOT_REDUCED of int | |
297 | | START of int | |
298 | ||
299 | val summary : err list -> {rr : int, sr: int, | |
300 | not_reduced : int, start : int,nonshift : int} | |
301 | ||
302 | val printSummary : (string -> unit) -> err list -> unit | |
303 | ||
304 | end | |
305 | ||
306 | (* PRINT_STRUCT: prints a structure which includes a value 'table' and a | |
307 | structure Table whose signature matches LR_TABLE. The table in the printed | |
308 | structure will contain the same information as the one passed to | |
309 | printStruct, although the representation may be different. It returns | |
310 | the number of entries left in the table after compaction.*) | |
311 | ||
312 | signature PRINT_STRUCT = | |
313 | sig | |
314 | structure LrTable : LR_TABLE | |
315 | val makeStruct : | |
316 | {table : LrTable.table, | |
317 | name : string, | |
318 | print: string -> unit, | |
319 | verbose : bool | |
320 | } -> int | |
321 | end | |
322 | ||
323 | (* VERBOSE: signature for a structure which takes a table and creates a | |
324 | verbose description of it *) | |
325 | ||
326 | signature VERBOSE = | |
327 | sig | |
328 | structure Errs : LR_ERRS | |
329 | val printVerbose : | |
330 | {table : Errs.LrTable.table, | |
331 | entries : int, | |
332 | termToString : Errs.LrTable.term -> string, | |
333 | nontermToString : Errs.LrTable.nonterm -> string, | |
334 | stateErrs : Errs.LrTable.state -> Errs.err list, | |
335 | errs : Errs.err list, | |
336 | print: string -> unit, | |
337 | printCores : (string -> unit) -> Errs.LrTable.state -> unit, | |
338 | printRule : (string -> unit) -> int -> unit} -> unit | |
339 | end | |
340 | ||
341 | (* MAKE_LR_TABLE: signature for a structure which includes a structure | |
342 | matching the signature LR_TABLE and a function which maps grammars | |
343 | to tables *) | |
344 | ||
345 | signature MAKE_LR_TABLE = | |
346 | sig | |
347 | structure Grammar : GRAMMAR | |
348 | structure Errs : LR_ERRS | |
349 | structure LrTable : LR_TABLE | |
350 | sharing Errs.LrTable = LrTable | |
351 | ||
352 | sharing type LrTable.term = Grammar.term | |
353 | sharing type LrTable.nonterm = Grammar.nonterm | |
354 | ||
355 | (* boolean value determines whether default reductions will be used. | |
356 | If it is true, reductions will be used. *) | |
357 | ||
358 | val mkTable : Grammar.grammar * bool -> | |
359 | LrTable.table * | |
360 | (LrTable.state -> Errs.err list) * (* errors in a state *) | |
361 | ((string -> unit) -> LrTable.state -> unit) * | |
362 | Errs.err list (* list of all errors *) | |
363 | end; | |
364 | ||
365 | (* SHRINK_LR_TABLE: finds unique action entry rows in the action table | |
366 | for the LR parser *) | |
367 | ||
368 | signature SHRINK_LR_TABLE = | |
369 | sig | |
370 | (* Takes an action table represented as a list of action rows. | |
371 | It returns the number of unique rows left in the action table, | |
372 | a list of integers which maps each original row to a unique | |
373 | row, and a list of unique rows *) | |
374 | structure LrTable : LR_TABLE | |
375 | val shrinkActionList : LrTable.table * bool -> | |
376 | (int * int list * | |
377 | ((LrTable.term,LrTable.action) LrTable.pairlist * | |
378 | LrTable.action) list) * int | |
379 | end |