Import Upstream version 20180207
[hcoop/debian/mlton.git] / mlyacc / src / lalr.sml
1 (* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
2
3 functor mkLalr ( structure IntGrammar : INTGRAMMAR
4 structure Core : CORE
5 structure Graph : LRGRAPH
6 structure Look: LOOK
7 sharing Graph.Core = Core
8 sharing Graph.IntGrammar = Core.IntGrammar =
9 Look.IntGrammar = IntGrammar) : LALR_GRAPH =
10 struct
11 val sub = Array.sub
12 infix 9 sub
13 open IntGrammar.Grammar IntGrammar Core Graph Look
14 structure Graph = Graph
15 structure Core = Core
16 structure Grammar = IntGrammar.Grammar
17 structure IntGrammar = IntGrammar
18
19 datatype tmpcore = TMPCORE of (item * term list ref) list * int
20 datatype lcore = LCORE of (item * term list) list * int
21
22
23 val prLcore =
24 fn a as (SymbolToString,nontermToString,termToString,print) =>
25 let val printItem = prItem (SymbolToString,nontermToString,print)
26 val printLookahead = prLook(termToString,print)
27 in fn (LCORE (items,state)) =>
28 (print "\n";
29 print "state ";
30 print (Int.toString state);
31 print " :\n\n";
32 List.app (fn (item,lookahead) =>
33 (print "{";
34 printItem item;
35 print ",";
36 printLookahead lookahead;
37 print "}\n")) items)
38 end
39
40 exception Lalr of int
41
42 structure ItemList = ListOrdSet
43 (struct
44 type elem = item * term list ref
45 val eq = fn ((a,_),(b,_)) => eqItem(a,b)
46 val gt = fn ((a,_),(b,_)) => gtItem(a,b)
47 end)
48
49 structure NontermSet = ListOrdSet
50 (struct
51 type elem = nonterm
52 val gt = gtNonterm
53 val eq = eqNonterm
54 end)
55
56 (* NTL: nonterms with lookahead *)
57
58 structure NTL = RbOrdSet
59 (struct
60 type elem = nonterm * term list
61 val gt = fn ((i,_),(j,_)) => gtNonterm(i,j)
62 val eq = fn ((i,_),(j,_)) => eqNonterm(i,j)
63 end)
64
65 val DEBUG = false
66
67 val addLookahead = fn {graph,nullable,first,eop,
68 rules,produces,nonterms,epsProds,
69 print,termToString,nontermToString} =>
70 let
71
72 val eop = Look.make_set eop
73
74 val symbolToString = fn (TERM t) => termToString t
75 | (NONTERM t) => nontermToString t
76
77 val print = if DEBUG then print
78 else fn _ => ()
79
80 val prLook = if DEBUG then prLook (termToString,print)
81 else fn _ => ()
82
83 val prNonterm = print o nontermToString
84
85 val prRule = if DEBUG
86 then prRule(symbolToString,nontermToString,print)
87 else fn _ => ()
88
89 val printInt = print o (Int.toString : int -> string)
90
91 val printItem = prItem(symbolToString,nontermToString,print)
92
93 (* look_pos: position in the rhs of a rule at which we should start placing
94 lookahead ref cells, i.e. the minimum place at which A -> x .B y, where
95 B is a nonterminal and y =*=> epsilon, or A -> x. is true. Positions are
96 given by the number of symbols before the place. The place before the first
97 symbol is 0, etc. *)
98
99 val look_pos =
100 let val positions = Array.array(length rules,0)
101
102 (* rule_pos: calculate place in the rhs of a rule at which we should start
103 placing lookahead ref cells *)
104
105 fun rule_pos (RULE {rhs,...}) =
106 case (rev rhs) of
107 nil => 0
108 | (TERM t) :: r => length rhs
109 | (NONTERM n :: r) => let
110 (* f assumes that everything after n in the
111 * rule has proven to be nullable so far.
112 * Remember that the rhs has been reversed,
113 * implying that this is true initially *)
114 (* A -> .z t B y, where y is nullable *)
115 fun f (b, (r as (TERM _ :: _))) = length r
116 (* A -> .z B C y *)
117 | f (c, (NONTERM b :: r)) =
118 if nullable c then f (b, r)
119 else length r + 1
120 (* A -> .B y, where y is nullable *)
121 | f (_, []) = 0
122 in f (n, r)
123 end
124
125 val check_rule = fn (rule as RULE {num,...}) =>
126 let val pos = rule_pos rule
127 in (print "look_pos: ";
128 prRule rule;
129 print " = ";
130 printInt pos;
131 print "\n";
132 Array.update(positions,num,rule_pos rule))
133 end
134 in app check_rule rules;
135 fn RULE{num,...} => (positions sub num)
136 end
137
138 (* rest_is_null: true for items of the form A -> x .B y, where y is nullable *)
139
140 val rest_is_null =
141 fn (ITEM{rule,dot, rhsAfter=NONTERM _ :: _}) =>
142 dot >= (look_pos rule)
143 | _ => false
144
145 (* map core to a new core including only items of the form A -> x. or
146 A -> x. B y, where y =*=> epsilon. It also adds epsilon productions to the
147 core. Each item is given a ref cell to hold the lookahead nonterminals for
148 it.*)
149
150 val map_core =
151 let val f = fn (item as ITEM {rhsAfter=nil,...},r) =>
152 (item,ref nil) :: r
153 | (item,r) =>
154 if (rest_is_null item)
155 then (item,ref nil)::r
156 else r
157 in fn (c as CORE (items,state)) =>
158 let val epsItems =
159 map (fn rule=>(ITEM{rule=rule,dot=0,rhsAfter=nil},
160 ref (nil : term list))
161 ) (epsProds c)
162 in TMPCORE(ItemList.union(List.foldr f [] items,epsItems),state)
163 end
164 end
165
166 val new_nodes = map map_core (nodes graph)
167
168 exception Find
169
170 (* findRef: state * item -> lookahead ref cell for item *)
171
172 val findRef =
173 let val states = Array.fromList new_nodes
174 val dummy = ref nil
175 in fn (state,item) =>
176 let val TMPCORE (l,_) = states sub state
177 in case ItemList.find((item,dummy),l)
178 of SOME (_,look_ref) => look_ref
179 | NONE => (print "find failed: state ";
180 printInt state;
181 print "\nitem =\n";
182 printItem item;
183 print "\nactual items =\n";
184 app (fn (i,_) => (printItem i;
185 print "\n")) l;
186 raise Find)
187 end
188 end
189
190
191 (* findRuleRefs: state -> rule -> lookahead refs for rule. *)
192
193 val findRuleRefs =
194 let val shift = shift graph
195 in fn state =>
196 (* handle epsilon productions *)
197 fn (rule as RULE {rhs=nil,...}) =>
198 [findRef(state,ITEM{rule=rule,dot=0,rhsAfter=nil})]
199 | (rule as RULE {rhs=sym::rest,...}) =>
200 let val pos = Int.max(look_pos rule,1)
201 fun scan'(state,nil,pos,result) =
202 findRef(state,ITEM{rule=rule,
203 dot=pos,
204 rhsAfter=nil}) :: result
205 | scan'(state,rhs as sym::rest,pos,result) =
206 scan'(shift(state,sym), rest, pos+1,
207 findRef(state,ITEM{rule=rule,
208 dot=pos,
209 rhsAfter=rhs})::result)
210
211 (* find first item of the form A -> x .B y, where y =*=> epsilon and
212 x is not epsilon, or A -> x. use scan' to pick up all refs after this
213 point *)
214
215 fun scan(state,nil,_) =
216 [findRef(state,ITEM{rule=rule,dot=pos,rhsAfter=nil})]
217 | scan(state,rhs,0) = scan'(state,rhs,pos,nil)
218 | scan(state,sym::rest,place) =
219 scan(shift(state,sym),rest,place-1)
220
221 in scan(shift(state,sym),rest,pos-1)
222 end
223
224 end
225
226 (* function to compute for some nonterminal n the set of nonterminals A added
227 through the closure of nonterminal n such that n =c*=> .A x, where x is
228 nullable *)
229
230 val nonterms_w_null = fn nt =>
231 let val collect_nonterms = fn n =>
232 List.foldr (fn (rule as RULE {rhs=rhs as NONTERM n :: _,...},r) =>
233 (case
234 (rest_is_null(ITEM {dot=0,rhsAfter=rhs,rule=rule}))
235 of true => n :: r
236 | false => r)
237 | (_,r) => r) [] (produces n)
238 fun dfs(a as (n,r)) =
239 if (NontermSet.exists a) then r
240 else List.foldr dfs (NontermSet.insert(n,r))
241 (collect_nonterms n)
242 in dfs(nt,NontermSet.empty)
243 end
244
245 val nonterms_w_null =
246 let val data = Array.array(nonterms,NontermSet.empty)
247 fun f n = if n=nonterms then ()
248 else (Array.update(data,n,nonterms_w_null (NT n));
249 f (n+1))
250 in (f 0; fn (NT nt) => data sub nt)
251 end
252
253 (* look_info: for some nonterminal n the set of nonterms A added
254 through the closure of the nonterminal such that n =c+=> .Ax and the
255 lookahead accumlated for each nonterm A *)
256
257 val look_info = fn nt =>
258 let val collect_nonterms = fn n =>
259 List.foldr (fn (RULE {rhs=NONTERM n :: t,...},r) =>
260 (case NTL.find ((n,nil),r)
261 of SOME (key,data) =>
262 NTL.insert((n,Look.union(data,first t)),r)
263 | NONE => NTL.insert ((n,first t),r))
264 | (_,r) => r)
265 NTL.empty (produces n)
266 fun dfs(a as ((key1,data1),r)) =
267 case (NTL.find a)
268 of SOME (_,data2) =>
269 NTL.insert((key1,Look.union(data1,data2)),r)
270 | NONE => NTL.fold dfs (collect_nonterms key1)
271 (NTL.insert a)
272 in dfs((nt,nil),NTL.empty)
273 end
274
275 val look_info =
276 if not DEBUG then look_info
277 else fn nt =>
278 (print "look_info of "; prNonterm nt; print "=\n";
279 let val info = look_info nt
280 in (NTL.app (fn (nt,lookahead) =>
281 (prNonterm nt; print ": "; prLook lookahead;
282 print "\n\n")) info;
283 info)
284 end)
285
286 (* prop_look: propagate lookaheads for nonterms added in the closure of a
287 nonterm. Lookaheads must be propagated from each nonterminal m to
288 all nonterminals { n | m =c+=> nx, where x=*=>epsilon} *)
289
290 val prop_look = fn ntl =>
291 let val upd_lookhd = fn new_look => fn (nt,r) =>
292 case NTL.find ((nt,new_look),r)
293 of SOME (_,old_look) =>
294 NTL.insert((nt, Look.union(new_look,old_look)),r)
295 | NONE => raise (Lalr 241)
296 val upd_nonterm = fn ((nt,look),r) =>
297 NontermSet.fold (upd_lookhd look)
298 (nonterms_w_null nt) r
299 in NTL.fold upd_nonterm ntl ntl
300 end
301
302 val prop_look =
303 if not DEBUG then prop_look
304 else fn ntl =>
305 (print "prop_look =\n";
306 let val info = prop_look ntl
307 in (NTL.app (fn (nt,lookahead) =>
308 (prNonterm nt;
309 print ": ";
310 prLook lookahead;
311 print "\n\n")) info; info)
312 end)
313
314 (* now put the information from these functions together. Create a function
315 which takes a nonterminal n and returns a list of triplets of
316 (a nonterm added through closure,
317 the lookahead for the nonterm,
318 whether the nonterm should include the lookahead for the nonterminal
319 whose closure is being taken (i.e. first(y) for an item j of the
320 form A -> x .n y and lookahead(j) if y =*=> epsilon)
321 *)
322
323 val closure_nonterms =
324 let val data =
325 Array.array(nonterms,nil: (nonterm * term list * bool) list)
326 val do_nonterm = fn i =>
327 let val nonterms_followed_by_null =
328 nonterms_w_null i
329 val nonterms_added_through_closure =
330 NTL.make_list (prop_look (look_info i))
331 val result =
332 map (fn (nt,l) =>
333 (nt,l,NontermSet.exists (nt,nonterms_followed_by_null))
334 ) nonterms_added_through_closure
335 in if DEBUG then
336 (print "closure_nonterms = ";
337 prNonterm i;
338 print "\n";
339 app (fn (nt,look,nullable) =>
340 (prNonterm nt;
341 print ":";
342 prLook look;
343 case nullable
344 of false => print "(false)\n"
345 | true => print "(true)\n")) result;
346 print "\n")
347 else ();
348 result
349 end
350 fun f i =
351 if i=nonterms then ()
352 else (Array.update(data,i,do_nonterm (NT i)); f (i+1))
353 val _ = f 0
354 in fn (NT i) => data sub i
355 end
356
357 (* add_nonterm_lookahead: Add lookahead to all completion items for rules added
358 when the closure of a given nonterm in some state is taken. It returns
359 a list of lookahead refs to which the given nonterm's lookahead should
360 be propagated. For each rule, it must trace the shift/gotos in the LR(0)
361 graph to find all items of the form A-> x .B y where y =*=> epsilon or
362 A -> x.
363 *)
364
365 val add_nonterm_lookahead = fn (nt,state) =>
366 let val f = fn ((nt,lookahead,nullable),r) =>
367 let val refs = map (findRuleRefs state) (produces nt)
368 val refs = List.concat refs
369 val _ = app (fn r =>
370 r := (Look.union (!r,lookahead))) refs
371 in if nullable then refs @ r else r
372 end
373 in List.foldr f [] (closure_nonterms nt)
374 end
375
376 (* scan_core: Scan a core for all items of the form A -> x .B y. Applies
377 add_nonterm_lookahead to each such B, and then merges first(y) into
378 the list of refs returned by add_nonterm_lookahead. It returns
379 a list of ref * ref list for all the items where y =*=> epsilon *)
380
381 val scan_core = fn (CORE (l,state)) =>
382 let fun f ((item as ITEM{rhsAfter= NONTERM b :: y,
383 dot,rule})::t,r) =
384 (case (add_nonterm_lookahead(b,state))
385 of nil => r
386 | l =>
387 let val first_y = first y
388 val newr = if dot >= (look_pos rule)
389 then (findRef(state,item),l)::r
390 else r
391 in (app (fn r =>
392 r := Look.union(!r,first_y)) l;
393 f (t,newr))
394 end)
395 | f (_ :: t,r) = f (t,r)
396 | f (nil,r) = r
397 in f (l,nil)
398 end
399
400 (* add end-of-parse symbols to set of items consisting of all items
401 immediately derived from the start symbol *)
402
403 val add_eop = fn (c as CORE (l,state),eop) =>
404 let fun f (item as ITEM {rule,dot,...}) =
405 let val refs = findRuleRefs state rule
406 in
407
408 (* first take care of kernal items. Add the end-of-parse symbols to
409 the lookahead sets for these items. Epsilon productions of the
410 start symbol do not need to be handled specially because they will
411 be in the kernal also *)
412
413 app (fn r => r := Look.union(!r,eop)) refs;
414
415 (* now take care of closure items. These are all nonterminals C which
416 have a derivation S =+=> .C x, where x is nullable *)
417
418 if dot >= (look_pos rule) then
419 case item
420 of ITEM{rhsAfter=NONTERM b :: _,...} =>
421 (case add_nonterm_lookahead(b,state)
422 of nil => ()
423 | l => app (fn r => r := Look.union(!r,eop)) l)
424 | _ => ()
425 else ()
426 end
427 in app f l
428 end
429
430 val iterate = fn l =>
431 let fun f lookahead (nil,done) = done
432 | f lookahead (h::t,done) =
433 let val old = !h
434 in h := Look.union (old,lookahead);
435 if (length (!h)) <> (length old)
436 then f lookahead (t,false)
437 else f lookahead(t,done)
438 end
439 fun g ((from,to)::rest,done) =
440 let val new_done = f (!from) (to,done)
441 in g (rest,new_done)
442 end
443 | g (nil,done) = done
444 fun loop true = ()
445 | loop false = loop (g (l,true))
446 in loop false
447 end
448
449 val lookahead = List.concat (map scan_core (nodes graph))
450
451 (* used to scan the item list of a TMPCORE and remove the items not
452 being reduced *)
453
454 val create_lcore_list =
455 fn ((item as ITEM {rhsAfter=nil,...},ref l),r) =>
456 (item,l) :: r
457 | (_,r) => r
458
459 in add_eop(Graph.core graph 0,eop);
460 iterate lookahead;
461 map (fn (TMPCORE (l,state)) =>
462 LCORE (List.foldr create_lcore_list [] l, state)) new_nodes
463 end
464 end;