1 (* ML
-Yacc Parser
Generator (c
) 1989 Andrew W
. Appel
, David R
. Tarditi
*)
3 functor mkLalr ( structure IntGrammar
: INTGRAMMAR
5 structure Graph
: LRGRAPH
7 sharing Graph
.Core
= Core
8 sharing Graph
.IntGrammar
= Core
.IntGrammar
=
9 Look
.IntGrammar
= IntGrammar
) : LALR_GRAPH
=
13 open IntGrammar
.Grammar IntGrammar Core Graph Look
14 structure Graph
= Graph
16 structure Grammar
= IntGrammar
.Grammar
17 structure IntGrammar
= IntGrammar
19 datatype tmpcore
= TMPCORE
of (item
* term list ref
) list
* int
20 datatype lcore
= LCORE
of (item
* term list
) list
* int
24 fn a
as (SymbolToString
,nontermToString
,termToString
,print
) =>
25 let val printItem
= prItem (SymbolToString
,nontermToString
,print
)
26 val printLookahead
= prLook(termToString
,print
)
27 in fn (LCORE (items
,state
)) =>
30 print (Int.toString state
);
32 List.app (fn (item
,lookahead
) =>
36 printLookahead lookahead
;
42 structure ItemList
= ListOrdSet
44 type elem
= item
* term list ref
45 val eq
= fn ((a
,_
),(b
,_
)) => eqItem(a
,b
)
46 val gt
= fn ((a
,_
),(b
,_
)) => gtItem(a
,b
)
49 structure NontermSet
= ListOrdSet
56 (* NTL
: nonterms
with lookahead
*)
58 structure NTL
= RbOrdSet
60 type elem
= nonterm
* term list
61 val gt
= fn ((i
,_
),(j
,_
)) => gtNonterm(i
,j
)
62 val eq
= fn ((i
,_
),(j
,_
)) => eqNonterm(i
,j
)
67 val addLookahead
= fn {graph
,nullable
,first
,eop
,
68 rules
,produces
,nonterms
,epsProds
,
69 print
,termToString
,nontermToString
} =>
72 val eop
= Look
.make_set eop
74 val symbolToString
= fn (TERM t
) => termToString t
75 |
(NONTERM t
) => nontermToString t
77 val print
= if DEBUG
then print
80 val prLook
= if DEBUG
then prLook (termToString
,print
)
83 val prNonterm
= print
o nontermToString
86 then prRule(symbolToString
,nontermToString
,print
)
89 val printInt
= print
o (Int.toString
: int -> string)
91 val printItem
= prItem(symbolToString
,nontermToString
,print
)
93 (* look_pos
: position
in the rhs
of a rule at which we should start placing
94 lookahead ref cells
, i
.e
. the minimum place at which A
-> x
.B y
, where
95 B is a nonterminal
and y
=*=> epsilon
, or A
-> x
. is
true. Positions are
96 given by the number
of symbols
before the place
. The place
before the first
100 let val positions
= Array
.array(length rules
,0)
102 (* rule_pos
: calculate place
in the rhs
of a rule at which we should start
103 placing lookahead ref cells
*)
105 fun rule_pos (RULE
{rhs
,...}) =
108 |
(TERM t
) :: r
=> length rhs
109 |
(NONTERM n
:: r
) => let
110 (* f assumes that everything after n
in the
111 * rule has proven to be nullable so far
.
112 * Remember that the rhs has been reversed
,
113 * implying that this is
true initially
*)
114 (* A
-> .z t B y
, where y is nullable
*)
115 fun f (b
, (r
as (TERM _
:: _
))) = length r
117 |
f (c
, (NONTERM b
:: r
)) =
118 if nullable c
then f (b
, r
)
120 (* A
-> .B y
, where y is nullable
*)
125 val check_rule
= fn (rule
as RULE
{num
,...}) =>
126 let val pos
= rule_pos rule
127 in (print
"look_pos: ";
132 Array
.update(positions
,num
,rule_pos rule
))
134 in app check_rule rules
;
135 fn RULE
{num
,...} => (positions sub num
)
138 (* rest_is_null
: true for items
of the form A
-> x
.B y
, where y is nullable
*)
141 fn (ITEM
{rule
,dot
, rhsAfter
=NONTERM _
:: _
}) =>
142 dot
>= (look_pos rule
)
145 (* map core to a new core including only items
of the form A
-> x
. or
146 A
-> x
. B y
, where y
=*=> epsilon
. It also adds epsilon productions to the
147 core
. Each item is given a ref cell to hold the lookahead nonterminals for
151 let val f
= fn (item
as ITEM
{rhsAfter
=nil
,...},r
) =>
154 if (rest_is_null item
)
155 then (item
,ref nil
)::r
157 in fn (c
as CORE (items
,state
)) =>
159 map (fn rule
=>(ITEM
{rule
=rule
,dot
=0,rhsAfter
=nil
},
160 ref (nil
: term list
))
162 in TMPCORE(ItemList
.union(List.foldr f
[] items
,epsItems
),state
)
166 val new_nodes
= map
map_core (nodes graph
)
170 (* findRef
: state
* item
-> lookahead ref cell for item
*)
173 let val states
= Array
.fromList new_nodes
175 in fn (state
,item
) =>
176 let val TMPCORE (l
,_
) = states sub state
177 in case ItemList
.find((item
,dummy
),l
)
178 of SOME (_
,look_ref
) => look_ref
179 | NONE
=> (print
"find failed: state ";
183 print
"\nactual items =\n";
184 app (fn (i
,_
) => (printItem i
;
191 (* findRuleRefs
: state
-> rule
-> lookahead refs for rule
. *)
194 let val shift
= shift graph
196 (* handle epsilon productions
*)
197 fn (rule
as RULE
{rhs
=nil
,...}) =>
198 [findRef(state
,ITEM
{rule
=rule
,dot
=0,rhsAfter
=nil
})]
199 |
(rule
as RULE
{rhs
=sym
::rest
,...}) =>
200 let val pos
= Int.max(look_pos rule
,1)
201 fun scan
'(state
,nil
,pos
,result
) =
202 findRef(state
,ITEM
{rule
=rule
,
204 rhsAfter
=nil
}) :: result
205 | scan
'(state
,rhs
as sym
::rest
,pos
,result
) =
206 scan
'(shift(state
,sym
), rest
, pos
+1,
207 findRef(state
,ITEM
{rule
=rule
,
209 rhsAfter
=rhs
})::result
)
211 (* find first item
of the form A
-> x
.B y
, where y
=*=> epsilon
and
212 x is not epsilon
, or A
-> x
. use scan
' to pick up all refs after this
215 fun scan(state
,nil
,_
) =
216 [findRef(state
,ITEM
{rule
=rule
,dot
=pos
,rhsAfter
=nil
})]
217 |
scan(state
,rhs
,0) = scan
'(state
,rhs
,pos
,nil
)
218 |
scan(state
,sym
::rest
,place
) =
219 scan(shift(state
,sym
),rest
,place
-1)
221 in scan(shift(state
,sym
),rest
,pos
-1)
226 (* function to compute for some nonterminal n the set
of nonterminals A added
227 through the closure
of nonterminal n such that n
=c
*=> .A x
, where x is
230 val nonterms_w_null
= fn nt
=>
231 let val collect_nonterms
= fn n
=>
232 List.foldr (fn (rule
as RULE
{rhs
=rhs
as NONTERM n
:: _
,...},r
) =>
234 (rest_is_null(ITEM
{dot
=0,rhsAfter
=rhs
,rule
=rule
}))
237 |
(_
,r
) => r
) [] (produces n
)
238 fun dfs(a
as (n
,r
)) =
239 if (NontermSet
.exists a
) then r
240 else List.foldr
dfs (NontermSet
.insert(n
,r
))
242 in dfs(nt
,NontermSet
.empty
)
245 val nonterms_w_null
=
246 let val data
= Array
.array(nonterms
,NontermSet
.empty
)
247 fun f n
= if n
=nonterms
then ()
248 else (Array
.update(data
,n
,nonterms_w_null (NT n
));
250 in (f
0; fn (NT nt
) => data sub nt
)
253 (* look_info
: for some nonterminal n the set
of nonterms A added
254 through the closure
of the nonterminal such that n
=c
+=> .Ax
and the
255 lookahead accumlated for each nonterm A
*)
257 val look_info
= fn nt
=>
258 let val collect_nonterms
= fn n
=>
259 List.foldr (fn (RULE
{rhs
=NONTERM n
:: t
,...},r
) =>
260 (case NTL
.find ((n
,nil
),r
)
261 of SOME (key
,data
) =>
262 NTL
.insert((n
,Look
.union(data
,first t
)),r
)
263 | NONE
=> NTL
.insert ((n
,first t
),r
))
265 NTL
.empty (produces n
)
266 fun dfs(a
as ((key1
,data1
),r
)) =
269 NTL
.insert((key1
,Look
.union(data1
,data2
)),r
)
270 | NONE
=> NTL
.fold
dfs (collect_nonterms key1
)
272 in dfs((nt
,nil
),NTL
.empty
)
276 if not DEBUG
then look_info
278 (print
"look_info of "; prNonterm nt
; print
"=\n";
279 let val info
= look_info nt
280 in (NTL
.app (fn (nt
,lookahead
) =>
281 (prNonterm nt
; print
": "; prLook lookahead
;
286 (* prop_look
: propagate lookaheads for nonterms added
in the closure
of a
287 nonterm
. Lookaheads must be propagated from each nonterminal m to
288 all nonterminals
{ n | m
=c
+=> nx
, where x
=*=>epsilon
} *)
290 val prop_look
= fn ntl
=>
291 let val upd_lookhd
= fn new_look
=> fn (nt
,r
) =>
292 case NTL
.find ((nt
,new_look
),r
)
293 of SOME (_
,old_look
) =>
294 NTL
.insert((nt
, Look
.union(new_look
,old_look
)),r
)
295 | NONE
=> raise (Lalr
241)
296 val upd_nonterm
= fn ((nt
,look
),r
) =>
297 NontermSet
.fold (upd_lookhd look
)
298 (nonterms_w_null nt
) r
299 in NTL
.fold upd_nonterm ntl ntl
303 if not DEBUG
then prop_look
305 (print
"prop_look =\n";
306 let val info
= prop_look ntl
307 in (NTL
.app (fn (nt
,lookahead
) =>
311 print
"\n\n")) info
; info
)
314 (* now put the information from these functions together
. Create a function
315 which takes a nonterminal n
and returns a list
of triplets
of
316 (a nonterm added through closure
,
317 the lookahead for the nonterm
,
318 whether the nonterm should
include the lookahead for the nonterminal
319 whose closure is being
taken (i
.e
. first(y
) for an item j
of the
320 form A
-> x
.n y
and lookahead(j
) if y
=*=> epsilon
)
323 val closure_nonterms
=
325 Array
.array(nonterms
,nil
: (nonterm
* term list
* bool) list
)
326 val do_nonterm
= fn i
=>
327 let val nonterms_followed_by_null
=
329 val nonterms_added_through_closure
=
330 NTL
.make_list (prop_look (look_info i
))
333 (nt
,l
,NontermSet
.exists (nt
,nonterms_followed_by_null
))
334 ) nonterms_added_through_closure
336 (print
"closure_nonterms = ";
339 app (fn (nt
,look
,nullable
) =>
344 of false => print
"(false)\n"
345 |
true => print
"(true)\n")) result
;
351 if i
=nonterms
then ()
352 else (Array
.update(data
,i
,do_nonterm (NT i
)); f (i
+1))
354 in fn (NT i
) => data sub i
357 (* add_nonterm_lookahead
: Add lookahead to all completion items for rules added
358 when the closure
of a given nonterm
in some state is taken
. It returns
359 a list
of lookahead refs to which the given nonterm
's lookahead should
360 be propagated
. For each rule
, it must trace the shift
/gotos
in the
LR(0)
361 graph to find all items
of the form A
-> x
.B y
where y
=*=> epsilon or
365 val add_nonterm_lookahead
= fn (nt
,state
) =>
366 let val f
= fn ((nt
,lookahead
,nullable
),r
) =>
367 let val refs
= map (findRuleRefs state
) (produces nt
)
368 val refs
= List.concat refs
370 r
:= (Look
.union (!r
,lookahead
))) refs
371 in if nullable
then refs @ r
else r
373 in List.foldr f
[] (closure_nonterms nt
)
376 (* scan_core
: Scan a core for all items
of the form A
-> x
.B y
. Applies
377 add_nonterm_lookahead to each such B
, and then merges
first(y
) into
378 the list
of refs returned by add_nonterm_lookahead
. It returns
379 a list
of ref
* ref list for all the items
where y
=*=> epsilon
*)
381 val scan_core
= fn (CORE (l
,state
)) =>
382 let fun f ((item
as ITEM
{rhsAfter
= NONTERM b
:: y
,
384 (case (add_nonterm_lookahead(b
,state
))
387 let val first_y
= first y
388 val newr
= if dot
>= (look_pos rule
)
389 then (findRef(state
,item
),l
)::r
392 r
:= Look
.union(!r
,first_y
)) l
;
395 |
f (_
:: t
,r
) = f (t
,r
)
400 (* add
end-of-parse symbols to set
of items consisting
of all items
401 immediately derived from the start symbol
*)
403 val add_eop
= fn (c
as CORE (l
,state
),eop
) =>
404 let fun f (item
as ITEM
{rule
,dot
,...}) =
405 let val refs
= findRuleRefs state rule
408 (* first take care
of kernal items
. Add the
end-of-parse symbols to
409 the lookahead sets for these items
. Epsilon productions
of the
410 start symbol
do not need to be handled specially because they will
411 be
in the kernal also
*)
413 app (fn r
=> r
:= Look
.union(!r
,eop
)) refs
;
415 (* now take care
of closure items
. These are all nonterminals C which
416 have a derivation S
=+=> .C x
, where x is nullable
*)
418 if dot
>= (look_pos rule
) then
420 of ITEM
{rhsAfter
=NONTERM b
:: _
,...} =>
421 (case add_nonterm_lookahead(b
,state
)
423 | l
=> app (fn r
=> r
:= Look
.union(!r
,eop
)) l
)
430 val iterate
= fn l
=>
431 let fun f
lookahead (nil
,done
) = done
432 | f
lookahead (h
::t
,done
) =
434 in h
:= Look
.union (old
,lookahead
);
435 if (length (!h
)) <> (length old
)
436 then f
lookahead (t
,false)
437 else f
lookahead(t
,done
)
439 fun g ((from
,to
)::rest
,done
) =
440 let val new_done
= f (!from
) (to
,done
)
443 |
g (nil
,done
) = done
445 | loop
false = loop (g (l
,true))
449 val lookahead
= List.concat (map
scan_core (nodes graph
))
451 (* used to scan the item list
of a TMPCORE
and remove the items not
454 val create_lcore_list
=
455 fn ((item
as ITEM
{rhsAfter
=nil
,...},ref l
),r
) =>
459 in add_eop(Graph
.core graph
0,eop
);
461 map (fn (TMPCORE (l
,state
)) =>
462 LCORE (List.foldr create_lcore_list
[] l
, state
)) new_nodes