+++ /dev/null
-(**************************************************************************)
-(* *)
-(* Menhir *)
-(* *)
-(* François Pottier, INRIA Rocquencourt *)
-(* Yann Régis-Gianas, PPS, Université Paris Diderot *)
-(* *)
-(* Copyright 2005-2008 Institut National de Recherche en Informatique *)
-(* et en Automatique. All rights reserved. This file is distributed *)
-(* under the terms of the Q Public License version 1.0, with the change *)
-(* described in file LICENSE. *)
-(* *)
-(**************************************************************************)
-
-open Grammar
-
-(* -------------------------------------------------------------------------- *)
-(* Our output channel. *)
-
-let out =
- lazy (open_out (Settings.base ^ ".conflicts"))
-
-(* -------------------------------------------------------------------------- *)
-(* Explaining shift actions. *)
-
-(* The existence of a shift action stems from the existence of a shift
- item in the LR(0) core that underlies the LR(1) state of interest.
- That is, lookahead sets are not relevant. The existence of a shift
- item in the LR(0) core is explained by finding a path from a start
- item to the shift item in the LR(0) nondeterministic automaton,
- such that the symbols read along this path form the (previously
- fixed) symbol string that leads to the conflict state in the LR(1)
- automaton. There may be several such paths: a shortest one is
- chosen. There may also be several shift items in the conflict
- state: an arbitrary one is chosen. I believe it would not be
- interesting to display traces for several shift items: they would
- be identical except in their last line (where the desired shift
- item actually appears). *)
-
-(* Symbolic execution of the nondeterministic LR(0) automaton. *)
-
-(* Configurations are pairs of an LR(0) item and an offset into
- the input string, which indicates how much has been read so
- far. *)
-
-type configuration0 =
- Item.t * int
-
-(* This function builds a derivation out of a (nonempty, reversed) sequence of
- configurations. The derivation is constructed from bottom to top, that is,
- beginning at the last configuration and moving back towards to the start
- configuration. *)
-
-let rec follow derivation offset' = function
- | [] ->
- assert (offset' = 0);
- derivation
- | (item, offset) :: configs ->
- let _, _, rhs, pos, _ = Item.def item in
- let derivation =
- if offset = offset' then
- (* This is an epsilon transition. Put a new root node on top of
- the existing derivation. *)
- Derivation.build pos rhs derivation None
- else
- (* This was a shift transition. Tack symbol in front of the
- forest. *)
- Derivation.prepend rhs.(pos) derivation
- in
- follow derivation offset configs
-
-(* Symbolic execution begins with a start item (corresponding
- to one of the automaton's entry nodes), a fixed string of
- input symbols, to be fully consumed, and a goal item. The
- objective is to find a path through the automaton that
- leads from the start configuration [(stop, 0)] to the goal
- configuration [(stop, n)], where [n] is the length of the
- input string. The automaton is explored via breadth-first
- search. A hash table is used to record which configurations
- have been visited and to build a spanning tree of shortest
- paths. *)
-
-exception Done
-
-let explain_shift_item
- (start : Item.t)
- (input : Symbol.t array)
- (stop : Item.t)
- : Derivation.t =
-
- let n =
- Array.length input
- in
-
- let table : (configuration0, configuration0 option) Hashtbl.t =
- Hashtbl.create 1023
- in
-
- let queue : configuration0 Queue.t =
- Queue.create()
- in
-
- let enqueue ancestor config =
- try
- let _ = Hashtbl.find table config in
- ()
- with Not_found ->
- Hashtbl.add table config ancestor;
- Queue.add config queue
- in
-
- enqueue None (start, 0);
- try
- Misc.qiter (function (item, offset) as config ->
-
- (* If the item we're looking at is the goal item and if
- we have read all of the input symbols, stop. *)
-
- if (Item.equal item stop) && (offset = n) then
- raise Done;
-
- (* Otherwise, explore the transitions out of this item. *)
-
- let prod, _, rhs, pos, length = Item.def item in
-
- (* Shift transition, followed only if the symbol matches
- the symbol found in the input string. *)
-
- if (pos < length)
- && (offset < n)
- && (Symbol.equal rhs.(pos) input.(offset)) then begin
- let config' = (Item.import (prod, pos+1), offset+1) in
- enqueue (Some config) config'
- end;
-
- (* Epsilon transitions. *)
-
- if pos < length then
- match rhs.(pos) with
- | Symbol.N nt ->
- Production.iternt nt (fun prod ->
- let config' = (Item.import (prod, 0), offset) in
- enqueue (Some config) config'
- )
- | Symbol.T _ ->
- ()
-
- ) queue;
- assert false
- with Done ->
-
- (* We have found a (shortest) path from the start configuration to
- the goal configuration. Turn it into an explicit derivation. *)
-
- let configs = Misc.materialize table (stop, n) in
- let _, _, rhs, pos, _ = Item.def stop in
- let derivation = Derivation.tail pos rhs in
- let derivation = follow derivation n configs in
- derivation
-
-(* -------------------------------------------------------------------------- *)
-(* Explaining reduce actions. *)
-
-(* The existence of a reduce action stems from the existence of a
- reduce item, whose lookahead set contains the token of interest, in
- the state of interest. Here, lookahead sets are relevant only
- insofar as they contain or do not contain the token of interest --
- in other words, lookahead sets can be abstracted by Boolean
- values. The existence of the reduce item is explained by finding a
- path from a start item to the reduce item in the LR(1)
- nondeterministic automaton, such that the symbols read along this
- path form the (previously fixed) symbol string that leads to the
- conflict state in the LR(1) automaton. There may be several such
- paths: a shortest one is chosen. *)
-
-(* Symbolic execution of the nondeterministic LR(1) automaton. *)
-
-(* Configurations are pairs of an LR(1) item and an offset into the
- input string, which indicates how much has been read so far. An
- LR(1) item is itself represented as the combination of an LR(0)
- item and a Boolean flag, telling whether the token of interest
- appears or does not appear in the lookahead set. *)
-
-type configuration1 =
- Item.t * bool * int
-
-(* This function builds a derivation out of a sequence of configurations. The
- end of the sequence is dealt with specially -- we want to explain how the
- lookahead symbol appears and is inherited. Once that is done, the rest
- (that is, the beginning) of the derivation is dealt with as above. *)
-
-let config1toconfig0 (item, _, offset) =
- (item, offset)
-
-let rec follow1 tok derivation offset' = function
- | [] ->
- assert (Terminal.equal tok Terminal.sharp);
- (* One could emit a comment saying that the lookahead token is
- initially [#]. That comment would have to be displayed above
- the derivation, though, and there is no support for that
- at the moment, so let's skip it. *)
- derivation
- | (item, _, offset) :: configs ->
- let _, _, rhs, pos, length = Item.def item in
- if offset = offset' then
-
- (* This is an epsilon transition. Attack a new line and add
- a comment that explains why the lookahead symbol is
- produced or inherited. *)
-
- let nullable, first = Analysis.nullable_first_rhs rhs (pos + 1) in
-
- if TerminalSet.mem tok first then
-
- (* The lookahead symbol is produced (and perhaps also inherited,
- but let's ignore that). *)
-
- let e = Analysis.explain_first_rhs tok rhs (pos + 1) in
- let comment =
- "lookahead token appears" ^ (if e = "" then "" else " because " ^ e)
- in
- let derivation =
- Derivation.build pos rhs derivation (Some comment)
- in
-
- (* Print the rest of the derivation without paying attention to
- the lookahead symbols. *)
-
- follow derivation offset (List.map config1toconfig0 configs)
-
- else begin
-
- (* The lookahead symbol is not produced, so it is definitely inherited. *)
-
- assert nullable;
-
- let comment =
- "lookahead token is inherited" ^
- (if pos + 1 < length then Printf.sprintf " because %scan vanish" (Symbol.printao (pos + 1) rhs) else "")
- in
- let derivation =
- Derivation.build pos rhs derivation (Some comment)
- in
-
- follow1 tok derivation offset configs
-
- end
-
- else
-
- (* This is a shift transition. Tack symbol in front of forest. *)
-
- let derivation =
- Derivation.prepend rhs.(pos) derivation
- in
-
- follow1 tok derivation offset configs
-
-(* Symbolic execution is performed in the same manner as above. *)
-
-let explain_reduce_item
- (tok : Terminal.t)
- (start : Item.t)
- (input : Symbol.t array)
- (stop : Item.t)
- : Derivation.t =
-
- let n =
- Array.length input
- in
-
- let table : (configuration1, configuration1 option) Hashtbl.t =
- Hashtbl.create 1023
- in
-
- let queue : configuration1 Queue.t =
- Queue.create()
- in
-
- let enqueue ancestor config =
- try
- let _ = Hashtbl.find table config in
- ()
- with Not_found ->
- Hashtbl.add table config ancestor;
- Queue.add config queue
- in
-
- (* If the lookahead token is #, then it initially appear in the lookahead
- set, otherwise it doesn't. *)
-
- enqueue None (start, Terminal.equal tok Terminal.sharp, 0);
- try
- Misc.qiter (function (item, lookahead, offset) as config ->
-
- (* If the item we're looking at is the goal item and if
- we have read all of the input symbols, stop. *)
-
- if (Item.equal item stop) && lookahead && (offset = n) then
- raise Done;
-
- (* Otherwise, explore the transitions out of this item. *)
-
- let prod, nt, rhs, pos, length = Item.def item in
-
- (* Shift transition, followed only if the symbol matches
- the symbol found in the input string. *)
-
- if (pos < length)
- && (offset < n)
- && (Symbol.equal rhs.(pos) input.(offset)) then begin
- let config' = (Item.import (prod, pos+1), lookahead, offset+1) in
- enqueue (Some config) config'
- end;
-
- (* Epsilon transitions. *)
-
- if pos < length then
- match rhs.(pos) with
- | Symbol.N nt ->
- let nullable, first = Analysis.nullable_first_rhs rhs (pos + 1) in
- let first : bool = TerminalSet.mem tok first in
- let lookahead' =
- if nullable then first || lookahead else first
- in
- Production.iternt nt (fun prod ->
- let config' = (Item.import (prod, 0), lookahead', offset) in
- enqueue (Some config) config'
- )
- | Symbol.T _ ->
- ()
-
- ) queue;
- assert false
- with Done ->
-
- (* We have found a (shortest) path from the start configuration to
- the goal configuration. Turn it into an explicit derivation. *)
-
- let configs = Misc.materialize table (stop, true, n) in
- let derivation = Derivation.empty in
- let derivation = follow1 tok derivation n configs in
- derivation
-
-(* -------------------------------------------------------------------------- *)
-(* Putting it all together. *)
-
-let () =
- if Settings.explain then begin
-
- Lr1.conflicts (fun toks node ->
-
- (* Construct a partial LR(1) automaton, looking for a conflict
- in a state that corresponds to this node. Because Pager's
- algorithm can merge two states as soon as one of them has a
- conflict, we can't be too specific about the conflict that we
- expect to find in the canonical automaton. So, we must supply
- a set of conflict tokens and accept any kind of conflict that
- involves one of them. *)
-
- (* TEMPORARY with the new compatibility criterion, we can be
- sure that every conflict token is indeed involved in a
- conflict. Exploit that? Avoid focusing on a single token? *)
-
- let module P = Lr1partial.Run (struct
- let tokens = toks
- let goal = node
- end) in
-
- let closure =
- Lr0.closure P.goal in
-
- (* Determine what kind of conflict was found. *)
-
- let shift, reduce = Item.Map.fold (fun item toks (shift, reduce) ->
- match Item.classify item with
- | Item.Shift (Symbol.T tok, _)
- when Terminal.equal tok P.token ->
- shift + 1, reduce
- | Item.Reduce prod
- when TerminalSet.mem P.token toks ->
- shift, reduce + 1
- | _ ->
- shift, reduce
- ) closure (0, 0) in
-
- let kind =
- if (shift > 0) && (reduce > 1) then
- "shift/reduce/reduce"
- else if (shift > 0) then
- "shift/reduce"
- else
- "reduce/reduce"
- in
-
- (* Explain how the conflict state is reached. *)
-
- let out = Lazy.force out in
-
- Printf.fprintf out "\n\
- ** Conflict (%s) in state %d.\n\
- ** Token%s involved: %s\n%s\
- ** This state is reached from %s after reading:\n\n%s\n"
- kind (Lr1.number node)
- (if TerminalSet.cardinal toks > 1 then "s" else "")
- (TerminalSet.print toks)
- (if TerminalSet.cardinal toks > 1 then
- Printf.sprintf "** The following explanations concentrate on token %s.\n" (Terminal.print P.token)
- else "")
- (Nonterminal.print false (Item.startnt P.source))
- (Symbol.printa P.path);
-
- (* Examine the items in that state, focusing on one particular
- token. Out of the shift items, we explain just one -- this
- seems enough. We explain each of the reduce items. *)
-
- (* First, build a mapping of items to derivations. *)
-
- let (_ : bool), derivations =
- Item.Map.fold (fun item toks (still_looking_for_shift_item, derivations) ->
- match Item.classify item with
-
- | Item.Shift (Symbol.T tok, _)
- when still_looking_for_shift_item && (Terminal.equal tok P.token) ->
-
- false,
- let derivation = explain_shift_item P.source P.path item in
- Item.Map.add item derivation derivations
-
- | Item.Reduce prod
- when TerminalSet.mem P.token toks ->
-
- still_looking_for_shift_item,
- let derivation = explain_reduce_item P.token P.source P.path item in
- Item.Map.add item derivation derivations
-
- | _ ->
-
- still_looking_for_shift_item,
- derivations
-
- ) closure (true, Item.Map.empty)
- in
-
- (* Factor out the common context among all derivations, so as to avoid
- repeating it. This helps prevent derivation trees from drifting too
- far away towards the right. It also helps produce sub-derivations
- that are quite compact. *)
-
- let context, derivations =
- Derivation.factor derivations
- in
-
- (* Display the common context. *)
-
- Printf.fprintf out
- "\n** The derivations that appear below have the following common factor:\
- \n** (The question mark symbol (?) represents the spot where the derivations begin to differ.)\n\n";
- Derivation.printc out context;
-
- (* Then, display the sub-derivations. *)
-
- Item.Map.iter (fun item derivation ->
-
- Printf.fprintf out
- "\n** In state %d, looking ahead at %s, "
- (Lr1.number node)
- (Terminal.print P.token);
-
- begin match Item.classify item with
- | Item.Shift _ ->
- Printf.fprintf out "shifting is permitted\n** because of the following sub-derivation:\n\n"
- | Item.Reduce prod ->
- Printf.fprintf out
- "reducing production\n** %s\n** is permitted because of the following sub-derivation:\n\n"
- (Production.print prod)
- end;
-
- Derivation.print out derivation
-
- ) derivations;
-
- flush out
-
- );
- Time.tick "Explaining conflicts"
-
- end
-
-(* ------------------------------------------------------------------------ *)
-(* Resolve the conflicts that remain in the automaton. *)
-
-let () =
- Lr1.default_conflict_resolution();
- Time.tick "Resolving remaining conflicts"
-