1 (**************************************************************************)
5 (* François Pottier, INRIA Rocquencourt *)
6 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
8 (* Copyright 2005-2008 Institut National de Recherche en Informatique *)
9 (* et en Automatique. All rights reserved. This file is distributed *)
10 (* under the terms of the GNU Library General Public License, with the *)
11 (* special exception on linking described in file LICENSE. *)
13 (**************************************************************************)
17 (* The LR parsing engine. *)
19 (* This module is used:
21 - at compile time, if so requested by the user, via the --interpret options;
22 - at run time, in the table-based back-end. *)
24 module Make
(T
: TABLE
) = struct
26 (* This propagates type and exception definitions. *)
33 (* --------------------------------------------------------------------------- *)
35 (* [discard] takes a token off the input stream, queries the lexer
36 for a new one, and stores it into [env.token], overwriting the
37 previous token. If [env.shifted] has not yet reached its limit,
41 let lexbuf = env
.lexbuf in
42 let token = env
.lexer
lexbuf in
44 Log.lookahead_token
lexbuf (T.token2terminal
token);
45 let shifted = env
.shifted + 1 in
47 env
.shifted <- shifted
49 (* --------------------------------------------------------------------------- *)
51 (* The type [void] is empty. Many of the functions below have return type
52 [void]. This guarantees that they never return a value. Instead, they
53 must stop by raising an exception: either [Accept] or [Error]. *)
57 (* --------------------------------------------------------------------------- *)
59 (* In the code-based back-end, the [run] function is sometimes responsible
60 for pushing a new cell on the stack. This is motivated by code sharing
61 concerns. In this interpreter, there is no such concern; [run]'s caller
62 is always responsible for updating the stack. *)
64 (* In the code-based back-end, there is a [run] function for each state
65 [s]. This function can behave in two slightly different ways, depending
66 on when it is invoked, or (equivalently) depending on [s].
68 If [run] is invoked after shifting a terminal symbol (or, equivalently,
69 if [s] has a terminal incoming symbol), then [run] discards a token,
70 unless [s] has a default reduction on [#]. (Indeed, in that case,
71 requesting the next token might drive the lexer off the end of the input
74 If, on the other hand, [run] is invoked after performing a goto transition,
75 or invoked directly by an entry point, then there is nothing to discard.
77 These two cases are reflected in [CodeBackend.gettoken].
79 Here, the code is structured in a slightly different way. It is up to
80 the caller of [run] to indicate whether to discard a token. *)
82 let rec run env please_discard
: void
=
84 (* Log the fact that we just entered this state. *)
86 let s = env
.current
in
89 (* If [please_discard] is set, discard a token and fetch the next one. *)
91 (* This flag is set when [s] is being entered by shifting a terminal
92 symbol and [s] does not have a default reduction on [#]. *)
94 if please_discard
then
97 (* Examine what situation we are in. This case analysis is analogous to
98 that performed in [CodeBackend.gettoken], in the sub-case where we do
99 not have a terminal incoming symbol. *)
103 reduce
(* there is a default reduction; perform it *)
104 continue
(* there is none; continue below *)
107 and continue env
: void
=
109 (* There is no default reduction. Consult the current lookahead token
110 so as to determine which action should be taken. *)
112 (* Peeking at the first input token, without taking it off the input
113 stream, is normally done by reading [env.token]. However, we check
114 [env.shifted] first: if it is -1, then the lookahead token is the
117 (* Note that, if we just called [discard] above, then the lookahead
118 token cannot be [error]. *)
120 if env
.shifted = (-1) then begin
121 Log.resuming_error_handling
();
127 (* --------------------------------------------------------------------------- *)
129 (* When [action] is invoked, we know that the current state does not have
130 a default reduction. We also know that the current lookahead token is
131 not [error]: it is a real token, stored in [env.token]. *)
133 and action env
: void
=
135 (* We consult the two-dimensional action table, indexed by the
136 current state and the current lookahead token, in order to
137 determine which action should be taken. *)
139 let token = env
.token in
141 env
.current
(* determines a row *)
142 (T.token2terminal
token) (* determines a column *)
143 (T.token2value
token)
144 shift
(* shift continuation *)
145 reduce
(* reduce continuation *)
146 initiate
(* failure continuation *)
149 (* --------------------------------------------------------------------------- *)
151 (* This function takes care of shift transitions along a terminal symbol.
152 (Goto transitions are taken care of within [reduce] below.) The symbol
153 can be either an actual token or the [error] pseudo-token. *)
156 (please_discard
: bool)
157 (terminal
: terminal
)
158 (value : semantic_value
)
162 (* Log the transition. *)
164 Log.shift terminal
s'
;
166 (* Push a new cell onto the stack, containing the identity of the
167 state that we are leaving. *)
169 let lexbuf = env
.lexbuf in
173 startp
= lexbuf.Lexing.lex_start_p
;
174 endp
= lexbuf.Lexing.lex_curr_p
;
178 (* Switch to state [s']. *)
181 run env please_discard
183 (* --------------------------------------------------------------------------- *)
185 (* This function takes care of reductions. *)
187 and reduce env
(prod
: production
) : void
=
189 (* Log a reduction event. *)
191 Log.reduce_or_accept prod
;
193 (* Invoke the semantic action. The semantic action is responsible for
194 truncating the stack, updating the current state, producing a cell that
195 contains a new semantic value, and raising [Accept] or [Error] if
198 (* If the semantic action raises [Error], we catch it immediately and
199 initiate error handling. *)
201 (* The apparently weird idiom used here is an encoding for a
202 [let/unless] construct, which does not exist in ocaml. *)
206 T.semantic_action prod env
;
212 (* By our convention, the semantic action is responsible for updating
213 the stack. The state now found in the top stack cell is the return
216 (* Perform a goto transition. The target state is determined
217 by consulting the goto table at the return state and at
218 production [prod]. *)
220 env
.current
<- T.goto env
.stack
.state prod
;
228 (* --------------------------------------------------------------------------- *)
230 (* The following functions deal with errors. *)
232 (* [initiate] and [errorbookkeeping] initiate error handling. See the functions
233 by the same names in [CodeBackend]. *)
235 and initiate env
: void
=
236 assert (env
.shifted >= 0);
237 if T.recovery
&& env
.shifted = 0 then begin
238 Log.discarding_last_token
(T.token2terminal env
.token);
246 and errorbookkeeping env
=
247 Log.initiating_error_handling
();
248 env
.previouserror
<- env
.shifted;
252 (* [error] handles errors. *)
254 and error env
: void
=
256 (* Consult the column associated with the [error] pseudo-token in the
260 env
.current
(* determines a row *)
261 T.error_terminal
(* determines a column *)
263 error_shift
(* shift continuation *)
264 error_reduce
(* reduce continuation *)
265 error_fail
(* failure continuation *)
268 and error_shift env please_discard terminal
value s'
=
270 (* Here, [terminal] is [T.error_terminal], and [value] is [T.error_value]. *)
272 assert (terminal
= T.error_terminal
&& value = T.error_value
);
274 (* This state is capable of shifting the [error] token. *)
276 Log.handling_error env
.current
;
277 shift env please_discard terminal
value s'
279 and error_reduce env prod
=
281 (* This state is capable of performing a reduction on [error]. *)
283 Log.handling_error env
.current
;
288 (* This state is unable to handle errors. Attempt to pop a stack
291 let cell = env
.stack
in
292 let next = cell.next in
295 (* The stack is empty. Die. *)
301 (* The stack is nonempty. Pop a cell, updating the current state
302 with that found in the popped cell, and try again. *)
305 env
.current
<- cell.state
;
310 (* --------------------------------------------------------------------------- *)
314 (lexer
: Lexing.lexbuf -> token)
315 (lexbuf : Lexing.lexbuf)
318 (* Build an empty stack. This is a dummy cell, which is its own
319 successor. Its fields other than [next] contain dummy values. *)
322 state
= s; (* dummy *)
323 semv
= T.error_value
; (* dummy *)
324 startp
= lexbuf.Lexing.lex_start_p
; (* dummy *)
325 endp
= lexbuf.Lexing.lex_curr_p
; (* dummy *)
329 (* Perform an initial call to the lexer. *)
335 (* Log our first lookahead token. *)
337 Log.lookahead_token
lexbuf (T.token2terminal
token);
339 (* Build an initial environment. *)
346 previouserror
= max_int
;
351 (* Run. Catch [Accept], which represents normal termination. Let [Error]
356 (* If ocaml offered a [match/with] construct with zero branches, this is
357 what we would use here, since the type [void] has zero cases. *)
359 let (_
: void
) = run env false in
360 assert false (* cannot fail *)