Coccinelle release 1.0.0-rc13
[bpt/coccinelle.git] / bundles / menhirLib / menhir-20120123 / src / engineTypes.ml
1 (**************************************************************************)
2 (* *)
3 (* Menhir *)
4 (* *)
5 (* François Pottier, INRIA Rocquencourt *)
6 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
7 (* *)
8 (* Copyright 2005-2008 Institut National de Recherche en Informatique *)
9 (* et en Automatique. All rights reserved. This file is distributed *)
10 (* under the terms of the GNU Library General Public License, with the *)
11 (* special exception on linking described in file LICENSE. *)
12 (* *)
13 (**************************************************************************)
14
15 (* This file defines several types and module types that are used in the
16 specification of module [Engine]. *)
17
18 (* --------------------------------------------------------------------------- *)
19
20 (* It would be nice if we could keep the structure of stacks and environments
21 hidden. However, stacks and environments must be accessible to semantic
22 actions, so the following data structure definitions must be public. *)
23
24 (* --------------------------------------------------------------------------- *)
25
26 (* A stack is a linked list of cells. A sentinel cell -- which is its own
27 successor -- is used to mark the bottom of the stack. The sentinel cell
28 itself is not significant -- it contains dummy values. *)
29
30 type ('state, 'semantic_value) stack = {
31
32 (* The state that we should go back to if we pop this stack cell. *)
33
34 (* This convention means that the state contained in the top stack cell is
35 not the current state [env.current]. It also means that the state found
36 within the sentinel is a dummy -- it is never consulted. This convention
37 is the same as that adopted by the code-based back-end. *)
38
39 state: 'state;
40
41 (* The semantic value associated with the chunk of input that this cell
42 represents. *)
43
44 semv: 'semantic_value;
45
46 (* The start and end positions of the chunk of input that this cell
47 represents. *)
48
49 startp: Lexing.position;
50 endp: Lexing.position;
51
52 (* The next cell down in the stack. If this is a self-pointer, then this
53 cell is the sentinel, and the stack is conceptually empty. *)
54
55 next: ('state, 'semantic_value) stack;
56
57 }
58
59 (* --------------------------------------------------------------------------- *)
60
61 (* A parsing environment contains basically all of the automaton's state. *)
62
63 type ('state, 'semantic_value, 'token) env = {
64
65 (* The lexer. *)
66
67 lexer: Lexing.lexbuf -> 'token;
68
69 (* The lexing buffer. It is used as an argument to the lexer, and also
70 accessed directly when extracting positions. *)
71
72 lexbuf: Lexing.lexbuf;
73
74 (* The last token that was obtained from the lexer. *)
75
76 mutable token: 'token;
77
78 (* A count of how many tokens were shifted since the beginning, or since
79 the last [error] token was encountered. By convention, if [shifted]
80 is (-1), then the current lookahead token is [error]. *)
81
82 mutable shifted: int;
83
84 (* A copy of the value of [shifted] just before the most recent error
85 was detected. This value is not used by the automaton itself, but
86 is made accessible to semantic actions. *)
87
88 mutable previouserror: int;
89
90 (* The stack. In [CodeBackend], it is passed around on its own,
91 whereas, here, it is accessed via the environment. *)
92
93 mutable stack: ('state, 'semantic_value) stack;
94
95 (* The current state. In [CodeBackend], it is passed around on its
96 own, whereas, here, it is accessed via the environment. *)
97
98 mutable current: 'state;
99
100 }
101
102 (* --------------------------------------------------------------------------- *)
103
104 (* This signature describes the parameters that must be supplied to the LR
105 engine. *)
106
107 module type TABLE = sig
108
109 (* The type of automaton states. *)
110
111 type state
112
113 (* The type of tokens. These can be thought of as real tokens, that is,
114 tokens returned by the lexer. They carry a semantic value. This type
115 does not include the [error] pseudo-token. *)
116
117 type token
118
119 (* The type of terminal symbols. These can be thought of as integer codes.
120 They do not carry a semantic value. This type does include the [error]
121 pseudo-token. *)
122
123 type terminal
124
125 (* The type of semantic values. *)
126
127 type semantic_value
128
129 (* A token is conceptually a pair of a (non-[error]) terminal symbol and
130 a semantic value. The following two functions are the pair projections. *)
131
132 val token2terminal: token -> terminal
133 val token2value: token -> semantic_value
134
135 (* Even though the [error] pseudo-token is not a real token, it is a
136 terminal symbol. Furthermore, for regularity, it must have a semantic
137 value. *)
138
139 val error_terminal: terminal
140 val error_value: semantic_value
141
142 (* The type of productions. *)
143
144 type production
145
146 (* If a state [s] has a default reduction on production [prod], then, upon
147 entering [s], the automaton should reduce [prod] without consulting the
148 lookahead token. The following function allows determining which states
149 have default reductions. *)
150
151 (* Instead of returning a value of a sum type -- either [DefRed prod], or
152 [NoDefRed] -- it accepts two continuations, and invokes just one of
153 them. This mechanism allows avoiding a memory allocation. *)
154
155 val default_reduction:
156 state ->
157 ('env -> production -> 'answer) ->
158 ('env -> 'answer) ->
159 'env -> 'answer
160
161 (* An LR automaton can normally take three kinds of actions: shift, reduce,
162 or fail. (Acceptance is a particular case of reduction: it consists in
163 reducing a start production.) *)
164
165 (* There are two variants of the shift action. [shift/discard s] instructs
166 the automaton to discard the current token, request a new one from the
167 lexer, and move to state [s]. [shift/nodiscard s] instructs it to move to
168 state [s] without requesting a new token. This instruction should be used
169 when [s] has a default reduction on [#]. See [CodeBackend.gettoken] for
170 details. *)
171
172 (* This is the automaton's action table. It maps a pair of a state and a
173 terminal symbol to an action. *)
174
175 (* Instead of returning a value of a sum type -- one of shift/discard,
176 shift/nodiscard, reduce, or fail -- this function accepts three
177 continuations, and invokes just one them. This mechanism allows avoiding
178 a memory allocation. *)
179
180 (* In summary, the parameters to [action] are as follows:
181
182 - the first two parameters, a state and a terminal symbol, are used to
183 look up the action table;
184
185 - the next parameter is the semantic value associated with the above
186 terminal symbol; it is not used, only passed along to the shift
187 continuation, as explained below;
188
189 - the shift continuation expects an environment; a flag that tells
190 whether to discard the current token; the terminal symbol that
191 is being shifted; its semantic value; and the target state of
192 the transition;
193
194 - the reduce continuation expects an environment and a production;
195
196 - the fail continuation expects an environment;
197
198 - the last parameter is the environment; it is not used, only passed
199 along to the selected continuation. *)
200
201 val action:
202 state ->
203 terminal ->
204 semantic_value ->
205 ('env -> bool -> terminal -> semantic_value -> state -> 'answer) ->
206 ('env -> production -> 'answer) ->
207 ('env -> 'answer) ->
208 'env -> 'answer
209
210 (* This is the automaton's goto table. It maps a pair of a state and a
211 production to a new state.
212
213 This convention is slightly different from the textbook approach. The
214 goto table is usually indexed by a state and a non-terminal symbol. *)
215
216 val goto: state -> production -> state
217
218 (* By convention, a semantic action is responsible for:
219
220 1. fetching whatever semantic values and positions it needs off the stack;
221
222 2. popping an appropriate number of cells off the stack, as dictated
223 by the length of the right-hand side of the production; this involves
224 updating [env.stack];
225
226 3. computing a new semantic value, as well as new start and end positions;
227
228 4. pushing a new stack cell, which contains the three values
229 computed in step 3; this again involves updating [env.stack]
230 (only one update is necessary).
231
232 Point 1 is essentially forced upon us: if semantic values were fetched
233 off the stack by this interpreter, then the calling convention for
234 semantic actions would be variadic: not all semantic actions would have
235 the same number of arguments. The rest follows rather naturally. *)
236
237 (* If production [prod] is an accepting production, then the semantic action
238 is responsible for raising exception [Accept], instead of returning
239 normally. This convention allows us to not distinguish between regular
240 productions and accepting productions. All we have to do is catch that
241 exception at top level. *)
242
243 (* Semantic actions are allowed to raise [Error]. *)
244
245 exception Accept of semantic_value
246 exception Error
247
248 type semantic_action =
249 (state, semantic_value, token) env -> unit
250
251 val semantic_action: production -> semantic_action
252
253 (* The LR engine can attempt error recovery. This consists in discarding
254 tokens, just after an error has been successfully handled, until a token
255 that can be successfully handled is found. This mechanism is optional.
256 The following flag enables it. *)
257
258 val recovery: bool
259
260 (* The LR engine requires a number of hooks, which are used for logging. *)
261
262 (* The comments below indicate the conventional messages that correspond
263 to these hooks in the code-based back-end; see [CodeBackend]. *)
264
265 module Log : sig
266
267 (* State %d: *)
268
269 val state: state -> unit
270
271 (* Shifting (<terminal>) to state <state> *)
272
273 val shift: terminal -> state -> unit
274
275 (* Reducing a production should be logged either as a reduction
276 event (for regular productions) or as an acceptance event (for
277 start productions). *)
278
279 (* Reducing production <production> / Accepting *)
280
281 val reduce_or_accept: production -> unit
282
283 (* Lookahead token is now <terminal> (<pos>-<pos>) *)
284
285 val lookahead_token: Lexing.lexbuf -> terminal -> unit
286
287 (* Initiating error handling *)
288
289 val initiating_error_handling: unit -> unit
290
291 (* Resuming error handling *)
292
293 val resuming_error_handling: unit -> unit
294
295 (* Handling error in state <state> *)
296
297 val handling_error: state -> unit
298
299 (* Discarding last token read (<terminal>) *)
300
301 val discarding_last_token: terminal -> unit
302
303 end
304
305 end
306
307 (* --------------------------------------------------------------------------- *)
308
309 (* This signature describes the LR engine. *)
310
311 module type ENGINE = sig
312
313 type state
314
315 type token
316
317 type semantic_value
318
319 (* An entry point to the engine requires a start state, a lexer, and a lexing
320 buffer. It either succeeds and produces a semantic value, or fails and
321 raises [Error]. *)
322
323 exception Error
324
325 val entry:
326 state ->
327 (Lexing.lexbuf -> token) ->
328 Lexing.lexbuf ->
329 semantic_value
330
331 end