8e94d5b1004361f21bc340f68de3079f07bde181
[bpt/coccinelle.git] / bundles / menhirLib / menhir-20120123 / src / tableFormat.ml
1 (**************************************************************************)
2 (* *)
3 (* Menhir *)
4 (* *)
5 (* François Pottier, INRIA Rocquencourt *)
6 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
7 (* *)
8 (* Copyright 2005-2008 Institut National de Recherche en Informatique *)
9 (* et en Automatique. All rights reserved. This file is distributed *)
10 (* under the terms of the GNU Library General Public License, with the *)
11 (* special exception on linking described in file LICENSE. *)
12 (* *)
13 (**************************************************************************)
14
15 (* This signature defines the format of the parse tables. It is used as
16 an argument to [TableInterpreter]. *)
17
18 module type TABLES = sig
19
20 (* This is the parser's type of tokens. *)
21
22 type token
23
24 (* This maps a token to its internal (generation-time) integer code. *)
25
26 val token2terminal: token -> int
27
28 (* This is the integer code for the error pseudo-token. *)
29
30 val error_terminal: int
31
32 (* This maps a token to its semantic value. *)
33
34 val token2value: token -> Obj.t
35
36 (* Traditionally, an LR automaton is described by two tables, namely, an
37 action table and a goto table. See, for instance, the Dragon book.
38
39 The action table is a two-dimensional matrix that maps a state and a
40 lookahead token to an action. An action is one of: shift to a certain
41 state, reduce a certain production, accept, or fail.
42
43 The goto table is a two-dimensional matrix that maps a state and a
44 non-terminal symbol to either a state or undefined. By construction, this
45 table is sparse: its undefined entries are never looked up. A compression
46 technique is free to overlap them with other entries.
47
48 In Menhir, things are slightly different. If a state has a default
49 reduction on token [#], then that reduction must be performed without
50 consulting the lookahead token. As a result, we must first determine
51 whether that is the case, before we can obtain a lookahead token and use it
52 as an index in the action table.
53
54 Thus, Menhir's tables are as follows.
55
56 A one-dimensional default reduction table maps a state to either ``no
57 default reduction'' (encoded as: 0) or ``by default, reduce prod''
58 (encoded as: 1 + prod). The action table is looked up only when there
59 is no default reduction. *)
60
61 val default_reduction: PackedIntArray.t
62
63 (* Menhir follows Dencker, Dürre and Heuft, who point out that, although the
64 action table is not sparse by nature (i.e., the error entries are
65 significant), it can be made sparse by first factoring out a binary error
66 matrix, then replacing the error entries in the action table with undefined
67 entries. Thus:
68
69 A two-dimensional error bitmap maps a state and a terminal to either
70 ``fail'' (encoded as: 0) or ``do not fail'' (encoded as: 1). The action
71 table, which is now sparse, is looked up only in the latter case. *)
72
73 (* The error bitmap is flattened into a one-dimensional table; its width is
74 recorded so as to allow indexing. The table is then compressed via
75 [PackedIntArray]. The bit width of the resulting packed array must be
76 [1], so it is not explicitly recorded. *)
77
78 (* The error bitmap does not contain a column for the [#] pseudo-terminal.
79 Thus, its width is [Terminal.n - 1]. We exploit the fact that the integer
80 code assigned to [#] is greatest: the fact that the right-most column
81 in the bitmap is missing does not affect the code for accessing it. *)
82
83 val error: int (* width of the bitmap *) * string (* second component of [PackedIntArray.t] *)
84
85 (* A two-dimensional action table maps a state and a terminal to one of
86 ``shift to state s and discard the current token'' (encoded as: s | 10),
87 ``shift to state s without discarding the current token'' (encoded as: s |
88 11), or ``reduce prod'' (encoded as: prod | 01). *)
89
90 (* The action table is first compressed via [RowDisplacement], then packed
91 via [PackedIntArray]. *)
92
93 (* Like the error bitmap, the action table does not contain a column for the
94 [#] pseudo-terminal. *)
95
96 val action: PackedIntArray.t * PackedIntArray.t
97
98 (* A one-dimensional lhs table maps a production to its left-hand side (a
99 non-terminal symbol). *)
100
101 val lhs: PackedIntArray.t
102
103 (* A two-dimensional goto table maps a state and a non-terminal symbol to
104 either undefined (encoded as: 0) or a new state s (encoded as: 1 + s). *)
105
106 (* The goto table is first compressed via [RowDisplacement], then packed
107 via [PackedIntArray]. *)
108
109 val goto: PackedIntArray.t * PackedIntArray.t
110
111 (* A one-dimensional semantic action table maps productions to semantic
112 actions. The calling convention for semantic actions is described in
113 [EngineTypes]. *)
114
115 val semantic_action: ((int, Obj.t, token) EngineTypes.env -> unit) array
116
117 (* The parser defines its own [Error] exception. This exception can be
118 raised by semantic actions and caught by the engine, and raised by the
119 engine towards the final user. *)
120
121 exception Error
122
123 (* The parser indicates whether to perform error recovery. *)
124
125 val recovery: bool
126
127 (* The parser indicates whether to generate a trace. Generating a
128 trace requires two extra tables, which respectively map a
129 terminal symbol and a production to a string. *)
130
131 val trace: (string array * string array) option
132
133 end
134