Commit | Line | Data |
---|---|---|
bb051423 CY |
1 | ;;; semantic/wisent/comp.el --- GNU Bison for Emacs - Grammar compiler |
2 | ||
73b0cd50 | 3 | ;; Copyright (C) 1984, 1986, 1989, 1992, 1995, 2000-2007, 2009-2011 |
e9bffc61 | 4 | ;; Free Software Foundation, Inc. |
bb051423 CY |
5 | |
6 | ;; Author: David Ponce <david@dponce.com> | |
7 | ;; Maintainer: David Ponce <david@dponce.com> | |
8 | ;; Created: 30 January 2002 | |
9 | ;; Keywords: syntax | |
10 | ||
11 | ;; This file is part of GNU Emacs. | |
12 | ||
13 | ;; GNU Emacs is free software: you can redistribute it and/or modify | |
14 | ;; it under the terms of the GNU General Public License as published by | |
15 | ;; the Free Software Foundation, either version 3 of the License, or | |
16 | ;; (at your option) any later version. | |
17 | ||
18 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 | ;; GNU General Public License for more details. | |
22 | ||
23 | ;; You should have received a copy of the GNU General Public License | |
24 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | |
25 | ||
26 | ;;; Commentary: | |
27 | ;; | |
28 | ;; Grammar compiler that produces Wisent's LALR automatons. | |
29 | ;; | |
30 | ;; Wisent (the European Bison ;-) is an Elisp implementation of the | |
31 | ;; GNU Compiler Compiler Bison. The Elisp code is a port of the C | |
32 | ;; code of GNU Bison 1.28 & 1.31. | |
33 | ;; | |
34 | ;; For more details on the basic concepts for understanding Wisent, | |
35 | ;; read the Bison manual ;) | |
36 | ;; | |
37 | ;; For more details on Wisent itself read the Wisent manual. | |
38 | ||
39 | ;;; History: | |
40 | ;; | |
41 | ||
42 | ;;; Code: | |
43 | (require 'semantic/wisent) | |
44 | \f | |
45 | ;;;; ------------------- | |
46 | ;;;; Misc. useful things | |
47 | ;;;; ------------------- | |
48 | ||
49 | ;; As much as possible I would like to keep the name of global | |
50 | ;; variables used in Bison without polluting too much the Elisp global | |
51 | ;; name space. Elisp dynamic binding allows that ;-) | |
52 | ||
53 | ;; Here are simple macros to easily define and use set of variables | |
54 | ;; binded locally, without all these "reference to free variable" | |
55 | ;; compiler warnings! | |
56 | ||
57 | (defmacro wisent-context-name (name) | |
58 | "Return the context name from NAME." | |
59 | `(if (and ,name (symbolp ,name)) | |
60 | (intern (format "wisent-context-%s" ,name)) | |
61 | (error "Invalid context name: %S" ,name))) | |
62 | ||
63 | (defmacro wisent-context-bindings (name) | |
64 | "Return the variables in context NAME." | |
65 | `(symbol-value (wisent-context-name ,name))) | |
66 | ||
67 | (defmacro wisent-defcontext (name &rest vars) | |
68 | "Define a context NAME that will bind variables VARS." | |
69 | (let* ((context (wisent-context-name name)) | |
70 | (bindings (mapcar #'(lambda (v) (list 'defvar v)) vars))) | |
71 | `(eval-when-compile | |
72 | ,@bindings | |
73 | (defvar ,context ',vars)))) | |
74 | (put 'wisent-defcontext 'lisp-indent-function 1) | |
75 | ||
76 | (defmacro wisent-with-context (name &rest body) | |
77 | "Bind variables in context NAME then eval BODY." | |
78 | `(let* ,(wisent-context-bindings name) | |
79 | ,@body)) | |
80 | (put 'wisent-with-context 'lisp-indent-function 1) | |
81 | ||
82 | ;; A naive implementation of data structures! But it suffice here ;-) | |
83 | ||
84 | (defmacro wisent-struct (name &rest fields) | |
85 | "Define a simple data structure called NAME. | |
86 | Which contains data stored in FIELDS. FIELDS is a list of symbols | |
87 | which are field names or pairs (FIELD INITIAL-VALUE) where | |
88 | INITIAL-VALUE is a constant used as the initial value of FIELD when | |
89 | the data structure is created. INITIAL-VALUE defaults to nil. | |
90 | ||
91 | This defines a `make-NAME' constructor, get-able `NAME-FIELD' and | |
92 | set-able `set-NAME-FIELD' accessors." | |
93 | (let ((size (length fields)) | |
94 | (i 0) | |
95 | accors field sufx fun ivals) | |
96 | (while (< i size) | |
97 | (setq field (car fields) | |
98 | fields (cdr fields)) | |
99 | (if (consp field) | |
100 | (setq ivals (cons (cadr field) ivals) | |
101 | field (car field)) | |
102 | (setq ivals (cons nil ivals))) | |
103 | (setq sufx (format "%s-%s" name field) | |
104 | fun (intern (format "%s" sufx)) | |
105 | accors (cons `(defmacro ,fun (s) | |
106 | (list 'aref s ,i)) | |
107 | accors) | |
108 | fun (intern (format "set-%s" sufx)) | |
109 | accors (cons `(defmacro ,fun (s v) | |
110 | (list 'aset s ,i v)) | |
111 | accors) | |
112 | i (1+ i))) | |
113 | `(progn | |
114 | (defmacro ,(intern (format "make-%s" name)) () | |
115 | (cons 'vector ',(nreverse ivals))) | |
116 | ,@accors))) | |
117 | (put 'wisent-struct 'lisp-indent-function 1) | |
118 | ||
119 | ;; Other utilities | |
120 | ||
121 | (defsubst wisent-pad-string (s n &optional left) | |
122 | "Fill string S with spaces. | |
123 | Return a new string of at least N characters. Insert spaces on right. | |
124 | If optional LEFT is non-nil insert spaces on left." | |
125 | (let ((i (length s))) | |
126 | (if (< i n) | |
127 | (if left | |
128 | (concat (make-string (- n i) ?\ ) s) | |
129 | (concat s (make-string (- n i) ?\ ))) | |
130 | s))) | |
131 | \f | |
132 | ;;;; ------------------------ | |
133 | ;;;; Environment dependencies | |
134 | ;;;; ------------------------ | |
135 | ||
136 | (defconst wisent-BITS-PER-WORD | |
137 | (let ((i 1)) | |
138 | (while (not (zerop (lsh 1 i))) | |
139 | (setq i (1+ i))) | |
140 | i)) | |
141 | ||
142 | (defsubst wisent-WORDSIZE (n) | |
143 | "(N + BITS-PER-WORD - 1) / BITS-PER-WORD." | |
144 | (/ (1- (+ n wisent-BITS-PER-WORD)) wisent-BITS-PER-WORD)) | |
145 | ||
146 | (defsubst wisent-SETBIT (x i) | |
147 | "X[I/BITS-PER-WORD] |= 1 << (I % BITS-PER-WORD)." | |
148 | (let ((k (/ i wisent-BITS-PER-WORD))) | |
149 | (aset x k (logior (aref x k) | |
150 | (lsh 1 (% i wisent-BITS-PER-WORD)))))) | |
151 | ||
152 | (defsubst wisent-RESETBIT (x i) | |
153 | "X[I/BITS-PER-WORD] &= ~(1 << (I % BITS-PER-WORD))." | |
154 | (let ((k (/ i wisent-BITS-PER-WORD))) | |
155 | (aset x k (logand (aref x k) | |
156 | (lognot (lsh 1 (% i wisent-BITS-PER-WORD))))))) | |
157 | ||
158 | (defsubst wisent-BITISSET (x i) | |
159 | "(X[I/BITS-PER-WORD] & (1 << (I % BITS-PER-WORD))) != 0." | |
160 | (not (zerop (logand (aref x (/ i wisent-BITS-PER-WORD)) | |
161 | (lsh 1 (% i wisent-BITS-PER-WORD)))))) | |
162 | ||
bb051423 CY |
163 | (defsubst wisent-noninteractive () |
164 | "Return non-nil if running without interactive terminal." | |
165 | (if (featurep 'xemacs) | |
166 | (noninteractive) | |
167 | noninteractive)) | |
168 | ||
169 | (defvar wisent-debug-flag nil | |
170 | "Non-nil means enable some debug stuff.") | |
171 | \f | |
172 | ;;;; -------------- | |
173 | ;;;; Logging/Output | |
174 | ;;;; -------------- | |
175 | (defconst wisent-log-buffer-name "*wisent-log*" | |
176 | "Name of the log buffer.") | |
177 | ||
178 | (defvar wisent-new-log-flag nil | |
179 | "Non-nil means to start a new report.") | |
180 | ||
181 | (defvar wisent-verbose-flag nil | |
182 | "*Non-nil means to report verbose information on generated parser.") | |
183 | ||
184 | (defun wisent-toggle-verbose-flag () | |
185 | "Toggle whether to report verbose information on generated parser." | |
186 | (interactive) | |
187 | (setq wisent-verbose-flag (not wisent-verbose-flag)) | |
2054a44c | 188 | (when (called-interactively-p 'interactive) |
bb051423 CY |
189 | (message "Verbose report %sabled" |
190 | (if wisent-verbose-flag "en" "dis")))) | |
191 | ||
192 | (defmacro wisent-log-buffer () | |
193 | "Return the log buffer. | |
194 | Its name is defined in constant `wisent-log-buffer-name'." | |
195 | `(get-buffer-create wisent-log-buffer-name)) | |
196 | ||
197 | (defmacro wisent-clear-log () | |
198 | "Delete the entire contents of the log buffer." | |
199 | `(with-current-buffer (wisent-log-buffer) | |
200 | (erase-buffer))) | |
201 | ||
df26e1f5 | 202 | (defvar byte-compile-current-file) |
bb051423 CY |
203 | |
204 | (defun wisent-source () | |
205 | "Return the current source file name or nil." | |
206 | (let ((source (or (and (boundp 'byte-compile-current-file) | |
207 | byte-compile-current-file) | |
208 | load-file-name (buffer-file-name)))) | |
209 | (if source | |
210 | (file-relative-name source)))) | |
211 | ||
212 | (defun wisent-new-log () | |
213 | "Start a new entry into the log buffer." | |
214 | (setq wisent-new-log-flag nil) | |
215 | (let ((text (format "\n\n*** Wisent %s - %s\n\n" | |
216 | (or (wisent-source) (buffer-name)) | |
217 | (format-time-string "%Y-%m-%d %R")))) | |
218 | (with-current-buffer (wisent-log-buffer) | |
219 | (goto-char (point-max)) | |
220 | (insert text)))) | |
221 | ||
222 | (defsubst wisent-log (&rest args) | |
223 | "Insert text into the log buffer. | |
224 | `format' is applied to ARGS and the result string is inserted into the | |
225 | log buffer returned by the function `wisent-log-buffer'." | |
226 | (and wisent-new-log-flag (wisent-new-log)) | |
227 | (with-current-buffer (wisent-log-buffer) | |
228 | (insert (apply 'format args)))) | |
229 | ||
230 | (defconst wisent-log-file "wisent.output" | |
231 | "The log file. | |
232 | Used when running without interactive terminal.") | |
233 | ||
234 | (defun wisent-append-to-log-file () | |
235 | "Append contents of logging buffer to `wisent-log-file'." | |
236 | (if (get-buffer wisent-log-buffer-name) | |
237 | (condition-case err | |
238 | (with-current-buffer (wisent-log-buffer) | |
239 | (widen) | |
240 | (if (> (point-max) (point-min)) | |
241 | (write-region (point-min) (point-max) | |
242 | wisent-log-file t))) | |
243 | (error | |
244 | (message "*** %s" (error-message-string err)))))) | |
245 | \f | |
246 | ;;;; ----------------------------------- | |
247 | ;;;; Representation of the grammar rules | |
248 | ;;;; ----------------------------------- | |
249 | ||
250 | ;; ntokens is the number of tokens, and nvars is the number of | |
251 | ;; variables (nonterminals). nsyms is the total number, ntokens + | |
252 | ;; nvars. | |
253 | ||
254 | ;; Each symbol (either token or variable) receives a symbol number. | |
255 | ;; Numbers 0 to ntokens-1 are for tokens, and ntokens to nsyms-1 are | |
256 | ;; for variables. Symbol number zero is the end-of-input token. This | |
257 | ;; token is counted in ntokens. | |
258 | ||
259 | ;; The rules receive rule numbers 1 to nrules in the order they are | |
260 | ;; written. Actions and guards are accessed via the rule number. | |
261 | ||
262 | ;; The rules themselves are described by three arrays: rrhs, rlhs and | |
263 | ;; ritem. rlhs[R] is the symbol number of the left hand side of rule | |
264 | ;; R. The right hand side is stored as symbol numbers in a portion of | |
265 | ;; ritem. rrhs[R] contains the index in ritem of the beginning of the | |
266 | ;; portion for rule R. | |
267 | ||
268 | ;; The length of the portion is one greater than the number of symbols | |
269 | ;; in the rule's right hand side. The last element in the portion | |
270 | ;; contains minus R, which identifies it as the end of a portion and | |
271 | ;; says which rule it is for. | |
272 | ||
273 | ;; The portions of ritem come in order of increasing rule number and | |
274 | ;; are followed by an element which is nil to mark the end. nitems is | |
275 | ;; the total length of ritem, not counting the final nil. Each | |
276 | ;; element of ritem is called an "item" and its index in ritem is an | |
277 | ;; item number. | |
278 | ||
279 | ;; Item numbers are used in the finite state machine to represent | |
280 | ;; places that parsing can get to. | |
281 | ||
282 | ;; The vector rprec contains for each rule, the item number of the | |
283 | ;; symbol giving its precedence level to this rule. The precedence | |
284 | ;; level and associativity of each symbol is recorded in respectively | |
285 | ;; the properties 'wisent--prec and 'wisent--assoc. | |
286 | ||
287 | ;; Precedence levels are assigned in increasing order starting with 1 | |
288 | ;; so that numerically higher precedence values mean tighter binding | |
289 | ;; as they ought to. nil as a symbol or rule's precedence means none | |
290 | ;; is assigned. | |
291 | ||
292 | (defcustom wisent-state-table-size 1009 | |
293 | "The size of the state table." | |
294 | :type 'integer | |
295 | :group 'wisent) | |
296 | ||
297 | ;; These variables only exist locally in the function | |
298 | ;; `wisent-compile-grammar' and are shared by all other nested | |
299 | ;; callees. | |
300 | (wisent-defcontext compile-grammar | |
301 | F LA LAruleno accessing-symbol conflicts consistent default-prec | |
302 | derives err-table fderives final-state first-reduction first-shift | |
303 | first-state firsts from-state goto-map includes itemset nitemset | |
304 | kernel-base kernel-end kernel-items last-reduction last-shift | |
305 | last-state lookaheads lookaheadset lookback maxrhs ngotos nitems | |
306 | nrules nshifts nstates nsyms ntokens nullable nvars rassoc redset | |
307 | reduction-table ritem rlhs rprec rrc-count rrc-total rrhs ruseful | |
308 | rcode ruleset rulesetsize shift-symbol shift-table shiftset | |
309 | src-count src-total start-table state-table tags this-state to-state | |
310 | tokensetsize ;; nb of words req. to hold a bit for each rule | |
311 | varsetsize ;; nb of words req. to hold a bit for each variable | |
312 | error-token-number start-symbol token-list var-list | |
313 | N P V V1 nuseless-nonterminals nuseless-productions | |
314 | ptable ;; symbols & characters properties | |
315 | ) | |
316 | ||
317 | (defmacro wisent-ISTOKEN (s) | |
318 | "Return non-nil if item number S defines a token (terminal). | |
319 | That is if S < `ntokens'." | |
320 | `(< ,s ntokens)) | |
321 | ||
322 | (defmacro wisent-ISVAR(s) | |
323 | "Return non-nil if item number S defines a nonterminal. | |
324 | That is if S >= `ntokens'." | |
325 | `(>= ,s ntokens)) | |
326 | ||
327 | (defsubst wisent-tag (s) | |
328 | "Return printable form of item number S." | |
329 | (wisent-item-to-string (aref tags s))) | |
330 | ||
331 | ;; Symbol and character properties | |
332 | ||
333 | (defsubst wisent-put (object propname value) | |
334 | "Store OBJECT's PROPNAME property with value VALUE. | |
335 | Use `eq' to locate OBJECT." | |
336 | (let ((entry (assq object ptable))) | |
337 | (or entry (setq entry (list object) ptable (cons entry ptable))) | |
338 | (setcdr entry (plist-put (cdr entry) propname value)))) | |
339 | ||
340 | (defsubst wisent-get (object propname) | |
341 | "Return the value of OBJECT's PROPNAME property. | |
342 | Use `eq' to locate OBJECT." | |
343 | (plist-get (cdr (assq object ptable)) propname)) | |
344 | ||
345 | (defsubst wisent-item-number (x) | |
346 | "Return the item number of symbol X." | |
347 | (wisent-get x 'wisent--item-no)) | |
348 | ||
349 | (defsubst wisent-set-item-number (x n) | |
350 | "Set the item number of symbol X to N." | |
351 | (wisent-put x 'wisent--item-no n)) | |
352 | ||
353 | (defsubst wisent-assoc (x) | |
354 | "Return the associativity of symbol X." | |
355 | (wisent-get x 'wisent--assoc)) | |
356 | ||
357 | (defsubst wisent-set-assoc (x a) | |
358 | "Set the associativity of symbol X to A." | |
359 | (wisent-put x 'wisent--assoc a)) | |
360 | ||
361 | (defsubst wisent-prec (x) | |
362 | "Return the precedence level of symbol X." | |
363 | (wisent-get x 'wisent--prec)) | |
364 | ||
365 | (defsubst wisent-set-prec (x p) | |
366 | "Set the precedence level of symbol X to P." | |
367 | (wisent-put x 'wisent--prec p)) | |
368 | \f | |
369 | ;;;; ---------------------------------------------------------- | |
370 | ;;;; Type definitions for nondeterministic finite state machine | |
371 | ;;;; ---------------------------------------------------------- | |
372 | ||
373 | ;; These type definitions are used to represent a nondeterministic | |
374 | ;; finite state machine that parses the specified grammar. This | |
375 | ;; information is generated by the function `wisent-generate-states'. | |
376 | ||
377 | ;; Each state of the machine is described by a set of items -- | |
378 | ;; particular positions in particular rules -- that are the possible | |
379 | ;; places where parsing could continue when the machine is in this | |
380 | ;; state. These symbols at these items are the allowable inputs that | |
381 | ;; can follow now. | |
382 | ||
383 | ;; A core represents one state. States are numbered in the number | |
384 | ;; field. When `wisent-generate-states' is finished, the starting | |
385 | ;; state is state 0 and `nstates' is the number of states. (A | |
386 | ;; transition to a state whose state number is `nstates' indicates | |
387 | ;; termination.) All the cores are chained together and `first-state' | |
388 | ;; points to the first one (state 0). | |
389 | ||
390 | ;; For each state there is a particular symbol which must have been | |
391 | ;; the last thing accepted to reach that state. It is the | |
392 | ;; accessing-symbol of the core. | |
393 | ||
394 | ;; Each core contains a vector of `nitems' items which are the indices | |
395 | ;; in the `ritems' vector of the items that are selected in this | |
396 | ;; state. | |
397 | ||
398 | ;; The link field is used for chaining buckets that hash states by | |
399 | ;; their itemsets. This is for recognizing equivalent states and | |
400 | ;; combining them when the states are generated. | |
401 | ||
402 | ;; The two types of transitions are shifts (push the lookahead token | |
403 | ;; and read another) and reductions (combine the last n things on the | |
404 | ;; stack via a rule, replace them with the symbol that the rule | |
405 | ;; derives, and leave the lookahead token alone). When the states are | |
406 | ;; generated, these transitions are represented in two other lists. | |
407 | ||
408 | ;; Each shifts structure describes the possible shift transitions out | |
409 | ;; of one state, the state whose number is in the number field. The | |
410 | ;; shifts structures are linked through next and first-shift points to | |
411 | ;; them. Each contains a vector of numbers of the states that shift | |
412 | ;; transitions can go to. The accessing-symbol fields of those | |
413 | ;; states' cores say what kind of input leads to them. | |
414 | ||
415 | ;; A shift to state zero should be ignored. Conflict resolution | |
416 | ;; deletes shifts by changing them to zero. | |
417 | ||
418 | ;; Each reductions structure describes the possible reductions at the | |
419 | ;; state whose number is in the number field. The data is a list of | |
420 | ;; nreds rules, represented by their rule numbers. `first-reduction' | |
421 | ;; points to the list of these structures. | |
422 | ||
423 | ;; Conflict resolution can decide that certain tokens in certain | |
424 | ;; states should explicitly be errors (for implementing %nonassoc). | |
425 | ;; For each state, the tokens that are errors for this reason are | |
426 | ;; recorded in an errs structure, which has the state number in its | |
427 | ;; number field. The rest of the errs structure is full of token | |
428 | ;; numbers. | |
429 | ||
430 | ;; There is at least one shift transition present in state zero. It | |
431 | ;; leads to a next-to-final state whose accessing-symbol is the | |
432 | ;; grammar's start symbol. The next-to-final state has one shift to | |
433 | ;; the final state, whose accessing-symbol is zero (end of input). | |
434 | ;; The final state has one shift, which goes to the termination state | |
435 | ;; (whose number is `nstates'-1). | |
436 | ;; The reason for the extra state at the end is to placate the | |
437 | ;; parser's strategy of making all decisions one token ahead of its | |
438 | ;; actions. | |
439 | ||
440 | (wisent-struct core | |
441 | next ; -> core | |
442 | link ; -> core | |
443 | (number 0) | |
444 | (accessing-symbol 0) | |
445 | (nitems 0) | |
446 | (items [0])) | |
447 | ||
448 | (wisent-struct shifts | |
449 | next ; -> shifts | |
450 | (number 0) | |
451 | (nshifts 0) | |
452 | (shifts [0])) | |
453 | ||
454 | (wisent-struct reductions | |
455 | next ; -> reductions | |
456 | (number 0) | |
457 | (nreds 0) | |
458 | (rules [0])) | |
459 | ||
460 | (wisent-struct errs | |
461 | (nerrs 0) | |
462 | (errs [0])) | |
463 | \f | |
464 | ;;;; -------------------------------------------------------- | |
465 | ;;;; Find unreachable terminals, nonterminals and productions | |
466 | ;;;; -------------------------------------------------------- | |
467 | ||
468 | (defun wisent-bits-equal (L R n) | |
469 | "Visit L and R and return non-nil if their first N elements are `='. | |
470 | L and R must be vectors of integers." | |
471 | (let* ((i (1- n)) | |
472 | (iseq t)) | |
473 | (while (and iseq (natnump i)) | |
474 | (setq iseq (= (aref L i) (aref R i)) | |
475 | i (1- i))) | |
476 | iseq)) | |
477 | ||
478 | (defun wisent-nbits (i) | |
479 | "Return number of bits set in integer I." | |
480 | (let ((count 0)) | |
481 | (while (not (zerop i)) | |
482 | ;; i ^= (i & ((unsigned) (-(int) i))) | |
483 | (setq i (logxor i (logand i (- i))) | |
484 | count (1+ count))) | |
485 | count)) | |
486 | ||
487 | (defun wisent-bits-size (S n) | |
488 | "In vector S count the total of bits set in first N elements. | |
489 | S must be a vector of integers." | |
490 | (let* ((i (1- n)) | |
491 | (count 0)) | |
492 | (while (natnump i) | |
493 | (setq count (+ count (wisent-nbits (aref S i))) | |
494 | i (1- i))) | |
495 | count)) | |
496 | ||
497 | (defun wisent-useful-production (i N0) | |
498 | "Return non-nil if production I is in useful set N0." | |
499 | (let* ((useful t) | |
500 | (r (aref rrhs i)) | |
501 | n) | |
502 | (while (and useful (> (setq n (aref ritem r)) 0)) | |
503 | (if (wisent-ISVAR n) | |
504 | (setq useful (wisent-BITISSET N0 (- n ntokens)))) | |
505 | (setq r (1+ r))) | |
506 | useful)) | |
507 | ||
508 | (defun wisent-useless-nonterminals () | |
509 | "Find out which nonterminals are used." | |
510 | (let (Np Ns i n break) | |
511 | ;; N is set as built. Np is set being built this iteration. P is | |
512 | ;; set of all productions which have a RHS all in N. | |
513 | (setq n (wisent-WORDSIZE nvars) | |
514 | Np (make-vector n 0)) | |
515 | ||
516 | ;; The set being computed is a set of nonterminals which can | |
517 | ;; derive the empty string or strings consisting of all | |
518 | ;; terminals. At each iteration a nonterminal is added to the set | |
519 | ;; if there is a production with that nonterminal as its LHS for | |
520 | ;; which all the nonterminals in its RHS are already in the set. | |
521 | ;; Iterate until the set being computed remains unchanged. Any | |
522 | ;; nonterminals not in the set at that point are useless in that | |
523 | ;; they will never be used in deriving a sentence of the language. | |
524 | ||
525 | ;; This iteration doesn't use any special traversal over the | |
526 | ;; productions. A set is kept of all productions for which all | |
527 | ;; the nonterminals in the RHS are in useful. Only productions | |
528 | ;; not in this set are scanned on each iteration. At the end, | |
529 | ;; this set is saved to be used when finding useful productions: | |
530 | ;; only productions in this set will appear in the final grammar. | |
531 | ||
532 | (while (not break) | |
533 | (setq i (1- n)) | |
534 | (while (natnump i) | |
535 | ;; Np[i] = N[i] | |
536 | (aset Np i (aref N i)) | |
537 | (setq i (1- i))) | |
538 | ||
539 | (setq i 1) | |
540 | (while (<= i nrules) | |
541 | (if (not (wisent-BITISSET P i)) | |
542 | (when (wisent-useful-production i N) | |
543 | (wisent-SETBIT Np (- (aref rlhs i) ntokens)) | |
544 | (wisent-SETBIT P i))) | |
545 | (setq i (1+ i))) | |
546 | (if (wisent-bits-equal N Np n) | |
547 | (setq break t) | |
548 | (setq Ns Np | |
549 | Np N | |
550 | N Ns))) | |
551 | (setq N Np))) | |
552 | ||
553 | (defun wisent-inaccessable-symbols () | |
554 | "Find out which productions are reachable and which symbols are used." | |
555 | ;; Starting with an empty set of productions and a set of symbols | |
556 | ;; which only has the start symbol in it, iterate over all | |
557 | ;; productions until the set of productions remains unchanged for an | |
558 | ;; iteration. For each production which has a LHS in the set of | |
559 | ;; reachable symbols, add the production to the set of reachable | |
560 | ;; productions, and add all of the nonterminals in the RHS of the | |
561 | ;; production to the set of reachable symbols. | |
562 | ||
563 | ;; Consider only the (partially) reduced grammar which has only | |
564 | ;; nonterminals in N and productions in P. | |
565 | ||
566 | ;; The result is the set P of productions in the reduced grammar, | |
567 | ;; and the set V of symbols in the reduced grammar. | |
568 | ||
569 | ;; Although this algorithm also computes the set of terminals which | |
570 | ;; are reachable, no terminal will be deleted from the grammar. Some | |
571 | ;; terminals might not be in the grammar but might be generated by | |
572 | ;; semantic routines, and so the user might want them available with | |
573 | ;; specified numbers. (Is this true?) However, the non reachable | |
574 | ;; terminals are printed (if running in verbose mode) so that the | |
575 | ;; user can know. | |
576 | (let (Vp Vs Pp i tt r n m break) | |
577 | (setq n (wisent-WORDSIZE nsyms) | |
578 | m (wisent-WORDSIZE (1+ nrules)) | |
579 | Vp (make-vector n 0) | |
580 | Pp (make-vector m 0)) | |
581 | ||
582 | ;; If the start symbol isn't useful, then nothing will be useful. | |
583 | (when (wisent-BITISSET N (- start-symbol ntokens)) | |
584 | (wisent-SETBIT V start-symbol) | |
585 | (while (not break) | |
586 | (setq i (1- n)) | |
587 | (while (natnump i) | |
588 | (aset Vp i (aref V i)) | |
589 | (setq i (1- i))) | |
590 | (setq i 1) | |
591 | (while (<= i nrules) | |
592 | (when (and (not (wisent-BITISSET Pp i)) | |
593 | (wisent-BITISSET P i) | |
594 | (wisent-BITISSET V (aref rlhs i))) | |
595 | (setq r (aref rrhs i)) | |
596 | (while (natnump (setq tt (aref ritem r))) | |
597 | (if (or (wisent-ISTOKEN tt) | |
598 | (wisent-BITISSET N (- tt ntokens))) | |
599 | (wisent-SETBIT Vp tt)) | |
600 | (setq r (1+ r))) | |
601 | (wisent-SETBIT Pp i)) | |
602 | (setq i (1+ i))) | |
603 | (if (wisent-bits-equal V Vp n) | |
604 | (setq break t) | |
605 | (setq Vs Vp | |
606 | Vp V | |
607 | V Vs)))) | |
608 | (setq V Vp) | |
609 | ||
610 | ;; Tokens 0, 1 are internal to Wisent. Consider them useful. | |
611 | (wisent-SETBIT V 0) ;; end-of-input token | |
612 | (wisent-SETBIT V 1) ;; error token | |
613 | (setq P Pp) | |
614 | ||
615 | (setq nuseless-productions (- nrules (wisent-bits-size P m)) | |
616 | nuseless-nonterminals nvars | |
617 | i ntokens) | |
618 | (while (< i nsyms) | |
619 | (if (wisent-BITISSET V i) | |
620 | (setq nuseless-nonterminals (1- nuseless-nonterminals))) | |
621 | (setq i (1+ i))) | |
622 | ||
623 | ;; A token that was used in %prec should not be warned about. | |
624 | (setq i 1) | |
625 | (while (<= i nrules) | |
626 | (if (aref rprec i) | |
627 | (wisent-SETBIT V1 (aref rprec i))) | |
628 | (setq i (1+ i))) | |
629 | )) | |
630 | ||
631 | (defun wisent-reduce-grammar-tables () | |
632 | "Disable useless productions." | |
633 | (if (> nuseless-productions 0) | |
634 | (let ((pn 1)) | |
635 | (while (<= pn nrules) | |
636 | (aset ruseful pn (wisent-BITISSET P pn)) | |
637 | (setq pn (1+ pn)))))) | |
638 | ||
639 | (defun wisent-nonterminals-reduce () | |
640 | "Remove useless nonterminals." | |
641 | (let (i n r item nontermmap tags-sorted) | |
642 | ;; Map the nonterminals to their new index: useful first, useless | |
643 | ;; afterwards. Kept for later report. | |
644 | (setq nontermmap (make-vector nvars 0) | |
645 | n ntokens | |
646 | i ntokens) | |
647 | (while (< i nsyms) | |
648 | (when (wisent-BITISSET V i) | |
649 | (aset nontermmap (- i ntokens) n) | |
650 | (setq n (1+ n))) | |
651 | (setq i (1+ i))) | |
652 | (setq i ntokens) | |
653 | (while (< i nsyms) | |
654 | (unless (wisent-BITISSET V i) | |
655 | (aset nontermmap (- i ntokens) n) | |
656 | (setq n (1+ n))) | |
657 | (setq i (1+ i))) | |
658 | ;; Shuffle elements of tables indexed by symbol number | |
659 | (setq tags-sorted (make-vector nvars nil) | |
660 | i ntokens) | |
661 | (while (< i nsyms) | |
662 | (setq n (aref nontermmap (- i ntokens))) | |
663 | (aset tags-sorted (- n ntokens) (aref tags i)) | |
664 | (setq i (1+ i))) | |
665 | (setq i ntokens) | |
666 | (while (< i nsyms) | |
667 | (aset tags i (aref tags-sorted (- i ntokens))) | |
668 | (setq i (1+ i))) | |
669 | ;; Replace all symbol numbers in valid data structures. | |
670 | (setq i 1) | |
671 | (while (<= i nrules) | |
672 | (aset rlhs i (aref nontermmap (- (aref rlhs i) ntokens))) | |
673 | (setq i (1+ i))) | |
674 | (setq r 0) | |
675 | (while (setq item (aref ritem r)) | |
676 | (if (wisent-ISVAR item) | |
677 | (aset ritem r (aref nontermmap (- item ntokens)))) | |
678 | (setq r (1+ r))) | |
679 | (setq start-symbol (aref nontermmap (- start-symbol ntokens)) | |
680 | nsyms (- nsyms nuseless-nonterminals) | |
681 | nvars (- nvars nuseless-nonterminals)) | |
682 | )) | |
683 | ||
684 | (defun wisent-total-useless () | |
685 | "Report number of useless nonterminals and productions." | |
686 | (let* ((src (wisent-source)) | |
687 | (src (if src (concat " in " src) "")) | |
688 | (msg (format "Grammar%s contains" src))) | |
689 | (if (> nuseless-nonterminals 0) | |
690 | (setq msg (format "%s %d useless nonterminal%s" | |
691 | msg nuseless-nonterminals | |
692 | (if (> nuseless-nonterminals 0) "s" "")))) | |
693 | (if (and (> nuseless-nonterminals 0) (> nuseless-productions 0)) | |
694 | (setq msg (format "%s and" msg))) | |
695 | (if (> nuseless-productions 0) | |
696 | (setq msg (format "%s %d useless rule%s" | |
697 | msg nuseless-productions | |
698 | (if (> nuseless-productions 0) "s" "")))) | |
699 | (message msg))) | |
700 | ||
701 | (defun wisent-reduce-grammar () | |
702 | "Find unreachable terminals, nonterminals and productions." | |
703 | ;; Allocate the global sets used to compute the reduced grammar | |
704 | (setq N (make-vector (wisent-WORDSIZE nvars) 0) | |
705 | P (make-vector (wisent-WORDSIZE (1+ nrules)) 0) | |
706 | V (make-vector (wisent-WORDSIZE nsyms) 0) | |
707 | V1 (make-vector (wisent-WORDSIZE nsyms) 0) | |
708 | nuseless-nonterminals 0 | |
709 | nuseless-productions 0) | |
710 | ||
711 | (wisent-useless-nonterminals) | |
712 | (wisent-inaccessable-symbols) | |
713 | ||
714 | (when (> (+ nuseless-nonterminals nuseless-productions) 0) | |
715 | (wisent-total-useless) | |
716 | (or (wisent-BITISSET N (- start-symbol ntokens)) | |
717 | (error "Start symbol `%s' does not derive any sentence" | |
718 | (wisent-tag start-symbol))) | |
719 | (wisent-reduce-grammar-tables) | |
720 | (if (> nuseless-nonterminals 0) | |
721 | (wisent-nonterminals-reduce)))) | |
722 | ||
723 | (defun wisent-print-useless () | |
724 | "Output the detailed results of the reductions." | |
725 | (let (i b r) | |
726 | (when (> nuseless-nonterminals 0) | |
727 | ;; Useless nonterminals have been moved after useful ones. | |
728 | (wisent-log "\n\nUseless nonterminals:\n\n") | |
729 | (setq i 0) | |
730 | (while (< i nuseless-nonterminals) | |
731 | (wisent-log " %s\n" (wisent-tag (+ nsyms i))) | |
732 | (setq i (1+ i)))) | |
733 | (setq b nil | |
734 | i 0) | |
735 | (while (< i ntokens) | |
736 | (unless (or (wisent-BITISSET V i) (wisent-BITISSET V1 i)) | |
737 | (or b | |
738 | (wisent-log "\n\nTerminals which are not used:\n\n")) | |
739 | (setq b t) | |
740 | (wisent-log " %s\n" (wisent-tag i))) | |
741 | (setq i (1+ i))) | |
742 | (when (> nuseless-productions 0) | |
743 | (wisent-log "\n\nUseless rules:\n\n") | |
744 | (setq i 1) | |
745 | (while (<= i nrules) | |
746 | (unless (aref ruseful i) | |
747 | (wisent-log "#%s " (wisent-pad-string (format "%d" i) 4)) | |
748 | (wisent-log "%s:" (wisent-tag (aref rlhs i))) | |
749 | (setq r (aref rrhs i)) | |
750 | (while (natnump (aref ritem r)) | |
751 | (wisent-log " %s" (wisent-tag (aref ritem r))) | |
752 | (setq r (1+ r))) | |
753 | (wisent-log ";\n")) | |
754 | (setq i (1+ i)))) | |
755 | (if (or b (> nuseless-nonterminals 0) (> nuseless-productions 0)) | |
756 | (wisent-log "\n\n")) | |
757 | )) | |
758 | \f | |
759 | ;;;; ----------------------------- | |
760 | ;;;; Match rules with nonterminals | |
761 | ;;;; ----------------------------- | |
762 | ||
763 | (defun wisent-set-derives () | |
764 | "Find, for each variable (nonterminal), which rules can derive it. | |
765 | It sets up the value of DERIVES so that DERIVES[i - NTOKENS] points to | |
766 | a list of rule numbers, terminated with -1." | |
767 | (let (i lhs p q dset delts) | |
768 | (setq dset (make-vector nvars nil) | |
769 | delts (make-vector (1+ nrules) 0)) | |
770 | (setq p 0 ;; p = delts | |
771 | i nrules) | |
772 | (while (> i 0) | |
773 | (when (aref ruseful i) | |
774 | (setq lhs (aref rlhs i)) | |
775 | ;; p->next = dset[lhs]; | |
776 | ;; p->value = i; | |
777 | (aset delts p (cons i (aref dset (- lhs ntokens)))) ;; (value . next) | |
778 | (aset dset (- lhs ntokens) p) ;; dset[lhs] = p | |
779 | (setq p (1+ p)) ;; p++ | |
780 | ) | |
781 | (setq i (1- i))) | |
782 | ||
783 | (setq derives (make-vector nvars nil) | |
784 | i ntokens) | |
785 | ||
786 | (while (< i nsyms) | |
787 | (setq q nil | |
788 | p (aref dset (- i ntokens))) ;; p = dset[i] | |
789 | ||
790 | (while p | |
791 | (setq p (aref delts p) | |
792 | q (cons (car p) q) ;;q++ = p->value | |
793 | p (cdr p))) ;; p = p->next | |
794 | (setq q (nreverse (cons -1 q))) ;; *q++ = -1 | |
795 | (aset derives (- i ntokens) q) ;; derives[i] = q | |
796 | (setq i (1+ i))) | |
797 | )) | |
798 | \f | |
799 | ;;;; -------------------------------------------------------- | |
800 | ;;;; Find which nonterminals can expand into the null string. | |
801 | ;;;; -------------------------------------------------------- | |
802 | ||
803 | (defun wisent-print-nullable () | |
804 | "Print NULLABLE." | |
805 | (let (i) | |
806 | (wisent-log "NULLABLE\n") | |
807 | (setq i ntokens) | |
808 | (while (< i nsyms) | |
809 | (wisent-log "\t%s: %s\n" (wisent-tag i) | |
810 | (if (aref nullable (- i ntokens)) | |
811 | "yes" : "no")) | |
812 | (setq i (1+ i))) | |
813 | (wisent-log "\n\n"))) | |
814 | ||
815 | (defun wisent-set-nullable () | |
816 | "Set up NULLABLE. | |
817 | A vector saying which nonterminals can expand into the null string. | |
818 | NULLABLE[i - NTOKENS] is nil if symbol I can do so." | |
819 | (let (ruleno s1 s2 p r squeue rcount rsets relts item any-tokens) | |
820 | (setq squeue (make-vector nvars 0) | |
821 | rcount (make-vector (1+ nrules) 0) | |
822 | rsets (make-vector nvars nil) ;; - ntokens | |
823 | relts (make-vector (+ nitems nvars 1) nil) | |
824 | nullable (make-vector nvars nil)) ;; - ntokens | |
825 | (setq s1 0 s2 0 ;; s1 = s2 = squeue | |
826 | p 0 ;; p = relts | |
827 | ruleno 1) | |
828 | (while (<= ruleno nrules) | |
829 | (when (aref ruseful ruleno) | |
830 | (if (> (aref ritem (aref rrhs ruleno)) 0) | |
831 | (progn | |
832 | ;; This rule has a non empty RHS. | |
833 | (setq any-tokens nil | |
834 | r (aref rrhs ruleno)) | |
835 | (while (> (aref ritem r) 0) | |
836 | (if (wisent-ISTOKEN (aref ritem r)) | |
837 | (setq any-tokens t)) | |
838 | (setq r (1+ r))) | |
839 | ||
840 | ;; This rule has only nonterminals: schedule it for the | |
841 | ;; second pass. | |
842 | (unless any-tokens | |
843 | (setq r (aref rrhs ruleno)) | |
844 | (while (> (setq item (aref ritem r)) 0) | |
845 | (aset rcount ruleno (1+ (aref rcount ruleno))) | |
846 | ;; p->next = rsets[item]; | |
847 | ;; p->value = ruleno; | |
848 | (aset relts p (cons ruleno (aref rsets (- item ntokens)))) | |
849 | ;; rsets[item] = p; | |
850 | (aset rsets (- item ntokens) p) | |
851 | (setq p (1+ p) | |
852 | r (1+ r))))) | |
853 | ;; This rule has an empty RHS. | |
854 | ;; assert (ritem[rrhs[ruleno]] == -ruleno) | |
855 | (when (and (aref ruseful ruleno) | |
856 | (setq item (aref rlhs ruleno)) | |
857 | (not (aref nullable (- item ntokens)))) | |
858 | (aset nullable (- item ntokens) t) | |
859 | (aset squeue s2 item) | |
860 | (setq s2 (1+ s2))) | |
861 | ) | |
862 | ) | |
863 | (setq ruleno (1+ ruleno))) | |
864 | ||
865 | (while (< s1 s2) | |
866 | ;; p = rsets[*s1++] | |
867 | (setq p (aref rsets (- (aref squeue s1) ntokens)) | |
868 | s1 (1+ s1)) | |
869 | (while p | |
870 | (setq p (aref relts p) | |
871 | ruleno (car p) | |
872 | p (cdr p)) ;; p = p->next | |
873 | ;; if (--rcount[ruleno] == 0) | |
874 | (when (zerop (aset rcount ruleno (1- (aref rcount ruleno)))) | |
875 | (setq item (aref rlhs ruleno)) | |
876 | (aset nullable (- item ntokens) t) | |
877 | (aset squeue s2 item) | |
878 | (setq s2 (1+ s2))))) | |
879 | ||
880 | (if wisent-debug-flag | |
881 | (wisent-print-nullable)) | |
882 | )) | |
883 | \f | |
884 | ;;;; ----------- | |
885 | ;;;; Subroutines | |
886 | ;;;; ----------- | |
887 | ||
888 | (defun wisent-print-fderives () | |
889 | "Print FDERIVES." | |
890 | (let (i j rp) | |
891 | (wisent-log "\n\n\nFDERIVES\n") | |
892 | (setq i ntokens) | |
893 | (while (< i nsyms) | |
894 | (wisent-log "\n\n%s derives\n\n" (wisent-tag i)) | |
895 | (setq rp (aref fderives (- i ntokens)) | |
896 | j 0) | |
897 | (while (<= j nrules) | |
898 | (if (wisent-BITISSET rp j) | |
899 | (wisent-log " %d\n" j)) | |
900 | (setq j (1+ j))) | |
901 | (setq i (1+ i))))) | |
902 | ||
903 | (defun wisent-set-fderives () | |
904 | "Set up FDERIVES. | |
905 | An NVARS by NRULES matrix of bits indicating which rules can help | |
906 | derive the beginning of the data for each nonterminal. For example, | |
907 | if symbol 5 can be derived as the sequence of symbols 8 3 20, and one | |
908 | of the rules for deriving symbol 8 is rule 4, then the | |
909 | \[5 - NTOKENS, 4] bit in FDERIVES is set." | |
910 | (let (i j k) | |
911 | (setq fderives (make-vector nvars nil)) | |
912 | (setq i 0) | |
913 | (while (< i nvars) | |
914 | (aset fderives i (make-vector rulesetsize 0)) | |
915 | (setq i (1+ i))) | |
916 | ||
917 | (wisent-set-firsts) | |
918 | ||
919 | (setq i ntokens) | |
920 | (while (< i nsyms) | |
921 | (setq j ntokens) | |
922 | (while (< j nsyms) | |
923 | ;; if (BITISSET (FIRSTS (i), j - ntokens)) | |
924 | (when (wisent-BITISSET (aref firsts (- i ntokens)) (- j ntokens)) | |
925 | (setq k (aref derives (- j ntokens))) | |
926 | (while (> (car k) 0) ;; derives[j][k] > 0 | |
927 | ;; SETBIT (FDERIVES (i), derives[j][k]); | |
928 | (wisent-SETBIT (aref fderives (- i ntokens)) (car k)) | |
929 | (setq k (cdr k)))) | |
930 | (setq j (1+ j))) | |
931 | (setq i (1+ i))) | |
932 | ||
933 | (if wisent-debug-flag | |
934 | (wisent-print-fderives)) | |
935 | )) | |
936 | ||
937 | (defun wisent-print-firsts () | |
938 | "Print FIRSTS." | |
939 | (let (i j v) | |
940 | (wisent-log "\n\n\nFIRSTS\n\n") | |
941 | (setq i ntokens) | |
942 | (while (< i nsyms) | |
943 | (wisent-log "\n\n%s firsts\n\n" (wisent-tag i)) | |
944 | (setq v (aref firsts (- i ntokens)) | |
945 | j 0) | |
946 | (while (< j nvars) | |
947 | (if (wisent-BITISSET v j) | |
948 | (wisent-log "\t\t%d (%s)\n" | |
949 | (+ j ntokens) (wisent-tag (+ j ntokens)))) | |
950 | (setq j (1+ j))) | |
951 | (setq i (1+ i))))) | |
952 | ||
953 | (defun wisent-TC (R n) | |
954 | "Transitive closure. | |
955 | Given R an N by N matrix of bits, modify its contents to be the | |
956 | transitive closure of what was given." | |
957 | (let (i j k) | |
958 | ;; R (J, I) && R (I, K) => R (J, K). | |
959 | ;; I *must* be the outer loop. | |
960 | (setq i 0) | |
961 | (while (< i n) | |
962 | (setq j 0) | |
963 | (while (< j n) | |
964 | (when (wisent-BITISSET (aref R j) i) | |
965 | (setq k 0) | |
966 | (while (< k n) | |
967 | (if (wisent-BITISSET (aref R i) k) | |
968 | (wisent-SETBIT (aref R j) k)) | |
969 | (setq k (1+ k)))) | |
970 | (setq j (1+ j))) | |
971 | (setq i (1+ i))))) | |
972 | ||
973 | (defun wisent-RTC (R n) | |
974 | "Reflexive Transitive Closure. | |
975 | Same as `wisent-TC' and then set all the bits on the diagonal of R, an | |
976 | N by N matrix of bits." | |
977 | (let (i) | |
978 | (wisent-TC R n) | |
979 | (setq i 0) | |
980 | (while (< i n) | |
981 | (wisent-SETBIT (aref R i) i) | |
982 | (setq i (1+ i))))) | |
983 | ||
984 | (defun wisent-set-firsts () | |
985 | "Set up FIRSTS. | |
986 | An NVARS by NVARS bit matrix indicating which items can represent the | |
987 | beginning of the input corresponding to which other items. For | |
988 | example, if some rule expands symbol 5 into the sequence of symbols 8 | |
989 | 3 20, the symbol 8 can be the beginning of the data for symbol 5, so | |
990 | the bit [8 - NTOKENS, 5 - NTOKENS] in FIRSTS is set." | |
991 | (let (row symbol sp rowsize i) | |
992 | (setq rowsize (wisent-WORDSIZE nvars) | |
993 | varsetsize rowsize | |
994 | firsts (make-vector nvars nil) | |
995 | i 0) | |
996 | (while (< i nvars) | |
997 | (aset firsts i (make-vector rowsize 0)) | |
998 | (setq i (1+ i))) | |
999 | ||
1000 | (setq row 0 ;; row = firsts | |
1001 | i ntokens) | |
1002 | (while (< i nsyms) | |
1003 | (setq sp (aref derives (- i ntokens))) | |
1004 | (while (>= (car sp) 0) | |
1005 | (setq symbol (aref ritem (aref rrhs (car sp))) | |
1006 | sp (cdr sp)) | |
1007 | (when (wisent-ISVAR symbol) | |
1008 | (setq symbol (- symbol ntokens)) | |
1009 | (wisent-SETBIT (aref firsts row) symbol) | |
1010 | )) | |
1011 | (setq row (1+ row) | |
1012 | i (1+ i))) | |
1013 | ||
1014 | (wisent-RTC firsts nvars) | |
1015 | ||
1016 | (if wisent-debug-flag | |
1017 | (wisent-print-firsts)) | |
1018 | )) | |
1019 | ||
1020 | (defun wisent-initialize-closure (n) | |
1021 | "Allocate the ITEMSET and RULESET vectors. | |
1022 | And precompute useful data so that `wisent-closure' can be called. | |
1023 | N is the number of elements to allocate for ITEMSET." | |
1024 | (setq itemset (make-vector n 0) | |
1025 | rulesetsize (wisent-WORDSIZE (1+ nrules)) | |
1026 | ruleset (make-vector rulesetsize 0)) | |
1027 | ||
1028 | (wisent-set-fderives)) | |
1029 | ||
1030 | (defun wisent-print-closure () | |
1031 | "Print ITEMSET." | |
1032 | (let (i) | |
1033 | (wisent-log "\n\nclosure n = %d\n\n" nitemset) | |
1034 | (setq i 0) ;; isp = itemset | |
1035 | (while (< i nitemset) | |
1036 | (wisent-log " %d\n" (aref itemset i)) | |
1037 | (setq i (1+ i))))) | |
1038 | ||
1039 | (defun wisent-closure (core n) | |
1040 | "Set up RULESET and ITEMSET for the transitions out of CORE state. | |
1041 | Given a vector of item numbers items, of length N, set up RULESET and | |
1042 | ITEMSET to indicate what rules could be run and which items could be | |
1043 | accepted when those items are the active ones. | |
1044 | ||
1045 | RULESET contains a bit for each rule. `wisent-closure' sets the bits | |
1046 | for all rules which could potentially describe the next input to be | |
1047 | read. | |
1048 | ||
1049 | ITEMSET is a vector of item numbers; NITEMSET is the number of items | |
1050 | in ITEMSET. `wisent-closure' places there the indices of all items | |
1051 | which represent units of input that could arrive next." | |
1052 | (let (c r v symbol ruleno itemno) | |
1053 | (if (zerop n) | |
1054 | (progn | |
1055 | (setq r 0 | |
1056 | v (aref fderives (- start-symbol ntokens))) | |
1057 | (while (< r rulesetsize) | |
1058 | ;; ruleset[r] = FDERIVES (start-symbol)[r]; | |
1059 | (aset ruleset r (aref v r)) | |
1060 | (setq r (1+ r))) | |
1061 | ) | |
1062 | (fillarray ruleset 0) | |
1063 | (setq c 0) | |
1064 | (while (< c n) | |
1065 | (setq symbol (aref ritem (aref core c))) | |
1066 | (when (wisent-ISVAR symbol) | |
1067 | (setq r 0 | |
1068 | v (aref fderives (- symbol ntokens))) | |
1069 | (while (< r rulesetsize) | |
1070 | ;; ruleset[r] |= FDERIVES (ritem[core[c]])[r]; | |
1071 | (aset ruleset r (logior (aref ruleset r) (aref v r))) | |
1072 | (setq r (1+ r)))) | |
1073 | (setq c (1+ c))) | |
1074 | ) | |
1075 | (setq nitemset 0 | |
1076 | c 0 | |
1077 | ruleno 0 | |
1078 | r (* rulesetsize wisent-BITS-PER-WORD)) | |
1079 | (while (< ruleno r) | |
1080 | (when (wisent-BITISSET ruleset ruleno) | |
1081 | (setq itemno (aref rrhs ruleno)) | |
1082 | (while (and (< c n) (< (aref core c) itemno)) | |
1083 | (aset itemset nitemset (aref core c)) | |
1084 | (setq nitemset (1+ nitemset) | |
1085 | c (1+ c))) | |
1086 | (aset itemset nitemset itemno) | |
1087 | (setq nitemset (1+ nitemset))) | |
1088 | (setq ruleno (1+ ruleno))) | |
1089 | ||
1090 | (while (< c n) | |
1091 | (aset itemset nitemset (aref core c)) | |
1092 | (setq nitemset (1+ nitemset) | |
1093 | c (1+ c))) | |
1094 | ||
1095 | (if wisent-debug-flag | |
1096 | (wisent-print-closure)) | |
1097 | )) | |
1098 | \f | |
1099 | ;;;; -------------------------------------------------- | |
1100 | ;;;; Generate the nondeterministic finite state machine | |
1101 | ;;;; -------------------------------------------------- | |
1102 | ||
1103 | (defun wisent-allocate-itemsets () | |
1104 | "Allocate storage for itemsets." | |
1105 | (let (symbol i count symbol-count) | |
1106 | ;; Count the number of occurrences of all the symbols in RITEMS. | |
1107 | ;; Note that useless productions (hence useless nonterminals) are | |
1108 | ;; browsed too, hence we need to allocate room for _all_ the | |
1109 | ;; symbols. | |
1110 | (setq count 0 | |
1111 | symbol-count (make-vector (+ nsyms nuseless-nonterminals) 0) | |
1112 | i 0) | |
1113 | (while (setq symbol (aref ritem i)) | |
1114 | (when (> symbol 0) | |
1115 | (setq count (1+ count)) | |
1116 | (aset symbol-count symbol (1+ (aref symbol-count symbol)))) | |
1117 | (setq i (1+ i))) | |
1118 | ;; See comments before `wisent-new-itemsets'. All the vectors of | |
1119 | ;; items live inside kernel-items. The number of active items | |
1120 | ;; after some symbol cannot be more than the number of times that | |
1121 | ;; symbol appears as an item, which is symbol-count[symbol]. We | |
1122 | ;; allocate that much space for each symbol. | |
1123 | (setq kernel-base (make-vector nsyms nil) | |
1124 | kernel-items (make-vector count 0) | |
1125 | count 0 | |
1126 | i 0) | |
1127 | (while (< i nsyms) | |
1128 | (aset kernel-base i count) | |
1129 | (setq count (+ count (aref symbol-count i)) | |
1130 | i (1+ i))) | |
1131 | (setq shift-symbol symbol-count | |
1132 | kernel-end (make-vector nsyms nil)) | |
1133 | )) | |
1134 | ||
1135 | (defun wisent-allocate-storage () | |
1136 | "Allocate storage for the state machine." | |
1137 | (wisent-allocate-itemsets) | |
1138 | (setq shiftset (make-vector nsyms 0) | |
1139 | redset (make-vector (1+ nrules) 0) | |
1140 | state-table (make-vector wisent-state-table-size nil))) | |
1141 | ||
1142 | (defun wisent-new-itemsets () | |
1143 | "Find which symbols can be shifted in the current state. | |
1144 | And for each one record which items would be active after that shift. | |
1145 | Uses the contents of ITEMSET. SHIFT-SYMBOL is set to a vector of the | |
1146 | symbols that can be shifted. For each symbol in the grammar, | |
1147 | KERNEL-BASE[symbol] points to a vector of item numbers activated if | |
1148 | that symbol is shifted, and KERNEL-END[symbol] points after the end of | |
1149 | that vector." | |
1150 | (let (i shiftcount isp ksp symbol) | |
1151 | (fillarray kernel-end nil) | |
1152 | (setq shiftcount 0 | |
1153 | isp 0) | |
1154 | (while (< isp nitemset) | |
1155 | (setq i (aref itemset isp) | |
1156 | isp (1+ isp) | |
1157 | symbol (aref ritem i)) | |
1158 | (when (> symbol 0) | |
1159 | (setq ksp (aref kernel-end symbol)) | |
1160 | (when (not ksp) | |
1161 | ;; shift-symbol[shiftcount++] = symbol; | |
1162 | (aset shift-symbol shiftcount symbol) | |
1163 | (setq shiftcount (1+ shiftcount) | |
1164 | ksp (aref kernel-base symbol))) | |
1165 | ;; *ksp++ = i + 1; | |
1166 | (aset kernel-items ksp (1+ i)) | |
1167 | (setq ksp (1+ ksp)) | |
1168 | (aset kernel-end symbol ksp))) | |
1169 | (setq nshifts shiftcount))) | |
1170 | ||
1171 | (defun wisent-new-state (symbol) | |
1172 | "Create a new state for those items, if necessary. | |
1173 | SYMBOL is the core accessing-symbol. | |
1174 | Subroutine of `wisent-get-state'." | |
1175 | (let (n p isp1 isp2 iend items) | |
1176 | (setq isp1 (aref kernel-base symbol) | |
1177 | iend (aref kernel-end symbol) | |
1178 | n (- iend isp1) | |
1179 | p (make-core) | |
1180 | items (make-vector n 0)) | |
1181 | (set-core-accessing-symbol p symbol) | |
1182 | (set-core-number p nstates) | |
1183 | (set-core-nitems p n) | |
1184 | (set-core-items p items) | |
1185 | (setq isp2 0) ;; isp2 = p->items | |
1186 | (while (< isp1 iend) | |
1187 | ;; *isp2++ = *isp1++; | |
1188 | (aset items isp2 (aref kernel-items isp1)) | |
1189 | (setq isp1 (1+ isp1) | |
1190 | isp2 (1+ isp2))) | |
1191 | (set-core-next last-state p) | |
1192 | (setq last-state p | |
1193 | nstates (1+ nstates)) | |
1194 | p)) | |
1195 | ||
1196 | (defun wisent-get-state (symbol) | |
1197 | "Find the state we would get to by shifting SYMBOL. | |
1198 | Return the state number for the state we would get to (from the | |
1199 | current state) by shifting SYMBOL. Create a new state if no | |
1200 | equivalent one exists already. Used by `wisent-append-states'." | |
1201 | (let (key isp1 isp2 iend sp sp2 found n) | |
1202 | (setq isp1 (aref kernel-base symbol) | |
1203 | iend (aref kernel-end symbol) | |
1204 | n (- iend isp1) | |
1205 | key 0) | |
1206 | ;; Add up the target state's active item numbers to get a hash key | |
1207 | (while (< isp1 iend) | |
1208 | (setq key (+ key (aref kernel-items isp1)) | |
1209 | isp1 (1+ isp1))) | |
1210 | (setq key (% key wisent-state-table-size) | |
1211 | sp (aref state-table key)) | |
1212 | (if sp | |
1213 | (progn | |
1214 | (setq found nil) | |
1215 | (while (not found) | |
1216 | (when (= (core-nitems sp) n) | |
1217 | (setq found t | |
1218 | isp1 (aref kernel-base symbol) | |
1219 | ;; isp2 = sp->items; | |
1220 | sp2 (core-items sp) | |
1221 | isp2 0) | |
1222 | ||
1223 | (while (and found (< isp1 iend)) | |
1224 | ;; if (*isp1++ != *isp2++) | |
1225 | (if (not (= (aref kernel-items isp1) | |
1226 | (aref sp2 isp2))) | |
1227 | (setq found nil)) | |
1228 | (setq isp1 (1+ isp1) | |
1229 | isp2 (1+ isp2)))) | |
1230 | (if (not found) | |
1231 | (if (core-link sp) | |
1232 | (setq sp (core-link sp)) | |
1233 | ;; sp = sp->link = new-state(symbol) | |
1234 | (setq sp (set-core-link sp (wisent-new-state symbol)) | |
1235 | found t))))) | |
1236 | ;; bucket is empty | |
1237 | ;; state-table[key] = sp = new-state(symbol) | |
1238 | (setq sp (wisent-new-state symbol)) | |
1239 | (aset state-table key sp)) | |
1240 | ;; return (sp->number); | |
1241 | (core-number sp))) | |
1242 | ||
1243 | (defun wisent-append-states () | |
1244 | "Find or create the core structures for states. | |
1245 | Use the information computed by `wisent-new-itemsets' to find the | |
1246 | state numbers reached by each shift transition from the current state. | |
1247 | SHIFTSET is set up as a vector of state numbers of those states." | |
1248 | (let (i j symbol) | |
1249 | ;; First sort shift-symbol into increasing order | |
1250 | (setq i 1) | |
1251 | (while (< i nshifts) | |
1252 | (setq symbol (aref shift-symbol i) | |
1253 | j i) | |
1254 | (while (and (> j 0) (> (aref shift-symbol (1- j)) symbol)) | |
1255 | (aset shift-symbol j (aref shift-symbol (1- j))) | |
1256 | (setq j (1- j))) | |
1257 | (aset shift-symbol j symbol) | |
1258 | (setq i (1+ i))) | |
1259 | (setq i 0) | |
1260 | (while (< i nshifts) | |
1261 | (setq symbol (aref shift-symbol i)) | |
1262 | (aset shiftset i (wisent-get-state symbol)) | |
1263 | (setq i (1+ i))) | |
1264 | )) | |
1265 | ||
1266 | (defun wisent-initialize-states () | |
1267 | "Initialize states." | |
1268 | (let ((p (make-core))) | |
1269 | (setq first-state p | |
1270 | last-state p | |
1271 | this-state p | |
1272 | nstates 1))) | |
1273 | ||
1274 | (defun wisent-save-shifts () | |
1275 | "Save the NSHIFTS of SHIFTSET into the current linked list." | |
1276 | (let (p i shifts) | |
1277 | (setq p (make-shifts) | |
1278 | shifts (make-vector nshifts 0) | |
1279 | i 0) | |
1280 | (set-shifts-number p (core-number this-state)) | |
1281 | (set-shifts-nshifts p nshifts) | |
1282 | (set-shifts-shifts p shifts) | |
1283 | (while (< i nshifts) | |
1284 | ;; (p->shifts)[i] = shiftset[i]; | |
1285 | (aset shifts i (aref shiftset i)) | |
1286 | (setq i (1+ i))) | |
1287 | ||
1288 | (if last-shift | |
1289 | (set-shifts-next last-shift p) | |
1290 | (setq first-shift p)) | |
1291 | (setq last-shift p))) | |
1292 | ||
1293 | (defun wisent-insert-start-shift () | |
1294 | "Create the next-to-final state. | |
1295 | That is the state to which a shift has already been made in the | |
1296 | initial state. Subroutine of `wisent-augment-automaton'." | |
1297 | (let (statep sp) | |
1298 | (setq statep (make-core)) | |
1299 | (set-core-number statep nstates) | |
1300 | (set-core-accessing-symbol statep start-symbol) | |
1301 | (set-core-next last-state statep) | |
1302 | (setq last-state statep) | |
1303 | ;; Make a shift from this state to (what will be) the final state. | |
1304 | (setq sp (make-shifts)) | |
1305 | (set-shifts-number sp nstates) | |
1306 | (setq nstates (1+ nstates)) | |
1307 | (set-shifts-nshifts sp 1) | |
1308 | (set-shifts-shifts sp (vector nstates)) | |
1309 | (set-shifts-next last-shift sp) | |
1310 | (setq last-shift sp))) | |
1311 | ||
1312 | (defun wisent-augment-automaton () | |
1313 | "Set up initial and final states as parser wants them. | |
1314 | Make sure that the initial state has a shift that accepts the | |
1315 | grammar's start symbol and goes to the next-to-final state, which has | |
1316 | a shift going to the final state, which has a shift to the termination | |
1317 | state. Create such states and shifts if they don't happen to exist | |
1318 | already." | |
1319 | (let (i k statep sp sp2 sp1 shifts) | |
1320 | (setq sp first-shift) | |
1321 | (if sp | |
1322 | (progn | |
1323 | (if (zerop (shifts-number sp)) | |
1324 | (progn | |
1325 | (setq k (shifts-nshifts sp) | |
1326 | statep (core-next first-state)) | |
1327 | ;; The states reached by shifts from first-state are | |
1328 | ;; numbered 1...K. Look for one reached by | |
1329 | ;; START-SYMBOL. | |
1330 | (while (and (< (core-accessing-symbol statep) start-symbol) | |
1331 | (< (core-number statep) k)) | |
1332 | (setq statep (core-next statep))) | |
1333 | (if (= (core-accessing-symbol statep) start-symbol) | |
1334 | (progn | |
1335 | ;; We already have a next-to-final state. Make | |
1336 | ;; sure it has a shift to what will be the final | |
1337 | ;; state. | |
1338 | (setq k (core-number statep)) | |
1339 | (while (and sp (< (shifts-number sp) k)) | |
1340 | (setq sp1 sp | |
1341 | sp (shifts-next sp))) | |
1342 | (if (and sp (= (shifts-number sp) k)) | |
1343 | (progn | |
1344 | (setq i (shifts-nshifts sp) | |
1345 | sp2 (make-shifts) | |
1346 | shifts (make-vector (1+ i) 0)) | |
1347 | (set-shifts-number sp2 k) | |
1348 | (set-shifts-nshifts sp2 (1+ i)) | |
1349 | (set-shifts-shifts sp2 shifts) | |
1350 | (aset shifts 0 nstates) | |
1351 | (while (> i 0) | |
1352 | ;; sp2->shifts[i] = sp->shifts[i - 1]; | |
1353 | (aset shifts i (aref (shifts-shifts sp) (1- i))) | |
1354 | (setq i (1- i))) | |
1355 | ;; Patch sp2 into the chain of shifts in | |
1356 | ;; place of sp, following sp1. | |
1357 | (set-shifts-next sp2 (shifts-next sp)) | |
1358 | (set-shifts-next sp1 sp2) | |
1359 | (if (eq sp last-shift) | |
1360 | (setq last-shift sp2)) | |
1361 | ) | |
1362 | (setq sp2 (make-shifts)) | |
1363 | (set-shifts-number sp2 k) | |
1364 | (set-shifts-nshifts sp2 1) | |
1365 | (set-shifts-shifts sp2 (vector nstates)) | |
1366 | ;; Patch sp2 into the chain of shifts between | |
1367 | ;; sp1 and sp. | |
1368 | (set-shifts-next sp2 sp) | |
1369 | (set-shifts-next sp1 sp2) | |
1370 | (if (not sp) | |
1371 | (setq last-shift sp2)) | |
1372 | ) | |
1373 | ) | |
1374 | ;; There is no next-to-final state as yet. | |
1375 | ;; Add one more shift in FIRST-SHIFT, going to the | |
1376 | ;; next-to-final state (yet to be made). | |
1377 | (setq sp first-shift | |
1378 | sp2 (make-shifts) | |
1379 | i (shifts-nshifts sp) | |
1380 | shifts (make-vector (1+ i) 0)) | |
1381 | (set-shifts-nshifts sp2 (1+ i)) | |
1382 | (set-shifts-shifts sp2 shifts) | |
1383 | ;; Stick this shift into the vector at the proper place. | |
1384 | (setq statep (core-next first-state) | |
1385 | k 0 | |
1386 | i 0) | |
1387 | (while (< i (shifts-nshifts sp)) | |
1388 | (when (and (> (core-accessing-symbol statep) start-symbol) | |
1389 | (= i k)) | |
1390 | (aset shifts k nstates) | |
1391 | (setq k (1+ k))) | |
1392 | (aset shifts k (aref (shifts-shifts sp) i)) | |
1393 | (setq statep (core-next statep)) | |
1394 | (setq i (1+ i) | |
1395 | k (1+ k))) | |
1396 | (when (= i k) | |
1397 | (aset shifts k nstates) | |
1398 | (setq k (1+ k))) | |
1399 | ;; Patch sp2 into the chain of shifts in place of | |
1400 | ;; sp, at the beginning. | |
1401 | (set-shifts-next sp2 (shifts-next sp)) | |
1402 | (setq first-shift sp2) | |
1403 | (if (eq last-shift sp) | |
1404 | (setq last-shift sp2)) | |
1405 | ;; Create the next-to-final state, with shift to | |
1406 | ;; what will be the final state. | |
1407 | (wisent-insert-start-shift))) | |
1408 | ;; The initial state didn't even have any shifts. Give it | |
1409 | ;; one shift, to the next-to-final state. | |
1410 | (setq sp (make-shifts)) | |
1411 | (set-shifts-nshifts sp 1) | |
1412 | (set-shifts-shifts sp (vector nstates)) | |
1413 | ;; Patch sp into the chain of shifts at the beginning. | |
1414 | (set-shifts-next sp first-shift) | |
1415 | (setq first-shift sp) | |
1416 | ;; Create the next-to-final state, with shift to what will | |
1417 | ;; be the final state. | |
1418 | (wisent-insert-start-shift))) | |
1419 | ;; There are no shifts for any state. Make one shift, from the | |
1420 | ;; initial state to the next-to-final state. | |
1421 | (setq sp (make-shifts)) | |
1422 | (set-shifts-nshifts sp 1) | |
1423 | (set-shifts-shifts sp (vector nstates)) | |
1424 | ;; Initialize the chain of shifts with sp. | |
1425 | (setq first-shift sp | |
1426 | last-shift sp) | |
1427 | ;; Create the next-to-final state, with shift to what will be | |
1428 | ;; the final state. | |
1429 | (wisent-insert-start-shift)) | |
1430 | ;; Make the final state--the one that follows a shift from the | |
1431 | ;; next-to-final state. The symbol for that shift is 0 | |
1432 | ;; (end-of-file). | |
1433 | (setq statep (make-core)) | |
1434 | (set-core-number statep nstates) | |
1435 | (set-core-next last-state statep) | |
1436 | (setq last-state statep) | |
1437 | ;; Make the shift from the final state to the termination state. | |
1438 | (setq sp (make-shifts)) | |
1439 | (set-shifts-number sp nstates) | |
1440 | (setq nstates (1+ nstates)) | |
1441 | (set-shifts-nshifts sp 1) | |
1442 | (set-shifts-shifts sp (vector nstates)) | |
1443 | (set-shifts-next last-shift sp) | |
1444 | (setq last-shift sp) | |
1445 | ;; Note that the variable FINAL-STATE refers to what we sometimes | |
1446 | ;; call the termination state. | |
1447 | (setq final-state nstates) | |
1448 | ;; Make the termination state. | |
1449 | (setq statep (make-core)) | |
1450 | (set-core-number statep nstates) | |
1451 | (setq nstates (1+ nstates)) | |
1452 | (set-core-next last-state statep) | |
1453 | (setq last-state statep))) | |
1454 | ||
1455 | (defun wisent-save-reductions () | |
1456 | "Make a reductions structure. | |
1457 | Find which rules can be used for reduction transitions from the | |
1458 | current state and make a reductions structure for the state to record | |
1459 | their rule numbers." | |
1460 | (let (i item count p rules) | |
1461 | ;; Find and count the active items that represent ends of rules. | |
1462 | (setq count 0 | |
1463 | i 0) | |
1464 | (while (< i nitemset) | |
1465 | (setq item (aref ritem (aref itemset i))) | |
1466 | (when (< item 0) | |
1467 | (aset redset count (- item)) | |
1468 | (setq count (1+ count))) | |
1469 | (setq i (1+ i))) | |
1470 | ;; Make a reductions structure and copy the data into it. | |
1471 | (when (> count 0) | |
1472 | (setq p (make-reductions) | |
1473 | rules (make-vector count 0)) | |
1474 | (set-reductions-number p (core-number this-state)) | |
1475 | (set-reductions-nreds p count) | |
1476 | (set-reductions-rules p rules) | |
1477 | (setq i 0) | |
1478 | (while (< i count) | |
1479 | ;; (p->rules)[i] = redset[i] | |
1480 | (aset rules i (aref redset i)) | |
1481 | (setq i (1+ i))) | |
1482 | (if last-reduction | |
1483 | (set-reductions-next last-reduction p) | |
1484 | (setq first-reduction p)) | |
1485 | (setq last-reduction p)))) | |
1486 | ||
1487 | (defun wisent-generate-states () | |
1488 | "Compute the nondeterministic finite state machine from the grammar." | |
1489 | (wisent-allocate-storage) | |
1490 | (wisent-initialize-closure nitems) | |
1491 | (wisent-initialize-states) | |
1492 | (while this-state | |
1493 | ;; Set up RULESET and ITEMSET for the transitions out of this | |
1494 | ;; state. RULESET gets a 1 bit for each rule that could reduce | |
1495 | ;; now. ITEMSET gets a vector of all the items that could be | |
1496 | ;; accepted next. | |
1497 | (wisent-closure (core-items this-state) (core-nitems this-state)) | |
1498 | ;; Record the reductions allowed out of this state. | |
1499 | (wisent-save-reductions) | |
1500 | ;; Find the itemsets of the states that shifts can reach. | |
1501 | (wisent-new-itemsets) | |
1502 | ;; Find or create the core structures for those states. | |
1503 | (wisent-append-states) | |
1504 | ;; Create the shifts structures for the shifts to those states, | |
1505 | ;; now that the state numbers transitioning to are known. | |
1506 | (if (> nshifts 0) | |
1507 | (wisent-save-shifts)) | |
1508 | ;; States are queued when they are created; process them all. | |
1509 | (setq this-state (core-next this-state))) | |
1510 | ;; Set up initial and final states as parser wants them. | |
1511 | (wisent-augment-automaton)) | |
1512 | \f | |
1513 | ;;;; --------------------------- | |
1514 | ;;;; Compute look-ahead criteria | |
1515 | ;;;; --------------------------- | |
1516 | ||
1517 | ;; Compute how to make the finite state machine deterministic; find | |
1518 | ;; which rules need lookahead in each state, and which lookahead | |
1519 | ;; tokens they accept. | |
1520 | ||
1521 | ;; `wisent-lalr', the entry point, builds these data structures: | |
1522 | ||
1523 | ;; GOTO-MAP, FROM-STATE and TO-STATE record each shift transition | |
1524 | ;; which accepts a variable (a nonterminal). NGOTOS is the number of | |
1525 | ;; such transitions. | |
1526 | ;; FROM-STATE[t] is the state number which a transition leads from and | |
1527 | ;; TO-STATE[t] is the state number it leads to. | |
1528 | ;; All the transitions that accept a particular variable are grouped | |
1529 | ;; together and GOTO-MAP[i - NTOKENS] is the index in FROM-STATE and | |
1530 | ;; TO-STATE of the first of them. | |
1531 | ||
1532 | ;; CONSISTENT[s] is non-nil if no lookahead is needed to decide what | |
1533 | ;; to do in state s. | |
1534 | ||
1535 | ;; LARULENO is a vector which records the rules that need lookahead in | |
1536 | ;; various states. The elements of LARULENO that apply to state s are | |
1537 | ;; those from LOOKAHEADS[s] through LOOKAHEADS[s+1]-1. Each element | |
1538 | ;; of LARULENO is a rule number. | |
1539 | ||
1540 | ;; If LR is the length of LARULENO, then a number from 0 to LR-1 can | |
1541 | ;; specify both a rule and a state where the rule might be applied. | |
1542 | ;; LA is a LR by NTOKENS matrix of bits. | |
1543 | ;; LA[l, i] is 1 if the rule LARULENO[l] is applicable in the | |
1544 | ;; appropriate state when the next token is symbol i. | |
1545 | ;; If LA[l, i] and LA[l, j] are both 1 for i != j, it is a conflict. | |
1546 | ||
1547 | (wisent-defcontext digraph | |
1548 | INDEX R VERTICES | |
1549 | infinity top) | |
1550 | ||
1551 | (defun wisent-traverse (i) | |
1552 | "Traverse I." | |
1553 | (let (j k height Ri Fi break) | |
1554 | (setq top (1+ top) | |
1555 | height top) | |
1556 | (aset VERTICES top i) ;; VERTICES[++top] = i | |
1557 | (aset INDEX i top) ;; INDEX[i] = height = top | |
1558 | ||
1559 | (setq Ri (aref R i)) | |
1560 | (when Ri | |
1561 | (setq j 0) | |
1562 | (while (>= (aref Ri j) 0) | |
1563 | (if (zerop (aref INDEX (aref Ri j))) | |
1564 | (wisent-traverse (aref Ri j))) | |
1565 | ;; if (INDEX[i] > INDEX[R[i][j]]) | |
1566 | (if (> (aref INDEX i) (aref INDEX (aref Ri j))) | |
1567 | ;; INDEX[i] = INDEX[R[i][j]]; | |
1568 | (aset INDEX i (aref INDEX (aref Ri j)))) | |
1569 | (setq Fi (aref F i) | |
1570 | k 0) | |
1571 | (while (< k tokensetsize) | |
1572 | ;; F (i)[k] |= F (R[i][j])[k]; | |
1573 | (aset Fi k (logior (aref Fi k) | |
1574 | (aref (aref F (aref Ri j)) k))) | |
1575 | (setq k (1+ k))) | |
1576 | (setq j (1+ j)))) | |
1577 | ||
1578 | (when (= (aref INDEX i) height) | |
1579 | (setq break nil) | |
1580 | (while (not break) | |
1581 | (setq j (aref VERTICES top) ;; j = VERTICES[top--] | |
1582 | top (1- top)) | |
1583 | (aset INDEX j infinity) | |
1584 | (if (= i j) | |
1585 | (setq break t) | |
1586 | (setq k 0) | |
1587 | (while (< k tokensetsize) | |
1588 | ;; F (j)[k] = F (i)[k]; | |
1589 | (aset (aref F j) k (aref (aref F i) k)) | |
1590 | (setq k (1+ k)))))) | |
1591 | )) | |
1592 | ||
1593 | (defun wisent-digraph (relation) | |
1594 | "Digraph RELATION." | |
1595 | (wisent-with-context digraph | |
1596 | (setq infinity (+ ngotos 2) | |
1597 | INDEX (make-vector (1+ ngotos) 0) | |
1598 | VERTICES (make-vector (1+ ngotos) 0) | |
1599 | top 0 | |
1600 | R relation) | |
1601 | (let ((i 0)) | |
1602 | (while (< i ngotos) | |
1603 | (if (and (= (aref INDEX i) 0) (aref R i)) | |
1604 | (wisent-traverse i)) | |
1605 | (setq i (1+ i)))))) | |
1606 | ||
1607 | (defun wisent-set-state-table () | |
1608 | "Build state table." | |
1609 | (let (sp) | |
1610 | (setq state-table (make-vector nstates nil) | |
1611 | sp first-state) | |
1612 | (while sp | |
1613 | (aset state-table (core-number sp) sp) | |
1614 | (setq sp (core-next sp))))) | |
1615 | ||
1616 | (defun wisent-set-accessing-symbol () | |
1617 | "Build accessing symbol table." | |
1618 | (let (sp) | |
1619 | (setq accessing-symbol (make-vector nstates 0) | |
1620 | sp first-state) | |
1621 | (while sp | |
1622 | (aset accessing-symbol (core-number sp) (core-accessing-symbol sp)) | |
1623 | (setq sp (core-next sp))))) | |
1624 | ||
1625 | (defun wisent-set-shift-table () | |
1626 | "Build shift table." | |
1627 | (let (sp) | |
1628 | (setq shift-table (make-vector nstates nil) | |
1629 | sp first-shift) | |
1630 | (while sp | |
1631 | (aset shift-table (shifts-number sp) sp) | |
1632 | (setq sp (shifts-next sp))))) | |
1633 | ||
1634 | (defun wisent-set-reduction-table () | |
1635 | "Build reduction table." | |
1636 | (let (rp) | |
1637 | (setq reduction-table (make-vector nstates nil) | |
1638 | rp first-reduction) | |
1639 | (while rp | |
1640 | (aset reduction-table (reductions-number rp) rp) | |
1641 | (setq rp (reductions-next rp))))) | |
1642 | ||
1643 | (defun wisent-set-maxrhs () | |
1644 | "Setup MAXRHS length." | |
1645 | (let (i len max) | |
1646 | (setq len 0 | |
1647 | max 0 | |
1648 | i 0) | |
1649 | (while (aref ritem i) | |
1650 | (if (> (aref ritem i) 0) | |
1651 | (setq len (1+ len)) | |
1652 | (if (> len max) | |
1653 | (setq max len)) | |
1654 | (setq len 0)) | |
1655 | (setq i (1+ i))) | |
1656 | (setq maxrhs max))) | |
1657 | ||
1658 | (defun wisent-initialize-LA () | |
1659 | "Set up LA." | |
1660 | (let (i j k count rp sp np v) | |
1661 | (setq consistent (make-vector nstates nil) | |
1662 | lookaheads (make-vector (1+ nstates) 0) | |
1663 | count 0 | |
1664 | i 0) | |
1665 | (while (< i nstates) | |
1666 | (aset lookaheads i count) | |
1667 | (setq rp (aref reduction-table i) | |
1668 | sp (aref shift-table i)) | |
1669 | ;; if (rp && | |
1670 | ;; (rp->nreds > 1 | |
1671 | ;; || (sp && ! ISVAR(accessing-symbol[sp->shifts[0]])))) | |
1672 | (if (and rp | |
1673 | (or (> (reductions-nreds rp) 1) | |
1674 | (and sp | |
1675 | (not (wisent-ISVAR | |
1676 | (aref accessing-symbol | |
1677 | (aref (shifts-shifts sp) 0))))))) | |
1678 | (setq count (+ count (reductions-nreds rp))) | |
1679 | (aset consistent i t)) | |
1680 | ||
1681 | (when sp | |
1682 | (setq k 0 | |
1683 | j (shifts-nshifts sp) | |
1684 | v (shifts-shifts sp)) | |
1685 | (while (< k j) | |
1686 | (when (= (aref accessing-symbol (aref v k)) | |
1687 | error-token-number) | |
1688 | (aset consistent i nil) | |
1689 | (setq k j)) ;; break | |
1690 | (setq k (1+ k)))) | |
1691 | (setq i (1+ i))) | |
1692 | ||
1693 | (aset lookaheads nstates count) | |
1694 | ||
1695 | (if (zerop count) | |
1696 | (progn | |
1697 | (setq LA (make-vector 1 nil) | |
1698 | LAruleno (make-vector 1 0) | |
1699 | lookback (make-vector 1 nil))) | |
1700 | (setq LA (make-vector count nil) | |
1701 | LAruleno (make-vector count 0) | |
1702 | lookback (make-vector count nil))) | |
1703 | (setq i 0 j (length LA)) | |
1704 | (while (< i j) | |
1705 | (aset LA i (make-vector tokensetsize 0)) | |
1706 | (setq i (1+ i))) | |
1707 | ||
1708 | (setq np 0 | |
1709 | i 0) | |
1710 | (while (< i nstates) | |
1711 | (when (not (aref consistent i)) | |
1712 | (setq rp (aref reduction-table i)) | |
1713 | (when rp | |
1714 | (setq j 0 | |
1715 | k (reductions-nreds rp) | |
1716 | v (reductions-rules rp)) | |
1717 | (while (< j k) | |
1718 | (aset LAruleno np (aref v j)) | |
1719 | (setq np (1+ np) | |
1720 | j (1+ j))))) | |
1721 | (setq i (1+ i))))) | |
1722 | ||
1723 | (defun wisent-set-goto-map () | |
1724 | "Set up GOTO-MAP." | |
1725 | (let (sp i j symbol k temp-map state1 state2 v) | |
1726 | (setq goto-map (make-vector (1+ nvars) 0) | |
1727 | temp-map (make-vector (1+ nvars) 0)) | |
1728 | ||
1729 | (setq ngotos 0 | |
1730 | sp first-shift) | |
1731 | (while sp | |
1732 | (setq i (1- (shifts-nshifts sp)) | |
1733 | v (shifts-shifts sp)) | |
1734 | (while (>= i 0) | |
1735 | (setq symbol (aref accessing-symbol (aref v i))) | |
1736 | (if (wisent-ISTOKEN symbol) | |
1737 | (setq i 0) ;; break | |
1738 | (setq ngotos (1+ ngotos)) | |
1739 | ;; goto-map[symbol]++; | |
1740 | (aset goto-map (- symbol ntokens) | |
1741 | (1+ (aref goto-map (- symbol ntokens))))) | |
1742 | (setq i (1- i))) | |
1743 | (setq sp (shifts-next sp))) | |
1744 | ||
1745 | (setq k 0 | |
1746 | i ntokens | |
1747 | j 0) | |
1748 | (while (< i nsyms) | |
1749 | (aset temp-map j k) | |
1750 | (setq k (+ k (aref goto-map j)) | |
1751 | i (1+ i) | |
1752 | j (1+ j))) | |
1753 | (setq i ntokens | |
1754 | j 0) | |
1755 | (while (< i nsyms) | |
1756 | (aset goto-map j (aref temp-map j)) | |
1757 | (setq i (1+ i) | |
1758 | j (1+ j))) | |
1759 | ;; goto-map[nsyms] = ngotos; | |
1760 | ;; temp-map[nsyms] = ngotos; | |
1761 | (aset goto-map j ngotos) | |
1762 | (aset temp-map j ngotos) | |
1763 | ||
1764 | (setq from-state (make-vector ngotos 0) | |
1765 | to-state (make-vector ngotos 0) | |
1766 | sp first-shift) | |
1767 | (while sp | |
1768 | (setq state1 (shifts-number sp) | |
1769 | v (shifts-shifts sp) | |
1770 | i (1- (shifts-nshifts sp))) | |
1771 | (while (>= i 0) | |
1772 | (setq state2 (aref v i) | |
1773 | symbol (aref accessing-symbol state2)) | |
1774 | (if (wisent-ISTOKEN symbol) | |
1775 | (setq i 0) ;; break | |
1776 | ;; k = temp-map[symbol]++; | |
1777 | (setq k (aref temp-map (- symbol ntokens))) | |
1778 | (aset temp-map (- symbol ntokens) (1+ k)) | |
1779 | (aset from-state k state1) | |
1780 | (aset to-state k state2)) | |
1781 | (setq i (1- i))) | |
1782 | (setq sp (shifts-next sp))) | |
1783 | )) | |
1784 | ||
1785 | (defun wisent-map-goto (state symbol) | |
1786 | "Map a STATE/SYMBOL pair into its numeric representation." | |
1787 | (let (high low middle s result) | |
1788 | ;; low = goto-map[symbol]; | |
1789 | ;; high = goto-map[symbol + 1] - 1; | |
1790 | (setq low (aref goto-map (- symbol ntokens)) | |
1791 | high (1- (aref goto-map (- (1+ symbol) ntokens)))) | |
1792 | (while (and (not result) (<= low high)) | |
1793 | (setq middle (/ (+ low high) 2) | |
1794 | s (aref from-state middle)) | |
1795 | (cond | |
1796 | ((= s state) | |
1797 | (setq result middle)) | |
1798 | ((< s state) | |
1799 | (setq low (1+ middle))) | |
1800 | (t | |
1801 | (setq high (1- middle))))) | |
1802 | (or result | |
1803 | (error "Internal error in `wisent-map-goto'")) | |
1804 | )) | |
1805 | ||
1806 | (defun wisent-initialize-F () | |
1807 | "Set up F." | |
1808 | (let (i j k sp edge rowp rp reads nedges stateno symbol v break) | |
1809 | (setq F (make-vector ngotos nil) | |
1810 | i 0) | |
1811 | (while (< i ngotos) | |
1812 | (aset F i (make-vector tokensetsize 0)) | |
1813 | (setq i (1+ i))) | |
1814 | ||
1815 | (setq reads (make-vector ngotos nil) | |
1816 | edge (make-vector (1+ ngotos) 0) | |
1817 | nedges 0 | |
1818 | rowp 0 ;; rowp = F | |
1819 | i 0) | |
1820 | (while (< i ngotos) | |
1821 | (setq stateno (aref to-state i) | |
1822 | sp (aref shift-table stateno)) | |
1823 | (when sp | |
1824 | (setq k (shifts-nshifts sp) | |
1825 | v (shifts-shifts sp) | |
1826 | j 0 | |
1827 | break nil) | |
1828 | (while (and (not break) (< j k)) | |
1829 | ;; symbol = accessing-symbol[sp->shifts[j]]; | |
1830 | (setq symbol (aref accessing-symbol (aref v j))) | |
1831 | (if (wisent-ISVAR symbol) | |
1832 | (setq break t) ;; break | |
1833 | (wisent-SETBIT (aref F rowp) symbol) | |
1834 | (setq j (1+ j)))) | |
1835 | ||
1836 | (while (< j k) | |
1837 | ;; symbol = accessing-symbol[sp->shifts[j]]; | |
1838 | (setq symbol (aref accessing-symbol (aref v j))) | |
1839 | (when (aref nullable (- symbol ntokens)) | |
1840 | (aset edge nedges (wisent-map-goto stateno symbol)) | |
1841 | (setq nedges (1+ nedges))) | |
1842 | (setq j (1+ j))) | |
1843 | ||
1844 | (when (> nedges 0) | |
1845 | ;; reads[i] = rp = NEW2(nedges + 1, short); | |
1846 | (setq rp (make-vector (1+ nedges) 0) | |
1847 | j 0) | |
1848 | (aset reads i rp) | |
1849 | (while (< j nedges) | |
1850 | ;; rp[j] = edge[j]; | |
1851 | (aset rp j (aref edge j)) | |
1852 | (setq j (1+ j))) | |
1853 | (aset rp nedges -1) | |
1854 | (setq nedges 0))) | |
1855 | (setq rowp (1+ rowp)) | |
1856 | (setq i (1+ i))) | |
1857 | (wisent-digraph reads) | |
1858 | )) | |
1859 | ||
1860 | (defun wisent-add-lookback-edge (stateno ruleno gotono) | |
1861 | "Add a lookback edge. | |
1862 | STATENO, RULENO, GOTONO are self-explanatory." | |
1863 | (let (i k found) | |
1864 | (setq i (aref lookaheads stateno) | |
1865 | k (aref lookaheads (1+ stateno)) | |
1866 | found nil) | |
1867 | (while (and (not found) (< i k)) | |
1868 | (if (= (aref LAruleno i) ruleno) | |
1869 | (setq found t) | |
1870 | (setq i (1+ i)))) | |
1871 | ||
1872 | (or found | |
1873 | (error "Internal error in `wisent-add-lookback-edge'")) | |
1874 | ||
1875 | ;; value . next | |
1876 | ;; lookback[i] = (gotono . lookback[i]) | |
1877 | (aset lookback i (cons gotono (aref lookback i))))) | |
1878 | ||
1879 | (defun wisent-transpose (R-arg n) | |
1880 | "Return the transpose of R-ARG, of size N. | |
1881 | Destroy R-ARG, as it is replaced with the result. R-ARG[I] is nil or | |
1882 | a -1 terminated list of numbers. RESULT[NUM] is nil or the -1 | |
1883 | terminated list of the I such as NUM is in R-ARG[I]." | |
1884 | (let (i j new-R end-R nedges v sp) | |
1885 | (setq new-R (make-vector n nil) | |
1886 | end-R (make-vector n nil) | |
1887 | nedges (make-vector n 0)) | |
1888 | ||
1889 | ;; Count. | |
1890 | (setq i 0) | |
1891 | (while (< i n) | |
1892 | (setq v (aref R-arg i)) | |
1893 | (when v | |
1894 | (setq j 0) | |
1895 | (while (>= (aref v j) 0) | |
1896 | (aset nedges (aref v j) (1+ (aref nedges (aref v j)))) | |
1897 | (setq j (1+ j)))) | |
1898 | (setq i (1+ i))) | |
1899 | ||
1900 | ;; Allocate. | |
1901 | (setq i 0) | |
1902 | (while (< i n) | |
1903 | (when (> (aref nedges i) 0) | |
1904 | (setq sp (make-vector (1+ (aref nedges i)) 0)) | |
1905 | (aset sp (aref nedges i) -1) | |
1906 | (aset new-R i sp) | |
1907 | (aset end-R i 0)) | |
1908 | (setq i (1+ i))) | |
1909 | ||
1910 | ;; Store. | |
1911 | (setq i 0) | |
1912 | (while (< i n) | |
1913 | (setq v (aref R-arg i)) | |
1914 | (when v | |
1915 | (setq j 0) | |
1916 | (while (>= (aref v j) 0) | |
1917 | (aset (aref new-R (aref v j)) (aref end-R (aref v j)) i) | |
1918 | (aset end-R (aref v j) (1+ (aref end-R (aref v j)))) | |
1919 | (setq j (1+ j)))) | |
1920 | (setq i (1+ i))) | |
1921 | ||
1922 | new-R)) | |
1923 | ||
1924 | (defun wisent-build-relations () | |
1925 | "Build relations." | |
1926 | (let (i j k rulep rp sp length nedges done state1 stateno | |
1927 | symbol1 symbol2 edge states v) | |
1928 | (setq includes (make-vector ngotos nil) | |
1929 | edge (make-vector (1+ ngotos) 0) | |
1930 | states (make-vector (1+ maxrhs) 0) | |
1931 | i 0) | |
1932 | ||
1933 | (while (< i ngotos) | |
1934 | (setq nedges 0 | |
1935 | state1 (aref from-state i) | |
1936 | symbol1 (aref accessing-symbol (aref to-state i)) | |
1937 | rulep (aref derives (- symbol1 ntokens))) | |
1938 | ||
1939 | (while (> (car rulep) 0) | |
1940 | (aset states 0 state1) | |
1941 | (setq length 1 | |
1942 | stateno state1 | |
1943 | rp (aref rrhs (car rulep))) ;; rp = ritem + rrhs[*rulep] | |
1944 | (while (> (aref ritem rp) 0) ;; *rp > 0 | |
1945 | (setq symbol2 (aref ritem rp) | |
1946 | sp (aref shift-table stateno) | |
1947 | k (shifts-nshifts sp) | |
1948 | v (shifts-shifts sp) | |
1949 | j 0) | |
1950 | (while (< j k) | |
1951 | (setq stateno (aref v j)) | |
1952 | (if (= (aref accessing-symbol stateno) symbol2) | |
1953 | (setq j k) ;; break | |
1954 | (setq j (1+ j)))) | |
1955 | ;; states[length++] = stateno; | |
1956 | (aset states length stateno) | |
1957 | (setq length (1+ length)) | |
1958 | (setq rp (1+ rp))) | |
1959 | ||
1960 | (if (not (aref consistent stateno)) | |
1961 | (wisent-add-lookback-edge stateno (car rulep) i)) | |
1962 | ||
1963 | (setq length (1- length) | |
1964 | done nil) | |
1965 | (while (not done) | |
1966 | (setq done t | |
1967 | rp (1- rp)) | |
1968 | (when (and (>= rp 0) (wisent-ISVAR (aref ritem rp))) | |
1969 | ;; stateno = states[--length]; | |
1970 | (setq length (1- length) | |
1971 | stateno (aref states length)) | |
1972 | (aset edge nedges (wisent-map-goto stateno (aref ritem rp))) | |
1973 | (setq nedges (1+ nedges)) | |
1974 | (if (aref nullable (- (aref ritem rp) ntokens)) | |
1975 | (setq done nil)))) | |
1976 | (setq rulep (cdr rulep))) | |
1977 | ||
1978 | (when (> nedges 0) | |
1979 | (setq v (make-vector (1+ nedges) 0) | |
1980 | j 0) | |
1981 | (aset includes i v) | |
1982 | (while (< j nedges) | |
1983 | (aset v j (aref edge j)) | |
1984 | (setq j (1+ j))) | |
1985 | (aset v nedges -1)) | |
1986 | (setq i (1+ i))) | |
1987 | ||
1988 | (setq includes (wisent-transpose includes ngotos)) | |
1989 | )) | |
1990 | ||
1991 | (defun wisent-compute-FOLLOWS () | |
1992 | "Compute follows." | |
1993 | (wisent-digraph includes)) | |
1994 | ||
1995 | (defun wisent-compute-lookaheads () | |
1996 | "Compute lookaheads." | |
1997 | (let (i j n v1 v2 sp) | |
1998 | (setq n (aref lookaheads nstates) | |
1999 | i 0) | |
2000 | (while (< i n) | |
2001 | (setq sp (aref lookback i)) | |
2002 | (while sp | |
2003 | (setq v1 (aref LA i) | |
2004 | v2 (aref F (car sp)) | |
2005 | j 0) | |
2006 | (while (< j tokensetsize) | |
2007 | ;; LA (i)[j] |= F (sp->value)[j] | |
2008 | (aset v1 j (logior (aref v1 j) (aref v2 j))) | |
2009 | (setq j (1+ j))) | |
2010 | (setq sp (cdr sp))) | |
2011 | (setq i (1+ i))))) | |
2012 | ||
2013 | (defun wisent-lalr () | |
2014 | "Make the nondeterministic finite state machine deterministic." | |
2015 | (setq tokensetsize (wisent-WORDSIZE ntokens)) | |
2016 | (wisent-set-state-table) | |
2017 | (wisent-set-accessing-symbol) | |
2018 | (wisent-set-shift-table) | |
2019 | (wisent-set-reduction-table) | |
2020 | (wisent-set-maxrhs) | |
2021 | (wisent-initialize-LA) | |
2022 | (wisent-set-goto-map) | |
2023 | (wisent-initialize-F) | |
2024 | (wisent-build-relations) | |
2025 | (wisent-compute-FOLLOWS) | |
2026 | (wisent-compute-lookaheads)) | |
2027 | \f | |
2028 | ;;;; ----------------------------------------------- | |
2029 | ;;;; Find and resolve or report look-ahead conflicts | |
2030 | ;;;; ----------------------------------------------- | |
2031 | ||
2032 | (defsubst wisent-log-resolution (state LAno token resolution) | |
2033 | "Log a shift-reduce conflict resolution. | |
2034 | In specified STATE between rule pointed by lookahead number LANO and | |
2035 | TOKEN, resolved as RESOLUTION." | |
2036 | (if (or wisent-verbose-flag wisent-debug-flag) | |
2037 | (wisent-log | |
2038 | "Conflict in state %d between rule %d and token %s resolved as %s.\n" | |
2039 | state (aref LAruleno LAno) (wisent-tag token) resolution))) | |
2040 | ||
2041 | (defun wisent-flush-shift (state token) | |
2042 | "Turn off the shift recorded in the specified STATE for TOKEN. | |
2043 | Used when we resolve a shift-reduce conflict in favor of the reduction." | |
2044 | (let (shiftp i k v) | |
2045 | (when (setq shiftp (aref shift-table state)) | |
2046 | (setq k (shifts-nshifts shiftp) | |
2047 | v (shifts-shifts shiftp) | |
2048 | i 0) | |
2049 | (while (< i k) | |
2050 | (if (and (not (zerop (aref v i))) | |
2051 | (= token (aref accessing-symbol (aref v i)))) | |
2052 | (aset v i 0)) | |
2053 | (setq i (1+ i)))))) | |
2054 | ||
2055 | (defun wisent-resolve-sr-conflict (state lookaheadnum) | |
2056 | "Attempt to resolve shift-reduce conflict for one rule. | |
2057 | Resolve by means of precedence declarations. The conflict occurred in | |
2058 | specified STATE for the rule pointed by the lookahead symbol | |
2059 | LOOKAHEADNUM. It has already been checked that the rule has a | |
2060 | precedence. A conflict is resolved by modifying the shift or reduce | |
2061 | tables so that there is no longer a conflict." | |
2062 | (let (i redprec errp errs nerrs token sprec sassoc) | |
2063 | ;; Find the rule to reduce by to get precedence of reduction | |
2064 | (setq token (aref tags (aref rprec (aref LAruleno lookaheadnum))) | |
2065 | redprec (wisent-prec token) | |
2066 | errp (make-errs) | |
2067 | errs (make-vector ntokens 0) | |
2068 | nerrs 0 | |
2069 | i 0) | |
2070 | (set-errs-errs errp errs) | |
2071 | (while (< i ntokens) | |
2072 | (setq token (aref tags i)) | |
2073 | (when (and (wisent-BITISSET (aref LA lookaheadnum) i) | |
2074 | (wisent-BITISSET lookaheadset i) | |
2075 | (setq sprec (wisent-prec token))) | |
2076 | ;; Shift-reduce conflict occurs for token number I and it has | |
2077 | ;; a precedence. The precedence of shifting is that of token | |
2078 | ;; I. | |
2079 | (cond | |
2080 | ((< sprec redprec) | |
2081 | (wisent-log-resolution state lookaheadnum i "reduce") | |
2082 | ;; Flush the shift for this token | |
2083 | (wisent-RESETBIT lookaheadset i) | |
2084 | (wisent-flush-shift state i) | |
2085 | ) | |
2086 | ((> sprec redprec) | |
2087 | (wisent-log-resolution state lookaheadnum i "shift") | |
2088 | ;; Flush the reduce for this token | |
2089 | (wisent-RESETBIT (aref LA lookaheadnum) i) | |
2090 | ) | |
2091 | (t | |
2092 | ;; Matching precedence levels. | |
2093 | ;; For left association, keep only the reduction. | |
2094 | ;; For right association, keep only the shift. | |
2095 | ;; For nonassociation, keep neither. | |
2096 | (setq sassoc (wisent-assoc token)) | |
2097 | (cond | |
2098 | ((eq sassoc 'right) | |
2099 | (wisent-log-resolution state lookaheadnum i "shift")) | |
2100 | ((eq sassoc 'left) | |
2101 | (wisent-log-resolution state lookaheadnum i "reduce")) | |
2102 | ((eq sassoc 'nonassoc) | |
2103 | (wisent-log-resolution state lookaheadnum i "an error")) | |
2104 | ) | |
2105 | (when (not (eq sassoc 'right)) | |
2106 | ;; Flush the shift for this token | |
2107 | (wisent-RESETBIT lookaheadset i) | |
2108 | (wisent-flush-shift state i)) | |
2109 | (when (not (eq sassoc 'left)) | |
2110 | ;; Flush the reduce for this token | |
2111 | (wisent-RESETBIT (aref LA lookaheadnum) i)) | |
2112 | (when (eq sassoc 'nonassoc) | |
2113 | ;; Record an explicit error for this token | |
2114 | (aset errs nerrs i) | |
2115 | (setq nerrs (1+ nerrs))) | |
2116 | ))) | |
2117 | (setq i (1+ i))) | |
2118 | (when (> nerrs 0) | |
2119 | (set-errs-nerrs errp nerrs) | |
2120 | (aset err-table state errp)) | |
2121 | )) | |
2122 | ||
2123 | (defun wisent-set-conflicts (state) | |
2124 | "Find and attempt to resolve conflicts in specified STATE." | |
2125 | (let (i j k v shiftp symbol) | |
2126 | (unless (aref consistent state) | |
2127 | (fillarray lookaheadset 0) | |
2128 | ||
2129 | (when (setq shiftp (aref shift-table state)) | |
2130 | (setq k (shifts-nshifts shiftp) | |
2131 | v (shifts-shifts shiftp) | |
2132 | i 0) | |
2133 | (while (and (< i k) | |
2134 | (wisent-ISTOKEN | |
2135 | (setq symbol (aref accessing-symbol (aref v i))))) | |
2136 | (or (zerop (aref v i)) | |
2137 | (wisent-SETBIT lookaheadset symbol)) | |
2138 | (setq i (1+ i)))) | |
2139 | ||
2140 | ;; Loop over all rules which require lookahead in this state | |
2141 | ;; first check for shift-reduce conflict, and try to resolve | |
2142 | ;; using precedence | |
2143 | (setq i (aref lookaheads state) | |
2144 | k (aref lookaheads (1+ state))) | |
2145 | (while (< i k) | |
2146 | (when (aref rprec (aref LAruleno i)) | |
2147 | (setq v (aref LA i) | |
2148 | j 0) | |
2149 | (while (< j tokensetsize) | |
2150 | (if (zerop (logand (aref v j) (aref lookaheadset j))) | |
2151 | (setq j (1+ j)) | |
2152 | ;; if (LA (i)[j] & lookaheadset[j]) | |
2153 | (wisent-resolve-sr-conflict state i) | |
2154 | (setq j tokensetsize)))) ;; break | |
2155 | (setq i (1+ i))) | |
2156 | ||
2157 | ;; Loop over all rules which require lookahead in this state | |
2158 | ;; Check for conflicts not resolved above. | |
2159 | (setq i (aref lookaheads state)) | |
2160 | (while (< i k) | |
2161 | (setq v (aref LA i) | |
2162 | j 0) | |
2163 | (while (< j tokensetsize) | |
2164 | ;; if (LA (i)[j] & lookaheadset[j]) | |
2165 | (if (not (zerop (logand (aref v j) (aref lookaheadset j)))) | |
2166 | (aset conflicts state t)) | |
2167 | (setq j (1+ j))) | |
2168 | (setq j 0) | |
2169 | (while (< j tokensetsize) | |
2170 | ;; lookaheadset[j] |= LA (i)[j]; | |
2171 | (aset lookaheadset j (logior (aref lookaheadset j) | |
2172 | (aref v j))) | |
2173 | (setq j (1+ j))) | |
2174 | (setq i (1+ i))) | |
2175 | ))) | |
2176 | ||
2177 | (defun wisent-resolve-conflicts () | |
2178 | "Find and resolve conflicts." | |
2179 | (let (i) | |
2180 | (setq conflicts (make-vector nstates nil) | |
2181 | shiftset (make-vector tokensetsize 0) | |
2182 | lookaheadset (make-vector tokensetsize 0) | |
2183 | err-table (make-vector nstates nil) | |
2184 | i 0) | |
2185 | (while (< i nstates) | |
2186 | (wisent-set-conflicts i) | |
2187 | (setq i (1+ i))))) | |
2188 | ||
2189 | (defun wisent-count-sr-conflicts (state) | |
2190 | "Count the number of shift/reduce conflicts in specified STATE." | |
2191 | (let (i j k shiftp symbol v) | |
2192 | (setq src-count 0 | |
2193 | shiftp (aref shift-table state)) | |
2194 | (when shiftp | |
2195 | (fillarray shiftset 0) | |
2196 | (fillarray lookaheadset 0) | |
2197 | (setq k (shifts-nshifts shiftp) | |
2198 | v (shifts-shifts shiftp) | |
2199 | i 0) | |
2200 | (while (< i k) | |
2201 | (when (not (zerop (aref v i))) | |
2202 | (setq symbol (aref accessing-symbol (aref v i))) | |
2203 | (if (wisent-ISVAR symbol) | |
2204 | (setq i k) ;; break | |
2205 | (wisent-SETBIT shiftset symbol))) | |
2206 | (setq i (1+ i))) | |
2207 | ||
2208 | (setq k (aref lookaheads (1+ state)) | |
2209 | i (aref lookaheads state)) | |
2210 | (while (< i k) | |
2211 | (setq v (aref LA i) | |
2212 | j 0) | |
2213 | (while (< j tokensetsize) | |
2214 | ;; lookaheadset[j] |= LA (i)[j] | |
2215 | (aset lookaheadset j (logior (aref lookaheadset j) | |
2216 | (aref v j))) | |
2217 | (setq j (1+ j))) | |
2218 | (setq i (1+ i))) | |
2219 | ||
2220 | (setq k 0) | |
2221 | (while (< k tokensetsize) | |
2222 | ;; lookaheadset[k] &= shiftset[k]; | |
2223 | (aset lookaheadset k (logand (aref lookaheadset k) | |
2224 | (aref shiftset k))) | |
2225 | (setq k (1+ k))) | |
2226 | ||
2227 | (setq i 0) | |
2228 | (while (< i ntokens) | |
2229 | (if (wisent-BITISSET lookaheadset i) | |
2230 | (setq src-count (1+ src-count))) | |
2231 | (setq i (1+ i)))) | |
2232 | src-count)) | |
2233 | ||
2234 | (defun wisent-count-rr-conflicts (state) | |
2235 | "Count the number of reduce/reduce conflicts in specified STATE." | |
2236 | (let (i j count n m) | |
2237 | (setq rrc-count 0 | |
2238 | m (aref lookaheads state) | |
2239 | n (aref lookaheads (1+ state))) | |
2240 | (when (>= (- n m) 2) | |
2241 | (setq i 0) | |
2242 | (while (< i ntokens) | |
2243 | (setq count 0 | |
2244 | j m) | |
2245 | (while (< j n) | |
2246 | (if (wisent-BITISSET (aref LA j) i) | |
2247 | (setq count (1+ count))) | |
2248 | (setq j (1+ j))) | |
2249 | ||
2250 | (if (>= count 2) | |
2251 | (setq rrc-count (1+ rrc-count))) | |
2252 | (setq i (1+ i)))) | |
2253 | rrc-count)) | |
2254 | ||
2255 | (defvar wisent-expected-conflicts nil | |
2256 | "*If non-nil suppress the warning about shift/reduce conflicts. | |
2257 | It is a decimal integer N that says there should be no warning if | |
2258 | there are N shift/reduce conflicts and no reduce/reduce conflicts. A | |
2259 | warning is given if there are either more or fewer conflicts, or if | |
2260 | there are any reduce/reduce conflicts.") | |
2261 | ||
2262 | (defun wisent-total-conflicts () | |
2263 | "Report the total number of conflicts." | |
2264 | (unless (and (zerop rrc-total) | |
2265 | (or (zerop src-total) | |
2266 | (= src-total (or wisent-expected-conflicts 0)))) | |
2267 | (let* ((src (wisent-source)) | |
2268 | (src (if src (concat " in " src) "")) | |
2269 | (msg (format "Grammar%s contains" src))) | |
2270 | (if (> src-total 0) | |
2271 | (setq msg (format "%s %d shift/reduce conflict%s" | |
2272 | msg src-total (if (> src-total 1) | |
2273 | "s" "")))) | |
2274 | (if (and (> src-total 0) (> rrc-total 0)) | |
2275 | (setq msg (format "%s and" msg))) | |
2276 | (if (> rrc-total 0) | |
2277 | (setq msg (format "%s %d reduce/reduce conflict%s" | |
2278 | msg rrc-total (if (> rrc-total 1) | |
2279 | "s" "")))) | |
2280 | (message msg)))) | |
2281 | ||
2282 | (defun wisent-print-conflicts () | |
2283 | "Report conflicts." | |
2284 | (let (i) | |
2285 | (setq src-total 0 | |
2286 | rrc-total 0 | |
2287 | i 0) | |
2288 | (while (< i nstates) | |
2289 | (when (aref conflicts i) | |
2290 | (wisent-count-sr-conflicts i) | |
2291 | (wisent-count-rr-conflicts i) | |
2292 | (setq src-total (+ src-total src-count) | |
2293 | rrc-total (+ rrc-total rrc-count)) | |
2294 | (when (or wisent-verbose-flag wisent-debug-flag) | |
2295 | (wisent-log "State %d contains" i) | |
2296 | (if (> src-count 0) | |
2297 | (wisent-log " %d shift/reduce conflict%s" | |
2298 | src-count (if (> src-count 1) "s" ""))) | |
2299 | ||
2300 | (if (and (> src-count 0) (> rrc-count 0)) | |
2301 | (wisent-log " and")) | |
2302 | ||
2303 | (if (> rrc-count 0) | |
2304 | (wisent-log " %d reduce/reduce conflict%s" | |
2305 | rrc-count (if (> rrc-count 1) "s" ""))) | |
2306 | ||
2307 | (wisent-log ".\n"))) | |
2308 | (setq i (1+ i))) | |
2309 | (wisent-total-conflicts))) | |
2310 | \f | |
2311 | ;;;; -------------------------------------- | |
2312 | ;;;; Report information on generated parser | |
2313 | ;;;; -------------------------------------- | |
2314 | (defun wisent-print-grammar () | |
2315 | "Print grammar." | |
2316 | (let (i j r break left-count right-count) | |
2317 | ||
2318 | (wisent-log "\n\nGrammar\n\n Number, Rule\n") | |
2319 | (setq i 1) | |
2320 | (while (<= i nrules) | |
2321 | ;; Don't print rules disabled in `wisent-reduce-grammar-tables'. | |
2322 | (when (aref ruseful i) | |
2323 | (wisent-log " %s %s ->" | |
2324 | (wisent-pad-string (number-to-string i) 6) | |
2325 | (wisent-tag (aref rlhs i))) | |
2326 | (setq r (aref rrhs i)) | |
2327 | (if (> (aref ritem r) 0) | |
2328 | (while (> (aref ritem r) 0) | |
2329 | (wisent-log " %s" (wisent-tag (aref ritem r))) | |
2330 | (setq r (1+ r))) | |
2331 | (wisent-log " /* empty */")) | |
2332 | (wisent-log "\n")) | |
2333 | (setq i (1+ i))) | |
2334 | ||
2335 | (wisent-log "\n\nTerminals, with rules where they appear\n\n") | |
2336 | (wisent-log "%s (-1)\n" (wisent-tag 0)) | |
2337 | (setq i 1) | |
2338 | (while (< i ntokens) | |
2339 | (wisent-log "%s (%d)" (wisent-tag i) i) | |
2340 | (setq j 1) | |
2341 | (while (<= j nrules) | |
2342 | (setq r (aref rrhs j) | |
2343 | break nil) | |
2344 | (while (and (not break) (> (aref ritem r) 0)) | |
2345 | (if (setq break (= (aref ritem r) i)) | |
2346 | (wisent-log " %d" j) | |
2347 | (setq r (1+ r)))) | |
2348 | (setq j (1+ j))) | |
2349 | (wisent-log "\n") | |
2350 | (setq i (1+ i))) | |
2351 | ||
2352 | (wisent-log "\n\nNonterminals, with rules where they appear\n\n") | |
2353 | (setq i ntokens) | |
2354 | (while (< i nsyms) | |
2355 | (setq left-count 0 | |
2356 | right-count 0 | |
2357 | j 1) | |
2358 | (while (<= j nrules) | |
2359 | (if (= (aref rlhs j) i) | |
2360 | (setq left-count (1+ left-count))) | |
2361 | (setq r (aref rrhs j) | |
2362 | break nil) | |
2363 | (while (and (not break) (> (aref ritem r) 0)) | |
2364 | (if (= (aref ritem r) i) | |
2365 | (setq right-count (1+ right-count) | |
2366 | break t) | |
2367 | (setq r (1+ r)))) | |
2368 | (setq j (1+ j))) | |
2369 | (wisent-log "%s (%d)\n " (wisent-tag i) i) | |
2370 | (when (> left-count 0) | |
2371 | (wisent-log " on left:") | |
2372 | (setq j 1) | |
2373 | (while (<= j nrules) | |
2374 | (if (= (aref rlhs j) i) | |
2375 | (wisent-log " %d" j)) | |
2376 | (setq j (1+ j)))) | |
2377 | (when (> right-count 0) | |
2378 | (if (> left-count 0) | |
2379 | (wisent-log ",")) | |
2380 | (wisent-log " on right:") | |
2381 | (setq j 1) | |
2382 | (while (<= j nrules) | |
2383 | (setq r (aref rrhs j) | |
2384 | break nil) | |
2385 | (while (and (not break) (> (aref ritem r) 0)) | |
2386 | (if (setq break (= (aref ritem r) i)) | |
2387 | (wisent-log " %d" j) | |
2388 | (setq r (1+ r)))) | |
2389 | (setq j (1+ j)))) | |
2390 | (wisent-log "\n") | |
2391 | (setq i (1+ i))) | |
2392 | )) | |
2393 | ||
2394 | (defun wisent-print-reductions (state) | |
2395 | "Print reductions on STATE." | |
2396 | (let (i j k v symbol m n defaulted | |
2397 | default-LA default-rule cmax count shiftp errp nodefault) | |
2398 | (setq nodefault nil | |
2399 | i 0) | |
2400 | (fillarray shiftset 0) | |
2401 | ||
2402 | (setq shiftp (aref shift-table state)) | |
2403 | (when shiftp | |
2404 | (setq k (shifts-nshifts shiftp) | |
2405 | v (shifts-shifts shiftp) | |
2406 | i 0) | |
2407 | (while (< i k) | |
2408 | (when (not (zerop (aref v i))) | |
2409 | (setq symbol (aref accessing-symbol (aref v i))) | |
2410 | (if (wisent-ISVAR symbol) | |
2411 | (setq i k) ;; break | |
2412 | ;; If this state has a shift for the error token, don't | |
2413 | ;; use a default rule. | |
2414 | (if (= symbol error-token-number) | |
2415 | (setq nodefault t)) | |
2416 | (wisent-SETBIT shiftset symbol))) | |
2417 | (setq i (1+ i)))) | |
2418 | ||
2419 | (setq errp (aref err-table state)) | |
2420 | (when errp | |
2421 | (setq k (errs-nerrs errp) | |
2422 | v (errs-errs errp) | |
2423 | i 0) | |
2424 | (while (< i k) | |
2425 | (if (not (zerop (setq symbol (aref v i)))) | |
2426 | (wisent-SETBIT shiftset symbol)) | |
2427 | (setq i (1+ i)))) | |
2428 | ||
2429 | (setq m (aref lookaheads state) | |
2430 | n (aref lookaheads (1+ state))) | |
2431 | ||
2432 | (cond | |
2433 | ((and (= (- n m) 1) (not nodefault)) | |
2434 | (setq default-rule (aref LAruleno m) | |
2435 | v (aref LA m) | |
2436 | k 0) | |
2437 | (while (< k tokensetsize) | |
2438 | (aset lookaheadset k (logand (aref v k) | |
2439 | (aref shiftset k))) | |
2440 | (setq k (1+ k))) | |
2441 | ||
2442 | (setq i 0) | |
2443 | (while (< i ntokens) | |
2444 | (if (wisent-BITISSET lookaheadset i) | |
2445 | (wisent-log " %s\t[reduce using rule %d (%s)]\n" | |
2446 | (wisent-tag i) default-rule | |
2447 | (wisent-tag (aref rlhs default-rule)))) | |
2448 | (setq i (1+ i))) | |
2449 | (wisent-log " $default\treduce using rule %d (%s)\n\n" | |
2450 | default-rule | |
2451 | (wisent-tag (aref rlhs default-rule))) | |
2452 | ) | |
2453 | ((>= (- n m) 1) | |
2454 | (setq cmax 0 | |
2455 | default-LA -1 | |
2456 | default-rule 0) | |
2457 | (when (not nodefault) | |
2458 | (setq i m) | |
2459 | (while (< i n) | |
2460 | (setq v (aref LA i) | |
2461 | count 0 | |
2462 | k 0) | |
2463 | (while (< k tokensetsize) | |
2464 | ;; lookaheadset[k] = LA (i)[k] & ~shiftset[k] | |
2465 | (aset lookaheadset k | |
2466 | (logand (aref v k) | |
2467 | (lognot (aref shiftset k)))) | |
2468 | (setq k (1+ k))) | |
2469 | (setq j 0) | |
2470 | (while (< j ntokens) | |
2471 | (if (wisent-BITISSET lookaheadset j) | |
2472 | (setq count (1+ count))) | |
2473 | (setq j (1+ j))) | |
2474 | (if (> count cmax) | |
2475 | (setq cmax count | |
2476 | default-LA i | |
2477 | default-rule (aref LAruleno i))) | |
2478 | (setq k 0) | |
2479 | (while (< k tokensetsize) | |
2480 | (aset shiftset k (logior (aref shiftset k) | |
2481 | (aref lookaheadset k))) | |
2482 | (setq k (1+ k))) | |
2483 | (setq i (1+ i)))) | |
2484 | ||
2485 | (fillarray shiftset 0) | |
2486 | ||
2487 | (when shiftp | |
2488 | (setq k (shifts-nshifts shiftp) | |
2489 | v (shifts-shifts shiftp) | |
2490 | i 0) | |
2491 | (while (< i k) | |
2492 | (when (not (zerop (aref v i))) | |
2493 | (setq symbol (aref accessing-symbol (aref v i))) | |
2494 | (if (wisent-ISVAR symbol) | |
2495 | (setq i k) ;; break | |
2496 | (wisent-SETBIT shiftset symbol))) | |
2497 | (setq i (1+ i)))) | |
2498 | ||
2499 | (setq i 0) | |
2500 | (while (< i ntokens) | |
2501 | (setq defaulted nil | |
2502 | count (if (wisent-BITISSET shiftset i) 1 0) | |
2503 | j m) | |
2504 | (while (< j n) | |
2505 | (when (wisent-BITISSET (aref LA j) i) | |
2506 | (if (zerop count) | |
2507 | (progn | |
2508 | (if (not (= j default-LA)) | |
2509 | (wisent-log | |
2510 | " %s\treduce using rule %d (%s)\n" | |
2511 | (wisent-tag i) (aref LAruleno j) | |
2512 | (wisent-tag (aref rlhs (aref LAruleno j)))) | |
2513 | (setq defaulted t)) | |
2514 | (setq count (1+ count))) | |
2515 | (if defaulted | |
2516 | (wisent-log | |
2517 | " %s\treduce using rule %d (%s)\n" | |
2518 | (wisent-tag i) (aref LAruleno default-LA) | |
2519 | (wisent-tag (aref rlhs (aref LAruleno default-LA))))) | |
2520 | (setq defaulted nil) | |
2521 | (wisent-log | |
2522 | " %s\t[reduce using rule %d (%s)]\n" | |
2523 | (wisent-tag i) (aref LAruleno j) | |
2524 | (wisent-tag (aref rlhs (aref LAruleno j)))))) | |
2525 | (setq j (1+ j))) | |
2526 | (setq i (1+ i))) | |
2527 | ||
2528 | (if (>= default-LA 0) | |
2529 | (wisent-log | |
2530 | " $default\treduce using rule %d (%s)\n" | |
2531 | default-rule | |
2532 | (wisent-tag (aref rlhs default-rule)))) | |
2533 | )))) | |
2534 | ||
2535 | (defun wisent-print-actions (state) | |
2536 | "Print actions on STATE." | |
2537 | (let (i j k v state1 symbol shiftp errp redp rule nerrs break) | |
2538 | (setq shiftp (aref shift-table state) | |
2539 | redp (aref reduction-table state) | |
2540 | errp (aref err-table state)) | |
2541 | (if (and (not shiftp) (not redp)) | |
2542 | (if (= final-state state) | |
2543 | (wisent-log " $default\taccept\n") | |
2544 | (wisent-log " NO ACTIONS\n")) | |
2545 | (if (not shiftp) | |
2546 | (setq i 0 | |
2547 | k 0) | |
2548 | (setq k (shifts-nshifts shiftp) | |
2549 | v (shifts-shifts shiftp) | |
2550 | i 0 | |
2551 | break nil) | |
2552 | (while (and (not break) (< i k)) | |
2553 | (if (zerop (setq state1 (aref v i))) | |
2554 | (setq i (1+ i)) | |
2555 | (setq symbol (aref accessing-symbol state1)) | |
2556 | ;; The following line used to be turned off. | |
2557 | (if (wisent-ISVAR symbol) | |
2558 | (setq break t) ;; break | |
2559 | (wisent-log " %s\tshift, and go to state %d\n" | |
2560 | (wisent-tag symbol) state1) | |
2561 | (setq i (1+ i))))) | |
2562 | (if (> i 0) | |
2563 | (wisent-log "\n"))) | |
2564 | ||
2565 | (when errp | |
2566 | (setq nerrs (errs-nerrs errp) | |
2567 | v (errs-errs errp) | |
2568 | j 0) | |
2569 | (while (< j nerrs) | |
2570 | (if (aref v j) | |
2571 | (wisent-log " %s\terror (nonassociative)\n" | |
2572 | (wisent-tag (aref v j)))) | |
2573 | (setq j (1+ j))) | |
2574 | (if (> j 0) | |
2575 | (wisent-log "\n"))) | |
2576 | ||
2577 | (cond | |
2578 | ((and (aref consistent state) redp) | |
2579 | (setq rule (aref (reductions-rules redp) 0) | |
2580 | symbol (aref rlhs rule)) | |
2581 | (wisent-log " $default\treduce using rule %d (%s)\n\n" | |
2582 | rule (wisent-tag symbol)) | |
2583 | ) | |
2584 | (redp | |
2585 | (wisent-print-reductions state) | |
2586 | )) | |
2587 | ||
2588 | (when (< i k) | |
2589 | (setq v (shifts-shifts shiftp)) | |
2590 | (while (< i k) | |
2591 | (when (setq state1 (aref v i)) | |
2592 | (setq symbol (aref accessing-symbol state1)) | |
2593 | (wisent-log " %s\tgo to state %d\n" | |
2594 | (wisent-tag symbol) state1)) | |
2595 | (setq i (1+ i))) | |
2596 | (wisent-log "\n")) | |
2597 | ))) | |
2598 | ||
2599 | (defun wisent-print-core (state) | |
2600 | "Print STATE core." | |
2601 | (let (i k rule statep sp sp1) | |
2602 | (setq statep (aref state-table state) | |
2603 | k (core-nitems statep)) | |
2604 | (when (> k 0) | |
2605 | (setq i 0) | |
2606 | (while (< i k) | |
2607 | ;; sp1 = sp = ritem + statep->items[i]; | |
2608 | (setq sp1 (aref (core-items statep) i) | |
2609 | sp sp1) | |
2610 | (while (> (aref ritem sp) 0) | |
2611 | (setq sp (1+ sp))) | |
2612 | ||
2613 | (setq rule (- (aref ritem sp))) | |
2614 | (wisent-log " %s -> " (wisent-tag (aref rlhs rule))) | |
2615 | ||
2616 | (setq sp (aref rrhs rule)) | |
2617 | (while (< sp sp1) | |
2618 | (wisent-log "%s " (wisent-tag (aref ritem sp))) | |
2619 | (setq sp (1+ sp))) | |
2620 | (wisent-log ".") | |
2621 | (while (> (aref ritem sp) 0) | |
2622 | (wisent-log " %s" (wisent-tag (aref ritem sp))) | |
2623 | (setq sp (1+ sp))) | |
2624 | (wisent-log " (rule %d)\n" rule) | |
2625 | (setq i (1+ i))) | |
2626 | (wisent-log "\n")))) | |
2627 | ||
2628 | (defun wisent-print-state (state) | |
2629 | "Print information on STATE." | |
2630 | (wisent-log "\n\nstate %d\n\n" state) | |
2631 | (wisent-print-core state) | |
2632 | (wisent-print-actions state)) | |
2633 | ||
2634 | (defun wisent-print-states () | |
2635 | "Print information on states." | |
2636 | (let ((i 0)) | |
2637 | (while (< i nstates) | |
2638 | (wisent-print-state i) | |
2639 | (setq i (1+ i))))) | |
2640 | ||
2641 | (defun wisent-print-results () | |
2642 | "Print information on generated parser. | |
333f9019 | 2643 | Report detailed information if `wisent-verbose-flag' or |
bb051423 CY |
2644 | `wisent-debug-flag' are non-nil." |
2645 | (when (or wisent-verbose-flag wisent-debug-flag) | |
2646 | (wisent-print-useless)) | |
2647 | (wisent-print-conflicts) | |
2648 | (when (or wisent-verbose-flag wisent-debug-flag) | |
2649 | (wisent-print-grammar) | |
2650 | (wisent-print-states)) | |
2651 | ;; Append output to log file when running in batch mode | |
2652 | (when (wisent-noninteractive) | |
2653 | (wisent-append-to-log-file) | |
2654 | (wisent-clear-log))) | |
2655 | \f | |
2656 | ;;;; --------------------------------- | |
2657 | ;;;; Build the generated parser tables | |
2658 | ;;;; --------------------------------- | |
2659 | ||
2660 | (defun wisent-action-row (state actrow) | |
2661 | "Figure out the actions for the specified STATE. | |
2662 | Decide what to do for each type of token if seen as the lookahead | |
2663 | token in specified state. The value returned is used as the default | |
2664 | action for the state. In addition, ACTROW is filled with what to do | |
2665 | for each kind of token, index by symbol number, with nil meaning do | |
2666 | the default action. The value 'error, means this situation is an | |
2667 | error. The parser recognizes this value specially. | |
2668 | ||
2669 | This is where conflicts are resolved. The loop over lookahead rules | |
2670 | considered lower-numbered rules last, and the last rule considered | |
2671 | that likes a token gets to handle it." | |
2672 | (let (i j k m n v default-rule nreds rule max count | |
2673 | shift-state symbol redp shiftp errp nodefault) | |
2674 | ||
2675 | (fillarray actrow nil) | |
2676 | ||
2677 | (setq default-rule 0 | |
2678 | nodefault nil ;; nil inhibit having any default reduction | |
2679 | nreds 0 | |
2680 | m 0 | |
2681 | n 0 | |
2682 | redp (aref reduction-table state)) | |
2683 | ||
2684 | (when redp | |
2685 | (setq nreds (reductions-nreds redp)) | |
2686 | (when (>= nreds 1) | |
2687 | ;; loop over all the rules available here which require | |
2688 | ;; lookahead | |
2689 | (setq m (aref lookaheads state) | |
2690 | n (aref lookaheads (1+ state)) | |
2691 | i (1- n)) | |
2692 | (while (>= i m) | |
2693 | ;; and find each token which the rule finds acceptable to | |
2694 | ;; come next | |
2695 | (setq j 0) | |
2696 | (while (< j ntokens) | |
2697 | ;; and record this rule as the rule to use if that token | |
2698 | ;; follows. | |
2699 | (if (wisent-BITISSET (aref LA i) j) | |
2700 | (aset actrow j (- (aref LAruleno i))) | |
2701 | ) | |
2702 | (setq j (1+ j))) | |
2703 | (setq i (1- i))))) | |
2704 | ||
2705 | ;; Now see which tokens are allowed for shifts in this state. For | |
2706 | ;; them, record the shift as the thing to do. So shift is | |
2707 | ;; preferred to reduce. | |
2708 | (setq shiftp (aref shift-table state)) | |
2709 | (when shiftp | |
2710 | (setq k (shifts-nshifts shiftp) | |
2711 | v (shifts-shifts shiftp) | |
2712 | i 0) | |
2713 | (while (< i k) | |
2714 | (setq shift-state (aref v i)) | |
2715 | (if (zerop shift-state) | |
2716 | nil ;; continue | |
2717 | (setq symbol (aref accessing-symbol shift-state)) | |
2718 | (if (wisent-ISVAR symbol) | |
2719 | (setq i k) ;; break | |
2720 | (aset actrow symbol shift-state) | |
2721 | ;; Do not use any default reduction if there is a shift | |
2722 | ;; for error | |
2723 | (if (= symbol error-token-number) | |
2724 | (setq nodefault t)))) | |
2725 | (setq i (1+ i)))) | |
2726 | ||
2727 | ;; See which tokens are an explicit error in this state (due to | |
2728 | ;; %nonassoc). For them, record error as the action. | |
2729 | (setq errp (aref err-table state)) | |
2730 | (when errp | |
2731 | (setq k (errs-nerrs errp) | |
2732 | v (errs-errs errp) | |
2733 | i 0) | |
2734 | (while (< i k) | |
2735 | (aset actrow (aref v i) wisent-error-tag) | |
2736 | (setq i (1+ i)))) | |
2737 | ||
2738 | ;; Now find the most common reduction and make it the default | |
2739 | ;; action for this state. | |
2740 | (when (and (>= nreds 1) (not nodefault)) | |
2741 | (if (aref consistent state) | |
2742 | (setq default-rule (- (aref (reductions-rules redp) 0))) | |
2743 | (setq max 0 | |
2744 | i m) | |
2745 | (while (< i n) | |
2746 | (setq count 0 | |
2747 | rule (- (aref LAruleno i)) | |
2748 | j 0) | |
2749 | (while (< j ntokens) | |
2750 | (if (and (numberp (aref actrow j)) | |
2751 | (= (aref actrow j) rule)) | |
2752 | (setq count (1+ count))) | |
2753 | (setq j (1+ j))) | |
2754 | (if (> count max) | |
2755 | (setq max count | |
2756 | default-rule rule)) | |
2757 | (setq i (1+ i))) | |
2758 | ;; actions which match the default are replaced with zero, | |
2759 | ;; which means "use the default" | |
2760 | (when (> max 0) | |
2761 | (setq j 0) | |
2762 | (while (< j ntokens) | |
2763 | (if (and (numberp (aref actrow j)) | |
2764 | (= (aref actrow j) default-rule)) | |
2765 | (aset actrow j nil)) | |
2766 | (setq j (1+ j))) | |
2767 | ))) | |
2768 | ||
2769 | ;; If have no default rule, if this is the final state the default | |
2770 | ;; is accept else it is an error. So replace any action which | |
2771 | ;; says "error" with "use default". | |
2772 | (when (zerop default-rule) | |
2773 | (if (= final-state state) | |
2774 | (setq default-rule wisent-accept-tag) | |
2775 | (setq j 0) | |
2776 | (while (< j ntokens) | |
2777 | (if (eq (aref actrow j) wisent-error-tag) | |
2778 | (aset actrow j nil)) | |
2779 | (setq j (1+ j))) | |
2780 | (setq default-rule wisent-error-tag))) | |
2781 | default-rule)) | |
2782 | ||
2783 | (defconst wisent-default-tag 'default | |
2784 | "Tag used in an action table to indicate a default action.") | |
2785 | ||
2786 | ;; These variables only exist locally in the function | |
2787 | ;; `wisent-state-actions' and are shared by all other nested callees. | |
2788 | (wisent-defcontext semantic-actions | |
2789 | ;; Uninterned symbols used in code generation. | |
2790 | stack sp gotos state | |
2791 | ;; Name of the current semantic action | |
2792 | NAME) | |
2793 | ||
2794 | (defun wisent-state-actions () | |
2795 | "Figure out the actions for every state. | |
2796 | Return the action table." | |
2797 | ;; Store the semantic action obarray in (unused) RCODE[0]. | |
2798 | (aset rcode 0 (make-vector 13 0)) | |
2799 | (let (i j action-table actrow action) | |
2800 | (setq action-table (make-vector nstates nil) | |
2801 | actrow (make-vector ntokens nil) | |
2802 | i 0) | |
2803 | (wisent-with-context semantic-actions | |
2804 | (setq stack (make-symbol "stack") | |
2805 | sp (make-symbol "sp") | |
2806 | gotos (make-symbol "gotos") | |
2807 | state (make-symbol "state")) | |
2808 | (while (< i nstates) | |
2809 | (setq action (wisent-action-row i actrow)) | |
2810 | ;; Translate a reduction into semantic action | |
2811 | (and (integerp action) (< action 0) | |
2812 | (setq action (wisent-semantic-action (- action)))) | |
2813 | (aset action-table i (list (cons wisent-default-tag action))) | |
2814 | (setq j 0) | |
2815 | (while (< j ntokens) | |
2816 | (when (setq action (aref actrow j)) | |
2817 | ;; Translate a reduction into semantic action | |
2818 | (and (integerp action) (< action 0) | |
2819 | (setq action (wisent-semantic-action (- action)))) | |
2820 | (aset action-table i (cons (cons (aref tags j) action) | |
2821 | (aref action-table i))) | |
2822 | ) | |
2823 | (setq j (1+ j))) | |
2824 | (aset action-table i (nreverse (aref action-table i))) | |
2825 | (setq i (1+ i))) | |
2826 | action-table))) | |
2827 | ||
2828 | (defun wisent-goto-actions () | |
2829 | "Figure out what to do after reducing with each rule. | |
2830 | Depending on the saved state from before the beginning of parsing the | |
2831 | data that matched this rule. Return the goto table." | |
2832 | (let (i j m n symbol state goto-table) | |
2833 | (setq goto-table (make-vector nstates nil) | |
2834 | i ntokens) | |
2835 | (while (< i nsyms) | |
2836 | (setq symbol (- i ntokens) | |
2837 | m (aref goto-map symbol) | |
2838 | n (aref goto-map (1+ symbol)) | |
2839 | j m) | |
2840 | (while (< j n) | |
2841 | (setq state (aref from-state j)) | |
2842 | (aset goto-table state | |
2843 | (cons (cons (aref tags i) (aref to-state j)) | |
2844 | (aref goto-table state))) | |
2845 | (setq j (1+ j))) | |
2846 | (setq i (1+ i))) | |
2847 | goto-table)) | |
2848 | ||
2849 | (defsubst wisent-quote-p (sym) | |
2850 | "Return non-nil if SYM is bound to the `quote' function." | |
2851 | (condition-case nil | |
2852 | (eq (indirect-function sym) | |
2853 | (indirect-function 'quote)) | |
2854 | (error nil))) | |
2855 | ||
2856 | (defsubst wisent-backquote-p (sym) | |
2857 | "Return non-nil if SYM is bound to the `backquote' function." | |
2858 | (condition-case nil | |
2859 | (eq (indirect-function sym) | |
2860 | (indirect-function 'backquote)) | |
2861 | (error nil))) | |
2862 | ||
2863 | (defun wisent-check-$N (x m) | |
2864 | "Return non-nil if X is a valid $N or $regionN symbol. | |
2865 | That is if X is a $N or $regionN symbol with N >= 1 and N <= M. | |
2866 | Also warn if X is a $N or $regionN symbol with N < 1 or N > M." | |
2867 | (when (symbolp x) | |
2868 | (let* ((n (symbol-name x)) | |
2869 | (i (and (string-match "\\`\\$\\(region\\)?\\([0-9]+\\)\\'" n) | |
2870 | (string-to-number (match-string 2 n))))) | |
2871 | (when i | |
2872 | (if (and (>= i 1) (<= i m)) | |
2873 | t | |
2874 | (message | |
2875 | "*** In %s, %s might be a free variable (rule has %s)" | |
2876 | NAME x (format (cond ((< m 1) "no component") | |
2877 | ((= m 1) "%d component") | |
2878 | ("%d components")) | |
2879 | m)) | |
2880 | nil))))) | |
2881 | ||
2882 | (defun wisent-semantic-action-expand-body (body n &optional found) | |
2883 | "Parse BODY of semantic action. | |
2884 | N is the maximum number of $N variables that can be referenced in | |
2885 | BODY. Warn on references out of permitted range. | |
2886 | Optional argument FOUND is the accumulated list of '$N' references | |
2887 | encountered so far. | |
2888 | Return a cons (FOUND . XBODY), where FOUND is the list of $N | |
2889 | references found in BODY, and XBODY is BODY expression with | |
2890 | `backquote' forms expanded." | |
2891 | (if (not (listp body)) | |
2892 | ;; BODY is an atom, no expansion needed | |
2893 | (progn | |
2894 | (if (wisent-check-$N body n) | |
2895 | ;; Accumulate $i symbol | |
2896 | (add-to-list 'found body)) | |
2897 | (cons found body)) | |
2898 | ;; BODY is a list, expand inside it | |
2899 | (let (xbody sexpr) | |
2900 | ;; If backquote expand it first | |
2901 | (if (wisent-backquote-p (car body)) | |
2902 | (setq body (macroexpand body))) | |
2903 | (while body | |
2904 | (setq sexpr (car body) | |
2905 | body (cdr body)) | |
2906 | (cond | |
2907 | ;; Function call excepted quote expression | |
2908 | ((and (consp sexpr) | |
2909 | (not (wisent-quote-p (car sexpr)))) | |
2910 | (setq sexpr (wisent-semantic-action-expand-body sexpr n found) | |
2911 | found (car sexpr) | |
2912 | sexpr (cdr sexpr))) | |
2913 | ;; $i symbol | |
2914 | ((wisent-check-$N sexpr n) | |
2915 | ;; Accumulate $i symbol | |
2916 | (add-to-list 'found sexpr)) | |
2917 | ) | |
2918 | ;; Accumulate expanded forms | |
2919 | (setq xbody (nconc xbody (list sexpr)))) | |
2920 | (cons found xbody)))) | |
2921 | ||
2922 | (defun wisent-semantic-action (r) | |
2923 | "Set up the Elisp function for semantic action at rule R. | |
2924 | On entry RCODE[R] contains a vector [BODY N (NTERM I)] where BODY is the | |
2925 | body of the semantic action, N is the maximum number of values | |
2926 | available in the parser's stack, NTERM is the nonterminal the semantic | |
2927 | action belongs to, and I is the index of the semantic action inside | |
2928 | NTERM definition. Return the semantic action symbol. | |
2929 | The semantic action function accepts three arguments: | |
2930 | ||
2931 | - the state/value stack | |
2932 | - the top-of-stack index | |
2933 | - the goto table | |
2934 | ||
2935 | And returns the updated top-of-stack index." | |
2936 | (if (not (aref ruseful r)) | |
2937 | (aset rcode r nil) | |
2938 | (let* ((actn (aref rcode r)) | |
2939 | (n (aref actn 1)) ; nb of val avail. in stack | |
2940 | (NAME (apply 'format "%s:%d" (aref actn 2))) | |
2941 | (form (wisent-semantic-action-expand-body (aref actn 0) n)) | |
2942 | ($l (car form)) ; list of $vars used in body | |
2943 | (form (cdr form)) ; expanded form of body | |
2944 | (nt (aref rlhs r)) ; nonterminal item no. | |
2945 | (bl nil) ; `let*' binding list | |
2946 | $v i j) | |
2947 | ||
2948 | ;; Compute $N and $regionN bindings | |
2949 | (setq i n) | |
2950 | (while (> i 0) | |
2951 | (setq j (1+ (* 2 (- n i)))) | |
2952 | ;; Only bind $regionI if used in action | |
2953 | (setq $v (intern (format "$region%d" i))) | |
2954 | (if (memq $v $l) | |
2955 | (setq bl (cons `(,$v (cdr (aref ,stack (- ,sp ,j)))) bl))) | |
2956 | ;; Only bind $I if used in action | |
2957 | (setq $v (intern (format "$%d" i))) | |
2958 | (if (memq $v $l) | |
2959 | (setq bl (cons `(,$v (car (aref ,stack (- ,sp ,j)))) bl))) | |
2960 | (setq i (1- i))) | |
2961 | ||
2962 | ;; Compute J, the length of rule's RHS. It will give the | |
2963 | ;; current parser state at STACK[SP - 2*J], and where to push | |
2964 | ;; the new semantic value and the next state, respectively at: | |
2965 | ;; STACK[SP - 2*J + 1] and STACK[SP - 2*J + 2]. Generally N, | |
2966 | ;; the maximum number of values available in the stack, is equal | |
2967 | ;; to J. But, for mid-rule actions, N is the number of rule | |
2968 | ;; elements before the action and J is always 0 (empty rule). | |
2969 | (setq i (aref rrhs r) | |
2970 | j 0) | |
2971 | (while (> (aref ritem i) 0) | |
2972 | (setq j (1+ j) | |
2973 | i (1+ i))) | |
2974 | ||
2975 | ;; Create the semantic action symbol. | |
2976 | (setq actn (intern NAME (aref rcode 0))) | |
2977 | ||
2978 | ;; Store source code in function cell of the semantic action | |
2979 | ;; symbol. It will be byte-compiled at automaton's compilation | |
2980 | ;; time. Using a byte-compiled automaton can significantly | |
2981 | ;; speed up parsing! | |
2982 | (fset actn | |
2983 | `(lambda (,stack ,sp ,gotos) | |
2984 | (let* (,@bl | |
2985 | ($region | |
2986 | ,(cond | |
2987 | ((= n 1) | |
2988 | (if (assq '$region1 bl) | |
2989 | '$region1 | |
2990 | `(cdr (aref ,stack (1- ,sp))))) | |
2991 | ((> n 1) | |
2992 | `(wisent-production-bounds | |
2993 | ,stack (- ,sp ,(1- (* 2 n))) (1- ,sp))))) | |
2994 | ($action ,NAME) | |
2995 | ($nterm ',(aref tags nt)) | |
2996 | ,@(and (> j 0) `((,sp (- ,sp ,(* j 2))))) | |
2997 | (,state (cdr (assq $nterm | |
2998 | (aref ,gotos | |
2999 | (aref ,stack ,sp)))))) | |
3000 | (setq ,sp (+ ,sp 2)) | |
3001 | ;; push semantic value | |
3002 | (aset ,stack (1- ,sp) (cons ,form $region)) | |
3003 | ;; push next state | |
3004 | (aset ,stack ,sp ,state) | |
3005 | ;; return new top of stack | |
3006 | ,sp))) | |
3007 | ||
3008 | ;; Return the semantic action symbol | |
3009 | actn))) | |
3010 | \f | |
3011 | ;;;; ---------------------------- | |
3012 | ;;;; Build parser LALR automaton. | |
3013 | ;;;; ---------------------------- | |
3014 | ||
3015 | (defun wisent-parser-automaton () | |
3016 | "Compute and return LALR(1) automaton from GRAMMAR. | |
3017 | GRAMMAR is in internal format. GRAM/ACTS are grammar rules | |
3018 | in internal format. STARTS defines the start symbols." | |
3019 | ;; Check for useless stuff | |
3020 | (wisent-reduce-grammar) | |
3021 | ||
3022 | (wisent-set-derives) | |
3023 | (wisent-set-nullable) | |
3024 | ;; convert to nondeterministic finite state machine. | |
3025 | (wisent-generate-states) | |
3026 | ;; make it deterministic. | |
3027 | (wisent-lalr) | |
3028 | ;; Find and record any conflicts: places where one token of | |
3029 | ;; lookahead is not enough to disambiguate the parsing. Also | |
3030 | ;; resolve s/r conflicts based on precedence declarations. | |
3031 | (wisent-resolve-conflicts) | |
3032 | (wisent-print-results) | |
3033 | ||
3034 | (vector (wisent-state-actions) ; action table | |
3035 | (wisent-goto-actions) ; goto table | |
3036 | start-table ; start symbols | |
3037 | (aref rcode 0) ; sem. action symbol obarray | |
3038 | ) | |
3039 | ) | |
3040 | \f | |
3041 | ;;;; ------------------- | |
3042 | ;;;; Parse input grammar | |
3043 | ;;;; ------------------- | |
3044 | ||
3045 | (defconst wisent-reserved-symbols (list wisent-error-term) | |
3046 | "The list of reserved symbols. | |
3047 | Also all symbols starting with a character defined in | |
3048 | `wisent-reserved-capitals' are reserved for internal use.") | |
3049 | ||
3050 | (defconst wisent-reserved-capitals '(?\$ ?\@) | |
3051 | "The list of reserved capital letters. | |
3052 | All symbol starting with one of these letters are reserved for | |
3053 | internal use.") | |
3054 | ||
3055 | (defconst wisent-starts-nonterm '$STARTS | |
3056 | "Main start symbol. | |
3057 | It gives the rules for start symbols.") | |
3058 | ||
3059 | (defvar wisent-single-start-flag nil | |
3060 | "Non-nil means allows only one start symbol like in Bison. | |
3061 | That is don't add extra start rules to the grammar. This is | |
3062 | useful to compare the Wisent's generated automaton with the Bison's | |
3063 | one.") | |
3064 | ||
3065 | (defsubst wisent-ISVALID-VAR (x) | |
3066 | "Return non-nil if X is a character or an allowed symbol." | |
3067 | (and x (symbolp x) | |
3068 | (not (memq (aref (symbol-name x) 0) wisent-reserved-capitals)) | |
3069 | (not (memq x wisent-reserved-symbols)))) | |
3070 | ||
3071 | (defsubst wisent-ISVALID-TOKEN (x) | |
3072 | "Return non-nil if X is a character or an allowed symbol." | |
3073 | (or (wisent-char-p x) | |
3074 | (wisent-ISVALID-VAR x))) | |
3075 | ||
3076 | (defun wisent-push-token (symbol &optional nocheck) | |
3077 | "Push a new SYMBOL in the list of tokens. | |
3078 | Bypass checking if NOCHECK is non-nil." | |
3079 | ;; Check | |
3080 | (or nocheck (wisent-ISVALID-TOKEN symbol) | |
3081 | (error "Invalid terminal symbol: %S" symbol)) | |
3082 | (if (memq symbol token-list) | |
3083 | (message "*** duplicate terminal `%s' ignored" symbol) | |
3084 | ;; Set up properties | |
3085 | (wisent-set-prec symbol nil) | |
3086 | (wisent-set-assoc symbol nil) | |
3087 | (wisent-set-item-number symbol ntokens) | |
3088 | ;; Add | |
3089 | (setq ntokens (1+ ntokens) | |
3090 | token-list (cons symbol token-list)))) | |
3091 | ||
3092 | (defun wisent-push-var (symbol &optional nocheck) | |
3093 | "Push a new SYMBOL in the list of nonterminals. | |
3094 | Bypass checking if NOCHECK is non-nil." | |
3095 | ;; Check | |
3096 | (unless nocheck | |
3097 | (or (wisent-ISVALID-VAR symbol) | |
3098 | (error "Invalid nonterminal symbol: %S" symbol)) | |
3099 | (if (memq symbol var-list) | |
3100 | (error "Nonterminal `%s' already defined" symbol))) | |
3101 | ;; Set up properties | |
3102 | (wisent-set-item-number symbol nvars) | |
3103 | ;; Add | |
3104 | (setq nvars (1+ nvars) | |
3105 | var-list (cons symbol var-list))) | |
3106 | ||
3107 | (defun wisent-parse-nonterminals (defs) | |
3108 | "Parse nonterminal definitions in DEFS. | |
3109 | Fill in each element of the global arrays RPREC, RCODE, RUSEFUL with | |
3110 | respectively rule precedence level, semantic action code and | |
3111 | usefulness flag. Return a list of rules of the form (LHS . RHS) where | |
3112 | LHS and RHS are respectively the Left Hand Side and Right Hand Side of | |
3113 | the rule." | |
3114 | (setq rprec nil | |
3115 | rcode nil | |
3116 | nitems 0 | |
3117 | nrules 0) | |
3118 | (let (def nonterm rlist rule rules rhs rest item items | |
3119 | rhl plevel semact @n @count iactn) | |
3120 | (setq @count 0) | |
3121 | (while defs | |
3122 | (setq def (car defs) | |
3123 | defs (cdr defs) | |
3124 | nonterm (car def) | |
3125 | rlist (cdr def) | |
3126 | iactn 0) | |
3127 | (or (consp rlist) | |
3128 | (error "Invalid nonterminal definition syntax: %S" def)) | |
3129 | (while rlist | |
3130 | (setq rule (car rlist) | |
3131 | rlist (cdr rlist) | |
3132 | items (car rule) | |
3133 | rest (cdr rule) | |
3134 | rhl 0 | |
3135 | rhs nil) | |
3136 | ||
3137 | ;; Check & count items | |
3138 | (setq nitems (1+ nitems)) ;; LHS item | |
3139 | (while items | |
3140 | (setq item (car items) | |
3141 | items (cdr items) | |
3142 | nitems (1+ nitems)) ;; RHS items | |
3143 | (if (listp item) | |
3144 | ;; Mid-rule action | |
3145 | (progn | |
3146 | (setq @count (1+ @count) | |
3147 | @n (intern (format "@%d" @count))) | |
3148 | (wisent-push-var @n t) | |
3149 | ;; Push a new empty rule with the mid-rule action | |
3150 | (setq semact (vector item rhl (list nonterm iactn)) | |
3151 | iactn (1+ iactn) | |
3152 | plevel nil | |
3153 | rcode (cons semact rcode) | |
3154 | rprec (cons plevel rprec) | |
3155 | item @n ;; Replace action by @N nonterminal | |
3156 | rules (cons (list item) rules) | |
3157 | nitems (1+ nitems) | |
3158 | nrules (1+ nrules))) | |
3159 | ;; Check terminal or nonterminal symbol | |
3160 | (cond | |
3161 | ((or (memq item token-list) (memq item var-list))) | |
3162 | ;; Create new literal character token | |
3163 | ((wisent-char-p item) (wisent-push-token item t)) | |
3164 | ((error "Symbol `%s' is used, but is not defined as a token and has no rules" | |
3165 | item)))) | |
3166 | (setq rhl (1+ rhl) | |
3167 | rhs (cons item rhs))) | |
3168 | ||
3169 | ;; Check & collect rule precedence level | |
3170 | (setq plevel (when (vectorp (car rest)) | |
3171 | (setq item (car rest) | |
3172 | rest (cdr rest)) | |
3173 | (if (and (= (length item) 1) | |
3174 | (memq (aref item 0) token-list) | |
3175 | (wisent-prec (aref item 0))) | |
3176 | (wisent-item-number (aref item 0)) | |
3177 | (error "Invalid rule precedence level syntax: %S" item))) | |
3178 | rprec (cons plevel rprec)) | |
3179 | ||
3180 | ;; Check & collect semantic action body | |
3181 | (setq semact (vector | |
3182 | (if rest | |
3183 | (if (cdr rest) | |
3184 | (error "Invalid semantic action syntax: %S" rest) | |
3185 | (car rest)) | |
3186 | ;; Give a default semantic action body: nil | |
3187 | ;; for an empty rule or $1, the value of the | |
3188 | ;; first symbol in the rule, otherwise. | |
3189 | (if (> rhl 0) '$1 '())) | |
3190 | rhl | |
3191 | (list nonterm iactn)) | |
3192 | iactn (1+ iactn) | |
3193 | rcode (cons semact rcode)) | |
3194 | (setq rules (cons (cons nonterm (nreverse rhs)) rules) | |
3195 | nrules (1+ nrules)))) | |
3196 | ||
3197 | (setq ruseful (make-vector (1+ nrules) t) | |
3198 | rprec (vconcat (cons nil (nreverse rprec))) | |
3199 | rcode (vconcat (cons nil (nreverse rcode)))) | |
3200 | (nreverse rules) | |
3201 | )) | |
3202 | ||
3203 | (defun wisent-parse-grammar (grammar &optional start-list) | |
3204 | "Parse GRAMMAR and build a suitable internal representation. | |
3205 | Optional argument START-LIST defines the start symbols. | |
3206 | GRAMMAR is a list of form: (TOKENS ASSOCS . NONTERMS) | |
3207 | ||
3208 | TOKENS is a list of terminal symbols (tokens). | |
3209 | ||
3210 | ASSOCS is nil or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements | |
3211 | describing the associativity of TOKENS. ASSOC-TYPE must be one of the | |
3212 | `default-prec' `nonassoc', `left' or `right' symbols. When ASSOC-TYPE | |
3213 | is `default-prec', ASSOC-VALUE must be nil or t (the default). | |
3214 | Otherwise it is a list of tokens which must have been previously | |
3215 | declared in TOKENS. | |
3216 | ||
3217 | NONTERMS is the list of non terminal definitions (see function | |
3218 | `wisent-parse-nonterminals')." | |
3219 | (or (and (consp grammar) (> (length grammar) 2)) | |
3220 | (error "Bad input grammar")) | |
3221 | ||
3222 | (let (i r rhs pre dpre lst start-var assoc rules item | |
3223 | token var def tokens defs ep-token ep-var ep-def) | |
3224 | ||
3225 | ;; Built-in tokens | |
3226 | (setq ntokens 0 nvars 0) | |
3227 | (wisent-push-token wisent-eoi-term t) | |
3228 | (wisent-push-token wisent-error-term t) | |
3229 | ||
3230 | ;; Check/collect terminals | |
3231 | (setq lst (car grammar)) | |
3232 | (while lst | |
3233 | (wisent-push-token (car lst)) | |
3234 | (setq lst (cdr lst))) | |
3235 | ||
3236 | ;; Check/Set up tokens precedence & associativity | |
3237 | (setq lst (nth 1 grammar) | |
3238 | pre 0 | |
3239 | defs nil | |
3240 | dpre nil | |
3241 | default-prec t) | |
3242 | (while lst | |
3243 | (setq def (car lst) | |
3244 | assoc (car def) | |
3245 | tokens (cdr def) | |
3246 | lst (cdr lst)) | |
3247 | (if (eq assoc 'default-prec) | |
3248 | (progn | |
3249 | (or (null (cdr tokens)) | |
3250 | (memq (car tokens) '(t nil)) | |
3251 | (error "Invalid default-prec value: %S" tokens)) | |
3252 | (setq default-prec (car tokens)) | |
3253 | (if dpre | |
3254 | (message "*** redefining default-prec to %s" | |
3255 | default-prec)) | |
3256 | (setq dpre t)) | |
3257 | (or (memq assoc '(left right nonassoc)) | |
3258 | (error "Invalid associativity syntax: %S" assoc)) | |
3259 | (setq pre (1+ pre)) | |
3260 | (while tokens | |
3261 | (setq token (car tokens) | |
3262 | tokens (cdr tokens)) | |
3263 | (if (memq token defs) | |
3264 | (message "*** redefining precedence of `%s'" token)) | |
3265 | (or (memq token token-list) | |
3266 | ;; Define token not previously declared. | |
3267 | (wisent-push-token token)) | |
3268 | (setq defs (cons token defs)) | |
3269 | ;; Record the precedence and associativity of the terminal. | |
3270 | (wisent-set-prec token pre) | |
3271 | (wisent-set-assoc token assoc)))) | |
3272 | ||
3273 | ;; Check/Collect nonterminals | |
3274 | (setq lst (nthcdr 2 grammar) | |
3275 | defs nil) | |
3276 | (while lst | |
3277 | (setq def (car lst) | |
3278 | lst (cdr lst)) | |
3279 | (or (consp def) | |
3280 | (error "Invalid nonterminal definition: %S" def)) | |
3281 | (if (memq (car def) token-list) | |
3282 | (error "Nonterminal `%s' already defined as token" (car def))) | |
3283 | (wisent-push-var (car def)) | |
3284 | (setq defs (cons def defs))) | |
3285 | (or defs | |
3286 | (error "No input grammar")) | |
3287 | (setq defs (nreverse defs)) | |
3288 | ||
3289 | ;; Set up the start symbol. | |
3290 | (setq start-table nil) | |
3291 | (cond | |
3292 | ||
3293 | ;; 1. START-LIST is nil, the start symbol is the first | |
3294 | ;; nonterminal defined in the grammar (Bison like). | |
3295 | ((null start-list) | |
3296 | (setq start-var (caar defs))) | |
3297 | ||
3298 | ;; 2. START-LIST contains only one element, it is the start | |
3299 | ;; symbol (Bison like). | |
3300 | ((or wisent-single-start-flag (null (cdr start-list))) | |
3301 | (setq start-var (car start-list)) | |
3302 | (or (assq start-var defs) | |
3303 | (error "Start symbol `%s' has no rule" start-var))) | |
3304 | ||
3305 | ;; 3. START-LIST contains more than one element. All defines | |
3306 | ;; potential start symbols. One of them (the first one by | |
3307 | ;; default) will be given at parse time to be the parser goal. | |
3308 | ;; If `wisent-single-start-flag' is non-nil that feature is | |
3309 | ;; disabled and the first nonterminal in START-LIST defines | |
3310 | ;; the start symbol, like in case 2 above. | |
3311 | ((not wisent-single-start-flag) | |
3312 | ||
3313 | ;; START-LIST is a list of nonterminals '(nt0 ... ntN). | |
3314 | ;; Build and push ad hoc start rules in the grammar: | |
3315 | ||
3316 | ;; ($STARTS ((nt0) $1) ((nt1) $1) ... ((ntN) $1)) | |
3317 | ;; ($nt1 (($$nt1 nt1) $2)) | |
3318 | ;; ... | |
3319 | ;; ($ntN (($$ntN ntN) $2)) | |
3320 | ||
3321 | ;; Where internal symbols $ntI and $$ntI are respectively | |
3322 | ;; nonterminals and terminals. | |
3323 | ||
3324 | ;; The internal start symbol $STARTS is used to build the | |
3325 | ;; LALR(1) automaton. The true default start symbol used by the | |
3326 | ;; parser is the first nonterminal in START-LIST (nt0). | |
3327 | (setq start-var wisent-starts-nonterm | |
3328 | lst (nreverse start-list)) | |
3329 | (while lst | |
3330 | (setq var (car lst) | |
3331 | lst (cdr lst)) | |
3332 | (or (memq var var-list) | |
3333 | (error "Start symbol `%s' has no rule" var)) | |
3334 | (unless (assq var start-table) ;; Ignore duplicates | |
3335 | ;; For each nt start symbol | |
3336 | (setq ep-var (intern (format "$%s" var)) | |
3337 | ep-token (intern (format "$$%s" var))) | |
3338 | (wisent-push-token ep-token t) | |
3339 | (wisent-push-var ep-var t) | |
3340 | (setq | |
3341 | ;; Add entry (nt . $$nt) to start-table | |
3342 | start-table (cons (cons var ep-token) start-table) | |
3343 | ;; Add rule ($nt (($$nt nt) $2)) | |
3344 | defs (cons (list ep-var (list (list ep-token var) '$2)) defs) | |
3345 | ;; Add start rule (($nt) $1) | |
3346 | ep-def (cons (list (list ep-var) '$1) ep-def)) | |
3347 | )) | |
3348 | (wisent-push-var start-var t) | |
3349 | (setq defs (cons (cons start-var ep-def) defs)))) | |
3350 | ||
3351 | ;; Set up rules main data structure & RPREC, RCODE, RUSEFUL | |
3352 | (setq rules (wisent-parse-nonterminals defs)) | |
3353 | ||
3354 | ;; Set up the terminal & nonterminal lists. | |
3355 | (setq nsyms (+ ntokens nvars) | |
3356 | token-list (nreverse token-list) | |
3357 | lst var-list | |
3358 | var-list nil) | |
3359 | (while lst | |
3360 | (setq var (car lst) | |
3361 | lst (cdr lst) | |
3362 | var-list (cons var var-list)) | |
3363 | (wisent-set-item-number ;; adjust nonterminal item number to | |
3364 | var (+ ntokens (wisent-item-number var)))) ;; I += NTOKENS | |
3365 | ||
3366 | ;; Store special item numbers | |
3367 | (setq error-token-number (wisent-item-number wisent-error-term) | |
3368 | start-symbol (wisent-item-number start-var)) | |
3369 | ||
3370 | ;; Keep symbols in the TAGS vector so that TAGS[I] is the symbol | |
3371 | ;; associated to item number I. | |
3372 | (setq tags (vconcat token-list var-list)) | |
3373 | ;; Set up RLHS RRHS & RITEM data structures from list of rules | |
3374 | ;; (LHS . RHS) received from `wisent-parse-nonterminals'. | |
3375 | (setq rlhs (make-vector (1+ nrules) nil) | |
3376 | rrhs (make-vector (1+ nrules) nil) | |
3377 | ritem (make-vector (1+ nitems) nil) | |
3378 | i 0 | |
3379 | r 1) | |
3380 | (while rules | |
3381 | (aset rlhs r (wisent-item-number (caar rules))) | |
3382 | (aset rrhs r i) | |
3383 | (setq rhs (cdar rules) | |
3384 | pre nil) | |
3385 | (while rhs | |
3386 | (setq item (wisent-item-number (car rhs))) | |
3387 | ;; Get default precedence level of rule, that is the | |
3388 | ;; precedence of the last terminal in it. | |
3389 | (and (wisent-ISTOKEN item) | |
3390 | default-prec | |
3391 | (setq pre item)) | |
3392 | ||
3393 | (aset ritem i item) | |
3394 | (setq i (1+ i) | |
3395 | rhs (cdr rhs))) | |
3396 | ;; Setup the precedence level of the rule, that is the one | |
3397 | ;; specified by %prec or the default one. | |
3398 | (and (not (aref rprec r)) ;; Already set by %prec | |
3399 | pre | |
3400 | (wisent-prec (aref tags pre)) | |
3401 | (aset rprec r pre)) | |
3402 | (aset ritem i (- r)) | |
3403 | (setq i (1+ i) | |
3404 | r (1+ r)) | |
3405 | (setq rules (cdr rules))) | |
3406 | )) | |
3407 | \f | |
3408 | ;;;; --------------------- | |
3409 | ;;;; Compile input grammar | |
3410 | ;;;; --------------------- | |
3411 | ||
3412 | (defun wisent-compile-grammar (grammar &optional start-list) | |
3413 | "Compile the LALR(1) GRAMMAR. | |
3414 | ||
3415 | GRAMMAR is a list (TOKENS ASSOCS . NONTERMS) where: | |
3416 | ||
3417 | - TOKENS is a list of terminal symbols (tokens). | |
3418 | ||
3419 | - ASSOCS is nil, or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements | |
3420 | describing the associativity of TOKENS. ASSOC-TYPE must be one of | |
3421 | the `default-prec' `nonassoc', `left' or `right' symbols. When | |
3422 | ASSOC-TYPE is `default-prec', ASSOC-VALUE must be nil or t (the | |
3423 | default). Otherwise it is a list of tokens which must have been | |
3424 | previously declared in TOKENS. | |
3425 | ||
3426 | - NONTERMS is a list of nonterminal definitions. | |
3427 | ||
3428 | Optional argument START-LIST specify the possible grammar start | |
3429 | symbols. This is a list of nonterminals which must have been | |
3430 | previously declared in GRAMMAR's NONTERMS form. By default, the start | |
3431 | symbol is the first nonterminal defined. When START-LIST contains | |
3432 | only one element, it is the start symbol. Otherwise, all elements are | |
3433 | possible start symbols, unless `wisent-single-start-flag' is non-nil. | |
3434 | In that case, the first element is the start symbol, and others are | |
3435 | ignored. | |
3436 | ||
3437 | Return an automaton as a vector: [ACTIONS GOTOS STARTS FUNCTIONS] | |
3438 | where: | |
3439 | ||
3440 | - ACTIONS is a state/token matrix telling the parser what to do at | |
3441 | every state based on the current lookahead token. That is shift, | |
3442 | reduce, accept or error. | |
3443 | ||
3444 | - GOTOS is a state/nonterminal matrix telling the parser the next | |
3445 | state to go to after reducing with each rule. | |
3446 | ||
3447 | - STARTS is an alist which maps the allowed start nonterminal symbols | |
3448 | to tokens that will be first shifted into the parser stack. | |
3449 | ||
3450 | - FUNCTIONS is an obarray of semantic action symbols. Each symbol's | |
3451 | function definition is the semantic action lambda expression." | |
3452 | (if (wisent-automaton-p grammar) | |
3453 | grammar ;; Grammar already compiled just return it | |
3454 | (wisent-with-context compile-grammar | |
cb9336bd | 3455 | (let* ((gc-cons-threshold 1000000)) |
bb051423 CY |
3456 | (garbage-collect) |
3457 | (setq wisent-new-log-flag t) | |
3458 | ;; Parse input grammar | |
3459 | (wisent-parse-grammar grammar start-list) | |
3460 | ;; Generate the LALR(1) automaton | |
cb9336bd | 3461 | (wisent-parser-automaton))))) |
bb051423 CY |
3462 | \f |
3463 | ;;;; -------------------------- | |
3464 | ;;;; Byte compile input grammar | |
3465 | ;;;; -------------------------- | |
3466 | ||
3467 | (require 'bytecomp) | |
3468 | ||
3469 | (defun wisent-byte-compile-grammar (form) | |
3470 | "Byte compile the `wisent-compile-grammar' FORM. | |
3471 | Automatically called by the Emacs Lisp byte compiler as a | |
3472 | `byte-compile' handler." | |
3473 | ;; Eval the `wisent-compile-grammar' form to obtain an LALR | |
3474 | ;; automaton internal data structure. Then, because the internal | |
3475 | ;; data structure contains an obarray, convert it to a lisp form so | |
3476 | ;; it can be byte-compiled. | |
cb9336bd SM |
3477 | (byte-compile-form |
3478 | ;; FIXME: we macroexpand here since `byte-compile-form' expects | |
3479 | ;; macroexpanded code, but that's just a workaround: for lexical-binding | |
3480 | ;; the lisp form should have to pass through closure-conversion and | |
3481 | ;; `wisent-byte-compile-grammar' is called much too late for that. | |
3482 | ;; Why isn't this `wisent-automaton-lisp-form' performed at | |
3483 | ;; macroexpansion time? --Stef | |
3484 | (macroexpand-all | |
3485 | (wisent-automaton-lisp-form (eval form))))) | |
bb051423 | 3486 | |
7200d79c SM |
3487 | ;; FIXME: We shouldn't use a `byte-compile' handler. Maybe using a hash-table |
3488 | ;; instead of an obarray would work around the problem that obarrays | |
3489 | ;; aren't printable. Then (put 'wisent-compile-grammar 'side-effect-free t). | |
bb051423 CY |
3490 | (put 'wisent-compile-grammar 'byte-compile 'wisent-byte-compile-grammar) |
3491 | ||
3492 | (defun wisent-automaton-lisp-form (automaton) | |
3493 | "Return a Lisp form that produces AUTOMATON. | |
3494 | See also `wisent-compile-grammar' for more details on AUTOMATON." | |
3495 | (or (wisent-automaton-p automaton) | |
3496 | (signal 'wrong-type-argument | |
3497 | (list 'wisent-automaton-p automaton))) | |
3498 | (let ((obn (make-symbol "ob")) ; Generated obarray name | |
3499 | (obv (aref automaton 3)) ; Semantic actions obarray | |
3500 | ) | |
3501 | `(let ((,obn (make-vector 13 0))) | |
3502 | ;; Generate code to initialize the semantic actions obarray, | |
3503 | ;; in local variable OBN. | |
3504 | ,@(let (obcode) | |
3505 | (mapatoms | |
3506 | #'(lambda (s) | |
3507 | (setq obcode | |
3508 | (cons `(fset (intern ,(symbol-name s) ,obn) | |
3509 | #',(symbol-function s)) | |
3510 | obcode))) | |
3511 | obv) | |
3512 | obcode) | |
3513 | ;; Generate code to create the automaton. | |
3514 | (vector | |
3515 | ;; In code generated to initialize the action table, take | |
3516 | ;; care of symbols that are interned in the semantic actions | |
3517 | ;; obarray. | |
3518 | (vector | |
3519 | ,@(mapcar | |
3520 | #'(lambda (state) ;; for each state | |
3521 | `(list | |
3522 | ,@(mapcar | |
3523 | #'(lambda (tr) ;; for each transition | |
3524 | (let ((k (car tr)) ; token | |
3525 | (a (cdr tr))) ; action | |
3526 | (if (and (symbolp a) | |
3527 | (intern-soft (symbol-name a) obv)) | |
3528 | `(cons ,(if (symbolp k) `(quote ,k) k) | |
3529 | (intern-soft ,(symbol-name a) ,obn)) | |
3530 | `(quote ,tr)))) | |
3531 | state))) | |
3532 | (aref automaton 0))) | |
3533 | ;; The code of the goto table is unchanged. | |
3534 | ,(aref automaton 1) | |
3535 | ;; The code of the alist of start symbols is unchanged. | |
3536 | ',(aref automaton 2) | |
3537 | ;; The semantic actions obarray is in the local variable OBN. | |
3538 | ,obn)))) | |
3539 | ||
3540 | (provide 'semantic/wisent/comp) | |
3541 | ||
3542 | ;;; semantic/wisent/comp.el ends here |