Commit | Line | Data |
---|---|---|
bb051423 CY |
1 | ;;; semantic/wisent/comp.el --- GNU Bison for Emacs - Grammar compiler |
2 | ||
3 | ;; Copyright (C) 1984, 1986, 1989, 1992, 1995, 2000, 2001, 2002, 2003, | |
e9bffc61 GM |
4 | ;; 2004, 2005, 2006, 2007, 2009, 2010, 2011 |
5 | ;; Free Software Foundation, Inc. | |
bb051423 CY |
6 | |
7 | ;; Author: David Ponce <david@dponce.com> | |
8 | ;; Maintainer: David Ponce <david@dponce.com> | |
9 | ;; Created: 30 January 2002 | |
10 | ;; Keywords: syntax | |
11 | ||
12 | ;; This file is part of GNU Emacs. | |
13 | ||
14 | ;; GNU Emacs is free software: you can redistribute it and/or modify | |
15 | ;; it under the terms of the GNU General Public License as published by | |
16 | ;; the Free Software Foundation, either version 3 of the License, or | |
17 | ;; (at your option) any later version. | |
18 | ||
19 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
20 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
22 | ;; GNU General Public License for more details. | |
23 | ||
24 | ;; You should have received a copy of the GNU General Public License | |
25 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | |
26 | ||
27 | ;;; Commentary: | |
28 | ;; | |
29 | ;; Grammar compiler that produces Wisent's LALR automatons. | |
30 | ;; | |
31 | ;; Wisent (the European Bison ;-) is an Elisp implementation of the | |
32 | ;; GNU Compiler Compiler Bison. The Elisp code is a port of the C | |
33 | ;; code of GNU Bison 1.28 & 1.31. | |
34 | ;; | |
35 | ;; For more details on the basic concepts for understanding Wisent, | |
36 | ;; read the Bison manual ;) | |
37 | ;; | |
38 | ;; For more details on Wisent itself read the Wisent manual. | |
39 | ||
40 | ;;; History: | |
41 | ;; | |
42 | ||
43 | ;;; Code: | |
44 | (require 'semantic/wisent) | |
45 | \f | |
46 | ;;;; ------------------- | |
47 | ;;;; Misc. useful things | |
48 | ;;;; ------------------- | |
49 | ||
50 | ;; As much as possible I would like to keep the name of global | |
51 | ;; variables used in Bison without polluting too much the Elisp global | |
52 | ;; name space. Elisp dynamic binding allows that ;-) | |
53 | ||
54 | ;; Here are simple macros to easily define and use set of variables | |
55 | ;; binded locally, without all these "reference to free variable" | |
56 | ;; compiler warnings! | |
57 | ||
58 | (defmacro wisent-context-name (name) | |
59 | "Return the context name from NAME." | |
60 | `(if (and ,name (symbolp ,name)) | |
61 | (intern (format "wisent-context-%s" ,name)) | |
62 | (error "Invalid context name: %S" ,name))) | |
63 | ||
64 | (defmacro wisent-context-bindings (name) | |
65 | "Return the variables in context NAME." | |
66 | `(symbol-value (wisent-context-name ,name))) | |
67 | ||
68 | (defmacro wisent-defcontext (name &rest vars) | |
69 | "Define a context NAME that will bind variables VARS." | |
70 | (let* ((context (wisent-context-name name)) | |
71 | (bindings (mapcar #'(lambda (v) (list 'defvar v)) vars))) | |
72 | `(eval-when-compile | |
73 | ,@bindings | |
74 | (defvar ,context ',vars)))) | |
75 | (put 'wisent-defcontext 'lisp-indent-function 1) | |
76 | ||
77 | (defmacro wisent-with-context (name &rest body) | |
78 | "Bind variables in context NAME then eval BODY." | |
79 | `(let* ,(wisent-context-bindings name) | |
80 | ,@body)) | |
81 | (put 'wisent-with-context 'lisp-indent-function 1) | |
82 | ||
83 | ;; A naive implementation of data structures! But it suffice here ;-) | |
84 | ||
85 | (defmacro wisent-struct (name &rest fields) | |
86 | "Define a simple data structure called NAME. | |
87 | Which contains data stored in FIELDS. FIELDS is a list of symbols | |
88 | which are field names or pairs (FIELD INITIAL-VALUE) where | |
89 | INITIAL-VALUE is a constant used as the initial value of FIELD when | |
90 | the data structure is created. INITIAL-VALUE defaults to nil. | |
91 | ||
92 | This defines a `make-NAME' constructor, get-able `NAME-FIELD' and | |
93 | set-able `set-NAME-FIELD' accessors." | |
94 | (let ((size (length fields)) | |
95 | (i 0) | |
96 | accors field sufx fun ivals) | |
97 | (while (< i size) | |
98 | (setq field (car fields) | |
99 | fields (cdr fields)) | |
100 | (if (consp field) | |
101 | (setq ivals (cons (cadr field) ivals) | |
102 | field (car field)) | |
103 | (setq ivals (cons nil ivals))) | |
104 | (setq sufx (format "%s-%s" name field) | |
105 | fun (intern (format "%s" sufx)) | |
106 | accors (cons `(defmacro ,fun (s) | |
107 | (list 'aref s ,i)) | |
108 | accors) | |
109 | fun (intern (format "set-%s" sufx)) | |
110 | accors (cons `(defmacro ,fun (s v) | |
111 | (list 'aset s ,i v)) | |
112 | accors) | |
113 | i (1+ i))) | |
114 | `(progn | |
115 | (defmacro ,(intern (format "make-%s" name)) () | |
116 | (cons 'vector ',(nreverse ivals))) | |
117 | ,@accors))) | |
118 | (put 'wisent-struct 'lisp-indent-function 1) | |
119 | ||
120 | ;; Other utilities | |
121 | ||
122 | (defsubst wisent-pad-string (s n &optional left) | |
123 | "Fill string S with spaces. | |
124 | Return a new string of at least N characters. Insert spaces on right. | |
125 | If optional LEFT is non-nil insert spaces on left." | |
126 | (let ((i (length s))) | |
127 | (if (< i n) | |
128 | (if left | |
129 | (concat (make-string (- n i) ?\ ) s) | |
130 | (concat s (make-string (- n i) ?\ ))) | |
131 | s))) | |
132 | \f | |
133 | ;;;; ------------------------ | |
134 | ;;;; Environment dependencies | |
135 | ;;;; ------------------------ | |
136 | ||
137 | (defconst wisent-BITS-PER-WORD | |
138 | (let ((i 1)) | |
139 | (while (not (zerop (lsh 1 i))) | |
140 | (setq i (1+ i))) | |
141 | i)) | |
142 | ||
143 | (defsubst wisent-WORDSIZE (n) | |
144 | "(N + BITS-PER-WORD - 1) / BITS-PER-WORD." | |
145 | (/ (1- (+ n wisent-BITS-PER-WORD)) wisent-BITS-PER-WORD)) | |
146 | ||
147 | (defsubst wisent-SETBIT (x i) | |
148 | "X[I/BITS-PER-WORD] |= 1 << (I % BITS-PER-WORD)." | |
149 | (let ((k (/ i wisent-BITS-PER-WORD))) | |
150 | (aset x k (logior (aref x k) | |
151 | (lsh 1 (% i wisent-BITS-PER-WORD)))))) | |
152 | ||
153 | (defsubst wisent-RESETBIT (x i) | |
154 | "X[I/BITS-PER-WORD] &= ~(1 << (I % BITS-PER-WORD))." | |
155 | (let ((k (/ i wisent-BITS-PER-WORD))) | |
156 | (aset x k (logand (aref x k) | |
157 | (lognot (lsh 1 (% i wisent-BITS-PER-WORD))))))) | |
158 | ||
159 | (defsubst wisent-BITISSET (x i) | |
160 | "(X[I/BITS-PER-WORD] & (1 << (I % BITS-PER-WORD))) != 0." | |
161 | (not (zerop (logand (aref x (/ i wisent-BITS-PER-WORD)) | |
162 | (lsh 1 (% i wisent-BITS-PER-WORD)))))) | |
163 | ||
bb051423 CY |
164 | (defsubst wisent-noninteractive () |
165 | "Return non-nil if running without interactive terminal." | |
166 | (if (featurep 'xemacs) | |
167 | (noninteractive) | |
168 | noninteractive)) | |
169 | ||
170 | (defvar wisent-debug-flag nil | |
171 | "Non-nil means enable some debug stuff.") | |
172 | \f | |
173 | ;;;; -------------- | |
174 | ;;;; Logging/Output | |
175 | ;;;; -------------- | |
176 | (defconst wisent-log-buffer-name "*wisent-log*" | |
177 | "Name of the log buffer.") | |
178 | ||
179 | (defvar wisent-new-log-flag nil | |
180 | "Non-nil means to start a new report.") | |
181 | ||
182 | (defvar wisent-verbose-flag nil | |
183 | "*Non-nil means to report verbose information on generated parser.") | |
184 | ||
185 | (defun wisent-toggle-verbose-flag () | |
186 | "Toggle whether to report verbose information on generated parser." | |
187 | (interactive) | |
188 | (setq wisent-verbose-flag (not wisent-verbose-flag)) | |
2054a44c | 189 | (when (called-interactively-p 'interactive) |
bb051423 CY |
190 | (message "Verbose report %sabled" |
191 | (if wisent-verbose-flag "en" "dis")))) | |
192 | ||
193 | (defmacro wisent-log-buffer () | |
194 | "Return the log buffer. | |
195 | Its name is defined in constant `wisent-log-buffer-name'." | |
196 | `(get-buffer-create wisent-log-buffer-name)) | |
197 | ||
198 | (defmacro wisent-clear-log () | |
199 | "Delete the entire contents of the log buffer." | |
200 | `(with-current-buffer (wisent-log-buffer) | |
201 | (erase-buffer))) | |
202 | ||
df26e1f5 | 203 | (defvar byte-compile-current-file) |
bb051423 CY |
204 | |
205 | (defun wisent-source () | |
206 | "Return the current source file name or nil." | |
207 | (let ((source (or (and (boundp 'byte-compile-current-file) | |
208 | byte-compile-current-file) | |
209 | load-file-name (buffer-file-name)))) | |
210 | (if source | |
211 | (file-relative-name source)))) | |
212 | ||
213 | (defun wisent-new-log () | |
214 | "Start a new entry into the log buffer." | |
215 | (setq wisent-new-log-flag nil) | |
216 | (let ((text (format "\n\n*** Wisent %s - %s\n\n" | |
217 | (or (wisent-source) (buffer-name)) | |
218 | (format-time-string "%Y-%m-%d %R")))) | |
219 | (with-current-buffer (wisent-log-buffer) | |
220 | (goto-char (point-max)) | |
221 | (insert text)))) | |
222 | ||
223 | (defsubst wisent-log (&rest args) | |
224 | "Insert text into the log buffer. | |
225 | `format' is applied to ARGS and the result string is inserted into the | |
226 | log buffer returned by the function `wisent-log-buffer'." | |
227 | (and wisent-new-log-flag (wisent-new-log)) | |
228 | (with-current-buffer (wisent-log-buffer) | |
229 | (insert (apply 'format args)))) | |
230 | ||
231 | (defconst wisent-log-file "wisent.output" | |
232 | "The log file. | |
233 | Used when running without interactive terminal.") | |
234 | ||
235 | (defun wisent-append-to-log-file () | |
236 | "Append contents of logging buffer to `wisent-log-file'." | |
237 | (if (get-buffer wisent-log-buffer-name) | |
238 | (condition-case err | |
239 | (with-current-buffer (wisent-log-buffer) | |
240 | (widen) | |
241 | (if (> (point-max) (point-min)) | |
242 | (write-region (point-min) (point-max) | |
243 | wisent-log-file t))) | |
244 | (error | |
245 | (message "*** %s" (error-message-string err)))))) | |
246 | \f | |
247 | ;;;; ----------------------------------- | |
248 | ;;;; Representation of the grammar rules | |
249 | ;;;; ----------------------------------- | |
250 | ||
251 | ;; ntokens is the number of tokens, and nvars is the number of | |
252 | ;; variables (nonterminals). nsyms is the total number, ntokens + | |
253 | ;; nvars. | |
254 | ||
255 | ;; Each symbol (either token or variable) receives a symbol number. | |
256 | ;; Numbers 0 to ntokens-1 are for tokens, and ntokens to nsyms-1 are | |
257 | ;; for variables. Symbol number zero is the end-of-input token. This | |
258 | ;; token is counted in ntokens. | |
259 | ||
260 | ;; The rules receive rule numbers 1 to nrules in the order they are | |
261 | ;; written. Actions and guards are accessed via the rule number. | |
262 | ||
263 | ;; The rules themselves are described by three arrays: rrhs, rlhs and | |
264 | ;; ritem. rlhs[R] is the symbol number of the left hand side of rule | |
265 | ;; R. The right hand side is stored as symbol numbers in a portion of | |
266 | ;; ritem. rrhs[R] contains the index in ritem of the beginning of the | |
267 | ;; portion for rule R. | |
268 | ||
269 | ;; The length of the portion is one greater than the number of symbols | |
270 | ;; in the rule's right hand side. The last element in the portion | |
271 | ;; contains minus R, which identifies it as the end of a portion and | |
272 | ;; says which rule it is for. | |
273 | ||
274 | ;; The portions of ritem come in order of increasing rule number and | |
275 | ;; are followed by an element which is nil to mark the end. nitems is | |
276 | ;; the total length of ritem, not counting the final nil. Each | |
277 | ;; element of ritem is called an "item" and its index in ritem is an | |
278 | ;; item number. | |
279 | ||
280 | ;; Item numbers are used in the finite state machine to represent | |
281 | ;; places that parsing can get to. | |
282 | ||
283 | ;; The vector rprec contains for each rule, the item number of the | |
284 | ;; symbol giving its precedence level to this rule. The precedence | |
285 | ;; level and associativity of each symbol is recorded in respectively | |
286 | ;; the properties 'wisent--prec and 'wisent--assoc. | |
287 | ||
288 | ;; Precedence levels are assigned in increasing order starting with 1 | |
289 | ;; so that numerically higher precedence values mean tighter binding | |
290 | ;; as they ought to. nil as a symbol or rule's precedence means none | |
291 | ;; is assigned. | |
292 | ||
293 | (defcustom wisent-state-table-size 1009 | |
294 | "The size of the state table." | |
295 | :type 'integer | |
296 | :group 'wisent) | |
297 | ||
298 | ;; These variables only exist locally in the function | |
299 | ;; `wisent-compile-grammar' and are shared by all other nested | |
300 | ;; callees. | |
301 | (wisent-defcontext compile-grammar | |
302 | F LA LAruleno accessing-symbol conflicts consistent default-prec | |
303 | derives err-table fderives final-state first-reduction first-shift | |
304 | first-state firsts from-state goto-map includes itemset nitemset | |
305 | kernel-base kernel-end kernel-items last-reduction last-shift | |
306 | last-state lookaheads lookaheadset lookback maxrhs ngotos nitems | |
307 | nrules nshifts nstates nsyms ntokens nullable nvars rassoc redset | |
308 | reduction-table ritem rlhs rprec rrc-count rrc-total rrhs ruseful | |
309 | rcode ruleset rulesetsize shift-symbol shift-table shiftset | |
310 | src-count src-total start-table state-table tags this-state to-state | |
311 | tokensetsize ;; nb of words req. to hold a bit for each rule | |
312 | varsetsize ;; nb of words req. to hold a bit for each variable | |
313 | error-token-number start-symbol token-list var-list | |
314 | N P V V1 nuseless-nonterminals nuseless-productions | |
315 | ptable ;; symbols & characters properties | |
316 | ) | |
317 | ||
318 | (defmacro wisent-ISTOKEN (s) | |
319 | "Return non-nil if item number S defines a token (terminal). | |
320 | That is if S < `ntokens'." | |
321 | `(< ,s ntokens)) | |
322 | ||
323 | (defmacro wisent-ISVAR(s) | |
324 | "Return non-nil if item number S defines a nonterminal. | |
325 | That is if S >= `ntokens'." | |
326 | `(>= ,s ntokens)) | |
327 | ||
328 | (defsubst wisent-tag (s) | |
329 | "Return printable form of item number S." | |
330 | (wisent-item-to-string (aref tags s))) | |
331 | ||
332 | ;; Symbol and character properties | |
333 | ||
334 | (defsubst wisent-put (object propname value) | |
335 | "Store OBJECT's PROPNAME property with value VALUE. | |
336 | Use `eq' to locate OBJECT." | |
337 | (let ((entry (assq object ptable))) | |
338 | (or entry (setq entry (list object) ptable (cons entry ptable))) | |
339 | (setcdr entry (plist-put (cdr entry) propname value)))) | |
340 | ||
341 | (defsubst wisent-get (object propname) | |
342 | "Return the value of OBJECT's PROPNAME property. | |
343 | Use `eq' to locate OBJECT." | |
344 | (plist-get (cdr (assq object ptable)) propname)) | |
345 | ||
346 | (defsubst wisent-item-number (x) | |
347 | "Return the item number of symbol X." | |
348 | (wisent-get x 'wisent--item-no)) | |
349 | ||
350 | (defsubst wisent-set-item-number (x n) | |
351 | "Set the item number of symbol X to N." | |
352 | (wisent-put x 'wisent--item-no n)) | |
353 | ||
354 | (defsubst wisent-assoc (x) | |
355 | "Return the associativity of symbol X." | |
356 | (wisent-get x 'wisent--assoc)) | |
357 | ||
358 | (defsubst wisent-set-assoc (x a) | |
359 | "Set the associativity of symbol X to A." | |
360 | (wisent-put x 'wisent--assoc a)) | |
361 | ||
362 | (defsubst wisent-prec (x) | |
363 | "Return the precedence level of symbol X." | |
364 | (wisent-get x 'wisent--prec)) | |
365 | ||
366 | (defsubst wisent-set-prec (x p) | |
367 | "Set the precedence level of symbol X to P." | |
368 | (wisent-put x 'wisent--prec p)) | |
369 | \f | |
370 | ;;;; ---------------------------------------------------------- | |
371 | ;;;; Type definitions for nondeterministic finite state machine | |
372 | ;;;; ---------------------------------------------------------- | |
373 | ||
374 | ;; These type definitions are used to represent a nondeterministic | |
375 | ;; finite state machine that parses the specified grammar. This | |
376 | ;; information is generated by the function `wisent-generate-states'. | |
377 | ||
378 | ;; Each state of the machine is described by a set of items -- | |
379 | ;; particular positions in particular rules -- that are the possible | |
380 | ;; places where parsing could continue when the machine is in this | |
381 | ;; state. These symbols at these items are the allowable inputs that | |
382 | ;; can follow now. | |
383 | ||
384 | ;; A core represents one state. States are numbered in the number | |
385 | ;; field. When `wisent-generate-states' is finished, the starting | |
386 | ;; state is state 0 and `nstates' is the number of states. (A | |
387 | ;; transition to a state whose state number is `nstates' indicates | |
388 | ;; termination.) All the cores are chained together and `first-state' | |
389 | ;; points to the first one (state 0). | |
390 | ||
391 | ;; For each state there is a particular symbol which must have been | |
392 | ;; the last thing accepted to reach that state. It is the | |
393 | ;; accessing-symbol of the core. | |
394 | ||
395 | ;; Each core contains a vector of `nitems' items which are the indices | |
396 | ;; in the `ritems' vector of the items that are selected in this | |
397 | ;; state. | |
398 | ||
399 | ;; The link field is used for chaining buckets that hash states by | |
400 | ;; their itemsets. This is for recognizing equivalent states and | |
401 | ;; combining them when the states are generated. | |
402 | ||
403 | ;; The two types of transitions are shifts (push the lookahead token | |
404 | ;; and read another) and reductions (combine the last n things on the | |
405 | ;; stack via a rule, replace them with the symbol that the rule | |
406 | ;; derives, and leave the lookahead token alone). When the states are | |
407 | ;; generated, these transitions are represented in two other lists. | |
408 | ||
409 | ;; Each shifts structure describes the possible shift transitions out | |
410 | ;; of one state, the state whose number is in the number field. The | |
411 | ;; shifts structures are linked through next and first-shift points to | |
412 | ;; them. Each contains a vector of numbers of the states that shift | |
413 | ;; transitions can go to. The accessing-symbol fields of those | |
414 | ;; states' cores say what kind of input leads to them. | |
415 | ||
416 | ;; A shift to state zero should be ignored. Conflict resolution | |
417 | ;; deletes shifts by changing them to zero. | |
418 | ||
419 | ;; Each reductions structure describes the possible reductions at the | |
420 | ;; state whose number is in the number field. The data is a list of | |
421 | ;; nreds rules, represented by their rule numbers. `first-reduction' | |
422 | ;; points to the list of these structures. | |
423 | ||
424 | ;; Conflict resolution can decide that certain tokens in certain | |
425 | ;; states should explicitly be errors (for implementing %nonassoc). | |
426 | ;; For each state, the tokens that are errors for this reason are | |
427 | ;; recorded in an errs structure, which has the state number in its | |
428 | ;; number field. The rest of the errs structure is full of token | |
429 | ;; numbers. | |
430 | ||
431 | ;; There is at least one shift transition present in state zero. It | |
432 | ;; leads to a next-to-final state whose accessing-symbol is the | |
433 | ;; grammar's start symbol. The next-to-final state has one shift to | |
434 | ;; the final state, whose accessing-symbol is zero (end of input). | |
435 | ;; The final state has one shift, which goes to the termination state | |
436 | ;; (whose number is `nstates'-1). | |
437 | ;; The reason for the extra state at the end is to placate the | |
438 | ;; parser's strategy of making all decisions one token ahead of its | |
439 | ;; actions. | |
440 | ||
441 | (wisent-struct core | |
442 | next ; -> core | |
443 | link ; -> core | |
444 | (number 0) | |
445 | (accessing-symbol 0) | |
446 | (nitems 0) | |
447 | (items [0])) | |
448 | ||
449 | (wisent-struct shifts | |
450 | next ; -> shifts | |
451 | (number 0) | |
452 | (nshifts 0) | |
453 | (shifts [0])) | |
454 | ||
455 | (wisent-struct reductions | |
456 | next ; -> reductions | |
457 | (number 0) | |
458 | (nreds 0) | |
459 | (rules [0])) | |
460 | ||
461 | (wisent-struct errs | |
462 | (nerrs 0) | |
463 | (errs [0])) | |
464 | \f | |
465 | ;;;; -------------------------------------------------------- | |
466 | ;;;; Find unreachable terminals, nonterminals and productions | |
467 | ;;;; -------------------------------------------------------- | |
468 | ||
469 | (defun wisent-bits-equal (L R n) | |
470 | "Visit L and R and return non-nil if their first N elements are `='. | |
471 | L and R must be vectors of integers." | |
472 | (let* ((i (1- n)) | |
473 | (iseq t)) | |
474 | (while (and iseq (natnump i)) | |
475 | (setq iseq (= (aref L i) (aref R i)) | |
476 | i (1- i))) | |
477 | iseq)) | |
478 | ||
479 | (defun wisent-nbits (i) | |
480 | "Return number of bits set in integer I." | |
481 | (let ((count 0)) | |
482 | (while (not (zerop i)) | |
483 | ;; i ^= (i & ((unsigned) (-(int) i))) | |
484 | (setq i (logxor i (logand i (- i))) | |
485 | count (1+ count))) | |
486 | count)) | |
487 | ||
488 | (defun wisent-bits-size (S n) | |
489 | "In vector S count the total of bits set in first N elements. | |
490 | S must be a vector of integers." | |
491 | (let* ((i (1- n)) | |
492 | (count 0)) | |
493 | (while (natnump i) | |
494 | (setq count (+ count (wisent-nbits (aref S i))) | |
495 | i (1- i))) | |
496 | count)) | |
497 | ||
498 | (defun wisent-useful-production (i N0) | |
499 | "Return non-nil if production I is in useful set N0." | |
500 | (let* ((useful t) | |
501 | (r (aref rrhs i)) | |
502 | n) | |
503 | (while (and useful (> (setq n (aref ritem r)) 0)) | |
504 | (if (wisent-ISVAR n) | |
505 | (setq useful (wisent-BITISSET N0 (- n ntokens)))) | |
506 | (setq r (1+ r))) | |
507 | useful)) | |
508 | ||
509 | (defun wisent-useless-nonterminals () | |
510 | "Find out which nonterminals are used." | |
511 | (let (Np Ns i n break) | |
512 | ;; N is set as built. Np is set being built this iteration. P is | |
513 | ;; set of all productions which have a RHS all in N. | |
514 | (setq n (wisent-WORDSIZE nvars) | |
515 | Np (make-vector n 0)) | |
516 | ||
517 | ;; The set being computed is a set of nonterminals which can | |
518 | ;; derive the empty string or strings consisting of all | |
519 | ;; terminals. At each iteration a nonterminal is added to the set | |
520 | ;; if there is a production with that nonterminal as its LHS for | |
521 | ;; which all the nonterminals in its RHS are already in the set. | |
522 | ;; Iterate until the set being computed remains unchanged. Any | |
523 | ;; nonterminals not in the set at that point are useless in that | |
524 | ;; they will never be used in deriving a sentence of the language. | |
525 | ||
526 | ;; This iteration doesn't use any special traversal over the | |
527 | ;; productions. A set is kept of all productions for which all | |
528 | ;; the nonterminals in the RHS are in useful. Only productions | |
529 | ;; not in this set are scanned on each iteration. At the end, | |
530 | ;; this set is saved to be used when finding useful productions: | |
531 | ;; only productions in this set will appear in the final grammar. | |
532 | ||
533 | (while (not break) | |
534 | (setq i (1- n)) | |
535 | (while (natnump i) | |
536 | ;; Np[i] = N[i] | |
537 | (aset Np i (aref N i)) | |
538 | (setq i (1- i))) | |
539 | ||
540 | (setq i 1) | |
541 | (while (<= i nrules) | |
542 | (if (not (wisent-BITISSET P i)) | |
543 | (when (wisent-useful-production i N) | |
544 | (wisent-SETBIT Np (- (aref rlhs i) ntokens)) | |
545 | (wisent-SETBIT P i))) | |
546 | (setq i (1+ i))) | |
547 | (if (wisent-bits-equal N Np n) | |
548 | (setq break t) | |
549 | (setq Ns Np | |
550 | Np N | |
551 | N Ns))) | |
552 | (setq N Np))) | |
553 | ||
554 | (defun wisent-inaccessable-symbols () | |
555 | "Find out which productions are reachable and which symbols are used." | |
556 | ;; Starting with an empty set of productions and a set of symbols | |
557 | ;; which only has the start symbol in it, iterate over all | |
558 | ;; productions until the set of productions remains unchanged for an | |
559 | ;; iteration. For each production which has a LHS in the set of | |
560 | ;; reachable symbols, add the production to the set of reachable | |
561 | ;; productions, and add all of the nonterminals in the RHS of the | |
562 | ;; production to the set of reachable symbols. | |
563 | ||
564 | ;; Consider only the (partially) reduced grammar which has only | |
565 | ;; nonterminals in N and productions in P. | |
566 | ||
567 | ;; The result is the set P of productions in the reduced grammar, | |
568 | ;; and the set V of symbols in the reduced grammar. | |
569 | ||
570 | ;; Although this algorithm also computes the set of terminals which | |
571 | ;; are reachable, no terminal will be deleted from the grammar. Some | |
572 | ;; terminals might not be in the grammar but might be generated by | |
573 | ;; semantic routines, and so the user might want them available with | |
574 | ;; specified numbers. (Is this true?) However, the non reachable | |
575 | ;; terminals are printed (if running in verbose mode) so that the | |
576 | ;; user can know. | |
577 | (let (Vp Vs Pp i tt r n m break) | |
578 | (setq n (wisent-WORDSIZE nsyms) | |
579 | m (wisent-WORDSIZE (1+ nrules)) | |
580 | Vp (make-vector n 0) | |
581 | Pp (make-vector m 0)) | |
582 | ||
583 | ;; If the start symbol isn't useful, then nothing will be useful. | |
584 | (when (wisent-BITISSET N (- start-symbol ntokens)) | |
585 | (wisent-SETBIT V start-symbol) | |
586 | (while (not break) | |
587 | (setq i (1- n)) | |
588 | (while (natnump i) | |
589 | (aset Vp i (aref V i)) | |
590 | (setq i (1- i))) | |
591 | (setq i 1) | |
592 | (while (<= i nrules) | |
593 | (when (and (not (wisent-BITISSET Pp i)) | |
594 | (wisent-BITISSET P i) | |
595 | (wisent-BITISSET V (aref rlhs i))) | |
596 | (setq r (aref rrhs i)) | |
597 | (while (natnump (setq tt (aref ritem r))) | |
598 | (if (or (wisent-ISTOKEN tt) | |
599 | (wisent-BITISSET N (- tt ntokens))) | |
600 | (wisent-SETBIT Vp tt)) | |
601 | (setq r (1+ r))) | |
602 | (wisent-SETBIT Pp i)) | |
603 | (setq i (1+ i))) | |
604 | (if (wisent-bits-equal V Vp n) | |
605 | (setq break t) | |
606 | (setq Vs Vp | |
607 | Vp V | |
608 | V Vs)))) | |
609 | (setq V Vp) | |
610 | ||
611 | ;; Tokens 0, 1 are internal to Wisent. Consider them useful. | |
612 | (wisent-SETBIT V 0) ;; end-of-input token | |
613 | (wisent-SETBIT V 1) ;; error token | |
614 | (setq P Pp) | |
615 | ||
616 | (setq nuseless-productions (- nrules (wisent-bits-size P m)) | |
617 | nuseless-nonterminals nvars | |
618 | i ntokens) | |
619 | (while (< i nsyms) | |
620 | (if (wisent-BITISSET V i) | |
621 | (setq nuseless-nonterminals (1- nuseless-nonterminals))) | |
622 | (setq i (1+ i))) | |
623 | ||
624 | ;; A token that was used in %prec should not be warned about. | |
625 | (setq i 1) | |
626 | (while (<= i nrules) | |
627 | (if (aref rprec i) | |
628 | (wisent-SETBIT V1 (aref rprec i))) | |
629 | (setq i (1+ i))) | |
630 | )) | |
631 | ||
632 | (defun wisent-reduce-grammar-tables () | |
633 | "Disable useless productions." | |
634 | (if (> nuseless-productions 0) | |
635 | (let ((pn 1)) | |
636 | (while (<= pn nrules) | |
637 | (aset ruseful pn (wisent-BITISSET P pn)) | |
638 | (setq pn (1+ pn)))))) | |
639 | ||
640 | (defun wisent-nonterminals-reduce () | |
641 | "Remove useless nonterminals." | |
642 | (let (i n r item nontermmap tags-sorted) | |
643 | ;; Map the nonterminals to their new index: useful first, useless | |
644 | ;; afterwards. Kept for later report. | |
645 | (setq nontermmap (make-vector nvars 0) | |
646 | n ntokens | |
647 | i ntokens) | |
648 | (while (< i nsyms) | |
649 | (when (wisent-BITISSET V i) | |
650 | (aset nontermmap (- i ntokens) n) | |
651 | (setq n (1+ n))) | |
652 | (setq i (1+ i))) | |
653 | (setq i ntokens) | |
654 | (while (< i nsyms) | |
655 | (unless (wisent-BITISSET V i) | |
656 | (aset nontermmap (- i ntokens) n) | |
657 | (setq n (1+ n))) | |
658 | (setq i (1+ i))) | |
659 | ;; Shuffle elements of tables indexed by symbol number | |
660 | (setq tags-sorted (make-vector nvars nil) | |
661 | i ntokens) | |
662 | (while (< i nsyms) | |
663 | (setq n (aref nontermmap (- i ntokens))) | |
664 | (aset tags-sorted (- n ntokens) (aref tags i)) | |
665 | (setq i (1+ i))) | |
666 | (setq i ntokens) | |
667 | (while (< i nsyms) | |
668 | (aset tags i (aref tags-sorted (- i ntokens))) | |
669 | (setq i (1+ i))) | |
670 | ;; Replace all symbol numbers in valid data structures. | |
671 | (setq i 1) | |
672 | (while (<= i nrules) | |
673 | (aset rlhs i (aref nontermmap (- (aref rlhs i) ntokens))) | |
674 | (setq i (1+ i))) | |
675 | (setq r 0) | |
676 | (while (setq item (aref ritem r)) | |
677 | (if (wisent-ISVAR item) | |
678 | (aset ritem r (aref nontermmap (- item ntokens)))) | |
679 | (setq r (1+ r))) | |
680 | (setq start-symbol (aref nontermmap (- start-symbol ntokens)) | |
681 | nsyms (- nsyms nuseless-nonterminals) | |
682 | nvars (- nvars nuseless-nonterminals)) | |
683 | )) | |
684 | ||
685 | (defun wisent-total-useless () | |
686 | "Report number of useless nonterminals and productions." | |
687 | (let* ((src (wisent-source)) | |
688 | (src (if src (concat " in " src) "")) | |
689 | (msg (format "Grammar%s contains" src))) | |
690 | (if (> nuseless-nonterminals 0) | |
691 | (setq msg (format "%s %d useless nonterminal%s" | |
692 | msg nuseless-nonterminals | |
693 | (if (> nuseless-nonterminals 0) "s" "")))) | |
694 | (if (and (> nuseless-nonterminals 0) (> nuseless-productions 0)) | |
695 | (setq msg (format "%s and" msg))) | |
696 | (if (> nuseless-productions 0) | |
697 | (setq msg (format "%s %d useless rule%s" | |
698 | msg nuseless-productions | |
699 | (if (> nuseless-productions 0) "s" "")))) | |
700 | (message msg))) | |
701 | ||
702 | (defun wisent-reduce-grammar () | |
703 | "Find unreachable terminals, nonterminals and productions." | |
704 | ;; Allocate the global sets used to compute the reduced grammar | |
705 | (setq N (make-vector (wisent-WORDSIZE nvars) 0) | |
706 | P (make-vector (wisent-WORDSIZE (1+ nrules)) 0) | |
707 | V (make-vector (wisent-WORDSIZE nsyms) 0) | |
708 | V1 (make-vector (wisent-WORDSIZE nsyms) 0) | |
709 | nuseless-nonterminals 0 | |
710 | nuseless-productions 0) | |
711 | ||
712 | (wisent-useless-nonterminals) | |
713 | (wisent-inaccessable-symbols) | |
714 | ||
715 | (when (> (+ nuseless-nonterminals nuseless-productions) 0) | |
716 | (wisent-total-useless) | |
717 | (or (wisent-BITISSET N (- start-symbol ntokens)) | |
718 | (error "Start symbol `%s' does not derive any sentence" | |
719 | (wisent-tag start-symbol))) | |
720 | (wisent-reduce-grammar-tables) | |
721 | (if (> nuseless-nonterminals 0) | |
722 | (wisent-nonterminals-reduce)))) | |
723 | ||
724 | (defun wisent-print-useless () | |
725 | "Output the detailed results of the reductions." | |
726 | (let (i b r) | |
727 | (when (> nuseless-nonterminals 0) | |
728 | ;; Useless nonterminals have been moved after useful ones. | |
729 | (wisent-log "\n\nUseless nonterminals:\n\n") | |
730 | (setq i 0) | |
731 | (while (< i nuseless-nonterminals) | |
732 | (wisent-log " %s\n" (wisent-tag (+ nsyms i))) | |
733 | (setq i (1+ i)))) | |
734 | (setq b nil | |
735 | i 0) | |
736 | (while (< i ntokens) | |
737 | (unless (or (wisent-BITISSET V i) (wisent-BITISSET V1 i)) | |
738 | (or b | |
739 | (wisent-log "\n\nTerminals which are not used:\n\n")) | |
740 | (setq b t) | |
741 | (wisent-log " %s\n" (wisent-tag i))) | |
742 | (setq i (1+ i))) | |
743 | (when (> nuseless-productions 0) | |
744 | (wisent-log "\n\nUseless rules:\n\n") | |
745 | (setq i 1) | |
746 | (while (<= i nrules) | |
747 | (unless (aref ruseful i) | |
748 | (wisent-log "#%s " (wisent-pad-string (format "%d" i) 4)) | |
749 | (wisent-log "%s:" (wisent-tag (aref rlhs i))) | |
750 | (setq r (aref rrhs i)) | |
751 | (while (natnump (aref ritem r)) | |
752 | (wisent-log " %s" (wisent-tag (aref ritem r))) | |
753 | (setq r (1+ r))) | |
754 | (wisent-log ";\n")) | |
755 | (setq i (1+ i)))) | |
756 | (if (or b (> nuseless-nonterminals 0) (> nuseless-productions 0)) | |
757 | (wisent-log "\n\n")) | |
758 | )) | |
759 | \f | |
760 | ;;;; ----------------------------- | |
761 | ;;;; Match rules with nonterminals | |
762 | ;;;; ----------------------------- | |
763 | ||
764 | (defun wisent-set-derives () | |
765 | "Find, for each variable (nonterminal), which rules can derive it. | |
766 | It sets up the value of DERIVES so that DERIVES[i - NTOKENS] points to | |
767 | a list of rule numbers, terminated with -1." | |
768 | (let (i lhs p q dset delts) | |
769 | (setq dset (make-vector nvars nil) | |
770 | delts (make-vector (1+ nrules) 0)) | |
771 | (setq p 0 ;; p = delts | |
772 | i nrules) | |
773 | (while (> i 0) | |
774 | (when (aref ruseful i) | |
775 | (setq lhs (aref rlhs i)) | |
776 | ;; p->next = dset[lhs]; | |
777 | ;; p->value = i; | |
778 | (aset delts p (cons i (aref dset (- lhs ntokens)))) ;; (value . next) | |
779 | (aset dset (- lhs ntokens) p) ;; dset[lhs] = p | |
780 | (setq p (1+ p)) ;; p++ | |
781 | ) | |
782 | (setq i (1- i))) | |
783 | ||
784 | (setq derives (make-vector nvars nil) | |
785 | i ntokens) | |
786 | ||
787 | (while (< i nsyms) | |
788 | (setq q nil | |
789 | p (aref dset (- i ntokens))) ;; p = dset[i] | |
790 | ||
791 | (while p | |
792 | (setq p (aref delts p) | |
793 | q (cons (car p) q) ;;q++ = p->value | |
794 | p (cdr p))) ;; p = p->next | |
795 | (setq q (nreverse (cons -1 q))) ;; *q++ = -1 | |
796 | (aset derives (- i ntokens) q) ;; derives[i] = q | |
797 | (setq i (1+ i))) | |
798 | )) | |
799 | \f | |
800 | ;;;; -------------------------------------------------------- | |
801 | ;;;; Find which nonterminals can expand into the null string. | |
802 | ;;;; -------------------------------------------------------- | |
803 | ||
804 | (defun wisent-print-nullable () | |
805 | "Print NULLABLE." | |
806 | (let (i) | |
807 | (wisent-log "NULLABLE\n") | |
808 | (setq i ntokens) | |
809 | (while (< i nsyms) | |
810 | (wisent-log "\t%s: %s\n" (wisent-tag i) | |
811 | (if (aref nullable (- i ntokens)) | |
812 | "yes" : "no")) | |
813 | (setq i (1+ i))) | |
814 | (wisent-log "\n\n"))) | |
815 | ||
816 | (defun wisent-set-nullable () | |
817 | "Set up NULLABLE. | |
818 | A vector saying which nonterminals can expand into the null string. | |
819 | NULLABLE[i - NTOKENS] is nil if symbol I can do so." | |
820 | (let (ruleno s1 s2 p r squeue rcount rsets relts item any-tokens) | |
821 | (setq squeue (make-vector nvars 0) | |
822 | rcount (make-vector (1+ nrules) 0) | |
823 | rsets (make-vector nvars nil) ;; - ntokens | |
824 | relts (make-vector (+ nitems nvars 1) nil) | |
825 | nullable (make-vector nvars nil)) ;; - ntokens | |
826 | (setq s1 0 s2 0 ;; s1 = s2 = squeue | |
827 | p 0 ;; p = relts | |
828 | ruleno 1) | |
829 | (while (<= ruleno nrules) | |
830 | (when (aref ruseful ruleno) | |
831 | (if (> (aref ritem (aref rrhs ruleno)) 0) | |
832 | (progn | |
833 | ;; This rule has a non empty RHS. | |
834 | (setq any-tokens nil | |
835 | r (aref rrhs ruleno)) | |
836 | (while (> (aref ritem r) 0) | |
837 | (if (wisent-ISTOKEN (aref ritem r)) | |
838 | (setq any-tokens t)) | |
839 | (setq r (1+ r))) | |
840 | ||
841 | ;; This rule has only nonterminals: schedule it for the | |
842 | ;; second pass. | |
843 | (unless any-tokens | |
844 | (setq r (aref rrhs ruleno)) | |
845 | (while (> (setq item (aref ritem r)) 0) | |
846 | (aset rcount ruleno (1+ (aref rcount ruleno))) | |
847 | ;; p->next = rsets[item]; | |
848 | ;; p->value = ruleno; | |
849 | (aset relts p (cons ruleno (aref rsets (- item ntokens)))) | |
850 | ;; rsets[item] = p; | |
851 | (aset rsets (- item ntokens) p) | |
852 | (setq p (1+ p) | |
853 | r (1+ r))))) | |
854 | ;; This rule has an empty RHS. | |
855 | ;; assert (ritem[rrhs[ruleno]] == -ruleno) | |
856 | (when (and (aref ruseful ruleno) | |
857 | (setq item (aref rlhs ruleno)) | |
858 | (not (aref nullable (- item ntokens)))) | |
859 | (aset nullable (- item ntokens) t) | |
860 | (aset squeue s2 item) | |
861 | (setq s2 (1+ s2))) | |
862 | ) | |
863 | ) | |
864 | (setq ruleno (1+ ruleno))) | |
865 | ||
866 | (while (< s1 s2) | |
867 | ;; p = rsets[*s1++] | |
868 | (setq p (aref rsets (- (aref squeue s1) ntokens)) | |
869 | s1 (1+ s1)) | |
870 | (while p | |
871 | (setq p (aref relts p) | |
872 | ruleno (car p) | |
873 | p (cdr p)) ;; p = p->next | |
874 | ;; if (--rcount[ruleno] == 0) | |
875 | (when (zerop (aset rcount ruleno (1- (aref rcount ruleno)))) | |
876 | (setq item (aref rlhs ruleno)) | |
877 | (aset nullable (- item ntokens) t) | |
878 | (aset squeue s2 item) | |
879 | (setq s2 (1+ s2))))) | |
880 | ||
881 | (if wisent-debug-flag | |
882 | (wisent-print-nullable)) | |
883 | )) | |
884 | \f | |
885 | ;;;; ----------- | |
886 | ;;;; Subroutines | |
887 | ;;;; ----------- | |
888 | ||
889 | (defun wisent-print-fderives () | |
890 | "Print FDERIVES." | |
891 | (let (i j rp) | |
892 | (wisent-log "\n\n\nFDERIVES\n") | |
893 | (setq i ntokens) | |
894 | (while (< i nsyms) | |
895 | (wisent-log "\n\n%s derives\n\n" (wisent-tag i)) | |
896 | (setq rp (aref fderives (- i ntokens)) | |
897 | j 0) | |
898 | (while (<= j nrules) | |
899 | (if (wisent-BITISSET rp j) | |
900 | (wisent-log " %d\n" j)) | |
901 | (setq j (1+ j))) | |
902 | (setq i (1+ i))))) | |
903 | ||
904 | (defun wisent-set-fderives () | |
905 | "Set up FDERIVES. | |
906 | An NVARS by NRULES matrix of bits indicating which rules can help | |
907 | derive the beginning of the data for each nonterminal. For example, | |
908 | if symbol 5 can be derived as the sequence of symbols 8 3 20, and one | |
909 | of the rules for deriving symbol 8 is rule 4, then the | |
910 | \[5 - NTOKENS, 4] bit in FDERIVES is set." | |
911 | (let (i j k) | |
912 | (setq fderives (make-vector nvars nil)) | |
913 | (setq i 0) | |
914 | (while (< i nvars) | |
915 | (aset fderives i (make-vector rulesetsize 0)) | |
916 | (setq i (1+ i))) | |
917 | ||
918 | (wisent-set-firsts) | |
919 | ||
920 | (setq i ntokens) | |
921 | (while (< i nsyms) | |
922 | (setq j ntokens) | |
923 | (while (< j nsyms) | |
924 | ;; if (BITISSET (FIRSTS (i), j - ntokens)) | |
925 | (when (wisent-BITISSET (aref firsts (- i ntokens)) (- j ntokens)) | |
926 | (setq k (aref derives (- j ntokens))) | |
927 | (while (> (car k) 0) ;; derives[j][k] > 0 | |
928 | ;; SETBIT (FDERIVES (i), derives[j][k]); | |
929 | (wisent-SETBIT (aref fderives (- i ntokens)) (car k)) | |
930 | (setq k (cdr k)))) | |
931 | (setq j (1+ j))) | |
932 | (setq i (1+ i))) | |
933 | ||
934 | (if wisent-debug-flag | |
935 | (wisent-print-fderives)) | |
936 | )) | |
937 | ||
938 | (defun wisent-print-firsts () | |
939 | "Print FIRSTS." | |
940 | (let (i j v) | |
941 | (wisent-log "\n\n\nFIRSTS\n\n") | |
942 | (setq i ntokens) | |
943 | (while (< i nsyms) | |
944 | (wisent-log "\n\n%s firsts\n\n" (wisent-tag i)) | |
945 | (setq v (aref firsts (- i ntokens)) | |
946 | j 0) | |
947 | (while (< j nvars) | |
948 | (if (wisent-BITISSET v j) | |
949 | (wisent-log "\t\t%d (%s)\n" | |
950 | (+ j ntokens) (wisent-tag (+ j ntokens)))) | |
951 | (setq j (1+ j))) | |
952 | (setq i (1+ i))))) | |
953 | ||
954 | (defun wisent-TC (R n) | |
955 | "Transitive closure. | |
956 | Given R an N by N matrix of bits, modify its contents to be the | |
957 | transitive closure of what was given." | |
958 | (let (i j k) | |
959 | ;; R (J, I) && R (I, K) => R (J, K). | |
960 | ;; I *must* be the outer loop. | |
961 | (setq i 0) | |
962 | (while (< i n) | |
963 | (setq j 0) | |
964 | (while (< j n) | |
965 | (when (wisent-BITISSET (aref R j) i) | |
966 | (setq k 0) | |
967 | (while (< k n) | |
968 | (if (wisent-BITISSET (aref R i) k) | |
969 | (wisent-SETBIT (aref R j) k)) | |
970 | (setq k (1+ k)))) | |
971 | (setq j (1+ j))) | |
972 | (setq i (1+ i))))) | |
973 | ||
974 | (defun wisent-RTC (R n) | |
975 | "Reflexive Transitive Closure. | |
976 | Same as `wisent-TC' and then set all the bits on the diagonal of R, an | |
977 | N by N matrix of bits." | |
978 | (let (i) | |
979 | (wisent-TC R n) | |
980 | (setq i 0) | |
981 | (while (< i n) | |
982 | (wisent-SETBIT (aref R i) i) | |
983 | (setq i (1+ i))))) | |
984 | ||
985 | (defun wisent-set-firsts () | |
986 | "Set up FIRSTS. | |
987 | An NVARS by NVARS bit matrix indicating which items can represent the | |
988 | beginning of the input corresponding to which other items. For | |
989 | example, if some rule expands symbol 5 into the sequence of symbols 8 | |
990 | 3 20, the symbol 8 can be the beginning of the data for symbol 5, so | |
991 | the bit [8 - NTOKENS, 5 - NTOKENS] in FIRSTS is set." | |
992 | (let (row symbol sp rowsize i) | |
993 | (setq rowsize (wisent-WORDSIZE nvars) | |
994 | varsetsize rowsize | |
995 | firsts (make-vector nvars nil) | |
996 | i 0) | |
997 | (while (< i nvars) | |
998 | (aset firsts i (make-vector rowsize 0)) | |
999 | (setq i (1+ i))) | |
1000 | ||
1001 | (setq row 0 ;; row = firsts | |
1002 | i ntokens) | |
1003 | (while (< i nsyms) | |
1004 | (setq sp (aref derives (- i ntokens))) | |
1005 | (while (>= (car sp) 0) | |
1006 | (setq symbol (aref ritem (aref rrhs (car sp))) | |
1007 | sp (cdr sp)) | |
1008 | (when (wisent-ISVAR symbol) | |
1009 | (setq symbol (- symbol ntokens)) | |
1010 | (wisent-SETBIT (aref firsts row) symbol) | |
1011 | )) | |
1012 | (setq row (1+ row) | |
1013 | i (1+ i))) | |
1014 | ||
1015 | (wisent-RTC firsts nvars) | |
1016 | ||
1017 | (if wisent-debug-flag | |
1018 | (wisent-print-firsts)) | |
1019 | )) | |
1020 | ||
1021 | (defun wisent-initialize-closure (n) | |
1022 | "Allocate the ITEMSET and RULESET vectors. | |
1023 | And precompute useful data so that `wisent-closure' can be called. | |
1024 | N is the number of elements to allocate for ITEMSET." | |
1025 | (setq itemset (make-vector n 0) | |
1026 | rulesetsize (wisent-WORDSIZE (1+ nrules)) | |
1027 | ruleset (make-vector rulesetsize 0)) | |
1028 | ||
1029 | (wisent-set-fderives)) | |
1030 | ||
1031 | (defun wisent-print-closure () | |
1032 | "Print ITEMSET." | |
1033 | (let (i) | |
1034 | (wisent-log "\n\nclosure n = %d\n\n" nitemset) | |
1035 | (setq i 0) ;; isp = itemset | |
1036 | (while (< i nitemset) | |
1037 | (wisent-log " %d\n" (aref itemset i)) | |
1038 | (setq i (1+ i))))) | |
1039 | ||
1040 | (defun wisent-closure (core n) | |
1041 | "Set up RULESET and ITEMSET for the transitions out of CORE state. | |
1042 | Given a vector of item numbers items, of length N, set up RULESET and | |
1043 | ITEMSET to indicate what rules could be run and which items could be | |
1044 | accepted when those items are the active ones. | |
1045 | ||
1046 | RULESET contains a bit for each rule. `wisent-closure' sets the bits | |
1047 | for all rules which could potentially describe the next input to be | |
1048 | read. | |
1049 | ||
1050 | ITEMSET is a vector of item numbers; NITEMSET is the number of items | |
1051 | in ITEMSET. `wisent-closure' places there the indices of all items | |
1052 | which represent units of input that could arrive next." | |
1053 | (let (c r v symbol ruleno itemno) | |
1054 | (if (zerop n) | |
1055 | (progn | |
1056 | (setq r 0 | |
1057 | v (aref fderives (- start-symbol ntokens))) | |
1058 | (while (< r rulesetsize) | |
1059 | ;; ruleset[r] = FDERIVES (start-symbol)[r]; | |
1060 | (aset ruleset r (aref v r)) | |
1061 | (setq r (1+ r))) | |
1062 | ) | |
1063 | (fillarray ruleset 0) | |
1064 | (setq c 0) | |
1065 | (while (< c n) | |
1066 | (setq symbol (aref ritem (aref core c))) | |
1067 | (when (wisent-ISVAR symbol) | |
1068 | (setq r 0 | |
1069 | v (aref fderives (- symbol ntokens))) | |
1070 | (while (< r rulesetsize) | |
1071 | ;; ruleset[r] |= FDERIVES (ritem[core[c]])[r]; | |
1072 | (aset ruleset r (logior (aref ruleset r) (aref v r))) | |
1073 | (setq r (1+ r)))) | |
1074 | (setq c (1+ c))) | |
1075 | ) | |
1076 | (setq nitemset 0 | |
1077 | c 0 | |
1078 | ruleno 0 | |
1079 | r (* rulesetsize wisent-BITS-PER-WORD)) | |
1080 | (while (< ruleno r) | |
1081 | (when (wisent-BITISSET ruleset ruleno) | |
1082 | (setq itemno (aref rrhs ruleno)) | |
1083 | (while (and (< c n) (< (aref core c) itemno)) | |
1084 | (aset itemset nitemset (aref core c)) | |
1085 | (setq nitemset (1+ nitemset) | |
1086 | c (1+ c))) | |
1087 | (aset itemset nitemset itemno) | |
1088 | (setq nitemset (1+ nitemset))) | |
1089 | (setq ruleno (1+ ruleno))) | |
1090 | ||
1091 | (while (< c n) | |
1092 | (aset itemset nitemset (aref core c)) | |
1093 | (setq nitemset (1+ nitemset) | |
1094 | c (1+ c))) | |
1095 | ||
1096 | (if wisent-debug-flag | |
1097 | (wisent-print-closure)) | |
1098 | )) | |
1099 | \f | |
1100 | ;;;; -------------------------------------------------- | |
1101 | ;;;; Generate the nondeterministic finite state machine | |
1102 | ;;;; -------------------------------------------------- | |
1103 | ||
1104 | (defun wisent-allocate-itemsets () | |
1105 | "Allocate storage for itemsets." | |
1106 | (let (symbol i count symbol-count) | |
1107 | ;; Count the number of occurrences of all the symbols in RITEMS. | |
1108 | ;; Note that useless productions (hence useless nonterminals) are | |
1109 | ;; browsed too, hence we need to allocate room for _all_ the | |
1110 | ;; symbols. | |
1111 | (setq count 0 | |
1112 | symbol-count (make-vector (+ nsyms nuseless-nonterminals) 0) | |
1113 | i 0) | |
1114 | (while (setq symbol (aref ritem i)) | |
1115 | (when (> symbol 0) | |
1116 | (setq count (1+ count)) | |
1117 | (aset symbol-count symbol (1+ (aref symbol-count symbol)))) | |
1118 | (setq i (1+ i))) | |
1119 | ;; See comments before `wisent-new-itemsets'. All the vectors of | |
1120 | ;; items live inside kernel-items. The number of active items | |
1121 | ;; after some symbol cannot be more than the number of times that | |
1122 | ;; symbol appears as an item, which is symbol-count[symbol]. We | |
1123 | ;; allocate that much space for each symbol. | |
1124 | (setq kernel-base (make-vector nsyms nil) | |
1125 | kernel-items (make-vector count 0) | |
1126 | count 0 | |
1127 | i 0) | |
1128 | (while (< i nsyms) | |
1129 | (aset kernel-base i count) | |
1130 | (setq count (+ count (aref symbol-count i)) | |
1131 | i (1+ i))) | |
1132 | (setq shift-symbol symbol-count | |
1133 | kernel-end (make-vector nsyms nil)) | |
1134 | )) | |
1135 | ||
1136 | (defun wisent-allocate-storage () | |
1137 | "Allocate storage for the state machine." | |
1138 | (wisent-allocate-itemsets) | |
1139 | (setq shiftset (make-vector nsyms 0) | |
1140 | redset (make-vector (1+ nrules) 0) | |
1141 | state-table (make-vector wisent-state-table-size nil))) | |
1142 | ||
1143 | (defun wisent-new-itemsets () | |
1144 | "Find which symbols can be shifted in the current state. | |
1145 | And for each one record which items would be active after that shift. | |
1146 | Uses the contents of ITEMSET. SHIFT-SYMBOL is set to a vector of the | |
1147 | symbols that can be shifted. For each symbol in the grammar, | |
1148 | KERNEL-BASE[symbol] points to a vector of item numbers activated if | |
1149 | that symbol is shifted, and KERNEL-END[symbol] points after the end of | |
1150 | that vector." | |
1151 | (let (i shiftcount isp ksp symbol) | |
1152 | (fillarray kernel-end nil) | |
1153 | (setq shiftcount 0 | |
1154 | isp 0) | |
1155 | (while (< isp nitemset) | |
1156 | (setq i (aref itemset isp) | |
1157 | isp (1+ isp) | |
1158 | symbol (aref ritem i)) | |
1159 | (when (> symbol 0) | |
1160 | (setq ksp (aref kernel-end symbol)) | |
1161 | (when (not ksp) | |
1162 | ;; shift-symbol[shiftcount++] = symbol; | |
1163 | (aset shift-symbol shiftcount symbol) | |
1164 | (setq shiftcount (1+ shiftcount) | |
1165 | ksp (aref kernel-base symbol))) | |
1166 | ;; *ksp++ = i + 1; | |
1167 | (aset kernel-items ksp (1+ i)) | |
1168 | (setq ksp (1+ ksp)) | |
1169 | (aset kernel-end symbol ksp))) | |
1170 | (setq nshifts shiftcount))) | |
1171 | ||
1172 | (defun wisent-new-state (symbol) | |
1173 | "Create a new state for those items, if necessary. | |
1174 | SYMBOL is the core accessing-symbol. | |
1175 | Subroutine of `wisent-get-state'." | |
1176 | (let (n p isp1 isp2 iend items) | |
1177 | (setq isp1 (aref kernel-base symbol) | |
1178 | iend (aref kernel-end symbol) | |
1179 | n (- iend isp1) | |
1180 | p (make-core) | |
1181 | items (make-vector n 0)) | |
1182 | (set-core-accessing-symbol p symbol) | |
1183 | (set-core-number p nstates) | |
1184 | (set-core-nitems p n) | |
1185 | (set-core-items p items) | |
1186 | (setq isp2 0) ;; isp2 = p->items | |
1187 | (while (< isp1 iend) | |
1188 | ;; *isp2++ = *isp1++; | |
1189 | (aset items isp2 (aref kernel-items isp1)) | |
1190 | (setq isp1 (1+ isp1) | |
1191 | isp2 (1+ isp2))) | |
1192 | (set-core-next last-state p) | |
1193 | (setq last-state p | |
1194 | nstates (1+ nstates)) | |
1195 | p)) | |
1196 | ||
1197 | (defun wisent-get-state (symbol) | |
1198 | "Find the state we would get to by shifting SYMBOL. | |
1199 | Return the state number for the state we would get to (from the | |
1200 | current state) by shifting SYMBOL. Create a new state if no | |
1201 | equivalent one exists already. Used by `wisent-append-states'." | |
1202 | (let (key isp1 isp2 iend sp sp2 found n) | |
1203 | (setq isp1 (aref kernel-base symbol) | |
1204 | iend (aref kernel-end symbol) | |
1205 | n (- iend isp1) | |
1206 | key 0) | |
1207 | ;; Add up the target state's active item numbers to get a hash key | |
1208 | (while (< isp1 iend) | |
1209 | (setq key (+ key (aref kernel-items isp1)) | |
1210 | isp1 (1+ isp1))) | |
1211 | (setq key (% key wisent-state-table-size) | |
1212 | sp (aref state-table key)) | |
1213 | (if sp | |
1214 | (progn | |
1215 | (setq found nil) | |
1216 | (while (not found) | |
1217 | (when (= (core-nitems sp) n) | |
1218 | (setq found t | |
1219 | isp1 (aref kernel-base symbol) | |
1220 | ;; isp2 = sp->items; | |
1221 | sp2 (core-items sp) | |
1222 | isp2 0) | |
1223 | ||
1224 | (while (and found (< isp1 iend)) | |
1225 | ;; if (*isp1++ != *isp2++) | |
1226 | (if (not (= (aref kernel-items isp1) | |
1227 | (aref sp2 isp2))) | |
1228 | (setq found nil)) | |
1229 | (setq isp1 (1+ isp1) | |
1230 | isp2 (1+ isp2)))) | |
1231 | (if (not found) | |
1232 | (if (core-link sp) | |
1233 | (setq sp (core-link sp)) | |
1234 | ;; sp = sp->link = new-state(symbol) | |
1235 | (setq sp (set-core-link sp (wisent-new-state symbol)) | |
1236 | found t))))) | |
1237 | ;; bucket is empty | |
1238 | ;; state-table[key] = sp = new-state(symbol) | |
1239 | (setq sp (wisent-new-state symbol)) | |
1240 | (aset state-table key sp)) | |
1241 | ;; return (sp->number); | |
1242 | (core-number sp))) | |
1243 | ||
1244 | (defun wisent-append-states () | |
1245 | "Find or create the core structures for states. | |
1246 | Use the information computed by `wisent-new-itemsets' to find the | |
1247 | state numbers reached by each shift transition from the current state. | |
1248 | SHIFTSET is set up as a vector of state numbers of those states." | |
1249 | (let (i j symbol) | |
1250 | ;; First sort shift-symbol into increasing order | |
1251 | (setq i 1) | |
1252 | (while (< i nshifts) | |
1253 | (setq symbol (aref shift-symbol i) | |
1254 | j i) | |
1255 | (while (and (> j 0) (> (aref shift-symbol (1- j)) symbol)) | |
1256 | (aset shift-symbol j (aref shift-symbol (1- j))) | |
1257 | (setq j (1- j))) | |
1258 | (aset shift-symbol j symbol) | |
1259 | (setq i (1+ i))) | |
1260 | (setq i 0) | |
1261 | (while (< i nshifts) | |
1262 | (setq symbol (aref shift-symbol i)) | |
1263 | (aset shiftset i (wisent-get-state symbol)) | |
1264 | (setq i (1+ i))) | |
1265 | )) | |
1266 | ||
1267 | (defun wisent-initialize-states () | |
1268 | "Initialize states." | |
1269 | (let ((p (make-core))) | |
1270 | (setq first-state p | |
1271 | last-state p | |
1272 | this-state p | |
1273 | nstates 1))) | |
1274 | ||
1275 | (defun wisent-save-shifts () | |
1276 | "Save the NSHIFTS of SHIFTSET into the current linked list." | |
1277 | (let (p i shifts) | |
1278 | (setq p (make-shifts) | |
1279 | shifts (make-vector nshifts 0) | |
1280 | i 0) | |
1281 | (set-shifts-number p (core-number this-state)) | |
1282 | (set-shifts-nshifts p nshifts) | |
1283 | (set-shifts-shifts p shifts) | |
1284 | (while (< i nshifts) | |
1285 | ;; (p->shifts)[i] = shiftset[i]; | |
1286 | (aset shifts i (aref shiftset i)) | |
1287 | (setq i (1+ i))) | |
1288 | ||
1289 | (if last-shift | |
1290 | (set-shifts-next last-shift p) | |
1291 | (setq first-shift p)) | |
1292 | (setq last-shift p))) | |
1293 | ||
1294 | (defun wisent-insert-start-shift () | |
1295 | "Create the next-to-final state. | |
1296 | That is the state to which a shift has already been made in the | |
1297 | initial state. Subroutine of `wisent-augment-automaton'." | |
1298 | (let (statep sp) | |
1299 | (setq statep (make-core)) | |
1300 | (set-core-number statep nstates) | |
1301 | (set-core-accessing-symbol statep start-symbol) | |
1302 | (set-core-next last-state statep) | |
1303 | (setq last-state statep) | |
1304 | ;; Make a shift from this state to (what will be) the final state. | |
1305 | (setq sp (make-shifts)) | |
1306 | (set-shifts-number sp nstates) | |
1307 | (setq nstates (1+ nstates)) | |
1308 | (set-shifts-nshifts sp 1) | |
1309 | (set-shifts-shifts sp (vector nstates)) | |
1310 | (set-shifts-next last-shift sp) | |
1311 | (setq last-shift sp))) | |
1312 | ||
1313 | (defun wisent-augment-automaton () | |
1314 | "Set up initial and final states as parser wants them. | |
1315 | Make sure that the initial state has a shift that accepts the | |
1316 | grammar's start symbol and goes to the next-to-final state, which has | |
1317 | a shift going to the final state, which has a shift to the termination | |
1318 | state. Create such states and shifts if they don't happen to exist | |
1319 | already." | |
1320 | (let (i k statep sp sp2 sp1 shifts) | |
1321 | (setq sp first-shift) | |
1322 | (if sp | |
1323 | (progn | |
1324 | (if (zerop (shifts-number sp)) | |
1325 | (progn | |
1326 | (setq k (shifts-nshifts sp) | |
1327 | statep (core-next first-state)) | |
1328 | ;; The states reached by shifts from first-state are | |
1329 | ;; numbered 1...K. Look for one reached by | |
1330 | ;; START-SYMBOL. | |
1331 | (while (and (< (core-accessing-symbol statep) start-symbol) | |
1332 | (< (core-number statep) k)) | |
1333 | (setq statep (core-next statep))) | |
1334 | (if (= (core-accessing-symbol statep) start-symbol) | |
1335 | (progn | |
1336 | ;; We already have a next-to-final state. Make | |
1337 | ;; sure it has a shift to what will be the final | |
1338 | ;; state. | |
1339 | (setq k (core-number statep)) | |
1340 | (while (and sp (< (shifts-number sp) k)) | |
1341 | (setq sp1 sp | |
1342 | sp (shifts-next sp))) | |
1343 | (if (and sp (= (shifts-number sp) k)) | |
1344 | (progn | |
1345 | (setq i (shifts-nshifts sp) | |
1346 | sp2 (make-shifts) | |
1347 | shifts (make-vector (1+ i) 0)) | |
1348 | (set-shifts-number sp2 k) | |
1349 | (set-shifts-nshifts sp2 (1+ i)) | |
1350 | (set-shifts-shifts sp2 shifts) | |
1351 | (aset shifts 0 nstates) | |
1352 | (while (> i 0) | |
1353 | ;; sp2->shifts[i] = sp->shifts[i - 1]; | |
1354 | (aset shifts i (aref (shifts-shifts sp) (1- i))) | |
1355 | (setq i (1- i))) | |
1356 | ;; Patch sp2 into the chain of shifts in | |
1357 | ;; place of sp, following sp1. | |
1358 | (set-shifts-next sp2 (shifts-next sp)) | |
1359 | (set-shifts-next sp1 sp2) | |
1360 | (if (eq sp last-shift) | |
1361 | (setq last-shift sp2)) | |
1362 | ) | |
1363 | (setq sp2 (make-shifts)) | |
1364 | (set-shifts-number sp2 k) | |
1365 | (set-shifts-nshifts sp2 1) | |
1366 | (set-shifts-shifts sp2 (vector nstates)) | |
1367 | ;; Patch sp2 into the chain of shifts between | |
1368 | ;; sp1 and sp. | |
1369 | (set-shifts-next sp2 sp) | |
1370 | (set-shifts-next sp1 sp2) | |
1371 | (if (not sp) | |
1372 | (setq last-shift sp2)) | |
1373 | ) | |
1374 | ) | |
1375 | ;; There is no next-to-final state as yet. | |
1376 | ;; Add one more shift in FIRST-SHIFT, going to the | |
1377 | ;; next-to-final state (yet to be made). | |
1378 | (setq sp first-shift | |
1379 | sp2 (make-shifts) | |
1380 | i (shifts-nshifts sp) | |
1381 | shifts (make-vector (1+ i) 0)) | |
1382 | (set-shifts-nshifts sp2 (1+ i)) | |
1383 | (set-shifts-shifts sp2 shifts) | |
1384 | ;; Stick this shift into the vector at the proper place. | |
1385 | (setq statep (core-next first-state) | |
1386 | k 0 | |
1387 | i 0) | |
1388 | (while (< i (shifts-nshifts sp)) | |
1389 | (when (and (> (core-accessing-symbol statep) start-symbol) | |
1390 | (= i k)) | |
1391 | (aset shifts k nstates) | |
1392 | (setq k (1+ k))) | |
1393 | (aset shifts k (aref (shifts-shifts sp) i)) | |
1394 | (setq statep (core-next statep)) | |
1395 | (setq i (1+ i) | |
1396 | k (1+ k))) | |
1397 | (when (= i k) | |
1398 | (aset shifts k nstates) | |
1399 | (setq k (1+ k))) | |
1400 | ;; Patch sp2 into the chain of shifts in place of | |
1401 | ;; sp, at the beginning. | |
1402 | (set-shifts-next sp2 (shifts-next sp)) | |
1403 | (setq first-shift sp2) | |
1404 | (if (eq last-shift sp) | |
1405 | (setq last-shift sp2)) | |
1406 | ;; Create the next-to-final state, with shift to | |
1407 | ;; what will be the final state. | |
1408 | (wisent-insert-start-shift))) | |
1409 | ;; The initial state didn't even have any shifts. Give it | |
1410 | ;; one shift, to the next-to-final state. | |
1411 | (setq sp (make-shifts)) | |
1412 | (set-shifts-nshifts sp 1) | |
1413 | (set-shifts-shifts sp (vector nstates)) | |
1414 | ;; Patch sp into the chain of shifts at the beginning. | |
1415 | (set-shifts-next sp first-shift) | |
1416 | (setq first-shift sp) | |
1417 | ;; Create the next-to-final state, with shift to what will | |
1418 | ;; be the final state. | |
1419 | (wisent-insert-start-shift))) | |
1420 | ;; There are no shifts for any state. Make one shift, from the | |
1421 | ;; initial state to the next-to-final state. | |
1422 | (setq sp (make-shifts)) | |
1423 | (set-shifts-nshifts sp 1) | |
1424 | (set-shifts-shifts sp (vector nstates)) | |
1425 | ;; Initialize the chain of shifts with sp. | |
1426 | (setq first-shift sp | |
1427 | last-shift sp) | |
1428 | ;; Create the next-to-final state, with shift to what will be | |
1429 | ;; the final state. | |
1430 | (wisent-insert-start-shift)) | |
1431 | ;; Make the final state--the one that follows a shift from the | |
1432 | ;; next-to-final state. The symbol for that shift is 0 | |
1433 | ;; (end-of-file). | |
1434 | (setq statep (make-core)) | |
1435 | (set-core-number statep nstates) | |
1436 | (set-core-next last-state statep) | |
1437 | (setq last-state statep) | |
1438 | ;; Make the shift from the final state to the termination state. | |
1439 | (setq sp (make-shifts)) | |
1440 | (set-shifts-number sp nstates) | |
1441 | (setq nstates (1+ nstates)) | |
1442 | (set-shifts-nshifts sp 1) | |
1443 | (set-shifts-shifts sp (vector nstates)) | |
1444 | (set-shifts-next last-shift sp) | |
1445 | (setq last-shift sp) | |
1446 | ;; Note that the variable FINAL-STATE refers to what we sometimes | |
1447 | ;; call the termination state. | |
1448 | (setq final-state nstates) | |
1449 | ;; Make the termination state. | |
1450 | (setq statep (make-core)) | |
1451 | (set-core-number statep nstates) | |
1452 | (setq nstates (1+ nstates)) | |
1453 | (set-core-next last-state statep) | |
1454 | (setq last-state statep))) | |
1455 | ||
1456 | (defun wisent-save-reductions () | |
1457 | "Make a reductions structure. | |
1458 | Find which rules can be used for reduction transitions from the | |
1459 | current state and make a reductions structure for the state to record | |
1460 | their rule numbers." | |
1461 | (let (i item count p rules) | |
1462 | ;; Find and count the active items that represent ends of rules. | |
1463 | (setq count 0 | |
1464 | i 0) | |
1465 | (while (< i nitemset) | |
1466 | (setq item (aref ritem (aref itemset i))) | |
1467 | (when (< item 0) | |
1468 | (aset redset count (- item)) | |
1469 | (setq count (1+ count))) | |
1470 | (setq i (1+ i))) | |
1471 | ;; Make a reductions structure and copy the data into it. | |
1472 | (when (> count 0) | |
1473 | (setq p (make-reductions) | |
1474 | rules (make-vector count 0)) | |
1475 | (set-reductions-number p (core-number this-state)) | |
1476 | (set-reductions-nreds p count) | |
1477 | (set-reductions-rules p rules) | |
1478 | (setq i 0) | |
1479 | (while (< i count) | |
1480 | ;; (p->rules)[i] = redset[i] | |
1481 | (aset rules i (aref redset i)) | |
1482 | (setq i (1+ i))) | |
1483 | (if last-reduction | |
1484 | (set-reductions-next last-reduction p) | |
1485 | (setq first-reduction p)) | |
1486 | (setq last-reduction p)))) | |
1487 | ||
1488 | (defun wisent-generate-states () | |
1489 | "Compute the nondeterministic finite state machine from the grammar." | |
1490 | (wisent-allocate-storage) | |
1491 | (wisent-initialize-closure nitems) | |
1492 | (wisent-initialize-states) | |
1493 | (while this-state | |
1494 | ;; Set up RULESET and ITEMSET for the transitions out of this | |
1495 | ;; state. RULESET gets a 1 bit for each rule that could reduce | |
1496 | ;; now. ITEMSET gets a vector of all the items that could be | |
1497 | ;; accepted next. | |
1498 | (wisent-closure (core-items this-state) (core-nitems this-state)) | |
1499 | ;; Record the reductions allowed out of this state. | |
1500 | (wisent-save-reductions) | |
1501 | ;; Find the itemsets of the states that shifts can reach. | |
1502 | (wisent-new-itemsets) | |
1503 | ;; Find or create the core structures for those states. | |
1504 | (wisent-append-states) | |
1505 | ;; Create the shifts structures for the shifts to those states, | |
1506 | ;; now that the state numbers transitioning to are known. | |
1507 | (if (> nshifts 0) | |
1508 | (wisent-save-shifts)) | |
1509 | ;; States are queued when they are created; process them all. | |
1510 | (setq this-state (core-next this-state))) | |
1511 | ;; Set up initial and final states as parser wants them. | |
1512 | (wisent-augment-automaton)) | |
1513 | \f | |
1514 | ;;;; --------------------------- | |
1515 | ;;;; Compute look-ahead criteria | |
1516 | ;;;; --------------------------- | |
1517 | ||
1518 | ;; Compute how to make the finite state machine deterministic; find | |
1519 | ;; which rules need lookahead in each state, and which lookahead | |
1520 | ;; tokens they accept. | |
1521 | ||
1522 | ;; `wisent-lalr', the entry point, builds these data structures: | |
1523 | ||
1524 | ;; GOTO-MAP, FROM-STATE and TO-STATE record each shift transition | |
1525 | ;; which accepts a variable (a nonterminal). NGOTOS is the number of | |
1526 | ;; such transitions. | |
1527 | ;; FROM-STATE[t] is the state number which a transition leads from and | |
1528 | ;; TO-STATE[t] is the state number it leads to. | |
1529 | ;; All the transitions that accept a particular variable are grouped | |
1530 | ;; together and GOTO-MAP[i - NTOKENS] is the index in FROM-STATE and | |
1531 | ;; TO-STATE of the first of them. | |
1532 | ||
1533 | ;; CONSISTENT[s] is non-nil if no lookahead is needed to decide what | |
1534 | ;; to do in state s. | |
1535 | ||
1536 | ;; LARULENO is a vector which records the rules that need lookahead in | |
1537 | ;; various states. The elements of LARULENO that apply to state s are | |
1538 | ;; those from LOOKAHEADS[s] through LOOKAHEADS[s+1]-1. Each element | |
1539 | ;; of LARULENO is a rule number. | |
1540 | ||
1541 | ;; If LR is the length of LARULENO, then a number from 0 to LR-1 can | |
1542 | ;; specify both a rule and a state where the rule might be applied. | |
1543 | ;; LA is a LR by NTOKENS matrix of bits. | |
1544 | ;; LA[l, i] is 1 if the rule LARULENO[l] is applicable in the | |
1545 | ;; appropriate state when the next token is symbol i. | |
1546 | ;; If LA[l, i] and LA[l, j] are both 1 for i != j, it is a conflict. | |
1547 | ||
1548 | (wisent-defcontext digraph | |
1549 | INDEX R VERTICES | |
1550 | infinity top) | |
1551 | ||
1552 | (defun wisent-traverse (i) | |
1553 | "Traverse I." | |
1554 | (let (j k height Ri Fi break) | |
1555 | (setq top (1+ top) | |
1556 | height top) | |
1557 | (aset VERTICES top i) ;; VERTICES[++top] = i | |
1558 | (aset INDEX i top) ;; INDEX[i] = height = top | |
1559 | ||
1560 | (setq Ri (aref R i)) | |
1561 | (when Ri | |
1562 | (setq j 0) | |
1563 | (while (>= (aref Ri j) 0) | |
1564 | (if (zerop (aref INDEX (aref Ri j))) | |
1565 | (wisent-traverse (aref Ri j))) | |
1566 | ;; if (INDEX[i] > INDEX[R[i][j]]) | |
1567 | (if (> (aref INDEX i) (aref INDEX (aref Ri j))) | |
1568 | ;; INDEX[i] = INDEX[R[i][j]]; | |
1569 | (aset INDEX i (aref INDEX (aref Ri j)))) | |
1570 | (setq Fi (aref F i) | |
1571 | k 0) | |
1572 | (while (< k tokensetsize) | |
1573 | ;; F (i)[k] |= F (R[i][j])[k]; | |
1574 | (aset Fi k (logior (aref Fi k) | |
1575 | (aref (aref F (aref Ri j)) k))) | |
1576 | (setq k (1+ k))) | |
1577 | (setq j (1+ j)))) | |
1578 | ||
1579 | (when (= (aref INDEX i) height) | |
1580 | (setq break nil) | |
1581 | (while (not break) | |
1582 | (setq j (aref VERTICES top) ;; j = VERTICES[top--] | |
1583 | top (1- top)) | |
1584 | (aset INDEX j infinity) | |
1585 | (if (= i j) | |
1586 | (setq break t) | |
1587 | (setq k 0) | |
1588 | (while (< k tokensetsize) | |
1589 | ;; F (j)[k] = F (i)[k]; | |
1590 | (aset (aref F j) k (aref (aref F i) k)) | |
1591 | (setq k (1+ k)))))) | |
1592 | )) | |
1593 | ||
1594 | (defun wisent-digraph (relation) | |
1595 | "Digraph RELATION." | |
1596 | (wisent-with-context digraph | |
1597 | (setq infinity (+ ngotos 2) | |
1598 | INDEX (make-vector (1+ ngotos) 0) | |
1599 | VERTICES (make-vector (1+ ngotos) 0) | |
1600 | top 0 | |
1601 | R relation) | |
1602 | (let ((i 0)) | |
1603 | (while (< i ngotos) | |
1604 | (if (and (= (aref INDEX i) 0) (aref R i)) | |
1605 | (wisent-traverse i)) | |
1606 | (setq i (1+ i)))))) | |
1607 | ||
1608 | (defun wisent-set-state-table () | |
1609 | "Build state table." | |
1610 | (let (sp) | |
1611 | (setq state-table (make-vector nstates nil) | |
1612 | sp first-state) | |
1613 | (while sp | |
1614 | (aset state-table (core-number sp) sp) | |
1615 | (setq sp (core-next sp))))) | |
1616 | ||
1617 | (defun wisent-set-accessing-symbol () | |
1618 | "Build accessing symbol table." | |
1619 | (let (sp) | |
1620 | (setq accessing-symbol (make-vector nstates 0) | |
1621 | sp first-state) | |
1622 | (while sp | |
1623 | (aset accessing-symbol (core-number sp) (core-accessing-symbol sp)) | |
1624 | (setq sp (core-next sp))))) | |
1625 | ||
1626 | (defun wisent-set-shift-table () | |
1627 | "Build shift table." | |
1628 | (let (sp) | |
1629 | (setq shift-table (make-vector nstates nil) | |
1630 | sp first-shift) | |
1631 | (while sp | |
1632 | (aset shift-table (shifts-number sp) sp) | |
1633 | (setq sp (shifts-next sp))))) | |
1634 | ||
1635 | (defun wisent-set-reduction-table () | |
1636 | "Build reduction table." | |
1637 | (let (rp) | |
1638 | (setq reduction-table (make-vector nstates nil) | |
1639 | rp first-reduction) | |
1640 | (while rp | |
1641 | (aset reduction-table (reductions-number rp) rp) | |
1642 | (setq rp (reductions-next rp))))) | |
1643 | ||
1644 | (defun wisent-set-maxrhs () | |
1645 | "Setup MAXRHS length." | |
1646 | (let (i len max) | |
1647 | (setq len 0 | |
1648 | max 0 | |
1649 | i 0) | |
1650 | (while (aref ritem i) | |
1651 | (if (> (aref ritem i) 0) | |
1652 | (setq len (1+ len)) | |
1653 | (if (> len max) | |
1654 | (setq max len)) | |
1655 | (setq len 0)) | |
1656 | (setq i (1+ i))) | |
1657 | (setq maxrhs max))) | |
1658 | ||
1659 | (defun wisent-initialize-LA () | |
1660 | "Set up LA." | |
1661 | (let (i j k count rp sp np v) | |
1662 | (setq consistent (make-vector nstates nil) | |
1663 | lookaheads (make-vector (1+ nstates) 0) | |
1664 | count 0 | |
1665 | i 0) | |
1666 | (while (< i nstates) | |
1667 | (aset lookaheads i count) | |
1668 | (setq rp (aref reduction-table i) | |
1669 | sp (aref shift-table i)) | |
1670 | ;; if (rp && | |
1671 | ;; (rp->nreds > 1 | |
1672 | ;; || (sp && ! ISVAR(accessing-symbol[sp->shifts[0]])))) | |
1673 | (if (and rp | |
1674 | (or (> (reductions-nreds rp) 1) | |
1675 | (and sp | |
1676 | (not (wisent-ISVAR | |
1677 | (aref accessing-symbol | |
1678 | (aref (shifts-shifts sp) 0))))))) | |
1679 | (setq count (+ count (reductions-nreds rp))) | |
1680 | (aset consistent i t)) | |
1681 | ||
1682 | (when sp | |
1683 | (setq k 0 | |
1684 | j (shifts-nshifts sp) | |
1685 | v (shifts-shifts sp)) | |
1686 | (while (< k j) | |
1687 | (when (= (aref accessing-symbol (aref v k)) | |
1688 | error-token-number) | |
1689 | (aset consistent i nil) | |
1690 | (setq k j)) ;; break | |
1691 | (setq k (1+ k)))) | |
1692 | (setq i (1+ i))) | |
1693 | ||
1694 | (aset lookaheads nstates count) | |
1695 | ||
1696 | (if (zerop count) | |
1697 | (progn | |
1698 | (setq LA (make-vector 1 nil) | |
1699 | LAruleno (make-vector 1 0) | |
1700 | lookback (make-vector 1 nil))) | |
1701 | (setq LA (make-vector count nil) | |
1702 | LAruleno (make-vector count 0) | |
1703 | lookback (make-vector count nil))) | |
1704 | (setq i 0 j (length LA)) | |
1705 | (while (< i j) | |
1706 | (aset LA i (make-vector tokensetsize 0)) | |
1707 | (setq i (1+ i))) | |
1708 | ||
1709 | (setq np 0 | |
1710 | i 0) | |
1711 | (while (< i nstates) | |
1712 | (when (not (aref consistent i)) | |
1713 | (setq rp (aref reduction-table i)) | |
1714 | (when rp | |
1715 | (setq j 0 | |
1716 | k (reductions-nreds rp) | |
1717 | v (reductions-rules rp)) | |
1718 | (while (< j k) | |
1719 | (aset LAruleno np (aref v j)) | |
1720 | (setq np (1+ np) | |
1721 | j (1+ j))))) | |
1722 | (setq i (1+ i))))) | |
1723 | ||
1724 | (defun wisent-set-goto-map () | |
1725 | "Set up GOTO-MAP." | |
1726 | (let (sp i j symbol k temp-map state1 state2 v) | |
1727 | (setq goto-map (make-vector (1+ nvars) 0) | |
1728 | temp-map (make-vector (1+ nvars) 0)) | |
1729 | ||
1730 | (setq ngotos 0 | |
1731 | sp first-shift) | |
1732 | (while sp | |
1733 | (setq i (1- (shifts-nshifts sp)) | |
1734 | v (shifts-shifts sp)) | |
1735 | (while (>= i 0) | |
1736 | (setq symbol (aref accessing-symbol (aref v i))) | |
1737 | (if (wisent-ISTOKEN symbol) | |
1738 | (setq i 0) ;; break | |
1739 | (setq ngotos (1+ ngotos)) | |
1740 | ;; goto-map[symbol]++; | |
1741 | (aset goto-map (- symbol ntokens) | |
1742 | (1+ (aref goto-map (- symbol ntokens))))) | |
1743 | (setq i (1- i))) | |
1744 | (setq sp (shifts-next sp))) | |
1745 | ||
1746 | (setq k 0 | |
1747 | i ntokens | |
1748 | j 0) | |
1749 | (while (< i nsyms) | |
1750 | (aset temp-map j k) | |
1751 | (setq k (+ k (aref goto-map j)) | |
1752 | i (1+ i) | |
1753 | j (1+ j))) | |
1754 | (setq i ntokens | |
1755 | j 0) | |
1756 | (while (< i nsyms) | |
1757 | (aset goto-map j (aref temp-map j)) | |
1758 | (setq i (1+ i) | |
1759 | j (1+ j))) | |
1760 | ;; goto-map[nsyms] = ngotos; | |
1761 | ;; temp-map[nsyms] = ngotos; | |
1762 | (aset goto-map j ngotos) | |
1763 | (aset temp-map j ngotos) | |
1764 | ||
1765 | (setq from-state (make-vector ngotos 0) | |
1766 | to-state (make-vector ngotos 0) | |
1767 | sp first-shift) | |
1768 | (while sp | |
1769 | (setq state1 (shifts-number sp) | |
1770 | v (shifts-shifts sp) | |
1771 | i (1- (shifts-nshifts sp))) | |
1772 | (while (>= i 0) | |
1773 | (setq state2 (aref v i) | |
1774 | symbol (aref accessing-symbol state2)) | |
1775 | (if (wisent-ISTOKEN symbol) | |
1776 | (setq i 0) ;; break | |
1777 | ;; k = temp-map[symbol]++; | |
1778 | (setq k (aref temp-map (- symbol ntokens))) | |
1779 | (aset temp-map (- symbol ntokens) (1+ k)) | |
1780 | (aset from-state k state1) | |
1781 | (aset to-state k state2)) | |
1782 | (setq i (1- i))) | |
1783 | (setq sp (shifts-next sp))) | |
1784 | )) | |
1785 | ||
1786 | (defun wisent-map-goto (state symbol) | |
1787 | "Map a STATE/SYMBOL pair into its numeric representation." | |
1788 | (let (high low middle s result) | |
1789 | ;; low = goto-map[symbol]; | |
1790 | ;; high = goto-map[symbol + 1] - 1; | |
1791 | (setq low (aref goto-map (- symbol ntokens)) | |
1792 | high (1- (aref goto-map (- (1+ symbol) ntokens)))) | |
1793 | (while (and (not result) (<= low high)) | |
1794 | (setq middle (/ (+ low high) 2) | |
1795 | s (aref from-state middle)) | |
1796 | (cond | |
1797 | ((= s state) | |
1798 | (setq result middle)) | |
1799 | ((< s state) | |
1800 | (setq low (1+ middle))) | |
1801 | (t | |
1802 | (setq high (1- middle))))) | |
1803 | (or result | |
1804 | (error "Internal error in `wisent-map-goto'")) | |
1805 | )) | |
1806 | ||
1807 | (defun wisent-initialize-F () | |
1808 | "Set up F." | |
1809 | (let (i j k sp edge rowp rp reads nedges stateno symbol v break) | |
1810 | (setq F (make-vector ngotos nil) | |
1811 | i 0) | |
1812 | (while (< i ngotos) | |
1813 | (aset F i (make-vector tokensetsize 0)) | |
1814 | (setq i (1+ i))) | |
1815 | ||
1816 | (setq reads (make-vector ngotos nil) | |
1817 | edge (make-vector (1+ ngotos) 0) | |
1818 | nedges 0 | |
1819 | rowp 0 ;; rowp = F | |
1820 | i 0) | |
1821 | (while (< i ngotos) | |
1822 | (setq stateno (aref to-state i) | |
1823 | sp (aref shift-table stateno)) | |
1824 | (when sp | |
1825 | (setq k (shifts-nshifts sp) | |
1826 | v (shifts-shifts sp) | |
1827 | j 0 | |
1828 | break nil) | |
1829 | (while (and (not break) (< j k)) | |
1830 | ;; symbol = accessing-symbol[sp->shifts[j]]; | |
1831 | (setq symbol (aref accessing-symbol (aref v j))) | |
1832 | (if (wisent-ISVAR symbol) | |
1833 | (setq break t) ;; break | |
1834 | (wisent-SETBIT (aref F rowp) symbol) | |
1835 | (setq j (1+ j)))) | |
1836 | ||
1837 | (while (< j k) | |
1838 | ;; symbol = accessing-symbol[sp->shifts[j]]; | |
1839 | (setq symbol (aref accessing-symbol (aref v j))) | |
1840 | (when (aref nullable (- symbol ntokens)) | |
1841 | (aset edge nedges (wisent-map-goto stateno symbol)) | |
1842 | (setq nedges (1+ nedges))) | |
1843 | (setq j (1+ j))) | |
1844 | ||
1845 | (when (> nedges 0) | |
1846 | ;; reads[i] = rp = NEW2(nedges + 1, short); | |
1847 | (setq rp (make-vector (1+ nedges) 0) | |
1848 | j 0) | |
1849 | (aset reads i rp) | |
1850 | (while (< j nedges) | |
1851 | ;; rp[j] = edge[j]; | |
1852 | (aset rp j (aref edge j)) | |
1853 | (setq j (1+ j))) | |
1854 | (aset rp nedges -1) | |
1855 | (setq nedges 0))) | |
1856 | (setq rowp (1+ rowp)) | |
1857 | (setq i (1+ i))) | |
1858 | (wisent-digraph reads) | |
1859 | )) | |
1860 | ||
1861 | (defun wisent-add-lookback-edge (stateno ruleno gotono) | |
1862 | "Add a lookback edge. | |
1863 | STATENO, RULENO, GOTONO are self-explanatory." | |
1864 | (let (i k found) | |
1865 | (setq i (aref lookaheads stateno) | |
1866 | k (aref lookaheads (1+ stateno)) | |
1867 | found nil) | |
1868 | (while (and (not found) (< i k)) | |
1869 | (if (= (aref LAruleno i) ruleno) | |
1870 | (setq found t) | |
1871 | (setq i (1+ i)))) | |
1872 | ||
1873 | (or found | |
1874 | (error "Internal error in `wisent-add-lookback-edge'")) | |
1875 | ||
1876 | ;; value . next | |
1877 | ;; lookback[i] = (gotono . lookback[i]) | |
1878 | (aset lookback i (cons gotono (aref lookback i))))) | |
1879 | ||
1880 | (defun wisent-transpose (R-arg n) | |
1881 | "Return the transpose of R-ARG, of size N. | |
1882 | Destroy R-ARG, as it is replaced with the result. R-ARG[I] is nil or | |
1883 | a -1 terminated list of numbers. RESULT[NUM] is nil or the -1 | |
1884 | terminated list of the I such as NUM is in R-ARG[I]." | |
1885 | (let (i j new-R end-R nedges v sp) | |
1886 | (setq new-R (make-vector n nil) | |
1887 | end-R (make-vector n nil) | |
1888 | nedges (make-vector n 0)) | |
1889 | ||
1890 | ;; Count. | |
1891 | (setq i 0) | |
1892 | (while (< i n) | |
1893 | (setq v (aref R-arg i)) | |
1894 | (when v | |
1895 | (setq j 0) | |
1896 | (while (>= (aref v j) 0) | |
1897 | (aset nedges (aref v j) (1+ (aref nedges (aref v j)))) | |
1898 | (setq j (1+ j)))) | |
1899 | (setq i (1+ i))) | |
1900 | ||
1901 | ;; Allocate. | |
1902 | (setq i 0) | |
1903 | (while (< i n) | |
1904 | (when (> (aref nedges i) 0) | |
1905 | (setq sp (make-vector (1+ (aref nedges i)) 0)) | |
1906 | (aset sp (aref nedges i) -1) | |
1907 | (aset new-R i sp) | |
1908 | (aset end-R i 0)) | |
1909 | (setq i (1+ i))) | |
1910 | ||
1911 | ;; Store. | |
1912 | (setq i 0) | |
1913 | (while (< i n) | |
1914 | (setq v (aref R-arg i)) | |
1915 | (when v | |
1916 | (setq j 0) | |
1917 | (while (>= (aref v j) 0) | |
1918 | (aset (aref new-R (aref v j)) (aref end-R (aref v j)) i) | |
1919 | (aset end-R (aref v j) (1+ (aref end-R (aref v j)))) | |
1920 | (setq j (1+ j)))) | |
1921 | (setq i (1+ i))) | |
1922 | ||
1923 | new-R)) | |
1924 | ||
1925 | (defun wisent-build-relations () | |
1926 | "Build relations." | |
1927 | (let (i j k rulep rp sp length nedges done state1 stateno | |
1928 | symbol1 symbol2 edge states v) | |
1929 | (setq includes (make-vector ngotos nil) | |
1930 | edge (make-vector (1+ ngotos) 0) | |
1931 | states (make-vector (1+ maxrhs) 0) | |
1932 | i 0) | |
1933 | ||
1934 | (while (< i ngotos) | |
1935 | (setq nedges 0 | |
1936 | state1 (aref from-state i) | |
1937 | symbol1 (aref accessing-symbol (aref to-state i)) | |
1938 | rulep (aref derives (- symbol1 ntokens))) | |
1939 | ||
1940 | (while (> (car rulep) 0) | |
1941 | (aset states 0 state1) | |
1942 | (setq length 1 | |
1943 | stateno state1 | |
1944 | rp (aref rrhs (car rulep))) ;; rp = ritem + rrhs[*rulep] | |
1945 | (while (> (aref ritem rp) 0) ;; *rp > 0 | |
1946 | (setq symbol2 (aref ritem rp) | |
1947 | sp (aref shift-table stateno) | |
1948 | k (shifts-nshifts sp) | |
1949 | v (shifts-shifts sp) | |
1950 | j 0) | |
1951 | (while (< j k) | |
1952 | (setq stateno (aref v j)) | |
1953 | (if (= (aref accessing-symbol stateno) symbol2) | |
1954 | (setq j k) ;; break | |
1955 | (setq j (1+ j)))) | |
1956 | ;; states[length++] = stateno; | |
1957 | (aset states length stateno) | |
1958 | (setq length (1+ length)) | |
1959 | (setq rp (1+ rp))) | |
1960 | ||
1961 | (if (not (aref consistent stateno)) | |
1962 | (wisent-add-lookback-edge stateno (car rulep) i)) | |
1963 | ||
1964 | (setq length (1- length) | |
1965 | done nil) | |
1966 | (while (not done) | |
1967 | (setq done t | |
1968 | rp (1- rp)) | |
1969 | (when (and (>= rp 0) (wisent-ISVAR (aref ritem rp))) | |
1970 | ;; stateno = states[--length]; | |
1971 | (setq length (1- length) | |
1972 | stateno (aref states length)) | |
1973 | (aset edge nedges (wisent-map-goto stateno (aref ritem rp))) | |
1974 | (setq nedges (1+ nedges)) | |
1975 | (if (aref nullable (- (aref ritem rp) ntokens)) | |
1976 | (setq done nil)))) | |
1977 | (setq rulep (cdr rulep))) | |
1978 | ||
1979 | (when (> nedges 0) | |
1980 | (setq v (make-vector (1+ nedges) 0) | |
1981 | j 0) | |
1982 | (aset includes i v) | |
1983 | (while (< j nedges) | |
1984 | (aset v j (aref edge j)) | |
1985 | (setq j (1+ j))) | |
1986 | (aset v nedges -1)) | |
1987 | (setq i (1+ i))) | |
1988 | ||
1989 | (setq includes (wisent-transpose includes ngotos)) | |
1990 | )) | |
1991 | ||
1992 | (defun wisent-compute-FOLLOWS () | |
1993 | "Compute follows." | |
1994 | (wisent-digraph includes)) | |
1995 | ||
1996 | (defun wisent-compute-lookaheads () | |
1997 | "Compute lookaheads." | |
1998 | (let (i j n v1 v2 sp) | |
1999 | (setq n (aref lookaheads nstates) | |
2000 | i 0) | |
2001 | (while (< i n) | |
2002 | (setq sp (aref lookback i)) | |
2003 | (while sp | |
2004 | (setq v1 (aref LA i) | |
2005 | v2 (aref F (car sp)) | |
2006 | j 0) | |
2007 | (while (< j tokensetsize) | |
2008 | ;; LA (i)[j] |= F (sp->value)[j] | |
2009 | (aset v1 j (logior (aref v1 j) (aref v2 j))) | |
2010 | (setq j (1+ j))) | |
2011 | (setq sp (cdr sp))) | |
2012 | (setq i (1+ i))))) | |
2013 | ||
2014 | (defun wisent-lalr () | |
2015 | "Make the nondeterministic finite state machine deterministic." | |
2016 | (setq tokensetsize (wisent-WORDSIZE ntokens)) | |
2017 | (wisent-set-state-table) | |
2018 | (wisent-set-accessing-symbol) | |
2019 | (wisent-set-shift-table) | |
2020 | (wisent-set-reduction-table) | |
2021 | (wisent-set-maxrhs) | |
2022 | (wisent-initialize-LA) | |
2023 | (wisent-set-goto-map) | |
2024 | (wisent-initialize-F) | |
2025 | (wisent-build-relations) | |
2026 | (wisent-compute-FOLLOWS) | |
2027 | (wisent-compute-lookaheads)) | |
2028 | \f | |
2029 | ;;;; ----------------------------------------------- | |
2030 | ;;;; Find and resolve or report look-ahead conflicts | |
2031 | ;;;; ----------------------------------------------- | |
2032 | ||
2033 | (defsubst wisent-log-resolution (state LAno token resolution) | |
2034 | "Log a shift-reduce conflict resolution. | |
2035 | In specified STATE between rule pointed by lookahead number LANO and | |
2036 | TOKEN, resolved as RESOLUTION." | |
2037 | (if (or wisent-verbose-flag wisent-debug-flag) | |
2038 | (wisent-log | |
2039 | "Conflict in state %d between rule %d and token %s resolved as %s.\n" | |
2040 | state (aref LAruleno LAno) (wisent-tag token) resolution))) | |
2041 | ||
2042 | (defun wisent-flush-shift (state token) | |
2043 | "Turn off the shift recorded in the specified STATE for TOKEN. | |
2044 | Used when we resolve a shift-reduce conflict in favor of the reduction." | |
2045 | (let (shiftp i k v) | |
2046 | (when (setq shiftp (aref shift-table state)) | |
2047 | (setq k (shifts-nshifts shiftp) | |
2048 | v (shifts-shifts shiftp) | |
2049 | i 0) | |
2050 | (while (< i k) | |
2051 | (if (and (not (zerop (aref v i))) | |
2052 | (= token (aref accessing-symbol (aref v i)))) | |
2053 | (aset v i 0)) | |
2054 | (setq i (1+ i)))))) | |
2055 | ||
2056 | (defun wisent-resolve-sr-conflict (state lookaheadnum) | |
2057 | "Attempt to resolve shift-reduce conflict for one rule. | |
2058 | Resolve by means of precedence declarations. The conflict occurred in | |
2059 | specified STATE for the rule pointed by the lookahead symbol | |
2060 | LOOKAHEADNUM. It has already been checked that the rule has a | |
2061 | precedence. A conflict is resolved by modifying the shift or reduce | |
2062 | tables so that there is no longer a conflict." | |
2063 | (let (i redprec errp errs nerrs token sprec sassoc) | |
2064 | ;; Find the rule to reduce by to get precedence of reduction | |
2065 | (setq token (aref tags (aref rprec (aref LAruleno lookaheadnum))) | |
2066 | redprec (wisent-prec token) | |
2067 | errp (make-errs) | |
2068 | errs (make-vector ntokens 0) | |
2069 | nerrs 0 | |
2070 | i 0) | |
2071 | (set-errs-errs errp errs) | |
2072 | (while (< i ntokens) | |
2073 | (setq token (aref tags i)) | |
2074 | (when (and (wisent-BITISSET (aref LA lookaheadnum) i) | |
2075 | (wisent-BITISSET lookaheadset i) | |
2076 | (setq sprec (wisent-prec token))) | |
2077 | ;; Shift-reduce conflict occurs for token number I and it has | |
2078 | ;; a precedence. The precedence of shifting is that of token | |
2079 | ;; I. | |
2080 | (cond | |
2081 | ((< sprec redprec) | |
2082 | (wisent-log-resolution state lookaheadnum i "reduce") | |
2083 | ;; Flush the shift for this token | |
2084 | (wisent-RESETBIT lookaheadset i) | |
2085 | (wisent-flush-shift state i) | |
2086 | ) | |
2087 | ((> sprec redprec) | |
2088 | (wisent-log-resolution state lookaheadnum i "shift") | |
2089 | ;; Flush the reduce for this token | |
2090 | (wisent-RESETBIT (aref LA lookaheadnum) i) | |
2091 | ) | |
2092 | (t | |
2093 | ;; Matching precedence levels. | |
2094 | ;; For left association, keep only the reduction. | |
2095 | ;; For right association, keep only the shift. | |
2096 | ;; For nonassociation, keep neither. | |
2097 | (setq sassoc (wisent-assoc token)) | |
2098 | (cond | |
2099 | ((eq sassoc 'right) | |
2100 | (wisent-log-resolution state lookaheadnum i "shift")) | |
2101 | ((eq sassoc 'left) | |
2102 | (wisent-log-resolution state lookaheadnum i "reduce")) | |
2103 | ((eq sassoc 'nonassoc) | |
2104 | (wisent-log-resolution state lookaheadnum i "an error")) | |
2105 | ) | |
2106 | (when (not (eq sassoc 'right)) | |
2107 | ;; Flush the shift for this token | |
2108 | (wisent-RESETBIT lookaheadset i) | |
2109 | (wisent-flush-shift state i)) | |
2110 | (when (not (eq sassoc 'left)) | |
2111 | ;; Flush the reduce for this token | |
2112 | (wisent-RESETBIT (aref LA lookaheadnum) i)) | |
2113 | (when (eq sassoc 'nonassoc) | |
2114 | ;; Record an explicit error for this token | |
2115 | (aset errs nerrs i) | |
2116 | (setq nerrs (1+ nerrs))) | |
2117 | ))) | |
2118 | (setq i (1+ i))) | |
2119 | (when (> nerrs 0) | |
2120 | (set-errs-nerrs errp nerrs) | |
2121 | (aset err-table state errp)) | |
2122 | )) | |
2123 | ||
2124 | (defun wisent-set-conflicts (state) | |
2125 | "Find and attempt to resolve conflicts in specified STATE." | |
2126 | (let (i j k v shiftp symbol) | |
2127 | (unless (aref consistent state) | |
2128 | (fillarray lookaheadset 0) | |
2129 | ||
2130 | (when (setq shiftp (aref shift-table state)) | |
2131 | (setq k (shifts-nshifts shiftp) | |
2132 | v (shifts-shifts shiftp) | |
2133 | i 0) | |
2134 | (while (and (< i k) | |
2135 | (wisent-ISTOKEN | |
2136 | (setq symbol (aref accessing-symbol (aref v i))))) | |
2137 | (or (zerop (aref v i)) | |
2138 | (wisent-SETBIT lookaheadset symbol)) | |
2139 | (setq i (1+ i)))) | |
2140 | ||
2141 | ;; Loop over all rules which require lookahead in this state | |
2142 | ;; first check for shift-reduce conflict, and try to resolve | |
2143 | ;; using precedence | |
2144 | (setq i (aref lookaheads state) | |
2145 | k (aref lookaheads (1+ state))) | |
2146 | (while (< i k) | |
2147 | (when (aref rprec (aref LAruleno i)) | |
2148 | (setq v (aref LA i) | |
2149 | j 0) | |
2150 | (while (< j tokensetsize) | |
2151 | (if (zerop (logand (aref v j) (aref lookaheadset j))) | |
2152 | (setq j (1+ j)) | |
2153 | ;; if (LA (i)[j] & lookaheadset[j]) | |
2154 | (wisent-resolve-sr-conflict state i) | |
2155 | (setq j tokensetsize)))) ;; break | |
2156 | (setq i (1+ i))) | |
2157 | ||
2158 | ;; Loop over all rules which require lookahead in this state | |
2159 | ;; Check for conflicts not resolved above. | |
2160 | (setq i (aref lookaheads state)) | |
2161 | (while (< i k) | |
2162 | (setq v (aref LA i) | |
2163 | j 0) | |
2164 | (while (< j tokensetsize) | |
2165 | ;; if (LA (i)[j] & lookaheadset[j]) | |
2166 | (if (not (zerop (logand (aref v j) (aref lookaheadset j)))) | |
2167 | (aset conflicts state t)) | |
2168 | (setq j (1+ j))) | |
2169 | (setq j 0) | |
2170 | (while (< j tokensetsize) | |
2171 | ;; lookaheadset[j] |= LA (i)[j]; | |
2172 | (aset lookaheadset j (logior (aref lookaheadset j) | |
2173 | (aref v j))) | |
2174 | (setq j (1+ j))) | |
2175 | (setq i (1+ i))) | |
2176 | ))) | |
2177 | ||
2178 | (defun wisent-resolve-conflicts () | |
2179 | "Find and resolve conflicts." | |
2180 | (let (i) | |
2181 | (setq conflicts (make-vector nstates nil) | |
2182 | shiftset (make-vector tokensetsize 0) | |
2183 | lookaheadset (make-vector tokensetsize 0) | |
2184 | err-table (make-vector nstates nil) | |
2185 | i 0) | |
2186 | (while (< i nstates) | |
2187 | (wisent-set-conflicts i) | |
2188 | (setq i (1+ i))))) | |
2189 | ||
2190 | (defun wisent-count-sr-conflicts (state) | |
2191 | "Count the number of shift/reduce conflicts in specified STATE." | |
2192 | (let (i j k shiftp symbol v) | |
2193 | (setq src-count 0 | |
2194 | shiftp (aref shift-table state)) | |
2195 | (when shiftp | |
2196 | (fillarray shiftset 0) | |
2197 | (fillarray lookaheadset 0) | |
2198 | (setq k (shifts-nshifts shiftp) | |
2199 | v (shifts-shifts shiftp) | |
2200 | i 0) | |
2201 | (while (< i k) | |
2202 | (when (not (zerop (aref v i))) | |
2203 | (setq symbol (aref accessing-symbol (aref v i))) | |
2204 | (if (wisent-ISVAR symbol) | |
2205 | (setq i k) ;; break | |
2206 | (wisent-SETBIT shiftset symbol))) | |
2207 | (setq i (1+ i))) | |
2208 | ||
2209 | (setq k (aref lookaheads (1+ state)) | |
2210 | i (aref lookaheads state)) | |
2211 | (while (< i k) | |
2212 | (setq v (aref LA i) | |
2213 | j 0) | |
2214 | (while (< j tokensetsize) | |
2215 | ;; lookaheadset[j] |= LA (i)[j] | |
2216 | (aset lookaheadset j (logior (aref lookaheadset j) | |
2217 | (aref v j))) | |
2218 | (setq j (1+ j))) | |
2219 | (setq i (1+ i))) | |
2220 | ||
2221 | (setq k 0) | |
2222 | (while (< k tokensetsize) | |
2223 | ;; lookaheadset[k] &= shiftset[k]; | |
2224 | (aset lookaheadset k (logand (aref lookaheadset k) | |
2225 | (aref shiftset k))) | |
2226 | (setq k (1+ k))) | |
2227 | ||
2228 | (setq i 0) | |
2229 | (while (< i ntokens) | |
2230 | (if (wisent-BITISSET lookaheadset i) | |
2231 | (setq src-count (1+ src-count))) | |
2232 | (setq i (1+ i)))) | |
2233 | src-count)) | |
2234 | ||
2235 | (defun wisent-count-rr-conflicts (state) | |
2236 | "Count the number of reduce/reduce conflicts in specified STATE." | |
2237 | (let (i j count n m) | |
2238 | (setq rrc-count 0 | |
2239 | m (aref lookaheads state) | |
2240 | n (aref lookaheads (1+ state))) | |
2241 | (when (>= (- n m) 2) | |
2242 | (setq i 0) | |
2243 | (while (< i ntokens) | |
2244 | (setq count 0 | |
2245 | j m) | |
2246 | (while (< j n) | |
2247 | (if (wisent-BITISSET (aref LA j) i) | |
2248 | (setq count (1+ count))) | |
2249 | (setq j (1+ j))) | |
2250 | ||
2251 | (if (>= count 2) | |
2252 | (setq rrc-count (1+ rrc-count))) | |
2253 | (setq i (1+ i)))) | |
2254 | rrc-count)) | |
2255 | ||
2256 | (defvar wisent-expected-conflicts nil | |
2257 | "*If non-nil suppress the warning about shift/reduce conflicts. | |
2258 | It is a decimal integer N that says there should be no warning if | |
2259 | there are N shift/reduce conflicts and no reduce/reduce conflicts. A | |
2260 | warning is given if there are either more or fewer conflicts, or if | |
2261 | there are any reduce/reduce conflicts.") | |
2262 | ||
2263 | (defun wisent-total-conflicts () | |
2264 | "Report the total number of conflicts." | |
2265 | (unless (and (zerop rrc-total) | |
2266 | (or (zerop src-total) | |
2267 | (= src-total (or wisent-expected-conflicts 0)))) | |
2268 | (let* ((src (wisent-source)) | |
2269 | (src (if src (concat " in " src) "")) | |
2270 | (msg (format "Grammar%s contains" src))) | |
2271 | (if (> src-total 0) | |
2272 | (setq msg (format "%s %d shift/reduce conflict%s" | |
2273 | msg src-total (if (> src-total 1) | |
2274 | "s" "")))) | |
2275 | (if (and (> src-total 0) (> rrc-total 0)) | |
2276 | (setq msg (format "%s and" msg))) | |
2277 | (if (> rrc-total 0) | |
2278 | (setq msg (format "%s %d reduce/reduce conflict%s" | |
2279 | msg rrc-total (if (> rrc-total 1) | |
2280 | "s" "")))) | |
2281 | (message msg)))) | |
2282 | ||
2283 | (defun wisent-print-conflicts () | |
2284 | "Report conflicts." | |
2285 | (let (i) | |
2286 | (setq src-total 0 | |
2287 | rrc-total 0 | |
2288 | i 0) | |
2289 | (while (< i nstates) | |
2290 | (when (aref conflicts i) | |
2291 | (wisent-count-sr-conflicts i) | |
2292 | (wisent-count-rr-conflicts i) | |
2293 | (setq src-total (+ src-total src-count) | |
2294 | rrc-total (+ rrc-total rrc-count)) | |
2295 | (when (or wisent-verbose-flag wisent-debug-flag) | |
2296 | (wisent-log "State %d contains" i) | |
2297 | (if (> src-count 0) | |
2298 | (wisent-log " %d shift/reduce conflict%s" | |
2299 | src-count (if (> src-count 1) "s" ""))) | |
2300 | ||
2301 | (if (and (> src-count 0) (> rrc-count 0)) | |
2302 | (wisent-log " and")) | |
2303 | ||
2304 | (if (> rrc-count 0) | |
2305 | (wisent-log " %d reduce/reduce conflict%s" | |
2306 | rrc-count (if (> rrc-count 1) "s" ""))) | |
2307 | ||
2308 | (wisent-log ".\n"))) | |
2309 | (setq i (1+ i))) | |
2310 | (wisent-total-conflicts))) | |
2311 | \f | |
2312 | ;;;; -------------------------------------- | |
2313 | ;;;; Report information on generated parser | |
2314 | ;;;; -------------------------------------- | |
2315 | (defun wisent-print-grammar () | |
2316 | "Print grammar." | |
2317 | (let (i j r break left-count right-count) | |
2318 | ||
2319 | (wisent-log "\n\nGrammar\n\n Number, Rule\n") | |
2320 | (setq i 1) | |
2321 | (while (<= i nrules) | |
2322 | ;; Don't print rules disabled in `wisent-reduce-grammar-tables'. | |
2323 | (when (aref ruseful i) | |
2324 | (wisent-log " %s %s ->" | |
2325 | (wisent-pad-string (number-to-string i) 6) | |
2326 | (wisent-tag (aref rlhs i))) | |
2327 | (setq r (aref rrhs i)) | |
2328 | (if (> (aref ritem r) 0) | |
2329 | (while (> (aref ritem r) 0) | |
2330 | (wisent-log " %s" (wisent-tag (aref ritem r))) | |
2331 | (setq r (1+ r))) | |
2332 | (wisent-log " /* empty */")) | |
2333 | (wisent-log "\n")) | |
2334 | (setq i (1+ i))) | |
2335 | ||
2336 | (wisent-log "\n\nTerminals, with rules where they appear\n\n") | |
2337 | (wisent-log "%s (-1)\n" (wisent-tag 0)) | |
2338 | (setq i 1) | |
2339 | (while (< i ntokens) | |
2340 | (wisent-log "%s (%d)" (wisent-tag i) i) | |
2341 | (setq j 1) | |
2342 | (while (<= j nrules) | |
2343 | (setq r (aref rrhs j) | |
2344 | break nil) | |
2345 | (while (and (not break) (> (aref ritem r) 0)) | |
2346 | (if (setq break (= (aref ritem r) i)) | |
2347 | (wisent-log " %d" j) | |
2348 | (setq r (1+ r)))) | |
2349 | (setq j (1+ j))) | |
2350 | (wisent-log "\n") | |
2351 | (setq i (1+ i))) | |
2352 | ||
2353 | (wisent-log "\n\nNonterminals, with rules where they appear\n\n") | |
2354 | (setq i ntokens) | |
2355 | (while (< i nsyms) | |
2356 | (setq left-count 0 | |
2357 | right-count 0 | |
2358 | j 1) | |
2359 | (while (<= j nrules) | |
2360 | (if (= (aref rlhs j) i) | |
2361 | (setq left-count (1+ left-count))) | |
2362 | (setq r (aref rrhs j) | |
2363 | break nil) | |
2364 | (while (and (not break) (> (aref ritem r) 0)) | |
2365 | (if (= (aref ritem r) i) | |
2366 | (setq right-count (1+ right-count) | |
2367 | break t) | |
2368 | (setq r (1+ r)))) | |
2369 | (setq j (1+ j))) | |
2370 | (wisent-log "%s (%d)\n " (wisent-tag i) i) | |
2371 | (when (> left-count 0) | |
2372 | (wisent-log " on left:") | |
2373 | (setq j 1) | |
2374 | (while (<= j nrules) | |
2375 | (if (= (aref rlhs j) i) | |
2376 | (wisent-log " %d" j)) | |
2377 | (setq j (1+ j)))) | |
2378 | (when (> right-count 0) | |
2379 | (if (> left-count 0) | |
2380 | (wisent-log ",")) | |
2381 | (wisent-log " on right:") | |
2382 | (setq j 1) | |
2383 | (while (<= j nrules) | |
2384 | (setq r (aref rrhs j) | |
2385 | break nil) | |
2386 | (while (and (not break) (> (aref ritem r) 0)) | |
2387 | (if (setq break (= (aref ritem r) i)) | |
2388 | (wisent-log " %d" j) | |
2389 | (setq r (1+ r)))) | |
2390 | (setq j (1+ j)))) | |
2391 | (wisent-log "\n") | |
2392 | (setq i (1+ i))) | |
2393 | )) | |
2394 | ||
2395 | (defun wisent-print-reductions (state) | |
2396 | "Print reductions on STATE." | |
2397 | (let (i j k v symbol m n defaulted | |
2398 | default-LA default-rule cmax count shiftp errp nodefault) | |
2399 | (setq nodefault nil | |
2400 | i 0) | |
2401 | (fillarray shiftset 0) | |
2402 | ||
2403 | (setq shiftp (aref shift-table state)) | |
2404 | (when shiftp | |
2405 | (setq k (shifts-nshifts shiftp) | |
2406 | v (shifts-shifts shiftp) | |
2407 | i 0) | |
2408 | (while (< i k) | |
2409 | (when (not (zerop (aref v i))) | |
2410 | (setq symbol (aref accessing-symbol (aref v i))) | |
2411 | (if (wisent-ISVAR symbol) | |
2412 | (setq i k) ;; break | |
2413 | ;; If this state has a shift for the error token, don't | |
2414 | ;; use a default rule. | |
2415 | (if (= symbol error-token-number) | |
2416 | (setq nodefault t)) | |
2417 | (wisent-SETBIT shiftset symbol))) | |
2418 | (setq i (1+ i)))) | |
2419 | ||
2420 | (setq errp (aref err-table state)) | |
2421 | (when errp | |
2422 | (setq k (errs-nerrs errp) | |
2423 | v (errs-errs errp) | |
2424 | i 0) | |
2425 | (while (< i k) | |
2426 | (if (not (zerop (setq symbol (aref v i)))) | |
2427 | (wisent-SETBIT shiftset symbol)) | |
2428 | (setq i (1+ i)))) | |
2429 | ||
2430 | (setq m (aref lookaheads state) | |
2431 | n (aref lookaheads (1+ state))) | |
2432 | ||
2433 | (cond | |
2434 | ((and (= (- n m) 1) (not nodefault)) | |
2435 | (setq default-rule (aref LAruleno m) | |
2436 | v (aref LA m) | |
2437 | k 0) | |
2438 | (while (< k tokensetsize) | |
2439 | (aset lookaheadset k (logand (aref v k) | |
2440 | (aref shiftset k))) | |
2441 | (setq k (1+ k))) | |
2442 | ||
2443 | (setq i 0) | |
2444 | (while (< i ntokens) | |
2445 | (if (wisent-BITISSET lookaheadset i) | |
2446 | (wisent-log " %s\t[reduce using rule %d (%s)]\n" | |
2447 | (wisent-tag i) default-rule | |
2448 | (wisent-tag (aref rlhs default-rule)))) | |
2449 | (setq i (1+ i))) | |
2450 | (wisent-log " $default\treduce using rule %d (%s)\n\n" | |
2451 | default-rule | |
2452 | (wisent-tag (aref rlhs default-rule))) | |
2453 | ) | |
2454 | ((>= (- n m) 1) | |
2455 | (setq cmax 0 | |
2456 | default-LA -1 | |
2457 | default-rule 0) | |
2458 | (when (not nodefault) | |
2459 | (setq i m) | |
2460 | (while (< i n) | |
2461 | (setq v (aref LA i) | |
2462 | count 0 | |
2463 | k 0) | |
2464 | (while (< k tokensetsize) | |
2465 | ;; lookaheadset[k] = LA (i)[k] & ~shiftset[k] | |
2466 | (aset lookaheadset k | |
2467 | (logand (aref v k) | |
2468 | (lognot (aref shiftset k)))) | |
2469 | (setq k (1+ k))) | |
2470 | (setq j 0) | |
2471 | (while (< j ntokens) | |
2472 | (if (wisent-BITISSET lookaheadset j) | |
2473 | (setq count (1+ count))) | |
2474 | (setq j (1+ j))) | |
2475 | (if (> count cmax) | |
2476 | (setq cmax count | |
2477 | default-LA i | |
2478 | default-rule (aref LAruleno i))) | |
2479 | (setq k 0) | |
2480 | (while (< k tokensetsize) | |
2481 | (aset shiftset k (logior (aref shiftset k) | |
2482 | (aref lookaheadset k))) | |
2483 | (setq k (1+ k))) | |
2484 | (setq i (1+ i)))) | |
2485 | ||
2486 | (fillarray shiftset 0) | |
2487 | ||
2488 | (when shiftp | |
2489 | (setq k (shifts-nshifts shiftp) | |
2490 | v (shifts-shifts shiftp) | |
2491 | i 0) | |
2492 | (while (< i k) | |
2493 | (when (not (zerop (aref v i))) | |
2494 | (setq symbol (aref accessing-symbol (aref v i))) | |
2495 | (if (wisent-ISVAR symbol) | |
2496 | (setq i k) ;; break | |
2497 | (wisent-SETBIT shiftset symbol))) | |
2498 | (setq i (1+ i)))) | |
2499 | ||
2500 | (setq i 0) | |
2501 | (while (< i ntokens) | |
2502 | (setq defaulted nil | |
2503 | count (if (wisent-BITISSET shiftset i) 1 0) | |
2504 | j m) | |
2505 | (while (< j n) | |
2506 | (when (wisent-BITISSET (aref LA j) i) | |
2507 | (if (zerop count) | |
2508 | (progn | |
2509 | (if (not (= j default-LA)) | |
2510 | (wisent-log | |
2511 | " %s\treduce using rule %d (%s)\n" | |
2512 | (wisent-tag i) (aref LAruleno j) | |
2513 | (wisent-tag (aref rlhs (aref LAruleno j)))) | |
2514 | (setq defaulted t)) | |
2515 | (setq count (1+ count))) | |
2516 | (if defaulted | |
2517 | (wisent-log | |
2518 | " %s\treduce using rule %d (%s)\n" | |
2519 | (wisent-tag i) (aref LAruleno default-LA) | |
2520 | (wisent-tag (aref rlhs (aref LAruleno default-LA))))) | |
2521 | (setq defaulted nil) | |
2522 | (wisent-log | |
2523 | " %s\t[reduce using rule %d (%s)]\n" | |
2524 | (wisent-tag i) (aref LAruleno j) | |
2525 | (wisent-tag (aref rlhs (aref LAruleno j)))))) | |
2526 | (setq j (1+ j))) | |
2527 | (setq i (1+ i))) | |
2528 | ||
2529 | (if (>= default-LA 0) | |
2530 | (wisent-log | |
2531 | " $default\treduce using rule %d (%s)\n" | |
2532 | default-rule | |
2533 | (wisent-tag (aref rlhs default-rule)))) | |
2534 | )))) | |
2535 | ||
2536 | (defun wisent-print-actions (state) | |
2537 | "Print actions on STATE." | |
2538 | (let (i j k v state1 symbol shiftp errp redp rule nerrs break) | |
2539 | (setq shiftp (aref shift-table state) | |
2540 | redp (aref reduction-table state) | |
2541 | errp (aref err-table state)) | |
2542 | (if (and (not shiftp) (not redp)) | |
2543 | (if (= final-state state) | |
2544 | (wisent-log " $default\taccept\n") | |
2545 | (wisent-log " NO ACTIONS\n")) | |
2546 | (if (not shiftp) | |
2547 | (setq i 0 | |
2548 | k 0) | |
2549 | (setq k (shifts-nshifts shiftp) | |
2550 | v (shifts-shifts shiftp) | |
2551 | i 0 | |
2552 | break nil) | |
2553 | (while (and (not break) (< i k)) | |
2554 | (if (zerop (setq state1 (aref v i))) | |
2555 | (setq i (1+ i)) | |
2556 | (setq symbol (aref accessing-symbol state1)) | |
2557 | ;; The following line used to be turned off. | |
2558 | (if (wisent-ISVAR symbol) | |
2559 | (setq break t) ;; break | |
2560 | (wisent-log " %s\tshift, and go to state %d\n" | |
2561 | (wisent-tag symbol) state1) | |
2562 | (setq i (1+ i))))) | |
2563 | (if (> i 0) | |
2564 | (wisent-log "\n"))) | |
2565 | ||
2566 | (when errp | |
2567 | (setq nerrs (errs-nerrs errp) | |
2568 | v (errs-errs errp) | |
2569 | j 0) | |
2570 | (while (< j nerrs) | |
2571 | (if (aref v j) | |
2572 | (wisent-log " %s\terror (nonassociative)\n" | |
2573 | (wisent-tag (aref v j)))) | |
2574 | (setq j (1+ j))) | |
2575 | (if (> j 0) | |
2576 | (wisent-log "\n"))) | |
2577 | ||
2578 | (cond | |
2579 | ((and (aref consistent state) redp) | |
2580 | (setq rule (aref (reductions-rules redp) 0) | |
2581 | symbol (aref rlhs rule)) | |
2582 | (wisent-log " $default\treduce using rule %d (%s)\n\n" | |
2583 | rule (wisent-tag symbol)) | |
2584 | ) | |
2585 | (redp | |
2586 | (wisent-print-reductions state) | |
2587 | )) | |
2588 | ||
2589 | (when (< i k) | |
2590 | (setq v (shifts-shifts shiftp)) | |
2591 | (while (< i k) | |
2592 | (when (setq state1 (aref v i)) | |
2593 | (setq symbol (aref accessing-symbol state1)) | |
2594 | (wisent-log " %s\tgo to state %d\n" | |
2595 | (wisent-tag symbol) state1)) | |
2596 | (setq i (1+ i))) | |
2597 | (wisent-log "\n")) | |
2598 | ))) | |
2599 | ||
2600 | (defun wisent-print-core (state) | |
2601 | "Print STATE core." | |
2602 | (let (i k rule statep sp sp1) | |
2603 | (setq statep (aref state-table state) | |
2604 | k (core-nitems statep)) | |
2605 | (when (> k 0) | |
2606 | (setq i 0) | |
2607 | (while (< i k) | |
2608 | ;; sp1 = sp = ritem + statep->items[i]; | |
2609 | (setq sp1 (aref (core-items statep) i) | |
2610 | sp sp1) | |
2611 | (while (> (aref ritem sp) 0) | |
2612 | (setq sp (1+ sp))) | |
2613 | ||
2614 | (setq rule (- (aref ritem sp))) | |
2615 | (wisent-log " %s -> " (wisent-tag (aref rlhs rule))) | |
2616 | ||
2617 | (setq sp (aref rrhs rule)) | |
2618 | (while (< sp sp1) | |
2619 | (wisent-log "%s " (wisent-tag (aref ritem sp))) | |
2620 | (setq sp (1+ sp))) | |
2621 | (wisent-log ".") | |
2622 | (while (> (aref ritem sp) 0) | |
2623 | (wisent-log " %s" (wisent-tag (aref ritem sp))) | |
2624 | (setq sp (1+ sp))) | |
2625 | (wisent-log " (rule %d)\n" rule) | |
2626 | (setq i (1+ i))) | |
2627 | (wisent-log "\n")))) | |
2628 | ||
2629 | (defun wisent-print-state (state) | |
2630 | "Print information on STATE." | |
2631 | (wisent-log "\n\nstate %d\n\n" state) | |
2632 | (wisent-print-core state) | |
2633 | (wisent-print-actions state)) | |
2634 | ||
2635 | (defun wisent-print-states () | |
2636 | "Print information on states." | |
2637 | (let ((i 0)) | |
2638 | (while (< i nstates) | |
2639 | (wisent-print-state i) | |
2640 | (setq i (1+ i))))) | |
2641 | ||
2642 | (defun wisent-print-results () | |
2643 | "Print information on generated parser. | |
2644 | Report detailed informations if `wisent-verbose-flag' or | |
2645 | `wisent-debug-flag' are non-nil." | |
2646 | (when (or wisent-verbose-flag wisent-debug-flag) | |
2647 | (wisent-print-useless)) | |
2648 | (wisent-print-conflicts) | |
2649 | (when (or wisent-verbose-flag wisent-debug-flag) | |
2650 | (wisent-print-grammar) | |
2651 | (wisent-print-states)) | |
2652 | ;; Append output to log file when running in batch mode | |
2653 | (when (wisent-noninteractive) | |
2654 | (wisent-append-to-log-file) | |
2655 | (wisent-clear-log))) | |
2656 | \f | |
2657 | ;;;; --------------------------------- | |
2658 | ;;;; Build the generated parser tables | |
2659 | ;;;; --------------------------------- | |
2660 | ||
2661 | (defun wisent-action-row (state actrow) | |
2662 | "Figure out the actions for the specified STATE. | |
2663 | Decide what to do for each type of token if seen as the lookahead | |
2664 | token in specified state. The value returned is used as the default | |
2665 | action for the state. In addition, ACTROW is filled with what to do | |
2666 | for each kind of token, index by symbol number, with nil meaning do | |
2667 | the default action. The value 'error, means this situation is an | |
2668 | error. The parser recognizes this value specially. | |
2669 | ||
2670 | This is where conflicts are resolved. The loop over lookahead rules | |
2671 | considered lower-numbered rules last, and the last rule considered | |
2672 | that likes a token gets to handle it." | |
2673 | (let (i j k m n v default-rule nreds rule max count | |
2674 | shift-state symbol redp shiftp errp nodefault) | |
2675 | ||
2676 | (fillarray actrow nil) | |
2677 | ||
2678 | (setq default-rule 0 | |
2679 | nodefault nil ;; nil inhibit having any default reduction | |
2680 | nreds 0 | |
2681 | m 0 | |
2682 | n 0 | |
2683 | redp (aref reduction-table state)) | |
2684 | ||
2685 | (when redp | |
2686 | (setq nreds (reductions-nreds redp)) | |
2687 | (when (>= nreds 1) | |
2688 | ;; loop over all the rules available here which require | |
2689 | ;; lookahead | |
2690 | (setq m (aref lookaheads state) | |
2691 | n (aref lookaheads (1+ state)) | |
2692 | i (1- n)) | |
2693 | (while (>= i m) | |
2694 | ;; and find each token which the rule finds acceptable to | |
2695 | ;; come next | |
2696 | (setq j 0) | |
2697 | (while (< j ntokens) | |
2698 | ;; and record this rule as the rule to use if that token | |
2699 | ;; follows. | |
2700 | (if (wisent-BITISSET (aref LA i) j) | |
2701 | (aset actrow j (- (aref LAruleno i))) | |
2702 | ) | |
2703 | (setq j (1+ j))) | |
2704 | (setq i (1- i))))) | |
2705 | ||
2706 | ;; Now see which tokens are allowed for shifts in this state. For | |
2707 | ;; them, record the shift as the thing to do. So shift is | |
2708 | ;; preferred to reduce. | |
2709 | (setq shiftp (aref shift-table state)) | |
2710 | (when shiftp | |
2711 | (setq k (shifts-nshifts shiftp) | |
2712 | v (shifts-shifts shiftp) | |
2713 | i 0) | |
2714 | (while (< i k) | |
2715 | (setq shift-state (aref v i)) | |
2716 | (if (zerop shift-state) | |
2717 | nil ;; continue | |
2718 | (setq symbol (aref accessing-symbol shift-state)) | |
2719 | (if (wisent-ISVAR symbol) | |
2720 | (setq i k) ;; break | |
2721 | (aset actrow symbol shift-state) | |
2722 | ;; Do not use any default reduction if there is a shift | |
2723 | ;; for error | |
2724 | (if (= symbol error-token-number) | |
2725 | (setq nodefault t)))) | |
2726 | (setq i (1+ i)))) | |
2727 | ||
2728 | ;; See which tokens are an explicit error in this state (due to | |
2729 | ;; %nonassoc). For them, record error as the action. | |
2730 | (setq errp (aref err-table state)) | |
2731 | (when errp | |
2732 | (setq k (errs-nerrs errp) | |
2733 | v (errs-errs errp) | |
2734 | i 0) | |
2735 | (while (< i k) | |
2736 | (aset actrow (aref v i) wisent-error-tag) | |
2737 | (setq i (1+ i)))) | |
2738 | ||
2739 | ;; Now find the most common reduction and make it the default | |
2740 | ;; action for this state. | |
2741 | (when (and (>= nreds 1) (not nodefault)) | |
2742 | (if (aref consistent state) | |
2743 | (setq default-rule (- (aref (reductions-rules redp) 0))) | |
2744 | (setq max 0 | |
2745 | i m) | |
2746 | (while (< i n) | |
2747 | (setq count 0 | |
2748 | rule (- (aref LAruleno i)) | |
2749 | j 0) | |
2750 | (while (< j ntokens) | |
2751 | (if (and (numberp (aref actrow j)) | |
2752 | (= (aref actrow j) rule)) | |
2753 | (setq count (1+ count))) | |
2754 | (setq j (1+ j))) | |
2755 | (if (> count max) | |
2756 | (setq max count | |
2757 | default-rule rule)) | |
2758 | (setq i (1+ i))) | |
2759 | ;; actions which match the default are replaced with zero, | |
2760 | ;; which means "use the default" | |
2761 | (when (> max 0) | |
2762 | (setq j 0) | |
2763 | (while (< j ntokens) | |
2764 | (if (and (numberp (aref actrow j)) | |
2765 | (= (aref actrow j) default-rule)) | |
2766 | (aset actrow j nil)) | |
2767 | (setq j (1+ j))) | |
2768 | ))) | |
2769 | ||
2770 | ;; If have no default rule, if this is the final state the default | |
2771 | ;; is accept else it is an error. So replace any action which | |
2772 | ;; says "error" with "use default". | |
2773 | (when (zerop default-rule) | |
2774 | (if (= final-state state) | |
2775 | (setq default-rule wisent-accept-tag) | |
2776 | (setq j 0) | |
2777 | (while (< j ntokens) | |
2778 | (if (eq (aref actrow j) wisent-error-tag) | |
2779 | (aset actrow j nil)) | |
2780 | (setq j (1+ j))) | |
2781 | (setq default-rule wisent-error-tag))) | |
2782 | default-rule)) | |
2783 | ||
2784 | (defconst wisent-default-tag 'default | |
2785 | "Tag used in an action table to indicate a default action.") | |
2786 | ||
2787 | ;; These variables only exist locally in the function | |
2788 | ;; `wisent-state-actions' and are shared by all other nested callees. | |
2789 | (wisent-defcontext semantic-actions | |
2790 | ;; Uninterned symbols used in code generation. | |
2791 | stack sp gotos state | |
2792 | ;; Name of the current semantic action | |
2793 | NAME) | |
2794 | ||
2795 | (defun wisent-state-actions () | |
2796 | "Figure out the actions for every state. | |
2797 | Return the action table." | |
2798 | ;; Store the semantic action obarray in (unused) RCODE[0]. | |
2799 | (aset rcode 0 (make-vector 13 0)) | |
2800 | (let (i j action-table actrow action) | |
2801 | (setq action-table (make-vector nstates nil) | |
2802 | actrow (make-vector ntokens nil) | |
2803 | i 0) | |
2804 | (wisent-with-context semantic-actions | |
2805 | (setq stack (make-symbol "stack") | |
2806 | sp (make-symbol "sp") | |
2807 | gotos (make-symbol "gotos") | |
2808 | state (make-symbol "state")) | |
2809 | (while (< i nstates) | |
2810 | (setq action (wisent-action-row i actrow)) | |
2811 | ;; Translate a reduction into semantic action | |
2812 | (and (integerp action) (< action 0) | |
2813 | (setq action (wisent-semantic-action (- action)))) | |
2814 | (aset action-table i (list (cons wisent-default-tag action))) | |
2815 | (setq j 0) | |
2816 | (while (< j ntokens) | |
2817 | (when (setq action (aref actrow j)) | |
2818 | ;; Translate a reduction into semantic action | |
2819 | (and (integerp action) (< action 0) | |
2820 | (setq action (wisent-semantic-action (- action)))) | |
2821 | (aset action-table i (cons (cons (aref tags j) action) | |
2822 | (aref action-table i))) | |
2823 | ) | |
2824 | (setq j (1+ j))) | |
2825 | (aset action-table i (nreverse (aref action-table i))) | |
2826 | (setq i (1+ i))) | |
2827 | action-table))) | |
2828 | ||
2829 | (defun wisent-goto-actions () | |
2830 | "Figure out what to do after reducing with each rule. | |
2831 | Depending on the saved state from before the beginning of parsing the | |
2832 | data that matched this rule. Return the goto table." | |
2833 | (let (i j m n symbol state goto-table) | |
2834 | (setq goto-table (make-vector nstates nil) | |
2835 | i ntokens) | |
2836 | (while (< i nsyms) | |
2837 | (setq symbol (- i ntokens) | |
2838 | m (aref goto-map symbol) | |
2839 | n (aref goto-map (1+ symbol)) | |
2840 | j m) | |
2841 | (while (< j n) | |
2842 | (setq state (aref from-state j)) | |
2843 | (aset goto-table state | |
2844 | (cons (cons (aref tags i) (aref to-state j)) | |
2845 | (aref goto-table state))) | |
2846 | (setq j (1+ j))) | |
2847 | (setq i (1+ i))) | |
2848 | goto-table)) | |
2849 | ||
2850 | (defsubst wisent-quote-p (sym) | |
2851 | "Return non-nil if SYM is bound to the `quote' function." | |
2852 | (condition-case nil | |
2853 | (eq (indirect-function sym) | |
2854 | (indirect-function 'quote)) | |
2855 | (error nil))) | |
2856 | ||
2857 | (defsubst wisent-backquote-p (sym) | |
2858 | "Return non-nil if SYM is bound to the `backquote' function." | |
2859 | (condition-case nil | |
2860 | (eq (indirect-function sym) | |
2861 | (indirect-function 'backquote)) | |
2862 | (error nil))) | |
2863 | ||
2864 | (defun wisent-check-$N (x m) | |
2865 | "Return non-nil if X is a valid $N or $regionN symbol. | |
2866 | That is if X is a $N or $regionN symbol with N >= 1 and N <= M. | |
2867 | Also warn if X is a $N or $regionN symbol with N < 1 or N > M." | |
2868 | (when (symbolp x) | |
2869 | (let* ((n (symbol-name x)) | |
2870 | (i (and (string-match "\\`\\$\\(region\\)?\\([0-9]+\\)\\'" n) | |
2871 | (string-to-number (match-string 2 n))))) | |
2872 | (when i | |
2873 | (if (and (>= i 1) (<= i m)) | |
2874 | t | |
2875 | (message | |
2876 | "*** In %s, %s might be a free variable (rule has %s)" | |
2877 | NAME x (format (cond ((< m 1) "no component") | |
2878 | ((= m 1) "%d component") | |
2879 | ("%d components")) | |
2880 | m)) | |
2881 | nil))))) | |
2882 | ||
2883 | (defun wisent-semantic-action-expand-body (body n &optional found) | |
2884 | "Parse BODY of semantic action. | |
2885 | N is the maximum number of $N variables that can be referenced in | |
2886 | BODY. Warn on references out of permitted range. | |
2887 | Optional argument FOUND is the accumulated list of '$N' references | |
2888 | encountered so far. | |
2889 | Return a cons (FOUND . XBODY), where FOUND is the list of $N | |
2890 | references found in BODY, and XBODY is BODY expression with | |
2891 | `backquote' forms expanded." | |
2892 | (if (not (listp body)) | |
2893 | ;; BODY is an atom, no expansion needed | |
2894 | (progn | |
2895 | (if (wisent-check-$N body n) | |
2896 | ;; Accumulate $i symbol | |
2897 | (add-to-list 'found body)) | |
2898 | (cons found body)) | |
2899 | ;; BODY is a list, expand inside it | |
2900 | (let (xbody sexpr) | |
2901 | ;; If backquote expand it first | |
2902 | (if (wisent-backquote-p (car body)) | |
2903 | (setq body (macroexpand body))) | |
2904 | (while body | |
2905 | (setq sexpr (car body) | |
2906 | body (cdr body)) | |
2907 | (cond | |
2908 | ;; Function call excepted quote expression | |
2909 | ((and (consp sexpr) | |
2910 | (not (wisent-quote-p (car sexpr)))) | |
2911 | (setq sexpr (wisent-semantic-action-expand-body sexpr n found) | |
2912 | found (car sexpr) | |
2913 | sexpr (cdr sexpr))) | |
2914 | ;; $i symbol | |
2915 | ((wisent-check-$N sexpr n) | |
2916 | ;; Accumulate $i symbol | |
2917 | (add-to-list 'found sexpr)) | |
2918 | ) | |
2919 | ;; Accumulate expanded forms | |
2920 | (setq xbody (nconc xbody (list sexpr)))) | |
2921 | (cons found xbody)))) | |
2922 | ||
2923 | (defun wisent-semantic-action (r) | |
2924 | "Set up the Elisp function for semantic action at rule R. | |
2925 | On entry RCODE[R] contains a vector [BODY N (NTERM I)] where BODY is the | |
2926 | body of the semantic action, N is the maximum number of values | |
2927 | available in the parser's stack, NTERM is the nonterminal the semantic | |
2928 | action belongs to, and I is the index of the semantic action inside | |
2929 | NTERM definition. Return the semantic action symbol. | |
2930 | The semantic action function accepts three arguments: | |
2931 | ||
2932 | - the state/value stack | |
2933 | - the top-of-stack index | |
2934 | - the goto table | |
2935 | ||
2936 | And returns the updated top-of-stack index." | |
2937 | (if (not (aref ruseful r)) | |
2938 | (aset rcode r nil) | |
2939 | (let* ((actn (aref rcode r)) | |
2940 | (n (aref actn 1)) ; nb of val avail. in stack | |
2941 | (NAME (apply 'format "%s:%d" (aref actn 2))) | |
2942 | (form (wisent-semantic-action-expand-body (aref actn 0) n)) | |
2943 | ($l (car form)) ; list of $vars used in body | |
2944 | (form (cdr form)) ; expanded form of body | |
2945 | (nt (aref rlhs r)) ; nonterminal item no. | |
2946 | (bl nil) ; `let*' binding list | |
2947 | $v i j) | |
2948 | ||
2949 | ;; Compute $N and $regionN bindings | |
2950 | (setq i n) | |
2951 | (while (> i 0) | |
2952 | (setq j (1+ (* 2 (- n i)))) | |
2953 | ;; Only bind $regionI if used in action | |
2954 | (setq $v (intern (format "$region%d" i))) | |
2955 | (if (memq $v $l) | |
2956 | (setq bl (cons `(,$v (cdr (aref ,stack (- ,sp ,j)))) bl))) | |
2957 | ;; Only bind $I if used in action | |
2958 | (setq $v (intern (format "$%d" i))) | |
2959 | (if (memq $v $l) | |
2960 | (setq bl (cons `(,$v (car (aref ,stack (- ,sp ,j)))) bl))) | |
2961 | (setq i (1- i))) | |
2962 | ||
2963 | ;; Compute J, the length of rule's RHS. It will give the | |
2964 | ;; current parser state at STACK[SP - 2*J], and where to push | |
2965 | ;; the new semantic value and the next state, respectively at: | |
2966 | ;; STACK[SP - 2*J + 1] and STACK[SP - 2*J + 2]. Generally N, | |
2967 | ;; the maximum number of values available in the stack, is equal | |
2968 | ;; to J. But, for mid-rule actions, N is the number of rule | |
2969 | ;; elements before the action and J is always 0 (empty rule). | |
2970 | (setq i (aref rrhs r) | |
2971 | j 0) | |
2972 | (while (> (aref ritem i) 0) | |
2973 | (setq j (1+ j) | |
2974 | i (1+ i))) | |
2975 | ||
2976 | ;; Create the semantic action symbol. | |
2977 | (setq actn (intern NAME (aref rcode 0))) | |
2978 | ||
2979 | ;; Store source code in function cell of the semantic action | |
2980 | ;; symbol. It will be byte-compiled at automaton's compilation | |
2981 | ;; time. Using a byte-compiled automaton can significantly | |
2982 | ;; speed up parsing! | |
2983 | (fset actn | |
2984 | `(lambda (,stack ,sp ,gotos) | |
2985 | (let* (,@bl | |
2986 | ($region | |
2987 | ,(cond | |
2988 | ((= n 1) | |
2989 | (if (assq '$region1 bl) | |
2990 | '$region1 | |
2991 | `(cdr (aref ,stack (1- ,sp))))) | |
2992 | ((> n 1) | |
2993 | `(wisent-production-bounds | |
2994 | ,stack (- ,sp ,(1- (* 2 n))) (1- ,sp))))) | |
2995 | ($action ,NAME) | |
2996 | ($nterm ',(aref tags nt)) | |
2997 | ,@(and (> j 0) `((,sp (- ,sp ,(* j 2))))) | |
2998 | (,state (cdr (assq $nterm | |
2999 | (aref ,gotos | |
3000 | (aref ,stack ,sp)))))) | |
3001 | (setq ,sp (+ ,sp 2)) | |
3002 | ;; push semantic value | |
3003 | (aset ,stack (1- ,sp) (cons ,form $region)) | |
3004 | ;; push next state | |
3005 | (aset ,stack ,sp ,state) | |
3006 | ;; return new top of stack | |
3007 | ,sp))) | |
3008 | ||
3009 | ;; Return the semantic action symbol | |
3010 | actn))) | |
3011 | \f | |
3012 | ;;;; ---------------------------- | |
3013 | ;;;; Build parser LALR automaton. | |
3014 | ;;;; ---------------------------- | |
3015 | ||
3016 | (defun wisent-parser-automaton () | |
3017 | "Compute and return LALR(1) automaton from GRAMMAR. | |
3018 | GRAMMAR is in internal format. GRAM/ACTS are grammar rules | |
3019 | in internal format. STARTS defines the start symbols." | |
3020 | ;; Check for useless stuff | |
3021 | (wisent-reduce-grammar) | |
3022 | ||
3023 | (wisent-set-derives) | |
3024 | (wisent-set-nullable) | |
3025 | ;; convert to nondeterministic finite state machine. | |
3026 | (wisent-generate-states) | |
3027 | ;; make it deterministic. | |
3028 | (wisent-lalr) | |
3029 | ;; Find and record any conflicts: places where one token of | |
3030 | ;; lookahead is not enough to disambiguate the parsing. Also | |
3031 | ;; resolve s/r conflicts based on precedence declarations. | |
3032 | (wisent-resolve-conflicts) | |
3033 | (wisent-print-results) | |
3034 | ||
3035 | (vector (wisent-state-actions) ; action table | |
3036 | (wisent-goto-actions) ; goto table | |
3037 | start-table ; start symbols | |
3038 | (aref rcode 0) ; sem. action symbol obarray | |
3039 | ) | |
3040 | ) | |
3041 | \f | |
3042 | ;;;; ------------------- | |
3043 | ;;;; Parse input grammar | |
3044 | ;;;; ------------------- | |
3045 | ||
3046 | (defconst wisent-reserved-symbols (list wisent-error-term) | |
3047 | "The list of reserved symbols. | |
3048 | Also all symbols starting with a character defined in | |
3049 | `wisent-reserved-capitals' are reserved for internal use.") | |
3050 | ||
3051 | (defconst wisent-reserved-capitals '(?\$ ?\@) | |
3052 | "The list of reserved capital letters. | |
3053 | All symbol starting with one of these letters are reserved for | |
3054 | internal use.") | |
3055 | ||
3056 | (defconst wisent-starts-nonterm '$STARTS | |
3057 | "Main start symbol. | |
3058 | It gives the rules for start symbols.") | |
3059 | ||
3060 | (defvar wisent-single-start-flag nil | |
3061 | "Non-nil means allows only one start symbol like in Bison. | |
3062 | That is don't add extra start rules to the grammar. This is | |
3063 | useful to compare the Wisent's generated automaton with the Bison's | |
3064 | one.") | |
3065 | ||
3066 | (defsubst wisent-ISVALID-VAR (x) | |
3067 | "Return non-nil if X is a character or an allowed symbol." | |
3068 | (and x (symbolp x) | |
3069 | (not (memq (aref (symbol-name x) 0) wisent-reserved-capitals)) | |
3070 | (not (memq x wisent-reserved-symbols)))) | |
3071 | ||
3072 | (defsubst wisent-ISVALID-TOKEN (x) | |
3073 | "Return non-nil if X is a character or an allowed symbol." | |
3074 | (or (wisent-char-p x) | |
3075 | (wisent-ISVALID-VAR x))) | |
3076 | ||
3077 | (defun wisent-push-token (symbol &optional nocheck) | |
3078 | "Push a new SYMBOL in the list of tokens. | |
3079 | Bypass checking if NOCHECK is non-nil." | |
3080 | ;; Check | |
3081 | (or nocheck (wisent-ISVALID-TOKEN symbol) | |
3082 | (error "Invalid terminal symbol: %S" symbol)) | |
3083 | (if (memq symbol token-list) | |
3084 | (message "*** duplicate terminal `%s' ignored" symbol) | |
3085 | ;; Set up properties | |
3086 | (wisent-set-prec symbol nil) | |
3087 | (wisent-set-assoc symbol nil) | |
3088 | (wisent-set-item-number symbol ntokens) | |
3089 | ;; Add | |
3090 | (setq ntokens (1+ ntokens) | |
3091 | token-list (cons symbol token-list)))) | |
3092 | ||
3093 | (defun wisent-push-var (symbol &optional nocheck) | |
3094 | "Push a new SYMBOL in the list of nonterminals. | |
3095 | Bypass checking if NOCHECK is non-nil." | |
3096 | ;; Check | |
3097 | (unless nocheck | |
3098 | (or (wisent-ISVALID-VAR symbol) | |
3099 | (error "Invalid nonterminal symbol: %S" symbol)) | |
3100 | (if (memq symbol var-list) | |
3101 | (error "Nonterminal `%s' already defined" symbol))) | |
3102 | ;; Set up properties | |
3103 | (wisent-set-item-number symbol nvars) | |
3104 | ;; Add | |
3105 | (setq nvars (1+ nvars) | |
3106 | var-list (cons symbol var-list))) | |
3107 | ||
3108 | (defun wisent-parse-nonterminals (defs) | |
3109 | "Parse nonterminal definitions in DEFS. | |
3110 | Fill in each element of the global arrays RPREC, RCODE, RUSEFUL with | |
3111 | respectively rule precedence level, semantic action code and | |
3112 | usefulness flag. Return a list of rules of the form (LHS . RHS) where | |
3113 | LHS and RHS are respectively the Left Hand Side and Right Hand Side of | |
3114 | the rule." | |
3115 | (setq rprec nil | |
3116 | rcode nil | |
3117 | nitems 0 | |
3118 | nrules 0) | |
3119 | (let (def nonterm rlist rule rules rhs rest item items | |
3120 | rhl plevel semact @n @count iactn) | |
3121 | (setq @count 0) | |
3122 | (while defs | |
3123 | (setq def (car defs) | |
3124 | defs (cdr defs) | |
3125 | nonterm (car def) | |
3126 | rlist (cdr def) | |
3127 | iactn 0) | |
3128 | (or (consp rlist) | |
3129 | (error "Invalid nonterminal definition syntax: %S" def)) | |
3130 | (while rlist | |
3131 | (setq rule (car rlist) | |
3132 | rlist (cdr rlist) | |
3133 | items (car rule) | |
3134 | rest (cdr rule) | |
3135 | rhl 0 | |
3136 | rhs nil) | |
3137 | ||
3138 | ;; Check & count items | |
3139 | (setq nitems (1+ nitems)) ;; LHS item | |
3140 | (while items | |
3141 | (setq item (car items) | |
3142 | items (cdr items) | |
3143 | nitems (1+ nitems)) ;; RHS items | |
3144 | (if (listp item) | |
3145 | ;; Mid-rule action | |
3146 | (progn | |
3147 | (setq @count (1+ @count) | |
3148 | @n (intern (format "@%d" @count))) | |
3149 | (wisent-push-var @n t) | |
3150 | ;; Push a new empty rule with the mid-rule action | |
3151 | (setq semact (vector item rhl (list nonterm iactn)) | |
3152 | iactn (1+ iactn) | |
3153 | plevel nil | |
3154 | rcode (cons semact rcode) | |
3155 | rprec (cons plevel rprec) | |
3156 | item @n ;; Replace action by @N nonterminal | |
3157 | rules (cons (list item) rules) | |
3158 | nitems (1+ nitems) | |
3159 | nrules (1+ nrules))) | |
3160 | ;; Check terminal or nonterminal symbol | |
3161 | (cond | |
3162 | ((or (memq item token-list) (memq item var-list))) | |
3163 | ;; Create new literal character token | |
3164 | ((wisent-char-p item) (wisent-push-token item t)) | |
3165 | ((error "Symbol `%s' is used, but is not defined as a token and has no rules" | |
3166 | item)))) | |
3167 | (setq rhl (1+ rhl) | |
3168 | rhs (cons item rhs))) | |
3169 | ||
3170 | ;; Check & collect rule precedence level | |
3171 | (setq plevel (when (vectorp (car rest)) | |
3172 | (setq item (car rest) | |
3173 | rest (cdr rest)) | |
3174 | (if (and (= (length item) 1) | |
3175 | (memq (aref item 0) token-list) | |
3176 | (wisent-prec (aref item 0))) | |
3177 | (wisent-item-number (aref item 0)) | |
3178 | (error "Invalid rule precedence level syntax: %S" item))) | |
3179 | rprec (cons plevel rprec)) | |
3180 | ||
3181 | ;; Check & collect semantic action body | |
3182 | (setq semact (vector | |
3183 | (if rest | |
3184 | (if (cdr rest) | |
3185 | (error "Invalid semantic action syntax: %S" rest) | |
3186 | (car rest)) | |
3187 | ;; Give a default semantic action body: nil | |
3188 | ;; for an empty rule or $1, the value of the | |
3189 | ;; first symbol in the rule, otherwise. | |
3190 | (if (> rhl 0) '$1 '())) | |
3191 | rhl | |
3192 | (list nonterm iactn)) | |
3193 | iactn (1+ iactn) | |
3194 | rcode (cons semact rcode)) | |
3195 | (setq rules (cons (cons nonterm (nreverse rhs)) rules) | |
3196 | nrules (1+ nrules)))) | |
3197 | ||
3198 | (setq ruseful (make-vector (1+ nrules) t) | |
3199 | rprec (vconcat (cons nil (nreverse rprec))) | |
3200 | rcode (vconcat (cons nil (nreverse rcode)))) | |
3201 | (nreverse rules) | |
3202 | )) | |
3203 | ||
3204 | (defun wisent-parse-grammar (grammar &optional start-list) | |
3205 | "Parse GRAMMAR and build a suitable internal representation. | |
3206 | Optional argument START-LIST defines the start symbols. | |
3207 | GRAMMAR is a list of form: (TOKENS ASSOCS . NONTERMS) | |
3208 | ||
3209 | TOKENS is a list of terminal symbols (tokens). | |
3210 | ||
3211 | ASSOCS is nil or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements | |
3212 | describing the associativity of TOKENS. ASSOC-TYPE must be one of the | |
3213 | `default-prec' `nonassoc', `left' or `right' symbols. When ASSOC-TYPE | |
3214 | is `default-prec', ASSOC-VALUE must be nil or t (the default). | |
3215 | Otherwise it is a list of tokens which must have been previously | |
3216 | declared in TOKENS. | |
3217 | ||
3218 | NONTERMS is the list of non terminal definitions (see function | |
3219 | `wisent-parse-nonterminals')." | |
3220 | (or (and (consp grammar) (> (length grammar) 2)) | |
3221 | (error "Bad input grammar")) | |
3222 | ||
3223 | (let (i r rhs pre dpre lst start-var assoc rules item | |
3224 | token var def tokens defs ep-token ep-var ep-def) | |
3225 | ||
3226 | ;; Built-in tokens | |
3227 | (setq ntokens 0 nvars 0) | |
3228 | (wisent-push-token wisent-eoi-term t) | |
3229 | (wisent-push-token wisent-error-term t) | |
3230 | ||
3231 | ;; Check/collect terminals | |
3232 | (setq lst (car grammar)) | |
3233 | (while lst | |
3234 | (wisent-push-token (car lst)) | |
3235 | (setq lst (cdr lst))) | |
3236 | ||
3237 | ;; Check/Set up tokens precedence & associativity | |
3238 | (setq lst (nth 1 grammar) | |
3239 | pre 0 | |
3240 | defs nil | |
3241 | dpre nil | |
3242 | default-prec t) | |
3243 | (while lst | |
3244 | (setq def (car lst) | |
3245 | assoc (car def) | |
3246 | tokens (cdr def) | |
3247 | lst (cdr lst)) | |
3248 | (if (eq assoc 'default-prec) | |
3249 | (progn | |
3250 | (or (null (cdr tokens)) | |
3251 | (memq (car tokens) '(t nil)) | |
3252 | (error "Invalid default-prec value: %S" tokens)) | |
3253 | (setq default-prec (car tokens)) | |
3254 | (if dpre | |
3255 | (message "*** redefining default-prec to %s" | |
3256 | default-prec)) | |
3257 | (setq dpre t)) | |
3258 | (or (memq assoc '(left right nonassoc)) | |
3259 | (error "Invalid associativity syntax: %S" assoc)) | |
3260 | (setq pre (1+ pre)) | |
3261 | (while tokens | |
3262 | (setq token (car tokens) | |
3263 | tokens (cdr tokens)) | |
3264 | (if (memq token defs) | |
3265 | (message "*** redefining precedence of `%s'" token)) | |
3266 | (or (memq token token-list) | |
3267 | ;; Define token not previously declared. | |
3268 | (wisent-push-token token)) | |
3269 | (setq defs (cons token defs)) | |
3270 | ;; Record the precedence and associativity of the terminal. | |
3271 | (wisent-set-prec token pre) | |
3272 | (wisent-set-assoc token assoc)))) | |
3273 | ||
3274 | ;; Check/Collect nonterminals | |
3275 | (setq lst (nthcdr 2 grammar) | |
3276 | defs nil) | |
3277 | (while lst | |
3278 | (setq def (car lst) | |
3279 | lst (cdr lst)) | |
3280 | (or (consp def) | |
3281 | (error "Invalid nonterminal definition: %S" def)) | |
3282 | (if (memq (car def) token-list) | |
3283 | (error "Nonterminal `%s' already defined as token" (car def))) | |
3284 | (wisent-push-var (car def)) | |
3285 | (setq defs (cons def defs))) | |
3286 | (or defs | |
3287 | (error "No input grammar")) | |
3288 | (setq defs (nreverse defs)) | |
3289 | ||
3290 | ;; Set up the start symbol. | |
3291 | (setq start-table nil) | |
3292 | (cond | |
3293 | ||
3294 | ;; 1. START-LIST is nil, the start symbol is the first | |
3295 | ;; nonterminal defined in the grammar (Bison like). | |
3296 | ((null start-list) | |
3297 | (setq start-var (caar defs))) | |
3298 | ||
3299 | ;; 2. START-LIST contains only one element, it is the start | |
3300 | ;; symbol (Bison like). | |
3301 | ((or wisent-single-start-flag (null (cdr start-list))) | |
3302 | (setq start-var (car start-list)) | |
3303 | (or (assq start-var defs) | |
3304 | (error "Start symbol `%s' has no rule" start-var))) | |
3305 | ||
3306 | ;; 3. START-LIST contains more than one element. All defines | |
3307 | ;; potential start symbols. One of them (the first one by | |
3308 | ;; default) will be given at parse time to be the parser goal. | |
3309 | ;; If `wisent-single-start-flag' is non-nil that feature is | |
3310 | ;; disabled and the first nonterminal in START-LIST defines | |
3311 | ;; the start symbol, like in case 2 above. | |
3312 | ((not wisent-single-start-flag) | |
3313 | ||
3314 | ;; START-LIST is a list of nonterminals '(nt0 ... ntN). | |
3315 | ;; Build and push ad hoc start rules in the grammar: | |
3316 | ||
3317 | ;; ($STARTS ((nt0) $1) ((nt1) $1) ... ((ntN) $1)) | |
3318 | ;; ($nt1 (($$nt1 nt1) $2)) | |
3319 | ;; ... | |
3320 | ;; ($ntN (($$ntN ntN) $2)) | |
3321 | ||
3322 | ;; Where internal symbols $ntI and $$ntI are respectively | |
3323 | ;; nonterminals and terminals. | |
3324 | ||
3325 | ;; The internal start symbol $STARTS is used to build the | |
3326 | ;; LALR(1) automaton. The true default start symbol used by the | |
3327 | ;; parser is the first nonterminal in START-LIST (nt0). | |
3328 | (setq start-var wisent-starts-nonterm | |
3329 | lst (nreverse start-list)) | |
3330 | (while lst | |
3331 | (setq var (car lst) | |
3332 | lst (cdr lst)) | |
3333 | (or (memq var var-list) | |
3334 | (error "Start symbol `%s' has no rule" var)) | |
3335 | (unless (assq var start-table) ;; Ignore duplicates | |
3336 | ;; For each nt start symbol | |
3337 | (setq ep-var (intern (format "$%s" var)) | |
3338 | ep-token (intern (format "$$%s" var))) | |
3339 | (wisent-push-token ep-token t) | |
3340 | (wisent-push-var ep-var t) | |
3341 | (setq | |
3342 | ;; Add entry (nt . $$nt) to start-table | |
3343 | start-table (cons (cons var ep-token) start-table) | |
3344 | ;; Add rule ($nt (($$nt nt) $2)) | |
3345 | defs (cons (list ep-var (list (list ep-token var) '$2)) defs) | |
3346 | ;; Add start rule (($nt) $1) | |
3347 | ep-def (cons (list (list ep-var) '$1) ep-def)) | |
3348 | )) | |
3349 | (wisent-push-var start-var t) | |
3350 | (setq defs (cons (cons start-var ep-def) defs)))) | |
3351 | ||
3352 | ;; Set up rules main data structure & RPREC, RCODE, RUSEFUL | |
3353 | (setq rules (wisent-parse-nonterminals defs)) | |
3354 | ||
3355 | ;; Set up the terminal & nonterminal lists. | |
3356 | (setq nsyms (+ ntokens nvars) | |
3357 | token-list (nreverse token-list) | |
3358 | lst var-list | |
3359 | var-list nil) | |
3360 | (while lst | |
3361 | (setq var (car lst) | |
3362 | lst (cdr lst) | |
3363 | var-list (cons var var-list)) | |
3364 | (wisent-set-item-number ;; adjust nonterminal item number to | |
3365 | var (+ ntokens (wisent-item-number var)))) ;; I += NTOKENS | |
3366 | ||
3367 | ;; Store special item numbers | |
3368 | (setq error-token-number (wisent-item-number wisent-error-term) | |
3369 | start-symbol (wisent-item-number start-var)) | |
3370 | ||
3371 | ;; Keep symbols in the TAGS vector so that TAGS[I] is the symbol | |
3372 | ;; associated to item number I. | |
3373 | (setq tags (vconcat token-list var-list)) | |
3374 | ;; Set up RLHS RRHS & RITEM data structures from list of rules | |
3375 | ;; (LHS . RHS) received from `wisent-parse-nonterminals'. | |
3376 | (setq rlhs (make-vector (1+ nrules) nil) | |
3377 | rrhs (make-vector (1+ nrules) nil) | |
3378 | ritem (make-vector (1+ nitems) nil) | |
3379 | i 0 | |
3380 | r 1) | |
3381 | (while rules | |
3382 | (aset rlhs r (wisent-item-number (caar rules))) | |
3383 | (aset rrhs r i) | |
3384 | (setq rhs (cdar rules) | |
3385 | pre nil) | |
3386 | (while rhs | |
3387 | (setq item (wisent-item-number (car rhs))) | |
3388 | ;; Get default precedence level of rule, that is the | |
3389 | ;; precedence of the last terminal in it. | |
3390 | (and (wisent-ISTOKEN item) | |
3391 | default-prec | |
3392 | (setq pre item)) | |
3393 | ||
3394 | (aset ritem i item) | |
3395 | (setq i (1+ i) | |
3396 | rhs (cdr rhs))) | |
3397 | ;; Setup the precedence level of the rule, that is the one | |
3398 | ;; specified by %prec or the default one. | |
3399 | (and (not (aref rprec r)) ;; Already set by %prec | |
3400 | pre | |
3401 | (wisent-prec (aref tags pre)) | |
3402 | (aset rprec r pre)) | |
3403 | (aset ritem i (- r)) | |
3404 | (setq i (1+ i) | |
3405 | r (1+ r)) | |
3406 | (setq rules (cdr rules))) | |
3407 | )) | |
3408 | \f | |
3409 | ;;;; --------------------- | |
3410 | ;;;; Compile input grammar | |
3411 | ;;;; --------------------- | |
3412 | ||
3413 | (defun wisent-compile-grammar (grammar &optional start-list) | |
3414 | "Compile the LALR(1) GRAMMAR. | |
3415 | ||
3416 | GRAMMAR is a list (TOKENS ASSOCS . NONTERMS) where: | |
3417 | ||
3418 | - TOKENS is a list of terminal symbols (tokens). | |
3419 | ||
3420 | - ASSOCS is nil, or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements | |
3421 | describing the associativity of TOKENS. ASSOC-TYPE must be one of | |
3422 | the `default-prec' `nonassoc', `left' or `right' symbols. When | |
3423 | ASSOC-TYPE is `default-prec', ASSOC-VALUE must be nil or t (the | |
3424 | default). Otherwise it is a list of tokens which must have been | |
3425 | previously declared in TOKENS. | |
3426 | ||
3427 | - NONTERMS is a list of nonterminal definitions. | |
3428 | ||
3429 | Optional argument START-LIST specify the possible grammar start | |
3430 | symbols. This is a list of nonterminals which must have been | |
3431 | previously declared in GRAMMAR's NONTERMS form. By default, the start | |
3432 | symbol is the first nonterminal defined. When START-LIST contains | |
3433 | only one element, it is the start symbol. Otherwise, all elements are | |
3434 | possible start symbols, unless `wisent-single-start-flag' is non-nil. | |
3435 | In that case, the first element is the start symbol, and others are | |
3436 | ignored. | |
3437 | ||
3438 | Return an automaton as a vector: [ACTIONS GOTOS STARTS FUNCTIONS] | |
3439 | where: | |
3440 | ||
3441 | - ACTIONS is a state/token matrix telling the parser what to do at | |
3442 | every state based on the current lookahead token. That is shift, | |
3443 | reduce, accept or error. | |
3444 | ||
3445 | - GOTOS is a state/nonterminal matrix telling the parser the next | |
3446 | state to go to after reducing with each rule. | |
3447 | ||
3448 | - STARTS is an alist which maps the allowed start nonterminal symbols | |
3449 | to tokens that will be first shifted into the parser stack. | |
3450 | ||
3451 | - FUNCTIONS is an obarray of semantic action symbols. Each symbol's | |
3452 | function definition is the semantic action lambda expression." | |
3453 | (if (wisent-automaton-p grammar) | |
3454 | grammar ;; Grammar already compiled just return it | |
3455 | (wisent-with-context compile-grammar | |
3456 | (let* ((gc-cons-threshold 1000000) | |
3457 | automaton) | |
3458 | (garbage-collect) | |
3459 | (setq wisent-new-log-flag t) | |
3460 | ;; Parse input grammar | |
3461 | (wisent-parse-grammar grammar start-list) | |
3462 | ;; Generate the LALR(1) automaton | |
3463 | (setq automaton (wisent-parser-automaton)) | |
3464 | automaton)))) | |
3465 | \f | |
3466 | ;;;; -------------------------- | |
3467 | ;;;; Byte compile input grammar | |
3468 | ;;;; -------------------------- | |
3469 | ||
3470 | (require 'bytecomp) | |
3471 | ||
3472 | (defun wisent-byte-compile-grammar (form) | |
3473 | "Byte compile the `wisent-compile-grammar' FORM. | |
3474 | Automatically called by the Emacs Lisp byte compiler as a | |
3475 | `byte-compile' handler." | |
3476 | ;; Eval the `wisent-compile-grammar' form to obtain an LALR | |
3477 | ;; automaton internal data structure. Then, because the internal | |
3478 | ;; data structure contains an obarray, convert it to a lisp form so | |
3479 | ;; it can be byte-compiled. | |
3480 | (byte-compile-form (wisent-automaton-lisp-form (eval form)))) | |
3481 | ||
3482 | (put 'wisent-compile-grammar 'byte-compile 'wisent-byte-compile-grammar) | |
3483 | ||
3484 | (defun wisent-automaton-lisp-form (automaton) | |
3485 | "Return a Lisp form that produces AUTOMATON. | |
3486 | See also `wisent-compile-grammar' for more details on AUTOMATON." | |
3487 | (or (wisent-automaton-p automaton) | |
3488 | (signal 'wrong-type-argument | |
3489 | (list 'wisent-automaton-p automaton))) | |
3490 | (let ((obn (make-symbol "ob")) ; Generated obarray name | |
3491 | (obv (aref automaton 3)) ; Semantic actions obarray | |
3492 | ) | |
3493 | `(let ((,obn (make-vector 13 0))) | |
3494 | ;; Generate code to initialize the semantic actions obarray, | |
3495 | ;; in local variable OBN. | |
3496 | ,@(let (obcode) | |
3497 | (mapatoms | |
3498 | #'(lambda (s) | |
3499 | (setq obcode | |
3500 | (cons `(fset (intern ,(symbol-name s) ,obn) | |
3501 | #',(symbol-function s)) | |
3502 | obcode))) | |
3503 | obv) | |
3504 | obcode) | |
3505 | ;; Generate code to create the automaton. | |
3506 | (vector | |
3507 | ;; In code generated to initialize the action table, take | |
3508 | ;; care of symbols that are interned in the semantic actions | |
3509 | ;; obarray. | |
3510 | (vector | |
3511 | ,@(mapcar | |
3512 | #'(lambda (state) ;; for each state | |
3513 | `(list | |
3514 | ,@(mapcar | |
3515 | #'(lambda (tr) ;; for each transition | |
3516 | (let ((k (car tr)) ; token | |
3517 | (a (cdr tr))) ; action | |
3518 | (if (and (symbolp a) | |
3519 | (intern-soft (symbol-name a) obv)) | |
3520 | `(cons ,(if (symbolp k) `(quote ,k) k) | |
3521 | (intern-soft ,(symbol-name a) ,obn)) | |
3522 | `(quote ,tr)))) | |
3523 | state))) | |
3524 | (aref automaton 0))) | |
3525 | ;; The code of the goto table is unchanged. | |
3526 | ,(aref automaton 1) | |
3527 | ;; The code of the alist of start symbols is unchanged. | |
3528 | ',(aref automaton 2) | |
3529 | ;; The semantic actions obarray is in the local variable OBN. | |
3530 | ,obn)))) | |
3531 | ||
3532 | (provide 'semantic/wisent/comp) | |
3533 | ||
3534 | ;;; semantic/wisent/comp.el ends here |