Refill some copyright headers.
[bpt/emacs.git] / lisp / cedet / semantic / wisent / comp.el
CommitLineData
bb051423
CY
1;;; semantic/wisent/comp.el --- GNU Bison for Emacs - Grammar compiler
2
3;; Copyright (C) 1984, 1986, 1989, 1992, 1995, 2000, 2001, 2002, 2003,
e9bffc61
GM
4;; 2004, 2005, 2006, 2007, 2009, 2010, 2011
5;; Free Software Foundation, Inc.
bb051423
CY
6
7;; Author: David Ponce <david@dponce.com>
8;; Maintainer: David Ponce <david@dponce.com>
9;; Created: 30 January 2002
10;; Keywords: syntax
11
12;; This file is part of GNU Emacs.
13
14;; GNU Emacs is free software: you can redistribute it and/or modify
15;; it under the terms of the GNU General Public License as published by
16;; the Free Software Foundation, either version 3 of the License, or
17;; (at your option) any later version.
18
19;; GNU Emacs is distributed in the hope that it will be useful,
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
25;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
26
27;;; Commentary:
28;;
29;; Grammar compiler that produces Wisent's LALR automatons.
30;;
31;; Wisent (the European Bison ;-) is an Elisp implementation of the
32;; GNU Compiler Compiler Bison. The Elisp code is a port of the C
33;; code of GNU Bison 1.28 & 1.31.
34;;
35;; For more details on the basic concepts for understanding Wisent,
36;; read the Bison manual ;)
37;;
38;; For more details on Wisent itself read the Wisent manual.
39
40;;; History:
41;;
42
43;;; Code:
44(require 'semantic/wisent)
45\f
46;;;; -------------------
47;;;; Misc. useful things
48;;;; -------------------
49
50;; As much as possible I would like to keep the name of global
51;; variables used in Bison without polluting too much the Elisp global
52;; name space. Elisp dynamic binding allows that ;-)
53
54;; Here are simple macros to easily define and use set of variables
55;; binded locally, without all these "reference to free variable"
56;; compiler warnings!
57
58(defmacro wisent-context-name (name)
59 "Return the context name from NAME."
60 `(if (and ,name (symbolp ,name))
61 (intern (format "wisent-context-%s" ,name))
62 (error "Invalid context name: %S" ,name)))
63
64(defmacro wisent-context-bindings (name)
65 "Return the variables in context NAME."
66 `(symbol-value (wisent-context-name ,name)))
67
68(defmacro wisent-defcontext (name &rest vars)
69 "Define a context NAME that will bind variables VARS."
70 (let* ((context (wisent-context-name name))
71 (bindings (mapcar #'(lambda (v) (list 'defvar v)) vars)))
72 `(eval-when-compile
73 ,@bindings
74 (defvar ,context ',vars))))
75(put 'wisent-defcontext 'lisp-indent-function 1)
76
77(defmacro wisent-with-context (name &rest body)
78 "Bind variables in context NAME then eval BODY."
79 `(let* ,(wisent-context-bindings name)
80 ,@body))
81(put 'wisent-with-context 'lisp-indent-function 1)
82
83;; A naive implementation of data structures! But it suffice here ;-)
84
85(defmacro wisent-struct (name &rest fields)
86 "Define a simple data structure called NAME.
87Which contains data stored in FIELDS. FIELDS is a list of symbols
88which are field names or pairs (FIELD INITIAL-VALUE) where
89INITIAL-VALUE is a constant used as the initial value of FIELD when
90the data structure is created. INITIAL-VALUE defaults to nil.
91
92This defines a `make-NAME' constructor, get-able `NAME-FIELD' and
93set-able `set-NAME-FIELD' accessors."
94 (let ((size (length fields))
95 (i 0)
96 accors field sufx fun ivals)
97 (while (< i size)
98 (setq field (car fields)
99 fields (cdr fields))
100 (if (consp field)
101 (setq ivals (cons (cadr field) ivals)
102 field (car field))
103 (setq ivals (cons nil ivals)))
104 (setq sufx (format "%s-%s" name field)
105 fun (intern (format "%s" sufx))
106 accors (cons `(defmacro ,fun (s)
107 (list 'aref s ,i))
108 accors)
109 fun (intern (format "set-%s" sufx))
110 accors (cons `(defmacro ,fun (s v)
111 (list 'aset s ,i v))
112 accors)
113 i (1+ i)))
114 `(progn
115 (defmacro ,(intern (format "make-%s" name)) ()
116 (cons 'vector ',(nreverse ivals)))
117 ,@accors)))
118(put 'wisent-struct 'lisp-indent-function 1)
119
120;; Other utilities
121
122(defsubst wisent-pad-string (s n &optional left)
123 "Fill string S with spaces.
124Return a new string of at least N characters. Insert spaces on right.
125If optional LEFT is non-nil insert spaces on left."
126 (let ((i (length s)))
127 (if (< i n)
128 (if left
129 (concat (make-string (- n i) ?\ ) s)
130 (concat s (make-string (- n i) ?\ )))
131 s)))
132\f
133;;;; ------------------------
134;;;; Environment dependencies
135;;;; ------------------------
136
137(defconst wisent-BITS-PER-WORD
138 (let ((i 1))
139 (while (not (zerop (lsh 1 i)))
140 (setq i (1+ i)))
141 i))
142
143(defsubst wisent-WORDSIZE (n)
144 "(N + BITS-PER-WORD - 1) / BITS-PER-WORD."
145 (/ (1- (+ n wisent-BITS-PER-WORD)) wisent-BITS-PER-WORD))
146
147(defsubst wisent-SETBIT (x i)
148 "X[I/BITS-PER-WORD] |= 1 << (I % BITS-PER-WORD)."
149 (let ((k (/ i wisent-BITS-PER-WORD)))
150 (aset x k (logior (aref x k)
151 (lsh 1 (% i wisent-BITS-PER-WORD))))))
152
153(defsubst wisent-RESETBIT (x i)
154 "X[I/BITS-PER-WORD] &= ~(1 << (I % BITS-PER-WORD))."
155 (let ((k (/ i wisent-BITS-PER-WORD)))
156 (aset x k (logand (aref x k)
157 (lognot (lsh 1 (% i wisent-BITS-PER-WORD)))))))
158
159(defsubst wisent-BITISSET (x i)
160 "(X[I/BITS-PER-WORD] & (1 << (I % BITS-PER-WORD))) != 0."
161 (not (zerop (logand (aref x (/ i wisent-BITS-PER-WORD))
162 (lsh 1 (% i wisent-BITS-PER-WORD))))))
163
bb051423
CY
164(defsubst wisent-noninteractive ()
165 "Return non-nil if running without interactive terminal."
166 (if (featurep 'xemacs)
167 (noninteractive)
168 noninteractive))
169
170(defvar wisent-debug-flag nil
171 "Non-nil means enable some debug stuff.")
172\f
173;;;; --------------
174;;;; Logging/Output
175;;;; --------------
176(defconst wisent-log-buffer-name "*wisent-log*"
177 "Name of the log buffer.")
178
179(defvar wisent-new-log-flag nil
180 "Non-nil means to start a new report.")
181
182(defvar wisent-verbose-flag nil
183 "*Non-nil means to report verbose information on generated parser.")
184
185(defun wisent-toggle-verbose-flag ()
186 "Toggle whether to report verbose information on generated parser."
187 (interactive)
188 (setq wisent-verbose-flag (not wisent-verbose-flag))
2054a44c 189 (when (called-interactively-p 'interactive)
bb051423
CY
190 (message "Verbose report %sabled"
191 (if wisent-verbose-flag "en" "dis"))))
192
193(defmacro wisent-log-buffer ()
194 "Return the log buffer.
195Its name is defined in constant `wisent-log-buffer-name'."
196 `(get-buffer-create wisent-log-buffer-name))
197
198(defmacro wisent-clear-log ()
199 "Delete the entire contents of the log buffer."
200 `(with-current-buffer (wisent-log-buffer)
201 (erase-buffer)))
202
df26e1f5 203(defvar byte-compile-current-file)
bb051423
CY
204
205(defun wisent-source ()
206 "Return the current source file name or nil."
207 (let ((source (or (and (boundp 'byte-compile-current-file)
208 byte-compile-current-file)
209 load-file-name (buffer-file-name))))
210 (if source
211 (file-relative-name source))))
212
213(defun wisent-new-log ()
214 "Start a new entry into the log buffer."
215 (setq wisent-new-log-flag nil)
216 (let ((text (format "\n\n*** Wisent %s - %s\n\n"
217 (or (wisent-source) (buffer-name))
218 (format-time-string "%Y-%m-%d %R"))))
219 (with-current-buffer (wisent-log-buffer)
220 (goto-char (point-max))
221 (insert text))))
222
223(defsubst wisent-log (&rest args)
224 "Insert text into the log buffer.
225`format' is applied to ARGS and the result string is inserted into the
226log buffer returned by the function `wisent-log-buffer'."
227 (and wisent-new-log-flag (wisent-new-log))
228 (with-current-buffer (wisent-log-buffer)
229 (insert (apply 'format args))))
230
231(defconst wisent-log-file "wisent.output"
232 "The log file.
233Used when running without interactive terminal.")
234
235(defun wisent-append-to-log-file ()
236 "Append contents of logging buffer to `wisent-log-file'."
237 (if (get-buffer wisent-log-buffer-name)
238 (condition-case err
239 (with-current-buffer (wisent-log-buffer)
240 (widen)
241 (if (> (point-max) (point-min))
242 (write-region (point-min) (point-max)
243 wisent-log-file t)))
244 (error
245 (message "*** %s" (error-message-string err))))))
246\f
247;;;; -----------------------------------
248;;;; Representation of the grammar rules
249;;;; -----------------------------------
250
251;; ntokens is the number of tokens, and nvars is the number of
252;; variables (nonterminals). nsyms is the total number, ntokens +
253;; nvars.
254
255;; Each symbol (either token or variable) receives a symbol number.
256;; Numbers 0 to ntokens-1 are for tokens, and ntokens to nsyms-1 are
257;; for variables. Symbol number zero is the end-of-input token. This
258;; token is counted in ntokens.
259
260;; The rules receive rule numbers 1 to nrules in the order they are
261;; written. Actions and guards are accessed via the rule number.
262
263;; The rules themselves are described by three arrays: rrhs, rlhs and
264;; ritem. rlhs[R] is the symbol number of the left hand side of rule
265;; R. The right hand side is stored as symbol numbers in a portion of
266;; ritem. rrhs[R] contains the index in ritem of the beginning of the
267;; portion for rule R.
268
269;; The length of the portion is one greater than the number of symbols
270;; in the rule's right hand side. The last element in the portion
271;; contains minus R, which identifies it as the end of a portion and
272;; says which rule it is for.
273
274;; The portions of ritem come in order of increasing rule number and
275;; are followed by an element which is nil to mark the end. nitems is
276;; the total length of ritem, not counting the final nil. Each
277;; element of ritem is called an "item" and its index in ritem is an
278;; item number.
279
280;; Item numbers are used in the finite state machine to represent
281;; places that parsing can get to.
282
283;; The vector rprec contains for each rule, the item number of the
284;; symbol giving its precedence level to this rule. The precedence
285;; level and associativity of each symbol is recorded in respectively
286;; the properties 'wisent--prec and 'wisent--assoc.
287
288;; Precedence levels are assigned in increasing order starting with 1
289;; so that numerically higher precedence values mean tighter binding
290;; as they ought to. nil as a symbol or rule's precedence means none
291;; is assigned.
292
293(defcustom wisent-state-table-size 1009
294 "The size of the state table."
295 :type 'integer
296 :group 'wisent)
297
298;; These variables only exist locally in the function
299;; `wisent-compile-grammar' and are shared by all other nested
300;; callees.
301(wisent-defcontext compile-grammar
302 F LA LAruleno accessing-symbol conflicts consistent default-prec
303 derives err-table fderives final-state first-reduction first-shift
304 first-state firsts from-state goto-map includes itemset nitemset
305 kernel-base kernel-end kernel-items last-reduction last-shift
306 last-state lookaheads lookaheadset lookback maxrhs ngotos nitems
307 nrules nshifts nstates nsyms ntokens nullable nvars rassoc redset
308 reduction-table ritem rlhs rprec rrc-count rrc-total rrhs ruseful
309 rcode ruleset rulesetsize shift-symbol shift-table shiftset
310 src-count src-total start-table state-table tags this-state to-state
311 tokensetsize ;; nb of words req. to hold a bit for each rule
312 varsetsize ;; nb of words req. to hold a bit for each variable
313 error-token-number start-symbol token-list var-list
314 N P V V1 nuseless-nonterminals nuseless-productions
315 ptable ;; symbols & characters properties
316 )
317
318(defmacro wisent-ISTOKEN (s)
319 "Return non-nil if item number S defines a token (terminal).
320That is if S < `ntokens'."
321 `(< ,s ntokens))
322
323(defmacro wisent-ISVAR(s)
324 "Return non-nil if item number S defines a nonterminal.
325That is if S >= `ntokens'."
326 `(>= ,s ntokens))
327
328(defsubst wisent-tag (s)
329 "Return printable form of item number S."
330 (wisent-item-to-string (aref tags s)))
331
332;; Symbol and character properties
333
334(defsubst wisent-put (object propname value)
335 "Store OBJECT's PROPNAME property with value VALUE.
336Use `eq' to locate OBJECT."
337 (let ((entry (assq object ptable)))
338 (or entry (setq entry (list object) ptable (cons entry ptable)))
339 (setcdr entry (plist-put (cdr entry) propname value))))
340
341(defsubst wisent-get (object propname)
342 "Return the value of OBJECT's PROPNAME property.
343Use `eq' to locate OBJECT."
344 (plist-get (cdr (assq object ptable)) propname))
345
346(defsubst wisent-item-number (x)
347 "Return the item number of symbol X."
348 (wisent-get x 'wisent--item-no))
349
350(defsubst wisent-set-item-number (x n)
351 "Set the item number of symbol X to N."
352 (wisent-put x 'wisent--item-no n))
353
354(defsubst wisent-assoc (x)
355 "Return the associativity of symbol X."
356 (wisent-get x 'wisent--assoc))
357
358(defsubst wisent-set-assoc (x a)
359 "Set the associativity of symbol X to A."
360 (wisent-put x 'wisent--assoc a))
361
362(defsubst wisent-prec (x)
363 "Return the precedence level of symbol X."
364 (wisent-get x 'wisent--prec))
365
366(defsubst wisent-set-prec (x p)
367 "Set the precedence level of symbol X to P."
368 (wisent-put x 'wisent--prec p))
369\f
370;;;; ----------------------------------------------------------
371;;;; Type definitions for nondeterministic finite state machine
372;;;; ----------------------------------------------------------
373
374;; These type definitions are used to represent a nondeterministic
375;; finite state machine that parses the specified grammar. This
376;; information is generated by the function `wisent-generate-states'.
377
378;; Each state of the machine is described by a set of items --
379;; particular positions in particular rules -- that are the possible
380;; places where parsing could continue when the machine is in this
381;; state. These symbols at these items are the allowable inputs that
382;; can follow now.
383
384;; A core represents one state. States are numbered in the number
385;; field. When `wisent-generate-states' is finished, the starting
386;; state is state 0 and `nstates' is the number of states. (A
387;; transition to a state whose state number is `nstates' indicates
388;; termination.) All the cores are chained together and `first-state'
389;; points to the first one (state 0).
390
391;; For each state there is a particular symbol which must have been
392;; the last thing accepted to reach that state. It is the
393;; accessing-symbol of the core.
394
395;; Each core contains a vector of `nitems' items which are the indices
396;; in the `ritems' vector of the items that are selected in this
397;; state.
398
399;; The link field is used for chaining buckets that hash states by
400;; their itemsets. This is for recognizing equivalent states and
401;; combining them when the states are generated.
402
403;; The two types of transitions are shifts (push the lookahead token
404;; and read another) and reductions (combine the last n things on the
405;; stack via a rule, replace them with the symbol that the rule
406;; derives, and leave the lookahead token alone). When the states are
407;; generated, these transitions are represented in two other lists.
408
409;; Each shifts structure describes the possible shift transitions out
410;; of one state, the state whose number is in the number field. The
411;; shifts structures are linked through next and first-shift points to
412;; them. Each contains a vector of numbers of the states that shift
413;; transitions can go to. The accessing-symbol fields of those
414;; states' cores say what kind of input leads to them.
415
416;; A shift to state zero should be ignored. Conflict resolution
417;; deletes shifts by changing them to zero.
418
419;; Each reductions structure describes the possible reductions at the
420;; state whose number is in the number field. The data is a list of
421;; nreds rules, represented by their rule numbers. `first-reduction'
422;; points to the list of these structures.
423
424;; Conflict resolution can decide that certain tokens in certain
425;; states should explicitly be errors (for implementing %nonassoc).
426;; For each state, the tokens that are errors for this reason are
427;; recorded in an errs structure, which has the state number in its
428;; number field. The rest of the errs structure is full of token
429;; numbers.
430
431;; There is at least one shift transition present in state zero. It
432;; leads to a next-to-final state whose accessing-symbol is the
433;; grammar's start symbol. The next-to-final state has one shift to
434;; the final state, whose accessing-symbol is zero (end of input).
435;; The final state has one shift, which goes to the termination state
436;; (whose number is `nstates'-1).
437;; The reason for the extra state at the end is to placate the
438;; parser's strategy of making all decisions one token ahead of its
439;; actions.
440
441(wisent-struct core
442 next ; -> core
443 link ; -> core
444 (number 0)
445 (accessing-symbol 0)
446 (nitems 0)
447 (items [0]))
448
449(wisent-struct shifts
450 next ; -> shifts
451 (number 0)
452 (nshifts 0)
453 (shifts [0]))
454
455(wisent-struct reductions
456 next ; -> reductions
457 (number 0)
458 (nreds 0)
459 (rules [0]))
460
461(wisent-struct errs
462 (nerrs 0)
463 (errs [0]))
464\f
465;;;; --------------------------------------------------------
466;;;; Find unreachable terminals, nonterminals and productions
467;;;; --------------------------------------------------------
468
469(defun wisent-bits-equal (L R n)
470 "Visit L and R and return non-nil if their first N elements are `='.
471L and R must be vectors of integers."
472 (let* ((i (1- n))
473 (iseq t))
474 (while (and iseq (natnump i))
475 (setq iseq (= (aref L i) (aref R i))
476 i (1- i)))
477 iseq))
478
479(defun wisent-nbits (i)
480 "Return number of bits set in integer I."
481 (let ((count 0))
482 (while (not (zerop i))
483 ;; i ^= (i & ((unsigned) (-(int) i)))
484 (setq i (logxor i (logand i (- i)))
485 count (1+ count)))
486 count))
487
488(defun wisent-bits-size (S n)
489 "In vector S count the total of bits set in first N elements.
490S must be a vector of integers."
491 (let* ((i (1- n))
492 (count 0))
493 (while (natnump i)
494 (setq count (+ count (wisent-nbits (aref S i)))
495 i (1- i)))
496 count))
497
498(defun wisent-useful-production (i N0)
499 "Return non-nil if production I is in useful set N0."
500 (let* ((useful t)
501 (r (aref rrhs i))
502 n)
503 (while (and useful (> (setq n (aref ritem r)) 0))
504 (if (wisent-ISVAR n)
505 (setq useful (wisent-BITISSET N0 (- n ntokens))))
506 (setq r (1+ r)))
507 useful))
508
509(defun wisent-useless-nonterminals ()
510 "Find out which nonterminals are used."
511 (let (Np Ns i n break)
512 ;; N is set as built. Np is set being built this iteration. P is
513 ;; set of all productions which have a RHS all in N.
514 (setq n (wisent-WORDSIZE nvars)
515 Np (make-vector n 0))
516
517 ;; The set being computed is a set of nonterminals which can
518 ;; derive the empty string or strings consisting of all
519 ;; terminals. At each iteration a nonterminal is added to the set
520 ;; if there is a production with that nonterminal as its LHS for
521 ;; which all the nonterminals in its RHS are already in the set.
522 ;; Iterate until the set being computed remains unchanged. Any
523 ;; nonterminals not in the set at that point are useless in that
524 ;; they will never be used in deriving a sentence of the language.
525
526 ;; This iteration doesn't use any special traversal over the
527 ;; productions. A set is kept of all productions for which all
528 ;; the nonterminals in the RHS are in useful. Only productions
529 ;; not in this set are scanned on each iteration. At the end,
530 ;; this set is saved to be used when finding useful productions:
531 ;; only productions in this set will appear in the final grammar.
532
533 (while (not break)
534 (setq i (1- n))
535 (while (natnump i)
536 ;; Np[i] = N[i]
537 (aset Np i (aref N i))
538 (setq i (1- i)))
539
540 (setq i 1)
541 (while (<= i nrules)
542 (if (not (wisent-BITISSET P i))
543 (when (wisent-useful-production i N)
544 (wisent-SETBIT Np (- (aref rlhs i) ntokens))
545 (wisent-SETBIT P i)))
546 (setq i (1+ i)))
547 (if (wisent-bits-equal N Np n)
548 (setq break t)
549 (setq Ns Np
550 Np N
551 N Ns)))
552 (setq N Np)))
553
554(defun wisent-inaccessable-symbols ()
555 "Find out which productions are reachable and which symbols are used."
556 ;; Starting with an empty set of productions and a set of symbols
557 ;; which only has the start symbol in it, iterate over all
558 ;; productions until the set of productions remains unchanged for an
559 ;; iteration. For each production which has a LHS in the set of
560 ;; reachable symbols, add the production to the set of reachable
561 ;; productions, and add all of the nonterminals in the RHS of the
562 ;; production to the set of reachable symbols.
563
564 ;; Consider only the (partially) reduced grammar which has only
565 ;; nonterminals in N and productions in P.
566
567 ;; The result is the set P of productions in the reduced grammar,
568 ;; and the set V of symbols in the reduced grammar.
569
570 ;; Although this algorithm also computes the set of terminals which
571 ;; are reachable, no terminal will be deleted from the grammar. Some
572 ;; terminals might not be in the grammar but might be generated by
573 ;; semantic routines, and so the user might want them available with
574 ;; specified numbers. (Is this true?) However, the non reachable
575 ;; terminals are printed (if running in verbose mode) so that the
576 ;; user can know.
577 (let (Vp Vs Pp i tt r n m break)
578 (setq n (wisent-WORDSIZE nsyms)
579 m (wisent-WORDSIZE (1+ nrules))
580 Vp (make-vector n 0)
581 Pp (make-vector m 0))
582
583 ;; If the start symbol isn't useful, then nothing will be useful.
584 (when (wisent-BITISSET N (- start-symbol ntokens))
585 (wisent-SETBIT V start-symbol)
586 (while (not break)
587 (setq i (1- n))
588 (while (natnump i)
589 (aset Vp i (aref V i))
590 (setq i (1- i)))
591 (setq i 1)
592 (while (<= i nrules)
593 (when (and (not (wisent-BITISSET Pp i))
594 (wisent-BITISSET P i)
595 (wisent-BITISSET V (aref rlhs i)))
596 (setq r (aref rrhs i))
597 (while (natnump (setq tt (aref ritem r)))
598 (if (or (wisent-ISTOKEN tt)
599 (wisent-BITISSET N (- tt ntokens)))
600 (wisent-SETBIT Vp tt))
601 (setq r (1+ r)))
602 (wisent-SETBIT Pp i))
603 (setq i (1+ i)))
604 (if (wisent-bits-equal V Vp n)
605 (setq break t)
606 (setq Vs Vp
607 Vp V
608 V Vs))))
609 (setq V Vp)
610
611 ;; Tokens 0, 1 are internal to Wisent. Consider them useful.
612 (wisent-SETBIT V 0) ;; end-of-input token
613 (wisent-SETBIT V 1) ;; error token
614 (setq P Pp)
615
616 (setq nuseless-productions (- nrules (wisent-bits-size P m))
617 nuseless-nonterminals nvars
618 i ntokens)
619 (while (< i nsyms)
620 (if (wisent-BITISSET V i)
621 (setq nuseless-nonterminals (1- nuseless-nonterminals)))
622 (setq i (1+ i)))
623
624 ;; A token that was used in %prec should not be warned about.
625 (setq i 1)
626 (while (<= i nrules)
627 (if (aref rprec i)
628 (wisent-SETBIT V1 (aref rprec i)))
629 (setq i (1+ i)))
630 ))
631
632(defun wisent-reduce-grammar-tables ()
633 "Disable useless productions."
634 (if (> nuseless-productions 0)
635 (let ((pn 1))
636 (while (<= pn nrules)
637 (aset ruseful pn (wisent-BITISSET P pn))
638 (setq pn (1+ pn))))))
639
640(defun wisent-nonterminals-reduce ()
641 "Remove useless nonterminals."
642 (let (i n r item nontermmap tags-sorted)
643 ;; Map the nonterminals to their new index: useful first, useless
644 ;; afterwards. Kept for later report.
645 (setq nontermmap (make-vector nvars 0)
646 n ntokens
647 i ntokens)
648 (while (< i nsyms)
649 (when (wisent-BITISSET V i)
650 (aset nontermmap (- i ntokens) n)
651 (setq n (1+ n)))
652 (setq i (1+ i)))
653 (setq i ntokens)
654 (while (< i nsyms)
655 (unless (wisent-BITISSET V i)
656 (aset nontermmap (- i ntokens) n)
657 (setq n (1+ n)))
658 (setq i (1+ i)))
659 ;; Shuffle elements of tables indexed by symbol number
660 (setq tags-sorted (make-vector nvars nil)
661 i ntokens)
662 (while (< i nsyms)
663 (setq n (aref nontermmap (- i ntokens)))
664 (aset tags-sorted (- n ntokens) (aref tags i))
665 (setq i (1+ i)))
666 (setq i ntokens)
667 (while (< i nsyms)
668 (aset tags i (aref tags-sorted (- i ntokens)))
669 (setq i (1+ i)))
670 ;; Replace all symbol numbers in valid data structures.
671 (setq i 1)
672 (while (<= i nrules)
673 (aset rlhs i (aref nontermmap (- (aref rlhs i) ntokens)))
674 (setq i (1+ i)))
675 (setq r 0)
676 (while (setq item (aref ritem r))
677 (if (wisent-ISVAR item)
678 (aset ritem r (aref nontermmap (- item ntokens))))
679 (setq r (1+ r)))
680 (setq start-symbol (aref nontermmap (- start-symbol ntokens))
681 nsyms (- nsyms nuseless-nonterminals)
682 nvars (- nvars nuseless-nonterminals))
683 ))
684
685(defun wisent-total-useless ()
686 "Report number of useless nonterminals and productions."
687 (let* ((src (wisent-source))
688 (src (if src (concat " in " src) ""))
689 (msg (format "Grammar%s contains" src)))
690 (if (> nuseless-nonterminals 0)
691 (setq msg (format "%s %d useless nonterminal%s"
692 msg nuseless-nonterminals
693 (if (> nuseless-nonterminals 0) "s" ""))))
694 (if (and (> nuseless-nonterminals 0) (> nuseless-productions 0))
695 (setq msg (format "%s and" msg)))
696 (if (> nuseless-productions 0)
697 (setq msg (format "%s %d useless rule%s"
698 msg nuseless-productions
699 (if (> nuseless-productions 0) "s" ""))))
700 (message msg)))
701
702(defun wisent-reduce-grammar ()
703 "Find unreachable terminals, nonterminals and productions."
704 ;; Allocate the global sets used to compute the reduced grammar
705 (setq N (make-vector (wisent-WORDSIZE nvars) 0)
706 P (make-vector (wisent-WORDSIZE (1+ nrules)) 0)
707 V (make-vector (wisent-WORDSIZE nsyms) 0)
708 V1 (make-vector (wisent-WORDSIZE nsyms) 0)
709 nuseless-nonterminals 0
710 nuseless-productions 0)
711
712 (wisent-useless-nonterminals)
713 (wisent-inaccessable-symbols)
714
715 (when (> (+ nuseless-nonterminals nuseless-productions) 0)
716 (wisent-total-useless)
717 (or (wisent-BITISSET N (- start-symbol ntokens))
718 (error "Start symbol `%s' does not derive any sentence"
719 (wisent-tag start-symbol)))
720 (wisent-reduce-grammar-tables)
721 (if (> nuseless-nonterminals 0)
722 (wisent-nonterminals-reduce))))
723
724(defun wisent-print-useless ()
725 "Output the detailed results of the reductions."
726 (let (i b r)
727 (when (> nuseless-nonterminals 0)
728 ;; Useless nonterminals have been moved after useful ones.
729 (wisent-log "\n\nUseless nonterminals:\n\n")
730 (setq i 0)
731 (while (< i nuseless-nonterminals)
732 (wisent-log " %s\n" (wisent-tag (+ nsyms i)))
733 (setq i (1+ i))))
734 (setq b nil
735 i 0)
736 (while (< i ntokens)
737 (unless (or (wisent-BITISSET V i) (wisent-BITISSET V1 i))
738 (or b
739 (wisent-log "\n\nTerminals which are not used:\n\n"))
740 (setq b t)
741 (wisent-log " %s\n" (wisent-tag i)))
742 (setq i (1+ i)))
743 (when (> nuseless-productions 0)
744 (wisent-log "\n\nUseless rules:\n\n")
745 (setq i 1)
746 (while (<= i nrules)
747 (unless (aref ruseful i)
748 (wisent-log "#%s " (wisent-pad-string (format "%d" i) 4))
749 (wisent-log "%s:" (wisent-tag (aref rlhs i)))
750 (setq r (aref rrhs i))
751 (while (natnump (aref ritem r))
752 (wisent-log " %s" (wisent-tag (aref ritem r)))
753 (setq r (1+ r)))
754 (wisent-log ";\n"))
755 (setq i (1+ i))))
756 (if (or b (> nuseless-nonterminals 0) (> nuseless-productions 0))
757 (wisent-log "\n\n"))
758 ))
759\f
760;;;; -----------------------------
761;;;; Match rules with nonterminals
762;;;; -----------------------------
763
764(defun wisent-set-derives ()
765 "Find, for each variable (nonterminal), which rules can derive it.
766It sets up the value of DERIVES so that DERIVES[i - NTOKENS] points to
767a list of rule numbers, terminated with -1."
768 (let (i lhs p q dset delts)
769 (setq dset (make-vector nvars nil)
770 delts (make-vector (1+ nrules) 0))
771 (setq p 0 ;; p = delts
772 i nrules)
773 (while (> i 0)
774 (when (aref ruseful i)
775 (setq lhs (aref rlhs i))
776 ;; p->next = dset[lhs];
777 ;; p->value = i;
778 (aset delts p (cons i (aref dset (- lhs ntokens)))) ;; (value . next)
779 (aset dset (- lhs ntokens) p) ;; dset[lhs] = p
780 (setq p (1+ p)) ;; p++
781 )
782 (setq i (1- i)))
783
784 (setq derives (make-vector nvars nil)
785 i ntokens)
786
787 (while (< i nsyms)
788 (setq q nil
789 p (aref dset (- i ntokens))) ;; p = dset[i]
790
791 (while p
792 (setq p (aref delts p)
793 q (cons (car p) q) ;;q++ = p->value
794 p (cdr p))) ;; p = p->next
795 (setq q (nreverse (cons -1 q))) ;; *q++ = -1
796 (aset derives (- i ntokens) q) ;; derives[i] = q
797 (setq i (1+ i)))
798 ))
799\f
800;;;; --------------------------------------------------------
801;;;; Find which nonterminals can expand into the null string.
802;;;; --------------------------------------------------------
803
804(defun wisent-print-nullable ()
805 "Print NULLABLE."
806 (let (i)
807 (wisent-log "NULLABLE\n")
808 (setq i ntokens)
809 (while (< i nsyms)
810 (wisent-log "\t%s: %s\n" (wisent-tag i)
811 (if (aref nullable (- i ntokens))
812 "yes" : "no"))
813 (setq i (1+ i)))
814 (wisent-log "\n\n")))
815
816(defun wisent-set-nullable ()
817 "Set up NULLABLE.
818A vector saying which nonterminals can expand into the null string.
819NULLABLE[i - NTOKENS] is nil if symbol I can do so."
820 (let (ruleno s1 s2 p r squeue rcount rsets relts item any-tokens)
821 (setq squeue (make-vector nvars 0)
822 rcount (make-vector (1+ nrules) 0)
823 rsets (make-vector nvars nil) ;; - ntokens
824 relts (make-vector (+ nitems nvars 1) nil)
825 nullable (make-vector nvars nil)) ;; - ntokens
826 (setq s1 0 s2 0 ;; s1 = s2 = squeue
827 p 0 ;; p = relts
828 ruleno 1)
829 (while (<= ruleno nrules)
830 (when (aref ruseful ruleno)
831 (if (> (aref ritem (aref rrhs ruleno)) 0)
832 (progn
833 ;; This rule has a non empty RHS.
834 (setq any-tokens nil
835 r (aref rrhs ruleno))
836 (while (> (aref ritem r) 0)
837 (if (wisent-ISTOKEN (aref ritem r))
838 (setq any-tokens t))
839 (setq r (1+ r)))
840
841 ;; This rule has only nonterminals: schedule it for the
842 ;; second pass.
843 (unless any-tokens
844 (setq r (aref rrhs ruleno))
845 (while (> (setq item (aref ritem r)) 0)
846 (aset rcount ruleno (1+ (aref rcount ruleno)))
847 ;; p->next = rsets[item];
848 ;; p->value = ruleno;
849 (aset relts p (cons ruleno (aref rsets (- item ntokens))))
850 ;; rsets[item] = p;
851 (aset rsets (- item ntokens) p)
852 (setq p (1+ p)
853 r (1+ r)))))
854 ;; This rule has an empty RHS.
855 ;; assert (ritem[rrhs[ruleno]] == -ruleno)
856 (when (and (aref ruseful ruleno)
857 (setq item (aref rlhs ruleno))
858 (not (aref nullable (- item ntokens))))
859 (aset nullable (- item ntokens) t)
860 (aset squeue s2 item)
861 (setq s2 (1+ s2)))
862 )
863 )
864 (setq ruleno (1+ ruleno)))
865
866 (while (< s1 s2)
867 ;; p = rsets[*s1++]
868 (setq p (aref rsets (- (aref squeue s1) ntokens))
869 s1 (1+ s1))
870 (while p
871 (setq p (aref relts p)
872 ruleno (car p)
873 p (cdr p)) ;; p = p->next
874 ;; if (--rcount[ruleno] == 0)
875 (when (zerop (aset rcount ruleno (1- (aref rcount ruleno))))
876 (setq item (aref rlhs ruleno))
877 (aset nullable (- item ntokens) t)
878 (aset squeue s2 item)
879 (setq s2 (1+ s2)))))
880
881 (if wisent-debug-flag
882 (wisent-print-nullable))
883 ))
884\f
885;;;; -----------
886;;;; Subroutines
887;;;; -----------
888
889(defun wisent-print-fderives ()
890 "Print FDERIVES."
891 (let (i j rp)
892 (wisent-log "\n\n\nFDERIVES\n")
893 (setq i ntokens)
894 (while (< i nsyms)
895 (wisent-log "\n\n%s derives\n\n" (wisent-tag i))
896 (setq rp (aref fderives (- i ntokens))
897 j 0)
898 (while (<= j nrules)
899 (if (wisent-BITISSET rp j)
900 (wisent-log " %d\n" j))
901 (setq j (1+ j)))
902 (setq i (1+ i)))))
903
904(defun wisent-set-fderives ()
905 "Set up FDERIVES.
906An NVARS by NRULES matrix of bits indicating which rules can help
907derive the beginning of the data for each nonterminal. For example,
908if symbol 5 can be derived as the sequence of symbols 8 3 20, and one
909of the rules for deriving symbol 8 is rule 4, then the
910\[5 - NTOKENS, 4] bit in FDERIVES is set."
911 (let (i j k)
912 (setq fderives (make-vector nvars nil))
913 (setq i 0)
914 (while (< i nvars)
915 (aset fderives i (make-vector rulesetsize 0))
916 (setq i (1+ i)))
917
918 (wisent-set-firsts)
919
920 (setq i ntokens)
921 (while (< i nsyms)
922 (setq j ntokens)
923 (while (< j nsyms)
924 ;; if (BITISSET (FIRSTS (i), j - ntokens))
925 (when (wisent-BITISSET (aref firsts (- i ntokens)) (- j ntokens))
926 (setq k (aref derives (- j ntokens)))
927 (while (> (car k) 0) ;; derives[j][k] > 0
928 ;; SETBIT (FDERIVES (i), derives[j][k]);
929 (wisent-SETBIT (aref fderives (- i ntokens)) (car k))
930 (setq k (cdr k))))
931 (setq j (1+ j)))
932 (setq i (1+ i)))
933
934 (if wisent-debug-flag
935 (wisent-print-fderives))
936 ))
937
938(defun wisent-print-firsts ()
939 "Print FIRSTS."
940 (let (i j v)
941 (wisent-log "\n\n\nFIRSTS\n\n")
942 (setq i ntokens)
943 (while (< i nsyms)
944 (wisent-log "\n\n%s firsts\n\n" (wisent-tag i))
945 (setq v (aref firsts (- i ntokens))
946 j 0)
947 (while (< j nvars)
948 (if (wisent-BITISSET v j)
949 (wisent-log "\t\t%d (%s)\n"
950 (+ j ntokens) (wisent-tag (+ j ntokens))))
951 (setq j (1+ j)))
952 (setq i (1+ i)))))
953
954(defun wisent-TC (R n)
955 "Transitive closure.
956Given R an N by N matrix of bits, modify its contents to be the
957transitive closure of what was given."
958 (let (i j k)
959 ;; R (J, I) && R (I, K) => R (J, K).
960 ;; I *must* be the outer loop.
961 (setq i 0)
962 (while (< i n)
963 (setq j 0)
964 (while (< j n)
965 (when (wisent-BITISSET (aref R j) i)
966 (setq k 0)
967 (while (< k n)
968 (if (wisent-BITISSET (aref R i) k)
969 (wisent-SETBIT (aref R j) k))
970 (setq k (1+ k))))
971 (setq j (1+ j)))
972 (setq i (1+ i)))))
973
974(defun wisent-RTC (R n)
975 "Reflexive Transitive Closure.
976Same as `wisent-TC' and then set all the bits on the diagonal of R, an
977N by N matrix of bits."
978 (let (i)
979 (wisent-TC R n)
980 (setq i 0)
981 (while (< i n)
982 (wisent-SETBIT (aref R i) i)
983 (setq i (1+ i)))))
984
985(defun wisent-set-firsts ()
986 "Set up FIRSTS.
987An NVARS by NVARS bit matrix indicating which items can represent the
988beginning of the input corresponding to which other items. For
989example, if some rule expands symbol 5 into the sequence of symbols 8
9903 20, the symbol 8 can be the beginning of the data for symbol 5, so
991the bit [8 - NTOKENS, 5 - NTOKENS] in FIRSTS is set."
992 (let (row symbol sp rowsize i)
993 (setq rowsize (wisent-WORDSIZE nvars)
994 varsetsize rowsize
995 firsts (make-vector nvars nil)
996 i 0)
997 (while (< i nvars)
998 (aset firsts i (make-vector rowsize 0))
999 (setq i (1+ i)))
1000
1001 (setq row 0 ;; row = firsts
1002 i ntokens)
1003 (while (< i nsyms)
1004 (setq sp (aref derives (- i ntokens)))
1005 (while (>= (car sp) 0)
1006 (setq symbol (aref ritem (aref rrhs (car sp)))
1007 sp (cdr sp))
1008 (when (wisent-ISVAR symbol)
1009 (setq symbol (- symbol ntokens))
1010 (wisent-SETBIT (aref firsts row) symbol)
1011 ))
1012 (setq row (1+ row)
1013 i (1+ i)))
1014
1015 (wisent-RTC firsts nvars)
1016
1017 (if wisent-debug-flag
1018 (wisent-print-firsts))
1019 ))
1020
1021(defun wisent-initialize-closure (n)
1022 "Allocate the ITEMSET and RULESET vectors.
1023And precompute useful data so that `wisent-closure' can be called.
1024N is the number of elements to allocate for ITEMSET."
1025 (setq itemset (make-vector n 0)
1026 rulesetsize (wisent-WORDSIZE (1+ nrules))
1027 ruleset (make-vector rulesetsize 0))
1028
1029 (wisent-set-fderives))
1030
1031(defun wisent-print-closure ()
1032 "Print ITEMSET."
1033 (let (i)
1034 (wisent-log "\n\nclosure n = %d\n\n" nitemset)
1035 (setq i 0) ;; isp = itemset
1036 (while (< i nitemset)
1037 (wisent-log " %d\n" (aref itemset i))
1038 (setq i (1+ i)))))
1039
1040(defun wisent-closure (core n)
1041 "Set up RULESET and ITEMSET for the transitions out of CORE state.
1042Given a vector of item numbers items, of length N, set up RULESET and
1043ITEMSET to indicate what rules could be run and which items could be
1044accepted when those items are the active ones.
1045
1046RULESET contains a bit for each rule. `wisent-closure' sets the bits
1047for all rules which could potentially describe the next input to be
1048read.
1049
1050ITEMSET is a vector of item numbers; NITEMSET is the number of items
1051in ITEMSET. `wisent-closure' places there the indices of all items
1052which represent units of input that could arrive next."
1053 (let (c r v symbol ruleno itemno)
1054 (if (zerop n)
1055 (progn
1056 (setq r 0
1057 v (aref fderives (- start-symbol ntokens)))
1058 (while (< r rulesetsize)
1059 ;; ruleset[r] = FDERIVES (start-symbol)[r];
1060 (aset ruleset r (aref v r))
1061 (setq r (1+ r)))
1062 )
1063 (fillarray ruleset 0)
1064 (setq c 0)
1065 (while (< c n)
1066 (setq symbol (aref ritem (aref core c)))
1067 (when (wisent-ISVAR symbol)
1068 (setq r 0
1069 v (aref fderives (- symbol ntokens)))
1070 (while (< r rulesetsize)
1071 ;; ruleset[r] |= FDERIVES (ritem[core[c]])[r];
1072 (aset ruleset r (logior (aref ruleset r) (aref v r)))
1073 (setq r (1+ r))))
1074 (setq c (1+ c)))
1075 )
1076 (setq nitemset 0
1077 c 0
1078 ruleno 0
1079 r (* rulesetsize wisent-BITS-PER-WORD))
1080 (while (< ruleno r)
1081 (when (wisent-BITISSET ruleset ruleno)
1082 (setq itemno (aref rrhs ruleno))
1083 (while (and (< c n) (< (aref core c) itemno))
1084 (aset itemset nitemset (aref core c))
1085 (setq nitemset (1+ nitemset)
1086 c (1+ c)))
1087 (aset itemset nitemset itemno)
1088 (setq nitemset (1+ nitemset)))
1089 (setq ruleno (1+ ruleno)))
1090
1091 (while (< c n)
1092 (aset itemset nitemset (aref core c))
1093 (setq nitemset (1+ nitemset)
1094 c (1+ c)))
1095
1096 (if wisent-debug-flag
1097 (wisent-print-closure))
1098 ))
1099\f
1100;;;; --------------------------------------------------
1101;;;; Generate the nondeterministic finite state machine
1102;;;; --------------------------------------------------
1103
1104(defun wisent-allocate-itemsets ()
1105 "Allocate storage for itemsets."
1106 (let (symbol i count symbol-count)
1107 ;; Count the number of occurrences of all the symbols in RITEMS.
1108 ;; Note that useless productions (hence useless nonterminals) are
1109 ;; browsed too, hence we need to allocate room for _all_ the
1110 ;; symbols.
1111 (setq count 0
1112 symbol-count (make-vector (+ nsyms nuseless-nonterminals) 0)
1113 i 0)
1114 (while (setq symbol (aref ritem i))
1115 (when (> symbol 0)
1116 (setq count (1+ count))
1117 (aset symbol-count symbol (1+ (aref symbol-count symbol))))
1118 (setq i (1+ i)))
1119 ;; See comments before `wisent-new-itemsets'. All the vectors of
1120 ;; items live inside kernel-items. The number of active items
1121 ;; after some symbol cannot be more than the number of times that
1122 ;; symbol appears as an item, which is symbol-count[symbol]. We
1123 ;; allocate that much space for each symbol.
1124 (setq kernel-base (make-vector nsyms nil)
1125 kernel-items (make-vector count 0)
1126 count 0
1127 i 0)
1128 (while (< i nsyms)
1129 (aset kernel-base i count)
1130 (setq count (+ count (aref symbol-count i))
1131 i (1+ i)))
1132 (setq shift-symbol symbol-count
1133 kernel-end (make-vector nsyms nil))
1134 ))
1135
1136(defun wisent-allocate-storage ()
1137 "Allocate storage for the state machine."
1138 (wisent-allocate-itemsets)
1139 (setq shiftset (make-vector nsyms 0)
1140 redset (make-vector (1+ nrules) 0)
1141 state-table (make-vector wisent-state-table-size nil)))
1142
1143(defun wisent-new-itemsets ()
1144 "Find which symbols can be shifted in the current state.
1145And for each one record which items would be active after that shift.
1146Uses the contents of ITEMSET. SHIFT-SYMBOL is set to a vector of the
1147symbols that can be shifted. For each symbol in the grammar,
1148KERNEL-BASE[symbol] points to a vector of item numbers activated if
1149that symbol is shifted, and KERNEL-END[symbol] points after the end of
1150that vector."
1151 (let (i shiftcount isp ksp symbol)
1152 (fillarray kernel-end nil)
1153 (setq shiftcount 0
1154 isp 0)
1155 (while (< isp nitemset)
1156 (setq i (aref itemset isp)
1157 isp (1+ isp)
1158 symbol (aref ritem i))
1159 (when (> symbol 0)
1160 (setq ksp (aref kernel-end symbol))
1161 (when (not ksp)
1162 ;; shift-symbol[shiftcount++] = symbol;
1163 (aset shift-symbol shiftcount symbol)
1164 (setq shiftcount (1+ shiftcount)
1165 ksp (aref kernel-base symbol)))
1166 ;; *ksp++ = i + 1;
1167 (aset kernel-items ksp (1+ i))
1168 (setq ksp (1+ ksp))
1169 (aset kernel-end symbol ksp)))
1170 (setq nshifts shiftcount)))
1171
1172(defun wisent-new-state (symbol)
1173 "Create a new state for those items, if necessary.
1174SYMBOL is the core accessing-symbol.
1175Subroutine of `wisent-get-state'."
1176 (let (n p isp1 isp2 iend items)
1177 (setq isp1 (aref kernel-base symbol)
1178 iend (aref kernel-end symbol)
1179 n (- iend isp1)
1180 p (make-core)
1181 items (make-vector n 0))
1182 (set-core-accessing-symbol p symbol)
1183 (set-core-number p nstates)
1184 (set-core-nitems p n)
1185 (set-core-items p items)
1186 (setq isp2 0) ;; isp2 = p->items
1187 (while (< isp1 iend)
1188 ;; *isp2++ = *isp1++;
1189 (aset items isp2 (aref kernel-items isp1))
1190 (setq isp1 (1+ isp1)
1191 isp2 (1+ isp2)))
1192 (set-core-next last-state p)
1193 (setq last-state p
1194 nstates (1+ nstates))
1195 p))
1196
1197(defun wisent-get-state (symbol)
1198 "Find the state we would get to by shifting SYMBOL.
1199Return the state number for the state we would get to (from the
1200current state) by shifting SYMBOL. Create a new state if no
1201equivalent one exists already. Used by `wisent-append-states'."
1202 (let (key isp1 isp2 iend sp sp2 found n)
1203 (setq isp1 (aref kernel-base symbol)
1204 iend (aref kernel-end symbol)
1205 n (- iend isp1)
1206 key 0)
1207 ;; Add up the target state's active item numbers to get a hash key
1208 (while (< isp1 iend)
1209 (setq key (+ key (aref kernel-items isp1))
1210 isp1 (1+ isp1)))
1211 (setq key (% key wisent-state-table-size)
1212 sp (aref state-table key))
1213 (if sp
1214 (progn
1215 (setq found nil)
1216 (while (not found)
1217 (when (= (core-nitems sp) n)
1218 (setq found t
1219 isp1 (aref kernel-base symbol)
1220 ;; isp2 = sp->items;
1221 sp2 (core-items sp)
1222 isp2 0)
1223
1224 (while (and found (< isp1 iend))
1225 ;; if (*isp1++ != *isp2++)
1226 (if (not (= (aref kernel-items isp1)
1227 (aref sp2 isp2)))
1228 (setq found nil))
1229 (setq isp1 (1+ isp1)
1230 isp2 (1+ isp2))))
1231 (if (not found)
1232 (if (core-link sp)
1233 (setq sp (core-link sp))
1234 ;; sp = sp->link = new-state(symbol)
1235 (setq sp (set-core-link sp (wisent-new-state symbol))
1236 found t)))))
1237 ;; bucket is empty
1238 ;; state-table[key] = sp = new-state(symbol)
1239 (setq sp (wisent-new-state symbol))
1240 (aset state-table key sp))
1241 ;; return (sp->number);
1242 (core-number sp)))
1243
1244(defun wisent-append-states ()
1245 "Find or create the core structures for states.
1246Use the information computed by `wisent-new-itemsets' to find the
1247state numbers reached by each shift transition from the current state.
1248SHIFTSET is set up as a vector of state numbers of those states."
1249 (let (i j symbol)
1250 ;; First sort shift-symbol into increasing order
1251 (setq i 1)
1252 (while (< i nshifts)
1253 (setq symbol (aref shift-symbol i)
1254 j i)
1255 (while (and (> j 0) (> (aref shift-symbol (1- j)) symbol))
1256 (aset shift-symbol j (aref shift-symbol (1- j)))
1257 (setq j (1- j)))
1258 (aset shift-symbol j symbol)
1259 (setq i (1+ i)))
1260 (setq i 0)
1261 (while (< i nshifts)
1262 (setq symbol (aref shift-symbol i))
1263 (aset shiftset i (wisent-get-state symbol))
1264 (setq i (1+ i)))
1265 ))
1266
1267(defun wisent-initialize-states ()
1268 "Initialize states."
1269 (let ((p (make-core)))
1270 (setq first-state p
1271 last-state p
1272 this-state p
1273 nstates 1)))
1274
1275(defun wisent-save-shifts ()
1276 "Save the NSHIFTS of SHIFTSET into the current linked list."
1277 (let (p i shifts)
1278 (setq p (make-shifts)
1279 shifts (make-vector nshifts 0)
1280 i 0)
1281 (set-shifts-number p (core-number this-state))
1282 (set-shifts-nshifts p nshifts)
1283 (set-shifts-shifts p shifts)
1284 (while (< i nshifts)
1285 ;; (p->shifts)[i] = shiftset[i];
1286 (aset shifts i (aref shiftset i))
1287 (setq i (1+ i)))
1288
1289 (if last-shift
1290 (set-shifts-next last-shift p)
1291 (setq first-shift p))
1292 (setq last-shift p)))
1293
1294(defun wisent-insert-start-shift ()
1295 "Create the next-to-final state.
1296That is the state to which a shift has already been made in the
1297initial state. Subroutine of `wisent-augment-automaton'."
1298 (let (statep sp)
1299 (setq statep (make-core))
1300 (set-core-number statep nstates)
1301 (set-core-accessing-symbol statep start-symbol)
1302 (set-core-next last-state statep)
1303 (setq last-state statep)
1304 ;; Make a shift from this state to (what will be) the final state.
1305 (setq sp (make-shifts))
1306 (set-shifts-number sp nstates)
1307 (setq nstates (1+ nstates))
1308 (set-shifts-nshifts sp 1)
1309 (set-shifts-shifts sp (vector nstates))
1310 (set-shifts-next last-shift sp)
1311 (setq last-shift sp)))
1312
1313(defun wisent-augment-automaton ()
1314 "Set up initial and final states as parser wants them.
1315Make sure that the initial state has a shift that accepts the
1316grammar's start symbol and goes to the next-to-final state, which has
1317a shift going to the final state, which has a shift to the termination
1318state. Create such states and shifts if they don't happen to exist
1319already."
1320 (let (i k statep sp sp2 sp1 shifts)
1321 (setq sp first-shift)
1322 (if sp
1323 (progn
1324 (if (zerop (shifts-number sp))
1325 (progn
1326 (setq k (shifts-nshifts sp)
1327 statep (core-next first-state))
1328 ;; The states reached by shifts from first-state are
1329 ;; numbered 1...K. Look for one reached by
1330 ;; START-SYMBOL.
1331 (while (and (< (core-accessing-symbol statep) start-symbol)
1332 (< (core-number statep) k))
1333 (setq statep (core-next statep)))
1334 (if (= (core-accessing-symbol statep) start-symbol)
1335 (progn
1336 ;; We already have a next-to-final state. Make
1337 ;; sure it has a shift to what will be the final
1338 ;; state.
1339 (setq k (core-number statep))
1340 (while (and sp (< (shifts-number sp) k))
1341 (setq sp1 sp
1342 sp (shifts-next sp)))
1343 (if (and sp (= (shifts-number sp) k))
1344 (progn
1345 (setq i (shifts-nshifts sp)
1346 sp2 (make-shifts)
1347 shifts (make-vector (1+ i) 0))
1348 (set-shifts-number sp2 k)
1349 (set-shifts-nshifts sp2 (1+ i))
1350 (set-shifts-shifts sp2 shifts)
1351 (aset shifts 0 nstates)
1352 (while (> i 0)
1353 ;; sp2->shifts[i] = sp->shifts[i - 1];
1354 (aset shifts i (aref (shifts-shifts sp) (1- i)))
1355 (setq i (1- i)))
1356 ;; Patch sp2 into the chain of shifts in
1357 ;; place of sp, following sp1.
1358 (set-shifts-next sp2 (shifts-next sp))
1359 (set-shifts-next sp1 sp2)
1360 (if (eq sp last-shift)
1361 (setq last-shift sp2))
1362 )
1363 (setq sp2 (make-shifts))
1364 (set-shifts-number sp2 k)
1365 (set-shifts-nshifts sp2 1)
1366 (set-shifts-shifts sp2 (vector nstates))
1367 ;; Patch sp2 into the chain of shifts between
1368 ;; sp1 and sp.
1369 (set-shifts-next sp2 sp)
1370 (set-shifts-next sp1 sp2)
1371 (if (not sp)
1372 (setq last-shift sp2))
1373 )
1374 )
1375 ;; There is no next-to-final state as yet.
1376 ;; Add one more shift in FIRST-SHIFT, going to the
1377 ;; next-to-final state (yet to be made).
1378 (setq sp first-shift
1379 sp2 (make-shifts)
1380 i (shifts-nshifts sp)
1381 shifts (make-vector (1+ i) 0))
1382 (set-shifts-nshifts sp2 (1+ i))
1383 (set-shifts-shifts sp2 shifts)
1384 ;; Stick this shift into the vector at the proper place.
1385 (setq statep (core-next first-state)
1386 k 0
1387 i 0)
1388 (while (< i (shifts-nshifts sp))
1389 (when (and (> (core-accessing-symbol statep) start-symbol)
1390 (= i k))
1391 (aset shifts k nstates)
1392 (setq k (1+ k)))
1393 (aset shifts k (aref (shifts-shifts sp) i))
1394 (setq statep (core-next statep))
1395 (setq i (1+ i)
1396 k (1+ k)))
1397 (when (= i k)
1398 (aset shifts k nstates)
1399 (setq k (1+ k)))
1400 ;; Patch sp2 into the chain of shifts in place of
1401 ;; sp, at the beginning.
1402 (set-shifts-next sp2 (shifts-next sp))
1403 (setq first-shift sp2)
1404 (if (eq last-shift sp)
1405 (setq last-shift sp2))
1406 ;; Create the next-to-final state, with shift to
1407 ;; what will be the final state.
1408 (wisent-insert-start-shift)))
1409 ;; The initial state didn't even have any shifts. Give it
1410 ;; one shift, to the next-to-final state.
1411 (setq sp (make-shifts))
1412 (set-shifts-nshifts sp 1)
1413 (set-shifts-shifts sp (vector nstates))
1414 ;; Patch sp into the chain of shifts at the beginning.
1415 (set-shifts-next sp first-shift)
1416 (setq first-shift sp)
1417 ;; Create the next-to-final state, with shift to what will
1418 ;; be the final state.
1419 (wisent-insert-start-shift)))
1420 ;; There are no shifts for any state. Make one shift, from the
1421 ;; initial state to the next-to-final state.
1422 (setq sp (make-shifts))
1423 (set-shifts-nshifts sp 1)
1424 (set-shifts-shifts sp (vector nstates))
1425 ;; Initialize the chain of shifts with sp.
1426 (setq first-shift sp
1427 last-shift sp)
1428 ;; Create the next-to-final state, with shift to what will be
1429 ;; the final state.
1430 (wisent-insert-start-shift))
1431 ;; Make the final state--the one that follows a shift from the
1432 ;; next-to-final state. The symbol for that shift is 0
1433 ;; (end-of-file).
1434 (setq statep (make-core))
1435 (set-core-number statep nstates)
1436 (set-core-next last-state statep)
1437 (setq last-state statep)
1438 ;; Make the shift from the final state to the termination state.
1439 (setq sp (make-shifts))
1440 (set-shifts-number sp nstates)
1441 (setq nstates (1+ nstates))
1442 (set-shifts-nshifts sp 1)
1443 (set-shifts-shifts sp (vector nstates))
1444 (set-shifts-next last-shift sp)
1445 (setq last-shift sp)
1446 ;; Note that the variable FINAL-STATE refers to what we sometimes
1447 ;; call the termination state.
1448 (setq final-state nstates)
1449 ;; Make the termination state.
1450 (setq statep (make-core))
1451 (set-core-number statep nstates)
1452 (setq nstates (1+ nstates))
1453 (set-core-next last-state statep)
1454 (setq last-state statep)))
1455
1456(defun wisent-save-reductions ()
1457 "Make a reductions structure.
1458Find which rules can be used for reduction transitions from the
1459current state and make a reductions structure for the state to record
1460their rule numbers."
1461 (let (i item count p rules)
1462 ;; Find and count the active items that represent ends of rules.
1463 (setq count 0
1464 i 0)
1465 (while (< i nitemset)
1466 (setq item (aref ritem (aref itemset i)))
1467 (when (< item 0)
1468 (aset redset count (- item))
1469 (setq count (1+ count)))
1470 (setq i (1+ i)))
1471 ;; Make a reductions structure and copy the data into it.
1472 (when (> count 0)
1473 (setq p (make-reductions)
1474 rules (make-vector count 0))
1475 (set-reductions-number p (core-number this-state))
1476 (set-reductions-nreds p count)
1477 (set-reductions-rules p rules)
1478 (setq i 0)
1479 (while (< i count)
1480 ;; (p->rules)[i] = redset[i]
1481 (aset rules i (aref redset i))
1482 (setq i (1+ i)))
1483 (if last-reduction
1484 (set-reductions-next last-reduction p)
1485 (setq first-reduction p))
1486 (setq last-reduction p))))
1487
1488(defun wisent-generate-states ()
1489 "Compute the nondeterministic finite state machine from the grammar."
1490 (wisent-allocate-storage)
1491 (wisent-initialize-closure nitems)
1492 (wisent-initialize-states)
1493 (while this-state
1494 ;; Set up RULESET and ITEMSET for the transitions out of this
1495 ;; state. RULESET gets a 1 bit for each rule that could reduce
1496 ;; now. ITEMSET gets a vector of all the items that could be
1497 ;; accepted next.
1498 (wisent-closure (core-items this-state) (core-nitems this-state))
1499 ;; Record the reductions allowed out of this state.
1500 (wisent-save-reductions)
1501 ;; Find the itemsets of the states that shifts can reach.
1502 (wisent-new-itemsets)
1503 ;; Find or create the core structures for those states.
1504 (wisent-append-states)
1505 ;; Create the shifts structures for the shifts to those states,
1506 ;; now that the state numbers transitioning to are known.
1507 (if (> nshifts 0)
1508 (wisent-save-shifts))
1509 ;; States are queued when they are created; process them all.
1510 (setq this-state (core-next this-state)))
1511 ;; Set up initial and final states as parser wants them.
1512 (wisent-augment-automaton))
1513\f
1514;;;; ---------------------------
1515;;;; Compute look-ahead criteria
1516;;;; ---------------------------
1517
1518;; Compute how to make the finite state machine deterministic; find
1519;; which rules need lookahead in each state, and which lookahead
1520;; tokens they accept.
1521
1522;; `wisent-lalr', the entry point, builds these data structures:
1523
1524;; GOTO-MAP, FROM-STATE and TO-STATE record each shift transition
1525;; which accepts a variable (a nonterminal). NGOTOS is the number of
1526;; such transitions.
1527;; FROM-STATE[t] is the state number which a transition leads from and
1528;; TO-STATE[t] is the state number it leads to.
1529;; All the transitions that accept a particular variable are grouped
1530;; together and GOTO-MAP[i - NTOKENS] is the index in FROM-STATE and
1531;; TO-STATE of the first of them.
1532
1533;; CONSISTENT[s] is non-nil if no lookahead is needed to decide what
1534;; to do in state s.
1535
1536;; LARULENO is a vector which records the rules that need lookahead in
1537;; various states. The elements of LARULENO that apply to state s are
1538;; those from LOOKAHEADS[s] through LOOKAHEADS[s+1]-1. Each element
1539;; of LARULENO is a rule number.
1540
1541;; If LR is the length of LARULENO, then a number from 0 to LR-1 can
1542;; specify both a rule and a state where the rule might be applied.
1543;; LA is a LR by NTOKENS matrix of bits.
1544;; LA[l, i] is 1 if the rule LARULENO[l] is applicable in the
1545;; appropriate state when the next token is symbol i.
1546;; If LA[l, i] and LA[l, j] are both 1 for i != j, it is a conflict.
1547
1548(wisent-defcontext digraph
1549 INDEX R VERTICES
1550 infinity top)
1551
1552(defun wisent-traverse (i)
1553 "Traverse I."
1554 (let (j k height Ri Fi break)
1555 (setq top (1+ top)
1556 height top)
1557 (aset VERTICES top i) ;; VERTICES[++top] = i
1558 (aset INDEX i top) ;; INDEX[i] = height = top
1559
1560 (setq Ri (aref R i))
1561 (when Ri
1562 (setq j 0)
1563 (while (>= (aref Ri j) 0)
1564 (if (zerop (aref INDEX (aref Ri j)))
1565 (wisent-traverse (aref Ri j)))
1566 ;; if (INDEX[i] > INDEX[R[i][j]])
1567 (if (> (aref INDEX i) (aref INDEX (aref Ri j)))
1568 ;; INDEX[i] = INDEX[R[i][j]];
1569 (aset INDEX i (aref INDEX (aref Ri j))))
1570 (setq Fi (aref F i)
1571 k 0)
1572 (while (< k tokensetsize)
1573 ;; F (i)[k] |= F (R[i][j])[k];
1574 (aset Fi k (logior (aref Fi k)
1575 (aref (aref F (aref Ri j)) k)))
1576 (setq k (1+ k)))
1577 (setq j (1+ j))))
1578
1579 (when (= (aref INDEX i) height)
1580 (setq break nil)
1581 (while (not break)
1582 (setq j (aref VERTICES top) ;; j = VERTICES[top--]
1583 top (1- top))
1584 (aset INDEX j infinity)
1585 (if (= i j)
1586 (setq break t)
1587 (setq k 0)
1588 (while (< k tokensetsize)
1589 ;; F (j)[k] = F (i)[k];
1590 (aset (aref F j) k (aref (aref F i) k))
1591 (setq k (1+ k))))))
1592 ))
1593
1594(defun wisent-digraph (relation)
1595 "Digraph RELATION."
1596 (wisent-with-context digraph
1597 (setq infinity (+ ngotos 2)
1598 INDEX (make-vector (1+ ngotos) 0)
1599 VERTICES (make-vector (1+ ngotos) 0)
1600 top 0
1601 R relation)
1602 (let ((i 0))
1603 (while (< i ngotos)
1604 (if (and (= (aref INDEX i) 0) (aref R i))
1605 (wisent-traverse i))
1606 (setq i (1+ i))))))
1607
1608(defun wisent-set-state-table ()
1609 "Build state table."
1610 (let (sp)
1611 (setq state-table (make-vector nstates nil)
1612 sp first-state)
1613 (while sp
1614 (aset state-table (core-number sp) sp)
1615 (setq sp (core-next sp)))))
1616
1617(defun wisent-set-accessing-symbol ()
1618 "Build accessing symbol table."
1619 (let (sp)
1620 (setq accessing-symbol (make-vector nstates 0)
1621 sp first-state)
1622 (while sp
1623 (aset accessing-symbol (core-number sp) (core-accessing-symbol sp))
1624 (setq sp (core-next sp)))))
1625
1626(defun wisent-set-shift-table ()
1627 "Build shift table."
1628 (let (sp)
1629 (setq shift-table (make-vector nstates nil)
1630 sp first-shift)
1631 (while sp
1632 (aset shift-table (shifts-number sp) sp)
1633 (setq sp (shifts-next sp)))))
1634
1635(defun wisent-set-reduction-table ()
1636 "Build reduction table."
1637 (let (rp)
1638 (setq reduction-table (make-vector nstates nil)
1639 rp first-reduction)
1640 (while rp
1641 (aset reduction-table (reductions-number rp) rp)
1642 (setq rp (reductions-next rp)))))
1643
1644(defun wisent-set-maxrhs ()
1645 "Setup MAXRHS length."
1646 (let (i len max)
1647 (setq len 0
1648 max 0
1649 i 0)
1650 (while (aref ritem i)
1651 (if (> (aref ritem i) 0)
1652 (setq len (1+ len))
1653 (if (> len max)
1654 (setq max len))
1655 (setq len 0))
1656 (setq i (1+ i)))
1657 (setq maxrhs max)))
1658
1659(defun wisent-initialize-LA ()
1660 "Set up LA."
1661 (let (i j k count rp sp np v)
1662 (setq consistent (make-vector nstates nil)
1663 lookaheads (make-vector (1+ nstates) 0)
1664 count 0
1665 i 0)
1666 (while (< i nstates)
1667 (aset lookaheads i count)
1668 (setq rp (aref reduction-table i)
1669 sp (aref shift-table i))
1670 ;; if (rp &&
1671 ;; (rp->nreds > 1
1672 ;; || (sp && ! ISVAR(accessing-symbol[sp->shifts[0]]))))
1673 (if (and rp
1674 (or (> (reductions-nreds rp) 1)
1675 (and sp
1676 (not (wisent-ISVAR
1677 (aref accessing-symbol
1678 (aref (shifts-shifts sp) 0)))))))
1679 (setq count (+ count (reductions-nreds rp)))
1680 (aset consistent i t))
1681
1682 (when sp
1683 (setq k 0
1684 j (shifts-nshifts sp)
1685 v (shifts-shifts sp))
1686 (while (< k j)
1687 (when (= (aref accessing-symbol (aref v k))
1688 error-token-number)
1689 (aset consistent i nil)
1690 (setq k j)) ;; break
1691 (setq k (1+ k))))
1692 (setq i (1+ i)))
1693
1694 (aset lookaheads nstates count)
1695
1696 (if (zerop count)
1697 (progn
1698 (setq LA (make-vector 1 nil)
1699 LAruleno (make-vector 1 0)
1700 lookback (make-vector 1 nil)))
1701 (setq LA (make-vector count nil)
1702 LAruleno (make-vector count 0)
1703 lookback (make-vector count nil)))
1704 (setq i 0 j (length LA))
1705 (while (< i j)
1706 (aset LA i (make-vector tokensetsize 0))
1707 (setq i (1+ i)))
1708
1709 (setq np 0
1710 i 0)
1711 (while (< i nstates)
1712 (when (not (aref consistent i))
1713 (setq rp (aref reduction-table i))
1714 (when rp
1715 (setq j 0
1716 k (reductions-nreds rp)
1717 v (reductions-rules rp))
1718 (while (< j k)
1719 (aset LAruleno np (aref v j))
1720 (setq np (1+ np)
1721 j (1+ j)))))
1722 (setq i (1+ i)))))
1723
1724(defun wisent-set-goto-map ()
1725 "Set up GOTO-MAP."
1726 (let (sp i j symbol k temp-map state1 state2 v)
1727 (setq goto-map (make-vector (1+ nvars) 0)
1728 temp-map (make-vector (1+ nvars) 0))
1729
1730 (setq ngotos 0
1731 sp first-shift)
1732 (while sp
1733 (setq i (1- (shifts-nshifts sp))
1734 v (shifts-shifts sp))
1735 (while (>= i 0)
1736 (setq symbol (aref accessing-symbol (aref v i)))
1737 (if (wisent-ISTOKEN symbol)
1738 (setq i 0) ;; break
1739 (setq ngotos (1+ ngotos))
1740 ;; goto-map[symbol]++;
1741 (aset goto-map (- symbol ntokens)
1742 (1+ (aref goto-map (- symbol ntokens)))))
1743 (setq i (1- i)))
1744 (setq sp (shifts-next sp)))
1745
1746 (setq k 0
1747 i ntokens
1748 j 0)
1749 (while (< i nsyms)
1750 (aset temp-map j k)
1751 (setq k (+ k (aref goto-map j))
1752 i (1+ i)
1753 j (1+ j)))
1754 (setq i ntokens
1755 j 0)
1756 (while (< i nsyms)
1757 (aset goto-map j (aref temp-map j))
1758 (setq i (1+ i)
1759 j (1+ j)))
1760 ;; goto-map[nsyms] = ngotos;
1761 ;; temp-map[nsyms] = ngotos;
1762 (aset goto-map j ngotos)
1763 (aset temp-map j ngotos)
1764
1765 (setq from-state (make-vector ngotos 0)
1766 to-state (make-vector ngotos 0)
1767 sp first-shift)
1768 (while sp
1769 (setq state1 (shifts-number sp)
1770 v (shifts-shifts sp)
1771 i (1- (shifts-nshifts sp)))
1772 (while (>= i 0)
1773 (setq state2 (aref v i)
1774 symbol (aref accessing-symbol state2))
1775 (if (wisent-ISTOKEN symbol)
1776 (setq i 0) ;; break
1777 ;; k = temp-map[symbol]++;
1778 (setq k (aref temp-map (- symbol ntokens)))
1779 (aset temp-map (- symbol ntokens) (1+ k))
1780 (aset from-state k state1)
1781 (aset to-state k state2))
1782 (setq i (1- i)))
1783 (setq sp (shifts-next sp)))
1784 ))
1785
1786(defun wisent-map-goto (state symbol)
1787 "Map a STATE/SYMBOL pair into its numeric representation."
1788 (let (high low middle s result)
1789 ;; low = goto-map[symbol];
1790 ;; high = goto-map[symbol + 1] - 1;
1791 (setq low (aref goto-map (- symbol ntokens))
1792 high (1- (aref goto-map (- (1+ symbol) ntokens))))
1793 (while (and (not result) (<= low high))
1794 (setq middle (/ (+ low high) 2)
1795 s (aref from-state middle))
1796 (cond
1797 ((= s state)
1798 (setq result middle))
1799 ((< s state)
1800 (setq low (1+ middle)))
1801 (t
1802 (setq high (1- middle)))))
1803 (or result
1804 (error "Internal error in `wisent-map-goto'"))
1805 ))
1806
1807(defun wisent-initialize-F ()
1808 "Set up F."
1809 (let (i j k sp edge rowp rp reads nedges stateno symbol v break)
1810 (setq F (make-vector ngotos nil)
1811 i 0)
1812 (while (< i ngotos)
1813 (aset F i (make-vector tokensetsize 0))
1814 (setq i (1+ i)))
1815
1816 (setq reads (make-vector ngotos nil)
1817 edge (make-vector (1+ ngotos) 0)
1818 nedges 0
1819 rowp 0 ;; rowp = F
1820 i 0)
1821 (while (< i ngotos)
1822 (setq stateno (aref to-state i)
1823 sp (aref shift-table stateno))
1824 (when sp
1825 (setq k (shifts-nshifts sp)
1826 v (shifts-shifts sp)
1827 j 0
1828 break nil)
1829 (while (and (not break) (< j k))
1830 ;; symbol = accessing-symbol[sp->shifts[j]];
1831 (setq symbol (aref accessing-symbol (aref v j)))
1832 (if (wisent-ISVAR symbol)
1833 (setq break t) ;; break
1834 (wisent-SETBIT (aref F rowp) symbol)
1835 (setq j (1+ j))))
1836
1837 (while (< j k)
1838 ;; symbol = accessing-symbol[sp->shifts[j]];
1839 (setq symbol (aref accessing-symbol (aref v j)))
1840 (when (aref nullable (- symbol ntokens))
1841 (aset edge nedges (wisent-map-goto stateno symbol))
1842 (setq nedges (1+ nedges)))
1843 (setq j (1+ j)))
1844
1845 (when (> nedges 0)
1846 ;; reads[i] = rp = NEW2(nedges + 1, short);
1847 (setq rp (make-vector (1+ nedges) 0)
1848 j 0)
1849 (aset reads i rp)
1850 (while (< j nedges)
1851 ;; rp[j] = edge[j];
1852 (aset rp j (aref edge j))
1853 (setq j (1+ j)))
1854 (aset rp nedges -1)
1855 (setq nedges 0)))
1856 (setq rowp (1+ rowp))
1857 (setq i (1+ i)))
1858 (wisent-digraph reads)
1859 ))
1860
1861(defun wisent-add-lookback-edge (stateno ruleno gotono)
1862 "Add a lookback edge.
1863STATENO, RULENO, GOTONO are self-explanatory."
1864 (let (i k found)
1865 (setq i (aref lookaheads stateno)
1866 k (aref lookaheads (1+ stateno))
1867 found nil)
1868 (while (and (not found) (< i k))
1869 (if (= (aref LAruleno i) ruleno)
1870 (setq found t)
1871 (setq i (1+ i))))
1872
1873 (or found
1874 (error "Internal error in `wisent-add-lookback-edge'"))
1875
1876 ;; value . next
1877 ;; lookback[i] = (gotono . lookback[i])
1878 (aset lookback i (cons gotono (aref lookback i)))))
1879
1880(defun wisent-transpose (R-arg n)
1881 "Return the transpose of R-ARG, of size N.
1882Destroy R-ARG, as it is replaced with the result. R-ARG[I] is nil or
1883a -1 terminated list of numbers. RESULT[NUM] is nil or the -1
1884terminated list of the I such as NUM is in R-ARG[I]."
1885 (let (i j new-R end-R nedges v sp)
1886 (setq new-R (make-vector n nil)
1887 end-R (make-vector n nil)
1888 nedges (make-vector n 0))
1889
1890 ;; Count.
1891 (setq i 0)
1892 (while (< i n)
1893 (setq v (aref R-arg i))
1894 (when v
1895 (setq j 0)
1896 (while (>= (aref v j) 0)
1897 (aset nedges (aref v j) (1+ (aref nedges (aref v j))))
1898 (setq j (1+ j))))
1899 (setq i (1+ i)))
1900
1901 ;; Allocate.
1902 (setq i 0)
1903 (while (< i n)
1904 (when (> (aref nedges i) 0)
1905 (setq sp (make-vector (1+ (aref nedges i)) 0))
1906 (aset sp (aref nedges i) -1)
1907 (aset new-R i sp)
1908 (aset end-R i 0))
1909 (setq i (1+ i)))
1910
1911 ;; Store.
1912 (setq i 0)
1913 (while (< i n)
1914 (setq v (aref R-arg i))
1915 (when v
1916 (setq j 0)
1917 (while (>= (aref v j) 0)
1918 (aset (aref new-R (aref v j)) (aref end-R (aref v j)) i)
1919 (aset end-R (aref v j) (1+ (aref end-R (aref v j))))
1920 (setq j (1+ j))))
1921 (setq i (1+ i)))
1922
1923 new-R))
1924
1925(defun wisent-build-relations ()
1926 "Build relations."
1927 (let (i j k rulep rp sp length nedges done state1 stateno
1928 symbol1 symbol2 edge states v)
1929 (setq includes (make-vector ngotos nil)
1930 edge (make-vector (1+ ngotos) 0)
1931 states (make-vector (1+ maxrhs) 0)
1932 i 0)
1933
1934 (while (< i ngotos)
1935 (setq nedges 0
1936 state1 (aref from-state i)
1937 symbol1 (aref accessing-symbol (aref to-state i))
1938 rulep (aref derives (- symbol1 ntokens)))
1939
1940 (while (> (car rulep) 0)
1941 (aset states 0 state1)
1942 (setq length 1
1943 stateno state1
1944 rp (aref rrhs (car rulep))) ;; rp = ritem + rrhs[*rulep]
1945 (while (> (aref ritem rp) 0) ;; *rp > 0
1946 (setq symbol2 (aref ritem rp)
1947 sp (aref shift-table stateno)
1948 k (shifts-nshifts sp)
1949 v (shifts-shifts sp)
1950 j 0)
1951 (while (< j k)
1952 (setq stateno (aref v j))
1953 (if (= (aref accessing-symbol stateno) symbol2)
1954 (setq j k) ;; break
1955 (setq j (1+ j))))
1956 ;; states[length++] = stateno;
1957 (aset states length stateno)
1958 (setq length (1+ length))
1959 (setq rp (1+ rp)))
1960
1961 (if (not (aref consistent stateno))
1962 (wisent-add-lookback-edge stateno (car rulep) i))
1963
1964 (setq length (1- length)
1965 done nil)
1966 (while (not done)
1967 (setq done t
1968 rp (1- rp))
1969 (when (and (>= rp 0) (wisent-ISVAR (aref ritem rp)))
1970 ;; stateno = states[--length];
1971 (setq length (1- length)
1972 stateno (aref states length))
1973 (aset edge nedges (wisent-map-goto stateno (aref ritem rp)))
1974 (setq nedges (1+ nedges))
1975 (if (aref nullable (- (aref ritem rp) ntokens))
1976 (setq done nil))))
1977 (setq rulep (cdr rulep)))
1978
1979 (when (> nedges 0)
1980 (setq v (make-vector (1+ nedges) 0)
1981 j 0)
1982 (aset includes i v)
1983 (while (< j nedges)
1984 (aset v j (aref edge j))
1985 (setq j (1+ j)))
1986 (aset v nedges -1))
1987 (setq i (1+ i)))
1988
1989 (setq includes (wisent-transpose includes ngotos))
1990 ))
1991
1992(defun wisent-compute-FOLLOWS ()
1993 "Compute follows."
1994 (wisent-digraph includes))
1995
1996(defun wisent-compute-lookaheads ()
1997 "Compute lookaheads."
1998 (let (i j n v1 v2 sp)
1999 (setq n (aref lookaheads nstates)
2000 i 0)
2001 (while (< i n)
2002 (setq sp (aref lookback i))
2003 (while sp
2004 (setq v1 (aref LA i)
2005 v2 (aref F (car sp))
2006 j 0)
2007 (while (< j tokensetsize)
2008 ;; LA (i)[j] |= F (sp->value)[j]
2009 (aset v1 j (logior (aref v1 j) (aref v2 j)))
2010 (setq j (1+ j)))
2011 (setq sp (cdr sp)))
2012 (setq i (1+ i)))))
2013
2014(defun wisent-lalr ()
2015 "Make the nondeterministic finite state machine deterministic."
2016 (setq tokensetsize (wisent-WORDSIZE ntokens))
2017 (wisent-set-state-table)
2018 (wisent-set-accessing-symbol)
2019 (wisent-set-shift-table)
2020 (wisent-set-reduction-table)
2021 (wisent-set-maxrhs)
2022 (wisent-initialize-LA)
2023 (wisent-set-goto-map)
2024 (wisent-initialize-F)
2025 (wisent-build-relations)
2026 (wisent-compute-FOLLOWS)
2027 (wisent-compute-lookaheads))
2028\f
2029;;;; -----------------------------------------------
2030;;;; Find and resolve or report look-ahead conflicts
2031;;;; -----------------------------------------------
2032
2033(defsubst wisent-log-resolution (state LAno token resolution)
2034 "Log a shift-reduce conflict resolution.
2035In specified STATE between rule pointed by lookahead number LANO and
2036TOKEN, resolved as RESOLUTION."
2037 (if (or wisent-verbose-flag wisent-debug-flag)
2038 (wisent-log
2039 "Conflict in state %d between rule %d and token %s resolved as %s.\n"
2040 state (aref LAruleno LAno) (wisent-tag token) resolution)))
2041
2042(defun wisent-flush-shift (state token)
2043 "Turn off the shift recorded in the specified STATE for TOKEN.
2044Used when we resolve a shift-reduce conflict in favor of the reduction."
2045 (let (shiftp i k v)
2046 (when (setq shiftp (aref shift-table state))
2047 (setq k (shifts-nshifts shiftp)
2048 v (shifts-shifts shiftp)
2049 i 0)
2050 (while (< i k)
2051 (if (and (not (zerop (aref v i)))
2052 (= token (aref accessing-symbol (aref v i))))
2053 (aset v i 0))
2054 (setq i (1+ i))))))
2055
2056(defun wisent-resolve-sr-conflict (state lookaheadnum)
2057 "Attempt to resolve shift-reduce conflict for one rule.
2058Resolve by means of precedence declarations. The conflict occurred in
2059specified STATE for the rule pointed by the lookahead symbol
2060LOOKAHEADNUM. It has already been checked that the rule has a
2061precedence. A conflict is resolved by modifying the shift or reduce
2062tables so that there is no longer a conflict."
2063 (let (i redprec errp errs nerrs token sprec sassoc)
2064 ;; Find the rule to reduce by to get precedence of reduction
2065 (setq token (aref tags (aref rprec (aref LAruleno lookaheadnum)))
2066 redprec (wisent-prec token)
2067 errp (make-errs)
2068 errs (make-vector ntokens 0)
2069 nerrs 0
2070 i 0)
2071 (set-errs-errs errp errs)
2072 (while (< i ntokens)
2073 (setq token (aref tags i))
2074 (when (and (wisent-BITISSET (aref LA lookaheadnum) i)
2075 (wisent-BITISSET lookaheadset i)
2076 (setq sprec (wisent-prec token)))
2077 ;; Shift-reduce conflict occurs for token number I and it has
2078 ;; a precedence. The precedence of shifting is that of token
2079 ;; I.
2080 (cond
2081 ((< sprec redprec)
2082 (wisent-log-resolution state lookaheadnum i "reduce")
2083 ;; Flush the shift for this token
2084 (wisent-RESETBIT lookaheadset i)
2085 (wisent-flush-shift state i)
2086 )
2087 ((> sprec redprec)
2088 (wisent-log-resolution state lookaheadnum i "shift")
2089 ;; Flush the reduce for this token
2090 (wisent-RESETBIT (aref LA lookaheadnum) i)
2091 )
2092 (t
2093 ;; Matching precedence levels.
2094 ;; For left association, keep only the reduction.
2095 ;; For right association, keep only the shift.
2096 ;; For nonassociation, keep neither.
2097 (setq sassoc (wisent-assoc token))
2098 (cond
2099 ((eq sassoc 'right)
2100 (wisent-log-resolution state lookaheadnum i "shift"))
2101 ((eq sassoc 'left)
2102 (wisent-log-resolution state lookaheadnum i "reduce"))
2103 ((eq sassoc 'nonassoc)
2104 (wisent-log-resolution state lookaheadnum i "an error"))
2105 )
2106 (when (not (eq sassoc 'right))
2107 ;; Flush the shift for this token
2108 (wisent-RESETBIT lookaheadset i)
2109 (wisent-flush-shift state i))
2110 (when (not (eq sassoc 'left))
2111 ;; Flush the reduce for this token
2112 (wisent-RESETBIT (aref LA lookaheadnum) i))
2113 (when (eq sassoc 'nonassoc)
2114 ;; Record an explicit error for this token
2115 (aset errs nerrs i)
2116 (setq nerrs (1+ nerrs)))
2117 )))
2118 (setq i (1+ i)))
2119 (when (> nerrs 0)
2120 (set-errs-nerrs errp nerrs)
2121 (aset err-table state errp))
2122 ))
2123
2124(defun wisent-set-conflicts (state)
2125 "Find and attempt to resolve conflicts in specified STATE."
2126 (let (i j k v shiftp symbol)
2127 (unless (aref consistent state)
2128 (fillarray lookaheadset 0)
2129
2130 (when (setq shiftp (aref shift-table state))
2131 (setq k (shifts-nshifts shiftp)
2132 v (shifts-shifts shiftp)
2133 i 0)
2134 (while (and (< i k)
2135 (wisent-ISTOKEN
2136 (setq symbol (aref accessing-symbol (aref v i)))))
2137 (or (zerop (aref v i))
2138 (wisent-SETBIT lookaheadset symbol))
2139 (setq i (1+ i))))
2140
2141 ;; Loop over all rules which require lookahead in this state
2142 ;; first check for shift-reduce conflict, and try to resolve
2143 ;; using precedence
2144 (setq i (aref lookaheads state)
2145 k (aref lookaheads (1+ state)))
2146 (while (< i k)
2147 (when (aref rprec (aref LAruleno i))
2148 (setq v (aref LA i)
2149 j 0)
2150 (while (< j tokensetsize)
2151 (if (zerop (logand (aref v j) (aref lookaheadset j)))
2152 (setq j (1+ j))
2153 ;; if (LA (i)[j] & lookaheadset[j])
2154 (wisent-resolve-sr-conflict state i)
2155 (setq j tokensetsize)))) ;; break
2156 (setq i (1+ i)))
2157
2158 ;; Loop over all rules which require lookahead in this state
2159 ;; Check for conflicts not resolved above.
2160 (setq i (aref lookaheads state))
2161 (while (< i k)
2162 (setq v (aref LA i)
2163 j 0)
2164 (while (< j tokensetsize)
2165 ;; if (LA (i)[j] & lookaheadset[j])
2166 (if (not (zerop (logand (aref v j) (aref lookaheadset j))))
2167 (aset conflicts state t))
2168 (setq j (1+ j)))
2169 (setq j 0)
2170 (while (< j tokensetsize)
2171 ;; lookaheadset[j] |= LA (i)[j];
2172 (aset lookaheadset j (logior (aref lookaheadset j)
2173 (aref v j)))
2174 (setq j (1+ j)))
2175 (setq i (1+ i)))
2176 )))
2177
2178(defun wisent-resolve-conflicts ()
2179 "Find and resolve conflicts."
2180 (let (i)
2181 (setq conflicts (make-vector nstates nil)
2182 shiftset (make-vector tokensetsize 0)
2183 lookaheadset (make-vector tokensetsize 0)
2184 err-table (make-vector nstates nil)
2185 i 0)
2186 (while (< i nstates)
2187 (wisent-set-conflicts i)
2188 (setq i (1+ i)))))
2189
2190(defun wisent-count-sr-conflicts (state)
2191 "Count the number of shift/reduce conflicts in specified STATE."
2192 (let (i j k shiftp symbol v)
2193 (setq src-count 0
2194 shiftp (aref shift-table state))
2195 (when shiftp
2196 (fillarray shiftset 0)
2197 (fillarray lookaheadset 0)
2198 (setq k (shifts-nshifts shiftp)
2199 v (shifts-shifts shiftp)
2200 i 0)
2201 (while (< i k)
2202 (when (not (zerop (aref v i)))
2203 (setq symbol (aref accessing-symbol (aref v i)))
2204 (if (wisent-ISVAR symbol)
2205 (setq i k) ;; break
2206 (wisent-SETBIT shiftset symbol)))
2207 (setq i (1+ i)))
2208
2209 (setq k (aref lookaheads (1+ state))
2210 i (aref lookaheads state))
2211 (while (< i k)
2212 (setq v (aref LA i)
2213 j 0)
2214 (while (< j tokensetsize)
2215 ;; lookaheadset[j] |= LA (i)[j]
2216 (aset lookaheadset j (logior (aref lookaheadset j)
2217 (aref v j)))
2218 (setq j (1+ j)))
2219 (setq i (1+ i)))
2220
2221 (setq k 0)
2222 (while (< k tokensetsize)
2223 ;; lookaheadset[k] &= shiftset[k];
2224 (aset lookaheadset k (logand (aref lookaheadset k)
2225 (aref shiftset k)))
2226 (setq k (1+ k)))
2227
2228 (setq i 0)
2229 (while (< i ntokens)
2230 (if (wisent-BITISSET lookaheadset i)
2231 (setq src-count (1+ src-count)))
2232 (setq i (1+ i))))
2233 src-count))
2234
2235(defun wisent-count-rr-conflicts (state)
2236 "Count the number of reduce/reduce conflicts in specified STATE."
2237 (let (i j count n m)
2238 (setq rrc-count 0
2239 m (aref lookaheads state)
2240 n (aref lookaheads (1+ state)))
2241 (when (>= (- n m) 2)
2242 (setq i 0)
2243 (while (< i ntokens)
2244 (setq count 0
2245 j m)
2246 (while (< j n)
2247 (if (wisent-BITISSET (aref LA j) i)
2248 (setq count (1+ count)))
2249 (setq j (1+ j)))
2250
2251 (if (>= count 2)
2252 (setq rrc-count (1+ rrc-count)))
2253 (setq i (1+ i))))
2254 rrc-count))
2255
2256(defvar wisent-expected-conflicts nil
2257 "*If non-nil suppress the warning about shift/reduce conflicts.
2258It is a decimal integer N that says there should be no warning if
2259there are N shift/reduce conflicts and no reduce/reduce conflicts. A
2260warning is given if there are either more or fewer conflicts, or if
2261there are any reduce/reduce conflicts.")
2262
2263(defun wisent-total-conflicts ()
2264 "Report the total number of conflicts."
2265 (unless (and (zerop rrc-total)
2266 (or (zerop src-total)
2267 (= src-total (or wisent-expected-conflicts 0))))
2268 (let* ((src (wisent-source))
2269 (src (if src (concat " in " src) ""))
2270 (msg (format "Grammar%s contains" src)))
2271 (if (> src-total 0)
2272 (setq msg (format "%s %d shift/reduce conflict%s"
2273 msg src-total (if (> src-total 1)
2274 "s" ""))))
2275 (if (and (> src-total 0) (> rrc-total 0))
2276 (setq msg (format "%s and" msg)))
2277 (if (> rrc-total 0)
2278 (setq msg (format "%s %d reduce/reduce conflict%s"
2279 msg rrc-total (if (> rrc-total 1)
2280 "s" ""))))
2281 (message msg))))
2282
2283(defun wisent-print-conflicts ()
2284 "Report conflicts."
2285 (let (i)
2286 (setq src-total 0
2287 rrc-total 0
2288 i 0)
2289 (while (< i nstates)
2290 (when (aref conflicts i)
2291 (wisent-count-sr-conflicts i)
2292 (wisent-count-rr-conflicts i)
2293 (setq src-total (+ src-total src-count)
2294 rrc-total (+ rrc-total rrc-count))
2295 (when (or wisent-verbose-flag wisent-debug-flag)
2296 (wisent-log "State %d contains" i)
2297 (if (> src-count 0)
2298 (wisent-log " %d shift/reduce conflict%s"
2299 src-count (if (> src-count 1) "s" "")))
2300
2301 (if (and (> src-count 0) (> rrc-count 0))
2302 (wisent-log " and"))
2303
2304 (if (> rrc-count 0)
2305 (wisent-log " %d reduce/reduce conflict%s"
2306 rrc-count (if (> rrc-count 1) "s" "")))
2307
2308 (wisent-log ".\n")))
2309 (setq i (1+ i)))
2310 (wisent-total-conflicts)))
2311\f
2312;;;; --------------------------------------
2313;;;; Report information on generated parser
2314;;;; --------------------------------------
2315(defun wisent-print-grammar ()
2316 "Print grammar."
2317 (let (i j r break left-count right-count)
2318
2319 (wisent-log "\n\nGrammar\n\n Number, Rule\n")
2320 (setq i 1)
2321 (while (<= i nrules)
2322 ;; Don't print rules disabled in `wisent-reduce-grammar-tables'.
2323 (when (aref ruseful i)
2324 (wisent-log " %s %s ->"
2325 (wisent-pad-string (number-to-string i) 6)
2326 (wisent-tag (aref rlhs i)))
2327 (setq r (aref rrhs i))
2328 (if (> (aref ritem r) 0)
2329 (while (> (aref ritem r) 0)
2330 (wisent-log " %s" (wisent-tag (aref ritem r)))
2331 (setq r (1+ r)))
2332 (wisent-log " /* empty */"))
2333 (wisent-log "\n"))
2334 (setq i (1+ i)))
2335
2336 (wisent-log "\n\nTerminals, with rules where they appear\n\n")
2337 (wisent-log "%s (-1)\n" (wisent-tag 0))
2338 (setq i 1)
2339 (while (< i ntokens)
2340 (wisent-log "%s (%d)" (wisent-tag i) i)
2341 (setq j 1)
2342 (while (<= j nrules)
2343 (setq r (aref rrhs j)
2344 break nil)
2345 (while (and (not break) (> (aref ritem r) 0))
2346 (if (setq break (= (aref ritem r) i))
2347 (wisent-log " %d" j)
2348 (setq r (1+ r))))
2349 (setq j (1+ j)))
2350 (wisent-log "\n")
2351 (setq i (1+ i)))
2352
2353 (wisent-log "\n\nNonterminals, with rules where they appear\n\n")
2354 (setq i ntokens)
2355 (while (< i nsyms)
2356 (setq left-count 0
2357 right-count 0
2358 j 1)
2359 (while (<= j nrules)
2360 (if (= (aref rlhs j) i)
2361 (setq left-count (1+ left-count)))
2362 (setq r (aref rrhs j)
2363 break nil)
2364 (while (and (not break) (> (aref ritem r) 0))
2365 (if (= (aref ritem r) i)
2366 (setq right-count (1+ right-count)
2367 break t)
2368 (setq r (1+ r))))
2369 (setq j (1+ j)))
2370 (wisent-log "%s (%d)\n " (wisent-tag i) i)
2371 (when (> left-count 0)
2372 (wisent-log " on left:")
2373 (setq j 1)
2374 (while (<= j nrules)
2375 (if (= (aref rlhs j) i)
2376 (wisent-log " %d" j))
2377 (setq j (1+ j))))
2378 (when (> right-count 0)
2379 (if (> left-count 0)
2380 (wisent-log ","))
2381 (wisent-log " on right:")
2382 (setq j 1)
2383 (while (<= j nrules)
2384 (setq r (aref rrhs j)
2385 break nil)
2386 (while (and (not break) (> (aref ritem r) 0))
2387 (if (setq break (= (aref ritem r) i))
2388 (wisent-log " %d" j)
2389 (setq r (1+ r))))
2390 (setq j (1+ j))))
2391 (wisent-log "\n")
2392 (setq i (1+ i)))
2393 ))
2394
2395(defun wisent-print-reductions (state)
2396 "Print reductions on STATE."
2397 (let (i j k v symbol m n defaulted
2398 default-LA default-rule cmax count shiftp errp nodefault)
2399 (setq nodefault nil
2400 i 0)
2401 (fillarray shiftset 0)
2402
2403 (setq shiftp (aref shift-table state))
2404 (when shiftp
2405 (setq k (shifts-nshifts shiftp)
2406 v (shifts-shifts shiftp)
2407 i 0)
2408 (while (< i k)
2409 (when (not (zerop (aref v i)))
2410 (setq symbol (aref accessing-symbol (aref v i)))
2411 (if (wisent-ISVAR symbol)
2412 (setq i k) ;; break
2413 ;; If this state has a shift for the error token, don't
2414 ;; use a default rule.
2415 (if (= symbol error-token-number)
2416 (setq nodefault t))
2417 (wisent-SETBIT shiftset symbol)))
2418 (setq i (1+ i))))
2419
2420 (setq errp (aref err-table state))
2421 (when errp
2422 (setq k (errs-nerrs errp)
2423 v (errs-errs errp)
2424 i 0)
2425 (while (< i k)
2426 (if (not (zerop (setq symbol (aref v i))))
2427 (wisent-SETBIT shiftset symbol))
2428 (setq i (1+ i))))
2429
2430 (setq m (aref lookaheads state)
2431 n (aref lookaheads (1+ state)))
2432
2433 (cond
2434 ((and (= (- n m) 1) (not nodefault))
2435 (setq default-rule (aref LAruleno m)
2436 v (aref LA m)
2437 k 0)
2438 (while (< k tokensetsize)
2439 (aset lookaheadset k (logand (aref v k)
2440 (aref shiftset k)))
2441 (setq k (1+ k)))
2442
2443 (setq i 0)
2444 (while (< i ntokens)
2445 (if (wisent-BITISSET lookaheadset i)
2446 (wisent-log " %s\t[reduce using rule %d (%s)]\n"
2447 (wisent-tag i) default-rule
2448 (wisent-tag (aref rlhs default-rule))))
2449 (setq i (1+ i)))
2450 (wisent-log " $default\treduce using rule %d (%s)\n\n"
2451 default-rule
2452 (wisent-tag (aref rlhs default-rule)))
2453 )
2454 ((>= (- n m) 1)
2455 (setq cmax 0
2456 default-LA -1
2457 default-rule 0)
2458 (when (not nodefault)
2459 (setq i m)
2460 (while (< i n)
2461 (setq v (aref LA i)
2462 count 0
2463 k 0)
2464 (while (< k tokensetsize)
2465 ;; lookaheadset[k] = LA (i)[k] & ~shiftset[k]
2466 (aset lookaheadset k
2467 (logand (aref v k)
2468 (lognot (aref shiftset k))))
2469 (setq k (1+ k)))
2470 (setq j 0)
2471 (while (< j ntokens)
2472 (if (wisent-BITISSET lookaheadset j)
2473 (setq count (1+ count)))
2474 (setq j (1+ j)))
2475 (if (> count cmax)
2476 (setq cmax count
2477 default-LA i
2478 default-rule (aref LAruleno i)))
2479 (setq k 0)
2480 (while (< k tokensetsize)
2481 (aset shiftset k (logior (aref shiftset k)
2482 (aref lookaheadset k)))
2483 (setq k (1+ k)))
2484 (setq i (1+ i))))
2485
2486 (fillarray shiftset 0)
2487
2488 (when shiftp
2489 (setq k (shifts-nshifts shiftp)
2490 v (shifts-shifts shiftp)
2491 i 0)
2492 (while (< i k)
2493 (when (not (zerop (aref v i)))
2494 (setq symbol (aref accessing-symbol (aref v i)))
2495 (if (wisent-ISVAR symbol)
2496 (setq i k) ;; break
2497 (wisent-SETBIT shiftset symbol)))
2498 (setq i (1+ i))))
2499
2500 (setq i 0)
2501 (while (< i ntokens)
2502 (setq defaulted nil
2503 count (if (wisent-BITISSET shiftset i) 1 0)
2504 j m)
2505 (while (< j n)
2506 (when (wisent-BITISSET (aref LA j) i)
2507 (if (zerop count)
2508 (progn
2509 (if (not (= j default-LA))
2510 (wisent-log
2511 " %s\treduce using rule %d (%s)\n"
2512 (wisent-tag i) (aref LAruleno j)
2513 (wisent-tag (aref rlhs (aref LAruleno j))))
2514 (setq defaulted t))
2515 (setq count (1+ count)))
2516 (if defaulted
2517 (wisent-log
2518 " %s\treduce using rule %d (%s)\n"
2519 (wisent-tag i) (aref LAruleno default-LA)
2520 (wisent-tag (aref rlhs (aref LAruleno default-LA)))))
2521 (setq defaulted nil)
2522 (wisent-log
2523 " %s\t[reduce using rule %d (%s)]\n"
2524 (wisent-tag i) (aref LAruleno j)
2525 (wisent-tag (aref rlhs (aref LAruleno j))))))
2526 (setq j (1+ j)))
2527 (setq i (1+ i)))
2528
2529 (if (>= default-LA 0)
2530 (wisent-log
2531 " $default\treduce using rule %d (%s)\n"
2532 default-rule
2533 (wisent-tag (aref rlhs default-rule))))
2534 ))))
2535
2536(defun wisent-print-actions (state)
2537 "Print actions on STATE."
2538 (let (i j k v state1 symbol shiftp errp redp rule nerrs break)
2539 (setq shiftp (aref shift-table state)
2540 redp (aref reduction-table state)
2541 errp (aref err-table state))
2542 (if (and (not shiftp) (not redp))
2543 (if (= final-state state)
2544 (wisent-log " $default\taccept\n")
2545 (wisent-log " NO ACTIONS\n"))
2546 (if (not shiftp)
2547 (setq i 0
2548 k 0)
2549 (setq k (shifts-nshifts shiftp)
2550 v (shifts-shifts shiftp)
2551 i 0
2552 break nil)
2553 (while (and (not break) (< i k))
2554 (if (zerop (setq state1 (aref v i)))
2555 (setq i (1+ i))
2556 (setq symbol (aref accessing-symbol state1))
2557 ;; The following line used to be turned off.
2558 (if (wisent-ISVAR symbol)
2559 (setq break t) ;; break
2560 (wisent-log " %s\tshift, and go to state %d\n"
2561 (wisent-tag symbol) state1)
2562 (setq i (1+ i)))))
2563 (if (> i 0)
2564 (wisent-log "\n")))
2565
2566 (when errp
2567 (setq nerrs (errs-nerrs errp)
2568 v (errs-errs errp)
2569 j 0)
2570 (while (< j nerrs)
2571 (if (aref v j)
2572 (wisent-log " %s\terror (nonassociative)\n"
2573 (wisent-tag (aref v j))))
2574 (setq j (1+ j)))
2575 (if (> j 0)
2576 (wisent-log "\n")))
2577
2578 (cond
2579 ((and (aref consistent state) redp)
2580 (setq rule (aref (reductions-rules redp) 0)
2581 symbol (aref rlhs rule))
2582 (wisent-log " $default\treduce using rule %d (%s)\n\n"
2583 rule (wisent-tag symbol))
2584 )
2585 (redp
2586 (wisent-print-reductions state)
2587 ))
2588
2589 (when (< i k)
2590 (setq v (shifts-shifts shiftp))
2591 (while (< i k)
2592 (when (setq state1 (aref v i))
2593 (setq symbol (aref accessing-symbol state1))
2594 (wisent-log " %s\tgo to state %d\n"
2595 (wisent-tag symbol) state1))
2596 (setq i (1+ i)))
2597 (wisent-log "\n"))
2598 )))
2599
2600(defun wisent-print-core (state)
2601 "Print STATE core."
2602 (let (i k rule statep sp sp1)
2603 (setq statep (aref state-table state)
2604 k (core-nitems statep))
2605 (when (> k 0)
2606 (setq i 0)
2607 (while (< i k)
2608 ;; sp1 = sp = ritem + statep->items[i];
2609 (setq sp1 (aref (core-items statep) i)
2610 sp sp1)
2611 (while (> (aref ritem sp) 0)
2612 (setq sp (1+ sp)))
2613
2614 (setq rule (- (aref ritem sp)))
2615 (wisent-log " %s -> " (wisent-tag (aref rlhs rule)))
2616
2617 (setq sp (aref rrhs rule))
2618 (while (< sp sp1)
2619 (wisent-log "%s " (wisent-tag (aref ritem sp)))
2620 (setq sp (1+ sp)))
2621 (wisent-log ".")
2622 (while (> (aref ritem sp) 0)
2623 (wisent-log " %s" (wisent-tag (aref ritem sp)))
2624 (setq sp (1+ sp)))
2625 (wisent-log " (rule %d)\n" rule)
2626 (setq i (1+ i)))
2627 (wisent-log "\n"))))
2628
2629(defun wisent-print-state (state)
2630 "Print information on STATE."
2631 (wisent-log "\n\nstate %d\n\n" state)
2632 (wisent-print-core state)
2633 (wisent-print-actions state))
2634
2635(defun wisent-print-states ()
2636 "Print information on states."
2637 (let ((i 0))
2638 (while (< i nstates)
2639 (wisent-print-state i)
2640 (setq i (1+ i)))))
2641
2642(defun wisent-print-results ()
2643 "Print information on generated parser.
2644Report detailed informations if `wisent-verbose-flag' or
2645`wisent-debug-flag' are non-nil."
2646 (when (or wisent-verbose-flag wisent-debug-flag)
2647 (wisent-print-useless))
2648 (wisent-print-conflicts)
2649 (when (or wisent-verbose-flag wisent-debug-flag)
2650 (wisent-print-grammar)
2651 (wisent-print-states))
2652 ;; Append output to log file when running in batch mode
2653 (when (wisent-noninteractive)
2654 (wisent-append-to-log-file)
2655 (wisent-clear-log)))
2656\f
2657;;;; ---------------------------------
2658;;;; Build the generated parser tables
2659;;;; ---------------------------------
2660
2661(defun wisent-action-row (state actrow)
2662 "Figure out the actions for the specified STATE.
2663Decide what to do for each type of token if seen as the lookahead
2664token in specified state. The value returned is used as the default
2665action for the state. In addition, ACTROW is filled with what to do
2666for each kind of token, index by symbol number, with nil meaning do
2667the default action. The value 'error, means this situation is an
2668error. The parser recognizes this value specially.
2669
2670This is where conflicts are resolved. The loop over lookahead rules
2671considered lower-numbered rules last, and the last rule considered
2672that likes a token gets to handle it."
2673 (let (i j k m n v default-rule nreds rule max count
2674 shift-state symbol redp shiftp errp nodefault)
2675
2676 (fillarray actrow nil)
2677
2678 (setq default-rule 0
2679 nodefault nil ;; nil inhibit having any default reduction
2680 nreds 0
2681 m 0
2682 n 0
2683 redp (aref reduction-table state))
2684
2685 (when redp
2686 (setq nreds (reductions-nreds redp))
2687 (when (>= nreds 1)
2688 ;; loop over all the rules available here which require
2689 ;; lookahead
2690 (setq m (aref lookaheads state)
2691 n (aref lookaheads (1+ state))
2692 i (1- n))
2693 (while (>= i m)
2694 ;; and find each token which the rule finds acceptable to
2695 ;; come next
2696 (setq j 0)
2697 (while (< j ntokens)
2698 ;; and record this rule as the rule to use if that token
2699 ;; follows.
2700 (if (wisent-BITISSET (aref LA i) j)
2701 (aset actrow j (- (aref LAruleno i)))
2702 )
2703 (setq j (1+ j)))
2704 (setq i (1- i)))))
2705
2706 ;; Now see which tokens are allowed for shifts in this state. For
2707 ;; them, record the shift as the thing to do. So shift is
2708 ;; preferred to reduce.
2709 (setq shiftp (aref shift-table state))
2710 (when shiftp
2711 (setq k (shifts-nshifts shiftp)
2712 v (shifts-shifts shiftp)
2713 i 0)
2714 (while (< i k)
2715 (setq shift-state (aref v i))
2716 (if (zerop shift-state)
2717 nil ;; continue
2718 (setq symbol (aref accessing-symbol shift-state))
2719 (if (wisent-ISVAR symbol)
2720 (setq i k) ;; break
2721 (aset actrow symbol shift-state)
2722 ;; Do not use any default reduction if there is a shift
2723 ;; for error
2724 (if (= symbol error-token-number)
2725 (setq nodefault t))))
2726 (setq i (1+ i))))
2727
2728 ;; See which tokens are an explicit error in this state (due to
2729 ;; %nonassoc). For them, record error as the action.
2730 (setq errp (aref err-table state))
2731 (when errp
2732 (setq k (errs-nerrs errp)
2733 v (errs-errs errp)
2734 i 0)
2735 (while (< i k)
2736 (aset actrow (aref v i) wisent-error-tag)
2737 (setq i (1+ i))))
2738
2739 ;; Now find the most common reduction and make it the default
2740 ;; action for this state.
2741 (when (and (>= nreds 1) (not nodefault))
2742 (if (aref consistent state)
2743 (setq default-rule (- (aref (reductions-rules redp) 0)))
2744 (setq max 0
2745 i m)
2746 (while (< i n)
2747 (setq count 0
2748 rule (- (aref LAruleno i))
2749 j 0)
2750 (while (< j ntokens)
2751 (if (and (numberp (aref actrow j))
2752 (= (aref actrow j) rule))
2753 (setq count (1+ count)))
2754 (setq j (1+ j)))
2755 (if (> count max)
2756 (setq max count
2757 default-rule rule))
2758 (setq i (1+ i)))
2759 ;; actions which match the default are replaced with zero,
2760 ;; which means "use the default"
2761 (when (> max 0)
2762 (setq j 0)
2763 (while (< j ntokens)
2764 (if (and (numberp (aref actrow j))
2765 (= (aref actrow j) default-rule))
2766 (aset actrow j nil))
2767 (setq j (1+ j)))
2768 )))
2769
2770 ;; If have no default rule, if this is the final state the default
2771 ;; is accept else it is an error. So replace any action which
2772 ;; says "error" with "use default".
2773 (when (zerop default-rule)
2774 (if (= final-state state)
2775 (setq default-rule wisent-accept-tag)
2776 (setq j 0)
2777 (while (< j ntokens)
2778 (if (eq (aref actrow j) wisent-error-tag)
2779 (aset actrow j nil))
2780 (setq j (1+ j)))
2781 (setq default-rule wisent-error-tag)))
2782 default-rule))
2783
2784(defconst wisent-default-tag 'default
2785 "Tag used in an action table to indicate a default action.")
2786
2787;; These variables only exist locally in the function
2788;; `wisent-state-actions' and are shared by all other nested callees.
2789(wisent-defcontext semantic-actions
2790 ;; Uninterned symbols used in code generation.
2791 stack sp gotos state
2792 ;; Name of the current semantic action
2793 NAME)
2794
2795(defun wisent-state-actions ()
2796 "Figure out the actions for every state.
2797Return the action table."
2798 ;; Store the semantic action obarray in (unused) RCODE[0].
2799 (aset rcode 0 (make-vector 13 0))
2800 (let (i j action-table actrow action)
2801 (setq action-table (make-vector nstates nil)
2802 actrow (make-vector ntokens nil)
2803 i 0)
2804 (wisent-with-context semantic-actions
2805 (setq stack (make-symbol "stack")
2806 sp (make-symbol "sp")
2807 gotos (make-symbol "gotos")
2808 state (make-symbol "state"))
2809 (while (< i nstates)
2810 (setq action (wisent-action-row i actrow))
2811 ;; Translate a reduction into semantic action
2812 (and (integerp action) (< action 0)
2813 (setq action (wisent-semantic-action (- action))))
2814 (aset action-table i (list (cons wisent-default-tag action)))
2815 (setq j 0)
2816 (while (< j ntokens)
2817 (when (setq action (aref actrow j))
2818 ;; Translate a reduction into semantic action
2819 (and (integerp action) (< action 0)
2820 (setq action (wisent-semantic-action (- action))))
2821 (aset action-table i (cons (cons (aref tags j) action)
2822 (aref action-table i)))
2823 )
2824 (setq j (1+ j)))
2825 (aset action-table i (nreverse (aref action-table i)))
2826 (setq i (1+ i)))
2827 action-table)))
2828
2829(defun wisent-goto-actions ()
2830 "Figure out what to do after reducing with each rule.
2831Depending on the saved state from before the beginning of parsing the
2832data that matched this rule. Return the goto table."
2833 (let (i j m n symbol state goto-table)
2834 (setq goto-table (make-vector nstates nil)
2835 i ntokens)
2836 (while (< i nsyms)
2837 (setq symbol (- i ntokens)
2838 m (aref goto-map symbol)
2839 n (aref goto-map (1+ symbol))
2840 j m)
2841 (while (< j n)
2842 (setq state (aref from-state j))
2843 (aset goto-table state
2844 (cons (cons (aref tags i) (aref to-state j))
2845 (aref goto-table state)))
2846 (setq j (1+ j)))
2847 (setq i (1+ i)))
2848 goto-table))
2849
2850(defsubst wisent-quote-p (sym)
2851 "Return non-nil if SYM is bound to the `quote' function."
2852 (condition-case nil
2853 (eq (indirect-function sym)
2854 (indirect-function 'quote))
2855 (error nil)))
2856
2857(defsubst wisent-backquote-p (sym)
2858 "Return non-nil if SYM is bound to the `backquote' function."
2859 (condition-case nil
2860 (eq (indirect-function sym)
2861 (indirect-function 'backquote))
2862 (error nil)))
2863
2864(defun wisent-check-$N (x m)
2865 "Return non-nil if X is a valid $N or $regionN symbol.
2866That is if X is a $N or $regionN symbol with N >= 1 and N <= M.
2867Also warn if X is a $N or $regionN symbol with N < 1 or N > M."
2868 (when (symbolp x)
2869 (let* ((n (symbol-name x))
2870 (i (and (string-match "\\`\\$\\(region\\)?\\([0-9]+\\)\\'" n)
2871 (string-to-number (match-string 2 n)))))
2872 (when i
2873 (if (and (>= i 1) (<= i m))
2874 t
2875 (message
2876 "*** In %s, %s might be a free variable (rule has %s)"
2877 NAME x (format (cond ((< m 1) "no component")
2878 ((= m 1) "%d component")
2879 ("%d components"))
2880 m))
2881 nil)))))
2882
2883(defun wisent-semantic-action-expand-body (body n &optional found)
2884 "Parse BODY of semantic action.
2885N is the maximum number of $N variables that can be referenced in
2886BODY. Warn on references out of permitted range.
2887Optional argument FOUND is the accumulated list of '$N' references
2888encountered so far.
2889Return a cons (FOUND . XBODY), where FOUND is the list of $N
2890references found in BODY, and XBODY is BODY expression with
2891`backquote' forms expanded."
2892 (if (not (listp body))
2893 ;; BODY is an atom, no expansion needed
2894 (progn
2895 (if (wisent-check-$N body n)
2896 ;; Accumulate $i symbol
2897 (add-to-list 'found body))
2898 (cons found body))
2899 ;; BODY is a list, expand inside it
2900 (let (xbody sexpr)
2901 ;; If backquote expand it first
2902 (if (wisent-backquote-p (car body))
2903 (setq body (macroexpand body)))
2904 (while body
2905 (setq sexpr (car body)
2906 body (cdr body))
2907 (cond
2908 ;; Function call excepted quote expression
2909 ((and (consp sexpr)
2910 (not (wisent-quote-p (car sexpr))))
2911 (setq sexpr (wisent-semantic-action-expand-body sexpr n found)
2912 found (car sexpr)
2913 sexpr (cdr sexpr)))
2914 ;; $i symbol
2915 ((wisent-check-$N sexpr n)
2916 ;; Accumulate $i symbol
2917 (add-to-list 'found sexpr))
2918 )
2919 ;; Accumulate expanded forms
2920 (setq xbody (nconc xbody (list sexpr))))
2921 (cons found xbody))))
2922
2923(defun wisent-semantic-action (r)
2924 "Set up the Elisp function for semantic action at rule R.
2925On entry RCODE[R] contains a vector [BODY N (NTERM I)] where BODY is the
2926body of the semantic action, N is the maximum number of values
2927available in the parser's stack, NTERM is the nonterminal the semantic
2928action belongs to, and I is the index of the semantic action inside
2929NTERM definition. Return the semantic action symbol.
2930The semantic action function accepts three arguments:
2931
2932- the state/value stack
2933- the top-of-stack index
2934- the goto table
2935
2936And returns the updated top-of-stack index."
2937 (if (not (aref ruseful r))
2938 (aset rcode r nil)
2939 (let* ((actn (aref rcode r))
2940 (n (aref actn 1)) ; nb of val avail. in stack
2941 (NAME (apply 'format "%s:%d" (aref actn 2)))
2942 (form (wisent-semantic-action-expand-body (aref actn 0) n))
2943 ($l (car form)) ; list of $vars used in body
2944 (form (cdr form)) ; expanded form of body
2945 (nt (aref rlhs r)) ; nonterminal item no.
2946 (bl nil) ; `let*' binding list
2947 $v i j)
2948
2949 ;; Compute $N and $regionN bindings
2950 (setq i n)
2951 (while (> i 0)
2952 (setq j (1+ (* 2 (- n i))))
2953 ;; Only bind $regionI if used in action
2954 (setq $v (intern (format "$region%d" i)))
2955 (if (memq $v $l)
2956 (setq bl (cons `(,$v (cdr (aref ,stack (- ,sp ,j)))) bl)))
2957 ;; Only bind $I if used in action
2958 (setq $v (intern (format "$%d" i)))
2959 (if (memq $v $l)
2960 (setq bl (cons `(,$v (car (aref ,stack (- ,sp ,j)))) bl)))
2961 (setq i (1- i)))
2962
2963 ;; Compute J, the length of rule's RHS. It will give the
2964 ;; current parser state at STACK[SP - 2*J], and where to push
2965 ;; the new semantic value and the next state, respectively at:
2966 ;; STACK[SP - 2*J + 1] and STACK[SP - 2*J + 2]. Generally N,
2967 ;; the maximum number of values available in the stack, is equal
2968 ;; to J. But, for mid-rule actions, N is the number of rule
2969 ;; elements before the action and J is always 0 (empty rule).
2970 (setq i (aref rrhs r)
2971 j 0)
2972 (while (> (aref ritem i) 0)
2973 (setq j (1+ j)
2974 i (1+ i)))
2975
2976 ;; Create the semantic action symbol.
2977 (setq actn (intern NAME (aref rcode 0)))
2978
2979 ;; Store source code in function cell of the semantic action
2980 ;; symbol. It will be byte-compiled at automaton's compilation
2981 ;; time. Using a byte-compiled automaton can significantly
2982 ;; speed up parsing!
2983 (fset actn
2984 `(lambda (,stack ,sp ,gotos)
2985 (let* (,@bl
2986 ($region
2987 ,(cond
2988 ((= n 1)
2989 (if (assq '$region1 bl)
2990 '$region1
2991 `(cdr (aref ,stack (1- ,sp)))))
2992 ((> n 1)
2993 `(wisent-production-bounds
2994 ,stack (- ,sp ,(1- (* 2 n))) (1- ,sp)))))
2995 ($action ,NAME)
2996 ($nterm ',(aref tags nt))
2997 ,@(and (> j 0) `((,sp (- ,sp ,(* j 2)))))
2998 (,state (cdr (assq $nterm
2999 (aref ,gotos
3000 (aref ,stack ,sp))))))
3001 (setq ,sp (+ ,sp 2))
3002 ;; push semantic value
3003 (aset ,stack (1- ,sp) (cons ,form $region))
3004 ;; push next state
3005 (aset ,stack ,sp ,state)
3006 ;; return new top of stack
3007 ,sp)))
3008
3009 ;; Return the semantic action symbol
3010 actn)))
3011\f
3012;;;; ----------------------------
3013;;;; Build parser LALR automaton.
3014;;;; ----------------------------
3015
3016(defun wisent-parser-automaton ()
3017 "Compute and return LALR(1) automaton from GRAMMAR.
3018GRAMMAR is in internal format. GRAM/ACTS are grammar rules
3019in internal format. STARTS defines the start symbols."
3020 ;; Check for useless stuff
3021 (wisent-reduce-grammar)
3022
3023 (wisent-set-derives)
3024 (wisent-set-nullable)
3025 ;; convert to nondeterministic finite state machine.
3026 (wisent-generate-states)
3027 ;; make it deterministic.
3028 (wisent-lalr)
3029 ;; Find and record any conflicts: places where one token of
3030 ;; lookahead is not enough to disambiguate the parsing. Also
3031 ;; resolve s/r conflicts based on precedence declarations.
3032 (wisent-resolve-conflicts)
3033 (wisent-print-results)
3034
3035 (vector (wisent-state-actions) ; action table
3036 (wisent-goto-actions) ; goto table
3037 start-table ; start symbols
3038 (aref rcode 0) ; sem. action symbol obarray
3039 )
3040 )
3041\f
3042;;;; -------------------
3043;;;; Parse input grammar
3044;;;; -------------------
3045
3046(defconst wisent-reserved-symbols (list wisent-error-term)
3047 "The list of reserved symbols.
3048Also all symbols starting with a character defined in
3049`wisent-reserved-capitals' are reserved for internal use.")
3050
3051(defconst wisent-reserved-capitals '(?\$ ?\@)
3052 "The list of reserved capital letters.
3053All symbol starting with one of these letters are reserved for
3054internal use.")
3055
3056(defconst wisent-starts-nonterm '$STARTS
3057 "Main start symbol.
3058It gives the rules for start symbols.")
3059
3060(defvar wisent-single-start-flag nil
3061 "Non-nil means allows only one start symbol like in Bison.
3062That is don't add extra start rules to the grammar. This is
3063useful to compare the Wisent's generated automaton with the Bison's
3064one.")
3065
3066(defsubst wisent-ISVALID-VAR (x)
3067 "Return non-nil if X is a character or an allowed symbol."
3068 (and x (symbolp x)
3069 (not (memq (aref (symbol-name x) 0) wisent-reserved-capitals))
3070 (not (memq x wisent-reserved-symbols))))
3071
3072(defsubst wisent-ISVALID-TOKEN (x)
3073 "Return non-nil if X is a character or an allowed symbol."
3074 (or (wisent-char-p x)
3075 (wisent-ISVALID-VAR x)))
3076
3077(defun wisent-push-token (symbol &optional nocheck)
3078 "Push a new SYMBOL in the list of tokens.
3079Bypass checking if NOCHECK is non-nil."
3080 ;; Check
3081 (or nocheck (wisent-ISVALID-TOKEN symbol)
3082 (error "Invalid terminal symbol: %S" symbol))
3083 (if (memq symbol token-list)
3084 (message "*** duplicate terminal `%s' ignored" symbol)
3085 ;; Set up properties
3086 (wisent-set-prec symbol nil)
3087 (wisent-set-assoc symbol nil)
3088 (wisent-set-item-number symbol ntokens)
3089 ;; Add
3090 (setq ntokens (1+ ntokens)
3091 token-list (cons symbol token-list))))
3092
3093(defun wisent-push-var (symbol &optional nocheck)
3094 "Push a new SYMBOL in the list of nonterminals.
3095Bypass checking if NOCHECK is non-nil."
3096 ;; Check
3097 (unless nocheck
3098 (or (wisent-ISVALID-VAR symbol)
3099 (error "Invalid nonterminal symbol: %S" symbol))
3100 (if (memq symbol var-list)
3101 (error "Nonterminal `%s' already defined" symbol)))
3102 ;; Set up properties
3103 (wisent-set-item-number symbol nvars)
3104 ;; Add
3105 (setq nvars (1+ nvars)
3106 var-list (cons symbol var-list)))
3107
3108(defun wisent-parse-nonterminals (defs)
3109 "Parse nonterminal definitions in DEFS.
3110Fill in each element of the global arrays RPREC, RCODE, RUSEFUL with
3111respectively rule precedence level, semantic action code and
3112usefulness flag. Return a list of rules of the form (LHS . RHS) where
3113LHS and RHS are respectively the Left Hand Side and Right Hand Side of
3114the rule."
3115 (setq rprec nil
3116 rcode nil
3117 nitems 0
3118 nrules 0)
3119 (let (def nonterm rlist rule rules rhs rest item items
3120 rhl plevel semact @n @count iactn)
3121 (setq @count 0)
3122 (while defs
3123 (setq def (car defs)
3124 defs (cdr defs)
3125 nonterm (car def)
3126 rlist (cdr def)
3127 iactn 0)
3128 (or (consp rlist)
3129 (error "Invalid nonterminal definition syntax: %S" def))
3130 (while rlist
3131 (setq rule (car rlist)
3132 rlist (cdr rlist)
3133 items (car rule)
3134 rest (cdr rule)
3135 rhl 0
3136 rhs nil)
3137
3138 ;; Check & count items
3139 (setq nitems (1+ nitems)) ;; LHS item
3140 (while items
3141 (setq item (car items)
3142 items (cdr items)
3143 nitems (1+ nitems)) ;; RHS items
3144 (if (listp item)
3145 ;; Mid-rule action
3146 (progn
3147 (setq @count (1+ @count)
3148 @n (intern (format "@%d" @count)))
3149 (wisent-push-var @n t)
3150 ;; Push a new empty rule with the mid-rule action
3151 (setq semact (vector item rhl (list nonterm iactn))
3152 iactn (1+ iactn)
3153 plevel nil
3154 rcode (cons semact rcode)
3155 rprec (cons plevel rprec)
3156 item @n ;; Replace action by @N nonterminal
3157 rules (cons (list item) rules)
3158 nitems (1+ nitems)
3159 nrules (1+ nrules)))
3160 ;; Check terminal or nonterminal symbol
3161 (cond
3162 ((or (memq item token-list) (memq item var-list)))
3163 ;; Create new literal character token
3164 ((wisent-char-p item) (wisent-push-token item t))
3165 ((error "Symbol `%s' is used, but is not defined as a token and has no rules"
3166 item))))
3167 (setq rhl (1+ rhl)
3168 rhs (cons item rhs)))
3169
3170 ;; Check & collect rule precedence level
3171 (setq plevel (when (vectorp (car rest))
3172 (setq item (car rest)
3173 rest (cdr rest))
3174 (if (and (= (length item) 1)
3175 (memq (aref item 0) token-list)
3176 (wisent-prec (aref item 0)))
3177 (wisent-item-number (aref item 0))
3178 (error "Invalid rule precedence level syntax: %S" item)))
3179 rprec (cons plevel rprec))
3180
3181 ;; Check & collect semantic action body
3182 (setq semact (vector
3183 (if rest
3184 (if (cdr rest)
3185 (error "Invalid semantic action syntax: %S" rest)
3186 (car rest))
3187 ;; Give a default semantic action body: nil
3188 ;; for an empty rule or $1, the value of the
3189 ;; first symbol in the rule, otherwise.
3190 (if (> rhl 0) '$1 '()))
3191 rhl
3192 (list nonterm iactn))
3193 iactn (1+ iactn)
3194 rcode (cons semact rcode))
3195 (setq rules (cons (cons nonterm (nreverse rhs)) rules)
3196 nrules (1+ nrules))))
3197
3198 (setq ruseful (make-vector (1+ nrules) t)
3199 rprec (vconcat (cons nil (nreverse rprec)))
3200 rcode (vconcat (cons nil (nreverse rcode))))
3201 (nreverse rules)
3202 ))
3203
3204(defun wisent-parse-grammar (grammar &optional start-list)
3205 "Parse GRAMMAR and build a suitable internal representation.
3206Optional argument START-LIST defines the start symbols.
3207GRAMMAR is a list of form: (TOKENS ASSOCS . NONTERMS)
3208
3209TOKENS is a list of terminal symbols (tokens).
3210
3211ASSOCS is nil or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements
3212describing the associativity of TOKENS. ASSOC-TYPE must be one of the
3213`default-prec' `nonassoc', `left' or `right' symbols. When ASSOC-TYPE
3214is `default-prec', ASSOC-VALUE must be nil or t (the default).
3215Otherwise it is a list of tokens which must have been previously
3216declared in TOKENS.
3217
3218NONTERMS is the list of non terminal definitions (see function
3219`wisent-parse-nonterminals')."
3220 (or (and (consp grammar) (> (length grammar) 2))
3221 (error "Bad input grammar"))
3222
3223 (let (i r rhs pre dpre lst start-var assoc rules item
3224 token var def tokens defs ep-token ep-var ep-def)
3225
3226 ;; Built-in tokens
3227 (setq ntokens 0 nvars 0)
3228 (wisent-push-token wisent-eoi-term t)
3229 (wisent-push-token wisent-error-term t)
3230
3231 ;; Check/collect terminals
3232 (setq lst (car grammar))
3233 (while lst
3234 (wisent-push-token (car lst))
3235 (setq lst (cdr lst)))
3236
3237 ;; Check/Set up tokens precedence & associativity
3238 (setq lst (nth 1 grammar)
3239 pre 0
3240 defs nil
3241 dpre nil
3242 default-prec t)
3243 (while lst
3244 (setq def (car lst)
3245 assoc (car def)
3246 tokens (cdr def)
3247 lst (cdr lst))
3248 (if (eq assoc 'default-prec)
3249 (progn
3250 (or (null (cdr tokens))
3251 (memq (car tokens) '(t nil))
3252 (error "Invalid default-prec value: %S" tokens))
3253 (setq default-prec (car tokens))
3254 (if dpre
3255 (message "*** redefining default-prec to %s"
3256 default-prec))
3257 (setq dpre t))
3258 (or (memq assoc '(left right nonassoc))
3259 (error "Invalid associativity syntax: %S" assoc))
3260 (setq pre (1+ pre))
3261 (while tokens
3262 (setq token (car tokens)
3263 tokens (cdr tokens))
3264 (if (memq token defs)
3265 (message "*** redefining precedence of `%s'" token))
3266 (or (memq token token-list)
3267 ;; Define token not previously declared.
3268 (wisent-push-token token))
3269 (setq defs (cons token defs))
3270 ;; Record the precedence and associativity of the terminal.
3271 (wisent-set-prec token pre)
3272 (wisent-set-assoc token assoc))))
3273
3274 ;; Check/Collect nonterminals
3275 (setq lst (nthcdr 2 grammar)
3276 defs nil)
3277 (while lst
3278 (setq def (car lst)
3279 lst (cdr lst))
3280 (or (consp def)
3281 (error "Invalid nonterminal definition: %S" def))
3282 (if (memq (car def) token-list)
3283 (error "Nonterminal `%s' already defined as token" (car def)))
3284 (wisent-push-var (car def))
3285 (setq defs (cons def defs)))
3286 (or defs
3287 (error "No input grammar"))
3288 (setq defs (nreverse defs))
3289
3290 ;; Set up the start symbol.
3291 (setq start-table nil)
3292 (cond
3293
3294 ;; 1. START-LIST is nil, the start symbol is the first
3295 ;; nonterminal defined in the grammar (Bison like).
3296 ((null start-list)
3297 (setq start-var (caar defs)))
3298
3299 ;; 2. START-LIST contains only one element, it is the start
3300 ;; symbol (Bison like).
3301 ((or wisent-single-start-flag (null (cdr start-list)))
3302 (setq start-var (car start-list))
3303 (or (assq start-var defs)
3304 (error "Start symbol `%s' has no rule" start-var)))
3305
3306 ;; 3. START-LIST contains more than one element. All defines
3307 ;; potential start symbols. One of them (the first one by
3308 ;; default) will be given at parse time to be the parser goal.
3309 ;; If `wisent-single-start-flag' is non-nil that feature is
3310 ;; disabled and the first nonterminal in START-LIST defines
3311 ;; the start symbol, like in case 2 above.
3312 ((not wisent-single-start-flag)
3313
3314 ;; START-LIST is a list of nonterminals '(nt0 ... ntN).
3315 ;; Build and push ad hoc start rules in the grammar:
3316
3317 ;; ($STARTS ((nt0) $1) ((nt1) $1) ... ((ntN) $1))
3318 ;; ($nt1 (($$nt1 nt1) $2))
3319 ;; ...
3320 ;; ($ntN (($$ntN ntN) $2))
3321
3322 ;; Where internal symbols $ntI and $$ntI are respectively
3323 ;; nonterminals and terminals.
3324
3325 ;; The internal start symbol $STARTS is used to build the
3326 ;; LALR(1) automaton. The true default start symbol used by the
3327 ;; parser is the first nonterminal in START-LIST (nt0).
3328 (setq start-var wisent-starts-nonterm
3329 lst (nreverse start-list))
3330 (while lst
3331 (setq var (car lst)
3332 lst (cdr lst))
3333 (or (memq var var-list)
3334 (error "Start symbol `%s' has no rule" var))
3335 (unless (assq var start-table) ;; Ignore duplicates
3336 ;; For each nt start symbol
3337 (setq ep-var (intern (format "$%s" var))
3338 ep-token (intern (format "$$%s" var)))
3339 (wisent-push-token ep-token t)
3340 (wisent-push-var ep-var t)
3341 (setq
3342 ;; Add entry (nt . $$nt) to start-table
3343 start-table (cons (cons var ep-token) start-table)
3344 ;; Add rule ($nt (($$nt nt) $2))
3345 defs (cons (list ep-var (list (list ep-token var) '$2)) defs)
3346 ;; Add start rule (($nt) $1)
3347 ep-def (cons (list (list ep-var) '$1) ep-def))
3348 ))
3349 (wisent-push-var start-var t)
3350 (setq defs (cons (cons start-var ep-def) defs))))
3351
3352 ;; Set up rules main data structure & RPREC, RCODE, RUSEFUL
3353 (setq rules (wisent-parse-nonterminals defs))
3354
3355 ;; Set up the terminal & nonterminal lists.
3356 (setq nsyms (+ ntokens nvars)
3357 token-list (nreverse token-list)
3358 lst var-list
3359 var-list nil)
3360 (while lst
3361 (setq var (car lst)
3362 lst (cdr lst)
3363 var-list (cons var var-list))
3364 (wisent-set-item-number ;; adjust nonterminal item number to
3365 var (+ ntokens (wisent-item-number var)))) ;; I += NTOKENS
3366
3367 ;; Store special item numbers
3368 (setq error-token-number (wisent-item-number wisent-error-term)
3369 start-symbol (wisent-item-number start-var))
3370
3371 ;; Keep symbols in the TAGS vector so that TAGS[I] is the symbol
3372 ;; associated to item number I.
3373 (setq tags (vconcat token-list var-list))
3374 ;; Set up RLHS RRHS & RITEM data structures from list of rules
3375 ;; (LHS . RHS) received from `wisent-parse-nonterminals'.
3376 (setq rlhs (make-vector (1+ nrules) nil)
3377 rrhs (make-vector (1+ nrules) nil)
3378 ritem (make-vector (1+ nitems) nil)
3379 i 0
3380 r 1)
3381 (while rules
3382 (aset rlhs r (wisent-item-number (caar rules)))
3383 (aset rrhs r i)
3384 (setq rhs (cdar rules)
3385 pre nil)
3386 (while rhs
3387 (setq item (wisent-item-number (car rhs)))
3388 ;; Get default precedence level of rule, that is the
3389 ;; precedence of the last terminal in it.
3390 (and (wisent-ISTOKEN item)
3391 default-prec
3392 (setq pre item))
3393
3394 (aset ritem i item)
3395 (setq i (1+ i)
3396 rhs (cdr rhs)))
3397 ;; Setup the precedence level of the rule, that is the one
3398 ;; specified by %prec or the default one.
3399 (and (not (aref rprec r)) ;; Already set by %prec
3400 pre
3401 (wisent-prec (aref tags pre))
3402 (aset rprec r pre))
3403 (aset ritem i (- r))
3404 (setq i (1+ i)
3405 r (1+ r))
3406 (setq rules (cdr rules)))
3407 ))
3408\f
3409;;;; ---------------------
3410;;;; Compile input grammar
3411;;;; ---------------------
3412
3413(defun wisent-compile-grammar (grammar &optional start-list)
3414 "Compile the LALR(1) GRAMMAR.
3415
3416GRAMMAR is a list (TOKENS ASSOCS . NONTERMS) where:
3417
3418- TOKENS is a list of terminal symbols (tokens).
3419
3420- ASSOCS is nil, or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements
3421 describing the associativity of TOKENS. ASSOC-TYPE must be one of
3422 the `default-prec' `nonassoc', `left' or `right' symbols. When
3423 ASSOC-TYPE is `default-prec', ASSOC-VALUE must be nil or t (the
3424 default). Otherwise it is a list of tokens which must have been
3425 previously declared in TOKENS.
3426
3427- NONTERMS is a list of nonterminal definitions.
3428
3429Optional argument START-LIST specify the possible grammar start
3430symbols. This is a list of nonterminals which must have been
3431previously declared in GRAMMAR's NONTERMS form. By default, the start
3432symbol is the first nonterminal defined. When START-LIST contains
3433only one element, it is the start symbol. Otherwise, all elements are
3434possible start symbols, unless `wisent-single-start-flag' is non-nil.
3435In that case, the first element is the start symbol, and others are
3436ignored.
3437
3438Return an automaton as a vector: [ACTIONS GOTOS STARTS FUNCTIONS]
3439where:
3440
3441- ACTIONS is a state/token matrix telling the parser what to do at
3442 every state based on the current lookahead token. That is shift,
3443 reduce, accept or error.
3444
3445- GOTOS is a state/nonterminal matrix telling the parser the next
3446 state to go to after reducing with each rule.
3447
3448- STARTS is an alist which maps the allowed start nonterminal symbols
3449 to tokens that will be first shifted into the parser stack.
3450
3451- FUNCTIONS is an obarray of semantic action symbols. Each symbol's
3452 function definition is the semantic action lambda expression."
3453 (if (wisent-automaton-p grammar)
3454 grammar ;; Grammar already compiled just return it
3455 (wisent-with-context compile-grammar
3456 (let* ((gc-cons-threshold 1000000)
3457 automaton)
3458 (garbage-collect)
3459 (setq wisent-new-log-flag t)
3460 ;; Parse input grammar
3461 (wisent-parse-grammar grammar start-list)
3462 ;; Generate the LALR(1) automaton
3463 (setq automaton (wisent-parser-automaton))
3464 automaton))))
3465\f
3466;;;; --------------------------
3467;;;; Byte compile input grammar
3468;;;; --------------------------
3469
3470(require 'bytecomp)
3471
3472(defun wisent-byte-compile-grammar (form)
3473 "Byte compile the `wisent-compile-grammar' FORM.
3474Automatically called by the Emacs Lisp byte compiler as a
3475`byte-compile' handler."
3476 ;; Eval the `wisent-compile-grammar' form to obtain an LALR
3477 ;; automaton internal data structure. Then, because the internal
3478 ;; data structure contains an obarray, convert it to a lisp form so
3479 ;; it can be byte-compiled.
3480 (byte-compile-form (wisent-automaton-lisp-form (eval form))))
3481
3482(put 'wisent-compile-grammar 'byte-compile 'wisent-byte-compile-grammar)
3483
3484(defun wisent-automaton-lisp-form (automaton)
3485 "Return a Lisp form that produces AUTOMATON.
3486See also `wisent-compile-grammar' for more details on AUTOMATON."
3487 (or (wisent-automaton-p automaton)
3488 (signal 'wrong-type-argument
3489 (list 'wisent-automaton-p automaton)))
3490 (let ((obn (make-symbol "ob")) ; Generated obarray name
3491 (obv (aref automaton 3)) ; Semantic actions obarray
3492 )
3493 `(let ((,obn (make-vector 13 0)))
3494 ;; Generate code to initialize the semantic actions obarray,
3495 ;; in local variable OBN.
3496 ,@(let (obcode)
3497 (mapatoms
3498 #'(lambda (s)
3499 (setq obcode
3500 (cons `(fset (intern ,(symbol-name s) ,obn)
3501 #',(symbol-function s))
3502 obcode)))
3503 obv)
3504 obcode)
3505 ;; Generate code to create the automaton.
3506 (vector
3507 ;; In code generated to initialize the action table, take
3508 ;; care of symbols that are interned in the semantic actions
3509 ;; obarray.
3510 (vector
3511 ,@(mapcar
3512 #'(lambda (state) ;; for each state
3513 `(list
3514 ,@(mapcar
3515 #'(lambda (tr) ;; for each transition
3516 (let ((k (car tr)) ; token
3517 (a (cdr tr))) ; action
3518 (if (and (symbolp a)
3519 (intern-soft (symbol-name a) obv))
3520 `(cons ,(if (symbolp k) `(quote ,k) k)
3521 (intern-soft ,(symbol-name a) ,obn))
3522 `(quote ,tr))))
3523 state)))
3524 (aref automaton 0)))
3525 ;; The code of the goto table is unchanged.
3526 ,(aref automaton 1)
3527 ;; The code of the alist of start symbols is unchanged.
3528 ',(aref automaton 2)
3529 ;; The semantic actions obarray is in the local variable OBN.
3530 ,obn))))
3531
3532(provide 'semantic/wisent/comp)
3533
3534;;; semantic/wisent/comp.el ends here