8fd1a1df5eb45252ab2c59429cd5136861c13a0a
[bpt/emacs.git] / lisp / cedet / semantic / wisent / comp.el
1 ;;; semantic/wisent/comp.el --- GNU Bison for Emacs - Grammar compiler
2
3 ;; Copyright (C) 1984, 1986, 1989, 1992, 1995, 2000, 2001, 2002, 2003,
4 ;; 2004, 2005, 2006, 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
5
6 ;; Author: David Ponce <david@dponce.com>
7 ;; Maintainer: David Ponce <david@dponce.com>
8 ;; Created: 30 January 2002
9 ;; Keywords: syntax
10
11 ;; This file is part of GNU Emacs.
12
13 ;; GNU Emacs is free software: you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation, either version 3 of the License, or
16 ;; (at your option) any later version.
17
18 ;; GNU Emacs is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
22
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25
26 ;;; Commentary:
27 ;;
28 ;; Grammar compiler that produces Wisent's LALR automatons.
29 ;;
30 ;; Wisent (the European Bison ;-) is an Elisp implementation of the
31 ;; GNU Compiler Compiler Bison. The Elisp code is a port of the C
32 ;; code of GNU Bison 1.28 & 1.31.
33 ;;
34 ;; For more details on the basic concepts for understanding Wisent,
35 ;; read the Bison manual ;)
36 ;;
37 ;; For more details on Wisent itself read the Wisent manual.
38
39 ;;; History:
40 ;;
41
42 ;;; Code:
43 (require 'semantic/wisent)
44 \f
45 ;;;; -------------------
46 ;;;; Misc. useful things
47 ;;;; -------------------
48
49 ;; As much as possible I would like to keep the name of global
50 ;; variables used in Bison without polluting too much the Elisp global
51 ;; name space. Elisp dynamic binding allows that ;-)
52
53 ;; Here are simple macros to easily define and use set of variables
54 ;; binded locally, without all these "reference to free variable"
55 ;; compiler warnings!
56
57 (defmacro wisent-context-name (name)
58 "Return the context name from NAME."
59 `(if (and ,name (symbolp ,name))
60 (intern (format "wisent-context-%s" ,name))
61 (error "Invalid context name: %S" ,name)))
62
63 (defmacro wisent-context-bindings (name)
64 "Return the variables in context NAME."
65 `(symbol-value (wisent-context-name ,name)))
66
67 (defmacro wisent-defcontext (name &rest vars)
68 "Define a context NAME that will bind variables VARS."
69 (let* ((context (wisent-context-name name))
70 (bindings (mapcar #'(lambda (v) (list 'defvar v)) vars)))
71 `(eval-when-compile
72 ,@bindings
73 (defvar ,context ',vars))))
74 (put 'wisent-defcontext 'lisp-indent-function 1)
75
76 (defmacro wisent-with-context (name &rest body)
77 "Bind variables in context NAME then eval BODY."
78 `(let* ,(wisent-context-bindings name)
79 ,@body))
80 (put 'wisent-with-context 'lisp-indent-function 1)
81
82 ;; A naive implementation of data structures! But it suffice here ;-)
83
84 (defmacro wisent-struct (name &rest fields)
85 "Define a simple data structure called NAME.
86 Which contains data stored in FIELDS. FIELDS is a list of symbols
87 which are field names or pairs (FIELD INITIAL-VALUE) where
88 INITIAL-VALUE is a constant used as the initial value of FIELD when
89 the data structure is created. INITIAL-VALUE defaults to nil.
90
91 This defines a `make-NAME' constructor, get-able `NAME-FIELD' and
92 set-able `set-NAME-FIELD' accessors."
93 (let ((size (length fields))
94 (i 0)
95 accors field sufx fun ivals)
96 (while (< i size)
97 (setq field (car fields)
98 fields (cdr fields))
99 (if (consp field)
100 (setq ivals (cons (cadr field) ivals)
101 field (car field))
102 (setq ivals (cons nil ivals)))
103 (setq sufx (format "%s-%s" name field)
104 fun (intern (format "%s" sufx))
105 accors (cons `(defmacro ,fun (s)
106 (list 'aref s ,i))
107 accors)
108 fun (intern (format "set-%s" sufx))
109 accors (cons `(defmacro ,fun (s v)
110 (list 'aset s ,i v))
111 accors)
112 i (1+ i)))
113 `(progn
114 (defmacro ,(intern (format "make-%s" name)) ()
115 (cons 'vector ',(nreverse ivals)))
116 ,@accors)))
117 (put 'wisent-struct 'lisp-indent-function 1)
118
119 ;; Other utilities
120
121 (defsubst wisent-pad-string (s n &optional left)
122 "Fill string S with spaces.
123 Return a new string of at least N characters. Insert spaces on right.
124 If optional LEFT is non-nil insert spaces on left."
125 (let ((i (length s)))
126 (if (< i n)
127 (if left
128 (concat (make-string (- n i) ?\ ) s)
129 (concat s (make-string (- n i) ?\ )))
130 s)))
131 \f
132 ;;;; ------------------------
133 ;;;; Environment dependencies
134 ;;;; ------------------------
135
136 (defconst wisent-BITS-PER-WORD
137 (let ((i 1))
138 (while (not (zerop (lsh 1 i)))
139 (setq i (1+ i)))
140 i))
141
142 (defsubst wisent-WORDSIZE (n)
143 "(N + BITS-PER-WORD - 1) / BITS-PER-WORD."
144 (/ (1- (+ n wisent-BITS-PER-WORD)) wisent-BITS-PER-WORD))
145
146 (defsubst wisent-SETBIT (x i)
147 "X[I/BITS-PER-WORD] |= 1 << (I % BITS-PER-WORD)."
148 (let ((k (/ i wisent-BITS-PER-WORD)))
149 (aset x k (logior (aref x k)
150 (lsh 1 (% i wisent-BITS-PER-WORD))))))
151
152 (defsubst wisent-RESETBIT (x i)
153 "X[I/BITS-PER-WORD] &= ~(1 << (I % BITS-PER-WORD))."
154 (let ((k (/ i wisent-BITS-PER-WORD)))
155 (aset x k (logand (aref x k)
156 (lognot (lsh 1 (% i wisent-BITS-PER-WORD)))))))
157
158 (defsubst wisent-BITISSET (x i)
159 "(X[I/BITS-PER-WORD] & (1 << (I % BITS-PER-WORD))) != 0."
160 (not (zerop (logand (aref x (/ i wisent-BITS-PER-WORD))
161 (lsh 1 (% i wisent-BITS-PER-WORD))))))
162
163 (eval-when-compile
164 (or (fboundp 'noninteractive)
165 ;; Silence the Emacs byte compiler
166 (defun noninteractive nil))
167 )
168
169 (defsubst wisent-noninteractive ()
170 "Return non-nil if running without interactive terminal."
171 (if (featurep 'xemacs)
172 (noninteractive)
173 noninteractive))
174
175 (defvar wisent-debug-flag nil
176 "Non-nil means enable some debug stuff.")
177 \f
178 ;;;; --------------
179 ;;;; Logging/Output
180 ;;;; --------------
181 (defconst wisent-log-buffer-name "*wisent-log*"
182 "Name of the log buffer.")
183
184 (defvar wisent-new-log-flag nil
185 "Non-nil means to start a new report.")
186
187 (defvar wisent-verbose-flag nil
188 "*Non-nil means to report verbose information on generated parser.")
189
190 (defun wisent-toggle-verbose-flag ()
191 "Toggle whether to report verbose information on generated parser."
192 (interactive)
193 (setq wisent-verbose-flag (not wisent-verbose-flag))
194 (when (called-interactively-p 'interactive)
195 (message "Verbose report %sabled"
196 (if wisent-verbose-flag "en" "dis"))))
197
198 (defmacro wisent-log-buffer ()
199 "Return the log buffer.
200 Its name is defined in constant `wisent-log-buffer-name'."
201 `(get-buffer-create wisent-log-buffer-name))
202
203 (defmacro wisent-clear-log ()
204 "Delete the entire contents of the log buffer."
205 `(with-current-buffer (wisent-log-buffer)
206 (erase-buffer)))
207
208 (eval-when-compile (defvar byte-compile-current-file))
209
210 (defun wisent-source ()
211 "Return the current source file name or nil."
212 (let ((source (or (and (boundp 'byte-compile-current-file)
213 byte-compile-current-file)
214 load-file-name (buffer-file-name))))
215 (if source
216 (file-relative-name source))))
217
218 (defun wisent-new-log ()
219 "Start a new entry into the log buffer."
220 (setq wisent-new-log-flag nil)
221 (let ((text (format "\n\n*** Wisent %s - %s\n\n"
222 (or (wisent-source) (buffer-name))
223 (format-time-string "%Y-%m-%d %R"))))
224 (with-current-buffer (wisent-log-buffer)
225 (goto-char (point-max))
226 (insert text))))
227
228 (defsubst wisent-log (&rest args)
229 "Insert text into the log buffer.
230 `format' is applied to ARGS and the result string is inserted into the
231 log buffer returned by the function `wisent-log-buffer'."
232 (and wisent-new-log-flag (wisent-new-log))
233 (with-current-buffer (wisent-log-buffer)
234 (insert (apply 'format args))))
235
236 (defconst wisent-log-file "wisent.output"
237 "The log file.
238 Used when running without interactive terminal.")
239
240 (defun wisent-append-to-log-file ()
241 "Append contents of logging buffer to `wisent-log-file'."
242 (if (get-buffer wisent-log-buffer-name)
243 (condition-case err
244 (with-current-buffer (wisent-log-buffer)
245 (widen)
246 (if (> (point-max) (point-min))
247 (write-region (point-min) (point-max)
248 wisent-log-file t)))
249 (error
250 (message "*** %s" (error-message-string err))))))
251 \f
252 ;;;; -----------------------------------
253 ;;;; Representation of the grammar rules
254 ;;;; -----------------------------------
255
256 ;; ntokens is the number of tokens, and nvars is the number of
257 ;; variables (nonterminals). nsyms is the total number, ntokens +
258 ;; nvars.
259
260 ;; Each symbol (either token or variable) receives a symbol number.
261 ;; Numbers 0 to ntokens-1 are for tokens, and ntokens to nsyms-1 are
262 ;; for variables. Symbol number zero is the end-of-input token. This
263 ;; token is counted in ntokens.
264
265 ;; The rules receive rule numbers 1 to nrules in the order they are
266 ;; written. Actions and guards are accessed via the rule number.
267
268 ;; The rules themselves are described by three arrays: rrhs, rlhs and
269 ;; ritem. rlhs[R] is the symbol number of the left hand side of rule
270 ;; R. The right hand side is stored as symbol numbers in a portion of
271 ;; ritem. rrhs[R] contains the index in ritem of the beginning of the
272 ;; portion for rule R.
273
274 ;; The length of the portion is one greater than the number of symbols
275 ;; in the rule's right hand side. The last element in the portion
276 ;; contains minus R, which identifies it as the end of a portion and
277 ;; says which rule it is for.
278
279 ;; The portions of ritem come in order of increasing rule number and
280 ;; are followed by an element which is nil to mark the end. nitems is
281 ;; the total length of ritem, not counting the final nil. Each
282 ;; element of ritem is called an "item" and its index in ritem is an
283 ;; item number.
284
285 ;; Item numbers are used in the finite state machine to represent
286 ;; places that parsing can get to.
287
288 ;; The vector rprec contains for each rule, the item number of the
289 ;; symbol giving its precedence level to this rule. The precedence
290 ;; level and associativity of each symbol is recorded in respectively
291 ;; the properties 'wisent--prec and 'wisent--assoc.
292
293 ;; Precedence levels are assigned in increasing order starting with 1
294 ;; so that numerically higher precedence values mean tighter binding
295 ;; as they ought to. nil as a symbol or rule's precedence means none
296 ;; is assigned.
297
298 (defcustom wisent-state-table-size 1009
299 "The size of the state table."
300 :type 'integer
301 :group 'wisent)
302
303 ;; These variables only exist locally in the function
304 ;; `wisent-compile-grammar' and are shared by all other nested
305 ;; callees.
306 (wisent-defcontext compile-grammar
307 F LA LAruleno accessing-symbol conflicts consistent default-prec
308 derives err-table fderives final-state first-reduction first-shift
309 first-state firsts from-state goto-map includes itemset nitemset
310 kernel-base kernel-end kernel-items last-reduction last-shift
311 last-state lookaheads lookaheadset lookback maxrhs ngotos nitems
312 nrules nshifts nstates nsyms ntokens nullable nvars rassoc redset
313 reduction-table ritem rlhs rprec rrc-count rrc-total rrhs ruseful
314 rcode ruleset rulesetsize shift-symbol shift-table shiftset
315 src-count src-total start-table state-table tags this-state to-state
316 tokensetsize ;; nb of words req. to hold a bit for each rule
317 varsetsize ;; nb of words req. to hold a bit for each variable
318 error-token-number start-symbol token-list var-list
319 N P V V1 nuseless-nonterminals nuseless-productions
320 ptable ;; symbols & characters properties
321 )
322
323 (defmacro wisent-ISTOKEN (s)
324 "Return non-nil if item number S defines a token (terminal).
325 That is if S < `ntokens'."
326 `(< ,s ntokens))
327
328 (defmacro wisent-ISVAR(s)
329 "Return non-nil if item number S defines a nonterminal.
330 That is if S >= `ntokens'."
331 `(>= ,s ntokens))
332
333 (defsubst wisent-tag (s)
334 "Return printable form of item number S."
335 (wisent-item-to-string (aref tags s)))
336
337 ;; Symbol and character properties
338
339 (defsubst wisent-put (object propname value)
340 "Store OBJECT's PROPNAME property with value VALUE.
341 Use `eq' to locate OBJECT."
342 (let ((entry (assq object ptable)))
343 (or entry (setq entry (list object) ptable (cons entry ptable)))
344 (setcdr entry (plist-put (cdr entry) propname value))))
345
346 (defsubst wisent-get (object propname)
347 "Return the value of OBJECT's PROPNAME property.
348 Use `eq' to locate OBJECT."
349 (plist-get (cdr (assq object ptable)) propname))
350
351 (defsubst wisent-item-number (x)
352 "Return the item number of symbol X."
353 (wisent-get x 'wisent--item-no))
354
355 (defsubst wisent-set-item-number (x n)
356 "Set the item number of symbol X to N."
357 (wisent-put x 'wisent--item-no n))
358
359 (defsubst wisent-assoc (x)
360 "Return the associativity of symbol X."
361 (wisent-get x 'wisent--assoc))
362
363 (defsubst wisent-set-assoc (x a)
364 "Set the associativity of symbol X to A."
365 (wisent-put x 'wisent--assoc a))
366
367 (defsubst wisent-prec (x)
368 "Return the precedence level of symbol X."
369 (wisent-get x 'wisent--prec))
370
371 (defsubst wisent-set-prec (x p)
372 "Set the precedence level of symbol X to P."
373 (wisent-put x 'wisent--prec p))
374 \f
375 ;;;; ----------------------------------------------------------
376 ;;;; Type definitions for nondeterministic finite state machine
377 ;;;; ----------------------------------------------------------
378
379 ;; These type definitions are used to represent a nondeterministic
380 ;; finite state machine that parses the specified grammar. This
381 ;; information is generated by the function `wisent-generate-states'.
382
383 ;; Each state of the machine is described by a set of items --
384 ;; particular positions in particular rules -- that are the possible
385 ;; places where parsing could continue when the machine is in this
386 ;; state. These symbols at these items are the allowable inputs that
387 ;; can follow now.
388
389 ;; A core represents one state. States are numbered in the number
390 ;; field. When `wisent-generate-states' is finished, the starting
391 ;; state is state 0 and `nstates' is the number of states. (A
392 ;; transition to a state whose state number is `nstates' indicates
393 ;; termination.) All the cores are chained together and `first-state'
394 ;; points to the first one (state 0).
395
396 ;; For each state there is a particular symbol which must have been
397 ;; the last thing accepted to reach that state. It is the
398 ;; accessing-symbol of the core.
399
400 ;; Each core contains a vector of `nitems' items which are the indices
401 ;; in the `ritems' vector of the items that are selected in this
402 ;; state.
403
404 ;; The link field is used for chaining buckets that hash states by
405 ;; their itemsets. This is for recognizing equivalent states and
406 ;; combining them when the states are generated.
407
408 ;; The two types of transitions are shifts (push the lookahead token
409 ;; and read another) and reductions (combine the last n things on the
410 ;; stack via a rule, replace them with the symbol that the rule
411 ;; derives, and leave the lookahead token alone). When the states are
412 ;; generated, these transitions are represented in two other lists.
413
414 ;; Each shifts structure describes the possible shift transitions out
415 ;; of one state, the state whose number is in the number field. The
416 ;; shifts structures are linked through next and first-shift points to
417 ;; them. Each contains a vector of numbers of the states that shift
418 ;; transitions can go to. The accessing-symbol fields of those
419 ;; states' cores say what kind of input leads to them.
420
421 ;; A shift to state zero should be ignored. Conflict resolution
422 ;; deletes shifts by changing them to zero.
423
424 ;; Each reductions structure describes the possible reductions at the
425 ;; state whose number is in the number field. The data is a list of
426 ;; nreds rules, represented by their rule numbers. `first-reduction'
427 ;; points to the list of these structures.
428
429 ;; Conflict resolution can decide that certain tokens in certain
430 ;; states should explicitly be errors (for implementing %nonassoc).
431 ;; For each state, the tokens that are errors for this reason are
432 ;; recorded in an errs structure, which has the state number in its
433 ;; number field. The rest of the errs structure is full of token
434 ;; numbers.
435
436 ;; There is at least one shift transition present in state zero. It
437 ;; leads to a next-to-final state whose accessing-symbol is the
438 ;; grammar's start symbol. The next-to-final state has one shift to
439 ;; the final state, whose accessing-symbol is zero (end of input).
440 ;; The final state has one shift, which goes to the termination state
441 ;; (whose number is `nstates'-1).
442 ;; The reason for the extra state at the end is to placate the
443 ;; parser's strategy of making all decisions one token ahead of its
444 ;; actions.
445
446 (wisent-struct core
447 next ; -> core
448 link ; -> core
449 (number 0)
450 (accessing-symbol 0)
451 (nitems 0)
452 (items [0]))
453
454 (wisent-struct shifts
455 next ; -> shifts
456 (number 0)
457 (nshifts 0)
458 (shifts [0]))
459
460 (wisent-struct reductions
461 next ; -> reductions
462 (number 0)
463 (nreds 0)
464 (rules [0]))
465
466 (wisent-struct errs
467 (nerrs 0)
468 (errs [0]))
469 \f
470 ;;;; --------------------------------------------------------
471 ;;;; Find unreachable terminals, nonterminals and productions
472 ;;;; --------------------------------------------------------
473
474 (defun wisent-bits-equal (L R n)
475 "Visit L and R and return non-nil if their first N elements are `='.
476 L and R must be vectors of integers."
477 (let* ((i (1- n))
478 (iseq t))
479 (while (and iseq (natnump i))
480 (setq iseq (= (aref L i) (aref R i))
481 i (1- i)))
482 iseq))
483
484 (defun wisent-nbits (i)
485 "Return number of bits set in integer I."
486 (let ((count 0))
487 (while (not (zerop i))
488 ;; i ^= (i & ((unsigned) (-(int) i)))
489 (setq i (logxor i (logand i (- i)))
490 count (1+ count)))
491 count))
492
493 (defun wisent-bits-size (S n)
494 "In vector S count the total of bits set in first N elements.
495 S must be a vector of integers."
496 (let* ((i (1- n))
497 (count 0))
498 (while (natnump i)
499 (setq count (+ count (wisent-nbits (aref S i)))
500 i (1- i)))
501 count))
502
503 (defun wisent-useful-production (i N0)
504 "Return non-nil if production I is in useful set N0."
505 (let* ((useful t)
506 (r (aref rrhs i))
507 n)
508 (while (and useful (> (setq n (aref ritem r)) 0))
509 (if (wisent-ISVAR n)
510 (setq useful (wisent-BITISSET N0 (- n ntokens))))
511 (setq r (1+ r)))
512 useful))
513
514 (defun wisent-useless-nonterminals ()
515 "Find out which nonterminals are used."
516 (let (Np Ns i n break)
517 ;; N is set as built. Np is set being built this iteration. P is
518 ;; set of all productions which have a RHS all in N.
519 (setq n (wisent-WORDSIZE nvars)
520 Np (make-vector n 0))
521
522 ;; The set being computed is a set of nonterminals which can
523 ;; derive the empty string or strings consisting of all
524 ;; terminals. At each iteration a nonterminal is added to the set
525 ;; if there is a production with that nonterminal as its LHS for
526 ;; which all the nonterminals in its RHS are already in the set.
527 ;; Iterate until the set being computed remains unchanged. Any
528 ;; nonterminals not in the set at that point are useless in that
529 ;; they will never be used in deriving a sentence of the language.
530
531 ;; This iteration doesn't use any special traversal over the
532 ;; productions. A set is kept of all productions for which all
533 ;; the nonterminals in the RHS are in useful. Only productions
534 ;; not in this set are scanned on each iteration. At the end,
535 ;; this set is saved to be used when finding useful productions:
536 ;; only productions in this set will appear in the final grammar.
537
538 (while (not break)
539 (setq i (1- n))
540 (while (natnump i)
541 ;; Np[i] = N[i]
542 (aset Np i (aref N i))
543 (setq i (1- i)))
544
545 (setq i 1)
546 (while (<= i nrules)
547 (if (not (wisent-BITISSET P i))
548 (when (wisent-useful-production i N)
549 (wisent-SETBIT Np (- (aref rlhs i) ntokens))
550 (wisent-SETBIT P i)))
551 (setq i (1+ i)))
552 (if (wisent-bits-equal N Np n)
553 (setq break t)
554 (setq Ns Np
555 Np N
556 N Ns)))
557 (setq N Np)))
558
559 (defun wisent-inaccessable-symbols ()
560 "Find out which productions are reachable and which symbols are used."
561 ;; Starting with an empty set of productions and a set of symbols
562 ;; which only has the start symbol in it, iterate over all
563 ;; productions until the set of productions remains unchanged for an
564 ;; iteration. For each production which has a LHS in the set of
565 ;; reachable symbols, add the production to the set of reachable
566 ;; productions, and add all of the nonterminals in the RHS of the
567 ;; production to the set of reachable symbols.
568
569 ;; Consider only the (partially) reduced grammar which has only
570 ;; nonterminals in N and productions in P.
571
572 ;; The result is the set P of productions in the reduced grammar,
573 ;; and the set V of symbols in the reduced grammar.
574
575 ;; Although this algorithm also computes the set of terminals which
576 ;; are reachable, no terminal will be deleted from the grammar. Some
577 ;; terminals might not be in the grammar but might be generated by
578 ;; semantic routines, and so the user might want them available with
579 ;; specified numbers. (Is this true?) However, the non reachable
580 ;; terminals are printed (if running in verbose mode) so that the
581 ;; user can know.
582 (let (Vp Vs Pp i tt r n m break)
583 (setq n (wisent-WORDSIZE nsyms)
584 m (wisent-WORDSIZE (1+ nrules))
585 Vp (make-vector n 0)
586 Pp (make-vector m 0))
587
588 ;; If the start symbol isn't useful, then nothing will be useful.
589 (when (wisent-BITISSET N (- start-symbol ntokens))
590 (wisent-SETBIT V start-symbol)
591 (while (not break)
592 (setq i (1- n))
593 (while (natnump i)
594 (aset Vp i (aref V i))
595 (setq i (1- i)))
596 (setq i 1)
597 (while (<= i nrules)
598 (when (and (not (wisent-BITISSET Pp i))
599 (wisent-BITISSET P i)
600 (wisent-BITISSET V (aref rlhs i)))
601 (setq r (aref rrhs i))
602 (while (natnump (setq tt (aref ritem r)))
603 (if (or (wisent-ISTOKEN tt)
604 (wisent-BITISSET N (- tt ntokens)))
605 (wisent-SETBIT Vp tt))
606 (setq r (1+ r)))
607 (wisent-SETBIT Pp i))
608 (setq i (1+ i)))
609 (if (wisent-bits-equal V Vp n)
610 (setq break t)
611 (setq Vs Vp
612 Vp V
613 V Vs))))
614 (setq V Vp)
615
616 ;; Tokens 0, 1 are internal to Wisent. Consider them useful.
617 (wisent-SETBIT V 0) ;; end-of-input token
618 (wisent-SETBIT V 1) ;; error token
619 (setq P Pp)
620
621 (setq nuseless-productions (- nrules (wisent-bits-size P m))
622 nuseless-nonterminals nvars
623 i ntokens)
624 (while (< i nsyms)
625 (if (wisent-BITISSET V i)
626 (setq nuseless-nonterminals (1- nuseless-nonterminals)))
627 (setq i (1+ i)))
628
629 ;; A token that was used in %prec should not be warned about.
630 (setq i 1)
631 (while (<= i nrules)
632 (if (aref rprec i)
633 (wisent-SETBIT V1 (aref rprec i)))
634 (setq i (1+ i)))
635 ))
636
637 (defun wisent-reduce-grammar-tables ()
638 "Disable useless productions."
639 (if (> nuseless-productions 0)
640 (let ((pn 1))
641 (while (<= pn nrules)
642 (aset ruseful pn (wisent-BITISSET P pn))
643 (setq pn (1+ pn))))))
644
645 (defun wisent-nonterminals-reduce ()
646 "Remove useless nonterminals."
647 (let (i n r item nontermmap tags-sorted)
648 ;; Map the nonterminals to their new index: useful first, useless
649 ;; afterwards. Kept for later report.
650 (setq nontermmap (make-vector nvars 0)
651 n ntokens
652 i ntokens)
653 (while (< i nsyms)
654 (when (wisent-BITISSET V i)
655 (aset nontermmap (- i ntokens) n)
656 (setq n (1+ n)))
657 (setq i (1+ i)))
658 (setq i ntokens)
659 (while (< i nsyms)
660 (unless (wisent-BITISSET V i)
661 (aset nontermmap (- i ntokens) n)
662 (setq n (1+ n)))
663 (setq i (1+ i)))
664 ;; Shuffle elements of tables indexed by symbol number
665 (setq tags-sorted (make-vector nvars nil)
666 i ntokens)
667 (while (< i nsyms)
668 (setq n (aref nontermmap (- i ntokens)))
669 (aset tags-sorted (- n ntokens) (aref tags i))
670 (setq i (1+ i)))
671 (setq i ntokens)
672 (while (< i nsyms)
673 (aset tags i (aref tags-sorted (- i ntokens)))
674 (setq i (1+ i)))
675 ;; Replace all symbol numbers in valid data structures.
676 (setq i 1)
677 (while (<= i nrules)
678 (aset rlhs i (aref nontermmap (- (aref rlhs i) ntokens)))
679 (setq i (1+ i)))
680 (setq r 0)
681 (while (setq item (aref ritem r))
682 (if (wisent-ISVAR item)
683 (aset ritem r (aref nontermmap (- item ntokens))))
684 (setq r (1+ r)))
685 (setq start-symbol (aref nontermmap (- start-symbol ntokens))
686 nsyms (- nsyms nuseless-nonterminals)
687 nvars (- nvars nuseless-nonterminals))
688 ))
689
690 (defun wisent-total-useless ()
691 "Report number of useless nonterminals and productions."
692 (let* ((src (wisent-source))
693 (src (if src (concat " in " src) ""))
694 (msg (format "Grammar%s contains" src)))
695 (if (> nuseless-nonterminals 0)
696 (setq msg (format "%s %d useless nonterminal%s"
697 msg nuseless-nonterminals
698 (if (> nuseless-nonterminals 0) "s" ""))))
699 (if (and (> nuseless-nonterminals 0) (> nuseless-productions 0))
700 (setq msg (format "%s and" msg)))
701 (if (> nuseless-productions 0)
702 (setq msg (format "%s %d useless rule%s"
703 msg nuseless-productions
704 (if (> nuseless-productions 0) "s" ""))))
705 (message msg)))
706
707 (defun wisent-reduce-grammar ()
708 "Find unreachable terminals, nonterminals and productions."
709 ;; Allocate the global sets used to compute the reduced grammar
710 (setq N (make-vector (wisent-WORDSIZE nvars) 0)
711 P (make-vector (wisent-WORDSIZE (1+ nrules)) 0)
712 V (make-vector (wisent-WORDSIZE nsyms) 0)
713 V1 (make-vector (wisent-WORDSIZE nsyms) 0)
714 nuseless-nonterminals 0
715 nuseless-productions 0)
716
717 (wisent-useless-nonterminals)
718 (wisent-inaccessable-symbols)
719
720 (when (> (+ nuseless-nonterminals nuseless-productions) 0)
721 (wisent-total-useless)
722 (or (wisent-BITISSET N (- start-symbol ntokens))
723 (error "Start symbol `%s' does not derive any sentence"
724 (wisent-tag start-symbol)))
725 (wisent-reduce-grammar-tables)
726 (if (> nuseless-nonterminals 0)
727 (wisent-nonterminals-reduce))))
728
729 (defun wisent-print-useless ()
730 "Output the detailed results of the reductions."
731 (let (i b r)
732 (when (> nuseless-nonterminals 0)
733 ;; Useless nonterminals have been moved after useful ones.
734 (wisent-log "\n\nUseless nonterminals:\n\n")
735 (setq i 0)
736 (while (< i nuseless-nonterminals)
737 (wisent-log " %s\n" (wisent-tag (+ nsyms i)))
738 (setq i (1+ i))))
739 (setq b nil
740 i 0)
741 (while (< i ntokens)
742 (unless (or (wisent-BITISSET V i) (wisent-BITISSET V1 i))
743 (or b
744 (wisent-log "\n\nTerminals which are not used:\n\n"))
745 (setq b t)
746 (wisent-log " %s\n" (wisent-tag i)))
747 (setq i (1+ i)))
748 (when (> nuseless-productions 0)
749 (wisent-log "\n\nUseless rules:\n\n")
750 (setq i 1)
751 (while (<= i nrules)
752 (unless (aref ruseful i)
753 (wisent-log "#%s " (wisent-pad-string (format "%d" i) 4))
754 (wisent-log "%s:" (wisent-tag (aref rlhs i)))
755 (setq r (aref rrhs i))
756 (while (natnump (aref ritem r))
757 (wisent-log " %s" (wisent-tag (aref ritem r)))
758 (setq r (1+ r)))
759 (wisent-log ";\n"))
760 (setq i (1+ i))))
761 (if (or b (> nuseless-nonterminals 0) (> nuseless-productions 0))
762 (wisent-log "\n\n"))
763 ))
764 \f
765 ;;;; -----------------------------
766 ;;;; Match rules with nonterminals
767 ;;;; -----------------------------
768
769 (defun wisent-set-derives ()
770 "Find, for each variable (nonterminal), which rules can derive it.
771 It sets up the value of DERIVES so that DERIVES[i - NTOKENS] points to
772 a list of rule numbers, terminated with -1."
773 (let (i lhs p q dset delts)
774 (setq dset (make-vector nvars nil)
775 delts (make-vector (1+ nrules) 0))
776 (setq p 0 ;; p = delts
777 i nrules)
778 (while (> i 0)
779 (when (aref ruseful i)
780 (setq lhs (aref rlhs i))
781 ;; p->next = dset[lhs];
782 ;; p->value = i;
783 (aset delts p (cons i (aref dset (- lhs ntokens)))) ;; (value . next)
784 (aset dset (- lhs ntokens) p) ;; dset[lhs] = p
785 (setq p (1+ p)) ;; p++
786 )
787 (setq i (1- i)))
788
789 (setq derives (make-vector nvars nil)
790 i ntokens)
791
792 (while (< i nsyms)
793 (setq q nil
794 p (aref dset (- i ntokens))) ;; p = dset[i]
795
796 (while p
797 (setq p (aref delts p)
798 q (cons (car p) q) ;;q++ = p->value
799 p (cdr p))) ;; p = p->next
800 (setq q (nreverse (cons -1 q))) ;; *q++ = -1
801 (aset derives (- i ntokens) q) ;; derives[i] = q
802 (setq i (1+ i)))
803 ))
804 \f
805 ;;;; --------------------------------------------------------
806 ;;;; Find which nonterminals can expand into the null string.
807 ;;;; --------------------------------------------------------
808
809 (defun wisent-print-nullable ()
810 "Print NULLABLE."
811 (let (i)
812 (wisent-log "NULLABLE\n")
813 (setq i ntokens)
814 (while (< i nsyms)
815 (wisent-log "\t%s: %s\n" (wisent-tag i)
816 (if (aref nullable (- i ntokens))
817 "yes" : "no"))
818 (setq i (1+ i)))
819 (wisent-log "\n\n")))
820
821 (defun wisent-set-nullable ()
822 "Set up NULLABLE.
823 A vector saying which nonterminals can expand into the null string.
824 NULLABLE[i - NTOKENS] is nil if symbol I can do so."
825 (let (ruleno s1 s2 p r squeue rcount rsets relts item any-tokens)
826 (setq squeue (make-vector nvars 0)
827 rcount (make-vector (1+ nrules) 0)
828 rsets (make-vector nvars nil) ;; - ntokens
829 relts (make-vector (+ nitems nvars 1) nil)
830 nullable (make-vector nvars nil)) ;; - ntokens
831 (setq s1 0 s2 0 ;; s1 = s2 = squeue
832 p 0 ;; p = relts
833 ruleno 1)
834 (while (<= ruleno nrules)
835 (when (aref ruseful ruleno)
836 (if (> (aref ritem (aref rrhs ruleno)) 0)
837 (progn
838 ;; This rule has a non empty RHS.
839 (setq any-tokens nil
840 r (aref rrhs ruleno))
841 (while (> (aref ritem r) 0)
842 (if (wisent-ISTOKEN (aref ritem r))
843 (setq any-tokens t))
844 (setq r (1+ r)))
845
846 ;; This rule has only nonterminals: schedule it for the
847 ;; second pass.
848 (unless any-tokens
849 (setq r (aref rrhs ruleno))
850 (while (> (setq item (aref ritem r)) 0)
851 (aset rcount ruleno (1+ (aref rcount ruleno)))
852 ;; p->next = rsets[item];
853 ;; p->value = ruleno;
854 (aset relts p (cons ruleno (aref rsets (- item ntokens))))
855 ;; rsets[item] = p;
856 (aset rsets (- item ntokens) p)
857 (setq p (1+ p)
858 r (1+ r)))))
859 ;; This rule has an empty RHS.
860 ;; assert (ritem[rrhs[ruleno]] == -ruleno)
861 (when (and (aref ruseful ruleno)
862 (setq item (aref rlhs ruleno))
863 (not (aref nullable (- item ntokens))))
864 (aset nullable (- item ntokens) t)
865 (aset squeue s2 item)
866 (setq s2 (1+ s2)))
867 )
868 )
869 (setq ruleno (1+ ruleno)))
870
871 (while (< s1 s2)
872 ;; p = rsets[*s1++]
873 (setq p (aref rsets (- (aref squeue s1) ntokens))
874 s1 (1+ s1))
875 (while p
876 (setq p (aref relts p)
877 ruleno (car p)
878 p (cdr p)) ;; p = p->next
879 ;; if (--rcount[ruleno] == 0)
880 (when (zerop (aset rcount ruleno (1- (aref rcount ruleno))))
881 (setq item (aref rlhs ruleno))
882 (aset nullable (- item ntokens) t)
883 (aset squeue s2 item)
884 (setq s2 (1+ s2)))))
885
886 (if wisent-debug-flag
887 (wisent-print-nullable))
888 ))
889 \f
890 ;;;; -----------
891 ;;;; Subroutines
892 ;;;; -----------
893
894 (defun wisent-print-fderives ()
895 "Print FDERIVES."
896 (let (i j rp)
897 (wisent-log "\n\n\nFDERIVES\n")
898 (setq i ntokens)
899 (while (< i nsyms)
900 (wisent-log "\n\n%s derives\n\n" (wisent-tag i))
901 (setq rp (aref fderives (- i ntokens))
902 j 0)
903 (while (<= j nrules)
904 (if (wisent-BITISSET rp j)
905 (wisent-log " %d\n" j))
906 (setq j (1+ j)))
907 (setq i (1+ i)))))
908
909 (defun wisent-set-fderives ()
910 "Set up FDERIVES.
911 An NVARS by NRULES matrix of bits indicating which rules can help
912 derive the beginning of the data for each nonterminal. For example,
913 if symbol 5 can be derived as the sequence of symbols 8 3 20, and one
914 of the rules for deriving symbol 8 is rule 4, then the
915 \[5 - NTOKENS, 4] bit in FDERIVES is set."
916 (let (i j k)
917 (setq fderives (make-vector nvars nil))
918 (setq i 0)
919 (while (< i nvars)
920 (aset fderives i (make-vector rulesetsize 0))
921 (setq i (1+ i)))
922
923 (wisent-set-firsts)
924
925 (setq i ntokens)
926 (while (< i nsyms)
927 (setq j ntokens)
928 (while (< j nsyms)
929 ;; if (BITISSET (FIRSTS (i), j - ntokens))
930 (when (wisent-BITISSET (aref firsts (- i ntokens)) (- j ntokens))
931 (setq k (aref derives (- j ntokens)))
932 (while (> (car k) 0) ;; derives[j][k] > 0
933 ;; SETBIT (FDERIVES (i), derives[j][k]);
934 (wisent-SETBIT (aref fderives (- i ntokens)) (car k))
935 (setq k (cdr k))))
936 (setq j (1+ j)))
937 (setq i (1+ i)))
938
939 (if wisent-debug-flag
940 (wisent-print-fderives))
941 ))
942
943 (defun wisent-print-firsts ()
944 "Print FIRSTS."
945 (let (i j v)
946 (wisent-log "\n\n\nFIRSTS\n\n")
947 (setq i ntokens)
948 (while (< i nsyms)
949 (wisent-log "\n\n%s firsts\n\n" (wisent-tag i))
950 (setq v (aref firsts (- i ntokens))
951 j 0)
952 (while (< j nvars)
953 (if (wisent-BITISSET v j)
954 (wisent-log "\t\t%d (%s)\n"
955 (+ j ntokens) (wisent-tag (+ j ntokens))))
956 (setq j (1+ j)))
957 (setq i (1+ i)))))
958
959 (defun wisent-TC (R n)
960 "Transitive closure.
961 Given R an N by N matrix of bits, modify its contents to be the
962 transitive closure of what was given."
963 (let (i j k)
964 ;; R (J, I) && R (I, K) => R (J, K).
965 ;; I *must* be the outer loop.
966 (setq i 0)
967 (while (< i n)
968 (setq j 0)
969 (while (< j n)
970 (when (wisent-BITISSET (aref R j) i)
971 (setq k 0)
972 (while (< k n)
973 (if (wisent-BITISSET (aref R i) k)
974 (wisent-SETBIT (aref R j) k))
975 (setq k (1+ k))))
976 (setq j (1+ j)))
977 (setq i (1+ i)))))
978
979 (defun wisent-RTC (R n)
980 "Reflexive Transitive Closure.
981 Same as `wisent-TC' and then set all the bits on the diagonal of R, an
982 N by N matrix of bits."
983 (let (i)
984 (wisent-TC R n)
985 (setq i 0)
986 (while (< i n)
987 (wisent-SETBIT (aref R i) i)
988 (setq i (1+ i)))))
989
990 (defun wisent-set-firsts ()
991 "Set up FIRSTS.
992 An NVARS by NVARS bit matrix indicating which items can represent the
993 beginning of the input corresponding to which other items. For
994 example, if some rule expands symbol 5 into the sequence of symbols 8
995 3 20, the symbol 8 can be the beginning of the data for symbol 5, so
996 the bit [8 - NTOKENS, 5 - NTOKENS] in FIRSTS is set."
997 (let (row symbol sp rowsize i)
998 (setq rowsize (wisent-WORDSIZE nvars)
999 varsetsize rowsize
1000 firsts (make-vector nvars nil)
1001 i 0)
1002 (while (< i nvars)
1003 (aset firsts i (make-vector rowsize 0))
1004 (setq i (1+ i)))
1005
1006 (setq row 0 ;; row = firsts
1007 i ntokens)
1008 (while (< i nsyms)
1009 (setq sp (aref derives (- i ntokens)))
1010 (while (>= (car sp) 0)
1011 (setq symbol (aref ritem (aref rrhs (car sp)))
1012 sp (cdr sp))
1013 (when (wisent-ISVAR symbol)
1014 (setq symbol (- symbol ntokens))
1015 (wisent-SETBIT (aref firsts row) symbol)
1016 ))
1017 (setq row (1+ row)
1018 i (1+ i)))
1019
1020 (wisent-RTC firsts nvars)
1021
1022 (if wisent-debug-flag
1023 (wisent-print-firsts))
1024 ))
1025
1026 (defun wisent-initialize-closure (n)
1027 "Allocate the ITEMSET and RULESET vectors.
1028 And precompute useful data so that `wisent-closure' can be called.
1029 N is the number of elements to allocate for ITEMSET."
1030 (setq itemset (make-vector n 0)
1031 rulesetsize (wisent-WORDSIZE (1+ nrules))
1032 ruleset (make-vector rulesetsize 0))
1033
1034 (wisent-set-fderives))
1035
1036 (defun wisent-print-closure ()
1037 "Print ITEMSET."
1038 (let (i)
1039 (wisent-log "\n\nclosure n = %d\n\n" nitemset)
1040 (setq i 0) ;; isp = itemset
1041 (while (< i nitemset)
1042 (wisent-log " %d\n" (aref itemset i))
1043 (setq i (1+ i)))))
1044
1045 (defun wisent-closure (core n)
1046 "Set up RULESET and ITEMSET for the transitions out of CORE state.
1047 Given a vector of item numbers items, of length N, set up RULESET and
1048 ITEMSET to indicate what rules could be run and which items could be
1049 accepted when those items are the active ones.
1050
1051 RULESET contains a bit for each rule. `wisent-closure' sets the bits
1052 for all rules which could potentially describe the next input to be
1053 read.
1054
1055 ITEMSET is a vector of item numbers; NITEMSET is the number of items
1056 in ITEMSET. `wisent-closure' places there the indices of all items
1057 which represent units of input that could arrive next."
1058 (let (c r v symbol ruleno itemno)
1059 (if (zerop n)
1060 (progn
1061 (setq r 0
1062 v (aref fderives (- start-symbol ntokens)))
1063 (while (< r rulesetsize)
1064 ;; ruleset[r] = FDERIVES (start-symbol)[r];
1065 (aset ruleset r (aref v r))
1066 (setq r (1+ r)))
1067 )
1068 (fillarray ruleset 0)
1069 (setq c 0)
1070 (while (< c n)
1071 (setq symbol (aref ritem (aref core c)))
1072 (when (wisent-ISVAR symbol)
1073 (setq r 0
1074 v (aref fderives (- symbol ntokens)))
1075 (while (< r rulesetsize)
1076 ;; ruleset[r] |= FDERIVES (ritem[core[c]])[r];
1077 (aset ruleset r (logior (aref ruleset r) (aref v r)))
1078 (setq r (1+ r))))
1079 (setq c (1+ c)))
1080 )
1081 (setq nitemset 0
1082 c 0
1083 ruleno 0
1084 r (* rulesetsize wisent-BITS-PER-WORD))
1085 (while (< ruleno r)
1086 (when (wisent-BITISSET ruleset ruleno)
1087 (setq itemno (aref rrhs ruleno))
1088 (while (and (< c n) (< (aref core c) itemno))
1089 (aset itemset nitemset (aref core c))
1090 (setq nitemset (1+ nitemset)
1091 c (1+ c)))
1092 (aset itemset nitemset itemno)
1093 (setq nitemset (1+ nitemset)))
1094 (setq ruleno (1+ ruleno)))
1095
1096 (while (< c n)
1097 (aset itemset nitemset (aref core c))
1098 (setq nitemset (1+ nitemset)
1099 c (1+ c)))
1100
1101 (if wisent-debug-flag
1102 (wisent-print-closure))
1103 ))
1104 \f
1105 ;;;; --------------------------------------------------
1106 ;;;; Generate the nondeterministic finite state machine
1107 ;;;; --------------------------------------------------
1108
1109 (defun wisent-allocate-itemsets ()
1110 "Allocate storage for itemsets."
1111 (let (symbol i count symbol-count)
1112 ;; Count the number of occurrences of all the symbols in RITEMS.
1113 ;; Note that useless productions (hence useless nonterminals) are
1114 ;; browsed too, hence we need to allocate room for _all_ the
1115 ;; symbols.
1116 (setq count 0
1117 symbol-count (make-vector (+ nsyms nuseless-nonterminals) 0)
1118 i 0)
1119 (while (setq symbol (aref ritem i))
1120 (when (> symbol 0)
1121 (setq count (1+ count))
1122 (aset symbol-count symbol (1+ (aref symbol-count symbol))))
1123 (setq i (1+ i)))
1124 ;; See comments before `wisent-new-itemsets'. All the vectors of
1125 ;; items live inside kernel-items. The number of active items
1126 ;; after some symbol cannot be more than the number of times that
1127 ;; symbol appears as an item, which is symbol-count[symbol]. We
1128 ;; allocate that much space for each symbol.
1129 (setq kernel-base (make-vector nsyms nil)
1130 kernel-items (make-vector count 0)
1131 count 0
1132 i 0)
1133 (while (< i nsyms)
1134 (aset kernel-base i count)
1135 (setq count (+ count (aref symbol-count i))
1136 i (1+ i)))
1137 (setq shift-symbol symbol-count
1138 kernel-end (make-vector nsyms nil))
1139 ))
1140
1141 (defun wisent-allocate-storage ()
1142 "Allocate storage for the state machine."
1143 (wisent-allocate-itemsets)
1144 (setq shiftset (make-vector nsyms 0)
1145 redset (make-vector (1+ nrules) 0)
1146 state-table (make-vector wisent-state-table-size nil)))
1147
1148 (defun wisent-new-itemsets ()
1149 "Find which symbols can be shifted in the current state.
1150 And for each one record which items would be active after that shift.
1151 Uses the contents of ITEMSET. SHIFT-SYMBOL is set to a vector of the
1152 symbols that can be shifted. For each symbol in the grammar,
1153 KERNEL-BASE[symbol] points to a vector of item numbers activated if
1154 that symbol is shifted, and KERNEL-END[symbol] points after the end of
1155 that vector."
1156 (let (i shiftcount isp ksp symbol)
1157 (fillarray kernel-end nil)
1158 (setq shiftcount 0
1159 isp 0)
1160 (while (< isp nitemset)
1161 (setq i (aref itemset isp)
1162 isp (1+ isp)
1163 symbol (aref ritem i))
1164 (when (> symbol 0)
1165 (setq ksp (aref kernel-end symbol))
1166 (when (not ksp)
1167 ;; shift-symbol[shiftcount++] = symbol;
1168 (aset shift-symbol shiftcount symbol)
1169 (setq shiftcount (1+ shiftcount)
1170 ksp (aref kernel-base symbol)))
1171 ;; *ksp++ = i + 1;
1172 (aset kernel-items ksp (1+ i))
1173 (setq ksp (1+ ksp))
1174 (aset kernel-end symbol ksp)))
1175 (setq nshifts shiftcount)))
1176
1177 (defun wisent-new-state (symbol)
1178 "Create a new state for those items, if necessary.
1179 SYMBOL is the core accessing-symbol.
1180 Subroutine of `wisent-get-state'."
1181 (let (n p isp1 isp2 iend items)
1182 (setq isp1 (aref kernel-base symbol)
1183 iend (aref kernel-end symbol)
1184 n (- iend isp1)
1185 p (make-core)
1186 items (make-vector n 0))
1187 (set-core-accessing-symbol p symbol)
1188 (set-core-number p nstates)
1189 (set-core-nitems p n)
1190 (set-core-items p items)
1191 (setq isp2 0) ;; isp2 = p->items
1192 (while (< isp1 iend)
1193 ;; *isp2++ = *isp1++;
1194 (aset items isp2 (aref kernel-items isp1))
1195 (setq isp1 (1+ isp1)
1196 isp2 (1+ isp2)))
1197 (set-core-next last-state p)
1198 (setq last-state p
1199 nstates (1+ nstates))
1200 p))
1201
1202 (defun wisent-get-state (symbol)
1203 "Find the state we would get to by shifting SYMBOL.
1204 Return the state number for the state we would get to (from the
1205 current state) by shifting SYMBOL. Create a new state if no
1206 equivalent one exists already. Used by `wisent-append-states'."
1207 (let (key isp1 isp2 iend sp sp2 found n)
1208 (setq isp1 (aref kernel-base symbol)
1209 iend (aref kernel-end symbol)
1210 n (- iend isp1)
1211 key 0)
1212 ;; Add up the target state's active item numbers to get a hash key
1213 (while (< isp1 iend)
1214 (setq key (+ key (aref kernel-items isp1))
1215 isp1 (1+ isp1)))
1216 (setq key (% key wisent-state-table-size)
1217 sp (aref state-table key))
1218 (if sp
1219 (progn
1220 (setq found nil)
1221 (while (not found)
1222 (when (= (core-nitems sp) n)
1223 (setq found t
1224 isp1 (aref kernel-base symbol)
1225 ;; isp2 = sp->items;
1226 sp2 (core-items sp)
1227 isp2 0)
1228
1229 (while (and found (< isp1 iend))
1230 ;; if (*isp1++ != *isp2++)
1231 (if (not (= (aref kernel-items isp1)
1232 (aref sp2 isp2)))
1233 (setq found nil))
1234 (setq isp1 (1+ isp1)
1235 isp2 (1+ isp2))))
1236 (if (not found)
1237 (if (core-link sp)
1238 (setq sp (core-link sp))
1239 ;; sp = sp->link = new-state(symbol)
1240 (setq sp (set-core-link sp (wisent-new-state symbol))
1241 found t)))))
1242 ;; bucket is empty
1243 ;; state-table[key] = sp = new-state(symbol)
1244 (setq sp (wisent-new-state symbol))
1245 (aset state-table key sp))
1246 ;; return (sp->number);
1247 (core-number sp)))
1248
1249 (defun wisent-append-states ()
1250 "Find or create the core structures for states.
1251 Use the information computed by `wisent-new-itemsets' to find the
1252 state numbers reached by each shift transition from the current state.
1253 SHIFTSET is set up as a vector of state numbers of those states."
1254 (let (i j symbol)
1255 ;; First sort shift-symbol into increasing order
1256 (setq i 1)
1257 (while (< i nshifts)
1258 (setq symbol (aref shift-symbol i)
1259 j i)
1260 (while (and (> j 0) (> (aref shift-symbol (1- j)) symbol))
1261 (aset shift-symbol j (aref shift-symbol (1- j)))
1262 (setq j (1- j)))
1263 (aset shift-symbol j symbol)
1264 (setq i (1+ i)))
1265 (setq i 0)
1266 (while (< i nshifts)
1267 (setq symbol (aref shift-symbol i))
1268 (aset shiftset i (wisent-get-state symbol))
1269 (setq i (1+ i)))
1270 ))
1271
1272 (defun wisent-initialize-states ()
1273 "Initialize states."
1274 (let ((p (make-core)))
1275 (setq first-state p
1276 last-state p
1277 this-state p
1278 nstates 1)))
1279
1280 (defun wisent-save-shifts ()
1281 "Save the NSHIFTS of SHIFTSET into the current linked list."
1282 (let (p i shifts)
1283 (setq p (make-shifts)
1284 shifts (make-vector nshifts 0)
1285 i 0)
1286 (set-shifts-number p (core-number this-state))
1287 (set-shifts-nshifts p nshifts)
1288 (set-shifts-shifts p shifts)
1289 (while (< i nshifts)
1290 ;; (p->shifts)[i] = shiftset[i];
1291 (aset shifts i (aref shiftset i))
1292 (setq i (1+ i)))
1293
1294 (if last-shift
1295 (set-shifts-next last-shift p)
1296 (setq first-shift p))
1297 (setq last-shift p)))
1298
1299 (defun wisent-insert-start-shift ()
1300 "Create the next-to-final state.
1301 That is the state to which a shift has already been made in the
1302 initial state. Subroutine of `wisent-augment-automaton'."
1303 (let (statep sp)
1304 (setq statep (make-core))
1305 (set-core-number statep nstates)
1306 (set-core-accessing-symbol statep start-symbol)
1307 (set-core-next last-state statep)
1308 (setq last-state statep)
1309 ;; Make a shift from this state to (what will be) the final state.
1310 (setq sp (make-shifts))
1311 (set-shifts-number sp nstates)
1312 (setq nstates (1+ nstates))
1313 (set-shifts-nshifts sp 1)
1314 (set-shifts-shifts sp (vector nstates))
1315 (set-shifts-next last-shift sp)
1316 (setq last-shift sp)))
1317
1318 (defun wisent-augment-automaton ()
1319 "Set up initial and final states as parser wants them.
1320 Make sure that the initial state has a shift that accepts the
1321 grammar's start symbol and goes to the next-to-final state, which has
1322 a shift going to the final state, which has a shift to the termination
1323 state. Create such states and shifts if they don't happen to exist
1324 already."
1325 (let (i k statep sp sp2 sp1 shifts)
1326 (setq sp first-shift)
1327 (if sp
1328 (progn
1329 (if (zerop (shifts-number sp))
1330 (progn
1331 (setq k (shifts-nshifts sp)
1332 statep (core-next first-state))
1333 ;; The states reached by shifts from first-state are
1334 ;; numbered 1...K. Look for one reached by
1335 ;; START-SYMBOL.
1336 (while (and (< (core-accessing-symbol statep) start-symbol)
1337 (< (core-number statep) k))
1338 (setq statep (core-next statep)))
1339 (if (= (core-accessing-symbol statep) start-symbol)
1340 (progn
1341 ;; We already have a next-to-final state. Make
1342 ;; sure it has a shift to what will be the final
1343 ;; state.
1344 (setq k (core-number statep))
1345 (while (and sp (< (shifts-number sp) k))
1346 (setq sp1 sp
1347 sp (shifts-next sp)))
1348 (if (and sp (= (shifts-number sp) k))
1349 (progn
1350 (setq i (shifts-nshifts sp)
1351 sp2 (make-shifts)
1352 shifts (make-vector (1+ i) 0))
1353 (set-shifts-number sp2 k)
1354 (set-shifts-nshifts sp2 (1+ i))
1355 (set-shifts-shifts sp2 shifts)
1356 (aset shifts 0 nstates)
1357 (while (> i 0)
1358 ;; sp2->shifts[i] = sp->shifts[i - 1];
1359 (aset shifts i (aref (shifts-shifts sp) (1- i)))
1360 (setq i (1- i)))
1361 ;; Patch sp2 into the chain of shifts in
1362 ;; place of sp, following sp1.
1363 (set-shifts-next sp2 (shifts-next sp))
1364 (set-shifts-next sp1 sp2)
1365 (if (eq sp last-shift)
1366 (setq last-shift sp2))
1367 )
1368 (setq sp2 (make-shifts))
1369 (set-shifts-number sp2 k)
1370 (set-shifts-nshifts sp2 1)
1371 (set-shifts-shifts sp2 (vector nstates))
1372 ;; Patch sp2 into the chain of shifts between
1373 ;; sp1 and sp.
1374 (set-shifts-next sp2 sp)
1375 (set-shifts-next sp1 sp2)
1376 (if (not sp)
1377 (setq last-shift sp2))
1378 )
1379 )
1380 ;; There is no next-to-final state as yet.
1381 ;; Add one more shift in FIRST-SHIFT, going to the
1382 ;; next-to-final state (yet to be made).
1383 (setq sp first-shift
1384 sp2 (make-shifts)
1385 i (shifts-nshifts sp)
1386 shifts (make-vector (1+ i) 0))
1387 (set-shifts-nshifts sp2 (1+ i))
1388 (set-shifts-shifts sp2 shifts)
1389 ;; Stick this shift into the vector at the proper place.
1390 (setq statep (core-next first-state)
1391 k 0
1392 i 0)
1393 (while (< i (shifts-nshifts sp))
1394 (when (and (> (core-accessing-symbol statep) start-symbol)
1395 (= i k))
1396 (aset shifts k nstates)
1397 (setq k (1+ k)))
1398 (aset shifts k (aref (shifts-shifts sp) i))
1399 (setq statep (core-next statep))
1400 (setq i (1+ i)
1401 k (1+ k)))
1402 (when (= i k)
1403 (aset shifts k nstates)
1404 (setq k (1+ k)))
1405 ;; Patch sp2 into the chain of shifts in place of
1406 ;; sp, at the beginning.
1407 (set-shifts-next sp2 (shifts-next sp))
1408 (setq first-shift sp2)
1409 (if (eq last-shift sp)
1410 (setq last-shift sp2))
1411 ;; Create the next-to-final state, with shift to
1412 ;; what will be the final state.
1413 (wisent-insert-start-shift)))
1414 ;; The initial state didn't even have any shifts. Give it
1415 ;; one shift, to the next-to-final state.
1416 (setq sp (make-shifts))
1417 (set-shifts-nshifts sp 1)
1418 (set-shifts-shifts sp (vector nstates))
1419 ;; Patch sp into the chain of shifts at the beginning.
1420 (set-shifts-next sp first-shift)
1421 (setq first-shift sp)
1422 ;; Create the next-to-final state, with shift to what will
1423 ;; be the final state.
1424 (wisent-insert-start-shift)))
1425 ;; There are no shifts for any state. Make one shift, from the
1426 ;; initial state to the next-to-final state.
1427 (setq sp (make-shifts))
1428 (set-shifts-nshifts sp 1)
1429 (set-shifts-shifts sp (vector nstates))
1430 ;; Initialize the chain of shifts with sp.
1431 (setq first-shift sp
1432 last-shift sp)
1433 ;; Create the next-to-final state, with shift to what will be
1434 ;; the final state.
1435 (wisent-insert-start-shift))
1436 ;; Make the final state--the one that follows a shift from the
1437 ;; next-to-final state. The symbol for that shift is 0
1438 ;; (end-of-file).
1439 (setq statep (make-core))
1440 (set-core-number statep nstates)
1441 (set-core-next last-state statep)
1442 (setq last-state statep)
1443 ;; Make the shift from the final state to the termination state.
1444 (setq sp (make-shifts))
1445 (set-shifts-number sp nstates)
1446 (setq nstates (1+ nstates))
1447 (set-shifts-nshifts sp 1)
1448 (set-shifts-shifts sp (vector nstates))
1449 (set-shifts-next last-shift sp)
1450 (setq last-shift sp)
1451 ;; Note that the variable FINAL-STATE refers to what we sometimes
1452 ;; call the termination state.
1453 (setq final-state nstates)
1454 ;; Make the termination state.
1455 (setq statep (make-core))
1456 (set-core-number statep nstates)
1457 (setq nstates (1+ nstates))
1458 (set-core-next last-state statep)
1459 (setq last-state statep)))
1460
1461 (defun wisent-save-reductions ()
1462 "Make a reductions structure.
1463 Find which rules can be used for reduction transitions from the
1464 current state and make a reductions structure for the state to record
1465 their rule numbers."
1466 (let (i item count p rules)
1467 ;; Find and count the active items that represent ends of rules.
1468 (setq count 0
1469 i 0)
1470 (while (< i nitemset)
1471 (setq item (aref ritem (aref itemset i)))
1472 (when (< item 0)
1473 (aset redset count (- item))
1474 (setq count (1+ count)))
1475 (setq i (1+ i)))
1476 ;; Make a reductions structure and copy the data into it.
1477 (when (> count 0)
1478 (setq p (make-reductions)
1479 rules (make-vector count 0))
1480 (set-reductions-number p (core-number this-state))
1481 (set-reductions-nreds p count)
1482 (set-reductions-rules p rules)
1483 (setq i 0)
1484 (while (< i count)
1485 ;; (p->rules)[i] = redset[i]
1486 (aset rules i (aref redset i))
1487 (setq i (1+ i)))
1488 (if last-reduction
1489 (set-reductions-next last-reduction p)
1490 (setq first-reduction p))
1491 (setq last-reduction p))))
1492
1493 (defun wisent-generate-states ()
1494 "Compute the nondeterministic finite state machine from the grammar."
1495 (wisent-allocate-storage)
1496 (wisent-initialize-closure nitems)
1497 (wisent-initialize-states)
1498 (while this-state
1499 ;; Set up RULESET and ITEMSET for the transitions out of this
1500 ;; state. RULESET gets a 1 bit for each rule that could reduce
1501 ;; now. ITEMSET gets a vector of all the items that could be
1502 ;; accepted next.
1503 (wisent-closure (core-items this-state) (core-nitems this-state))
1504 ;; Record the reductions allowed out of this state.
1505 (wisent-save-reductions)
1506 ;; Find the itemsets of the states that shifts can reach.
1507 (wisent-new-itemsets)
1508 ;; Find or create the core structures for those states.
1509 (wisent-append-states)
1510 ;; Create the shifts structures for the shifts to those states,
1511 ;; now that the state numbers transitioning to are known.
1512 (if (> nshifts 0)
1513 (wisent-save-shifts))
1514 ;; States are queued when they are created; process them all.
1515 (setq this-state (core-next this-state)))
1516 ;; Set up initial and final states as parser wants them.
1517 (wisent-augment-automaton))
1518 \f
1519 ;;;; ---------------------------
1520 ;;;; Compute look-ahead criteria
1521 ;;;; ---------------------------
1522
1523 ;; Compute how to make the finite state machine deterministic; find
1524 ;; which rules need lookahead in each state, and which lookahead
1525 ;; tokens they accept.
1526
1527 ;; `wisent-lalr', the entry point, builds these data structures:
1528
1529 ;; GOTO-MAP, FROM-STATE and TO-STATE record each shift transition
1530 ;; which accepts a variable (a nonterminal). NGOTOS is the number of
1531 ;; such transitions.
1532 ;; FROM-STATE[t] is the state number which a transition leads from and
1533 ;; TO-STATE[t] is the state number it leads to.
1534 ;; All the transitions that accept a particular variable are grouped
1535 ;; together and GOTO-MAP[i - NTOKENS] is the index in FROM-STATE and
1536 ;; TO-STATE of the first of them.
1537
1538 ;; CONSISTENT[s] is non-nil if no lookahead is needed to decide what
1539 ;; to do in state s.
1540
1541 ;; LARULENO is a vector which records the rules that need lookahead in
1542 ;; various states. The elements of LARULENO that apply to state s are
1543 ;; those from LOOKAHEADS[s] through LOOKAHEADS[s+1]-1. Each element
1544 ;; of LARULENO is a rule number.
1545
1546 ;; If LR is the length of LARULENO, then a number from 0 to LR-1 can
1547 ;; specify both a rule and a state where the rule might be applied.
1548 ;; LA is a LR by NTOKENS matrix of bits.
1549 ;; LA[l, i] is 1 if the rule LARULENO[l] is applicable in the
1550 ;; appropriate state when the next token is symbol i.
1551 ;; If LA[l, i] and LA[l, j] are both 1 for i != j, it is a conflict.
1552
1553 (wisent-defcontext digraph
1554 INDEX R VERTICES
1555 infinity top)
1556
1557 (defun wisent-traverse (i)
1558 "Traverse I."
1559 (let (j k height Ri Fi break)
1560 (setq top (1+ top)
1561 height top)
1562 (aset VERTICES top i) ;; VERTICES[++top] = i
1563 (aset INDEX i top) ;; INDEX[i] = height = top
1564
1565 (setq Ri (aref R i))
1566 (when Ri
1567 (setq j 0)
1568 (while (>= (aref Ri j) 0)
1569 (if (zerop (aref INDEX (aref Ri j)))
1570 (wisent-traverse (aref Ri j)))
1571 ;; if (INDEX[i] > INDEX[R[i][j]])
1572 (if (> (aref INDEX i) (aref INDEX (aref Ri j)))
1573 ;; INDEX[i] = INDEX[R[i][j]];
1574 (aset INDEX i (aref INDEX (aref Ri j))))
1575 (setq Fi (aref F i)
1576 k 0)
1577 (while (< k tokensetsize)
1578 ;; F (i)[k] |= F (R[i][j])[k];
1579 (aset Fi k (logior (aref Fi k)
1580 (aref (aref F (aref Ri j)) k)))
1581 (setq k (1+ k)))
1582 (setq j (1+ j))))
1583
1584 (when (= (aref INDEX i) height)
1585 (setq break nil)
1586 (while (not break)
1587 (setq j (aref VERTICES top) ;; j = VERTICES[top--]
1588 top (1- top))
1589 (aset INDEX j infinity)
1590 (if (= i j)
1591 (setq break t)
1592 (setq k 0)
1593 (while (< k tokensetsize)
1594 ;; F (j)[k] = F (i)[k];
1595 (aset (aref F j) k (aref (aref F i) k))
1596 (setq k (1+ k))))))
1597 ))
1598
1599 (defun wisent-digraph (relation)
1600 "Digraph RELATION."
1601 (wisent-with-context digraph
1602 (setq infinity (+ ngotos 2)
1603 INDEX (make-vector (1+ ngotos) 0)
1604 VERTICES (make-vector (1+ ngotos) 0)
1605 top 0
1606 R relation)
1607 (let ((i 0))
1608 (while (< i ngotos)
1609 (if (and (= (aref INDEX i) 0) (aref R i))
1610 (wisent-traverse i))
1611 (setq i (1+ i))))))
1612
1613 (defun wisent-set-state-table ()
1614 "Build state table."
1615 (let (sp)
1616 (setq state-table (make-vector nstates nil)
1617 sp first-state)
1618 (while sp
1619 (aset state-table (core-number sp) sp)
1620 (setq sp (core-next sp)))))
1621
1622 (defun wisent-set-accessing-symbol ()
1623 "Build accessing symbol table."
1624 (let (sp)
1625 (setq accessing-symbol (make-vector nstates 0)
1626 sp first-state)
1627 (while sp
1628 (aset accessing-symbol (core-number sp) (core-accessing-symbol sp))
1629 (setq sp (core-next sp)))))
1630
1631 (defun wisent-set-shift-table ()
1632 "Build shift table."
1633 (let (sp)
1634 (setq shift-table (make-vector nstates nil)
1635 sp first-shift)
1636 (while sp
1637 (aset shift-table (shifts-number sp) sp)
1638 (setq sp (shifts-next sp)))))
1639
1640 (defun wisent-set-reduction-table ()
1641 "Build reduction table."
1642 (let (rp)
1643 (setq reduction-table (make-vector nstates nil)
1644 rp first-reduction)
1645 (while rp
1646 (aset reduction-table (reductions-number rp) rp)
1647 (setq rp (reductions-next rp)))))
1648
1649 (defun wisent-set-maxrhs ()
1650 "Setup MAXRHS length."
1651 (let (i len max)
1652 (setq len 0
1653 max 0
1654 i 0)
1655 (while (aref ritem i)
1656 (if (> (aref ritem i) 0)
1657 (setq len (1+ len))
1658 (if (> len max)
1659 (setq max len))
1660 (setq len 0))
1661 (setq i (1+ i)))
1662 (setq maxrhs max)))
1663
1664 (defun wisent-initialize-LA ()
1665 "Set up LA."
1666 (let (i j k count rp sp np v)
1667 (setq consistent (make-vector nstates nil)
1668 lookaheads (make-vector (1+ nstates) 0)
1669 count 0
1670 i 0)
1671 (while (< i nstates)
1672 (aset lookaheads i count)
1673 (setq rp (aref reduction-table i)
1674 sp (aref shift-table i))
1675 ;; if (rp &&
1676 ;; (rp->nreds > 1
1677 ;; || (sp && ! ISVAR(accessing-symbol[sp->shifts[0]]))))
1678 (if (and rp
1679 (or (> (reductions-nreds rp) 1)
1680 (and sp
1681 (not (wisent-ISVAR
1682 (aref accessing-symbol
1683 (aref (shifts-shifts sp) 0)))))))
1684 (setq count (+ count (reductions-nreds rp)))
1685 (aset consistent i t))
1686
1687 (when sp
1688 (setq k 0
1689 j (shifts-nshifts sp)
1690 v (shifts-shifts sp))
1691 (while (< k j)
1692 (when (= (aref accessing-symbol (aref v k))
1693 error-token-number)
1694 (aset consistent i nil)
1695 (setq k j)) ;; break
1696 (setq k (1+ k))))
1697 (setq i (1+ i)))
1698
1699 (aset lookaheads nstates count)
1700
1701 (if (zerop count)
1702 (progn
1703 (setq LA (make-vector 1 nil)
1704 LAruleno (make-vector 1 0)
1705 lookback (make-vector 1 nil)))
1706 (setq LA (make-vector count nil)
1707 LAruleno (make-vector count 0)
1708 lookback (make-vector count nil)))
1709 (setq i 0 j (length LA))
1710 (while (< i j)
1711 (aset LA i (make-vector tokensetsize 0))
1712 (setq i (1+ i)))
1713
1714 (setq np 0
1715 i 0)
1716 (while (< i nstates)
1717 (when (not (aref consistent i))
1718 (setq rp (aref reduction-table i))
1719 (when rp
1720 (setq j 0
1721 k (reductions-nreds rp)
1722 v (reductions-rules rp))
1723 (while (< j k)
1724 (aset LAruleno np (aref v j))
1725 (setq np (1+ np)
1726 j (1+ j)))))
1727 (setq i (1+ i)))))
1728
1729 (defun wisent-set-goto-map ()
1730 "Set up GOTO-MAP."
1731 (let (sp i j symbol k temp-map state1 state2 v)
1732 (setq goto-map (make-vector (1+ nvars) 0)
1733 temp-map (make-vector (1+ nvars) 0))
1734
1735 (setq ngotos 0
1736 sp first-shift)
1737 (while sp
1738 (setq i (1- (shifts-nshifts sp))
1739 v (shifts-shifts sp))
1740 (while (>= i 0)
1741 (setq symbol (aref accessing-symbol (aref v i)))
1742 (if (wisent-ISTOKEN symbol)
1743 (setq i 0) ;; break
1744 (setq ngotos (1+ ngotos))
1745 ;; goto-map[symbol]++;
1746 (aset goto-map (- symbol ntokens)
1747 (1+ (aref goto-map (- symbol ntokens)))))
1748 (setq i (1- i)))
1749 (setq sp (shifts-next sp)))
1750
1751 (setq k 0
1752 i ntokens
1753 j 0)
1754 (while (< i nsyms)
1755 (aset temp-map j k)
1756 (setq k (+ k (aref goto-map j))
1757 i (1+ i)
1758 j (1+ j)))
1759 (setq i ntokens
1760 j 0)
1761 (while (< i nsyms)
1762 (aset goto-map j (aref temp-map j))
1763 (setq i (1+ i)
1764 j (1+ j)))
1765 ;; goto-map[nsyms] = ngotos;
1766 ;; temp-map[nsyms] = ngotos;
1767 (aset goto-map j ngotos)
1768 (aset temp-map j ngotos)
1769
1770 (setq from-state (make-vector ngotos 0)
1771 to-state (make-vector ngotos 0)
1772 sp first-shift)
1773 (while sp
1774 (setq state1 (shifts-number sp)
1775 v (shifts-shifts sp)
1776 i (1- (shifts-nshifts sp)))
1777 (while (>= i 0)
1778 (setq state2 (aref v i)
1779 symbol (aref accessing-symbol state2))
1780 (if (wisent-ISTOKEN symbol)
1781 (setq i 0) ;; break
1782 ;; k = temp-map[symbol]++;
1783 (setq k (aref temp-map (- symbol ntokens)))
1784 (aset temp-map (- symbol ntokens) (1+ k))
1785 (aset from-state k state1)
1786 (aset to-state k state2))
1787 (setq i (1- i)))
1788 (setq sp (shifts-next sp)))
1789 ))
1790
1791 (defun wisent-map-goto (state symbol)
1792 "Map a STATE/SYMBOL pair into its numeric representation."
1793 (let (high low middle s result)
1794 ;; low = goto-map[symbol];
1795 ;; high = goto-map[symbol + 1] - 1;
1796 (setq low (aref goto-map (- symbol ntokens))
1797 high (1- (aref goto-map (- (1+ symbol) ntokens))))
1798 (while (and (not result) (<= low high))
1799 (setq middle (/ (+ low high) 2)
1800 s (aref from-state middle))
1801 (cond
1802 ((= s state)
1803 (setq result middle))
1804 ((< s state)
1805 (setq low (1+ middle)))
1806 (t
1807 (setq high (1- middle)))))
1808 (or result
1809 (error "Internal error in `wisent-map-goto'"))
1810 ))
1811
1812 (defun wisent-initialize-F ()
1813 "Set up F."
1814 (let (i j k sp edge rowp rp reads nedges stateno symbol v break)
1815 (setq F (make-vector ngotos nil)
1816 i 0)
1817 (while (< i ngotos)
1818 (aset F i (make-vector tokensetsize 0))
1819 (setq i (1+ i)))
1820
1821 (setq reads (make-vector ngotos nil)
1822 edge (make-vector (1+ ngotos) 0)
1823 nedges 0
1824 rowp 0 ;; rowp = F
1825 i 0)
1826 (while (< i ngotos)
1827 (setq stateno (aref to-state i)
1828 sp (aref shift-table stateno))
1829 (when sp
1830 (setq k (shifts-nshifts sp)
1831 v (shifts-shifts sp)
1832 j 0
1833 break nil)
1834 (while (and (not break) (< j k))
1835 ;; symbol = accessing-symbol[sp->shifts[j]];
1836 (setq symbol (aref accessing-symbol (aref v j)))
1837 (if (wisent-ISVAR symbol)
1838 (setq break t) ;; break
1839 (wisent-SETBIT (aref F rowp) symbol)
1840 (setq j (1+ j))))
1841
1842 (while (< j k)
1843 ;; symbol = accessing-symbol[sp->shifts[j]];
1844 (setq symbol (aref accessing-symbol (aref v j)))
1845 (when (aref nullable (- symbol ntokens))
1846 (aset edge nedges (wisent-map-goto stateno symbol))
1847 (setq nedges (1+ nedges)))
1848 (setq j (1+ j)))
1849
1850 (when (> nedges 0)
1851 ;; reads[i] = rp = NEW2(nedges + 1, short);
1852 (setq rp (make-vector (1+ nedges) 0)
1853 j 0)
1854 (aset reads i rp)
1855 (while (< j nedges)
1856 ;; rp[j] = edge[j];
1857 (aset rp j (aref edge j))
1858 (setq j (1+ j)))
1859 (aset rp nedges -1)
1860 (setq nedges 0)))
1861 (setq rowp (1+ rowp))
1862 (setq i (1+ i)))
1863 (wisent-digraph reads)
1864 ))
1865
1866 (defun wisent-add-lookback-edge (stateno ruleno gotono)
1867 "Add a lookback edge.
1868 STATENO, RULENO, GOTONO are self-explanatory."
1869 (let (i k found)
1870 (setq i (aref lookaheads stateno)
1871 k (aref lookaheads (1+ stateno))
1872 found nil)
1873 (while (and (not found) (< i k))
1874 (if (= (aref LAruleno i) ruleno)
1875 (setq found t)
1876 (setq i (1+ i))))
1877
1878 (or found
1879 (error "Internal error in `wisent-add-lookback-edge'"))
1880
1881 ;; value . next
1882 ;; lookback[i] = (gotono . lookback[i])
1883 (aset lookback i (cons gotono (aref lookback i)))))
1884
1885 (defun wisent-transpose (R-arg n)
1886 "Return the transpose of R-ARG, of size N.
1887 Destroy R-ARG, as it is replaced with the result. R-ARG[I] is nil or
1888 a -1 terminated list of numbers. RESULT[NUM] is nil or the -1
1889 terminated list of the I such as NUM is in R-ARG[I]."
1890 (let (i j new-R end-R nedges v sp)
1891 (setq new-R (make-vector n nil)
1892 end-R (make-vector n nil)
1893 nedges (make-vector n 0))
1894
1895 ;; Count.
1896 (setq i 0)
1897 (while (< i n)
1898 (setq v (aref R-arg i))
1899 (when v
1900 (setq j 0)
1901 (while (>= (aref v j) 0)
1902 (aset nedges (aref v j) (1+ (aref nedges (aref v j))))
1903 (setq j (1+ j))))
1904 (setq i (1+ i)))
1905
1906 ;; Allocate.
1907 (setq i 0)
1908 (while (< i n)
1909 (when (> (aref nedges i) 0)
1910 (setq sp (make-vector (1+ (aref nedges i)) 0))
1911 (aset sp (aref nedges i) -1)
1912 (aset new-R i sp)
1913 (aset end-R i 0))
1914 (setq i (1+ i)))
1915
1916 ;; Store.
1917 (setq i 0)
1918 (while (< i n)
1919 (setq v (aref R-arg i))
1920 (when v
1921 (setq j 0)
1922 (while (>= (aref v j) 0)
1923 (aset (aref new-R (aref v j)) (aref end-R (aref v j)) i)
1924 (aset end-R (aref v j) (1+ (aref end-R (aref v j))))
1925 (setq j (1+ j))))
1926 (setq i (1+ i)))
1927
1928 new-R))
1929
1930 (defun wisent-build-relations ()
1931 "Build relations."
1932 (let (i j k rulep rp sp length nedges done state1 stateno
1933 symbol1 symbol2 edge states v)
1934 (setq includes (make-vector ngotos nil)
1935 edge (make-vector (1+ ngotos) 0)
1936 states (make-vector (1+ maxrhs) 0)
1937 i 0)
1938
1939 (while (< i ngotos)
1940 (setq nedges 0
1941 state1 (aref from-state i)
1942 symbol1 (aref accessing-symbol (aref to-state i))
1943 rulep (aref derives (- symbol1 ntokens)))
1944
1945 (while (> (car rulep) 0)
1946 (aset states 0 state1)
1947 (setq length 1
1948 stateno state1
1949 rp (aref rrhs (car rulep))) ;; rp = ritem + rrhs[*rulep]
1950 (while (> (aref ritem rp) 0) ;; *rp > 0
1951 (setq symbol2 (aref ritem rp)
1952 sp (aref shift-table stateno)
1953 k (shifts-nshifts sp)
1954 v (shifts-shifts sp)
1955 j 0)
1956 (while (< j k)
1957 (setq stateno (aref v j))
1958 (if (= (aref accessing-symbol stateno) symbol2)
1959 (setq j k) ;; break
1960 (setq j (1+ j))))
1961 ;; states[length++] = stateno;
1962 (aset states length stateno)
1963 (setq length (1+ length))
1964 (setq rp (1+ rp)))
1965
1966 (if (not (aref consistent stateno))
1967 (wisent-add-lookback-edge stateno (car rulep) i))
1968
1969 (setq length (1- length)
1970 done nil)
1971 (while (not done)
1972 (setq done t
1973 rp (1- rp))
1974 (when (and (>= rp 0) (wisent-ISVAR (aref ritem rp)))
1975 ;; stateno = states[--length];
1976 (setq length (1- length)
1977 stateno (aref states length))
1978 (aset edge nedges (wisent-map-goto stateno (aref ritem rp)))
1979 (setq nedges (1+ nedges))
1980 (if (aref nullable (- (aref ritem rp) ntokens))
1981 (setq done nil))))
1982 (setq rulep (cdr rulep)))
1983
1984 (when (> nedges 0)
1985 (setq v (make-vector (1+ nedges) 0)
1986 j 0)
1987 (aset includes i v)
1988 (while (< j nedges)
1989 (aset v j (aref edge j))
1990 (setq j (1+ j)))
1991 (aset v nedges -1))
1992 (setq i (1+ i)))
1993
1994 (setq includes (wisent-transpose includes ngotos))
1995 ))
1996
1997 (defun wisent-compute-FOLLOWS ()
1998 "Compute follows."
1999 (wisent-digraph includes))
2000
2001 (defun wisent-compute-lookaheads ()
2002 "Compute lookaheads."
2003 (let (i j n v1 v2 sp)
2004 (setq n (aref lookaheads nstates)
2005 i 0)
2006 (while (< i n)
2007 (setq sp (aref lookback i))
2008 (while sp
2009 (setq v1 (aref LA i)
2010 v2 (aref F (car sp))
2011 j 0)
2012 (while (< j tokensetsize)
2013 ;; LA (i)[j] |= F (sp->value)[j]
2014 (aset v1 j (logior (aref v1 j) (aref v2 j)))
2015 (setq j (1+ j)))
2016 (setq sp (cdr sp)))
2017 (setq i (1+ i)))))
2018
2019 (defun wisent-lalr ()
2020 "Make the nondeterministic finite state machine deterministic."
2021 (setq tokensetsize (wisent-WORDSIZE ntokens))
2022 (wisent-set-state-table)
2023 (wisent-set-accessing-symbol)
2024 (wisent-set-shift-table)
2025 (wisent-set-reduction-table)
2026 (wisent-set-maxrhs)
2027 (wisent-initialize-LA)
2028 (wisent-set-goto-map)
2029 (wisent-initialize-F)
2030 (wisent-build-relations)
2031 (wisent-compute-FOLLOWS)
2032 (wisent-compute-lookaheads))
2033 \f
2034 ;;;; -----------------------------------------------
2035 ;;;; Find and resolve or report look-ahead conflicts
2036 ;;;; -----------------------------------------------
2037
2038 (defsubst wisent-log-resolution (state LAno token resolution)
2039 "Log a shift-reduce conflict resolution.
2040 In specified STATE between rule pointed by lookahead number LANO and
2041 TOKEN, resolved as RESOLUTION."
2042 (if (or wisent-verbose-flag wisent-debug-flag)
2043 (wisent-log
2044 "Conflict in state %d between rule %d and token %s resolved as %s.\n"
2045 state (aref LAruleno LAno) (wisent-tag token) resolution)))
2046
2047 (defun wisent-flush-shift (state token)
2048 "Turn off the shift recorded in the specified STATE for TOKEN.
2049 Used when we resolve a shift-reduce conflict in favor of the reduction."
2050 (let (shiftp i k v)
2051 (when (setq shiftp (aref shift-table state))
2052 (setq k (shifts-nshifts shiftp)
2053 v (shifts-shifts shiftp)
2054 i 0)
2055 (while (< i k)
2056 (if (and (not (zerop (aref v i)))
2057 (= token (aref accessing-symbol (aref v i))))
2058 (aset v i 0))
2059 (setq i (1+ i))))))
2060
2061 (defun wisent-resolve-sr-conflict (state lookaheadnum)
2062 "Attempt to resolve shift-reduce conflict for one rule.
2063 Resolve by means of precedence declarations. The conflict occurred in
2064 specified STATE for the rule pointed by the lookahead symbol
2065 LOOKAHEADNUM. It has already been checked that the rule has a
2066 precedence. A conflict is resolved by modifying the shift or reduce
2067 tables so that there is no longer a conflict."
2068 (let (i redprec errp errs nerrs token sprec sassoc)
2069 ;; Find the rule to reduce by to get precedence of reduction
2070 (setq token (aref tags (aref rprec (aref LAruleno lookaheadnum)))
2071 redprec (wisent-prec token)
2072 errp (make-errs)
2073 errs (make-vector ntokens 0)
2074 nerrs 0
2075 i 0)
2076 (set-errs-errs errp errs)
2077 (while (< i ntokens)
2078 (setq token (aref tags i))
2079 (when (and (wisent-BITISSET (aref LA lookaheadnum) i)
2080 (wisent-BITISSET lookaheadset i)
2081 (setq sprec (wisent-prec token)))
2082 ;; Shift-reduce conflict occurs for token number I and it has
2083 ;; a precedence. The precedence of shifting is that of token
2084 ;; I.
2085 (cond
2086 ((< sprec redprec)
2087 (wisent-log-resolution state lookaheadnum i "reduce")
2088 ;; Flush the shift for this token
2089 (wisent-RESETBIT lookaheadset i)
2090 (wisent-flush-shift state i)
2091 )
2092 ((> sprec redprec)
2093 (wisent-log-resolution state lookaheadnum i "shift")
2094 ;; Flush the reduce for this token
2095 (wisent-RESETBIT (aref LA lookaheadnum) i)
2096 )
2097 (t
2098 ;; Matching precedence levels.
2099 ;; For left association, keep only the reduction.
2100 ;; For right association, keep only the shift.
2101 ;; For nonassociation, keep neither.
2102 (setq sassoc (wisent-assoc token))
2103 (cond
2104 ((eq sassoc 'right)
2105 (wisent-log-resolution state lookaheadnum i "shift"))
2106 ((eq sassoc 'left)
2107 (wisent-log-resolution state lookaheadnum i "reduce"))
2108 ((eq sassoc 'nonassoc)
2109 (wisent-log-resolution state lookaheadnum i "an error"))
2110 )
2111 (when (not (eq sassoc 'right))
2112 ;; Flush the shift for this token
2113 (wisent-RESETBIT lookaheadset i)
2114 (wisent-flush-shift state i))
2115 (when (not (eq sassoc 'left))
2116 ;; Flush the reduce for this token
2117 (wisent-RESETBIT (aref LA lookaheadnum) i))
2118 (when (eq sassoc 'nonassoc)
2119 ;; Record an explicit error for this token
2120 (aset errs nerrs i)
2121 (setq nerrs (1+ nerrs)))
2122 )))
2123 (setq i (1+ i)))
2124 (when (> nerrs 0)
2125 (set-errs-nerrs errp nerrs)
2126 (aset err-table state errp))
2127 ))
2128
2129 (defun wisent-set-conflicts (state)
2130 "Find and attempt to resolve conflicts in specified STATE."
2131 (let (i j k v shiftp symbol)
2132 (unless (aref consistent state)
2133 (fillarray lookaheadset 0)
2134
2135 (when (setq shiftp (aref shift-table state))
2136 (setq k (shifts-nshifts shiftp)
2137 v (shifts-shifts shiftp)
2138 i 0)
2139 (while (and (< i k)
2140 (wisent-ISTOKEN
2141 (setq symbol (aref accessing-symbol (aref v i)))))
2142 (or (zerop (aref v i))
2143 (wisent-SETBIT lookaheadset symbol))
2144 (setq i (1+ i))))
2145
2146 ;; Loop over all rules which require lookahead in this state
2147 ;; first check for shift-reduce conflict, and try to resolve
2148 ;; using precedence
2149 (setq i (aref lookaheads state)
2150 k (aref lookaheads (1+ state)))
2151 (while (< i k)
2152 (when (aref rprec (aref LAruleno i))
2153 (setq v (aref LA i)
2154 j 0)
2155 (while (< j tokensetsize)
2156 (if (zerop (logand (aref v j) (aref lookaheadset j)))
2157 (setq j (1+ j))
2158 ;; if (LA (i)[j] & lookaheadset[j])
2159 (wisent-resolve-sr-conflict state i)
2160 (setq j tokensetsize)))) ;; break
2161 (setq i (1+ i)))
2162
2163 ;; Loop over all rules which require lookahead in this state
2164 ;; Check for conflicts not resolved above.
2165 (setq i (aref lookaheads state))
2166 (while (< i k)
2167 (setq v (aref LA i)
2168 j 0)
2169 (while (< j tokensetsize)
2170 ;; if (LA (i)[j] & lookaheadset[j])
2171 (if (not (zerop (logand (aref v j) (aref lookaheadset j))))
2172 (aset conflicts state t))
2173 (setq j (1+ j)))
2174 (setq j 0)
2175 (while (< j tokensetsize)
2176 ;; lookaheadset[j] |= LA (i)[j];
2177 (aset lookaheadset j (logior (aref lookaheadset j)
2178 (aref v j)))
2179 (setq j (1+ j)))
2180 (setq i (1+ i)))
2181 )))
2182
2183 (defun wisent-resolve-conflicts ()
2184 "Find and resolve conflicts."
2185 (let (i)
2186 (setq conflicts (make-vector nstates nil)
2187 shiftset (make-vector tokensetsize 0)
2188 lookaheadset (make-vector tokensetsize 0)
2189 err-table (make-vector nstates nil)
2190 i 0)
2191 (while (< i nstates)
2192 (wisent-set-conflicts i)
2193 (setq i (1+ i)))))
2194
2195 (defun wisent-count-sr-conflicts (state)
2196 "Count the number of shift/reduce conflicts in specified STATE."
2197 (let (i j k shiftp symbol v)
2198 (setq src-count 0
2199 shiftp (aref shift-table state))
2200 (when shiftp
2201 (fillarray shiftset 0)
2202 (fillarray lookaheadset 0)
2203 (setq k (shifts-nshifts shiftp)
2204 v (shifts-shifts shiftp)
2205 i 0)
2206 (while (< i k)
2207 (when (not (zerop (aref v i)))
2208 (setq symbol (aref accessing-symbol (aref v i)))
2209 (if (wisent-ISVAR symbol)
2210 (setq i k) ;; break
2211 (wisent-SETBIT shiftset symbol)))
2212 (setq i (1+ i)))
2213
2214 (setq k (aref lookaheads (1+ state))
2215 i (aref lookaheads state))
2216 (while (< i k)
2217 (setq v (aref LA i)
2218 j 0)
2219 (while (< j tokensetsize)
2220 ;; lookaheadset[j] |= LA (i)[j]
2221 (aset lookaheadset j (logior (aref lookaheadset j)
2222 (aref v j)))
2223 (setq j (1+ j)))
2224 (setq i (1+ i)))
2225
2226 (setq k 0)
2227 (while (< k tokensetsize)
2228 ;; lookaheadset[k] &= shiftset[k];
2229 (aset lookaheadset k (logand (aref lookaheadset k)
2230 (aref shiftset k)))
2231 (setq k (1+ k)))
2232
2233 (setq i 0)
2234 (while (< i ntokens)
2235 (if (wisent-BITISSET lookaheadset i)
2236 (setq src-count (1+ src-count)))
2237 (setq i (1+ i))))
2238 src-count))
2239
2240 (defun wisent-count-rr-conflicts (state)
2241 "Count the number of reduce/reduce conflicts in specified STATE."
2242 (let (i j count n m)
2243 (setq rrc-count 0
2244 m (aref lookaheads state)
2245 n (aref lookaheads (1+ state)))
2246 (when (>= (- n m) 2)
2247 (setq i 0)
2248 (while (< i ntokens)
2249 (setq count 0
2250 j m)
2251 (while (< j n)
2252 (if (wisent-BITISSET (aref LA j) i)
2253 (setq count (1+ count)))
2254 (setq j (1+ j)))
2255
2256 (if (>= count 2)
2257 (setq rrc-count (1+ rrc-count)))
2258 (setq i (1+ i))))
2259 rrc-count))
2260
2261 (defvar wisent-expected-conflicts nil
2262 "*If non-nil suppress the warning about shift/reduce conflicts.
2263 It is a decimal integer N that says there should be no warning if
2264 there are N shift/reduce conflicts and no reduce/reduce conflicts. A
2265 warning is given if there are either more or fewer conflicts, or if
2266 there are any reduce/reduce conflicts.")
2267
2268 (defun wisent-total-conflicts ()
2269 "Report the total number of conflicts."
2270 (unless (and (zerop rrc-total)
2271 (or (zerop src-total)
2272 (= src-total (or wisent-expected-conflicts 0))))
2273 (let* ((src (wisent-source))
2274 (src (if src (concat " in " src) ""))
2275 (msg (format "Grammar%s contains" src)))
2276 (if (> src-total 0)
2277 (setq msg (format "%s %d shift/reduce conflict%s"
2278 msg src-total (if (> src-total 1)
2279 "s" ""))))
2280 (if (and (> src-total 0) (> rrc-total 0))
2281 (setq msg (format "%s and" msg)))
2282 (if (> rrc-total 0)
2283 (setq msg (format "%s %d reduce/reduce conflict%s"
2284 msg rrc-total (if (> rrc-total 1)
2285 "s" ""))))
2286 (message msg))))
2287
2288 (defun wisent-print-conflicts ()
2289 "Report conflicts."
2290 (let (i)
2291 (setq src-total 0
2292 rrc-total 0
2293 i 0)
2294 (while (< i nstates)
2295 (when (aref conflicts i)
2296 (wisent-count-sr-conflicts i)
2297 (wisent-count-rr-conflicts i)
2298 (setq src-total (+ src-total src-count)
2299 rrc-total (+ rrc-total rrc-count))
2300 (when (or wisent-verbose-flag wisent-debug-flag)
2301 (wisent-log "State %d contains" i)
2302 (if (> src-count 0)
2303 (wisent-log " %d shift/reduce conflict%s"
2304 src-count (if (> src-count 1) "s" "")))
2305
2306 (if (and (> src-count 0) (> rrc-count 0))
2307 (wisent-log " and"))
2308
2309 (if (> rrc-count 0)
2310 (wisent-log " %d reduce/reduce conflict%s"
2311 rrc-count (if (> rrc-count 1) "s" "")))
2312
2313 (wisent-log ".\n")))
2314 (setq i (1+ i)))
2315 (wisent-total-conflicts)))
2316 \f
2317 ;;;; --------------------------------------
2318 ;;;; Report information on generated parser
2319 ;;;; --------------------------------------
2320 (defun wisent-print-grammar ()
2321 "Print grammar."
2322 (let (i j r break left-count right-count)
2323
2324 (wisent-log "\n\nGrammar\n\n Number, Rule\n")
2325 (setq i 1)
2326 (while (<= i nrules)
2327 ;; Don't print rules disabled in `wisent-reduce-grammar-tables'.
2328 (when (aref ruseful i)
2329 (wisent-log " %s %s ->"
2330 (wisent-pad-string (number-to-string i) 6)
2331 (wisent-tag (aref rlhs i)))
2332 (setq r (aref rrhs i))
2333 (if (> (aref ritem r) 0)
2334 (while (> (aref ritem r) 0)
2335 (wisent-log " %s" (wisent-tag (aref ritem r)))
2336 (setq r (1+ r)))
2337 (wisent-log " /* empty */"))
2338 (wisent-log "\n"))
2339 (setq i (1+ i)))
2340
2341 (wisent-log "\n\nTerminals, with rules where they appear\n\n")
2342 (wisent-log "%s (-1)\n" (wisent-tag 0))
2343 (setq i 1)
2344 (while (< i ntokens)
2345 (wisent-log "%s (%d)" (wisent-tag i) i)
2346 (setq j 1)
2347 (while (<= j nrules)
2348 (setq r (aref rrhs j)
2349 break nil)
2350 (while (and (not break) (> (aref ritem r) 0))
2351 (if (setq break (= (aref ritem r) i))
2352 (wisent-log " %d" j)
2353 (setq r (1+ r))))
2354 (setq j (1+ j)))
2355 (wisent-log "\n")
2356 (setq i (1+ i)))
2357
2358 (wisent-log "\n\nNonterminals, with rules where they appear\n\n")
2359 (setq i ntokens)
2360 (while (< i nsyms)
2361 (setq left-count 0
2362 right-count 0
2363 j 1)
2364 (while (<= j nrules)
2365 (if (= (aref rlhs j) i)
2366 (setq left-count (1+ left-count)))
2367 (setq r (aref rrhs j)
2368 break nil)
2369 (while (and (not break) (> (aref ritem r) 0))
2370 (if (= (aref ritem r) i)
2371 (setq right-count (1+ right-count)
2372 break t)
2373 (setq r (1+ r))))
2374 (setq j (1+ j)))
2375 (wisent-log "%s (%d)\n " (wisent-tag i) i)
2376 (when (> left-count 0)
2377 (wisent-log " on left:")
2378 (setq j 1)
2379 (while (<= j nrules)
2380 (if (= (aref rlhs j) i)
2381 (wisent-log " %d" j))
2382 (setq j (1+ j))))
2383 (when (> right-count 0)
2384 (if (> left-count 0)
2385 (wisent-log ","))
2386 (wisent-log " on right:")
2387 (setq j 1)
2388 (while (<= j nrules)
2389 (setq r (aref rrhs j)
2390 break nil)
2391 (while (and (not break) (> (aref ritem r) 0))
2392 (if (setq break (= (aref ritem r) i))
2393 (wisent-log " %d" j)
2394 (setq r (1+ r))))
2395 (setq j (1+ j))))
2396 (wisent-log "\n")
2397 (setq i (1+ i)))
2398 ))
2399
2400 (defun wisent-print-reductions (state)
2401 "Print reductions on STATE."
2402 (let (i j k v symbol m n defaulted
2403 default-LA default-rule cmax count shiftp errp nodefault)
2404 (setq nodefault nil
2405 i 0)
2406 (fillarray shiftset 0)
2407
2408 (setq shiftp (aref shift-table state))
2409 (when shiftp
2410 (setq k (shifts-nshifts shiftp)
2411 v (shifts-shifts shiftp)
2412 i 0)
2413 (while (< i k)
2414 (when (not (zerop (aref v i)))
2415 (setq symbol (aref accessing-symbol (aref v i)))
2416 (if (wisent-ISVAR symbol)
2417 (setq i k) ;; break
2418 ;; If this state has a shift for the error token, don't
2419 ;; use a default rule.
2420 (if (= symbol error-token-number)
2421 (setq nodefault t))
2422 (wisent-SETBIT shiftset symbol)))
2423 (setq i (1+ i))))
2424
2425 (setq errp (aref err-table state))
2426 (when errp
2427 (setq k (errs-nerrs errp)
2428 v (errs-errs errp)
2429 i 0)
2430 (while (< i k)
2431 (if (not (zerop (setq symbol (aref v i))))
2432 (wisent-SETBIT shiftset symbol))
2433 (setq i (1+ i))))
2434
2435 (setq m (aref lookaheads state)
2436 n (aref lookaheads (1+ state)))
2437
2438 (cond
2439 ((and (= (- n m) 1) (not nodefault))
2440 (setq default-rule (aref LAruleno m)
2441 v (aref LA m)
2442 k 0)
2443 (while (< k tokensetsize)
2444 (aset lookaheadset k (logand (aref v k)
2445 (aref shiftset k)))
2446 (setq k (1+ k)))
2447
2448 (setq i 0)
2449 (while (< i ntokens)
2450 (if (wisent-BITISSET lookaheadset i)
2451 (wisent-log " %s\t[reduce using rule %d (%s)]\n"
2452 (wisent-tag i) default-rule
2453 (wisent-tag (aref rlhs default-rule))))
2454 (setq i (1+ i)))
2455 (wisent-log " $default\treduce using rule %d (%s)\n\n"
2456 default-rule
2457 (wisent-tag (aref rlhs default-rule)))
2458 )
2459 ((>= (- n m) 1)
2460 (setq cmax 0
2461 default-LA -1
2462 default-rule 0)
2463 (when (not nodefault)
2464 (setq i m)
2465 (while (< i n)
2466 (setq v (aref LA i)
2467 count 0
2468 k 0)
2469 (while (< k tokensetsize)
2470 ;; lookaheadset[k] = LA (i)[k] & ~shiftset[k]
2471 (aset lookaheadset k
2472 (logand (aref v k)
2473 (lognot (aref shiftset k))))
2474 (setq k (1+ k)))
2475 (setq j 0)
2476 (while (< j ntokens)
2477 (if (wisent-BITISSET lookaheadset j)
2478 (setq count (1+ count)))
2479 (setq j (1+ j)))
2480 (if (> count cmax)
2481 (setq cmax count
2482 default-LA i
2483 default-rule (aref LAruleno i)))
2484 (setq k 0)
2485 (while (< k tokensetsize)
2486 (aset shiftset k (logior (aref shiftset k)
2487 (aref lookaheadset k)))
2488 (setq k (1+ k)))
2489 (setq i (1+ i))))
2490
2491 (fillarray shiftset 0)
2492
2493 (when shiftp
2494 (setq k (shifts-nshifts shiftp)
2495 v (shifts-shifts shiftp)
2496 i 0)
2497 (while (< i k)
2498 (when (not (zerop (aref v i)))
2499 (setq symbol (aref accessing-symbol (aref v i)))
2500 (if (wisent-ISVAR symbol)
2501 (setq i k) ;; break
2502 (wisent-SETBIT shiftset symbol)))
2503 (setq i (1+ i))))
2504
2505 (setq i 0)
2506 (while (< i ntokens)
2507 (setq defaulted nil
2508 count (if (wisent-BITISSET shiftset i) 1 0)
2509 j m)
2510 (while (< j n)
2511 (when (wisent-BITISSET (aref LA j) i)
2512 (if (zerop count)
2513 (progn
2514 (if (not (= j default-LA))
2515 (wisent-log
2516 " %s\treduce using rule %d (%s)\n"
2517 (wisent-tag i) (aref LAruleno j)
2518 (wisent-tag (aref rlhs (aref LAruleno j))))
2519 (setq defaulted t))
2520 (setq count (1+ count)))
2521 (if defaulted
2522 (wisent-log
2523 " %s\treduce using rule %d (%s)\n"
2524 (wisent-tag i) (aref LAruleno default-LA)
2525 (wisent-tag (aref rlhs (aref LAruleno default-LA)))))
2526 (setq defaulted nil)
2527 (wisent-log
2528 " %s\t[reduce using rule %d (%s)]\n"
2529 (wisent-tag i) (aref LAruleno j)
2530 (wisent-tag (aref rlhs (aref LAruleno j))))))
2531 (setq j (1+ j)))
2532 (setq i (1+ i)))
2533
2534 (if (>= default-LA 0)
2535 (wisent-log
2536 " $default\treduce using rule %d (%s)\n"
2537 default-rule
2538 (wisent-tag (aref rlhs default-rule))))
2539 ))))
2540
2541 (defun wisent-print-actions (state)
2542 "Print actions on STATE."
2543 (let (i j k v state1 symbol shiftp errp redp rule nerrs break)
2544 (setq shiftp (aref shift-table state)
2545 redp (aref reduction-table state)
2546 errp (aref err-table state))
2547 (if (and (not shiftp) (not redp))
2548 (if (= final-state state)
2549 (wisent-log " $default\taccept\n")
2550 (wisent-log " NO ACTIONS\n"))
2551 (if (not shiftp)
2552 (setq i 0
2553 k 0)
2554 (setq k (shifts-nshifts shiftp)
2555 v (shifts-shifts shiftp)
2556 i 0
2557 break nil)
2558 (while (and (not break) (< i k))
2559 (if (zerop (setq state1 (aref v i)))
2560 (setq i (1+ i))
2561 (setq symbol (aref accessing-symbol state1))
2562 ;; The following line used to be turned off.
2563 (if (wisent-ISVAR symbol)
2564 (setq break t) ;; break
2565 (wisent-log " %s\tshift, and go to state %d\n"
2566 (wisent-tag symbol) state1)
2567 (setq i (1+ i)))))
2568 (if (> i 0)
2569 (wisent-log "\n")))
2570
2571 (when errp
2572 (setq nerrs (errs-nerrs errp)
2573 v (errs-errs errp)
2574 j 0)
2575 (while (< j nerrs)
2576 (if (aref v j)
2577 (wisent-log " %s\terror (nonassociative)\n"
2578 (wisent-tag (aref v j))))
2579 (setq j (1+ j)))
2580 (if (> j 0)
2581 (wisent-log "\n")))
2582
2583 (cond
2584 ((and (aref consistent state) redp)
2585 (setq rule (aref (reductions-rules redp) 0)
2586 symbol (aref rlhs rule))
2587 (wisent-log " $default\treduce using rule %d (%s)\n\n"
2588 rule (wisent-tag symbol))
2589 )
2590 (redp
2591 (wisent-print-reductions state)
2592 ))
2593
2594 (when (< i k)
2595 (setq v (shifts-shifts shiftp))
2596 (while (< i k)
2597 (when (setq state1 (aref v i))
2598 (setq symbol (aref accessing-symbol state1))
2599 (wisent-log " %s\tgo to state %d\n"
2600 (wisent-tag symbol) state1))
2601 (setq i (1+ i)))
2602 (wisent-log "\n"))
2603 )))
2604
2605 (defun wisent-print-core (state)
2606 "Print STATE core."
2607 (let (i k rule statep sp sp1)
2608 (setq statep (aref state-table state)
2609 k (core-nitems statep))
2610 (when (> k 0)
2611 (setq i 0)
2612 (while (< i k)
2613 ;; sp1 = sp = ritem + statep->items[i];
2614 (setq sp1 (aref (core-items statep) i)
2615 sp sp1)
2616 (while (> (aref ritem sp) 0)
2617 (setq sp (1+ sp)))
2618
2619 (setq rule (- (aref ritem sp)))
2620 (wisent-log " %s -> " (wisent-tag (aref rlhs rule)))
2621
2622 (setq sp (aref rrhs rule))
2623 (while (< sp sp1)
2624 (wisent-log "%s " (wisent-tag (aref ritem sp)))
2625 (setq sp (1+ sp)))
2626 (wisent-log ".")
2627 (while (> (aref ritem sp) 0)
2628 (wisent-log " %s" (wisent-tag (aref ritem sp)))
2629 (setq sp (1+ sp)))
2630 (wisent-log " (rule %d)\n" rule)
2631 (setq i (1+ i)))
2632 (wisent-log "\n"))))
2633
2634 (defun wisent-print-state (state)
2635 "Print information on STATE."
2636 (wisent-log "\n\nstate %d\n\n" state)
2637 (wisent-print-core state)
2638 (wisent-print-actions state))
2639
2640 (defun wisent-print-states ()
2641 "Print information on states."
2642 (let ((i 0))
2643 (while (< i nstates)
2644 (wisent-print-state i)
2645 (setq i (1+ i)))))
2646
2647 (defun wisent-print-results ()
2648 "Print information on generated parser.
2649 Report detailed informations if `wisent-verbose-flag' or
2650 `wisent-debug-flag' are non-nil."
2651 (when (or wisent-verbose-flag wisent-debug-flag)
2652 (wisent-print-useless))
2653 (wisent-print-conflicts)
2654 (when (or wisent-verbose-flag wisent-debug-flag)
2655 (wisent-print-grammar)
2656 (wisent-print-states))
2657 ;; Append output to log file when running in batch mode
2658 (when (wisent-noninteractive)
2659 (wisent-append-to-log-file)
2660 (wisent-clear-log)))
2661 \f
2662 ;;;; ---------------------------------
2663 ;;;; Build the generated parser tables
2664 ;;;; ---------------------------------
2665
2666 (defun wisent-action-row (state actrow)
2667 "Figure out the actions for the specified STATE.
2668 Decide what to do for each type of token if seen as the lookahead
2669 token in specified state. The value returned is used as the default
2670 action for the state. In addition, ACTROW is filled with what to do
2671 for each kind of token, index by symbol number, with nil meaning do
2672 the default action. The value 'error, means this situation is an
2673 error. The parser recognizes this value specially.
2674
2675 This is where conflicts are resolved. The loop over lookahead rules
2676 considered lower-numbered rules last, and the last rule considered
2677 that likes a token gets to handle it."
2678 (let (i j k m n v default-rule nreds rule max count
2679 shift-state symbol redp shiftp errp nodefault)
2680
2681 (fillarray actrow nil)
2682
2683 (setq default-rule 0
2684 nodefault nil ;; nil inhibit having any default reduction
2685 nreds 0
2686 m 0
2687 n 0
2688 redp (aref reduction-table state))
2689
2690 (when redp
2691 (setq nreds (reductions-nreds redp))
2692 (when (>= nreds 1)
2693 ;; loop over all the rules available here which require
2694 ;; lookahead
2695 (setq m (aref lookaheads state)
2696 n (aref lookaheads (1+ state))
2697 i (1- n))
2698 (while (>= i m)
2699 ;; and find each token which the rule finds acceptable to
2700 ;; come next
2701 (setq j 0)
2702 (while (< j ntokens)
2703 ;; and record this rule as the rule to use if that token
2704 ;; follows.
2705 (if (wisent-BITISSET (aref LA i) j)
2706 (aset actrow j (- (aref LAruleno i)))
2707 )
2708 (setq j (1+ j)))
2709 (setq i (1- i)))))
2710
2711 ;; Now see which tokens are allowed for shifts in this state. For
2712 ;; them, record the shift as the thing to do. So shift is
2713 ;; preferred to reduce.
2714 (setq shiftp (aref shift-table state))
2715 (when shiftp
2716 (setq k (shifts-nshifts shiftp)
2717 v (shifts-shifts shiftp)
2718 i 0)
2719 (while (< i k)
2720 (setq shift-state (aref v i))
2721 (if (zerop shift-state)
2722 nil ;; continue
2723 (setq symbol (aref accessing-symbol shift-state))
2724 (if (wisent-ISVAR symbol)
2725 (setq i k) ;; break
2726 (aset actrow symbol shift-state)
2727 ;; Do not use any default reduction if there is a shift
2728 ;; for error
2729 (if (= symbol error-token-number)
2730 (setq nodefault t))))
2731 (setq i (1+ i))))
2732
2733 ;; See which tokens are an explicit error in this state (due to
2734 ;; %nonassoc). For them, record error as the action.
2735 (setq errp (aref err-table state))
2736 (when errp
2737 (setq k (errs-nerrs errp)
2738 v (errs-errs errp)
2739 i 0)
2740 (while (< i k)
2741 (aset actrow (aref v i) wisent-error-tag)
2742 (setq i (1+ i))))
2743
2744 ;; Now find the most common reduction and make it the default
2745 ;; action for this state.
2746 (when (and (>= nreds 1) (not nodefault))
2747 (if (aref consistent state)
2748 (setq default-rule (- (aref (reductions-rules redp) 0)))
2749 (setq max 0
2750 i m)
2751 (while (< i n)
2752 (setq count 0
2753 rule (- (aref LAruleno i))
2754 j 0)
2755 (while (< j ntokens)
2756 (if (and (numberp (aref actrow j))
2757 (= (aref actrow j) rule))
2758 (setq count (1+ count)))
2759 (setq j (1+ j)))
2760 (if (> count max)
2761 (setq max count
2762 default-rule rule))
2763 (setq i (1+ i)))
2764 ;; actions which match the default are replaced with zero,
2765 ;; which means "use the default"
2766 (when (> max 0)
2767 (setq j 0)
2768 (while (< j ntokens)
2769 (if (and (numberp (aref actrow j))
2770 (= (aref actrow j) default-rule))
2771 (aset actrow j nil))
2772 (setq j (1+ j)))
2773 )))
2774
2775 ;; If have no default rule, if this is the final state the default
2776 ;; is accept else it is an error. So replace any action which
2777 ;; says "error" with "use default".
2778 (when (zerop default-rule)
2779 (if (= final-state state)
2780 (setq default-rule wisent-accept-tag)
2781 (setq j 0)
2782 (while (< j ntokens)
2783 (if (eq (aref actrow j) wisent-error-tag)
2784 (aset actrow j nil))
2785 (setq j (1+ j)))
2786 (setq default-rule wisent-error-tag)))
2787 default-rule))
2788
2789 (defconst wisent-default-tag 'default
2790 "Tag used in an action table to indicate a default action.")
2791
2792 ;; These variables only exist locally in the function
2793 ;; `wisent-state-actions' and are shared by all other nested callees.
2794 (wisent-defcontext semantic-actions
2795 ;; Uninterned symbols used in code generation.
2796 stack sp gotos state
2797 ;; Name of the current semantic action
2798 NAME)
2799
2800 (defun wisent-state-actions ()
2801 "Figure out the actions for every state.
2802 Return the action table."
2803 ;; Store the semantic action obarray in (unused) RCODE[0].
2804 (aset rcode 0 (make-vector 13 0))
2805 (let (i j action-table actrow action)
2806 (setq action-table (make-vector nstates nil)
2807 actrow (make-vector ntokens nil)
2808 i 0)
2809 (wisent-with-context semantic-actions
2810 (setq stack (make-symbol "stack")
2811 sp (make-symbol "sp")
2812 gotos (make-symbol "gotos")
2813 state (make-symbol "state"))
2814 (while (< i nstates)
2815 (setq action (wisent-action-row i actrow))
2816 ;; Translate a reduction into semantic action
2817 (and (integerp action) (< action 0)
2818 (setq action (wisent-semantic-action (- action))))
2819 (aset action-table i (list (cons wisent-default-tag action)))
2820 (setq j 0)
2821 (while (< j ntokens)
2822 (when (setq action (aref actrow j))
2823 ;; Translate a reduction into semantic action
2824 (and (integerp action) (< action 0)
2825 (setq action (wisent-semantic-action (- action))))
2826 (aset action-table i (cons (cons (aref tags j) action)
2827 (aref action-table i)))
2828 )
2829 (setq j (1+ j)))
2830 (aset action-table i (nreverse (aref action-table i)))
2831 (setq i (1+ i)))
2832 action-table)))
2833
2834 (defun wisent-goto-actions ()
2835 "Figure out what to do after reducing with each rule.
2836 Depending on the saved state from before the beginning of parsing the
2837 data that matched this rule. Return the goto table."
2838 (let (i j m n symbol state goto-table)
2839 (setq goto-table (make-vector nstates nil)
2840 i ntokens)
2841 (while (< i nsyms)
2842 (setq symbol (- i ntokens)
2843 m (aref goto-map symbol)
2844 n (aref goto-map (1+ symbol))
2845 j m)
2846 (while (< j n)
2847 (setq state (aref from-state j))
2848 (aset goto-table state
2849 (cons (cons (aref tags i) (aref to-state j))
2850 (aref goto-table state)))
2851 (setq j (1+ j)))
2852 (setq i (1+ i)))
2853 goto-table))
2854
2855 (defsubst wisent-quote-p (sym)
2856 "Return non-nil if SYM is bound to the `quote' function."
2857 (condition-case nil
2858 (eq (indirect-function sym)
2859 (indirect-function 'quote))
2860 (error nil)))
2861
2862 (defsubst wisent-backquote-p (sym)
2863 "Return non-nil if SYM is bound to the `backquote' function."
2864 (condition-case nil
2865 (eq (indirect-function sym)
2866 (indirect-function 'backquote))
2867 (error nil)))
2868
2869 (defun wisent-check-$N (x m)
2870 "Return non-nil if X is a valid $N or $regionN symbol.
2871 That is if X is a $N or $regionN symbol with N >= 1 and N <= M.
2872 Also warn if X is a $N or $regionN symbol with N < 1 or N > M."
2873 (when (symbolp x)
2874 (let* ((n (symbol-name x))
2875 (i (and (string-match "\\`\\$\\(region\\)?\\([0-9]+\\)\\'" n)
2876 (string-to-number (match-string 2 n)))))
2877 (when i
2878 (if (and (>= i 1) (<= i m))
2879 t
2880 (message
2881 "*** In %s, %s might be a free variable (rule has %s)"
2882 NAME x (format (cond ((< m 1) "no component")
2883 ((= m 1) "%d component")
2884 ("%d components"))
2885 m))
2886 nil)))))
2887
2888 (defun wisent-semantic-action-expand-body (body n &optional found)
2889 "Parse BODY of semantic action.
2890 N is the maximum number of $N variables that can be referenced in
2891 BODY. Warn on references out of permitted range.
2892 Optional argument FOUND is the accumulated list of '$N' references
2893 encountered so far.
2894 Return a cons (FOUND . XBODY), where FOUND is the list of $N
2895 references found in BODY, and XBODY is BODY expression with
2896 `backquote' forms expanded."
2897 (if (not (listp body))
2898 ;; BODY is an atom, no expansion needed
2899 (progn
2900 (if (wisent-check-$N body n)
2901 ;; Accumulate $i symbol
2902 (add-to-list 'found body))
2903 (cons found body))
2904 ;; BODY is a list, expand inside it
2905 (let (xbody sexpr)
2906 ;; If backquote expand it first
2907 (if (wisent-backquote-p (car body))
2908 (setq body (macroexpand body)))
2909 (while body
2910 (setq sexpr (car body)
2911 body (cdr body))
2912 (cond
2913 ;; Function call excepted quote expression
2914 ((and (consp sexpr)
2915 (not (wisent-quote-p (car sexpr))))
2916 (setq sexpr (wisent-semantic-action-expand-body sexpr n found)
2917 found (car sexpr)
2918 sexpr (cdr sexpr)))
2919 ;; $i symbol
2920 ((wisent-check-$N sexpr n)
2921 ;; Accumulate $i symbol
2922 (add-to-list 'found sexpr))
2923 )
2924 ;; Accumulate expanded forms
2925 (setq xbody (nconc xbody (list sexpr))))
2926 (cons found xbody))))
2927
2928 (defun wisent-semantic-action (r)
2929 "Set up the Elisp function for semantic action at rule R.
2930 On entry RCODE[R] contains a vector [BODY N (NTERM I)] where BODY is the
2931 body of the semantic action, N is the maximum number of values
2932 available in the parser's stack, NTERM is the nonterminal the semantic
2933 action belongs to, and I is the index of the semantic action inside
2934 NTERM definition. Return the semantic action symbol.
2935 The semantic action function accepts three arguments:
2936
2937 - the state/value stack
2938 - the top-of-stack index
2939 - the goto table
2940
2941 And returns the updated top-of-stack index."
2942 (if (not (aref ruseful r))
2943 (aset rcode r nil)
2944 (let* ((actn (aref rcode r))
2945 (n (aref actn 1)) ; nb of val avail. in stack
2946 (NAME (apply 'format "%s:%d" (aref actn 2)))
2947 (form (wisent-semantic-action-expand-body (aref actn 0) n))
2948 ($l (car form)) ; list of $vars used in body
2949 (form (cdr form)) ; expanded form of body
2950 (nt (aref rlhs r)) ; nonterminal item no.
2951 (bl nil) ; `let*' binding list
2952 $v i j)
2953
2954 ;; Compute $N and $regionN bindings
2955 (setq i n)
2956 (while (> i 0)
2957 (setq j (1+ (* 2 (- n i))))
2958 ;; Only bind $regionI if used in action
2959 (setq $v (intern (format "$region%d" i)))
2960 (if (memq $v $l)
2961 (setq bl (cons `(,$v (cdr (aref ,stack (- ,sp ,j)))) bl)))
2962 ;; Only bind $I if used in action
2963 (setq $v (intern (format "$%d" i)))
2964 (if (memq $v $l)
2965 (setq bl (cons `(,$v (car (aref ,stack (- ,sp ,j)))) bl)))
2966 (setq i (1- i)))
2967
2968 ;; Compute J, the length of rule's RHS. It will give the
2969 ;; current parser state at STACK[SP - 2*J], and where to push
2970 ;; the new semantic value and the next state, respectively at:
2971 ;; STACK[SP - 2*J + 1] and STACK[SP - 2*J + 2]. Generally N,
2972 ;; the maximum number of values available in the stack, is equal
2973 ;; to J. But, for mid-rule actions, N is the number of rule
2974 ;; elements before the action and J is always 0 (empty rule).
2975 (setq i (aref rrhs r)
2976 j 0)
2977 (while (> (aref ritem i) 0)
2978 (setq j (1+ j)
2979 i (1+ i)))
2980
2981 ;; Create the semantic action symbol.
2982 (setq actn (intern NAME (aref rcode 0)))
2983
2984 ;; Store source code in function cell of the semantic action
2985 ;; symbol. It will be byte-compiled at automaton's compilation
2986 ;; time. Using a byte-compiled automaton can significantly
2987 ;; speed up parsing!
2988 (fset actn
2989 `(lambda (,stack ,sp ,gotos)
2990 (let* (,@bl
2991 ($region
2992 ,(cond
2993 ((= n 1)
2994 (if (assq '$region1 bl)
2995 '$region1
2996 `(cdr (aref ,stack (1- ,sp)))))
2997 ((> n 1)
2998 `(wisent-production-bounds
2999 ,stack (- ,sp ,(1- (* 2 n))) (1- ,sp)))))
3000 ($action ,NAME)
3001 ($nterm ',(aref tags nt))
3002 ,@(and (> j 0) `((,sp (- ,sp ,(* j 2)))))
3003 (,state (cdr (assq $nterm
3004 (aref ,gotos
3005 (aref ,stack ,sp))))))
3006 (setq ,sp (+ ,sp 2))
3007 ;; push semantic value
3008 (aset ,stack (1- ,sp) (cons ,form $region))
3009 ;; push next state
3010 (aset ,stack ,sp ,state)
3011 ;; return new top of stack
3012 ,sp)))
3013
3014 ;; Return the semantic action symbol
3015 actn)))
3016 \f
3017 ;;;; ----------------------------
3018 ;;;; Build parser LALR automaton.
3019 ;;;; ----------------------------
3020
3021 (defun wisent-parser-automaton ()
3022 "Compute and return LALR(1) automaton from GRAMMAR.
3023 GRAMMAR is in internal format. GRAM/ACTS are grammar rules
3024 in internal format. STARTS defines the start symbols."
3025 ;; Check for useless stuff
3026 (wisent-reduce-grammar)
3027
3028 (wisent-set-derives)
3029 (wisent-set-nullable)
3030 ;; convert to nondeterministic finite state machine.
3031 (wisent-generate-states)
3032 ;; make it deterministic.
3033 (wisent-lalr)
3034 ;; Find and record any conflicts: places where one token of
3035 ;; lookahead is not enough to disambiguate the parsing. Also
3036 ;; resolve s/r conflicts based on precedence declarations.
3037 (wisent-resolve-conflicts)
3038 (wisent-print-results)
3039
3040 (vector (wisent-state-actions) ; action table
3041 (wisent-goto-actions) ; goto table
3042 start-table ; start symbols
3043 (aref rcode 0) ; sem. action symbol obarray
3044 )
3045 )
3046 \f
3047 ;;;; -------------------
3048 ;;;; Parse input grammar
3049 ;;;; -------------------
3050
3051 (defconst wisent-reserved-symbols (list wisent-error-term)
3052 "The list of reserved symbols.
3053 Also all symbols starting with a character defined in
3054 `wisent-reserved-capitals' are reserved for internal use.")
3055
3056 (defconst wisent-reserved-capitals '(?\$ ?\@)
3057 "The list of reserved capital letters.
3058 All symbol starting with one of these letters are reserved for
3059 internal use.")
3060
3061 (defconst wisent-starts-nonterm '$STARTS
3062 "Main start symbol.
3063 It gives the rules for start symbols.")
3064
3065 (defvar wisent-single-start-flag nil
3066 "Non-nil means allows only one start symbol like in Bison.
3067 That is don't add extra start rules to the grammar. This is
3068 useful to compare the Wisent's generated automaton with the Bison's
3069 one.")
3070
3071 (defsubst wisent-ISVALID-VAR (x)
3072 "Return non-nil if X is a character or an allowed symbol."
3073 (and x (symbolp x)
3074 (not (memq (aref (symbol-name x) 0) wisent-reserved-capitals))
3075 (not (memq x wisent-reserved-symbols))))
3076
3077 (defsubst wisent-ISVALID-TOKEN (x)
3078 "Return non-nil if X is a character or an allowed symbol."
3079 (or (wisent-char-p x)
3080 (wisent-ISVALID-VAR x)))
3081
3082 (defun wisent-push-token (symbol &optional nocheck)
3083 "Push a new SYMBOL in the list of tokens.
3084 Bypass checking if NOCHECK is non-nil."
3085 ;; Check
3086 (or nocheck (wisent-ISVALID-TOKEN symbol)
3087 (error "Invalid terminal symbol: %S" symbol))
3088 (if (memq symbol token-list)
3089 (message "*** duplicate terminal `%s' ignored" symbol)
3090 ;; Set up properties
3091 (wisent-set-prec symbol nil)
3092 (wisent-set-assoc symbol nil)
3093 (wisent-set-item-number symbol ntokens)
3094 ;; Add
3095 (setq ntokens (1+ ntokens)
3096 token-list (cons symbol token-list))))
3097
3098 (defun wisent-push-var (symbol &optional nocheck)
3099 "Push a new SYMBOL in the list of nonterminals.
3100 Bypass checking if NOCHECK is non-nil."
3101 ;; Check
3102 (unless nocheck
3103 (or (wisent-ISVALID-VAR symbol)
3104 (error "Invalid nonterminal symbol: %S" symbol))
3105 (if (memq symbol var-list)
3106 (error "Nonterminal `%s' already defined" symbol)))
3107 ;; Set up properties
3108 (wisent-set-item-number symbol nvars)
3109 ;; Add
3110 (setq nvars (1+ nvars)
3111 var-list (cons symbol var-list)))
3112
3113 (defun wisent-parse-nonterminals (defs)
3114 "Parse nonterminal definitions in DEFS.
3115 Fill in each element of the global arrays RPREC, RCODE, RUSEFUL with
3116 respectively rule precedence level, semantic action code and
3117 usefulness flag. Return a list of rules of the form (LHS . RHS) where
3118 LHS and RHS are respectively the Left Hand Side and Right Hand Side of
3119 the rule."
3120 (setq rprec nil
3121 rcode nil
3122 nitems 0
3123 nrules 0)
3124 (let (def nonterm rlist rule rules rhs rest item items
3125 rhl plevel semact @n @count iactn)
3126 (setq @count 0)
3127 (while defs
3128 (setq def (car defs)
3129 defs (cdr defs)
3130 nonterm (car def)
3131 rlist (cdr def)
3132 iactn 0)
3133 (or (consp rlist)
3134 (error "Invalid nonterminal definition syntax: %S" def))
3135 (while rlist
3136 (setq rule (car rlist)
3137 rlist (cdr rlist)
3138 items (car rule)
3139 rest (cdr rule)
3140 rhl 0
3141 rhs nil)
3142
3143 ;; Check & count items
3144 (setq nitems (1+ nitems)) ;; LHS item
3145 (while items
3146 (setq item (car items)
3147 items (cdr items)
3148 nitems (1+ nitems)) ;; RHS items
3149 (if (listp item)
3150 ;; Mid-rule action
3151 (progn
3152 (setq @count (1+ @count)
3153 @n (intern (format "@%d" @count)))
3154 (wisent-push-var @n t)
3155 ;; Push a new empty rule with the mid-rule action
3156 (setq semact (vector item rhl (list nonterm iactn))
3157 iactn (1+ iactn)
3158 plevel nil
3159 rcode (cons semact rcode)
3160 rprec (cons plevel rprec)
3161 item @n ;; Replace action by @N nonterminal
3162 rules (cons (list item) rules)
3163 nitems (1+ nitems)
3164 nrules (1+ nrules)))
3165 ;; Check terminal or nonterminal symbol
3166 (cond
3167 ((or (memq item token-list) (memq item var-list)))
3168 ;; Create new literal character token
3169 ((wisent-char-p item) (wisent-push-token item t))
3170 ((error "Symbol `%s' is used, but is not defined as a token and has no rules"
3171 item))))
3172 (setq rhl (1+ rhl)
3173 rhs (cons item rhs)))
3174
3175 ;; Check & collect rule precedence level
3176 (setq plevel (when (vectorp (car rest))
3177 (setq item (car rest)
3178 rest (cdr rest))
3179 (if (and (= (length item) 1)
3180 (memq (aref item 0) token-list)
3181 (wisent-prec (aref item 0)))
3182 (wisent-item-number (aref item 0))
3183 (error "Invalid rule precedence level syntax: %S" item)))
3184 rprec (cons plevel rprec))
3185
3186 ;; Check & collect semantic action body
3187 (setq semact (vector
3188 (if rest
3189 (if (cdr rest)
3190 (error "Invalid semantic action syntax: %S" rest)
3191 (car rest))
3192 ;; Give a default semantic action body: nil
3193 ;; for an empty rule or $1, the value of the
3194 ;; first symbol in the rule, otherwise.
3195 (if (> rhl 0) '$1 '()))
3196 rhl
3197 (list nonterm iactn))
3198 iactn (1+ iactn)
3199 rcode (cons semact rcode))
3200 (setq rules (cons (cons nonterm (nreverse rhs)) rules)
3201 nrules (1+ nrules))))
3202
3203 (setq ruseful (make-vector (1+ nrules) t)
3204 rprec (vconcat (cons nil (nreverse rprec)))
3205 rcode (vconcat (cons nil (nreverse rcode))))
3206 (nreverse rules)
3207 ))
3208
3209 (defun wisent-parse-grammar (grammar &optional start-list)
3210 "Parse GRAMMAR and build a suitable internal representation.
3211 Optional argument START-LIST defines the start symbols.
3212 GRAMMAR is a list of form: (TOKENS ASSOCS . NONTERMS)
3213
3214 TOKENS is a list of terminal symbols (tokens).
3215
3216 ASSOCS is nil or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements
3217 describing the associativity of TOKENS. ASSOC-TYPE must be one of the
3218 `default-prec' `nonassoc', `left' or `right' symbols. When ASSOC-TYPE
3219 is `default-prec', ASSOC-VALUE must be nil or t (the default).
3220 Otherwise it is a list of tokens which must have been previously
3221 declared in TOKENS.
3222
3223 NONTERMS is the list of non terminal definitions (see function
3224 `wisent-parse-nonterminals')."
3225 (or (and (consp grammar) (> (length grammar) 2))
3226 (error "Bad input grammar"))
3227
3228 (let (i r rhs pre dpre lst start-var assoc rules item
3229 token var def tokens defs ep-token ep-var ep-def)
3230
3231 ;; Built-in tokens
3232 (setq ntokens 0 nvars 0)
3233 (wisent-push-token wisent-eoi-term t)
3234 (wisent-push-token wisent-error-term t)
3235
3236 ;; Check/collect terminals
3237 (setq lst (car grammar))
3238 (while lst
3239 (wisent-push-token (car lst))
3240 (setq lst (cdr lst)))
3241
3242 ;; Check/Set up tokens precedence & associativity
3243 (setq lst (nth 1 grammar)
3244 pre 0
3245 defs nil
3246 dpre nil
3247 default-prec t)
3248 (while lst
3249 (setq def (car lst)
3250 assoc (car def)
3251 tokens (cdr def)
3252 lst (cdr lst))
3253 (if (eq assoc 'default-prec)
3254 (progn
3255 (or (null (cdr tokens))
3256 (memq (car tokens) '(t nil))
3257 (error "Invalid default-prec value: %S" tokens))
3258 (setq default-prec (car tokens))
3259 (if dpre
3260 (message "*** redefining default-prec to %s"
3261 default-prec))
3262 (setq dpre t))
3263 (or (memq assoc '(left right nonassoc))
3264 (error "Invalid associativity syntax: %S" assoc))
3265 (setq pre (1+ pre))
3266 (while tokens
3267 (setq token (car tokens)
3268 tokens (cdr tokens))
3269 (if (memq token defs)
3270 (message "*** redefining precedence of `%s'" token))
3271 (or (memq token token-list)
3272 ;; Define token not previously declared.
3273 (wisent-push-token token))
3274 (setq defs (cons token defs))
3275 ;; Record the precedence and associativity of the terminal.
3276 (wisent-set-prec token pre)
3277 (wisent-set-assoc token assoc))))
3278
3279 ;; Check/Collect nonterminals
3280 (setq lst (nthcdr 2 grammar)
3281 defs nil)
3282 (while lst
3283 (setq def (car lst)
3284 lst (cdr lst))
3285 (or (consp def)
3286 (error "Invalid nonterminal definition: %S" def))
3287 (if (memq (car def) token-list)
3288 (error "Nonterminal `%s' already defined as token" (car def)))
3289 (wisent-push-var (car def))
3290 (setq defs (cons def defs)))
3291 (or defs
3292 (error "No input grammar"))
3293 (setq defs (nreverse defs))
3294
3295 ;; Set up the start symbol.
3296 (setq start-table nil)
3297 (cond
3298
3299 ;; 1. START-LIST is nil, the start symbol is the first
3300 ;; nonterminal defined in the grammar (Bison like).
3301 ((null start-list)
3302 (setq start-var (caar defs)))
3303
3304 ;; 2. START-LIST contains only one element, it is the start
3305 ;; symbol (Bison like).
3306 ((or wisent-single-start-flag (null (cdr start-list)))
3307 (setq start-var (car start-list))
3308 (or (assq start-var defs)
3309 (error "Start symbol `%s' has no rule" start-var)))
3310
3311 ;; 3. START-LIST contains more than one element. All defines
3312 ;; potential start symbols. One of them (the first one by
3313 ;; default) will be given at parse time to be the parser goal.
3314 ;; If `wisent-single-start-flag' is non-nil that feature is
3315 ;; disabled and the first nonterminal in START-LIST defines
3316 ;; the start symbol, like in case 2 above.
3317 ((not wisent-single-start-flag)
3318
3319 ;; START-LIST is a list of nonterminals '(nt0 ... ntN).
3320 ;; Build and push ad hoc start rules in the grammar:
3321
3322 ;; ($STARTS ((nt0) $1) ((nt1) $1) ... ((ntN) $1))
3323 ;; ($nt1 (($$nt1 nt1) $2))
3324 ;; ...
3325 ;; ($ntN (($$ntN ntN) $2))
3326
3327 ;; Where internal symbols $ntI and $$ntI are respectively
3328 ;; nonterminals and terminals.
3329
3330 ;; The internal start symbol $STARTS is used to build the
3331 ;; LALR(1) automaton. The true default start symbol used by the
3332 ;; parser is the first nonterminal in START-LIST (nt0).
3333 (setq start-var wisent-starts-nonterm
3334 lst (nreverse start-list))
3335 (while lst
3336 (setq var (car lst)
3337 lst (cdr lst))
3338 (or (memq var var-list)
3339 (error "Start symbol `%s' has no rule" var))
3340 (unless (assq var start-table) ;; Ignore duplicates
3341 ;; For each nt start symbol
3342 (setq ep-var (intern (format "$%s" var))
3343 ep-token (intern (format "$$%s" var)))
3344 (wisent-push-token ep-token t)
3345 (wisent-push-var ep-var t)
3346 (setq
3347 ;; Add entry (nt . $$nt) to start-table
3348 start-table (cons (cons var ep-token) start-table)
3349 ;; Add rule ($nt (($$nt nt) $2))
3350 defs (cons (list ep-var (list (list ep-token var) '$2)) defs)
3351 ;; Add start rule (($nt) $1)
3352 ep-def (cons (list (list ep-var) '$1) ep-def))
3353 ))
3354 (wisent-push-var start-var t)
3355 (setq defs (cons (cons start-var ep-def) defs))))
3356
3357 ;; Set up rules main data structure & RPREC, RCODE, RUSEFUL
3358 (setq rules (wisent-parse-nonterminals defs))
3359
3360 ;; Set up the terminal & nonterminal lists.
3361 (setq nsyms (+ ntokens nvars)
3362 token-list (nreverse token-list)
3363 lst var-list
3364 var-list nil)
3365 (while lst
3366 (setq var (car lst)
3367 lst (cdr lst)
3368 var-list (cons var var-list))
3369 (wisent-set-item-number ;; adjust nonterminal item number to
3370 var (+ ntokens (wisent-item-number var)))) ;; I += NTOKENS
3371
3372 ;; Store special item numbers
3373 (setq error-token-number (wisent-item-number wisent-error-term)
3374 start-symbol (wisent-item-number start-var))
3375
3376 ;; Keep symbols in the TAGS vector so that TAGS[I] is the symbol
3377 ;; associated to item number I.
3378 (setq tags (vconcat token-list var-list))
3379 ;; Set up RLHS RRHS & RITEM data structures from list of rules
3380 ;; (LHS . RHS) received from `wisent-parse-nonterminals'.
3381 (setq rlhs (make-vector (1+ nrules) nil)
3382 rrhs (make-vector (1+ nrules) nil)
3383 ritem (make-vector (1+ nitems) nil)
3384 i 0
3385 r 1)
3386 (while rules
3387 (aset rlhs r (wisent-item-number (caar rules)))
3388 (aset rrhs r i)
3389 (setq rhs (cdar rules)
3390 pre nil)
3391 (while rhs
3392 (setq item (wisent-item-number (car rhs)))
3393 ;; Get default precedence level of rule, that is the
3394 ;; precedence of the last terminal in it.
3395 (and (wisent-ISTOKEN item)
3396 default-prec
3397 (setq pre item))
3398
3399 (aset ritem i item)
3400 (setq i (1+ i)
3401 rhs (cdr rhs)))
3402 ;; Setup the precedence level of the rule, that is the one
3403 ;; specified by %prec or the default one.
3404 (and (not (aref rprec r)) ;; Already set by %prec
3405 pre
3406 (wisent-prec (aref tags pre))
3407 (aset rprec r pre))
3408 (aset ritem i (- r))
3409 (setq i (1+ i)
3410 r (1+ r))
3411 (setq rules (cdr rules)))
3412 ))
3413 \f
3414 ;;;; ---------------------
3415 ;;;; Compile input grammar
3416 ;;;; ---------------------
3417
3418 (defun wisent-compile-grammar (grammar &optional start-list)
3419 "Compile the LALR(1) GRAMMAR.
3420
3421 GRAMMAR is a list (TOKENS ASSOCS . NONTERMS) where:
3422
3423 - TOKENS is a list of terminal symbols (tokens).
3424
3425 - ASSOCS is nil, or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements
3426 describing the associativity of TOKENS. ASSOC-TYPE must be one of
3427 the `default-prec' `nonassoc', `left' or `right' symbols. When
3428 ASSOC-TYPE is `default-prec', ASSOC-VALUE must be nil or t (the
3429 default). Otherwise it is a list of tokens which must have been
3430 previously declared in TOKENS.
3431
3432 - NONTERMS is a list of nonterminal definitions.
3433
3434 Optional argument START-LIST specify the possible grammar start
3435 symbols. This is a list of nonterminals which must have been
3436 previously declared in GRAMMAR's NONTERMS form. By default, the start
3437 symbol is the first nonterminal defined. When START-LIST contains
3438 only one element, it is the start symbol. Otherwise, all elements are
3439 possible start symbols, unless `wisent-single-start-flag' is non-nil.
3440 In that case, the first element is the start symbol, and others are
3441 ignored.
3442
3443 Return an automaton as a vector: [ACTIONS GOTOS STARTS FUNCTIONS]
3444 where:
3445
3446 - ACTIONS is a state/token matrix telling the parser what to do at
3447 every state based on the current lookahead token. That is shift,
3448 reduce, accept or error.
3449
3450 - GOTOS is a state/nonterminal matrix telling the parser the next
3451 state to go to after reducing with each rule.
3452
3453 - STARTS is an alist which maps the allowed start nonterminal symbols
3454 to tokens that will be first shifted into the parser stack.
3455
3456 - FUNCTIONS is an obarray of semantic action symbols. Each symbol's
3457 function definition is the semantic action lambda expression."
3458 (if (wisent-automaton-p grammar)
3459 grammar ;; Grammar already compiled just return it
3460 (wisent-with-context compile-grammar
3461 (let* ((gc-cons-threshold 1000000)
3462 automaton)
3463 (garbage-collect)
3464 (setq wisent-new-log-flag t)
3465 ;; Parse input grammar
3466 (wisent-parse-grammar grammar start-list)
3467 ;; Generate the LALR(1) automaton
3468 (setq automaton (wisent-parser-automaton))
3469 automaton))))
3470 \f
3471 ;;;; --------------------------
3472 ;;;; Byte compile input grammar
3473 ;;;; --------------------------
3474
3475 (require 'bytecomp)
3476
3477 (defun wisent-byte-compile-grammar (form)
3478 "Byte compile the `wisent-compile-grammar' FORM.
3479 Automatically called by the Emacs Lisp byte compiler as a
3480 `byte-compile' handler."
3481 ;; Eval the `wisent-compile-grammar' form to obtain an LALR
3482 ;; automaton internal data structure. Then, because the internal
3483 ;; data structure contains an obarray, convert it to a lisp form so
3484 ;; it can be byte-compiled.
3485 (byte-compile-form (wisent-automaton-lisp-form (eval form))))
3486
3487 (put 'wisent-compile-grammar 'byte-compile 'wisent-byte-compile-grammar)
3488
3489 (defun wisent-automaton-lisp-form (automaton)
3490 "Return a Lisp form that produces AUTOMATON.
3491 See also `wisent-compile-grammar' for more details on AUTOMATON."
3492 (or (wisent-automaton-p automaton)
3493 (signal 'wrong-type-argument
3494 (list 'wisent-automaton-p automaton)))
3495 (let ((obn (make-symbol "ob")) ; Generated obarray name
3496 (obv (aref automaton 3)) ; Semantic actions obarray
3497 )
3498 `(let ((,obn (make-vector 13 0)))
3499 ;; Generate code to initialize the semantic actions obarray,
3500 ;; in local variable OBN.
3501 ,@(let (obcode)
3502 (mapatoms
3503 #'(lambda (s)
3504 (setq obcode
3505 (cons `(fset (intern ,(symbol-name s) ,obn)
3506 #',(symbol-function s))
3507 obcode)))
3508 obv)
3509 obcode)
3510 ;; Generate code to create the automaton.
3511 (vector
3512 ;; In code generated to initialize the action table, take
3513 ;; care of symbols that are interned in the semantic actions
3514 ;; obarray.
3515 (vector
3516 ,@(mapcar
3517 #'(lambda (state) ;; for each state
3518 `(list
3519 ,@(mapcar
3520 #'(lambda (tr) ;; for each transition
3521 (let ((k (car tr)) ; token
3522 (a (cdr tr))) ; action
3523 (if (and (symbolp a)
3524 (intern-soft (symbol-name a) obv))
3525 `(cons ,(if (symbolp k) `(quote ,k) k)
3526 (intern-soft ,(symbol-name a) ,obn))
3527 `(quote ,tr))))
3528 state)))
3529 (aref automaton 0)))
3530 ;; The code of the goto table is unchanged.
3531 ,(aref automaton 1)
3532 ;; The code of the alist of start symbols is unchanged.
3533 ',(aref automaton 2)
3534 ;; The semantic actions obarray is in the local variable OBN.
3535 ,obn))))
3536
3537 (provide 'semantic/wisent/comp)
3538
3539 ;; arch-tag: 758ea04c-ea97-466b-9b35-aea0861033c9
3540 ;;; semantic/wisent/comp.el ends here