1 ;; (language ecmascript parse-lalr) -- yacc's parser generator, in Guile
2 ;; Copyright (C) 1984,1989,1990 Free Software Foundation, Inc.
3 ;; Copyright (C) 1996-2002 Dominique Boucher
5 ;; This program is free software: you can redistribute it and/or modify
6 ;; it under the terms of the GNU General Public License as published by
7 ;; the Free Software Foundation, either version 3 of the License, or
8 ;; (at your option) any later version.
10 ;; This program is distributed in the hope that it will be useful,
11 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ;; GNU General Public License for more details.
15 ;; You should have received a copy of the GNU General Public License
16 ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
19 ;; ---------------------------------------------------------------------- ;;
22 This file contains yet another LALR(1) parser generator written in
23 Scheme. In contrast to other such parser generators, this one
24 implements a more efficient algorithm for computing the lookahead sets.
25 The algorithm is the same as used in Bison (GNU yacc) and is described
26 in the following paper:
28 "Efficient Computation of LALR(1) Look-Ahead Set", F. DeRemer and
29 T. Pennello, TOPLAS, vol. 4, no. 4, october 1982.
31 As a consequence, it is not written in a fully functional style.
32 In fact, much of the code is a direct translation from C to Scheme
35 @section Defining a parser
37 The module @code{(language ecmascript parse-lalr)} declares a macro
38 called @code{lalr-parser}:
41 (lalr-parser tokens rules ...)
44 This macro, when given appropriate arguments, generates an LALR(1)
45 syntax analyzer. The macro accepts at least two arguments. The first
46 is a list of symbols which represent the terminal symbols of the
47 grammar. The remaining arguments are the grammar production rules.
49 @section Running the parser
51 The parser generated by the @code{lalr-parser} macro is a function that
52 takes two parameters. The first parameter is a lexical analyzer while
53 the second is an error procedure.
55 The lexical analyzer is zero-argument function (a thunk)
56 invoked each time the parser needs to look-ahead in the token stream.
57 A token is usually a pair whose @code{car} is the symbol corresponding to
58 the token (the same symbol as used in the grammar definition). The
59 @code{cdr} of the pair is the semantic value associated with the token. For
60 example, a string token would have the @code{car} set to @code{'string}
61 while the @code{cdr} is set to the string value @code{"hello"}.
63 Once the end of file is encountered, the lexical analyzer must always
64 return the symbol @code{'*eoi*} each time it is invoked.
66 The error procedure must be a function that accepts at least two
69 @section The grammar format
71 The grammar is specified by first giving the list of terminals and the
72 list of non-terminal definitions. Each non-terminal definition
73 is a list where the first element is the non-terminal and the other
74 elements are the right-hand sides (lists of grammar symbols). In
75 addition to this, each rhs can be followed by a semantic action.
77 For example, consider the following (yacc) grammar for a very simple
91 The same grammar, written for the scheme parser generator, would look
92 like this (with semantic actions)
99 (e (e + t) -> (+ $1 $3)
102 (t (t * f) -> (* $1 $3)
107 In semantic actions, the symbol @code{$n} refers to the synthesized
108 attribute value of the nth symbol in the production. The value
109 associated with the non-terminal on the left is the result of
110 evaluating the semantic action (it defaults to @code{#f}).
112 The above grammar implicitly handles operator precedences. It is also
113 possible to explicitly assign precedences and associativity to
114 terminal symbols and productions a la Yacc. Here is a modified
115 (and augmented) version of the grammar:
124 (e (e + e) -> (+ $1 $3)
128 (- e (prec: uminus)) -> (- $2)
131 The @code{left:} directive is used to specify a set of left-associative
132 operators of the same precedence level, the @code{right:} directive for
133 right-associative operators, and @code{nonassoc:} for operators that
134 are not associative. Note the use of the (apparently) useless
135 terminal @code{uminus}. It is only defined in order to assign to the
136 penultimate rule a precedence level higher than that of @code{*} and
137 @code{/}. The @code{prec:} directive can only appear as the last element of a
138 rule. Finally, note that precedence levels are incremented from
139 left to right, i.e. the precedence level of @code{+} and @code{-} is less
140 than the precedence level of @code{*} and @code{/} since the formers appear
141 first in the list of terminal symbols (token definitions).
143 @section A final note on conflict resolution
145 Conflicts in the grammar are handled in a conventional way.
146 In the absence of precedence directives,
147 Shift/Reduce conflicts are resolved by shifting, and Reduce/Reduce
148 conflicts are resolved by choosing the rule listed first in the
151 You can print the states of the generated parser by evaluating
152 @code{(print-states)}. The format of the output is similar to the one
153 produced by bison when given the -v command-line option.
157 ;;; ---------- SYSTEM DEPENDENT SECTION -----------------
158 ;; put in a module by Richard Todd
159 (define-module (language ecmascript parse-lalr)
160 #:export (lalr-parser
163 ;; this code is by Thien-Thi Nguyen, found in a google search
165 (defmacro def-macro (form . body)
166 `(defmacro ,(car form) ,(cdr form) ,@body))
167 (def-macro (BITS-PER-WORD) 28)
168 (def-macro (lalr-error msg obj) `(throw 'lalr-error ,msg ,obj))
169 (def-macro (logical-or x . y) `(logior ,x ,@y)))
171 ;;; ---------- END OF SYSTEM DEPENDENT SECTION ------------
173 ;; - Macros pour la gestion des vecteurs de bits
175 (def-macro (set-bit v b)
176 `(let ((x (quotient ,b (BITS-PER-WORD)))
177 (y (expt 2 (remainder ,b (BITS-PER-WORD)))))
178 (vector-set! ,v x (logical-or (vector-ref ,v x) y))))
180 (def-macro (bit-union v1 v2 n)
183 (vector-set! ,v1 i (logical-or (vector-ref ,v1 i)
184 (vector-ref ,v2 i)))))
186 ;; - Macro pour les structures de donnees
188 (def-macro (new-core) `(make-vector 4 0))
189 (def-macro (set-core-number! c n) `(vector-set! ,c 0 ,n))
190 (def-macro (set-core-acc-sym! c s) `(vector-set! ,c 1 ,s))
191 (def-macro (set-core-nitems! c n) `(vector-set! ,c 2 ,n))
192 (def-macro (set-core-items! c i) `(vector-set! ,c 3 ,i))
193 (def-macro (core-number c) `(vector-ref ,c 0))
194 (def-macro (core-acc-sym c) `(vector-ref ,c 1))
195 (def-macro (core-nitems c) `(vector-ref ,c 2))
196 (def-macro (core-items c) `(vector-ref ,c 3))
198 (def-macro (new-shift) `(make-vector 3 0))
199 (def-macro (set-shift-number! c x) `(vector-set! ,c 0 ,x))
200 (def-macro (set-shift-nshifts! c x) `(vector-set! ,c 1 ,x))
201 (def-macro (set-shift-shifts! c x) `(vector-set! ,c 2 ,x))
202 (def-macro (shift-number s) `(vector-ref ,s 0))
203 (def-macro (shift-nshifts s) `(vector-ref ,s 1))
204 (def-macro (shift-shifts s) `(vector-ref ,s 2))
206 (def-macro (new-red) `(make-vector 3 0))
207 (def-macro (set-red-number! c x) `(vector-set! ,c 0 ,x))
208 (def-macro (set-red-nreds! c x) `(vector-set! ,c 1 ,x))
209 (def-macro (set-red-rules! c x) `(vector-set! ,c 2 ,x))
210 (def-macro (red-number c) `(vector-ref ,c 0))
211 (def-macro (red-nreds c) `(vector-ref ,c 1))
212 (def-macro (red-rules c) `(vector-ref ,c 2))
216 (def-macro (new-set nelem)
217 `(make-vector ,nelem 0))
220 (def-macro (vector-map f v)
221 `(let ((vm-n (- (vector-length ,v) 1)))
222 (let loop ((vm-low 0) (vm-high vm-n))
223 (if (= vm-low vm-high)
224 (vector-set! ,v vm-low (,f (vector-ref ,v vm-low) vm-low))
225 (let ((vm-middle (quotient (+ vm-low vm-high) 2)))
226 (loop vm-low vm-middle)
227 (loop (+ vm-middle 1) vm-high))))))
231 (define STATE-TABLE-SIZE 1009)
242 (define kernel-base #f)
243 (define kernel-end #f)
244 (define shift-symbol #f)
245 (define shift-set #f)
247 (define state-table #f)
248 (define acces-symbol #f)
249 (define reduction-table #f)
250 (define shift-table #f)
251 (define consistent #f)
252 (define lookaheads #f)
257 (define from-state #f)
261 (define action-table #f)
270 (define first-state #f)
271 (define last-state #f)
272 (define final-state #f)
273 (define first-shift #f)
274 (define last-shift #f)
275 (define first-reduction #f)
276 (define last-reduction #f)
280 (define token-set-size #f)
282 (define (gen-tables! tokens gram)
287 (lambda (terms terms/prec vars gram gram/actions)
288 (set! the-terminals/prec (list->vector terms/prec))
289 (set! the-terminals (list->vector terms))
290 (set! the-nonterminals (list->vector vars))
291 (set! nterms (length terms))
292 (set! nvars (length vars))
293 (set! nsyms (+ nterms nvars))
294 (let ((no-of-rules (length gram/actions))
295 (no-of-items (let loop ((l gram/actions) (count 0))
298 (loop (cdr l) (+ count (length (caar l))))))))
299 (pack-grammar no-of-rules no-of-items gram)
305 (compact-action-table terms)
309 (define (initialize-all)
317 (set! kernel-base #f)
319 (set! shift-symbol #f)
322 (set! state-table (make-vector STATE-TABLE-SIZE '()))
323 (set! acces-symbol #f)
324 (set! reduction-table #f)
325 (set! shift-table #f)
336 (set! action-table #f)
338 (set! first-state #f)
340 (set! final-state #f)
341 (set! first-shift #f)
343 (set! first-reduction #f)
344 (set! last-reduction #f)
348 (set! token-set-size #f)
349 (set! rule-precedences '()))
352 (define (pack-grammar no-of-rules no-of-items gram)
353 (set! nrules (+ no-of-rules 1))
354 (set! nitems no-of-items)
355 (set! rlhs (make-vector nrules #f))
356 (set! rrhs (make-vector nrules #f))
357 (set! ritem (make-vector (+ 1 nitems) #f))
359 (let loop ((p gram) (item-no 0) (rule-no 1))
362 (let loop2 ((prods (cdar p)) (it-no2 item-no) (rl-no2 rule-no))
364 (loop (cdr p) it-no2 rl-no2)
366 (vector-set! rlhs rl-no2 nt)
367 (vector-set! rrhs rl-no2 it-no2)
368 (let loop3 ((rhs (car prods)) (it-no3 it-no2))
371 (vector-set! ritem it-no3 (- rl-no2))
372 (loop2 (cdr prods) (+ it-no3 1) (+ rl-no2 1)))
374 (vector-set! ritem it-no3 (car rhs))
375 (loop3 (cdr rhs) (+ it-no3 1))))))))))))
378 ;; Fonction set-derives
379 ;; --------------------
380 (define (set-derives)
381 (define delts (make-vector (+ nrules 1) 0))
382 (define dset (make-vector nvars -1))
384 (let loop ((i 1) (j 0)) ; i = 0
386 (let ((lhs (vector-ref rlhs i)))
389 (vector-set! delts j (cons i (vector-ref dset lhs)))
390 (vector-set! dset lhs j)
391 (loop (+ i 1) (+ j 1)))
394 (set! derives (make-vector nvars 0))
398 (let ((q (let loop2 ((j (vector-ref dset i)) (s '()))
401 (let ((x (vector-ref delts j)))
402 (loop2 (cdr x) (cons (car x) s)))))))
403 (vector-set! derives i q)
408 (define (set-nullable)
409 (set! nullable (make-vector nvars #f))
410 (let ((squeue (make-vector nvars #f))
411 (rcount (make-vector (+ nrules 1) 0))
412 (rsets (make-vector nvars #f))
413 (relts (make-vector (+ nitems nvars 1) #f)))
414 (let loop ((r 0) (s2 0) (p 0))
415 (let ((*r (vector-ref ritem r)))
418 (let ((symbol (vector-ref rlhs (- *r))))
419 (if (and (>= symbol 0)
420 (not (vector-ref nullable symbol)))
422 (vector-set! nullable symbol #t)
423 (vector-set! squeue s2 symbol)
424 (loop (+ r 1) (+ s2 1) p))))
425 (let loop2 ((r1 r) (any-tokens #f))
426 (let* ((symbol (vector-ref ritem r1)))
428 (loop2 (+ r1 1) (or any-tokens (>= symbol nvars)))
430 (let ((ruleno (- symbol)))
431 (let loop3 ((r2 r) (p2 p))
432 (let ((symbol (vector-ref ritem r2)))
435 (vector-set! rcount ruleno
436 (+ (vector-ref rcount ruleno) 1))
437 (vector-set! relts p2
438 (cons (vector-ref rsets symbol)
440 (vector-set! rsets symbol p2)
441 (loop3 (+ r2 1) (+ p2 1)))
442 (loop (+ r2 1) s2 p2)))))
443 (loop (+ r1 1) s2 p))))))
444 (let loop ((s1 0) (s3 s2))
446 (let loop2 ((p (vector-ref rsets (vector-ref squeue s1))) (s4 s3))
448 (let* ((x (vector-ref relts p))
450 (y (- (vector-ref rcount ruleno) 1)))
451 (vector-set! rcount ruleno y)
453 (let ((symbol (vector-ref rlhs ruleno)))
454 (if (and (>= symbol 0)
455 (not (vector-ref nullable symbol)))
457 (vector-set! nullable symbol #t)
458 (vector-set! squeue s4 symbol)
459 (loop2 (car x) (+ s4 1)))
461 (loop2 (car x) s4))))
462 (loop (+ s1 1) s4)))))))))
466 ; Fonction set-firsts qui calcule un tableau de taille
467 ; nvars et qui donne, pour chaque non-terminal X, une liste des
468 ; non-terminaux pouvant apparaitre au debut d'une derivation a
472 (set! firsts (make-vector nvars '()))
477 (let loop2 ((sp (vector-ref derives i)))
480 (let ((sym (vector-ref ritem (vector-ref rrhs (car sp)))))
482 (vector-set! firsts i (sinsert sym (vector-ref firsts i))))
483 (loop2 (cdr sp)))))))
485 ;; -- reflexive and transitive closure
486 (let loop ((continue #t))
488 (let loop2 ((i 0) (cont #f))
491 (let* ((x (vector-ref firsts i))
492 (y (let loop3 ((l x) (z x))
496 (sunion (vector-ref firsts (car l)) z))))))
500 (vector-set! firsts i y)
501 (loop2 (+ i 1) #t))))))))
506 (vector-set! firsts i (sinsert i (vector-ref firsts i)))
512 ; Fonction set-fderives qui calcule un tableau de taille
513 ; nvars et qui donne, pour chaque non-terminal, une liste des regles pouvant
514 ; etre derivees a partir de ce non-terminal. (se sert de firsts)
516 (define (set-fderives)
517 (set! fderives (make-vector nvars #f))
523 (let ((x (let loop2 ((l (vector-ref firsts i)) (fd '()))
527 (sunion (vector-ref derives (car l)) fd))))))
528 (vector-set! fderives i x)
532 ; Fonction calculant la fermeture d'un ensemble d'items LR0
533 ; ou core est une liste d'items
535 (define (closure core)
537 (define ruleset (make-vector nrules #f))
539 (let loop ((csp core))
540 (if (not (null? csp))
541 (let ((sym (vector-ref ritem (car csp))))
543 (let loop2 ((dsp (vector-ref fderives sym)))
544 (if (not (null? dsp))
546 (vector-set! ruleset (car dsp) #t)
547 (loop2 (cdr dsp))))))
550 (let loop ((ruleno 1) (csp core) (itemsetv '())) ; ruleno = 0
551 (if (< ruleno nrules)
552 (if (vector-ref ruleset ruleno)
553 (let ((itemno (vector-ref rrhs ruleno)))
554 (let loop2 ((c csp) (itemsetv2 itemsetv))
557 (loop2 (cdr c) (cons (car c) itemsetv2))
558 (loop (+ ruleno 1) c (cons itemno itemsetv2)))))
559 (loop (+ ruleno 1) csp itemsetv))
560 (let loop2 ((c csp) (itemsetv2 itemsetv))
562 (loop2 (cdr c) (cons (car c) itemsetv2))
563 (reverse itemsetv2))))))
567 (define (allocate-item-sets)
568 (set! kernel-base (make-vector nsyms 0))
569 (set! kernel-end (make-vector nsyms #f)))
572 (define (allocate-storage)
574 (set! red-set (make-vector (+ nrules 1) 0)))
579 (define (initialize-states)
580 (let ((p (new-core)))
581 (set-core-number! p 0)
582 (set-core-acc-sym! p #f)
583 (set-core-nitems! p 1)
584 (set-core-items! p '(0))
586 (set! first-state (list p))
587 (set! last-state first-state)
592 (define (generate-states)
596 (let loop ((this-state first-state))
597 (if (pair? this-state)
598 (let* ((x (car this-state))
599 (is (closure (core-items x))))
600 (save-reductions x is)
605 (loop (cdr this-state))))))
608 ;; Fonction calculant les symboles sur lesquels il faut "shifter"
609 ;; et regroupe les items en fonction de ces symboles
611 (define (new-itemsets itemset)
613 (set! shift-symbol '())
617 (vector-set! kernel-end i '())
620 (let loop ((isp itemset))
623 (sym (vector-ref ritem i)))
626 (set! shift-symbol (sinsert sym shift-symbol))
627 (let ((x (vector-ref kernel-end sym)))
630 (vector-set! kernel-base sym (cons (+ i 1) x))
631 (vector-set! kernel-end sym (vector-ref kernel-base sym)))
633 (set-cdr! x (list (+ i 1)))
634 (vector-set! kernel-end sym (cdr x)))))))
637 (set! nshifts (length shift-symbol)))
641 (define (get-state sym)
642 (let* ((isp (vector-ref kernel-base sym))
644 (key (let loop ((isp1 isp) (k 0))
646 (modulo k STATE-TABLE-SIZE)
647 (loop (cdr isp1) (+ k (car isp1))))))
648 (sp (vector-ref state-table key)))
650 (let ((x (new-state sym)))
651 (vector-set! state-table key (list x))
654 (if (and (= n (core-nitems (car sp1)))
655 (let loop2 ((i1 isp) (t (core-items (car sp1))))
659 (loop2 (cdr i1) (cdr t))
661 (core-number (car sp1))
662 (if (null? (cdr sp1))
663 (let ((x (new-state sym)))
664 (set-cdr! sp1 (list x))
666 (loop (cdr sp1))))))))
669 (define (new-state sym)
670 (let* ((isp (vector-ref kernel-base sym))
673 (set-core-number! p nstates)
674 (set-core-acc-sym! p sym)
675 (if (= sym nvars) (set! final-state nstates))
676 (set-core-nitems! p n)
677 (set-core-items! p isp)
678 (set-cdr! last-state (list p))
679 (set! last-state (cdr last-state))
680 (set! nstates (+ nstates 1))
686 (define (append-states)
688 (let loop ((l (reverse shift-symbol)))
691 (cons (get-state (car l)) (loop (cdr l)))))))
695 (define (save-shifts core)
696 (let ((p (new-shift)))
697 (set-shift-number! p (core-number core))
698 (set-shift-nshifts! p nshifts)
699 (set-shift-shifts! p shift-set)
702 (set-cdr! last-shift (list p))
703 (set! last-shift (cdr last-shift)))
705 (set! first-shift (list p))
706 (set! last-shift first-shift)))))
708 (define (save-reductions core itemset)
709 (let ((rs (let loop ((l itemset))
712 (let ((item (vector-ref ritem (car l))))
714 (cons (- item) (loop (cdr l)))
718 (set-red-number! p (core-number core))
719 (set-red-nreds! p (length rs))
720 (set-red-rules! p rs)
723 (set-cdr! last-reduction (list p))
724 (set! last-reduction (cdr last-reduction)))
726 (set! first-reduction (list p))
727 (set! last-reduction first-reduction)))))))
733 (set! token-set-size (+ 1 (quotient nterms (BITS-PER-WORD))))
734 (set-accessing-symbol)
736 (set-reduction-table)
743 (compute-lookaheads))
745 (define (set-accessing-symbol)
746 (set! acces-symbol (make-vector nstates #f))
747 (let loop ((l first-state))
750 (vector-set! acces-symbol (core-number x) (core-acc-sym x))
753 (define (set-shift-table)
754 (set! shift-table (make-vector nstates #f))
755 (let loop ((l first-shift))
758 (vector-set! shift-table (shift-number x) x)
761 (define (set-reduction-table)
762 (set! reduction-table (make-vector nstates #f))
763 (let loop ((l first-reduction))
766 (vector-set! reduction-table (red-number x) x)
769 (define (set-max-rhs)
770 (let loop ((p 0) (curmax 0) (length 0))
771 (let ((x (vector-ref ritem p)))
774 (loop (+ p 1) curmax (+ length 1))
775 (loop (+ p 1) (max curmax length) 0))
776 (set! maxrhs curmax)))))
778 (define (initialize-LA)
784 (set! consistent (make-vector nstates #f))
785 (set! lookaheads (make-vector (+ nstates 1) #f))
787 (let loop ((count 0) (i 0))
790 (vector-set! lookaheads i count)
791 (let ((rp (vector-ref reduction-table i))
792 (sp (vector-ref shift-table i)))
794 (or (> (red-nreds rp) 1)
797 (< (vector-ref acces-symbol
798 (last (shift-shifts sp)))
800 (loop (+ count (red-nreds rp)) (+ i 1))
802 (vector-set! consistent i #t)
803 (loop count (+ i 1))))))
806 (vector-set! lookaheads nstates count)
807 (let ((c (max count 1)))
808 (set! LA (make-vector c #f))
809 (do ((j 0 (+ j 1))) ((= j c)) (vector-set! LA j (new-set token-set-size)))
810 (set! LAruleno (make-vector c -1))
811 (set! lookback (make-vector c #f)))
812 (let loop ((i 0) (np 0))
814 (if (vector-ref consistent i)
816 (let ((rp (vector-ref reduction-table i)))
818 (let loop2 ((j (red-rules rp)) (np2 np))
822 (vector-set! LAruleno np2 (car j))
823 (loop2 (cdr j) (+ np2 1)))))
824 (loop (+ i 1) np))))))))))
827 (define (set-goto-map)
828 (set! goto-map (make-vector (+ nvars 1) 0))
829 (let ((temp-map (make-vector (+ nvars 1) 0)))
830 (let loop ((ng 0) (sp first-shift))
832 (let loop2 ((i (reverse (shift-shifts (car sp)))) (ng2 ng))
834 (let ((symbol (vector-ref acces-symbol (car i))))
837 (vector-set! goto-map symbol
838 (+ 1 (vector-ref goto-map symbol)))
839 (loop2 (cdr i) (+ ng2 1)))
840 (loop2 (cdr i) ng2)))
841 (loop ng2 (cdr sp))))
843 (let loop ((k 0) (i 0))
846 (vector-set! temp-map i k)
847 (loop (+ k (vector-ref goto-map i)) (+ i 1)))
852 (vector-set! goto-map i (vector-ref temp-map i)))
855 (vector-set! goto-map nvars ngotos)
856 (vector-set! temp-map nvars ngotos)
857 (set! from-state (make-vector ngotos #f))
858 (set! to-state (make-vector ngotos #f))
860 (do ((sp first-shift (cdr sp)))
863 (state1 (shift-number x)))
864 (do ((i (shift-shifts x) (cdr i)))
866 (let* ((state2 (car i))
867 (symbol (vector-ref acces-symbol state2)))
869 (let ((k (vector-ref temp-map symbol)))
870 (vector-set! temp-map symbol (+ k 1))
871 (vector-set! from-state k state1)
872 (vector-set! to-state k state2))))))))))))))
875 (define (map-goto state symbol)
876 (let loop ((low (vector-ref goto-map symbol))
877 (high (- (vector-ref goto-map (+ symbol 1)) 1)))
880 (display (list "Error in map-goto" state symbol) (current-error-port))
881 (newline (current-error-port))
883 (let* ((middle (quotient (+ low high) 2))
884 (s (vector-ref from-state middle)))
889 (loop (+ middle 1) high))
891 (loop low (- middle 1))))))))
894 (define (initialize-F)
895 (set! F (make-vector ngotos #f))
896 (do ((i 0 (+ i 1))) ((= i ngotos)) (vector-set! F i (new-set token-set-size)))
898 (let ((reads (make-vector ngotos #f)))
900 (let loop ((i 0) (rowp 0))
902 (let* ((rowf (vector-ref F rowp))
903 (stateno (vector-ref to-state i))
904 (sp (vector-ref shift-table stateno)))
906 (let loop2 ((j (shift-shifts sp)) (edges '()))
908 (let ((symbol (vector-ref acces-symbol (car j))))
910 (if (vector-ref nullable symbol)
911 (loop2 (cdr j) (cons (map-goto stateno symbol)
913 (loop2 (cdr j) edges))
915 (set-bit rowf (- symbol nvars))
916 (loop2 (cdr j) edges))))
918 (vector-set! reads i (reverse edges))))))
919 (loop (+ i 1) (+ rowp 1)))))
922 (define (add-lookback-edge stateno ruleno gotono)
923 (let ((k (vector-ref lookaheads (+ stateno 1))))
924 (let loop ((found #f) (i (vector-ref lookaheads stateno)))
925 (if (and (not found) (< i k))
926 (if (= (vector-ref LAruleno i) ruleno)
928 (loop found (+ i 1)))
931 (begin (display "Error in add-lookback-edge : " (current-error-port))
932 (display (list stateno ruleno gotono) (current-error-port))
933 (newline (current-error-port)))
934 (vector-set! lookback i
935 (cons gotono (vector-ref lookback i))))))))
938 (define (transpose r-arg n)
939 (let ((new-end (make-vector n #f))
940 (new-R (make-vector n #f)))
943 (let ((x (list 'bidon)))
944 (vector-set! new-R i x)
945 (vector-set! new-end i x)))
948 (let ((sp (vector-ref r-arg i)))
953 (y (vector-ref new-end x)))
954 (set-cdr! y (cons i (cdr y)))
955 (vector-set! new-end x (cdr y))
956 (loop (cdr sp2))))))))
959 (vector-set! new-R i (cdr (vector-ref new-R i))))
965 (define (build-relations)
967 (define (get-state stateno symbol)
968 (let loop ((j (shift-shifts (vector-ref shift-table stateno)))
973 (if (= (vector-ref acces-symbol st2) symbol)
975 (loop (cdr j) st2))))))
977 (set! includes (make-vector ngotos #f))
980 (let ((state1 (vector-ref from-state i))
981 (symbol1 (vector-ref acces-symbol (vector-ref to-state i))))
982 (let loop ((rulep (vector-ref derives symbol1))
985 (let ((*rulep (car rulep)))
986 (let loop2 ((rp (vector-ref rrhs *rulep))
988 (states (list state1)))
989 (let ((*rp (vector-ref ritem rp)))
991 (let ((st (get-state stateno *rp)))
992 (loop2 (+ rp 1) st (cons st states)))
995 (if (not (vector-ref consistent stateno))
996 (add-lookback-edge stateno *rulep i))
998 (let loop2 ((done #f)
1003 (let ((*rp (vector-ref ritem rp2)))
1004 (if (< -1 *rp nvars)
1005 (loop2 (not (vector-ref nullable *rp))
1008 (cons (map-goto (car stp) *rp) edgp))
1009 (loop2 #t stp rp2 edgp)))
1011 (loop (cdr rulep) edgp))))))))
1012 (vector-set! includes i edges)))))
1013 (set! includes (transpose includes ngotos)))
1017 (define (compute-lookaheads)
1018 (let ((n (vector-ref lookaheads nstates)))
1021 (let loop2 ((sp (vector-ref lookback i)))
1023 (let ((LA-i (vector-ref LA i))
1024 (F-j (vector-ref F (car sp))))
1025 (bit-union LA-i F-j token-set-size)
1027 (loop (+ i 1))))))))
1031 (define (digraph relation)
1032 (define infinity (+ ngotos 2))
1033 (define INDEX (make-vector (+ ngotos 1) 0))
1034 (define VERTICES (make-vector (+ ngotos 1) 0))
1038 (define (traverse i)
1039 (set! top (+ 1 top))
1040 (vector-set! VERTICES top i)
1042 (vector-set! INDEX i height)
1043 (let ((rp (vector-ref R i)))
1045 (let loop ((rp2 rp))
1047 (let ((j (car rp2)))
1048 (if (= 0 (vector-ref INDEX j))
1050 (if (> (vector-ref INDEX i)
1051 (vector-ref INDEX j))
1052 (vector-set! INDEX i (vector-ref INDEX j)))
1053 (let ((F-i (vector-ref F i))
1054 (F-j (vector-ref F j)))
1055 (bit-union F-i F-j token-set-size))
1056 (loop (cdr rp2))))))
1057 (if (= (vector-ref INDEX i) height)
1059 (let ((j (vector-ref VERTICES top)))
1060 (set! top (- top 1))
1061 (vector-set! INDEX j infinity)
1064 (bit-union (vector-ref F i)
1072 (if (and (= 0 (vector-ref INDEX i))
1073 (pair? (vector-ref R i)))
1078 ;; ---------------------------------------------------------------------- ;;
1079 ;; operator precedence management ;;
1080 ;; ---------------------------------------------------------------------- ;;
1082 ; a vector of precedence descriptors where each element
1083 ; is of the form (terminal type precedence)
1084 (define the-terminals/prec #f) ; terminal symbols with precedence
1085 ; the precedence is an integer >= 0
1086 (define (get-symbol-precedence sym)
1087 (caddr (vector-ref the-terminals/prec sym)))
1088 ; the operator type is either 'none, 'left, 'right, or 'nonassoc
1089 (define (get-symbol-assoc sym)
1090 (cadr (vector-ref the-terminals/prec sym)))
1092 (define rule-precedences '())
1093 (define (add-rule-precedence! rule sym)
1094 (set! rule-precedences
1095 (cons (cons rule sym) rule-precedences)))
1097 (define (get-rule-precedence ruleno)
1099 ((assq ruleno rule-precedences)
1101 (get-symbol-precedence (cdr p))))
1103 ;; process the rule symbols from left to right
1104 (let loop ((i (vector-ref rrhs ruleno))
1106 (let ((item (vector-ref ritem i)))
1112 ;; it's a terminal symbol
1113 (loop i1 (get-symbol-precedence (- item nvars)))
1114 (loop i1 prec)))))))))
1116 ;; ---------------------------------------------------------------------- ;;
1117 ;; Build the various tables ;;
1118 ;; ---------------------------------------------------------------------- ;;
1119 (define (build-tables)
1121 (define (resolve-conflict sym rule)
1122 (let ((sym-prec (get-symbol-precedence sym))
1123 (sym-assoc (get-symbol-assoc sym))
1124 (rule-prec (get-rule-precedence rule)))
1126 ((> sym-prec rule-prec) 'shift)
1127 ((< sym-prec rule-prec) 'reduce)
1128 ((eq? sym-assoc 'left) 'reduce)
1129 ((eq? sym-assoc 'right) 'shift)
1132 ;; --- Add an action to the action table ------------------------------ ;;
1133 (define (add-action St Sym Act)
1134 (let* ((x (vector-ref action-table St))
1137 (if (not (= Act (cdr y)))
1138 ;; -- there is a conflict
1140 (if (and (<= (cdr y) 0)
1142 ;; --- reduce/reduce conflict ----------------------- ;;
1144 (display "%% Reduce/Reduce conflict " (current-error-port))
1145 (display "(reduce " (current-error-port))
1146 (display (- Act) (current-error-port))
1147 (display ", reduce " (current-error-port))
1148 (display (- (cdr y)) (current-error-port))
1149 (display ") on " (current-error-port))
1150 (print-symbol (+ Sym nvars) (current-error-port))
1151 (display " in state " (current-error-port))
1152 (display St (current-error-port))
1153 (newline (current-error-port))
1154 (set-cdr! y (max (cdr y) Act)))
1155 ;; --- shift/reduce conflict ------------------------ ;;
1156 ;; can we resolve the conflict using precedences?
1157 (case (resolve-conflict Sym (- (cdr y)))
1163 #f) ; well, nothing to do...
1164 ;; -- signal a conflict!
1166 (display "%% Shift/Reduce conflict " (current-error-port))
1167 (display "(shift " (current-error-port))
1168 (display Act (current-error-port))
1169 (display ", reduce " (current-error-port))
1170 (display (- (cdr y)) (current-error-port))
1171 (display ") on " (current-error-port))
1172 (print-symbol (+ Sym nvars) (current-error-port))
1173 (display " in state " (current-error-port))
1174 (display St (current-error-port))
1175 (newline (current-error-port))
1176 (set-cdr! y Act))))))
1178 (vector-set! action-table St (cons (cons Sym Act) x)))))
1180 (set! action-table (make-vector nstates '()))
1182 (do ((i 0 (+ i 1))) ; i = state
1184 (let ((red (vector-ref reduction-table i)))
1185 (if (and red (>= (red-nreds red) 1))
1186 (if (and (= (red-nreds red) 1) (vector-ref consistent i))
1187 (add-action i 'default (- (car (red-rules red))))
1188 (let ((k (vector-ref lookaheads (+ i 1))))
1189 (let loop ((j (vector-ref lookaheads i)))
1191 (let ((rule (- (vector-ref LAruleno j)))
1192 (lav (vector-ref LA j)))
1193 (let loop2 ((token 0) (x (vector-ref lav 0)) (y 1) (z 0))
1194 (if (< token nterms)
1196 (let ((in-la-set? (modulo x 2)))
1197 (if (= in-la-set? 1)
1198 (add-action i token rule)))
1199 (if (= y (BITS-PER-WORD))
1201 (vector-ref lav (+ z 1))
1204 (loop2 (+ token 1) (quotient x 2) (+ y 1) z)))))
1205 (loop (+ j 1)))))))))
1207 (let ((shiftp (vector-ref shift-table i)))
1209 (let loop ((k (shift-shifts shiftp)))
1211 (let* ((state (car k))
1212 (symbol (vector-ref acces-symbol state)))
1213 (if (>= symbol nvars)
1214 (add-action i (- symbol nvars) state))
1215 (loop (cdr k))))))))
1217 (add-action final-state 0 'accept))
1219 (define (compact-action-table terms)
1220 (define (most-common-action acts)
1222 (let loop ((l acts))
1225 (y (assv x accums)))
1226 (if (and (number? x) (< x 0))
1228 (set-cdr! y (+ 1 (cdr y)))
1229 (set! accums (cons `(,x . 1) accums))))
1232 (let loop ((l accums) (max 0) (sym #f))
1237 (loop (cdr l) (cdr x) (car x))
1238 (loop (cdr l) max sym)))))))
1240 (define (translate-terms acts)
1242 (cons (list-ref terms (car act))
1248 (let ((acts (vector-ref action-table i)))
1249 (if (vector? (vector-ref reduction-table i))
1250 (let ((act (most-common-action acts)))
1251 (vector-set! action-table i
1252 (cons `(*default* . ,(if act act 'error))
1254 (lalr-filter (lambda (x)
1255 (not (eq? (cdr x) act)))
1257 (vector-set! action-table i
1258 (cons `(*default* . *error*)
1259 (translate-terms acts)))))))
1265 (define (rewrite-grammar tokens grammar k)
1269 (define (check-terminal term terms)
1271 ((not (valid-terminal? term))
1272 (lalr-error "invalid terminal: " term))
1273 ((member term terms)
1274 (lalr-error "duplicate definition of terminal: " term))))
1276 (define (prec->type prec)
1277 (cdr (assq prec '((left: . left)
1279 (nonassoc: . nonassoc)))))
1282 ;; --- a few error conditions ---------------------------------------- ;;
1283 ((not (list? tokens))
1284 (lalr-error "Invalid token list: " tokens))
1285 ((not (pair? grammar))
1286 (lalr-error "Grammar definition must have a non-empty list of productions" '()))
1289 ;; --- check the terminals ---------------------------------------- ;;
1290 (let loop1 ((lst tokens)
1292 (rev-terms/prec '())
1295 (let ((term (car lst)))
1298 (if (and (memq (car term) '(left: right: nonassoc:))
1299 (not (null? (cdr term))))
1300 (let ((prec (+ prec-level 1))
1301 (optype (prec->type (car term))))
1302 (let loop-toks ((l (cdr term))
1303 (rev-terms rev-terms)
1304 (rev-terms/prec rev-terms/prec))
1306 (loop1 (cdr lst) rev-terms rev-terms/prec prec)
1307 (let ((term (car l)))
1308 (check-terminal term rev-terms)
1311 (cons term rev-terms)
1312 (cons (list term optype prec) rev-terms/prec))))))
1314 (lalr-error "invalid operator precedence specification: " term)))
1317 (check-terminal term rev-terms)
1319 (cons term rev-terms)
1320 (cons (list term 'none 0) rev-terms/prec)
1323 ;; --- check the grammar rules ------------------------------ ;;
1324 (let loop2 ((lst grammar) (rev-nonterm-defs '()))
1326 (let ((def (car lst)))
1327 (if (not (pair? def))
1328 (lalr-error "Nonterminal definition must be a non-empty list" '())
1329 (let ((nonterm (car def)))
1330 (cond ((not (valid-nonterminal? nonterm))
1331 (lalr-error "Invalid nonterminal:" nonterm))
1332 ((or (member nonterm rev-terms)
1333 (assoc nonterm rev-nonterm-defs))
1334 (lalr-error "Nonterminal previously defined:" nonterm))
1337 (cons def rev-nonterm-defs)))))))
1338 (let* ((terms (cons eoi (reverse rev-terms)))
1339 (terms/prec (cons '(eoi none 0) (reverse rev-terms/prec)))
1340 (nonterm-defs (reverse rev-nonterm-defs))
1341 (nonterms (cons '*start* (map car nonterm-defs))))
1342 (if (= (length nonterms) 1)
1343 (lalr-error "Grammar must contain at least one nonterminal" '())
1344 (let loop-defs ((defs (cons `(*start* (,(cadr nonterms) ,eoi) -> $1)
1349 (let* ((nonterm-def (car defs))
1350 (compiled-def (rewrite-nonterm-def
1354 (loop-defs (cdr defs)
1355 (+ ruleno (length compiled-def))
1356 (cons compiled-def comp-defs)))
1358 (let ((compiled-nonterm-defs (reverse comp-defs)))
1362 (map (lambda (x) (cons (caaar x) (map cdar x)))
1363 compiled-nonterm-defs)
1364 (apply append compiled-nonterm-defs))))))))))))))
1367 (define *arrow* '->)
1369 (define (rewrite-nonterm-def nonterm-def ruleno terms nonterms)
1371 (define No-NT (length nonterms))
1374 (let ((PosInNT (pos-in-list x nonterms)))
1377 (let ((PosInT (pos-in-list x terms)))
1380 (lalr-error "undefined symbol : " x))))))
1382 (define (process-prec-directive rhs ruleno)
1386 (let ((first (car l))
1389 ((or (member first terms) (member first nonterms))
1390 (cons first (loop rest)))
1392 (eq? (car first) 'prec:))
1394 (if (and (pair? (cdr first))
1395 (member (cadr first) terms))
1396 (if (null? (cddr first))
1398 (add-rule-precedence! ruleno (pos-in-list (cadr first) terms))
1400 (lalr-error "prec: directive should be at end of rule: " rhs))
1401 (lalr-error "Invalid prec: directive: " first)))
1403 (lalr-error "Invalid terminal or nonterminal: " first)))))))
1406 (if (not (pair? (cdr nonterm-def)))
1407 (lalr-error "At least one production needed for nonterminal" (car nonterm-def))
1408 (let ((name (symbol->string (car nonterm-def))))
1409 (let loop1 ((lst (cdr nonterm-def))
1411 (rev-productions-and-actions '()))
1412 (if (not (pair? lst))
1413 (reverse rev-productions-and-actions)
1414 (let* ((rhs (process-prec-directive (car lst) (+ ruleno i -1)))
1416 (prod (map encode (cons (car nonterm-def) rhs))))
1417 (for-each (lambda (x)
1418 (if (not (or (member x terms) (member x nonterms)))
1419 (lalr-error "Invalid terminal or nonterminal" x)))
1421 (if (and (pair? rest)
1422 (eq? (car rest) *arrow*)
1426 (cons (cons prod (cadr rest))
1427 rev-productions-and-actions))
1428 (let* ((rhs-length (length rhs))
1431 (cons (list 'quote (string->symbol
1435 (number->string i))))
1437 (if (> j rhs-length)
1439 (cons (string->symbol
1442 (number->string j)))
1443 (loop-j (+ j 1)))))))))
1446 (cons (cons prod action)
1447 rev-productions-and-actions))))))))))
1449 (define (valid-nonterminal? x)
1452 (define (valid-terminal? x)
1455 ;; ---------------------------------------------------------------------- ;;
1457 ;; ---------------------------------------------------------------------- ;;
1458 (define (pos-in-list x lst)
1459 (let loop ((lst lst) (i 0))
1460 (cond ((not (pair? lst)) #f)
1461 ((equal? (car lst) x) i)
1462 (else (loop (cdr lst) (+ i 1))))))
1464 (define (sunion lst1 lst2) ; union of sorted lists
1465 (let loop ((L1 lst1)
1467 (cond ((null? L1) L2)
1470 (let ((x (car L1)) (y (car L2)))
1473 (cons y (loop L1 (cdr L2))))
1475 (cons x (loop (cdr L1) L2)))
1480 (define (sinsert elem lst)
1481 (let loop ((l1 lst))
1488 (cons x (loop (cdr l1))))
1492 (define (lalr-filter p lst)
1496 (let ((x (car l)) (y (cdr l)))
1501 ;; ---------------------------------------------------------------------- ;;
1502 ;; Debugging tools ... ;;
1503 ;; ---------------------------------------------------------------------- ;;
1504 (define the-terminals #f) ; names of terminal symbols
1505 (define the-nonterminals #f) ; non-terminals
1507 (define (print-item item-no)
1508 (let loop ((i item-no))
1509 (let ((v (vector-ref ritem i)))
1513 (nt (vector-ref rlhs rlno)))
1514 (display (vector-ref the-nonterminals nt)) (display " --> ")
1515 (let loop ((i (vector-ref rrhs rlno)))
1516 (let ((v (vector-ref ritem i)))
1530 (define (print-symbol n . port)
1531 (display (if (>= n nvars)
1532 (vector-ref the-terminals (- n nvars))
1533 (vector-ref the-nonterminals n))
1535 (current-output-port)
1538 (define (print-states)
1539 "Print the states of a generated parser."
1540 (define (print-action act)
1543 (display " : Error"))
1545 (display " : Accept input"))
1547 (display " : reduce using rule ")
1550 (display " : shift and goto state ")
1555 (define (print-actions acts)
1556 (let loop ((l acts))
1559 (let ((sym (caar l))
1564 (display "default action"))
1567 (print-symbol (+ sym nvars))
1572 (if (not action-table)
1574 (display "No generated parser available!")
1578 (display "State table") (newline)
1579 (display "-----------") (newline) (newline)
1581 (let loop ((l first-state))
1584 (let* ((core (car l))
1585 (i (core-number core))
1586 (items (core-items core))
1587 (actions (vector-ref action-table i)))
1588 (display "state ") (display i) (newline)
1590 (for-each (lambda (x) (display " ") (print-item x))
1593 (print-actions actions)
1595 (loop (cdr l))))))))
1599 ;; ---------------------------------------------------------------------- ;;
1601 (define build-goto-table
1608 (let loop ((l (shift-shifts shifts)))
1611 (let* ((state (car l))
1612 (symbol (vector-ref acces-symbol state)))
1613 (if (< symbol nvars)
1614 (cons `(,symbol . ,state)
1618 (vector->list shift-table)))))
1621 (define build-reduction-table
1622 (lambda (gram/actions)
1627 (let ((act (cdr p)))
1628 `(lambda (___stack ___sp ___goto-table ___k)
1629 ,(let* ((nt (caar p)) (rhs (cdar p)) (n (length rhs)))
1631 (let loop ((i 1) (l rhs))
1633 (let ((rest (cdr l)))
1640 (vector-ref ___stack (- ___sp ,(- (* i 2) 1))))
1641 (loop (+ i 1) rest)))
1646 `(___push ___stack (- ___sp ,(* 2 n))
1647 ,nt ___goto-table ,(cdr p) ___k)))))))
1652 ;; @section (api "API")
1654 (define-macro (lalr-parser tokens . rules)
1655 (let* ((gram/actions (gen-tables! tokens rules))
1657 `(letrec ((___max-stack-size 500)
1659 (___atable ',action-table)
1660 (___gtable ,(build-goto-table))
1661 (___grow-stack (lambda (stack)
1662 ;; make a new stack twice as big as the original
1663 (let ((new-stack (make-vector (* 2 (vector-length stack)) #f)))
1664 ;; then copy the elements...
1665 (let loop ((i (- (vector-length stack) 1)))
1669 (vector-set! new-stack i (vector-ref stack i))
1670 (loop (- i 1))))))))
1672 (___push (lambda (stack sp new-cat goto-table lval k)
1673 (let* ((state (vector-ref stack sp))
1674 (new-state (cdr (assq new-cat (vector-ref goto-table state))))
1676 (stack (if (< new-sp (vector-length stack))
1678 (___grow-stack stack))))
1679 (vector-set! stack new-sp new-state)
1680 (vector-set! stack (- new-sp 1) lval)
1683 (___action (lambda (x l)
1684 (let ((y (assq x l)))
1685 (if y (cdr y) (cdar l)))))
1687 (___rtable ,(build-reduction-table gram/actions)))
1689 (lambda (lexerp errorp)
1691 (let ((stack (make-vector ___max-stack-size 0)))
1692 (let loop ((stack stack) (sp 0) (input (lexerp)))
1693 (let* ((state (vector-ref stack sp))
1694 (i (if (pair? input) (car input) input))
1695 (attr (if (pair? input) (cdr input) #f))
1696 (act (___action i (vector-ref ___atable state))))
1698 (if (not (symbol? i))
1699 (errorp "PARSE ERROR: invalid token: " input))
1703 ;; Input succesfully parsed
1705 (vector-ref stack 1))
1707 ;; Syntax error in input
1710 (errorp "PARSE ERROR : unexpected end of input ")
1711 (errorp "PARSE ERROR : unexpected token : " input)))
1713 ;; Shift current token on top of the stack
1715 (let ((stack (if (< (+ sp 2) (vector-length stack))
1717 (___grow-stack stack))))
1718 (vector-set! stack (+ sp 1) attr)
1719 (vector-set! stack (+ sp 2) act)
1720 (loop stack (+ sp 2) (lexerp))))
1722 ;; Reduce by rule (- act)
1724 ((vector-ref ___rtable (- act))
1727 (loop stack sp input))))))))))))
1730 ;; arch-tag: 4FE771DE-F56D-11D8-8B77-000A95B4C7DC