Merge commit '81d2c84674f03f9028f26474ab19d3d3f353881a'
[bpt/guile.git] / module / system / base / lalr.upstream.scm
1 ;;;
2 ;;;; An Efficient and Portable LALR(1) Parser Generator for Scheme
3 ;;;
4 ;; Copyright 1993, 2010 Dominique Boucher
5 ;;
6 ;; This program is free software: you can redistribute it and/or
7 ;; modify it under the terms of the GNU Lesser General Public License
8 ;; as published by the Free Software Foundation, either version 3 of
9 ;; the License, or (at your option) any later version.
10 ;;
11 ;; This program is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU Lesser General Public License for more details.
15 ;;
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
18
19
20 (define *lalr-scm-version* "2.4.1")
21
22
23 (cond-expand
24
25 ;; -- Gambit-C
26 (gambit
27
28 (define-macro (def-macro form . body)
29 `(define-macro ,form (let () ,@body)))
30
31 (def-macro (BITS-PER-WORD) 28)
32 (def-macro (logical-or x . y) `(bitwise-ior ,x ,@y))
33 (def-macro (lalr-error msg obj) `(error ,msg ,obj))
34
35 (define pprint pretty-print)
36 (define lalr-keyword? keyword?))
37
38 ;; --
39 (bigloo
40 (define-macro (def-macro form . body)
41 `(define-macro ,form (let () ,@body)))
42
43 (define pprint (lambda (obj) (write obj) (newline)))
44 (define lalr-keyword? keyword?)
45 (def-macro (BITS-PER-WORD) 29)
46 (def-macro (logical-or x . y) `(bit-or ,x ,@y))
47 (def-macro (lalr-error msg obj) `(error "lalr-parser" ,msg ,obj)))
48
49 ;; -- Chicken
50 (chicken
51
52 (define-macro (def-macro form . body)
53 `(define-macro ,form (let () ,@body)))
54
55 (define pprint pretty-print)
56 (define lalr-keyword? symbol?)
57 (def-macro (BITS-PER-WORD) 30)
58 (def-macro (logical-or x . y) `(bitwise-ior ,x ,@y))
59 (def-macro (lalr-error msg obj) `(error ,msg ,obj)))
60
61 ;; -- STKlos
62 (stklos
63 (require "pp")
64
65 (define (pprint form) (pp form :port (current-output-port)))
66
67 (define lalr-keyword? keyword?)
68 (define-macro (BITS-PER-WORD) 30)
69 (define-macro (logical-or x . y) `(bit-or ,x ,@y))
70 (define-macro (lalr-error msg obj) `(error 'lalr-parser ,msg ,obj)))
71
72 ;; -- Guile
73 (guile
74 (use-modules (ice-9 pretty-print))
75 (use-modules (srfi srfi-9))
76
77 (define pprint pretty-print)
78 (define lalr-keyword? symbol?)
79 (define-macro (BITS-PER-WORD) 30)
80 (define-macro (logical-or x . y) `(logior ,x ,@y))
81 (define-macro (lalr-error msg obj) `(error ,msg ,obj)))
82
83 ;; -- Kawa
84 (kawa
85 (require 'pretty-print)
86 (define (BITS-PER-WORD) 30)
87 (define logical-or logior)
88 (define (lalr-keyword? obj) (keyword? obj))
89 (define (pprint obj) (pretty-print obj))
90 (define (lalr-error msg obj) (error msg obj)))
91
92 ;; -- SISC
93 (sisc
94 (import logicops)
95 (import record)
96
97 (define pprint pretty-print)
98 (define lalr-keyword? symbol?)
99 (define-macro BITS-PER-WORD (lambda () 32))
100 (define-macro logical-or (lambda (x . y) `(logor ,x ,@y)))
101 (define-macro (lalr-error msg obj) `(error "~a ~S:" ,msg ,obj)))
102
103
104 (else
105 (error "Unsupported Scheme system")))
106
107
108 (define-record-type lexical-token
109 (make-lexical-token category source value)
110 lexical-token?
111 (category lexical-token-category)
112 (source lexical-token-source)
113 (value lexical-token-value))
114
115
116 (define-record-type source-location
117 (make-source-location input line column offset length)
118 source-location?
119 (input source-location-input)
120 (line source-location-line)
121 (column source-location-column)
122 (offset source-location-offset)
123 (length source-location-length))
124
125
126
127 ;; - Macros pour la gestion des vecteurs de bits
128
129 (define-macro (lalr-parser . arguments)
130 (define (set-bit v b)
131 (let ((x (quotient b (BITS-PER-WORD)))
132 (y (expt 2 (remainder b (BITS-PER-WORD)))))
133 (vector-set! v x (logical-or (vector-ref v x) y))))
134
135 (define (bit-union v1 v2 n)
136 (do ((i 0 (+ i 1)))
137 ((= i n))
138 (vector-set! v1 i (logical-or (vector-ref v1 i)
139 (vector-ref v2 i)))))
140
141 ;; - Macro pour les structures de donnees
142
143 (define (new-core) (make-vector 4 0))
144 (define (set-core-number! c n) (vector-set! c 0 n))
145 (define (set-core-acc-sym! c s) (vector-set! c 1 s))
146 (define (set-core-nitems! c n) (vector-set! c 2 n))
147 (define (set-core-items! c i) (vector-set! c 3 i))
148 (define (core-number c) (vector-ref c 0))
149 (define (core-acc-sym c) (vector-ref c 1))
150 (define (core-nitems c) (vector-ref c 2))
151 (define (core-items c) (vector-ref c 3))
152
153 (define (new-shift) (make-vector 3 0))
154 (define (set-shift-number! c x) (vector-set! c 0 x))
155 (define (set-shift-nshifts! c x) (vector-set! c 1 x))
156 (define (set-shift-shifts! c x) (vector-set! c 2 x))
157 (define (shift-number s) (vector-ref s 0))
158 (define (shift-nshifts s) (vector-ref s 1))
159 (define (shift-shifts s) (vector-ref s 2))
160
161 (define (new-red) (make-vector 3 0))
162 (define (set-red-number! c x) (vector-set! c 0 x))
163 (define (set-red-nreds! c x) (vector-set! c 1 x))
164 (define (set-red-rules! c x) (vector-set! c 2 x))
165 (define (red-number c) (vector-ref c 0))
166 (define (red-nreds c) (vector-ref c 1))
167 (define (red-rules c) (vector-ref c 2))
168
169
170 (define (new-set nelem)
171 (make-vector nelem 0))
172
173
174 (define (vector-map f v)
175 (let ((vm-n (- (vector-length v) 1)))
176 (let loop ((vm-low 0) (vm-high vm-n))
177 (if (= vm-low vm-high)
178 (vector-set! v vm-low (f (vector-ref v vm-low) vm-low))
179 (let ((vm-middle (quotient (+ vm-low vm-high) 2)))
180 (loop vm-low vm-middle)
181 (loop (+ vm-middle 1) vm-high))))))
182
183
184 ;; - Constantes
185 (define STATE-TABLE-SIZE 1009)
186
187
188 ;; - Tableaux
189 (define rrhs #f)
190 (define rlhs #f)
191 (define ritem #f)
192 (define nullable #f)
193 (define derives #f)
194 (define fderives #f)
195 (define firsts #f)
196 (define kernel-base #f)
197 (define kernel-end #f)
198 (define shift-symbol #f)
199 (define shift-set #f)
200 (define red-set #f)
201 (define state-table #f)
202 (define acces-symbol #f)
203 (define reduction-table #f)
204 (define shift-table #f)
205 (define consistent #f)
206 (define lookaheads #f)
207 (define LA #f)
208 (define LAruleno #f)
209 (define lookback #f)
210 (define goto-map #f)
211 (define from-state #f)
212 (define to-state #f)
213 (define includes #f)
214 (define F #f)
215 (define action-table #f)
216
217 ;; - Variables
218 (define nitems #f)
219 (define nrules #f)
220 (define nvars #f)
221 (define nterms #f)
222 (define nsyms #f)
223 (define nstates #f)
224 (define first-state #f)
225 (define last-state #f)
226 (define final-state #f)
227 (define first-shift #f)
228 (define last-shift #f)
229 (define first-reduction #f)
230 (define last-reduction #f)
231 (define nshifts #f)
232 (define maxrhs #f)
233 (define ngotos #f)
234 (define token-set-size #f)
235
236 (define driver-name 'lr-driver)
237
238 (define (gen-tables! tokens gram )
239 (initialize-all)
240 (rewrite-grammar
241 tokens
242 gram
243 (lambda (terms terms/prec vars gram gram/actions)
244 (set! the-terminals/prec (list->vector terms/prec))
245 (set! the-terminals (list->vector terms))
246 (set! the-nonterminals (list->vector vars))
247 (set! nterms (length terms))
248 (set! nvars (length vars))
249 (set! nsyms (+ nterms nvars))
250 (let ((no-of-rules (length gram/actions))
251 (no-of-items (let loop ((l gram/actions) (count 0))
252 (if (null? l)
253 count
254 (loop (cdr l) (+ count (length (caar l))))))))
255 (pack-grammar no-of-rules no-of-items gram)
256 (set-derives)
257 (set-nullable)
258 (generate-states)
259 (lalr)
260 (build-tables)
261 (compact-action-table terms)
262 gram/actions))))
263
264
265 (define (initialize-all)
266 (set! rrhs #f)
267 (set! rlhs #f)
268 (set! ritem #f)
269 (set! nullable #f)
270 (set! derives #f)
271 (set! fderives #f)
272 (set! firsts #f)
273 (set! kernel-base #f)
274 (set! kernel-end #f)
275 (set! shift-symbol #f)
276 (set! shift-set #f)
277 (set! red-set #f)
278 (set! state-table (make-vector STATE-TABLE-SIZE '()))
279 (set! acces-symbol #f)
280 (set! reduction-table #f)
281 (set! shift-table #f)
282 (set! consistent #f)
283 (set! lookaheads #f)
284 (set! LA #f)
285 (set! LAruleno #f)
286 (set! lookback #f)
287 (set! goto-map #f)
288 (set! from-state #f)
289 (set! to-state #f)
290 (set! includes #f)
291 (set! F #f)
292 (set! action-table #f)
293 (set! nstates #f)
294 (set! first-state #f)
295 (set! last-state #f)
296 (set! final-state #f)
297 (set! first-shift #f)
298 (set! last-shift #f)
299 (set! first-reduction #f)
300 (set! last-reduction #f)
301 (set! nshifts #f)
302 (set! maxrhs #f)
303 (set! ngotos #f)
304 (set! token-set-size #f)
305 (set! rule-precedences '()))
306
307
308 (define (pack-grammar no-of-rules no-of-items gram)
309 (set! nrules (+ no-of-rules 1))
310 (set! nitems no-of-items)
311 (set! rlhs (make-vector nrules #f))
312 (set! rrhs (make-vector nrules #f))
313 (set! ritem (make-vector (+ 1 nitems) #f))
314
315 (let loop ((p gram) (item-no 0) (rule-no 1))
316 (if (not (null? p))
317 (let ((nt (caar p)))
318 (let loop2 ((prods (cdar p)) (it-no2 item-no) (rl-no2 rule-no))
319 (if (null? prods)
320 (loop (cdr p) it-no2 rl-no2)
321 (begin
322 (vector-set! rlhs rl-no2 nt)
323 (vector-set! rrhs rl-no2 it-no2)
324 (let loop3 ((rhs (car prods)) (it-no3 it-no2))
325 (if (null? rhs)
326 (begin
327 (vector-set! ritem it-no3 (- rl-no2))
328 (loop2 (cdr prods) (+ it-no3 1) (+ rl-no2 1)))
329 (begin
330 (vector-set! ritem it-no3 (car rhs))
331 (loop3 (cdr rhs) (+ it-no3 1))))))))))))
332
333
334 (define (set-derives)
335 (define delts (make-vector (+ nrules 1) 0))
336 (define dset (make-vector nvars -1))
337
338 (let loop ((i 1) (j 0)) ; i = 0
339 (if (< i nrules)
340 (let ((lhs (vector-ref rlhs i)))
341 (if (>= lhs 0)
342 (begin
343 (vector-set! delts j (cons i (vector-ref dset lhs)))
344 (vector-set! dset lhs j)
345 (loop (+ i 1) (+ j 1)))
346 (loop (+ i 1) j)))))
347
348 (set! derives (make-vector nvars 0))
349
350 (let loop ((i 0))
351 (if (< i nvars)
352 (let ((q (let loop2 ((j (vector-ref dset i)) (s '()))
353 (if (< j 0)
354 s
355 (let ((x (vector-ref delts j)))
356 (loop2 (cdr x) (cons (car x) s)))))))
357 (vector-set! derives i q)
358 (loop (+ i 1))))))
359
360
361
362 (define (set-nullable)
363 (set! nullable (make-vector nvars #f))
364 (let ((squeue (make-vector nvars #f))
365 (rcount (make-vector (+ nrules 1) 0))
366 (rsets (make-vector nvars #f))
367 (relts (make-vector (+ nitems nvars 1) #f)))
368 (let loop ((r 0) (s2 0) (p 0))
369 (let ((*r (vector-ref ritem r)))
370 (if *r
371 (if (< *r 0)
372 (let ((symbol (vector-ref rlhs (- *r))))
373 (if (and (>= symbol 0)
374 (not (vector-ref nullable symbol)))
375 (begin
376 (vector-set! nullable symbol #t)
377 (vector-set! squeue s2 symbol)
378 (loop (+ r 1) (+ s2 1) p))))
379 (let loop2 ((r1 r) (any-tokens #f))
380 (let* ((symbol (vector-ref ritem r1)))
381 (if (> symbol 0)
382 (loop2 (+ r1 1) (or any-tokens (>= symbol nvars)))
383 (if (not any-tokens)
384 (let ((ruleno (- symbol)))
385 (let loop3 ((r2 r) (p2 p))
386 (let ((symbol (vector-ref ritem r2)))
387 (if (> symbol 0)
388 (begin
389 (vector-set! rcount ruleno
390 (+ (vector-ref rcount ruleno) 1))
391 (vector-set! relts p2
392 (cons (vector-ref rsets symbol)
393 ruleno))
394 (vector-set! rsets symbol p2)
395 (loop3 (+ r2 1) (+ p2 1)))
396 (loop (+ r2 1) s2 p2)))))
397 (loop (+ r1 1) s2 p))))))
398 (let loop ((s1 0) (s3 s2))
399 (if (< s1 s3)
400 (let loop2 ((p (vector-ref rsets (vector-ref squeue s1))) (s4 s3))
401 (if p
402 (let* ((x (vector-ref relts p))
403 (ruleno (cdr x))
404 (y (- (vector-ref rcount ruleno) 1)))
405 (vector-set! rcount ruleno y)
406 (if (= y 0)
407 (let ((symbol (vector-ref rlhs ruleno)))
408 (if (and (>= symbol 0)
409 (not (vector-ref nullable symbol)))
410 (begin
411 (vector-set! nullable symbol #t)
412 (vector-set! squeue s4 symbol)
413 (loop2 (car x) (+ s4 1)))
414 (loop2 (car x) s4)))
415 (loop2 (car x) s4))))
416 (loop (+ s1 1) s4)))))))))
417
418
419
420 (define (set-firsts)
421 (set! firsts (make-vector nvars '()))
422
423 ;; -- initialization
424 (let loop ((i 0))
425 (if (< i nvars)
426 (let loop2 ((sp (vector-ref derives i)))
427 (if (null? sp)
428 (loop (+ i 1))
429 (let ((sym (vector-ref ritem (vector-ref rrhs (car sp)))))
430 (if (< -1 sym nvars)
431 (vector-set! firsts i (sinsert sym (vector-ref firsts i))))
432 (loop2 (cdr sp)))))))
433
434 ;; -- reflexive and transitive closure
435 (let loop ((continue #t))
436 (if continue
437 (let loop2 ((i 0) (cont #f))
438 (if (>= i nvars)
439 (loop cont)
440 (let* ((x (vector-ref firsts i))
441 (y (let loop3 ((l x) (z x))
442 (if (null? l)
443 z
444 (loop3 (cdr l)
445 (sunion (vector-ref firsts (car l)) z))))))
446 (if (equal? x y)
447 (loop2 (+ i 1) cont)
448 (begin
449 (vector-set! firsts i y)
450 (loop2 (+ i 1) #t))))))))
451
452 (let loop ((i 0))
453 (if (< i nvars)
454 (begin
455 (vector-set! firsts i (sinsert i (vector-ref firsts i)))
456 (loop (+ i 1))))))
457
458
459
460
461 (define (set-fderives)
462 (set! fderives (make-vector nvars #f))
463
464 (set-firsts)
465
466 (let loop ((i 0))
467 (if (< i nvars)
468 (let ((x (let loop2 ((l (vector-ref firsts i)) (fd '()))
469 (if (null? l)
470 fd
471 (loop2 (cdr l)
472 (sunion (vector-ref derives (car l)) fd))))))
473 (vector-set! fderives i x)
474 (loop (+ i 1))))))
475
476
477 (define (closure core)
478 ;; Initialization
479 (define ruleset (make-vector nrules #f))
480
481 (let loop ((csp core))
482 (if (not (null? csp))
483 (let ((sym (vector-ref ritem (car csp))))
484 (if (< -1 sym nvars)
485 (let loop2 ((dsp (vector-ref fderives sym)))
486 (if (not (null? dsp))
487 (begin
488 (vector-set! ruleset (car dsp) #t)
489 (loop2 (cdr dsp))))))
490 (loop (cdr csp)))))
491
492 (let loop ((ruleno 1) (csp core) (itemsetv '())) ; ruleno = 0
493 (if (< ruleno nrules)
494 (if (vector-ref ruleset ruleno)
495 (let ((itemno (vector-ref rrhs ruleno)))
496 (let loop2 ((c csp) (itemsetv2 itemsetv))
497 (if (and (pair? c)
498 (< (car c) itemno))
499 (loop2 (cdr c) (cons (car c) itemsetv2))
500 (loop (+ ruleno 1) c (cons itemno itemsetv2)))))
501 (loop (+ ruleno 1) csp itemsetv))
502 (let loop2 ((c csp) (itemsetv2 itemsetv))
503 (if (pair? c)
504 (loop2 (cdr c) (cons (car c) itemsetv2))
505 (reverse itemsetv2))))))
506
507
508
509 (define (allocate-item-sets)
510 (set! kernel-base (make-vector nsyms 0))
511 (set! kernel-end (make-vector nsyms #f)))
512
513
514 (define (allocate-storage)
515 (allocate-item-sets)
516 (set! red-set (make-vector (+ nrules 1) 0)))
517
518 ; --
519
520
521 (define (initialize-states)
522 (let ((p (new-core)))
523 (set-core-number! p 0)
524 (set-core-acc-sym! p #f)
525 (set-core-nitems! p 1)
526 (set-core-items! p '(0))
527
528 (set! first-state (list p))
529 (set! last-state first-state)
530 (set! nstates 1)))
531
532
533
534 (define (generate-states)
535 (allocate-storage)
536 (set-fderives)
537 (initialize-states)
538 (let loop ((this-state first-state))
539 (if (pair? this-state)
540 (let* ((x (car this-state))
541 (is (closure (core-items x))))
542 (save-reductions x is)
543 (new-itemsets is)
544 (append-states)
545 (if (> nshifts 0)
546 (save-shifts x))
547 (loop (cdr this-state))))))
548
549
550 (define (new-itemsets itemset)
551 ;; - Initialization
552 (set! shift-symbol '())
553 (let loop ((i 0))
554 (if (< i nsyms)
555 (begin
556 (vector-set! kernel-end i '())
557 (loop (+ i 1)))))
558
559 (let loop ((isp itemset))
560 (if (pair? isp)
561 (let* ((i (car isp))
562 (sym (vector-ref ritem i)))
563 (if (>= sym 0)
564 (begin
565 (set! shift-symbol (sinsert sym shift-symbol))
566 (let ((x (vector-ref kernel-end sym)))
567 (if (null? x)
568 (begin
569 (vector-set! kernel-base sym (cons (+ i 1) x))
570 (vector-set! kernel-end sym (vector-ref kernel-base sym)))
571 (begin
572 (set-cdr! x (list (+ i 1)))
573 (vector-set! kernel-end sym (cdr x)))))))
574 (loop (cdr isp)))))
575
576 (set! nshifts (length shift-symbol)))
577
578
579
580 (define (get-state sym)
581 (let* ((isp (vector-ref kernel-base sym))
582 (n (length isp))
583 (key (let loop ((isp1 isp) (k 0))
584 (if (null? isp1)
585 (modulo k STATE-TABLE-SIZE)
586 (loop (cdr isp1) (+ k (car isp1))))))
587 (sp (vector-ref state-table key)))
588 (if (null? sp)
589 (let ((x (new-state sym)))
590 (vector-set! state-table key (list x))
591 (core-number x))
592 (let loop ((sp1 sp))
593 (if (and (= n (core-nitems (car sp1)))
594 (let loop2 ((i1 isp) (t (core-items (car sp1))))
595 (if (and (pair? i1)
596 (= (car i1)
597 (car t)))
598 (loop2 (cdr i1) (cdr t))
599 (null? i1))))
600 (core-number (car sp1))
601 (if (null? (cdr sp1))
602 (let ((x (new-state sym)))
603 (set-cdr! sp1 (list x))
604 (core-number x))
605 (loop (cdr sp1))))))))
606
607
608 (define (new-state sym)
609 (let* ((isp (vector-ref kernel-base sym))
610 (n (length isp))
611 (p (new-core)))
612 (set-core-number! p nstates)
613 (set-core-acc-sym! p sym)
614 (if (= sym nvars) (set! final-state nstates))
615 (set-core-nitems! p n)
616 (set-core-items! p isp)
617 (set-cdr! last-state (list p))
618 (set! last-state (cdr last-state))
619 (set! nstates (+ nstates 1))
620 p))
621
622
623 ; --
624
625 (define (append-states)
626 (set! shift-set
627 (let loop ((l (reverse shift-symbol)))
628 (if (null? l)
629 '()
630 (cons (get-state (car l)) (loop (cdr l)))))))
631
632 ; --
633
634 (define (save-shifts core)
635 (let ((p (new-shift)))
636 (set-shift-number! p (core-number core))
637 (set-shift-nshifts! p nshifts)
638 (set-shift-shifts! p shift-set)
639 (if last-shift
640 (begin
641 (set-cdr! last-shift (list p))
642 (set! last-shift (cdr last-shift)))
643 (begin
644 (set! first-shift (list p))
645 (set! last-shift first-shift)))))
646
647 (define (save-reductions core itemset)
648 (let ((rs (let loop ((l itemset))
649 (if (null? l)
650 '()
651 (let ((item (vector-ref ritem (car l))))
652 (if (< item 0)
653 (cons (- item) (loop (cdr l)))
654 (loop (cdr l))))))))
655 (if (pair? rs)
656 (let ((p (new-red)))
657 (set-red-number! p (core-number core))
658 (set-red-nreds! p (length rs))
659 (set-red-rules! p rs)
660 (if last-reduction
661 (begin
662 (set-cdr! last-reduction (list p))
663 (set! last-reduction (cdr last-reduction)))
664 (begin
665 (set! first-reduction (list p))
666 (set! last-reduction first-reduction)))))))
667
668
669 ; --
670
671 (define (lalr)
672 (set! token-set-size (+ 1 (quotient nterms (BITS-PER-WORD))))
673 (set-accessing-symbol)
674 (set-shift-table)
675 (set-reduction-table)
676 (set-max-rhs)
677 (initialize-LA)
678 (set-goto-map)
679 (initialize-F)
680 (build-relations)
681 (digraph includes)
682 (compute-lookaheads))
683
684 (define (set-accessing-symbol)
685 (set! acces-symbol (make-vector nstates #f))
686 (let loop ((l first-state))
687 (if (pair? l)
688 (let ((x (car l)))
689 (vector-set! acces-symbol (core-number x) (core-acc-sym x))
690 (loop (cdr l))))))
691
692 (define (set-shift-table)
693 (set! shift-table (make-vector nstates #f))
694 (let loop ((l first-shift))
695 (if (pair? l)
696 (let ((x (car l)))
697 (vector-set! shift-table (shift-number x) x)
698 (loop (cdr l))))))
699
700 (define (set-reduction-table)
701 (set! reduction-table (make-vector nstates #f))
702 (let loop ((l first-reduction))
703 (if (pair? l)
704 (let ((x (car l)))
705 (vector-set! reduction-table (red-number x) x)
706 (loop (cdr l))))))
707
708 (define (set-max-rhs)
709 (let loop ((p 0) (curmax 0) (length 0))
710 (let ((x (vector-ref ritem p)))
711 (if x
712 (if (>= x 0)
713 (loop (+ p 1) curmax (+ length 1))
714 (loop (+ p 1) (max curmax length) 0))
715 (set! maxrhs curmax)))))
716
717 (define (initialize-LA)
718 (define (last l)
719 (if (null? (cdr l))
720 (car l)
721 (last (cdr l))))
722
723 (set! consistent (make-vector nstates #f))
724 (set! lookaheads (make-vector (+ nstates 1) #f))
725
726 (let loop ((count 0) (i 0))
727 (if (< i nstates)
728 (begin
729 (vector-set! lookaheads i count)
730 (let ((rp (vector-ref reduction-table i))
731 (sp (vector-ref shift-table i)))
732 (if (and rp
733 (or (> (red-nreds rp) 1)
734 (and sp
735 (not
736 (< (vector-ref acces-symbol
737 (last (shift-shifts sp)))
738 nvars)))))
739 (loop (+ count (red-nreds rp)) (+ i 1))
740 (begin
741 (vector-set! consistent i #t)
742 (loop count (+ i 1))))))
743
744 (begin
745 (vector-set! lookaheads nstates count)
746 (let ((c (max count 1)))
747 (set! LA (make-vector c #f))
748 (do ((j 0 (+ j 1))) ((= j c)) (vector-set! LA j (new-set token-set-size)))
749 (set! LAruleno (make-vector c -1))
750 (set! lookback (make-vector c #f)))
751 (let loop ((i 0) (np 0))
752 (if (< i nstates)
753 (if (vector-ref consistent i)
754 (loop (+ i 1) np)
755 (let ((rp (vector-ref reduction-table i)))
756 (if rp
757 (let loop2 ((j (red-rules rp)) (np2 np))
758 (if (null? j)
759 (loop (+ i 1) np2)
760 (begin
761 (vector-set! LAruleno np2 (car j))
762 (loop2 (cdr j) (+ np2 1)))))
763 (loop (+ i 1) np))))))))))
764
765
766 (define (set-goto-map)
767 (set! goto-map (make-vector (+ nvars 1) 0))
768 (let ((temp-map (make-vector (+ nvars 1) 0)))
769 (let loop ((ng 0) (sp first-shift))
770 (if (pair? sp)
771 (let loop2 ((i (reverse (shift-shifts (car sp)))) (ng2 ng))
772 (if (pair? i)
773 (let ((symbol (vector-ref acces-symbol (car i))))
774 (if (< symbol nvars)
775 (begin
776 (vector-set! goto-map symbol
777 (+ 1 (vector-ref goto-map symbol)))
778 (loop2 (cdr i) (+ ng2 1)))
779 (loop2 (cdr i) ng2)))
780 (loop ng2 (cdr sp))))
781
782 (let loop ((k 0) (i 0))
783 (if (< i nvars)
784 (begin
785 (vector-set! temp-map i k)
786 (loop (+ k (vector-ref goto-map i)) (+ i 1)))
787
788 (begin
789 (do ((i 0 (+ i 1)))
790 ((>= i nvars))
791 (vector-set! goto-map i (vector-ref temp-map i)))
792
793 (set! ngotos ng)
794 (vector-set! goto-map nvars ngotos)
795 (vector-set! temp-map nvars ngotos)
796 (set! from-state (make-vector ngotos #f))
797 (set! to-state (make-vector ngotos #f))
798
799 (do ((sp first-shift (cdr sp)))
800 ((null? sp))
801 (let* ((x (car sp))
802 (state1 (shift-number x)))
803 (do ((i (shift-shifts x) (cdr i)))
804 ((null? i))
805 (let* ((state2 (car i))
806 (symbol (vector-ref acces-symbol state2)))
807 (if (< symbol nvars)
808 (let ((k (vector-ref temp-map symbol)))
809 (vector-set! temp-map symbol (+ k 1))
810 (vector-set! from-state k state1)
811 (vector-set! to-state k state2))))))))))))))
812
813
814 (define (map-goto state symbol)
815 (let loop ((low (vector-ref goto-map symbol))
816 (high (- (vector-ref goto-map (+ symbol 1)) 1)))
817 (if (> low high)
818 (begin
819 (display (list "Error in map-goto" state symbol)) (newline)
820 0)
821 (let* ((middle (quotient (+ low high) 2))
822 (s (vector-ref from-state middle)))
823 (cond
824 ((= s state)
825 middle)
826 ((< s state)
827 (loop (+ middle 1) high))
828 (else
829 (loop low (- middle 1))))))))
830
831
832 (define (initialize-F)
833 (set! F (make-vector ngotos #f))
834 (do ((i 0 (+ i 1))) ((= i ngotos)) (vector-set! F i (new-set token-set-size)))
835
836 (let ((reads (make-vector ngotos #f)))
837
838 (let loop ((i 0) (rowp 0))
839 (if (< i ngotos)
840 (let* ((rowf (vector-ref F rowp))
841 (stateno (vector-ref to-state i))
842 (sp (vector-ref shift-table stateno)))
843 (if sp
844 (let loop2 ((j (shift-shifts sp)) (edges '()))
845 (if (pair? j)
846 (let ((symbol (vector-ref acces-symbol (car j))))
847 (if (< symbol nvars)
848 (if (vector-ref nullable symbol)
849 (loop2 (cdr j) (cons (map-goto stateno symbol)
850 edges))
851 (loop2 (cdr j) edges))
852 (begin
853 (set-bit rowf (- symbol nvars))
854 (loop2 (cdr j) edges))))
855 (if (pair? edges)
856 (vector-set! reads i (reverse edges))))))
857 (loop (+ i 1) (+ rowp 1)))))
858 (digraph reads)))
859
860 (define (add-lookback-edge stateno ruleno gotono)
861 (let ((k (vector-ref lookaheads (+ stateno 1))))
862 (let loop ((found #f) (i (vector-ref lookaheads stateno)))
863 (if (and (not found) (< i k))
864 (if (= (vector-ref LAruleno i) ruleno)
865 (loop #t i)
866 (loop found (+ i 1)))
867
868 (if (not found)
869 (begin (display "Error in add-lookback-edge : ")
870 (display (list stateno ruleno gotono)) (newline))
871 (vector-set! lookback i
872 (cons gotono (vector-ref lookback i))))))))
873
874
875 (define (transpose r-arg n)
876 (let ((new-end (make-vector n #f))
877 (new-R (make-vector n #f)))
878 (do ((i 0 (+ i 1)))
879 ((= i n))
880 (let ((x (list 'bidon)))
881 (vector-set! new-R i x)
882 (vector-set! new-end i x)))
883 (do ((i 0 (+ i 1)))
884 ((= i n))
885 (let ((sp (vector-ref r-arg i)))
886 (if (pair? sp)
887 (let loop ((sp2 sp))
888 (if (pair? sp2)
889 (let* ((x (car sp2))
890 (y (vector-ref new-end x)))
891 (set-cdr! y (cons i (cdr y)))
892 (vector-set! new-end x (cdr y))
893 (loop (cdr sp2))))))))
894 (do ((i 0 (+ i 1)))
895 ((= i n))
896 (vector-set! new-R i (cdr (vector-ref new-R i))))
897
898 new-R))
899
900
901
902 (define (build-relations)
903
904 (define (get-state stateno symbol)
905 (let loop ((j (shift-shifts (vector-ref shift-table stateno)))
906 (stno stateno))
907 (if (null? j)
908 stno
909 (let ((st2 (car j)))
910 (if (= (vector-ref acces-symbol st2) symbol)
911 st2
912 (loop (cdr j) st2))))))
913
914 (set! includes (make-vector ngotos #f))
915 (do ((i 0 (+ i 1)))
916 ((= i ngotos))
917 (let ((state1 (vector-ref from-state i))
918 (symbol1 (vector-ref acces-symbol (vector-ref to-state i))))
919 (let loop ((rulep (vector-ref derives symbol1))
920 (edges '()))
921 (if (pair? rulep)
922 (let ((*rulep (car rulep)))
923 (let loop2 ((rp (vector-ref rrhs *rulep))
924 (stateno state1)
925 (states (list state1)))
926 (let ((*rp (vector-ref ritem rp)))
927 (if (> *rp 0)
928 (let ((st (get-state stateno *rp)))
929 (loop2 (+ rp 1) st (cons st states)))
930 (begin
931
932 (if (not (vector-ref consistent stateno))
933 (add-lookback-edge stateno *rulep i))
934
935 (let loop2 ((done #f)
936 (stp (cdr states))
937 (rp2 (- rp 1))
938 (edgp edges))
939 (if (not done)
940 (let ((*rp (vector-ref ritem rp2)))
941 (if (< -1 *rp nvars)
942 (loop2 (not (vector-ref nullable *rp))
943 (cdr stp)
944 (- rp2 1)
945 (cons (map-goto (car stp) *rp) edgp))
946 (loop2 #t stp rp2 edgp)))
947
948 (loop (cdr rulep) edgp))))))))
949 (vector-set! includes i edges)))))
950 (set! includes (transpose includes ngotos)))
951
952
953
954 (define (compute-lookaheads)
955 (let ((n (vector-ref lookaheads nstates)))
956 (let loop ((i 0))
957 (if (< i n)
958 (let loop2 ((sp (vector-ref lookback i)))
959 (if (pair? sp)
960 (let ((LA-i (vector-ref LA i))
961 (F-j (vector-ref F (car sp))))
962 (bit-union LA-i F-j token-set-size)
963 (loop2 (cdr sp)))
964 (loop (+ i 1))))))))
965
966
967
968 (define (digraph relation)
969 (define infinity (+ ngotos 2))
970 (define INDEX (make-vector (+ ngotos 1) 0))
971 (define VERTICES (make-vector (+ ngotos 1) 0))
972 (define top 0)
973 (define R relation)
974
975 (define (traverse i)
976 (set! top (+ 1 top))
977 (vector-set! VERTICES top i)
978 (let ((height top))
979 (vector-set! INDEX i height)
980 (let ((rp (vector-ref R i)))
981 (if (pair? rp)
982 (let loop ((rp2 rp))
983 (if (pair? rp2)
984 (let ((j (car rp2)))
985 (if (= 0 (vector-ref INDEX j))
986 (traverse j))
987 (if (> (vector-ref INDEX i)
988 (vector-ref INDEX j))
989 (vector-set! INDEX i (vector-ref INDEX j)))
990 (let ((F-i (vector-ref F i))
991 (F-j (vector-ref F j)))
992 (bit-union F-i F-j token-set-size))
993 (loop (cdr rp2))))))
994 (if (= (vector-ref INDEX i) height)
995 (let loop ()
996 (let ((j (vector-ref VERTICES top)))
997 (set! top (- top 1))
998 (vector-set! INDEX j infinity)
999 (if (not (= i j))
1000 (begin
1001 (bit-union (vector-ref F i)
1002 (vector-ref F j)
1003 token-set-size)
1004 (loop)))))))))
1005
1006 (let loop ((i 0))
1007 (if (< i ngotos)
1008 (begin
1009 (if (and (= 0 (vector-ref INDEX i))
1010 (pair? (vector-ref R i)))
1011 (traverse i))
1012 (loop (+ i 1))))))
1013
1014
1015 ;; ----------------------------------------------------------------------
1016 ;; operator precedence management
1017 ;; ----------------------------------------------------------------------
1018
1019 ;; a vector of precedence descriptors where each element
1020 ;; is of the form (terminal type precedence)
1021 (define the-terminals/prec #f) ; terminal symbols with precedence
1022 ; the precedence is an integer >= 0
1023 (define (get-symbol-precedence sym)
1024 (caddr (vector-ref the-terminals/prec sym)))
1025 ; the operator type is either 'none, 'left, 'right, or 'nonassoc
1026 (define (get-symbol-assoc sym)
1027 (cadr (vector-ref the-terminals/prec sym)))
1028
1029 (define rule-precedences '())
1030 (define (add-rule-precedence! rule sym)
1031 (set! rule-precedences
1032 (cons (cons rule sym) rule-precedences)))
1033
1034 (define (get-rule-precedence ruleno)
1035 (cond
1036 ((assq ruleno rule-precedences)
1037 => (lambda (p)
1038 (get-symbol-precedence (cdr p))))
1039 (else
1040 ;; process the rule symbols from left to right
1041 (let loop ((i (vector-ref rrhs ruleno))
1042 (prec 0))
1043 (let ((item (vector-ref ritem i)))
1044 ;; end of rule
1045 (if (< item 0)
1046 prec
1047 (let ((i1 (+ i 1)))
1048 (if (>= item nvars)
1049 ;; it's a terminal symbol
1050 (loop i1 (get-symbol-precedence (- item nvars)))
1051 (loop i1 prec)))))))))
1052
1053 ;; ----------------------------------------------------------------------
1054 ;; Build the various tables
1055 ;; ----------------------------------------------------------------------
1056
1057 (define expected-conflicts 0)
1058
1059 (define (build-tables)
1060
1061 (define (resolve-conflict sym rule)
1062 (let ((sym-prec (get-symbol-precedence sym))
1063 (sym-assoc (get-symbol-assoc sym))
1064 (rule-prec (get-rule-precedence rule)))
1065 (cond
1066 ((> sym-prec rule-prec) 'shift)
1067 ((< sym-prec rule-prec) 'reduce)
1068 ((eq? sym-assoc 'left) 'reduce)
1069 ((eq? sym-assoc 'right) 'shift)
1070 (else 'none))))
1071
1072 (define conflict-messages '())
1073
1074 (define (add-conflict-message . l)
1075 (set! conflict-messages (cons l conflict-messages)))
1076
1077 (define (log-conflicts)
1078 (if (> (length conflict-messages) expected-conflicts)
1079 (for-each
1080 (lambda (message)
1081 (for-each display message)
1082 (newline))
1083 conflict-messages)))
1084
1085 ;; --- Add an action to the action table
1086 (define (add-action state symbol new-action)
1087 (let* ((state-actions (vector-ref action-table state))
1088 (actions (assv symbol state-actions)))
1089 (if (pair? actions)
1090 (let ((current-action (cadr actions)))
1091 (if (not (= new-action current-action))
1092 ;; -- there is a conflict
1093 (begin
1094 (if (and (<= current-action 0) (<= new-action 0))
1095 ;; --- reduce/reduce conflict
1096 (begin
1097 (add-conflict-message
1098 "%% Reduce/Reduce conflict (reduce " (- new-action) ", reduce " (- current-action)
1099 ") on '" (get-symbol (+ symbol nvars)) "' in state " state)
1100 (if (eq? driver-name 'glr-driver)
1101 (set-cdr! (cdr actions) (cons new-action (cddr actions)))
1102 (set-car! (cdr actions) (max current-action new-action))))
1103 ;; --- shift/reduce conflict
1104 ;; can we resolve the conflict using precedences?
1105 (case (resolve-conflict symbol (- current-action))
1106 ;; -- shift
1107 ((shift) (if (eq? driver-name 'glr-driver)
1108 (set-cdr! (cdr actions) (cons new-action (cddr actions)))
1109 (set-car! (cdr actions) new-action)))
1110 ;; -- reduce
1111 ((reduce) #f) ; well, nothing to do...
1112 ;; -- signal a conflict!
1113 (else (add-conflict-message
1114 "%% Shift/Reduce conflict (shift " new-action ", reduce " (- current-action)
1115 ") on '" (get-symbol (+ symbol nvars)) "' in state " state)
1116 (if (eq? driver-name 'glr-driver)
1117 (set-cdr! (cdr actions) (cons new-action (cddr actions)))
1118 (set-car! (cdr actions) new-action))))))))
1119
1120 (vector-set! action-table state (cons (list symbol new-action) state-actions)))))
1121
1122 (define (add-action-for-all-terminals state action)
1123 (do ((i 1 (+ i 1)))
1124 ((= i nterms))
1125 (add-action state i action)))
1126
1127 (set! action-table (make-vector nstates '()))
1128
1129 (do ((i 0 (+ i 1))) ; i = state
1130 ((= i nstates))
1131 (let ((red (vector-ref reduction-table i)))
1132 (if (and red (>= (red-nreds red) 1))
1133 (if (and (= (red-nreds red) 1) (vector-ref consistent i))
1134 (add-action-for-all-terminals i (- (car (red-rules red))))
1135 (let ((k (vector-ref lookaheads (+ i 1))))
1136 (let loop ((j (vector-ref lookaheads i)))
1137 (if (< j k)
1138 (let ((rule (- (vector-ref LAruleno j)))
1139 (lav (vector-ref LA j)))
1140 (let loop2 ((token 0) (x (vector-ref lav 0)) (y 1) (z 0))
1141 (if (< token nterms)
1142 (begin
1143 (let ((in-la-set? (modulo x 2)))
1144 (if (= in-la-set? 1)
1145 (add-action i token rule)))
1146 (if (= y (BITS-PER-WORD))
1147 (loop2 (+ token 1)
1148 (vector-ref lav (+ z 1))
1149 1
1150 (+ z 1))
1151 (loop2 (+ token 1) (quotient x 2) (+ y 1) z)))))
1152 (loop (+ j 1)))))))))
1153
1154 (let ((shiftp (vector-ref shift-table i)))
1155 (if shiftp
1156 (let loop ((k (shift-shifts shiftp)))
1157 (if (pair? k)
1158 (let* ((state (car k))
1159 (symbol (vector-ref acces-symbol state)))
1160 (if (>= symbol nvars)
1161 (add-action i (- symbol nvars) state))
1162 (loop (cdr k))))))))
1163
1164 (add-action final-state 0 'accept)
1165 (log-conflicts))
1166
1167 (define (compact-action-table terms)
1168 (define (most-common-action acts)
1169 (let ((accums '()))
1170 (let loop ((l acts))
1171 (if (pair? l)
1172 (let* ((x (cadar l))
1173 (y (assv x accums)))
1174 (if (and (number? x) (< x 0))
1175 (if y
1176 (set-cdr! y (+ 1 (cdr y)))
1177 (set! accums (cons `(,x . 1) accums))))
1178 (loop (cdr l)))))
1179
1180 (let loop ((l accums) (max 0) (sym #f))
1181 (if (null? l)
1182 sym
1183 (let ((x (car l)))
1184 (if (> (cdr x) max)
1185 (loop (cdr l) (cdr x) (car x))
1186 (loop (cdr l) max sym)))))))
1187
1188 (define (translate-terms acts)
1189 (map (lambda (act)
1190 (cons (list-ref terms (car act))
1191 (cdr act)))
1192 acts))
1193
1194 (do ((i 0 (+ i 1)))
1195 ((= i nstates))
1196 (let ((acts (vector-ref action-table i)))
1197 (if (vector? (vector-ref reduction-table i))
1198 (let ((act (most-common-action acts)))
1199 (vector-set! action-table i
1200 (cons `(*default* ,(if act act '*error*))
1201 (translate-terms
1202 (lalr-filter (lambda (x)
1203 (not (and (= (length x) 2)
1204 (eq? (cadr x) act))))
1205 acts)))))
1206 (vector-set! action-table i
1207 (cons `(*default* *error*)
1208 (translate-terms acts)))))))
1209
1210
1211
1212 ;; --
1213
1214 (define (rewrite-grammar tokens grammar k)
1215
1216 (define eoi '*eoi*)
1217
1218 (define (check-terminal term terms)
1219 (cond
1220 ((not (valid-terminal? term))
1221 (lalr-error "invalid terminal: " term))
1222 ((member term terms)
1223 (lalr-error "duplicate definition of terminal: " term))))
1224
1225 (define (prec->type prec)
1226 (cdr (assq prec '((left: . left)
1227 (right: . right)
1228 (nonassoc: . nonassoc)))))
1229
1230 (cond
1231 ;; --- a few error conditions
1232 ((not (list? tokens))
1233 (lalr-error "Invalid token list: " tokens))
1234 ((not (pair? grammar))
1235 (lalr-error "Grammar definition must have a non-empty list of productions" '()))
1236
1237 (else
1238 ;; --- check the terminals
1239 (let loop1 ((lst tokens)
1240 (rev-terms '())
1241 (rev-terms/prec '())
1242 (prec-level 0))
1243 (if (pair? lst)
1244 (let ((term (car lst)))
1245 (cond
1246 ((pair? term)
1247 (if (and (memq (car term) '(left: right: nonassoc:))
1248 (not (null? (cdr term))))
1249 (let ((prec (+ prec-level 1))
1250 (optype (prec->type (car term))))
1251 (let loop-toks ((l (cdr term))
1252 (rev-terms rev-terms)
1253 (rev-terms/prec rev-terms/prec))
1254 (if (null? l)
1255 (loop1 (cdr lst) rev-terms rev-terms/prec prec)
1256 (let ((term (car l)))
1257 (check-terminal term rev-terms)
1258 (loop-toks
1259 (cdr l)
1260 (cons term rev-terms)
1261 (cons (list term optype prec) rev-terms/prec))))))
1262
1263 (lalr-error "invalid operator precedence specification: " term)))
1264
1265 (else
1266 (check-terminal term rev-terms)
1267 (loop1 (cdr lst)
1268 (cons term rev-terms)
1269 (cons (list term 'none 0) rev-terms/prec)
1270 prec-level))))
1271
1272 ;; --- check the grammar rules
1273 (let loop2 ((lst grammar) (rev-nonterm-defs '()))
1274 (if (pair? lst)
1275 (let ((def (car lst)))
1276 (if (not (pair? def))
1277 (lalr-error "Nonterminal definition must be a non-empty list" '())
1278 (let ((nonterm (car def)))
1279 (cond ((not (valid-nonterminal? nonterm))
1280 (lalr-error "Invalid nonterminal:" nonterm))
1281 ((or (member nonterm rev-terms)
1282 (assoc nonterm rev-nonterm-defs))
1283 (lalr-error "Nonterminal previously defined:" nonterm))
1284 (else
1285 (loop2 (cdr lst)
1286 (cons def rev-nonterm-defs)))))))
1287 (let* ((terms (cons eoi (cons 'error (reverse rev-terms))))
1288 (terms/prec (cons '(eoi none 0) (cons '(error none 0) (reverse rev-terms/prec))))
1289 (nonterm-defs (reverse rev-nonterm-defs))
1290 (nonterms (cons '*start* (map car nonterm-defs))))
1291 (if (= (length nonterms) 1)
1292 (lalr-error "Grammar must contain at least one nonterminal" '())
1293 (let loop-defs ((defs (cons `(*start* (,(cadr nonterms) ,eoi) : $1)
1294 nonterm-defs))
1295 (ruleno 0)
1296 (comp-defs '()))
1297 (if (pair? defs)
1298 (let* ((nonterm-def (car defs))
1299 (compiled-def (rewrite-nonterm-def
1300 nonterm-def
1301 ruleno
1302 terms nonterms)))
1303 (loop-defs (cdr defs)
1304 (+ ruleno (length compiled-def))
1305 (cons compiled-def comp-defs)))
1306
1307 (let ((compiled-nonterm-defs (reverse comp-defs)))
1308 (k terms
1309 terms/prec
1310 nonterms
1311 (map (lambda (x) (cons (caaar x) (map cdar x)))
1312 compiled-nonterm-defs)
1313 (apply append compiled-nonterm-defs))))))))))))))
1314
1315
1316 (define (rewrite-nonterm-def nonterm-def ruleno terms nonterms)
1317
1318 (define No-NT (length nonterms))
1319
1320 (define (encode x)
1321 (let ((PosInNT (pos-in-list x nonterms)))
1322 (if PosInNT
1323 PosInNT
1324 (let ((PosInT (pos-in-list x terms)))
1325 (if PosInT
1326 (+ No-NT PosInT)
1327 (lalr-error "undefined symbol : " x))))))
1328
1329 (define (process-prec-directive rhs ruleno)
1330 (let loop ((l rhs))
1331 (if (null? l)
1332 '()
1333 (let ((first (car l))
1334 (rest (cdr l)))
1335 (cond
1336 ((or (member first terms) (member first nonterms))
1337 (cons first (loop rest)))
1338 ((and (pair? first)
1339 (eq? (car first) 'prec:))
1340 (if (and (pair? (cdr first))
1341 (null? (cddr first))
1342 (member (cadr first) terms))
1343 (if (null? rest)
1344 (begin
1345 (add-rule-precedence! ruleno (pos-in-list (cadr first) terms))
1346 (loop rest))
1347 (lalr-error "prec: directive should be at end of rule: " rhs))
1348 (lalr-error "Invalid prec: directive: " first)))
1349 (else
1350 (lalr-error "Invalid terminal or nonterminal: " first)))))))
1351
1352 (define (check-error-production rhs)
1353 (let loop ((rhs rhs))
1354 (if (pair? rhs)
1355 (begin
1356 (if (and (eq? (car rhs) 'error)
1357 (or (null? (cdr rhs))
1358 (not (member (cadr rhs) terms))
1359 (not (null? (cddr rhs)))))
1360 (lalr-error "Invalid 'error' production. A single terminal symbol must follow the 'error' token.:" rhs))
1361 (loop (cdr rhs))))))
1362
1363
1364 (if (not (pair? (cdr nonterm-def)))
1365 (lalr-error "At least one production needed for nonterminal:" (car nonterm-def))
1366 (let ((name (symbol->string (car nonterm-def))))
1367 (let loop1 ((lst (cdr nonterm-def))
1368 (i 1)
1369 (rev-productions-and-actions '()))
1370 (if (not (pair? lst))
1371 (reverse rev-productions-and-actions)
1372 (let* ((rhs (process-prec-directive (car lst) (+ ruleno i -1)))
1373 (rest (cdr lst))
1374 (prod (map encode (cons (car nonterm-def) rhs))))
1375 ;; -- check for undefined tokens
1376 (for-each (lambda (x)
1377 (if (not (or (member x terms) (member x nonterms)))
1378 (lalr-error "Invalid terminal or nonterminal:" x)))
1379 rhs)
1380 ;; -- check 'error' productions
1381 (check-error-production rhs)
1382
1383 (if (and (pair? rest)
1384 (eq? (car rest) ':)
1385 (pair? (cdr rest)))
1386 (loop1 (cddr rest)
1387 (+ i 1)
1388 (cons (cons prod (cadr rest))
1389 rev-productions-and-actions))
1390 (let* ((rhs-length (length rhs))
1391 (action
1392 (cons 'vector
1393 (cons (list 'quote (string->symbol
1394 (string-append
1395 name
1396 "-"
1397 (number->string i))))
1398 (let loop-j ((j 1))
1399 (if (> j rhs-length)
1400 '()
1401 (cons (string->symbol
1402 (string-append
1403 "$"
1404 (number->string j)))
1405 (loop-j (+ j 1)))))))))
1406 (loop1 rest
1407 (+ i 1)
1408 (cons (cons prod action)
1409 rev-productions-and-actions))))))))))
1410
1411 (define (valid-nonterminal? x)
1412 (symbol? x))
1413
1414 (define (valid-terminal? x)
1415 (symbol? x)) ; DB
1416
1417 ;; ----------------------------------------------------------------------
1418 ;; Miscellaneous
1419 ;; ----------------------------------------------------------------------
1420 (define (pos-in-list x lst)
1421 (let loop ((lst lst) (i 0))
1422 (cond ((not (pair? lst)) #f)
1423 ((equal? (car lst) x) i)
1424 (else (loop (cdr lst) (+ i 1))))))
1425
1426 (define (sunion lst1 lst2) ; union of sorted lists
1427 (let loop ((L1 lst1)
1428 (L2 lst2))
1429 (cond ((null? L1) L2)
1430 ((null? L2) L1)
1431 (else
1432 (let ((x (car L1)) (y (car L2)))
1433 (cond
1434 ((> x y)
1435 (cons y (loop L1 (cdr L2))))
1436 ((< x y)
1437 (cons x (loop (cdr L1) L2)))
1438 (else
1439 (loop (cdr L1) L2))
1440 ))))))
1441
1442 (define (sinsert elem lst)
1443 (let loop ((l1 lst))
1444 (if (null? l1)
1445 (cons elem l1)
1446 (let ((x (car l1)))
1447 (cond ((< elem x)
1448 (cons elem l1))
1449 ((> elem x)
1450 (cons x (loop (cdr l1))))
1451 (else
1452 l1))))))
1453
1454 (define (lalr-filter p lst)
1455 (let loop ((l lst))
1456 (if (null? l)
1457 '()
1458 (let ((x (car l)) (y (cdr l)))
1459 (if (p x)
1460 (cons x (loop y))
1461 (loop y))))))
1462
1463 ;; ----------------------------------------------------------------------
1464 ;; Debugging tools ...
1465 ;; ----------------------------------------------------------------------
1466 (define the-terminals #f) ; names of terminal symbols
1467 (define the-nonterminals #f) ; non-terminals
1468
1469 (define (print-item item-no)
1470 (let loop ((i item-no))
1471 (let ((v (vector-ref ritem i)))
1472 (if (>= v 0)
1473 (loop (+ i 1))
1474 (let* ((rlno (- v))
1475 (nt (vector-ref rlhs rlno)))
1476 (display (vector-ref the-nonterminals nt)) (display " --> ")
1477 (let loop ((i (vector-ref rrhs rlno)))
1478 (let ((v (vector-ref ritem i)))
1479 (if (= i item-no)
1480 (display ". "))
1481 (if (>= v 0)
1482 (begin
1483 (display (get-symbol v))
1484 (display " ")
1485 (loop (+ i 1)))
1486 (begin
1487 (display " (rule ")
1488 (display (- v))
1489 (display ")")
1490 (newline))))))))))
1491
1492 (define (get-symbol n)
1493 (if (>= n nvars)
1494 (vector-ref the-terminals (- n nvars))
1495 (vector-ref the-nonterminals n)))
1496
1497
1498 (define (print-states)
1499 (define (print-action act)
1500 (cond
1501 ((eq? act '*error*)
1502 (display " : Error"))
1503 ((eq? act 'accept)
1504 (display " : Accept input"))
1505 ((< act 0)
1506 (display " : reduce using rule ")
1507 (display (- act)))
1508 (else
1509 (display " : shift and goto state ")
1510 (display act)))
1511 (newline)
1512 #t)
1513
1514 (define (print-actions acts)
1515 (let loop ((l acts))
1516 (if (null? l)
1517 #t
1518 (let ((sym (caar l))
1519 (act (cadar l)))
1520 (display " ")
1521 (cond
1522 ((eq? sym 'default)
1523 (display "default action"))
1524 (else
1525 (if (number? sym)
1526 (display (get-symbol (+ sym nvars)))
1527 (display sym))))
1528 (print-action act)
1529 (loop (cdr l))))))
1530
1531 (if (not action-table)
1532 (begin
1533 (display "No generated parser available!")
1534 (newline)
1535 #f)
1536 (begin
1537 (display "State table") (newline)
1538 (display "-----------") (newline) (newline)
1539
1540 (let loop ((l first-state))
1541 (if (null? l)
1542 #t
1543 (let* ((core (car l))
1544 (i (core-number core))
1545 (items (core-items core))
1546 (actions (vector-ref action-table i)))
1547 (display "state ") (display i) (newline)
1548 (newline)
1549 (for-each (lambda (x) (display " ") (print-item x))
1550 items)
1551 (newline)
1552 (print-actions actions)
1553 (newline)
1554 (loop (cdr l))))))))
1555
1556
1557
1558 ;; ----------------------------------------------------------------------
1559
1560 (define build-goto-table
1561 (lambda ()
1562 `(vector
1563 ,@(map
1564 (lambda (shifts)
1565 (list 'quote
1566 (if shifts
1567 (let loop ((l (shift-shifts shifts)))
1568 (if (null? l)
1569 '()
1570 (let* ((state (car l))
1571 (symbol (vector-ref acces-symbol state)))
1572 (if (< symbol nvars)
1573 (cons `(,symbol . ,state)
1574 (loop (cdr l)))
1575 (loop (cdr l))))))
1576 '())))
1577 (vector->list shift-table)))))
1578
1579
1580 (define build-reduction-table
1581 (lambda (gram/actions)
1582 `(vector
1583 '()
1584 ,@(map
1585 (lambda (p)
1586 (let ((act (cdr p)))
1587 `(lambda ,(if (eq? driver-name 'lr-driver)
1588 '(___stack ___sp ___goto-table ___push yypushback)
1589 '(___sp ___goto-table ___push))
1590 ,(let* ((nt (caar p)) (rhs (cdar p)) (n (length rhs)))
1591 `(let* (,@(if act
1592 (let loop ((i 1) (l rhs))
1593 (if (pair? l)
1594 (let ((rest (cdr l)))
1595 (cons
1596 `(,(string->symbol
1597 (string-append
1598 "$"
1599 (number->string
1600 (+ (- n i) 1))))
1601 ,(if (eq? driver-name 'lr-driver)
1602 `(vector-ref ___stack (- ___sp ,(- (* i 2) 1)))
1603 `(list-ref ___sp ,(+ (* (- i 1) 2) 1))))
1604 (loop (+ i 1) rest)))
1605 '()))
1606 '()))
1607 ,(if (= nt 0)
1608 '$1
1609 `(___push ,n ,nt ,(cdr p) ,@(if (eq? driver-name 'lr-driver) '() '(___sp)))))))))
1610
1611 gram/actions))))
1612
1613
1614
1615 ;; Options
1616
1617 (define *valid-options*
1618 (list
1619 (cons 'out-table:
1620 (lambda (option)
1621 (and (list? option)
1622 (= (length option) 2)
1623 (string? (cadr option)))))
1624 (cons 'output:
1625 (lambda (option)
1626 (and (list? option)
1627 (= (length option) 3)
1628 (symbol? (cadr option))
1629 (string? (caddr option)))))
1630 (cons 'expect:
1631 (lambda (option)
1632 (and (list? option)
1633 (= (length option) 2)
1634 (integer? (cadr option))
1635 (>= (cadr option) 0))))
1636
1637 (cons 'driver:
1638 (lambda (option)
1639 (and (list? option)
1640 (= (length option) 2)
1641 (symbol? (cadr option))
1642 (memq (cadr option) '(lr glr)))))))
1643
1644
1645 (define (validate-options options)
1646 (for-each
1647 (lambda (option)
1648 (let ((p (assoc (car option) *valid-options*)))
1649 (if (or (not p)
1650 (not ((cdr p) option)))
1651 (lalr-error "Invalid option:" option))))
1652 options))
1653
1654
1655 (define (output-parser! options code)
1656 (let ((option (assq 'output: options)))
1657 (if option
1658 (let ((parser-name (cadr option))
1659 (file-name (caddr option)))
1660 (with-output-to-file file-name
1661 (lambda ()
1662 (pprint `(define ,parser-name ,code))
1663 (newline)))))))
1664
1665
1666 (define (output-table! options)
1667 (let ((option (assq 'out-table: options)))
1668 (if option
1669 (let ((file-name (cadr option)))
1670 (with-output-to-file file-name print-states)))))
1671
1672
1673 (define (set-expected-conflicts! options)
1674 (let ((option (assq 'expect: options)))
1675 (set! expected-conflicts (if option (cadr option) 0))))
1676
1677 (define (set-driver-name! options)
1678 (let ((option (assq 'driver: options)))
1679 (if option
1680 (let ((driver-type (cadr option)))
1681 (set! driver-name (if (eq? driver-type 'glr) 'glr-driver 'lr-driver))))))
1682
1683
1684 ;; -- arguments
1685
1686 (define (extract-arguments lst proc)
1687 (let loop ((options '())
1688 (tokens '())
1689 (rules '())
1690 (lst lst))
1691 (if (pair? lst)
1692 (let ((p (car lst)))
1693 (cond
1694 ((and (pair? p)
1695 (lalr-keyword? (car p))
1696 (assq (car p) *valid-options*))
1697 (loop (cons p options) tokens rules (cdr lst)))
1698 (else
1699 (proc options p (cdr lst)))))
1700 (lalr-error "Malformed lalr-parser form" lst))))
1701
1702
1703 (define (build-driver options tokens rules)
1704 (validate-options options)
1705 (set-expected-conflicts! options)
1706 (set-driver-name! options)
1707 (let* ((gram/actions (gen-tables! tokens rules))
1708 (code `(,driver-name ',action-table ,(build-goto-table) ,(build-reduction-table gram/actions))))
1709
1710 (output-table! options)
1711 (output-parser! options code)
1712 code))
1713
1714 (extract-arguments arguments build-driver))
1715
1716
1717
1718 ;;;
1719 ;;;; --
1720 ;;;; Implementation of the lr-driver
1721 ;;;
1722
1723
1724 (cond-expand
1725 (gambit
1726 (declare
1727 (standard-bindings)
1728 (fixnum)
1729 (block)
1730 (not safe)))
1731 (chicken
1732 (declare
1733 (uses extras)
1734 (usual-integrations)
1735 (fixnum)
1736 (not safe)))
1737 (else))
1738
1739
1740 ;;;
1741 ;;;; Source location utilities
1742 ;;;
1743
1744
1745 ;; This function assumes that src-location-1 and src-location-2 are source-locations
1746 ;; Returns #f if they are not locations for the same input
1747 (define (combine-locations src-location-1 src-location-2)
1748 (let ((offset-1 (source-location-offset src-location-1))
1749 (offset-2 (source-location-offset src-location-2))
1750 (length-1 (source-location-length src-location-1))
1751 (length-2 (source-location-length src-location-2)))
1752
1753 (cond ((not (equal? (source-location-input src-location-1)
1754 (source-location-input src-location-2)))
1755 #f)
1756 ((or (not (number? offset-1)) (not (number? offset-2))
1757 (not (number? length-1)) (not (number? length-2))
1758 (< offset-1 0) (< offset-2 0)
1759 (< length-1 0) (< length-2 0))
1760 (make-source-location (source-location-input src-location-1)
1761 (source-location-line src-location-1)
1762 (source-location-column src-location-1)
1763 -1 -1))
1764 ((<= offset-1 offset-2)
1765 (make-source-location (source-location-input src-location-1)
1766 (source-location-line src-location-1)
1767 (source-location-column src-location-1)
1768 offset-1
1769 (- (+ offset-2 length-2) offset-1)))
1770 (else
1771 (make-source-location (source-location-input src-location-1)
1772 (source-location-line src-location-1)
1773 (source-location-column src-location-1)
1774 offset-2
1775 (- (+ offset-1 length-1) offset-2))))))
1776
1777
1778 ;;;
1779 ;;;; LR-driver
1780 ;;;
1781
1782
1783 (define *max-stack-size* 500)
1784
1785 (define (lr-driver action-table goto-table reduction-table)
1786 (define ___atable action-table)
1787 (define ___gtable goto-table)
1788 (define ___rtable reduction-table)
1789
1790 (define ___lexerp #f)
1791 (define ___errorp #f)
1792
1793 (define ___stack #f)
1794 (define ___sp 0)
1795
1796 (define ___curr-input #f)
1797 (define ___reuse-input #f)
1798
1799 (define ___input #f)
1800 (define (___consume)
1801 (set! ___input (if ___reuse-input ___curr-input (___lexerp)))
1802 (set! ___reuse-input #f)
1803 (set! ___curr-input ___input))
1804
1805 (define (___pushback)
1806 (set! ___reuse-input #t))
1807
1808 (define (___initstack)
1809 (set! ___stack (make-vector *max-stack-size* 0))
1810 (set! ___sp 0))
1811
1812 (define (___growstack)
1813 (let ((new-stack (make-vector (* 2 (vector-length ___stack)) 0)))
1814 (let loop ((i (- (vector-length ___stack) 1)))
1815 (if (>= i 0)
1816 (begin
1817 (vector-set! new-stack i (vector-ref ___stack i))
1818 (loop (- i 1)))))
1819 (set! ___stack new-stack)))
1820
1821 (define (___checkstack)
1822 (if (>= ___sp (vector-length ___stack))
1823 (___growstack)))
1824
1825 (define (___push delta new-category lvalue)
1826 (set! ___sp (- ___sp (* delta 2)))
1827 (let* ((state (vector-ref ___stack ___sp))
1828 (new-state (cdr (assoc new-category (vector-ref ___gtable state)))))
1829 (set! ___sp (+ ___sp 2))
1830 (___checkstack)
1831 (vector-set! ___stack ___sp new-state)
1832 (vector-set! ___stack (- ___sp 1) lvalue)))
1833
1834 (define (___reduce st)
1835 ((vector-ref ___rtable st) ___stack ___sp ___gtable ___push ___pushback))
1836
1837 (define (___shift token attribute)
1838 (set! ___sp (+ ___sp 2))
1839 (___checkstack)
1840 (vector-set! ___stack (- ___sp 1) attribute)
1841 (vector-set! ___stack ___sp token))
1842
1843 (define (___action x l)
1844 (let ((y (assoc x l)))
1845 (if y (cadr y) (cadar l))))
1846
1847 (define (___recover tok)
1848 (let find-state ((sp ___sp))
1849 (if (< sp 0)
1850 (set! ___sp sp)
1851 (let* ((state (vector-ref ___stack sp))
1852 (act (assoc 'error (vector-ref ___atable state))))
1853 (if act
1854 (begin
1855 (set! ___sp sp)
1856 (___sync (cadr act) tok))
1857 (find-state (- sp 2)))))))
1858
1859 (define (___sync state tok)
1860 (let ((sync-set (map car (cdr (vector-ref ___atable state)))))
1861 (set! ___sp (+ ___sp 4))
1862 (___checkstack)
1863 (vector-set! ___stack (- ___sp 3) #f)
1864 (vector-set! ___stack (- ___sp 2) state)
1865 (let skip ()
1866 (let ((i (___category ___input)))
1867 (if (eq? i '*eoi*)
1868 (set! ___sp -1)
1869 (if (memq i sync-set)
1870 (let ((act (assoc i (vector-ref ___atable state))))
1871 (vector-set! ___stack (- ___sp 1) #f)
1872 (vector-set! ___stack ___sp (cadr act)))
1873 (begin
1874 (___consume)
1875 (skip))))))))
1876
1877 (define (___category tok)
1878 (if (lexical-token? tok)
1879 (lexical-token-category tok)
1880 tok))
1881
1882 (define (___value tok)
1883 (if (lexical-token? tok)
1884 (lexical-token-value tok)
1885 tok))
1886
1887 (define (___run)
1888 (let loop ()
1889 (if ___input
1890 (let* ((state (vector-ref ___stack ___sp))
1891 (i (___category ___input))
1892 (attr (___value ___input))
1893 (act (___action i (vector-ref ___atable state))))
1894
1895 (cond ((not (symbol? i))
1896 (___errorp "Syntax error: invalid token: " ___input)
1897 #f)
1898
1899 ;; Input succesfully parsed
1900 ((eq? act 'accept)
1901 (vector-ref ___stack 1))
1902
1903 ;; Syntax error in input
1904 ((eq? act '*error*)
1905 (if (eq? i '*eoi*)
1906 (begin
1907 (___errorp "Syntax error: unexpected end of input")
1908 #f)
1909 (begin
1910 (___errorp "Syntax error: unexpected token : " ___input)
1911 (___recover i)
1912 (if (>= ___sp 0)
1913 (set! ___input #f)
1914 (begin
1915 (set! ___sp 0)
1916 (set! ___input '*eoi*)))
1917 (loop))))
1918
1919 ;; Shift current token on top of the stack
1920 ((>= act 0)
1921 (___shift act attr)
1922 (set! ___input (if (eq? i '*eoi*) '*eoi* #f))
1923 (loop))
1924
1925 ;; Reduce by rule (- act)
1926 (else
1927 (___reduce (- act))
1928 (loop))))
1929
1930 ;; no lookahead, so check if there is a default action
1931 ;; that does not require the lookahead
1932 (let* ((state (vector-ref ___stack ___sp))
1933 (acts (vector-ref ___atable state))
1934 (defact (if (pair? acts) (cadar acts) #f)))
1935 (if (and (= 1 (length acts)) (< defact 0))
1936 (___reduce (- defact))
1937 (___consume))
1938 (loop)))))
1939
1940
1941 (lambda (lexerp errorp)
1942 (set! ___errorp errorp)
1943 (set! ___lexerp lexerp)
1944 (___initstack)
1945 (___run)))
1946
1947
1948 ;;;
1949 ;;;; Simple-minded GLR-driver
1950 ;;;
1951
1952
1953 (define (glr-driver action-table goto-table reduction-table)
1954 (define ___atable action-table)
1955 (define ___gtable goto-table)
1956 (define ___rtable reduction-table)
1957
1958 (define ___lexerp #f)
1959 (define ___errorp #f)
1960
1961 ;; -- Input handling
1962
1963 (define *input* #f)
1964 (define (initialize-lexer lexer)
1965 (set! ___lexerp lexer)
1966 (set! *input* #f))
1967 (define (consume)
1968 (set! *input* (___lexerp)))
1969
1970 (define (token-category tok)
1971 (if (lexical-token? tok)
1972 (lexical-token-category tok)
1973 tok))
1974
1975 (define (token-attribute tok)
1976 (if (lexical-token? tok)
1977 (lexical-token-value tok)
1978 tok))
1979
1980 ;; -- Processes (stacks) handling
1981
1982 (define *processes* '())
1983
1984 (define (initialize-processes)
1985 (set! *processes* '()))
1986 (define (add-process process)
1987 (set! *processes* (cons process *processes*)))
1988 (define (get-processes)
1989 (reverse *processes*))
1990
1991 (define (for-all-processes proc)
1992 (let ((processes (get-processes)))
1993 (initialize-processes)
1994 (for-each proc processes)))
1995
1996 ;; -- parses
1997 (define *parses* '())
1998 (define (get-parses)
1999 *parses*)
2000 (define (initialize-parses)
2001 (set! *parses* '()))
2002 (define (add-parse parse)
2003 (set! *parses* (cons parse *parses*)))
2004
2005
2006 (define (push delta new-category lvalue stack)
2007 (let* ((stack (drop stack (* delta 2)))
2008 (state (car stack))
2009 (new-state (cdr (assv new-category (vector-ref ___gtable state)))))
2010 (cons new-state (cons lvalue stack))))
2011
2012 (define (reduce state stack)
2013 ((vector-ref ___rtable state) stack ___gtable push))
2014
2015 (define (shift state symbol stack)
2016 (cons state (cons symbol stack)))
2017
2018 (define (get-actions token action-list)
2019 (let ((pair (assoc token action-list)))
2020 (if pair
2021 (cdr pair)
2022 (cdar action-list)))) ;; get the default action
2023
2024
2025 (define (run)
2026 (let loop-tokens ()
2027 (consume)
2028 (let ((symbol (token-category *input*))
2029 (attr (token-attribute *input*)))
2030 (for-all-processes
2031 (lambda (process)
2032 (let loop ((stacks (list process)) (active-stacks '()))
2033 (cond ((pair? stacks)
2034 (let* ((stack (car stacks))
2035 (state (car stack)))
2036 (let actions-loop ((actions (get-actions symbol (vector-ref ___atable state)))
2037 (active-stacks active-stacks))
2038 (if (pair? actions)
2039 (let ((action (car actions))
2040 (other-actions (cdr actions)))
2041 (cond ((eq? action '*error*)
2042 (actions-loop other-actions active-stacks))
2043 ((eq? action 'accept)
2044 (add-parse (car (take-right stack 2)))
2045 (actions-loop other-actions active-stacks))
2046 ((>= action 0)
2047 (let ((new-stack (shift action attr stack)))
2048 (add-process new-stack))
2049 (actions-loop other-actions active-stacks))
2050 (else
2051 (let ((new-stack (reduce (- action) stack)))
2052 (actions-loop other-actions (cons new-stack active-stacks))))))
2053 (loop (cdr stacks) active-stacks)))))
2054 ((pair? active-stacks)
2055 (loop (reverse active-stacks) '())))))))
2056 (if (pair? (get-processes))
2057 (loop-tokens))))
2058
2059
2060 (lambda (lexerp errorp)
2061 (set! ___errorp errorp)
2062 (initialize-lexer lexerp)
2063 (initialize-processes)
2064 (initialize-parses)
2065 (add-process '(0))
2066 (run)
2067 (get-parses)))
2068
2069
2070 (define (drop l n)
2071 (cond ((and (> n 0) (pair? l))
2072 (drop (cdr l) (- n 1)))
2073 (else
2074 l)))
2075
2076 (define (take-right l n)
2077 (drop l (- (length l) n)))