Merge commit '8cf2a7ba7432d68b9a055d29f18117be70375af9'
[bpt/guile.git] / module / system / base / lalr.upstream.scm
1 ;;;
2 ;;;; An Efficient and Portable LALR(1) Parser Generator for Scheme
3 ;;;
4 ;; Copyright 2014 Jan Nieuwenhuizen <janneke@gnu.org>
5 ;; Copyright 1993, 2010 Dominique Boucher
6 ;;
7 ;; This program is free software: you can redistribute it and/or
8 ;; modify it under the terms of the GNU Lesser General Public License
9 ;; as published by the Free Software Foundation, either version 3 of
10 ;; the License, or (at your option) any later version.
11 ;;
12 ;; This program is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU Lesser General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20
21 (define *lalr-scm-version* "2.5.0")
22
23
24 (cond-expand
25
26 ;; -- Gambit-C
27 (gambit
28
29 (define-macro (def-macro form . body)
30 `(define-macro ,form (let () ,@body)))
31
32 (def-macro (BITS-PER-WORD) 28)
33 (def-macro (logical-or x . y) `(bitwise-ior ,x ,@y))
34 (def-macro (lalr-error msg obj) `(error ,msg ,obj))
35
36 (define pprint pretty-print)
37 (define lalr-keyword? keyword?)
38 (define (note-source-location lvalue tok) lvalue))
39
40 ;; --
41 (bigloo
42 (define-macro (def-macro form . body)
43 `(define-macro ,form (let () ,@body)))
44
45 (define pprint (lambda (obj) (write obj) (newline)))
46 (define lalr-keyword? keyword?)
47 (def-macro (BITS-PER-WORD) 29)
48 (def-macro (logical-or x . y) `(bit-or ,x ,@y))
49 (def-macro (lalr-error msg obj) `(error "lalr-parser" ,msg ,obj))
50 (define (note-source-location lvalue tok) lvalue))
51
52 ;; -- Chicken
53 (chicken
54
55 (define-macro (def-macro form . body)
56 `(define-macro ,form (let () ,@body)))
57
58 (define pprint pretty-print)
59 (define lalr-keyword? symbol?)
60 (def-macro (BITS-PER-WORD) 30)
61 (def-macro (logical-or x . y) `(bitwise-ior ,x ,@y))
62 (def-macro (lalr-error msg obj) `(error ,msg ,obj))
63 (define (note-source-location lvalue tok) lvalue))
64
65 ;; -- STKlos
66 (stklos
67 (require "pp")
68
69 (define (pprint form) (pp form :port (current-output-port)))
70
71 (define lalr-keyword? keyword?)
72 (define-macro (BITS-PER-WORD) 30)
73 (define-macro (logical-or x . y) `(bit-or ,x ,@y))
74 (define-macro (lalr-error msg obj) `(error 'lalr-parser ,msg ,obj))
75 (define (note-source-location lvalue tok) lvalue))
76
77 ;; -- Guile
78 (guile
79 (use-modules (ice-9 pretty-print))
80 (use-modules (srfi srfi-9))
81
82 (define pprint pretty-print)
83 (define lalr-keyword? symbol?)
84 (define-macro (BITS-PER-WORD) 30)
85 (define-macro (logical-or x . y) `(logior ,x ,@y))
86 (define-macro (lalr-error msg obj) `(error ,msg ,obj))
87 (define (note-source-location lvalue tok)
88 (if (and (supports-source-properties? lvalue)
89 (not (source-property lvalue 'loc))
90 (lexical-token? tok))
91 (set-source-property! lvalue 'loc (lexical-token-source tok)))
92 lvalue))
93
94
95 ;; -- Kawa
96 (kawa
97 (require 'pretty-print)
98 (define (BITS-PER-WORD) 30)
99 (define logical-or logior)
100 (define (lalr-keyword? obj) (keyword? obj))
101 (define (pprint obj) (pretty-print obj))
102 (define (lalr-error msg obj) (error msg obj))
103 (define (note-source-location lvalue tok) lvalue))
104
105 ;; -- SISC
106 (sisc
107 (import logicops)
108 (import record)
109
110 (define pprint pretty-print)
111 (define lalr-keyword? symbol?)
112 (define-macro BITS-PER-WORD (lambda () 32))
113 (define-macro logical-or (lambda (x . y) `(logor ,x ,@y)))
114 (define-macro (lalr-error msg obj) `(error "~a ~S:" ,msg ,obj))
115 (define (note-source-location lvalue tok) lvalue))
116
117 (else
118 (error "Unsupported Scheme system")))
119
120
121 (define-record-type lexical-token
122 (make-lexical-token category source value)
123 lexical-token?
124 (category lexical-token-category)
125 (source lexical-token-source)
126 (value lexical-token-value))
127
128
129 (define-record-type source-location
130 (make-source-location input line column offset length)
131 source-location?
132 (input source-location-input)
133 (line source-location-line)
134 (column source-location-column)
135 (offset source-location-offset)
136 (length source-location-length))
137
138
139
140 ;; - Macros pour la gestion des vecteurs de bits
141
142 (define-macro (lalr-parser . arguments)
143 (define (set-bit v b)
144 (let ((x (quotient b (BITS-PER-WORD)))
145 (y (expt 2 (remainder b (BITS-PER-WORD)))))
146 (vector-set! v x (logical-or (vector-ref v x) y))))
147
148 (define (bit-union v1 v2 n)
149 (do ((i 0 (+ i 1)))
150 ((= i n))
151 (vector-set! v1 i (logical-or (vector-ref v1 i)
152 (vector-ref v2 i)))))
153
154 ;; - Macro pour les structures de donnees
155
156 (define (new-core) (make-vector 4 0))
157 (define (set-core-number! c n) (vector-set! c 0 n))
158 (define (set-core-acc-sym! c s) (vector-set! c 1 s))
159 (define (set-core-nitems! c n) (vector-set! c 2 n))
160 (define (set-core-items! c i) (vector-set! c 3 i))
161 (define (core-number c) (vector-ref c 0))
162 (define (core-acc-sym c) (vector-ref c 1))
163 (define (core-nitems c) (vector-ref c 2))
164 (define (core-items c) (vector-ref c 3))
165
166 (define (new-shift) (make-vector 3 0))
167 (define (set-shift-number! c x) (vector-set! c 0 x))
168 (define (set-shift-nshifts! c x) (vector-set! c 1 x))
169 (define (set-shift-shifts! c x) (vector-set! c 2 x))
170 (define (shift-number s) (vector-ref s 0))
171 (define (shift-nshifts s) (vector-ref s 1))
172 (define (shift-shifts s) (vector-ref s 2))
173
174 (define (new-red) (make-vector 3 0))
175 (define (set-red-number! c x) (vector-set! c 0 x))
176 (define (set-red-nreds! c x) (vector-set! c 1 x))
177 (define (set-red-rules! c x) (vector-set! c 2 x))
178 (define (red-number c) (vector-ref c 0))
179 (define (red-nreds c) (vector-ref c 1))
180 (define (red-rules c) (vector-ref c 2))
181
182
183 (define (new-set nelem)
184 (make-vector nelem 0))
185
186
187 (define (vector-map f v)
188 (let ((vm-n (- (vector-length v) 1)))
189 (let loop ((vm-low 0) (vm-high vm-n))
190 (if (= vm-low vm-high)
191 (vector-set! v vm-low (f (vector-ref v vm-low) vm-low))
192 (let ((vm-middle (quotient (+ vm-low vm-high) 2)))
193 (loop vm-low vm-middle)
194 (loop (+ vm-middle 1) vm-high))))))
195
196
197 ;; - Constantes
198 (define STATE-TABLE-SIZE 1009)
199
200
201 ;; - Tableaux
202 (define rrhs #f)
203 (define rlhs #f)
204 (define ritem #f)
205 (define nullable #f)
206 (define derives #f)
207 (define fderives #f)
208 (define firsts #f)
209 (define kernel-base #f)
210 (define kernel-end #f)
211 (define shift-symbol #f)
212 (define shift-set #f)
213 (define red-set #f)
214 (define state-table #f)
215 (define acces-symbol #f)
216 (define reduction-table #f)
217 (define shift-table #f)
218 (define consistent #f)
219 (define lookaheads #f)
220 (define LA #f)
221 (define LAruleno #f)
222 (define lookback #f)
223 (define goto-map #f)
224 (define from-state #f)
225 (define to-state #f)
226 (define includes #f)
227 (define F #f)
228 (define action-table #f)
229
230 ;; - Variables
231 (define nitems #f)
232 (define nrules #f)
233 (define nvars #f)
234 (define nterms #f)
235 (define nsyms #f)
236 (define nstates #f)
237 (define first-state #f)
238 (define last-state #f)
239 (define final-state #f)
240 (define first-shift #f)
241 (define last-shift #f)
242 (define first-reduction #f)
243 (define last-reduction #f)
244 (define nshifts #f)
245 (define maxrhs #f)
246 (define ngotos #f)
247 (define token-set-size #f)
248
249 (define driver-name 'lr-driver)
250
251 (define (glr-driver?)
252 (eq? driver-name 'glr-driver))
253 (define (lr-driver?)
254 (eq? driver-name 'lr-driver))
255
256 (define (gen-tables! tokens gram )
257 (initialize-all)
258 (rewrite-grammar
259 tokens
260 gram
261 (lambda (terms terms/prec vars gram gram/actions)
262 (set! the-terminals/prec (list->vector terms/prec))
263 (set! the-terminals (list->vector terms))
264 (set! the-nonterminals (list->vector vars))
265 (set! nterms (length terms))
266 (set! nvars (length vars))
267 (set! nsyms (+ nterms nvars))
268 (let ((no-of-rules (length gram/actions))
269 (no-of-items (let loop ((l gram/actions) (count 0))
270 (if (null? l)
271 count
272 (loop (cdr l) (+ count (length (caar l))))))))
273 (pack-grammar no-of-rules no-of-items gram)
274 (set-derives)
275 (set-nullable)
276 (generate-states)
277 (lalr)
278 (build-tables)
279 (compact-action-table terms)
280 gram/actions))))
281
282
283 (define (initialize-all)
284 (set! rrhs #f)
285 (set! rlhs #f)
286 (set! ritem #f)
287 (set! nullable #f)
288 (set! derives #f)
289 (set! fderives #f)
290 (set! firsts #f)
291 (set! kernel-base #f)
292 (set! kernel-end #f)
293 (set! shift-symbol #f)
294 (set! shift-set #f)
295 (set! red-set #f)
296 (set! state-table (make-vector STATE-TABLE-SIZE '()))
297 (set! acces-symbol #f)
298 (set! reduction-table #f)
299 (set! shift-table #f)
300 (set! consistent #f)
301 (set! lookaheads #f)
302 (set! LA #f)
303 (set! LAruleno #f)
304 (set! lookback #f)
305 (set! goto-map #f)
306 (set! from-state #f)
307 (set! to-state #f)
308 (set! includes #f)
309 (set! F #f)
310 (set! action-table #f)
311 (set! nstates #f)
312 (set! first-state #f)
313 (set! last-state #f)
314 (set! final-state #f)
315 (set! first-shift #f)
316 (set! last-shift #f)
317 (set! first-reduction #f)
318 (set! last-reduction #f)
319 (set! nshifts #f)
320 (set! maxrhs #f)
321 (set! ngotos #f)
322 (set! token-set-size #f)
323 (set! rule-precedences '()))
324
325
326 (define (pack-grammar no-of-rules no-of-items gram)
327 (set! nrules (+ no-of-rules 1))
328 (set! nitems no-of-items)
329 (set! rlhs (make-vector nrules #f))
330 (set! rrhs (make-vector nrules #f))
331 (set! ritem (make-vector (+ 1 nitems) #f))
332
333 (let loop ((p gram) (item-no 0) (rule-no 1))
334 (if (not (null? p))
335 (let ((nt (caar p)))
336 (let loop2 ((prods (cdar p)) (it-no2 item-no) (rl-no2 rule-no))
337 (if (null? prods)
338 (loop (cdr p) it-no2 rl-no2)
339 (begin
340 (vector-set! rlhs rl-no2 nt)
341 (vector-set! rrhs rl-no2 it-no2)
342 (let loop3 ((rhs (car prods)) (it-no3 it-no2))
343 (if (null? rhs)
344 (begin
345 (vector-set! ritem it-no3 (- rl-no2))
346 (loop2 (cdr prods) (+ it-no3 1) (+ rl-no2 1)))
347 (begin
348 (vector-set! ritem it-no3 (car rhs))
349 (loop3 (cdr rhs) (+ it-no3 1))))))))))))
350
351
352 (define (set-derives)
353 (define delts (make-vector (+ nrules 1) 0))
354 (define dset (make-vector nvars -1))
355
356 (let loop ((i 1) (j 0)) ; i = 0
357 (if (< i nrules)
358 (let ((lhs (vector-ref rlhs i)))
359 (if (>= lhs 0)
360 (begin
361 (vector-set! delts j (cons i (vector-ref dset lhs)))
362 (vector-set! dset lhs j)
363 (loop (+ i 1) (+ j 1)))
364 (loop (+ i 1) j)))))
365
366 (set! derives (make-vector nvars 0))
367
368 (let loop ((i 0))
369 (if (< i nvars)
370 (let ((q (let loop2 ((j (vector-ref dset i)) (s '()))
371 (if (< j 0)
372 s
373 (let ((x (vector-ref delts j)))
374 (loop2 (cdr x) (cons (car x) s)))))))
375 (vector-set! derives i q)
376 (loop (+ i 1))))))
377
378
379
380 (define (set-nullable)
381 (set! nullable (make-vector nvars #f))
382 (let ((squeue (make-vector nvars #f))
383 (rcount (make-vector (+ nrules 1) 0))
384 (rsets (make-vector nvars #f))
385 (relts (make-vector (+ nitems nvars 1) #f)))
386 (let loop ((r 0) (s2 0) (p 0))
387 (let ((*r (vector-ref ritem r)))
388 (if *r
389 (if (< *r 0)
390 (let ((symbol (vector-ref rlhs (- *r))))
391 (if (and (>= symbol 0)
392 (not (vector-ref nullable symbol)))
393 (begin
394 (vector-set! nullable symbol #t)
395 (vector-set! squeue s2 symbol)
396 (loop (+ r 1) (+ s2 1) p))))
397 (let loop2 ((r1 r) (any-tokens #f))
398 (let* ((symbol (vector-ref ritem r1)))
399 (if (> symbol 0)
400 (loop2 (+ r1 1) (or any-tokens (>= symbol nvars)))
401 (if (not any-tokens)
402 (let ((ruleno (- symbol)))
403 (let loop3 ((r2 r) (p2 p))
404 (let ((symbol (vector-ref ritem r2)))
405 (if (> symbol 0)
406 (begin
407 (vector-set! rcount ruleno
408 (+ (vector-ref rcount ruleno) 1))
409 (vector-set! relts p2
410 (cons (vector-ref rsets symbol)
411 ruleno))
412 (vector-set! rsets symbol p2)
413 (loop3 (+ r2 1) (+ p2 1)))
414 (loop (+ r2 1) s2 p2)))))
415 (loop (+ r1 1) s2 p))))))
416 (let loop ((s1 0) (s3 s2))
417 (if (< s1 s3)
418 (let loop2 ((p (vector-ref rsets (vector-ref squeue s1))) (s4 s3))
419 (if p
420 (let* ((x (vector-ref relts p))
421 (ruleno (cdr x))
422 (y (- (vector-ref rcount ruleno) 1)))
423 (vector-set! rcount ruleno y)
424 (if (= y 0)
425 (let ((symbol (vector-ref rlhs ruleno)))
426 (if (and (>= symbol 0)
427 (not (vector-ref nullable symbol)))
428 (begin
429 (vector-set! nullable symbol #t)
430 (vector-set! squeue s4 symbol)
431 (loop2 (car x) (+ s4 1)))
432 (loop2 (car x) s4)))
433 (loop2 (car x) s4))))
434 (loop (+ s1 1) s4)))))))))
435
436
437
438 (define (set-firsts)
439 (set! firsts (make-vector nvars '()))
440
441 ;; -- initialization
442 (let loop ((i 0))
443 (if (< i nvars)
444 (let loop2 ((sp (vector-ref derives i)))
445 (if (null? sp)
446 (loop (+ i 1))
447 (let ((sym (vector-ref ritem (vector-ref rrhs (car sp)))))
448 (if (< -1 sym nvars)
449 (vector-set! firsts i (sinsert sym (vector-ref firsts i))))
450 (loop2 (cdr sp)))))))
451
452 ;; -- reflexive and transitive closure
453 (let loop ((continue #t))
454 (if continue
455 (let loop2 ((i 0) (cont #f))
456 (if (>= i nvars)
457 (loop cont)
458 (let* ((x (vector-ref firsts i))
459 (y (let loop3 ((l x) (z x))
460 (if (null? l)
461 z
462 (loop3 (cdr l)
463 (sunion (vector-ref firsts (car l)) z))))))
464 (if (equal? x y)
465 (loop2 (+ i 1) cont)
466 (begin
467 (vector-set! firsts i y)
468 (loop2 (+ i 1) #t))))))))
469
470 (let loop ((i 0))
471 (if (< i nvars)
472 (begin
473 (vector-set! firsts i (sinsert i (vector-ref firsts i)))
474 (loop (+ i 1))))))
475
476
477
478
479 (define (set-fderives)
480 (set! fderives (make-vector nvars #f))
481
482 (set-firsts)
483
484 (let loop ((i 0))
485 (if (< i nvars)
486 (let ((x (let loop2 ((l (vector-ref firsts i)) (fd '()))
487 (if (null? l)
488 fd
489 (loop2 (cdr l)
490 (sunion (vector-ref derives (car l)) fd))))))
491 (vector-set! fderives i x)
492 (loop (+ i 1))))))
493
494
495 (define (closure core)
496 ;; Initialization
497 (define ruleset (make-vector nrules #f))
498
499 (let loop ((csp core))
500 (if (not (null? csp))
501 (let ((sym (vector-ref ritem (car csp))))
502 (if (< -1 sym nvars)
503 (let loop2 ((dsp (vector-ref fderives sym)))
504 (if (not (null? dsp))
505 (begin
506 (vector-set! ruleset (car dsp) #t)
507 (loop2 (cdr dsp))))))
508 (loop (cdr csp)))))
509
510 (let loop ((ruleno 1) (csp core) (itemsetv '())) ; ruleno = 0
511 (if (< ruleno nrules)
512 (if (vector-ref ruleset ruleno)
513 (let ((itemno (vector-ref rrhs ruleno)))
514 (let loop2 ((c csp) (itemsetv2 itemsetv))
515 (if (and (pair? c)
516 (< (car c) itemno))
517 (loop2 (cdr c) (cons (car c) itemsetv2))
518 (loop (+ ruleno 1) c (cons itemno itemsetv2)))))
519 (loop (+ ruleno 1) csp itemsetv))
520 (let loop2 ((c csp) (itemsetv2 itemsetv))
521 (if (pair? c)
522 (loop2 (cdr c) (cons (car c) itemsetv2))
523 (reverse itemsetv2))))))
524
525
526
527 (define (allocate-item-sets)
528 (set! kernel-base (make-vector nsyms 0))
529 (set! kernel-end (make-vector nsyms #f)))
530
531
532 (define (allocate-storage)
533 (allocate-item-sets)
534 (set! red-set (make-vector (+ nrules 1) 0)))
535
536 ; --
537
538
539 (define (initialize-states)
540 (let ((p (new-core)))
541 (set-core-number! p 0)
542 (set-core-acc-sym! p #f)
543 (set-core-nitems! p 1)
544 (set-core-items! p '(0))
545
546 (set! first-state (list p))
547 (set! last-state first-state)
548 (set! nstates 1)))
549
550
551
552 (define (generate-states)
553 (allocate-storage)
554 (set-fderives)
555 (initialize-states)
556 (let loop ((this-state first-state))
557 (if (pair? this-state)
558 (let* ((x (car this-state))
559 (is (closure (core-items x))))
560 (save-reductions x is)
561 (new-itemsets is)
562 (append-states)
563 (if (> nshifts 0)
564 (save-shifts x))
565 (loop (cdr this-state))))))
566
567
568 (define (new-itemsets itemset)
569 ;; - Initialization
570 (set! shift-symbol '())
571 (let loop ((i 0))
572 (if (< i nsyms)
573 (begin
574 (vector-set! kernel-end i '())
575 (loop (+ i 1)))))
576
577 (let loop ((isp itemset))
578 (if (pair? isp)
579 (let* ((i (car isp))
580 (sym (vector-ref ritem i)))
581 (if (>= sym 0)
582 (begin
583 (set! shift-symbol (sinsert sym shift-symbol))
584 (let ((x (vector-ref kernel-end sym)))
585 (if (null? x)
586 (begin
587 (vector-set! kernel-base sym (cons (+ i 1) x))
588 (vector-set! kernel-end sym (vector-ref kernel-base sym)))
589 (begin
590 (set-cdr! x (list (+ i 1)))
591 (vector-set! kernel-end sym (cdr x)))))))
592 (loop (cdr isp)))))
593
594 (set! nshifts (length shift-symbol)))
595
596
597
598 (define (get-state sym)
599 (let* ((isp (vector-ref kernel-base sym))
600 (n (length isp))
601 (key (let loop ((isp1 isp) (k 0))
602 (if (null? isp1)
603 (modulo k STATE-TABLE-SIZE)
604 (loop (cdr isp1) (+ k (car isp1))))))
605 (sp (vector-ref state-table key)))
606 (if (null? sp)
607 (let ((x (new-state sym)))
608 (vector-set! state-table key (list x))
609 (core-number x))
610 (let loop ((sp1 sp))
611 (if (and (= n (core-nitems (car sp1)))
612 (let loop2 ((i1 isp) (t (core-items (car sp1))))
613 (if (and (pair? i1)
614 (= (car i1)
615 (car t)))
616 (loop2 (cdr i1) (cdr t))
617 (null? i1))))
618 (core-number (car sp1))
619 (if (null? (cdr sp1))
620 (let ((x (new-state sym)))
621 (set-cdr! sp1 (list x))
622 (core-number x))
623 (loop (cdr sp1))))))))
624
625
626 (define (new-state sym)
627 (let* ((isp (vector-ref kernel-base sym))
628 (n (length isp))
629 (p (new-core)))
630 (set-core-number! p nstates)
631 (set-core-acc-sym! p sym)
632 (if (= sym nvars) (set! final-state nstates))
633 (set-core-nitems! p n)
634 (set-core-items! p isp)
635 (set-cdr! last-state (list p))
636 (set! last-state (cdr last-state))
637 (set! nstates (+ nstates 1))
638 p))
639
640
641 ; --
642
643 (define (append-states)
644 (set! shift-set
645 (let loop ((l (reverse shift-symbol)))
646 (if (null? l)
647 '()
648 (cons (get-state (car l)) (loop (cdr l)))))))
649
650 ; --
651
652 (define (save-shifts core)
653 (let ((p (new-shift)))
654 (set-shift-number! p (core-number core))
655 (set-shift-nshifts! p nshifts)
656 (set-shift-shifts! p shift-set)
657 (if last-shift
658 (begin
659 (set-cdr! last-shift (list p))
660 (set! last-shift (cdr last-shift)))
661 (begin
662 (set! first-shift (list p))
663 (set! last-shift first-shift)))))
664
665 (define (save-reductions core itemset)
666 (let ((rs (let loop ((l itemset))
667 (if (null? l)
668 '()
669 (let ((item (vector-ref ritem (car l))))
670 (if (< item 0)
671 (cons (- item) (loop (cdr l)))
672 (loop (cdr l))))))))
673 (if (pair? rs)
674 (let ((p (new-red)))
675 (set-red-number! p (core-number core))
676 (set-red-nreds! p (length rs))
677 (set-red-rules! p rs)
678 (if last-reduction
679 (begin
680 (set-cdr! last-reduction (list p))
681 (set! last-reduction (cdr last-reduction)))
682 (begin
683 (set! first-reduction (list p))
684 (set! last-reduction first-reduction)))))))
685
686
687 ; --
688
689 (define (lalr)
690 (set! token-set-size (+ 1 (quotient nterms (BITS-PER-WORD))))
691 (set-accessing-symbol)
692 (set-shift-table)
693 (set-reduction-table)
694 (set-max-rhs)
695 (initialize-LA)
696 (set-goto-map)
697 (initialize-F)
698 (build-relations)
699 (digraph includes)
700 (compute-lookaheads))
701
702 (define (set-accessing-symbol)
703 (set! acces-symbol (make-vector nstates #f))
704 (let loop ((l first-state))
705 (if (pair? l)
706 (let ((x (car l)))
707 (vector-set! acces-symbol (core-number x) (core-acc-sym x))
708 (loop (cdr l))))))
709
710 (define (set-shift-table)
711 (set! shift-table (make-vector nstates #f))
712 (let loop ((l first-shift))
713 (if (pair? l)
714 (let ((x (car l)))
715 (vector-set! shift-table (shift-number x) x)
716 (loop (cdr l))))))
717
718 (define (set-reduction-table)
719 (set! reduction-table (make-vector nstates #f))
720 (let loop ((l first-reduction))
721 (if (pair? l)
722 (let ((x (car l)))
723 (vector-set! reduction-table (red-number x) x)
724 (loop (cdr l))))))
725
726 (define (set-max-rhs)
727 (let loop ((p 0) (curmax 0) (length 0))
728 (let ((x (vector-ref ritem p)))
729 (if x
730 (if (>= x 0)
731 (loop (+ p 1) curmax (+ length 1))
732 (loop (+ p 1) (max curmax length) 0))
733 (set! maxrhs curmax)))))
734
735 (define (initialize-LA)
736 (define (last l)
737 (if (null? (cdr l))
738 (car l)
739 (last (cdr l))))
740
741 (set! consistent (make-vector nstates #f))
742 (set! lookaheads (make-vector (+ nstates 1) #f))
743
744 (let loop ((count 0) (i 0))
745 (if (< i nstates)
746 (begin
747 (vector-set! lookaheads i count)
748 (let ((rp (vector-ref reduction-table i))
749 (sp (vector-ref shift-table i)))
750 (if (and rp
751 (or (> (red-nreds rp) 1)
752 (and sp
753 (not
754 (< (vector-ref acces-symbol
755 (last (shift-shifts sp)))
756 nvars)))))
757 (loop (+ count (red-nreds rp)) (+ i 1))
758 (begin
759 (vector-set! consistent i #t)
760 (loop count (+ i 1))))))
761
762 (begin
763 (vector-set! lookaheads nstates count)
764 (let ((c (max count 1)))
765 (set! LA (make-vector c #f))
766 (do ((j 0 (+ j 1))) ((= j c)) (vector-set! LA j (new-set token-set-size)))
767 (set! LAruleno (make-vector c -1))
768 (set! lookback (make-vector c #f)))
769 (let loop ((i 0) (np 0))
770 (if (< i nstates)
771 (if (vector-ref consistent i)
772 (loop (+ i 1) np)
773 (let ((rp (vector-ref reduction-table i)))
774 (if rp
775 (let loop2 ((j (red-rules rp)) (np2 np))
776 (if (null? j)
777 (loop (+ i 1) np2)
778 (begin
779 (vector-set! LAruleno np2 (car j))
780 (loop2 (cdr j) (+ np2 1)))))
781 (loop (+ i 1) np))))))))))
782
783
784 (define (set-goto-map)
785 (set! goto-map (make-vector (+ nvars 1) 0))
786 (let ((temp-map (make-vector (+ nvars 1) 0)))
787 (let loop ((ng 0) (sp first-shift))
788 (if (pair? sp)
789 (let loop2 ((i (reverse (shift-shifts (car sp)))) (ng2 ng))
790 (if (pair? i)
791 (let ((symbol (vector-ref acces-symbol (car i))))
792 (if (< symbol nvars)
793 (begin
794 (vector-set! goto-map symbol
795 (+ 1 (vector-ref goto-map symbol)))
796 (loop2 (cdr i) (+ ng2 1)))
797 (loop2 (cdr i) ng2)))
798 (loop ng2 (cdr sp))))
799
800 (let loop ((k 0) (i 0))
801 (if (< i nvars)
802 (begin
803 (vector-set! temp-map i k)
804 (loop (+ k (vector-ref goto-map i)) (+ i 1)))
805
806 (begin
807 (do ((i 0 (+ i 1)))
808 ((>= i nvars))
809 (vector-set! goto-map i (vector-ref temp-map i)))
810
811 (set! ngotos ng)
812 (vector-set! goto-map nvars ngotos)
813 (vector-set! temp-map nvars ngotos)
814 (set! from-state (make-vector ngotos #f))
815 (set! to-state (make-vector ngotos #f))
816
817 (do ((sp first-shift (cdr sp)))
818 ((null? sp))
819 (let* ((x (car sp))
820 (state1 (shift-number x)))
821 (do ((i (shift-shifts x) (cdr i)))
822 ((null? i))
823 (let* ((state2 (car i))
824 (symbol (vector-ref acces-symbol state2)))
825 (if (< symbol nvars)
826 (let ((k (vector-ref temp-map symbol)))
827 (vector-set! temp-map symbol (+ k 1))
828 (vector-set! from-state k state1)
829 (vector-set! to-state k state2))))))))))))))
830
831
832 (define (map-goto state symbol)
833 (let loop ((low (vector-ref goto-map symbol))
834 (high (- (vector-ref goto-map (+ symbol 1)) 1)))
835 (if (> low high)
836 (begin
837 (display (list "Error in map-goto" state symbol)) (newline)
838 0)
839 (let* ((middle (quotient (+ low high) 2))
840 (s (vector-ref from-state middle)))
841 (cond
842 ((= s state)
843 middle)
844 ((< s state)
845 (loop (+ middle 1) high))
846 (else
847 (loop low (- middle 1))))))))
848
849
850 (define (initialize-F)
851 (set! F (make-vector ngotos #f))
852 (do ((i 0 (+ i 1))) ((= i ngotos)) (vector-set! F i (new-set token-set-size)))
853
854 (let ((reads (make-vector ngotos #f)))
855
856 (let loop ((i 0) (rowp 0))
857 (if (< i ngotos)
858 (let* ((rowf (vector-ref F rowp))
859 (stateno (vector-ref to-state i))
860 (sp (vector-ref shift-table stateno)))
861 (if sp
862 (let loop2 ((j (shift-shifts sp)) (edges '()))
863 (if (pair? j)
864 (let ((symbol (vector-ref acces-symbol (car j))))
865 (if (< symbol nvars)
866 (if (vector-ref nullable symbol)
867 (loop2 (cdr j) (cons (map-goto stateno symbol)
868 edges))
869 (loop2 (cdr j) edges))
870 (begin
871 (set-bit rowf (- symbol nvars))
872 (loop2 (cdr j) edges))))
873 (if (pair? edges)
874 (vector-set! reads i (reverse edges))))))
875 (loop (+ i 1) (+ rowp 1)))))
876 (digraph reads)))
877
878 (define (add-lookback-edge stateno ruleno gotono)
879 (let ((k (vector-ref lookaheads (+ stateno 1))))
880 (let loop ((found #f) (i (vector-ref lookaheads stateno)))
881 (if (and (not found) (< i k))
882 (if (= (vector-ref LAruleno i) ruleno)
883 (loop #t i)
884 (loop found (+ i 1)))
885
886 (if (not found)
887 (begin (display "Error in add-lookback-edge : ")
888 (display (list stateno ruleno gotono)) (newline))
889 (vector-set! lookback i
890 (cons gotono (vector-ref lookback i))))))))
891
892
893 (define (transpose r-arg n)
894 (let ((new-end (make-vector n #f))
895 (new-R (make-vector n #f)))
896 (do ((i 0 (+ i 1)))
897 ((= i n))
898 (let ((x (list 'bidon)))
899 (vector-set! new-R i x)
900 (vector-set! new-end i x)))
901 (do ((i 0 (+ i 1)))
902 ((= i n))
903 (let ((sp (vector-ref r-arg i)))
904 (if (pair? sp)
905 (let loop ((sp2 sp))
906 (if (pair? sp2)
907 (let* ((x (car sp2))
908 (y (vector-ref new-end x)))
909 (set-cdr! y (cons i (cdr y)))
910 (vector-set! new-end x (cdr y))
911 (loop (cdr sp2))))))))
912 (do ((i 0 (+ i 1)))
913 ((= i n))
914 (vector-set! new-R i (cdr (vector-ref new-R i))))
915
916 new-R))
917
918
919
920 (define (build-relations)
921
922 (define (get-state stateno symbol)
923 (let loop ((j (shift-shifts (vector-ref shift-table stateno)))
924 (stno stateno))
925 (if (null? j)
926 stno
927 (let ((st2 (car j)))
928 (if (= (vector-ref acces-symbol st2) symbol)
929 st2
930 (loop (cdr j) st2))))))
931
932 (set! includes (make-vector ngotos #f))
933 (do ((i 0 (+ i 1)))
934 ((= i ngotos))
935 (let ((state1 (vector-ref from-state i))
936 (symbol1 (vector-ref acces-symbol (vector-ref to-state i))))
937 (let loop ((rulep (vector-ref derives symbol1))
938 (edges '()))
939 (if (pair? rulep)
940 (let ((*rulep (car rulep)))
941 (let loop2 ((rp (vector-ref rrhs *rulep))
942 (stateno state1)
943 (states (list state1)))
944 (let ((*rp (vector-ref ritem rp)))
945 (if (> *rp 0)
946 (let ((st (get-state stateno *rp)))
947 (loop2 (+ rp 1) st (cons st states)))
948 (begin
949
950 (if (not (vector-ref consistent stateno))
951 (add-lookback-edge stateno *rulep i))
952
953 (let loop2 ((done #f)
954 (stp (cdr states))
955 (rp2 (- rp 1))
956 (edgp edges))
957 (if (not done)
958 (let ((*rp (vector-ref ritem rp2)))
959 (if (< -1 *rp nvars)
960 (loop2 (not (vector-ref nullable *rp))
961 (cdr stp)
962 (- rp2 1)
963 (cons (map-goto (car stp) *rp) edgp))
964 (loop2 #t stp rp2 edgp)))
965
966 (loop (cdr rulep) edgp))))))))
967 (vector-set! includes i edges)))))
968 (set! includes (transpose includes ngotos)))
969
970
971
972 (define (compute-lookaheads)
973 (let ((n (vector-ref lookaheads nstates)))
974 (let loop ((i 0))
975 (if (< i n)
976 (let loop2 ((sp (vector-ref lookback i)))
977 (if (pair? sp)
978 (let ((LA-i (vector-ref LA i))
979 (F-j (vector-ref F (car sp))))
980 (bit-union LA-i F-j token-set-size)
981 (loop2 (cdr sp)))
982 (loop (+ i 1))))))))
983
984
985
986 (define (digraph relation)
987 (define infinity (+ ngotos 2))
988 (define INDEX (make-vector (+ ngotos 1) 0))
989 (define VERTICES (make-vector (+ ngotos 1) 0))
990 (define top 0)
991 (define R relation)
992
993 (define (traverse i)
994 (set! top (+ 1 top))
995 (vector-set! VERTICES top i)
996 (let ((height top))
997 (vector-set! INDEX i height)
998 (let ((rp (vector-ref R i)))
999 (if (pair? rp)
1000 (let loop ((rp2 rp))
1001 (if (pair? rp2)
1002 (let ((j (car rp2)))
1003 (if (= 0 (vector-ref INDEX j))
1004 (traverse j))
1005 (if (> (vector-ref INDEX i)
1006 (vector-ref INDEX j))
1007 (vector-set! INDEX i (vector-ref INDEX j)))
1008 (let ((F-i (vector-ref F i))
1009 (F-j (vector-ref F j)))
1010 (bit-union F-i F-j token-set-size))
1011 (loop (cdr rp2))))))
1012 (if (= (vector-ref INDEX i) height)
1013 (let loop ()
1014 (let ((j (vector-ref VERTICES top)))
1015 (set! top (- top 1))
1016 (vector-set! INDEX j infinity)
1017 (if (not (= i j))
1018 (begin
1019 (bit-union (vector-ref F i)
1020 (vector-ref F j)
1021 token-set-size)
1022 (loop)))))))))
1023
1024 (let loop ((i 0))
1025 (if (< i ngotos)
1026 (begin
1027 (if (and (= 0 (vector-ref INDEX i))
1028 (pair? (vector-ref R i)))
1029 (traverse i))
1030 (loop (+ i 1))))))
1031
1032
1033 ;; ----------------------------------------------------------------------
1034 ;; operator precedence management
1035 ;; ----------------------------------------------------------------------
1036
1037 ;; a vector of precedence descriptors where each element
1038 ;; is of the form (terminal type precedence)
1039 (define the-terminals/prec #f) ; terminal symbols with precedence
1040 ; the precedence is an integer >= 0
1041 (define (get-symbol-precedence sym)
1042 (caddr (vector-ref the-terminals/prec sym)))
1043 ; the operator type is either 'none, 'left, 'right, or 'nonassoc
1044 (define (get-symbol-assoc sym)
1045 (cadr (vector-ref the-terminals/prec sym)))
1046
1047 (define rule-precedences '())
1048 (define (add-rule-precedence! rule sym)
1049 (set! rule-precedences
1050 (cons (cons rule sym) rule-precedences)))
1051
1052 (define (get-rule-precedence ruleno)
1053 (cond
1054 ((assq ruleno rule-precedences)
1055 => (lambda (p)
1056 (get-symbol-precedence (cdr p))))
1057 (else
1058 ;; process the rule symbols from left to right
1059 (let loop ((i (vector-ref rrhs ruleno))
1060 (prec 0))
1061 (let ((item (vector-ref ritem i)))
1062 ;; end of rule
1063 (if (< item 0)
1064 prec
1065 (let ((i1 (+ i 1)))
1066 (if (>= item nvars)
1067 ;; it's a terminal symbol
1068 (loop i1 (get-symbol-precedence (- item nvars)))
1069 (loop i1 prec)))))))))
1070
1071 ;; ----------------------------------------------------------------------
1072 ;; Build the various tables
1073 ;; ----------------------------------------------------------------------
1074
1075 (define expected-conflicts 0)
1076
1077 (define (build-tables)
1078
1079 (define (resolve-conflict sym rule)
1080 (let ((sym-prec (get-symbol-precedence sym))
1081 (sym-assoc (get-symbol-assoc sym))
1082 (rule-prec (get-rule-precedence rule)))
1083 (cond
1084 ((> sym-prec rule-prec) 'shift)
1085 ((< sym-prec rule-prec) 'reduce)
1086 ((eq? sym-assoc 'left) 'reduce)
1087 ((eq? sym-assoc 'right) 'shift)
1088 (else 'none))))
1089
1090 (define conflict-messages '())
1091
1092 (define (add-conflict-message . l)
1093 (set! conflict-messages (cons l conflict-messages)))
1094
1095 (define (log-conflicts)
1096 (if (> (length conflict-messages) expected-conflicts)
1097 (for-each
1098 (lambda (message)
1099 (for-each display message)
1100 (newline))
1101 conflict-messages)))
1102
1103 ;; --- Add an action to the action table
1104 (define (add-action state symbol new-action)
1105 (let* ((state-actions (vector-ref action-table state))
1106 (actions (assv symbol state-actions)))
1107 (if (pair? actions)
1108 (let ((current-action (cadr actions)))
1109 (if (not (= new-action current-action))
1110 ;; -- there is a conflict
1111 (begin
1112 (if (and (<= current-action 0) (<= new-action 0))
1113 ;; --- reduce/reduce conflict
1114 (begin
1115 (add-conflict-message
1116 "%% Reduce/Reduce conflict (reduce " (- new-action) ", reduce " (- current-action)
1117 ") on '" (get-symbol (+ symbol nvars)) "' in state " state)
1118 (if (glr-driver?)
1119 (set-cdr! (cdr actions) (cons new-action (cddr actions)))
1120 (set-car! (cdr actions) (max current-action new-action))))
1121 ;; --- shift/reduce conflict
1122 ;; can we resolve the conflict using precedences?
1123 (case (resolve-conflict symbol (- current-action))
1124 ;; -- shift
1125 ((shift) (if (glr-driver?)
1126 (set-cdr! (cdr actions) (cons new-action (cddr actions)))
1127 (set-car! (cdr actions) new-action)))
1128 ;; -- reduce
1129 ((reduce) #f) ; well, nothing to do...
1130 ;; -- signal a conflict!
1131 (else (add-conflict-message
1132 "%% Shift/Reduce conflict (shift " new-action ", reduce " (- current-action)
1133 ") on '" (get-symbol (+ symbol nvars)) "' in state " state)
1134 (if (glr-driver?)
1135 (set-cdr! (cdr actions) (cons new-action (cddr actions)))
1136 (set-car! (cdr actions) new-action))))))))
1137
1138 (vector-set! action-table state (cons (list symbol new-action) state-actions)))
1139 ))
1140
1141 (define (add-action-for-all-terminals state action)
1142 (do ((i 1 (+ i 1)))
1143 ((= i nterms))
1144 (add-action state i action)))
1145
1146 (set! action-table (make-vector nstates '()))
1147
1148 (do ((i 0 (+ i 1))) ; i = state
1149 ((= i nstates))
1150 (let ((red (vector-ref reduction-table i)))
1151 (if (and red (>= (red-nreds red) 1))
1152 (if (and (= (red-nreds red) 1) (vector-ref consistent i))
1153 (if (glr-driver?)
1154 (add-action-for-all-terminals i (- (car (red-rules red))))
1155 (add-action i 'default (- (car (red-rules red)))))
1156 (let ((k (vector-ref lookaheads (+ i 1))))
1157 (let loop ((j (vector-ref lookaheads i)))
1158 (if (< j k)
1159 (let ((rule (- (vector-ref LAruleno j)))
1160 (lav (vector-ref LA j)))
1161 (let loop2 ((token 0) (x (vector-ref lav 0)) (y 1) (z 0))
1162 (if (< token nterms)
1163 (begin
1164 (let ((in-la-set? (modulo x 2)))
1165 (if (= in-la-set? 1)
1166 (add-action i token rule)))
1167 (if (= y (BITS-PER-WORD))
1168 (loop2 (+ token 1)
1169 (vector-ref lav (+ z 1))
1170 1
1171 (+ z 1))
1172 (loop2 (+ token 1) (quotient x 2) (+ y 1) z)))))
1173 (loop (+ j 1)))))))))
1174
1175 (let ((shiftp (vector-ref shift-table i)))
1176 (if shiftp
1177 (let loop ((k (shift-shifts shiftp)))
1178 (if (pair? k)
1179 (let* ((state (car k))
1180 (symbol (vector-ref acces-symbol state)))
1181 (if (>= symbol nvars)
1182 (add-action i (- symbol nvars) state))
1183 (loop (cdr k))))))))
1184
1185 (add-action final-state 0 'accept)
1186 (log-conflicts))
1187
1188 (define (compact-action-table terms)
1189 (define (most-common-action acts)
1190 (let ((accums '()))
1191 (let loop ((l acts))
1192 (if (pair? l)
1193 (let* ((x (cadar l))
1194 (y (assv x accums)))
1195 (if (and (number? x) (< x 0))
1196 (if y
1197 (set-cdr! y (+ 1 (cdr y)))
1198 (set! accums (cons `(,x . 1) accums))))
1199 (loop (cdr l)))))
1200
1201 (let loop ((l accums) (max 0) (sym #f))
1202 (if (null? l)
1203 sym
1204 (let ((x (car l)))
1205 (if (> (cdr x) max)
1206 (loop (cdr l) (cdr x) (car x))
1207 (loop (cdr l) max sym)))))))
1208
1209 (define (translate-terms acts)
1210 (map (lambda (act)
1211 (cons (list-ref terms (car act))
1212 (cdr act)))
1213 acts))
1214
1215 (do ((i 0 (+ i 1)))
1216 ((= i nstates))
1217 (let ((acts (vector-ref action-table i)))
1218 (if (vector? (vector-ref reduction-table i))
1219 (let ((act (most-common-action acts)))
1220 (vector-set! action-table i
1221 (cons `(*default* ,(if act act '*error*))
1222 (translate-terms
1223 (lalr-filter (lambda (x)
1224 (not (and (= (length x) 2)
1225 (eq? (cadr x) act))))
1226 acts)))))
1227 (vector-set! action-table i
1228 (cons `(*default* *error*)
1229 (translate-terms acts)))))))
1230
1231
1232
1233 ;; --
1234
1235 (define (rewrite-grammar tokens grammar k)
1236
1237 (define eoi '*eoi*)
1238
1239 (define (check-terminal term terms)
1240 (cond
1241 ((not (valid-terminal? term))
1242 (lalr-error "invalid terminal: " term))
1243 ((member term terms)
1244 (lalr-error "duplicate definition of terminal: " term))))
1245
1246 (define (prec->type prec)
1247 (cdr (assq prec '((left: . left)
1248 (right: . right)
1249 (nonassoc: . nonassoc)))))
1250
1251 (cond
1252 ;; --- a few error conditions
1253 ((not (list? tokens))
1254 (lalr-error "Invalid token list: " tokens))
1255 ((not (pair? grammar))
1256 (lalr-error "Grammar definition must have a non-empty list of productions" '()))
1257
1258 (else
1259 ;; --- check the terminals
1260 (let loop1 ((lst tokens)
1261 (rev-terms '())
1262 (rev-terms/prec '())
1263 (prec-level 0))
1264 (if (pair? lst)
1265 (let ((term (car lst)))
1266 (cond
1267 ((pair? term)
1268 (if (and (memq (car term) '(left: right: nonassoc:))
1269 (not (null? (cdr term))))
1270 (let ((prec (+ prec-level 1))
1271 (optype (prec->type (car term))))
1272 (let loop-toks ((l (cdr term))
1273 (rev-terms rev-terms)
1274 (rev-terms/prec rev-terms/prec))
1275 (if (null? l)
1276 (loop1 (cdr lst) rev-terms rev-terms/prec prec)
1277 (let ((term (car l)))
1278 (check-terminal term rev-terms)
1279 (loop-toks
1280 (cdr l)
1281 (cons term rev-terms)
1282 (cons (list term optype prec) rev-terms/prec))))))
1283
1284 (lalr-error "invalid operator precedence specification: " term)))
1285
1286 (else
1287 (check-terminal term rev-terms)
1288 (loop1 (cdr lst)
1289 (cons term rev-terms)
1290 (cons (list term 'none 0) rev-terms/prec)
1291 prec-level))))
1292
1293 ;; --- check the grammar rules
1294 (let loop2 ((lst grammar) (rev-nonterm-defs '()))
1295 (if (pair? lst)
1296 (let ((def (car lst)))
1297 (if (not (pair? def))
1298 (lalr-error "Nonterminal definition must be a non-empty list" '())
1299 (let ((nonterm (car def)))
1300 (cond ((not (valid-nonterminal? nonterm))
1301 (lalr-error "Invalid nonterminal:" nonterm))
1302 ((or (member nonterm rev-terms)
1303 (assoc nonterm rev-nonterm-defs))
1304 (lalr-error "Nonterminal previously defined:" nonterm))
1305 (else
1306 (loop2 (cdr lst)
1307 (cons def rev-nonterm-defs)))))))
1308 (let* ((terms (cons eoi (cons 'error (reverse rev-terms))))
1309 (terms/prec (cons '(eoi none 0) (cons '(error none 0) (reverse rev-terms/prec))))
1310 (nonterm-defs (reverse rev-nonterm-defs))
1311 (nonterms (cons '*start* (map car nonterm-defs))))
1312 (if (= (length nonterms) 1)
1313 (lalr-error "Grammar must contain at least one nonterminal" '())
1314 (let loop-defs ((defs (cons `(*start* (,(cadr nonterms) ,eoi) : $1)
1315 nonterm-defs))
1316 (ruleno 0)
1317 (comp-defs '()))
1318 (if (pair? defs)
1319 (let* ((nonterm-def (car defs))
1320 (compiled-def (rewrite-nonterm-def
1321 nonterm-def
1322 ruleno
1323 terms nonterms)))
1324 (loop-defs (cdr defs)
1325 (+ ruleno (length compiled-def))
1326 (cons compiled-def comp-defs)))
1327
1328 (let ((compiled-nonterm-defs (reverse comp-defs)))
1329 (k terms
1330 terms/prec
1331 nonterms
1332 (map (lambda (x) (cons (caaar x) (map cdar x)))
1333 compiled-nonterm-defs)
1334 (apply append compiled-nonterm-defs))))))))))))))
1335
1336
1337 (define (rewrite-nonterm-def nonterm-def ruleno terms nonterms)
1338
1339 (define No-NT (length nonterms))
1340
1341 (define (encode x)
1342 (let ((PosInNT (pos-in-list x nonterms)))
1343 (if PosInNT
1344 PosInNT
1345 (let ((PosInT (pos-in-list x terms)))
1346 (if PosInT
1347 (+ No-NT PosInT)
1348 (lalr-error "undefined symbol : " x))))))
1349
1350 (define (process-prec-directive rhs ruleno)
1351 (let loop ((l rhs))
1352 (if (null? l)
1353 '()
1354 (let ((first (car l))
1355 (rest (cdr l)))
1356 (cond
1357 ((or (member first terms) (member first nonterms))
1358 (cons first (loop rest)))
1359 ((and (pair? first)
1360 (eq? (car first) 'prec:))
1361 (if (and (pair? (cdr first))
1362 (null? (cddr first))
1363 (member (cadr first) terms))
1364 (if (null? rest)
1365 (begin
1366 (add-rule-precedence! ruleno (pos-in-list (cadr first) terms))
1367 (loop rest))
1368 (lalr-error "prec: directive should be at end of rule: " rhs))
1369 (lalr-error "Invalid prec: directive: " first)))
1370 (else
1371 (lalr-error "Invalid terminal or nonterminal: " first)))))))
1372
1373 (define (check-error-production rhs)
1374 (let loop ((rhs rhs))
1375 (if (pair? rhs)
1376 (begin
1377 (if (and (eq? (car rhs) 'error)
1378 (or (null? (cdr rhs))
1379 (not (member (cadr rhs) terms))
1380 (not (null? (cddr rhs)))))
1381 (lalr-error "Invalid 'error' production. A single terminal symbol must follow the 'error' token.:" rhs))
1382 (loop (cdr rhs))))))
1383
1384
1385 (if (not (pair? (cdr nonterm-def)))
1386 (lalr-error "At least one production needed for nonterminal:" (car nonterm-def))
1387 (let ((name (symbol->string (car nonterm-def))))
1388 (let loop1 ((lst (cdr nonterm-def))
1389 (i 1)
1390 (rev-productions-and-actions '()))
1391 (if (not (pair? lst))
1392 (reverse rev-productions-and-actions)
1393 (let* ((rhs (process-prec-directive (car lst) (+ ruleno i -1)))
1394 (rest (cdr lst))
1395 (prod (map encode (cons (car nonterm-def) rhs))))
1396 ;; -- check for undefined tokens
1397 (for-each (lambda (x)
1398 (if (not (or (member x terms) (member x nonterms)))
1399 (lalr-error "Invalid terminal or nonterminal:" x)))
1400 rhs)
1401 ;; -- check 'error' productions
1402 (check-error-production rhs)
1403
1404 (if (and (pair? rest)
1405 (eq? (car rest) ':)
1406 (pair? (cdr rest)))
1407 (loop1 (cddr rest)
1408 (+ i 1)
1409 (cons (cons prod (cadr rest))
1410 rev-productions-and-actions))
1411 (let* ((rhs-length (length rhs))
1412 (action
1413 (cons 'vector
1414 (cons (list 'quote (string->symbol
1415 (string-append
1416 name
1417 "-"
1418 (number->string i))))
1419 (let loop-j ((j 1))
1420 (if (> j rhs-length)
1421 '()
1422 (cons (string->symbol
1423 (string-append
1424 "$"
1425 (number->string j)))
1426 (loop-j (+ j 1)))))))))
1427 (loop1 rest
1428 (+ i 1)
1429 (cons (cons prod action)
1430 rev-productions-and-actions))))))))))
1431
1432 (define (valid-nonterminal? x)
1433 (symbol? x))
1434
1435 (define (valid-terminal? x)
1436 (symbol? x)) ; DB
1437
1438 ;; ----------------------------------------------------------------------
1439 ;; Miscellaneous
1440 ;; ----------------------------------------------------------------------
1441 (define (pos-in-list x lst)
1442 (let loop ((lst lst) (i 0))
1443 (cond ((not (pair? lst)) #f)
1444 ((equal? (car lst) x) i)
1445 (else (loop (cdr lst) (+ i 1))))))
1446
1447 (define (sunion lst1 lst2) ; union of sorted lists
1448 (let loop ((L1 lst1)
1449 (L2 lst2))
1450 (cond ((null? L1) L2)
1451 ((null? L2) L1)
1452 (else
1453 (let ((x (car L1)) (y (car L2)))
1454 (cond
1455 ((> x y)
1456 (cons y (loop L1 (cdr L2))))
1457 ((< x y)
1458 (cons x (loop (cdr L1) L2)))
1459 (else
1460 (loop (cdr L1) L2))
1461 ))))))
1462
1463 (define (sinsert elem lst)
1464 (let loop ((l1 lst))
1465 (if (null? l1)
1466 (cons elem l1)
1467 (let ((x (car l1)))
1468 (cond ((< elem x)
1469 (cons elem l1))
1470 ((> elem x)
1471 (cons x (loop (cdr l1))))
1472 (else
1473 l1))))))
1474
1475 (define (lalr-filter p lst)
1476 (let loop ((l lst))
1477 (if (null? l)
1478 '()
1479 (let ((x (car l)) (y (cdr l)))
1480 (if (p x)
1481 (cons x (loop y))
1482 (loop y))))))
1483
1484 ;; ----------------------------------------------------------------------
1485 ;; Debugging tools ...
1486 ;; ----------------------------------------------------------------------
1487 (define the-terminals #f) ; names of terminal symbols
1488 (define the-nonterminals #f) ; non-terminals
1489
1490 (define (print-item item-no)
1491 (let loop ((i item-no))
1492 (let ((v (vector-ref ritem i)))
1493 (if (>= v 0)
1494 (loop (+ i 1))
1495 (let* ((rlno (- v))
1496 (nt (vector-ref rlhs rlno)))
1497 (display (vector-ref the-nonterminals nt)) (display " --> ")
1498 (let loop ((i (vector-ref rrhs rlno)))
1499 (let ((v (vector-ref ritem i)))
1500 (if (= i item-no)
1501 (display ". "))
1502 (if (>= v 0)
1503 (begin
1504 (display (get-symbol v))
1505 (display " ")
1506 (loop (+ i 1)))
1507 (begin
1508 (display " (rule ")
1509 (display (- v))
1510 (display ")")
1511 (newline))))))))))
1512
1513 (define (get-symbol n)
1514 (if (>= n nvars)
1515 (vector-ref the-terminals (- n nvars))
1516 (vector-ref the-nonterminals n)))
1517
1518
1519 (define (print-states)
1520 (define (print-action act)
1521 (cond
1522 ((eq? act '*error*)
1523 (display " : Error"))
1524 ((eq? act 'accept)
1525 (display " : Accept input"))
1526 ((< act 0)
1527 (display " : reduce using rule ")
1528 (display (- act)))
1529 (else
1530 (display " : shift and goto state ")
1531 (display act)))
1532 (newline)
1533 #t)
1534
1535 (define (print-actions acts)
1536 (let loop ((l acts))
1537 (if (null? l)
1538 #t
1539 (let ((sym (caar l))
1540 (act (cadar l)))
1541 (display " ")
1542 (cond
1543 ((eq? sym 'default)
1544 (display "default action"))
1545 (else
1546 (if (number? sym)
1547 (display (get-symbol (+ sym nvars)))
1548 (display sym))))
1549 (print-action act)
1550 (loop (cdr l))))))
1551
1552 (if (not action-table)
1553 (begin
1554 (display "No generated parser available!")
1555 (newline)
1556 #f)
1557 (begin
1558 (display "State table") (newline)
1559 (display "-----------") (newline) (newline)
1560
1561 (let loop ((l first-state))
1562 (if (null? l)
1563 #t
1564 (let* ((core (car l))
1565 (i (core-number core))
1566 (items (core-items core))
1567 (actions (vector-ref action-table i)))
1568 (display "state ") (display i) (newline)
1569 (newline)
1570 (for-each (lambda (x) (display " ") (print-item x))
1571 items)
1572 (newline)
1573 (print-actions actions)
1574 (newline)
1575 (loop (cdr l))))))))
1576
1577
1578
1579 ;; ----------------------------------------------------------------------
1580
1581 (define build-goto-table
1582 (lambda ()
1583 `(vector
1584 ,@(map
1585 (lambda (shifts)
1586 (list 'quote
1587 (if shifts
1588 (let loop ((l (shift-shifts shifts)))
1589 (if (null? l)
1590 '()
1591 (let* ((state (car l))
1592 (symbol (vector-ref acces-symbol state)))
1593 (if (< symbol nvars)
1594 (cons `(,symbol . ,state)
1595 (loop (cdr l)))
1596 (loop (cdr l))))))
1597 '())))
1598 (vector->list shift-table)))))
1599
1600
1601 (define build-reduction-table
1602 (lambda (gram/actions)
1603 `(vector
1604 '()
1605 ,@(map
1606 (lambda (p)
1607 (let ((act (cdr p)))
1608 `(lambda ,(if (eq? driver-name 'lr-driver)
1609 '(___stack ___sp ___goto-table ___push yypushback)
1610 '(___sp ___goto-table ___push))
1611 ,(let* ((nt (caar p)) (rhs (cdar p)) (n (length rhs)))
1612 `(let* (,@(if act
1613 (let loop ((i 1) (l rhs))
1614 (if (pair? l)
1615 (let ((rest (cdr l))
1616 (ns (number->string (+ (- n i) 1))))
1617 (cons
1618 `(tok ,(if (eq? driver-name 'lr-driver)
1619 `(vector-ref ___stack (- ___sp ,(- (* i 2) 1)))
1620 `(list-ref ___sp ,(+ (* (- i 1) 2) 1))))
1621 (cons
1622 `(,(string->symbol (string-append "$" ns))
1623 (if (lexical-token? tok) (lexical-token-value tok) tok))
1624 (cons
1625 `(,(string->symbol (string-append "@" ns))
1626 (if (lexical-token? tok) (lexical-token-source tok) tok))
1627 (loop (+ i 1) rest)))))
1628 '()))
1629 '()))
1630 ,(if (= nt 0)
1631 '$1
1632 `(___push ,n ,nt ,(cdr p) ,@(if (eq? driver-name 'lr-driver) '() '(___sp))
1633 ,(if (eq? driver-name 'lr-driver)
1634 `(vector-ref ___stack (- ___sp ,(length rhs)))
1635 `(list-ref ___sp ,(length rhs))))))))))
1636
1637 gram/actions))))
1638
1639
1640
1641 ;; Options
1642
1643 (define *valid-options*
1644 (list
1645 (cons 'out-table:
1646 (lambda (option)
1647 (and (list? option)
1648 (= (length option) 2)
1649 (string? (cadr option)))))
1650 (cons 'output:
1651 (lambda (option)
1652 (and (list? option)
1653 (= (length option) 3)
1654 (symbol? (cadr option))
1655 (string? (caddr option)))))
1656 (cons 'expect:
1657 (lambda (option)
1658 (and (list? option)
1659 (= (length option) 2)
1660 (integer? (cadr option))
1661 (>= (cadr option) 0))))
1662
1663 (cons 'driver:
1664 (lambda (option)
1665 (and (list? option)
1666 (= (length option) 2)
1667 (symbol? (cadr option))
1668 (memq (cadr option) '(lr glr)))))))
1669
1670
1671 (define (validate-options options)
1672 (for-each
1673 (lambda (option)
1674 (let ((p (assoc (car option) *valid-options*)))
1675 (if (or (not p)
1676 (not ((cdr p) option)))
1677 (lalr-error "Invalid option:" option))))
1678 options))
1679
1680
1681 (define (output-parser! options code)
1682 (let ((option (assq 'output: options)))
1683 (if option
1684 (let ((parser-name (cadr option))
1685 (file-name (caddr option)))
1686 (with-output-to-file file-name
1687 (lambda ()
1688 (pprint `(define ,parser-name ,code))
1689 (newline)))))))
1690
1691
1692 (define (output-table! options)
1693 (let ((option (assq 'out-table: options)))
1694 (if option
1695 (let ((file-name (cadr option)))
1696 (with-output-to-file file-name print-states)))))
1697
1698
1699 (define (set-expected-conflicts! options)
1700 (let ((option (assq 'expect: options)))
1701 (set! expected-conflicts (if option (cadr option) 0))))
1702
1703 (define (set-driver-name! options)
1704 (let ((option (assq 'driver: options)))
1705 (if option
1706 (let ((driver-type (cadr option)))
1707 (set! driver-name (if (eq? driver-type 'glr) 'glr-driver 'lr-driver))))))
1708
1709
1710 ;; -- arguments
1711
1712 (define (extract-arguments lst proc)
1713 (let loop ((options '())
1714 (tokens '())
1715 (rules '())
1716 (lst lst))
1717 (if (pair? lst)
1718 (let ((p (car lst)))
1719 (cond
1720 ((and (pair? p)
1721 (lalr-keyword? (car p))
1722 (assq (car p) *valid-options*))
1723 (loop (cons p options) tokens rules (cdr lst)))
1724 (else
1725 (proc options p (cdr lst)))))
1726 (lalr-error "Malformed lalr-parser form" lst))))
1727
1728
1729 (define (build-driver options tokens rules)
1730 (validate-options options)
1731 (set-expected-conflicts! options)
1732 (set-driver-name! options)
1733 (let* ((gram/actions (gen-tables! tokens rules))
1734 (code `(,driver-name ',action-table ,(build-goto-table) ,(build-reduction-table gram/actions))))
1735
1736 (output-table! options)
1737 (output-parser! options code)
1738 code))
1739
1740 (extract-arguments arguments build-driver))
1741
1742
1743
1744 ;;;
1745 ;;;; --
1746 ;;;; Implementation of the lr-driver
1747 ;;;
1748
1749
1750 (cond-expand
1751 (gambit
1752 (declare
1753 (standard-bindings)
1754 (fixnum)
1755 (block)
1756 (not safe)))
1757 (chicken
1758 (declare
1759 (uses extras)
1760 (usual-integrations)
1761 (fixnum)
1762 (not safe)))
1763 (else))
1764
1765
1766 ;;;
1767 ;;;; Source location utilities
1768 ;;;
1769
1770
1771 ;; This function assumes that src-location-1 and src-location-2 are source-locations
1772 ;; Returns #f if they are not locations for the same input
1773 (define (combine-locations src-location-1 src-location-2)
1774 (let ((offset-1 (source-location-offset src-location-1))
1775 (offset-2 (source-location-offset src-location-2))
1776 (length-1 (source-location-length src-location-1))
1777 (length-2 (source-location-length src-location-2)))
1778
1779 (cond ((not (equal? (source-location-input src-location-1)
1780 (source-location-input src-location-2)))
1781 #f)
1782 ((or (not (number? offset-1)) (not (number? offset-2))
1783 (not (number? length-1)) (not (number? length-2))
1784 (< offset-1 0) (< offset-2 0)
1785 (< length-1 0) (< length-2 0))
1786 (make-source-location (source-location-input src-location-1)
1787 (source-location-line src-location-1)
1788 (source-location-column src-location-1)
1789 -1 -1))
1790 ((<= offset-1 offset-2)
1791 (make-source-location (source-location-input src-location-1)
1792 (source-location-line src-location-1)
1793 (source-location-column src-location-1)
1794 offset-1
1795 (- (+ offset-2 length-2) offset-1)))
1796 (else
1797 (make-source-location (source-location-input src-location-1)
1798 (source-location-line src-location-1)
1799 (source-location-column src-location-1)
1800 offset-2
1801 (- (+ offset-1 length-1) offset-2))))))
1802
1803
1804 ;;;
1805 ;;;; LR-driver
1806 ;;;
1807
1808
1809 (define *max-stack-size* 500)
1810
1811 (define (lr-driver action-table goto-table reduction-table)
1812 (define ___atable action-table)
1813 (define ___gtable goto-table)
1814 (define ___rtable reduction-table)
1815
1816 (define ___lexerp #f)
1817 (define ___errorp #f)
1818
1819 (define ___stack #f)
1820 (define ___sp 0)
1821
1822 (define ___curr-input #f)
1823 (define ___reuse-input #f)
1824
1825 (define ___input #f)
1826 (define (___consume)
1827 (set! ___input (if ___reuse-input ___curr-input (___lexerp)))
1828 (set! ___reuse-input #f)
1829 (set! ___curr-input ___input))
1830
1831 (define (___pushback)
1832 (set! ___reuse-input #t))
1833
1834 (define (___initstack)
1835 (set! ___stack (make-vector *max-stack-size* 0))
1836 (set! ___sp 0))
1837
1838 (define (___growstack)
1839 (let ((new-stack (make-vector (* 2 (vector-length ___stack)) 0)))
1840 (let loop ((i (- (vector-length ___stack) 1)))
1841 (if (>= i 0)
1842 (begin
1843 (vector-set! new-stack i (vector-ref ___stack i))
1844 (loop (- i 1)))))
1845 (set! ___stack new-stack)))
1846
1847 (define (___checkstack)
1848 (if (>= ___sp (vector-length ___stack))
1849 (___growstack)))
1850
1851 (define (___push delta new-category lvalue tok)
1852 (set! ___sp (- ___sp (* delta 2)))
1853 (let* ((state (vector-ref ___stack ___sp))
1854 (new-state (cdr (assoc new-category (vector-ref ___gtable state)))))
1855 (set! ___sp (+ ___sp 2))
1856 (___checkstack)
1857 (vector-set! ___stack ___sp new-state)
1858 (vector-set! ___stack (- ___sp 1) (note-source-location lvalue tok))))
1859
1860 (define (___reduce st)
1861 ((vector-ref ___rtable st) ___stack ___sp ___gtable ___push ___pushback))
1862
1863 (define (___shift token attribute)
1864 (set! ___sp (+ ___sp 2))
1865 (___checkstack)
1866 (vector-set! ___stack (- ___sp 1) attribute)
1867 (vector-set! ___stack ___sp token))
1868
1869 (define (___action x l)
1870 (let ((y (assoc x l)))
1871 (if y (cadr y) (cadar l))))
1872
1873 (define (___recover tok)
1874 (let find-state ((sp ___sp))
1875 (if (< sp 0)
1876 (set! ___sp sp)
1877 (let* ((state (vector-ref ___stack sp))
1878 (act (assoc 'error (vector-ref ___atable state))))
1879 (if act
1880 (begin
1881 (set! ___sp sp)
1882 (___sync (cadr act) tok))
1883 (find-state (- sp 2)))))))
1884
1885 (define (___sync state tok)
1886 (let ((sync-set (map car (cdr (vector-ref ___atable state)))))
1887 (set! ___sp (+ ___sp 4))
1888 (___checkstack)
1889 (vector-set! ___stack (- ___sp 3) #f)
1890 (vector-set! ___stack (- ___sp 2) state)
1891 (let skip ()
1892 (let ((i (___category ___input)))
1893 (if (eq? i '*eoi*)
1894 (set! ___sp -1)
1895 (if (memq i sync-set)
1896 (let ((act (assoc i (vector-ref ___atable state))))
1897 (vector-set! ___stack (- ___sp 1) #f)
1898 (vector-set! ___stack ___sp (cadr act)))
1899 (begin
1900 (___consume)
1901 (skip))))))))
1902
1903 (define (___category tok)
1904 (if (lexical-token? tok)
1905 (lexical-token-category tok)
1906 tok))
1907
1908 (define (___run)
1909 (let loop ()
1910 (if ___input
1911 (let* ((state (vector-ref ___stack ___sp))
1912 (i (___category ___input))
1913 (act (___action i (vector-ref ___atable state))))
1914
1915 (cond ((not (symbol? i))
1916 (___errorp "Syntax error: invalid token: " ___input)
1917 #f)
1918
1919 ;; Input succesfully parsed
1920 ((eq? act 'accept)
1921 (vector-ref ___stack 1))
1922
1923 ;; Syntax error in input
1924 ((eq? act '*error*)
1925 (if (eq? i '*eoi*)
1926 (begin
1927 (___errorp "Syntax error: unexpected end of input")
1928 #f)
1929 (begin
1930 (___errorp "Syntax error: unexpected token : " ___input)
1931 (___recover i)
1932 (if (>= ___sp 0)
1933 (set! ___input #f)
1934 (begin
1935 (set! ___sp 0)
1936 (set! ___input '*eoi*)))
1937 (loop))))
1938
1939 ;; Shift current token on top of the stack
1940 ((>= act 0)
1941 (___shift act ___input)
1942 (set! ___input (if (eq? i '*eoi*) '*eoi* #f))
1943 (loop))
1944
1945 ;; Reduce by rule (- act)
1946 (else
1947 (___reduce (- act))
1948 (loop))))
1949
1950 ;; no lookahead, so check if there is a default action
1951 ;; that does not require the lookahead
1952 (let* ((state (vector-ref ___stack ___sp))
1953 (acts (vector-ref ___atable state))
1954 (defact (if (pair? acts) (cadar acts) #f)))
1955 (if (and (= 1 (length acts)) (< defact 0))
1956 (___reduce (- defact))
1957 (___consume))
1958 (loop)))))
1959
1960
1961 (lambda (lexerp errorp)
1962 (set! ___errorp errorp)
1963 (set! ___lexerp lexerp)
1964 (___initstack)
1965 (___run)))
1966
1967
1968 ;;;
1969 ;;;; Simple-minded GLR-driver
1970 ;;;
1971
1972
1973 (define (glr-driver action-table goto-table reduction-table)
1974 (define ___atable action-table)
1975 (define ___gtable goto-table)
1976 (define ___rtable reduction-table)
1977
1978 (define ___lexerp #f)
1979 (define ___errorp #f)
1980
1981 ;; -- Input handling
1982
1983 (define *input* #f)
1984 (define (initialize-lexer lexer)
1985 (set! ___lexerp lexer)
1986 (set! *input* #f))
1987 (define (consume)
1988 (set! *input* (___lexerp)))
1989
1990 (define (token-category tok)
1991 (if (lexical-token? tok)
1992 (lexical-token-category tok)
1993 tok))
1994
1995 (define (token-attribute tok)
1996 (if (lexical-token? tok)
1997 (lexical-token-value tok)
1998 tok))
1999
2000 ;; -- Processes (stacks) handling
2001
2002 (define *processes* '())
2003
2004 (define (initialize-processes)
2005 (set! *processes* '()))
2006 (define (add-process process)
2007 (set! *processes* (cons process *processes*)))
2008 (define (get-processes)
2009 (reverse *processes*))
2010
2011 (define (for-all-processes proc)
2012 (let ((processes (get-processes)))
2013 (initialize-processes)
2014 (for-each proc processes)))
2015
2016 ;; -- parses
2017 (define *parses* '())
2018 (define (get-parses)
2019 *parses*)
2020 (define (initialize-parses)
2021 (set! *parses* '()))
2022 (define (add-parse parse)
2023 (set! *parses* (cons parse *parses*)))
2024
2025
2026 (define (push delta new-category lvalue stack tok)
2027 (let* ((stack (drop stack (* delta 2)))
2028 (state (car stack))
2029 (new-state (cdr (assv new-category (vector-ref ___gtable state)))))
2030 (cons new-state (cons (note-source-location lvalue tok) stack))))
2031
2032 (define (reduce state stack)
2033 ((vector-ref ___rtable state) stack ___gtable push))
2034
2035 (define (shift state symbol stack)
2036 (cons state (cons symbol stack)))
2037
2038 (define (get-actions token action-list)
2039 (let ((pair (assoc token action-list)))
2040 (if pair
2041 (cdr pair)
2042 (cdar action-list)))) ;; get the default action
2043
2044
2045 (define (run)
2046 (let loop-tokens ()
2047 (consume)
2048 (let ((symbol (token-category *input*)))
2049 (for-all-processes
2050 (lambda (process)
2051 (let loop ((stacks (list process)) (active-stacks '()))
2052 (cond ((pair? stacks)
2053 (let* ((stack (car stacks))
2054 (state (car stack)))
2055 (let actions-loop ((actions (get-actions symbol (vector-ref ___atable state)))
2056 (active-stacks active-stacks))
2057 (if (pair? actions)
2058 (let ((action (car actions))
2059 (other-actions (cdr actions)))
2060 (cond ((eq? action '*error*)
2061 (actions-loop other-actions active-stacks))
2062 ((eq? action 'accept)
2063 (add-parse (car (take-right stack 2)))
2064 (actions-loop other-actions active-stacks))
2065 ((>= action 0)
2066 (let ((new-stack (shift action *input* stack)))
2067 (add-process new-stack))
2068 (actions-loop other-actions active-stacks))
2069 (else
2070 (let ((new-stack (reduce (- action) stack)))
2071 (actions-loop other-actions (cons new-stack active-stacks))))))
2072 (loop (cdr stacks) active-stacks)))))
2073 ((pair? active-stacks)
2074 (loop (reverse active-stacks) '())))))))
2075 (if (pair? (get-processes))
2076 (loop-tokens))))
2077
2078
2079 (lambda (lexerp errorp)
2080 (set! ___errorp errorp)
2081 (initialize-lexer lexerp)
2082 (initialize-processes)
2083 (initialize-parses)
2084 (add-process '(0))
2085 (run)
2086 (get-parses)))
2087
2088
2089 (define (drop l n)
2090 (cond ((and (> n 0) (pair? l))
2091 (drop (cdr l) (- n 1)))
2092 (else
2093 l)))
2094
2095 (define (take-right l n)
2096 (drop l (- (length l) n)))