1 ;;; Guile VM code converters
3 ;; Copyright (C) 2001, 2009, 2012, 2013 Free Software Foundation, Inc.
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 (define-module (language scheme decompile-tree-il)
22 #:use-module (language tree-il)
23 #:use-module (srfi srfi-1)
24 #:use-module (srfi srfi-26)
25 #:use-module (ice-9 receive)
26 #:use-module (ice-9 vlist)
27 #:use-module (ice-9 match)
28 #:use-module (system base syntax)
29 #:export (decompile-tree-il))
;; Public entry point: decompile E in ENV.  OPTS is a list of extra
;; arguments that is spliced after E and ENV in the call to
;; 'do-decompile'.
(define decompile-tree-il
  (lambda (e env opts)
    (apply do-decompile (cons* e env opts))))
34 (define* (do-decompile e env
36 (use-derived-syntax? #t)
39 (strip-numeric-suffixes? #f)
42 (receive (output-name-table occurrence-count-table)
43 (choose-output-names e use-derived-syntax? strip-numeric-suffixes?)
;; Look up the entry stored for gensym S in 'output-name-table'
;; (eq?-keyed lookup; #f when there is no entry).
(define output-name
  (lambda (gensym)
    (hashq-ref output-name-table gensym)))
;; Look up the entry stored for gensym S in 'occurrence-count-table'
;; (eq?-keyed lookup; #f when there is no entry).
(define occurrence-count
  (lambda (gensym)
    (hashq-ref occurrence-count-table gensym)))
;; Return a one-argument procedure that disregards its argument and
;; always yields X.
(define const
  (lambda (x)
    (lambda (ignored) x)))
;; A datum counts as an atom when it is neither a pair nor a vector.
(define (atom? x)
  (and (not (pair? x))
       (not (vector? x))))
;; Produce the source form used to denote the unspecified value: a
;; one-armed 'if' whose test is always false.
(define build-void
  (lambda ()
    '(if #f #f)))
53 (define (build-begin es)
59 (define (build-lambda-body e)
61 (('let () body ...) body)
65 (define (build-begin-body e)
70 (define (build-define name e)
72 ((? (const avoid-lambda?)
73 ('lambda formals body ...))
74 `(define (,name ,@formals) ,@body))
75 ((? (const avoid-lambda?)
76 ('lambda* formals body ...))
77 `(define* (,name ,@formals) ,@body))
78 (_ `(define ,name ,e))))
80 (define (build-let names vals body)
81 (match `(let ,(map list names vals)
82 ,@(build-lambda-body body))
84 ((_ (b) ('let* (bs ...) body ...))
85 `(let* (,b ,@bs) ,@body))
86 ((? (const use-derived-syntax?)
87 (_ (b1) ('let (b2) body ...)))
88 `(let* (,b1 ,b2) ,@body))
91 (define (build-letrec in-order? names vals body)
92 (match `(,(if in-order? 'letrec* 'letrec)
93 ,(map list names vals)
94 ,@(build-lambda-body body))
96 ((_ () body ...) `(let () ,@body))
97 ((_ ((name ('lambda (formals ...) body ...)))
100 (if (= (length formals) (length args))
101 `(let ,name ,(map list formals args) ,@body)
103 ((? (const avoid-lambda?)
104 ('letrec* _ body ...))
106 ,@(map build-define names vals)
110 (define (build-if test consequent alternate)
112 (('if #f _) `(if ,test ,consequent))
113 (_ `(if ,test ,consequent ,alternate))))
115 (define (build-and xs)
121 (define (build-or xs)
127 (define (case-test-var test)
129 (('memv (? atom? v) ('quote (datums ...)))
131 (('eqv? (? atom? v) ('quote datum))
135 (define (test->datums v test)
137 ((v 'memv v ('quote (xs ...)))
139 ((v 'eqv? v ('quote x))
143 (define (build-else-tail e)
146 (('and xs ... x) `((,(build-and xs) ,@(build-begin-body x))
148 (_ `((else ,@(build-begin-body e))))))
150 (define (build-cond-else-tail e)
152 (('cond clauses ...) clauses)
153 (_ (build-else-tail e))))
155 (define (build-case-else-tail v e)
157 ((v 'case v clauses ...)
159 ((v 'if ('memv v ('quote (xs ...))) consequent . alternate*)
160 `((,xs ,@(build-begin-body consequent))
161 ,@(build-case-else-tail v (build-begin alternate*))))
162 ((v 'if ('eqv? v ('quote x)) consequent . alternate*)
163 `(((,x) ,@(build-begin-body consequent))
164 ,@(build-case-else-tail v (build-begin alternate*))))
165 (_ (build-else-tail e))))
167 (define (clauses+tail clauses)
169 ((cs ... (and c ('else . _))) (values cs (list c)))
170 (_ (values clauses '()))))
172 (define (build-cond tests consequents alternate)
175 ((1) (build-if (car tests) (car consequents) alternate))
176 (else `(cond ,@(map (lambda (test consequent)
177 `(,test ,@(build-begin-body consequent)))
179 ,@(build-cond-else-tail alternate)))))
181 (define (build-cond-or-case tests consequents alternate)
183 (build-cond tests consequents alternate)
184 (let* ((v (and (not (null? tests))
185 (case-test-var (car tests))))
186 (datum-lists (take-while identity
187 (map (cut test->datums v <>)
189 (n (length datum-lists))
190 (tail (build-case-else-tail v (build-cond
194 (receive (clauses tail) (clauses+tail tail)
195 (let ((n (+ n (length clauses)))
196 (datum-lists (append datum-lists
198 (consequents (append consequents
200 (map cdr clauses)))))
202 (build-cond tests consequents alternate)
204 ,@(map cons datum-lists (map build-begin-body
205 (take consequents n)))
;; Decompile E with 'recurse', then hand the resulting form to
;; 'build-lambda-body'.
(define (recurse-body e)
  (let ((decompiled (recurse e)))
    (build-lambda-body decompiled)))
218 (if (and (self-evaluating? exp) (not (vector? exp)))
223 (build-begin (cons (recurse head)
228 (match `(,(recurse proc) ,@(map recurse args))
229 ((('lambda (formals ...) body ...) args ...)
231 (if (= (length formals) (length args))
232 (build-let formals args (build-begin body))
236 ((<primcall> name args)
237 `(,name ,@(map recurse args)))
239 ((<primitive-ref> name)
242 ((<lexical-ref> gensym)
243 (output-name gensym))
245 ((<lexical-set> gensym exp)
246 `(set! ,(output-name gensym) ,(recurse exp)))
248 ((<module-ref> mod name public?)
249 `(,(if public? '@ '@@) ,mod ,name))
251 ((<module-set> mod name public? exp)
252 `(set! (,(if public? '@ '@@) ,mod ,name) ,(recurse exp)))
254 ((<toplevel-ref> name)
257 ((<toplevel-set> name exp)
258 `(set! ,name ,(recurse exp)))
260 ((<toplevel-define> name exp)
261 (build-define name (recurse exp)))
263 ((<lambda> meta body)
265 (let ((body (recurse body))
266 (doc (assq-ref meta 'documentation)))
270 (('lambda formals body ...)
271 `(lambda ,formals ,doc ,@body))
272 (('lambda* formals body ...)
273 `(lambda* ,formals ,doc ,@body))
274 (('case-lambda (formals body ...) clauses ...)
275 `(case-lambda (,formals ,doc ,@body) ,@clauses))
276 (('case-lambda* (formals body ...) clauses ...)
277 `(case-lambda* (,formals ,doc ,@body) ,@clauses))
281 ((<lambda-case> req opt rest kw inits gensyms body alternate)
282 (let ((names (map output-name gensyms)))
284 ((and (not opt) (not kw) (not alternate))
285 `(lambda ,(if rest (apply cons* names) names)
286 ,@(recurse-body body)))
287 ((and (not opt) (not kw))
288 (let ((alt-expansion (recurse alternate))
289 (formals (if rest (apply cons* names) names)))
290 (case (car alt-expansion)
292 `(case-lambda (,formals ,@(recurse-body body))
293 ,(cdr alt-expansion)))
295 `(case-lambda* (,formals ,@(recurse-body body))
296 ,(cdr alt-expansion)))
298 `(case-lambda (,formals ,@(recurse-body body))
299 ,@(cdr alt-expansion)))
301 `(case-lambda* (,formals ,@(recurse-body body))
302 ,@(cdr alt-expansion))))))
304 (let* ((alt-expansion (and alternate (recurse alternate)))
306 (nopt (if opt (length opt) 0))
307 (restargs (if rest (list-ref names (+ nreq nopt)) '()))
308 (reqargs (list-head names nreq))
312 (list-head (list-tail names nreq) nopt)
314 (list-head inits nopt))))
319 (map output-name (map caddr (cdr kw)))
321 (list-tail inits nopt))
324 '(#:allow-other-keys)
327 (formals `(,@reqargs ,@optargs ,@kwargs . ,restargs)))
328 (if (not alt-expansion)
329 `(lambda* ,formals ,@(recurse-body body))
330 (case (car alt-expansion)
332 `(case-lambda* (,formals ,@(recurse-body body))
333 ,(cdr alt-expansion)))
334 ((case-lambda case-lambda*)
335 `(case-lambda* (,formals ,@(recurse-body body))
336 ,@(cdr alt-expansion))))))))))
338 ((<conditional> test consequent alternate)
339 (define (simplify-test e)
341 (('if ('eqv? (? atom? v) ('quote a)) #t ('eqv? v ('quote b)))
343 (('if ('eqv? (? atom? v) ('quote a)) #t ('memv v ('quote (bs ...))))
344 `(memv ,v '(,a ,@bs)))
347 ('else ('eqv? v ('quote last-datum))))
348 `(memv ,v '(,@datum ,last-datum)))
350 (match `(if ,(simplify-test (recurse test))
351 ,(recurse consequent)
352 ,@(if (void? alternate) '()
353 (list (recurse alternate))))
354 (('if test ('if ('and xs ...) consequent))
355 (build-if (build-and (cons test xs))
358 ((? (const use-derived-syntax?)
359 ('if test1 ('if test2 consequent)))
360 (build-if (build-and (list test1 test2))
363 (('if (? atom? x) x ('or ys ...))
364 (build-or (cons x ys)))
365 ((? (const use-derived-syntax?)
366 ('if (? atom? x) x y))
367 (build-or (list x y)))
368 (('if test consequent)
369 `(if ,test ,consequent))
370 (('if test ('and xs ...) #f)
371 (build-and (cons test xs)))
372 ((? (const use-derived-syntax?)
373 ('if test consequent #f))
374 (build-and (list test consequent)))
375 ((? (const use-derived-syntax?)
376 ('if test1 consequent1
377 ('if test2 consequent2 . alternate*)))
378 (build-cond-or-case (list test1 test2)
379 (list consequent1 consequent2)
380 (build-begin alternate*)))
381 (('if test consequent ('cond clauses ...))
382 `(cond (,test ,@(build-begin-body consequent))
384 (('if ('memv (? atom? v) ('quote (xs ...))) consequent
385 ('case v clauses ...))
386 `(case ,v (,xs ,@(build-begin-body consequent))
388 (('if ('eqv? (? atom? v) ('quote x)) consequent
389 ('case v clauses ...))
390 `(case ,v ((,x) ,@(build-begin-body consequent))
394 ((<let> gensyms vals body)
395 (match (build-let (map output-name gensyms)
398 (('let ((v e)) ('or v xs ...))
400 (if (and (not (null? gensyms))
401 (= 3 (occurrence-count (car gensyms))))
404 (('let ((v e)) ('case v clauses ...))
406 (if (and (not (null? gensyms))
407 ;; FIXME: This fails if any of the 'memv's were
408 ;; optimized into multiple 'eqv?'s, because the
409 ;; occurrence count will be higher than we expect.
410 (= (occurrence-count (car gensyms))
411 (1+ (length (clauses+tail clauses)))))
416 ((<letrec> in-order? gensyms vals body)
417 (build-letrec in-order?
418 (map output-name gensyms)
422 ((<fix> gensyms vals body)
423 ;; not a typo, we really do translate back to letrec. use letrec* since it
424 ;; doesn't matter, and the naive letrec* transformation does not require an
427 (map output-name gensyms)
431 ((<let-values> exp body)
432 `(call-with-values (lambda () ,@(recurse-body exp))
433 ,(recurse (make-lambda #f '() body))))
435 ((<prompt> tag body handler)
442 ((<abort> tag args tail)
443 `(apply abort ,(recurse tag) ,@(map recurse args)
445 (values (recurse e) env)))
447 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
449 ;; Algorithm for choosing better variable names
450 ;; ============================================
452 ;; First we perform an analysis pass, collecting the following
455 ;; * For each gensym: how many occurrences will occur in the output?
457 ;; * For each gensym A: which gensyms does A conflict with? Gensym A
458 ;; and gensym B conflict if they have the same base name (usually the
459 ;; same as the source name, but see below), and if giving them the
460 ;; same name would cause a bad variable reference due to unintentional
463 ;; The occurrence counter is indexed by gensym and is global (within each
464 ;; invocation of the algorithm), implemented using a hash table. We also
465 ;; keep a global mapping from gensym to source name as provided by the
466 ;; binding construct (we prefer not to trust the source names in the
467 ;; lexical ref or set).
469 ;; As we recurse down into lexical binding forms, we keep track of a
470 ;; mapping from base name to an ordered list of bindings, innermost
471 ;; first. When we encounter a variable occurrence, we increment the
472 ;; counter, look up the base name (preferring not to trust the 'name' in
473 ;; the lexical ref or set), and then look up the bindings currently in
474 ;; effect for that base name. Hopefully our gensym will be the first
475 ;; (innermost) binding. If not, we register a conflict between the
476 ;; referenced gensym and the other bound gensyms with the same base name
477 ;; that shadow the binding we want. These are simply the gensyms on the
478 ;; binding list that come before our gensym.
480 ;; Top-level bindings are treated specially. Whenever top-level
481 ;; references are found, they conflict with every lexical binding
482 ;; currently in effect with the same base name. They are guaranteed to
483 ;; be assigned to their source names. For purposes of recording
484 ;; conflicts (which are normally keyed on gensyms) top-level identifiers
485 ;; are assigned a pseudo-gensym that is an interned pair of the form
486 ;; (top-level . <name>). This allows them to be compared using 'eq?'
487 ;; like other gensyms.
489 ;; The base name is normally just the source name. However, if the
490 ;; source name has a suffix of the form "-N" (where N is a positive
491 ;; integer without leading zeroes), then we strip that suffix (multiple
492 ;; times if necessary) to form the base name. We must do this because
493 ;; we add suffixes of that form in order to resolve conflicts, and we
494 ;; must ensure that only identifiers with the same base name can
495 ;; possibly conflict with each other.
497 ;; XXX FIXME: Currently, primitives are treated exactly like top-level
498 ;; bindings. This handles conflicting lexical bindings properly, but
499 ;; does _not_ handle the case where top-level bindings conflict with the
500 ;; needed primitives.
502 ;; Also note that this requires that 'choose-output-names' be kept in
503 ;; sync with 'tree-il->scheme'. Primitives that are introduced by
504 ;; 'tree-il->scheme' must be anticipated by 'choose-output-names'.
506 ;; We also ensure that lexically-bound identifiers found in operator
507 ;; position will never be assigned one of the standard primitive names.
508 ;; This is needed because 'tree-il->scheme' recognizes primitive names
509 ;; in operator position and assumes that they have the standard
513 ;; How we assign an output name to each gensym
514 ;; ===========================================
516 ;; We process the gensyms in order of decreasing occurrence count, with
517 ;; each gensym choosing the best output name possible, as long as it
518 ;; isn't the same name as any of the previously-chosen output names of
519 ;; conflicting gensyms.
524 ;; 'choose-output-names' analyzes the top-level form e, chooses good
525 ;; variable names that are as close as possible to the source names,
526 ;; and returns two values:
528 ;; * a hash table mapping gensym to output name
529 ;; * a hash table mapping gensym to number of occurrences
531 (define choose-output-names
534 ;; This is a list of primitives that 'tree-il->scheme' assumes
535 ;; will have the standard bindings when found in operator
537 (let* ((primitives '(if quote @ @@ set! define define*
538 begin let let* letrec letrec*
540 lambda lambda* case-lambda case-lambda*
541 apply call-with-values dynamic-wind
542 with-fluids fluid-ref fluid-set!
543 call-with-prompt abort memv eqv?))
544 (table (make-hash-table (length primitives))))
545 (for-each (cut hashq-set! table <> #t) primitives)
546 (lambda (name) (hashq-ref table name))))
548 ;; Repeatedly strip suffix of the form "-N", where N is a string
549 ;; that could be produced by number->string given a positive
550 ;; integer. In other words, the first digit of N may not be 0.
551 (define compute-base-name
552 (let ((digits (string->char-set "0123456789")))
553 (define (base-name-string str)
554 (let ((i (string-skip-right str digits)))
555 (if (and i (< (1+ i) (string-length str))
556 (eq? #\- (string-ref str i))
557 (not (eq? #\0 (string-ref str (1+ i)))))
558 (base-name-string (substring str 0 i))
561 (string->symbol (base-name-string (symbol->string sym))))))
563 ;; choose-output-names
564 (lambda (e use-derived-syntax? strip-numeric-suffixes?)
566 (define lexical-gensyms '())
568 (define top-level-intern!
569 (let ((table (make-hash-table)))
571 (let ((h (hashq-create-handle! table name #f)))
572 (or (cdr h) (begin (set-cdr! h (cons 'top-level name))
;; Top-level pseudo-gensyms are pairs, so a pair check is enough to
;; recognize them.
(define top-level?
  (lambda (candidate)
    (pair? candidate)))
;; The name of a top-level pseudo-gensym is kept in its cdr.
(define top-level-name
  (lambda (pseudo-gensym)
    (cdr pseudo-gensym)))
;; Hash table of per-gensym occurrence tallies, looked up with eq?.
(define occurrence-count-table (make-hash-table))
;; Tally recorded for S, or 0 when the table holds no true entry for it.
(define (occurrence-count s)
  (let ((tally (hashq-ref occurrence-count-table s)))
    (if tally tally 0)))
579 (define (increment-occurrence-count! s)
580 (let ((h (hashq-create-handle! occurrence-count-table s 0)))
582 (set! lexical-gensyms (cons s lexical-gensyms)))
583 (set-cdr! h (1+ (cdr h)))))
586 (let ((table (make-hash-table)))
588 (let ((h (hashq-create-handle! table name #f)))
589 (or (cdr h) (begin (set-cdr! h (compute-base-name name))
592 (define source-name-table (make-hash-table))
593 (define (set-source-name! s name)
594 (if (not (top-level? s))
595 (let ((name (if strip-numeric-suffixes?
598 (hashq-set! source-name-table s name))))
599 (define (source-name s)
602 (hashq-ref source-name-table s)))
;; Hash table of recorded conflicts, looked up with eq?.
(define conflict-table (make-hash-table))
;; Conflict list recorded for S, or the empty list when the table holds
;; no true entry for it.
(define (conflicts s)
  (let ((known (hashq-ref conflict-table s)))
    (if known known '())))
606 (define (add-conflict! a b)
608 (if (not (top-level? a))
609 (let ((h (hashq-create-handle! conflict-table a '())))
610 (if (not (memq b (cdr h)))
611 (set-cdr! h (cons b (cdr h)))))))
615 (let recurse-with-bindings ((e e) (bindings vlist-null))
618 ;; We call this whenever we encounter a top-level ref or set
;; Note a reference to the top-level identifier NAME: register a
;; conflict between its interned pseudo-gensym and every entry that
;; 'bindings' currently holds under NAME's base name.
(define (top-level name)
  (let* ((base (base-name name))
         (pseudo-gensym (top-level-intern! name))
         (shadowing (vhash-foldq* cons '() base bindings)))
    (for-each (lambda (bound)
                (add-conflict! pseudo-gensym bound))
              shadowing)))
625 ;; We call this whenever we encounter a primitive reference.
626 ;; We must also call it for every primitive that might be
627 ;; inserted by 'tree-il->scheme'. It is okay to call this
628 ;; even when 'tree-il->scheme' will not insert the named
629 ;; primitive; the worst that will happen is for a lexical
630 ;; variable of the same name to be renamed unnecessarily.
;; Primitive names are handled exactly like top-level names: simply
;; forward NAME to 'top-level'.
(define primitive
  (lambda (name)
    (top-level name)))
633 ;; We call this whenever we encounter a lexical ref or set.
635 (increment-occurrence-count! s)
638 (lambda (s*) (not (eq? s s*)))
639 (reverse! (vhash-foldq* cons
641 (base-name (source-name s))
643 (for-each (cut add-conflict! s <>) conflicts)))
646 ((<void>) (primitive 'if)) ; (if #f #f)
647 ((<const>) (primitive 'quote))
650 (if (lexical-ref? proc)
651 (let* ((gensym (lexical-ref-gensym proc))
652 (name (source-name gensym)))
653 ;; If the operator position contains a bare variable
654 ;; reference with the same source name as a standard
655 ;; primitive, we must ensure that it will be given a
656 ;; different name, so that 'tree-il->scheme' will not
657 ;; misinterpret the resulting expression.
658 (if (primitive? name)
659 (add-conflict! gensym (top-level-intern! name)))))
661 (for-each recurse args))
663 ((<primitive-ref> name) (primitive name))
664 ((<primcall> name args) (primitive name) (for-each recurse args))
666 ((<lexical-ref> gensym) (lexical gensym))
667 ((<lexical-set> gensym exp)
668 (primitive 'set!) (lexical gensym) (recurse exp))
670 ((<module-ref> public?) (primitive (if public? '@ '@@)))
671 ((<module-set> public? exp)
672 (primitive 'set!) (primitive (if public? '@ '@@)) (recurse exp))
674 ((<toplevel-ref> name) (top-level name))
675 ((<toplevel-set> name exp)
676 (primitive 'set!) (top-level name) (recurse exp))
677 ((<toplevel-define> name exp) (top-level name) (recurse exp))
679 ((<conditional> test consequent alternate)
680 (cond (use-derived-syntax?
681 (primitive 'and) (primitive 'or)
682 (primitive 'cond) (primitive 'case)
683 (primitive 'else) (primitive '=>)))
685 (recurse test) (recurse consequent) (recurse alternate))
688 (primitive 'begin) (recurse head) (recurse tail))
691 (if body (recurse body)))
693 ((<lambda-case> req opt rest kw inits gensyms body alternate)
695 (cond ((or opt kw alternate)
697 (primitive 'case-lambda)
698 (primitive 'case-lambda*)))
700 (if use-derived-syntax? (primitive 'let*))
701 (let* ((names (append req (or opt '()) (if rest (list rest) '())
702 (map cadr (if kw (cdr kw) '()))))
703 (base-names (map base-name names))
705 (fold vhash-consq bindings base-names gensyms)))
706 (for-each increment-occurrence-count! gensyms)
707 (for-each set-source-name! gensyms names)
708 (for-each recurse inits)
709 (recurse-with-bindings body body-bindings)
710 (if alternate (recurse alternate))))
712 ((<let> names gensyms vals body)
714 (cond (use-derived-syntax? (primitive 'let*) (primitive 'or)))
715 (for-each increment-occurrence-count! gensyms)
716 (for-each set-source-name! gensyms names)
717 (for-each recurse vals)
718 (recurse-with-bindings
719 body (fold vhash-consq bindings (map base-name names) gensyms)))
721 ((<letrec> in-order? names gensyms vals body)
723 (cond (use-derived-syntax? (primitive 'let*) (primitive 'or)))
724 (primitive (if in-order? 'letrec* 'letrec))
725 (for-each increment-occurrence-count! gensyms)
726 (for-each set-source-name! gensyms names)
727 (let* ((base-names (map base-name names))
728 (bindings (fold vhash-consq bindings base-names gensyms)))
729 (for-each (cut recurse-with-bindings <> bindings) vals)
730 (recurse-with-bindings body bindings)))
732 ((<fix> names gensyms vals body)
735 (cond (use-derived-syntax? (primitive 'let*) (primitive 'or)))
736 (for-each increment-occurrence-count! gensyms)
737 (for-each set-source-name! gensyms names)
738 (let* ((base-names (map base-name names))
739 (bindings (fold vhash-consq bindings base-names gensyms)))
740 (for-each (cut recurse-with-bindings <> bindings) vals)
741 (recurse-with-bindings body bindings)))
743 ((<let-values> exp body)
744 (primitive 'call-with-values)
745 (recurse exp) (recurse body))
747 ((<prompt> tag body handler)
748 (primitive 'call-with-prompt)
749 (recurse tag) (recurse body) (recurse handler))
751 ((<abort> tag args tail)
754 (recurse tag) (for-each recurse args) (recurse tail)))))
;; Hash table mapping each gensym to its output name, keyed with eq?.
(define output-name-table (make-hash-table))
;; Record NAME as the output name of S.
(define set-output-name!
  (lambda (s name)
    (hashq-set! output-name-table s name)))
760 (define (output-name s)
763 (hashq-ref output-name-table s)))
;; The collected lexical gensyms, ordered by decreasing occurrence
;; count.
(define sorted-lexical-gensyms
  (let ((more-frequent?
         (lambda (a b)
           (> (occurrence-count a) (occurrence-count b)))))
    (sort-list lexical-gensyms more-frequent?)))
770 (for-each (lambda (s)
773 (let ((the-conflicts (conflicts s))
774 (the-source-name (source-name s)))
775 (define (not-yet-taken? name)
776 (not (any (lambda (s*)
777 (and=> (output-name s*)
780 (if (not-yet-taken? the-source-name)
782 (let ((prefix (string-append
783 (symbol->string the-source-name)
785 (let loop ((i 1) (name the-source-name))
786 (if (not-yet-taken? name)
792 (number->string i)))))))))))
793 sorted-lexical-gensyms)
794 (values output-name-table occurrence-count-table)))))