;;; TREE-IL -> GLIL compiler ;; Copyright (C) 2001,2008,2009 Free Software Foundation, Inc. ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public ;;;; License as published by the Free Software Foundation; either ;;;; version 3 of the License, or (at your option) any later version. ;;;; ;;;; This library is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;;; Lesser General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU Lesser General Public ;;;; License along with this library; if not, write to the Free Software ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;;; Code: (define-module (language tree-il analyze) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9) #:use-module (system base syntax) #:use-module (system base message) #:use-module (language tree-il) #:export (analyze-lexicals report-unused-variables)) ;; Allocation is the process of assigning storage locations for lexical ;; variables. A lexical variable has a distinct "address", or storage ;; location, for each procedure in which it is referenced. ;; ;; A variable is "local", i.e., allocated on the stack, if it is ;; referenced from within the procedure that defined it. Otherwise it is ;; a "closure" variable. For example: ;; ;; (lambda (a) a) ; a will be local ;; `a' is local to the procedure. ;; ;; (lambda (a) (lambda () a)) ;; `a' is local to the outer procedure, but a closure variable with ;; respect to the inner procedure. ;; ;; If a variable is ever assigned, it needs to be heap-allocated ;; ("boxed"). This is so that closures and continuations capture the ;; variable's identity, not just one of the values it may have over the ;; course of program execution. If the variable is never assigned, there ;; is no distinction between value and identity, so closing over its ;; identity (whether through closures or continuations) can make a copy ;; of its value instead. ;; ;; Local variables are stored on the stack within a procedure's call ;; frame. Their index into the stack is determined from their linear ;; postion within a procedure's binding path: ;; (let (0 1) ;; (let (2 3) ...) ;; (let (2) ...)) ;; (let (2 3 4) ...)) ;; etc. ;; ;; This algorithm has the problem that variables are only allocated ;; indices at the end of the binding path. If variables bound early in ;; the path are not used in later portions of the path, their indices ;; will not be recycled. This problem is particularly egregious in the ;; expansion of `or': ;; ;; (or x y z) ;; -> (let ((a x)) (if a a (let ((b y)) (if b b z)))) ;; ;; As you can see, the `a' binding is only used in the ephemeral `then' ;; clause of the first `if', but its index would be reserved for the ;; whole of the `or' expansion. So we have a hack for this specific ;; case. A proper solution would be some sort of liveness analysis, and ;; not our linear allocation algorithm. ;; ;; Closure variables are captured when a closure is created, and stored ;; in a vector. Each closure variable has a unique index into that ;; vector. ;; ;; There is one more complication. Procedures bound by may, in ;; some cases, be rendered inline to their parent procedure. That is to ;; say, ;; ;; (letrec ((lp (lambda () (lp)))) (lp)) ;; => (fix ((lp (lambda () (lp)))) (lp)) ;; => goto FIX-BODY; LP: goto LP; FIX-BODY: goto LP; ;; ^ jump over the loop ^ the fixpoint lp ^ starting off the loop ;; ;; The upshot is that we don't have to allocate any space for the `lp' ;; closure at all, as it can be rendered inline as a loop. So there is ;; another kind of allocation, "label allocation", in which the ;; procedure is simply a label, placed at the start of the lambda body. ;; The label is the gensym under which the lambda expression is bound. ;; ;; The analyzer checks to see that the label is called with the correct ;; number of arguments. Calls to labels compile to rename + goto. ;; Lambda, the ultimate goto! ;; ;; ;; The return value of `analyze-lexicals' is a hash table, the ;; "allocation". ;; ;; The allocation maps gensyms -- recall that each lexically bound ;; variable has a unique gensym -- to storage locations ("addresses"). ;; Since one gensym may have many storage locations, if it is referenced ;; in many procedures, it is a two-level map. ;; ;; The allocation also stored information on how many local variables ;; need to be allocated for each procedure, lexicals that have been ;; translated into labels, and information on what free variables to ;; capture from its lexical parent procedure. ;; ;; That is: ;; ;; sym -> {lambda -> address} ;; lambda -> (nlocs labels . free-locs) ;; ;; address ::= (local? boxed? . index) ;; labels ::= ((sym . lambda-vars) ...) ;; free-locs ::= ((sym0 . address0) (sym1 . address1) ...) ;; free variable addresses are relative to parent proc. (define (make-hashq k v) (let ((res (make-hash-table))) (hashq-set! res k v) res)) (define (analyze-lexicals x) ;; bound-vars: lambda -> (sym ...) ;; all identifiers bound within a lambda (define bound-vars (make-hash-table)) ;; free-vars: lambda -> (sym ...) ;; all identifiers referenced in a lambda, but not bound ;; NB, this includes identifiers referenced by contained lambdas (define free-vars (make-hash-table)) ;; assigned: sym -> #t ;; variables that are assigned (define assigned (make-hash-table)) ;; refcounts: sym -> count ;; allows us to detect the or-expansion in O(1) time (define refcounts (make-hash-table)) ;; labels: sym -> lambda-vars ;; for determining if fixed-point procedures can be rendered as ;; labels. lambda-vars may be an improper list. (define labels (make-hash-table)) ;; returns variables referenced in expr (define (analyze! x proc labels-in-proc tail? tail-call-args) (define (step y) (analyze! y proc labels-in-proc #f #f)) (define (step-tail y) (analyze! y proc labels-in-proc tail? #f)) (define (step-tail-call y args) (analyze! y proc labels-in-proc #f (and tail? args))) (define (recur/labels x new-proc labels) (analyze! x new-proc (append labels labels-in-proc) #t #f)) (define (recur x new-proc) (analyze! x new-proc '() tail? #f)) (record-case x (( proc args) (apply lset-union eq? (step-tail-call proc args) (map step args))) (( test then else) (lset-union eq? (step test) (step-tail then) (step-tail else))) (( name gensym) (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0))) (if (not (and tail-call-args (memq gensym labels-in-proc) (let ((args (hashq-ref labels gensym))) (and (list? args) (= (length args) (length tail-call-args)))))) (hashq-set! labels gensym #f)) (list gensym)) (( name gensym exp) (hashq-set! assigned gensym #t) (hashq-set! labels gensym #f) (lset-adjoin eq? (step exp) gensym)) (( mod name public? exp) (step exp)) (( name exp) (step exp)) (( name exp) (step exp)) (( exps) (let lp ((exps exps) (ret '())) (cond ((null? exps) '()) ((null? (cdr exps)) (lset-union eq? ret (step-tail (car exps)))) (else (lp (cdr exps) (lset-union eq? ret (step (car exps)))))))) (( vars meta body) (let ((locally-bound (let rev* ((vars vars) (out '())) (cond ((null? vars) out) ((pair? vars) (rev* (cdr vars) (cons (car vars) out))) (else (cons vars out)))))) (hashq-set! bound-vars x locally-bound) (let* ((referenced (recur body x)) (free (lset-difference eq? referenced locally-bound)) (all-bound (reverse! (hashq-ref bound-vars x)))) (hashq-set! bound-vars x all-bound) (hashq-set! free-vars x free) free))) (( vars vals body) (hashq-set! bound-vars proc (append (reverse vars) (hashq-ref bound-vars proc))) (lset-difference eq? (apply lset-union eq? (step-tail body) (map step vals)) vars)) (( vars vals body) (hashq-set! bound-vars proc (append (reverse vars) (hashq-ref bound-vars proc))) (for-each (lambda (sym) (hashq-set! assigned sym #t)) vars) (lset-difference eq? (apply lset-union eq? (step-tail body) (map step vals)) vars)) (( vars vals body) ;; Try to allocate these procedures as labels. (for-each (lambda (sym val) (hashq-set! labels sym (lambda-vars val))) vars vals) (hashq-set! bound-vars proc (append (reverse vars) (hashq-ref bound-vars proc))) ;; Step into subexpressions. (let* ((var-refs (map ;; Since we're trying to label-allocate the lambda, ;; pretend it's not a closure, and just recurse into its ;; body directly. (Otherwise, recursing on a closure ;; that references one of the fix's bound vars would ;; prevent label allocation.) (lambda (x) (record-case x (( (lvars vars) body) (let ((locally-bound (let rev* ((lvars lvars) (out '())) (cond ((null? lvars) out) ((pair? lvars) (rev* (cdr lvars) (cons (car lvars) out))) (else (cons lvars out)))))) (hashq-set! bound-vars x locally-bound) ;; recur/labels, the difference from the closure case (let* ((referenced (recur/labels body x vars)) (free (lset-difference eq? referenced locally-bound)) (all-bound (reverse! (hashq-ref bound-vars x)))) (hashq-set! bound-vars x all-bound) (hashq-set! free-vars x free) free))))) vals)) (vars-with-refs (map cons vars var-refs)) (body-refs (recur/labels body proc vars))) (define (delabel-dependents! sym) (let ((refs (assq-ref vars-with-refs sym))) (if refs (for-each (lambda (sym) (if (hashq-ref labels sym) (begin (hashq-set! labels sym #f) (delabel-dependents! sym)))) refs)))) ;; Stepping into the lambdas and the body might have made some ;; procedures not label-allocatable -- which might have ;; knock-on effects. For example: ;; (fix ((a (lambda () (b))) ;; (b (lambda () a))) ;; (a)) ;; As far as `a' is concerned, both `a' and `b' are ;; label-allocatable. But `b' references `a' not in a proc-tail ;; position, which makes `a' not label-allocatable. The ;; knock-on effect is that, when back-propagating this ;; information to `a', `b' will also become not ;; label-allocatable, as it is referenced within `a', which is ;; allocated as a closure. This is a transitive relationship. (for-each (lambda (sym) (if (not (hashq-ref labels sym)) (delabel-dependents! sym))) vars) ;; Now lift bound variables with label-allocated lambdas to the ;; parent procedure. (for-each (lambda (sym val) (if (hashq-ref labels sym) ;; Remove traces of the label-bound lambda. The free ;; vars will propagate up via the return val. (begin (hashq-set! bound-vars proc (append (hashq-ref bound-vars val) (hashq-ref bound-vars proc))) (hashq-remove! bound-vars val) (hashq-remove! free-vars val)))) vars vals) (lset-difference eq? (apply lset-union eq? body-refs var-refs) vars))) (( vars exp body) (let ((bound (let lp ((out (hashq-ref bound-vars proc)) (in vars)) (if (pair? in) (lp (cons (car in) out) (cdr in)) (if (null? in) out (cons in out)))))) (hashq-set! bound-vars proc bound) (lset-difference eq? (lset-union eq? (step exp) (step-tail body)) bound))) (else '()))) ;; allocation: sym -> {lambda -> address} ;; lambda -> (nlocs labels . free-locs) (define allocation (make-hash-table)) (define (allocate! x proc n) (define (recur y) (allocate! y proc n)) (record-case x (( proc args) (apply max (recur proc) (map recur args))) (( test then else) (max (recur test) (recur then) (recur else))) (( name gensym exp) (recur exp)) (( mod name public? exp) (recur exp)) (( name exp) (recur exp)) (( name exp) (recur exp)) (( exps) (apply max (map recur exps))) (( vars meta body) ;; allocate closure vars in order (let lp ((c (hashq-ref free-vars x)) (n 0)) (if (pair? c) (begin (hashq-set! (hashq-ref allocation (car c)) x `(#f ,(hashq-ref assigned (car c)) . ,n)) (lp (cdr c) (1+ n))))) (let ((nlocs (let lp ((vars vars) (n 0)) (if (not (null? vars)) ;; allocate args (let ((v (if (pair? vars) (car vars) vars))) (hashq-set! allocation v (make-hashq x `(#t ,(hashq-ref assigned v) . ,n))) (lp (if (pair? vars) (cdr vars) '()) (1+ n))) ;; allocate body, return number of additional locals (- (allocate! body x n) n)))) (free-addresses (map (lambda (v) (hashq-ref (hashq-ref allocation v) proc)) (hashq-ref free-vars x))) (labels (filter cdr (map (lambda (sym) (cons sym (hashq-ref labels sym))) (hashq-ref bound-vars x))))) ;; set procedure allocations (hashq-set! allocation x (cons* nlocs labels free-addresses))) n) (( vars vals body) (let ((nmax (apply max (map recur vals)))) (cond ;; the `or' hack ((and (conditional? body) (= (length vars) 1) (let ((v (car vars))) (and (not (hashq-ref assigned v)) (= (hashq-ref refcounts v 0) 2) (lexical-ref? (conditional-test body)) (eq? (lexical-ref-gensym (conditional-test body)) v) (lexical-ref? (conditional-then body)) (eq? (lexical-ref-gensym (conditional-then body)) v)))) (hashq-set! allocation (car vars) (make-hashq proc `(#t #f . ,n))) ;; the 1+ for this var (max nmax (1+ n) (allocate! (conditional-else body) proc n))) (else (let lp ((vars vars) (n n)) (if (null? vars) (max nmax (allocate! body proc n)) (let ((v (car vars))) (hashq-set! allocation v (make-hashq proc `(#t ,(hashq-ref assigned v) . ,n))) (lp (cdr vars) (1+ n))))))))) (( vars vals body) (let lp ((vars vars) (n n)) (if (null? vars) (let ((nmax (apply max (map (lambda (x) (allocate! x proc n)) vals)))) (max nmax (allocate! body proc n))) (let ((v (car vars))) (hashq-set! allocation v (make-hashq proc `(#t ,(hashq-ref assigned v) . ,n))) (lp (cdr vars) (1+ n)))))) (( vars vals body) (let lp ((in vars) (n n)) (if (null? in) (let lp ((vars vars) (vals vals) (nmax n)) (cond ((null? vars) (max nmax (allocate! body proc n))) ((hashq-ref labels (car vars)) ;; allocate label bindings & body inline to proc (lp (cdr vars) (cdr vals) (record-case (car vals) (( vars body) (let lp ((vars vars) (n n)) (if (not (null? vars)) ;; allocate bindings (let ((v (if (pair? vars) (car vars) vars))) (hashq-set! allocation v (make-hashq proc `(#t ,(hashq-ref assigned v) . ,n))) (lp (if (pair? vars) (cdr vars) '()) (1+ n))) ;; allocate body (max nmax (allocate! body proc n)))))))) (else ;; allocate closure (lp (cdr vars) (cdr vals) (max nmax (allocate! (car vals) proc n)))))) (let ((v (car in))) (cond ((hashq-ref assigned v) (error "fixpoint procedures may not be assigned" x)) ((hashq-ref labels v) ;; no binding, it's a label (lp (cdr in) n)) (else ;; allocate closure binding (hashq-set! allocation v (make-hashq proc `(#t #f . ,n))) (lp (cdr in) (1+ n)))))))) (( vars exp body) (let ((nmax (recur exp))) (let lp ((vars vars) (n n)) (cond ((null? vars) (max nmax (allocate! body proc n))) ((not (pair? vars)) (hashq-set! allocation vars (make-hashq proc `(#t ,(hashq-ref assigned vars) . ,n))) ;; the 1+ for this var (max nmax (allocate! body proc (1+ n)))) (else (let ((v (car vars))) (hashq-set! allocation v (make-hashq proc `(#t ,(hashq-ref assigned v) . ,n))) (lp (cdr vars) (1+ n)))))))) (else n))) (analyze! x #f '() #t #f) (allocate! x #f 0) allocation) ;;; ;;; Unused variable analysis. ;;; ;; records are used during tree traversals in ;; `report-unused-variables'. They contain a list of the local vars ;; currently in scope, a list of locals vars that have been referenced, and a ;; "location stack" (the stack of `tree-il-src' values for each parent tree). (define-record-type (make-binding-info vars refs locs) binding-info? (vars binding-info-vars) ;; ((GENSYM NAME LOCATION) ...) (refs binding-info-refs) ;; (GENSYM ...) (locs binding-info-locs)) ;; (LOCATION ...) (define (report-unused-variables tree) "Report about unused variables in TREE. Return TREE." (define (dotless-list lst) ;; If LST is a dotted list, return a proper list equal to LST except that ;; the very last element is a pair; otherwise return LST. (let loop ((lst lst) (result '())) (cond ((null? lst) (reverse result)) ((pair? lst) (loop (cdr lst) (cons (car lst) result))) (else (loop '() (cons lst result)))))) (tree-il-fold (lambda (x info) ;; X is a leaf: extend INFO's refs accordingly. (let ((refs (binding-info-refs info)) (vars (binding-info-vars info)) (locs (binding-info-locs info))) (record-case x (( gensym) (make-binding-info vars (cons gensym refs) locs)) (else info)))) (lambda (x info) ;; Going down into X: extend INFO's variable list ;; accordingly. (let ((refs (binding-info-refs info)) (vars (binding-info-vars info)) (locs (binding-info-locs info)) (src (tree-il-src x))) (define (extend inner-vars inner-names) (append (map (lambda (var name) (list var name src)) inner-vars inner-names) vars)) (record-case x (( gensym) (make-binding-info vars (cons gensym refs) (cons src locs))) (( vars names) (let ((vars (dotless-list vars)) (names (dotless-list names))) (make-binding-info (extend vars names) refs (cons src locs)))) (( vars names) (make-binding-info (extend vars names) refs (cons src locs))) (( vars names) (make-binding-info (extend vars names) refs (cons src locs))) (( vars names) (make-binding-info (extend vars names) refs (cons src locs))) (( vars names) (make-binding-info (extend vars names) refs (cons src locs))) (else info)))) (lambda (x info) ;; Leaving X's scope: shrink INFO's variable list ;; accordingly and reported unused nested variables. (let ((refs (binding-info-refs info)) (vars (binding-info-vars info)) (locs (binding-info-locs info))) (define (shrink inner-vars refs) (for-each (lambda (var) (let ((gensym (car var))) ;; Don't report lambda parameters as ;; unused. (if (and (not (memq gensym refs)) (not (and (lambda? x) (memq gensym inner-vars)))) (let ((name (cadr var)) ;; We can get approximate ;; source location by going up ;; the LOCS location stack. (loc (or (caddr var) (find pair? locs)))) (warning 'unused-variable loc name))))) (filter (lambda (var) (memq (car var) inner-vars)) vars)) (fold alist-delete vars inner-vars)) ;; For simplicity, we leave REFS untouched, i.e., with ;; names of variables that are now going out of scope. ;; It doesn't hurt as these are unique names, it just ;; makes REFS unnecessarily fat. (record-case x (( vars) (let ((vars (dotless-list vars))) (make-binding-info (shrink vars refs) refs (cdr locs)))) (( vars) (make-binding-info (shrink vars refs) refs (cdr locs))) (( vars) (make-binding-info (shrink vars refs) refs (cdr locs))) (( vars) (make-binding-info (shrink vars refs) refs (cdr locs))) (( vars) (make-binding-info (shrink vars refs) refs (cdr locs))) (else info)))) (make-binding-info '() '() '()) tree) tree)