1 ;;; TREE-IL -> GLIL compiler
3 ;; Copyright (C) 2001,2008,2009 Free Software Foundation, Inc.
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 (define-module (language tree-il analyze)
22 #:use-module (srfi srfi-1)
23 #:use-module (srfi srfi-9)
24 #:use-module (system base syntax)
25 #:use-module (system base message)
26 #:use-module (language tree-il)
27 #:export (analyze-lexicals
28 report-unused-variables))
30 ;; Allocation is the process of assigning storage locations for lexical
31 ;; variables. A lexical variable has a distinct "address", or storage
32 ;; location, for each procedure in which it is referenced.
34 ;; A variable is "local", i.e., allocated on the stack, if it is
35 ;; referenced from within the procedure that defined it. Otherwise it is
36 ;; a "closure" variable. For example:
38 ;; (lambda (a) a) ; a will be local
39 ;; `a' is local to the procedure.
41 ;; (lambda (a) (lambda () a))
42 ;; `a' is local to the outer procedure, but a closure variable with
43 ;; respect to the inner procedure.
45 ;; If a variable is ever assigned, it needs to be heap-allocated
46 ;; ("boxed"). This is so that closures and continuations capture the
47 ;; variable's identity, not just one of the values it may have over the
48 ;; course of program execution. If the variable is never assigned, there
49 ;; is no distinction between value and identity, so closing over its
50 ;; identity (whether through closures or continuations) can make a copy
51 ;; of its value instead.
53 ;; Local variables are stored on the stack within a procedure's call
54 ;; frame. Their index into the stack is determined from their linear
55 ;; postion within a procedure's binding path:
62 ;; This algorithm has the problem that variables are only allocated
63 ;; indices at the end of the binding path. If variables bound early in
64 ;; the path are not used in later portions of the path, their indices
65 ;; will not be recycled. This problem is particularly egregious in the
69 ;; -> (let ((a x)) (if a a (let ((b y)) (if b b z))))
71 ;; As you can see, the `a' binding is only used in the ephemeral `then'
72 ;; clause of the first `if', but its index would be reserved for the
73 ;; whole of the `or' expansion. So we have a hack for this specific
74 ;; case. A proper solution would be some sort of liveness analysis, and
75 ;; not our linear allocation algorithm.
77 ;; Closure variables are captured when a closure is created, and stored
78 ;; in a vector. Each closure variable has a unique index into that
81 ;; There is one more complication. Procedures bound by <fix> may, in
82 ;; some cases, be rendered inline to their parent procedure. That is to
85 ;; (letrec ((lp (lambda () (lp)))) (lp))
86 ;; => (fix ((lp (lambda () (lp)))) (lp))
87 ;; => goto FIX-BODY; LP: goto LP; FIX-BODY: goto LP;
88 ;; ^ jump over the loop ^ the fixpoint lp ^ starting off the loop
90 ;; The upshot is that we don't have to allocate any space for the `lp'
91 ;; closure at all, as it can be rendered inline as a loop. So there is
92 ;; another kind of allocation, "label allocation", in which the
93 ;; procedure is simply a label, placed at the start of the lambda body.
94 ;; The label is the gensym under which the lambda expression is bound.
96 ;; The analyzer checks to see that the label is called with the correct
97 ;; number of arguments. Calls to labels compile to rename + goto.
98 ;; Lambda, the ultimate goto!
101 ;; The return value of `analyze-lexicals' is a hash table, the
104 ;; The allocation maps gensyms -- recall that each lexically bound
105 ;; variable has a unique gensym -- to storage locations ("addresses").
106 ;; Since one gensym may have many storage locations, if it is referenced
107 ;; in many procedures, it is a two-level map.
109 ;; The allocation also stored information on how many local variables
110 ;; need to be allocated for each procedure, lexicals that have been
111 ;; translated into labels, and information on what free variables to
112 ;; capture from its lexical parent procedure.
116 ;; sym -> {lambda -> address}
117 ;; lambda -> (nlocs labels . free-locs)
119 ;; address ::= (local? boxed? . index)
120 ;; labels ::= ((sym . lambda-vars) ...)
121 ;; free-locs ::= ((sym0 . address0) (sym1 . address1) ...)
122 ;; free variable addresses are relative to parent proc.
124 (define (make-hashq k v)
125 (let ((res (make-hash-table)))
129 (define (analyze-lexicals x)
130 ;; bound-vars: lambda -> (sym ...)
131 ;; all identifiers bound within a lambda
132 (define bound-vars (make-hash-table))
133 ;; free-vars: lambda -> (sym ...)
134 ;; all identifiers referenced in a lambda, but not bound
135 ;; NB, this includes identifiers referenced by contained lambdas
136 (define free-vars (make-hash-table))
137 ;; assigned: sym -> #t
138 ;; variables that are assigned
139 (define assigned (make-hash-table))
140 ;; refcounts: sym -> count
141 ;; allows us to detect the or-expansion in O(1) time
142 (define refcounts (make-hash-table))
143 ;; labels: sym -> lambda-vars
144 ;; for determining if fixed-point procedures can be rendered as
145 ;; labels. lambda-vars may be an improper list.
146 (define labels (make-hash-table))
148 ;; returns variables referenced in expr
149 (define (analyze! x proc labels-in-proc tail? tail-call-args)
150 (define (step y) (analyze! y proc labels-in-proc #f #f))
151 (define (step-tail y) (analyze! y proc labels-in-proc tail? #f))
152 (define (step-tail-call y args) (analyze! y proc labels-in-proc #f
154 (define (recur/labels x new-proc labels)
155 (analyze! x new-proc (append labels labels-in-proc) #t #f))
156 (define (recur x new-proc) (analyze! x new-proc '() tail? #f))
158 ((<application> proc args)
159 (apply lset-union eq? (step-tail-call proc args)
162 ((<conditional> test then else)
163 (lset-union eq? (step test) (step-tail then) (step-tail else)))
165 ((<lexical-ref> name gensym)
166 (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
167 (if (not (and tail-call-args
168 (memq gensym labels-in-proc)
169 (let ((args (hashq-ref labels gensym)))
171 (= (length args) (length tail-call-args))))))
172 (hashq-set! labels gensym #f))
175 ((<lexical-set> name gensym exp)
176 (hashq-set! assigned gensym #t)
177 (hashq-set! labels gensym #f)
178 (lset-adjoin eq? (step exp) gensym))
180 ((<module-set> mod name public? exp)
183 ((<toplevel-set> name exp)
186 ((<toplevel-define> name exp)
190 (let lp ((exps exps) (ret '()))
191 (cond ((null? exps) '())
193 (lset-union eq? ret (step-tail (car exps))))
195 (lp (cdr exps) (lset-union eq? ret (step (car exps))))))))
197 ((<lambda> vars meta body)
198 (let ((locally-bound (let rev* ((vars vars) (out '()))
199 (cond ((null? vars) out)
200 ((pair? vars) (rev* (cdr vars)
201 (cons (car vars) out)))
202 (else (cons vars out))))))
203 (hashq-set! bound-vars x locally-bound)
204 (let* ((referenced (recur body x))
205 (free (lset-difference eq? referenced locally-bound))
206 (all-bound (reverse! (hashq-ref bound-vars x))))
207 (hashq-set! bound-vars x all-bound)
208 (hashq-set! free-vars x free)
211 ((<let> vars vals body)
212 (hashq-set! bound-vars proc
213 (append (reverse vars) (hashq-ref bound-vars proc)))
215 (apply lset-union eq? (step-tail body) (map step vals))
218 ((<letrec> vars vals body)
219 (hashq-set! bound-vars proc
220 (append (reverse vars) (hashq-ref bound-vars proc)))
221 (for-each (lambda (sym) (hashq-set! assigned sym #t)) vars)
223 (apply lset-union eq? (step-tail body) (map step vals))
226 ((<fix> vars vals body)
227 ;; Try to allocate these procedures as labels.
228 (for-each (lambda (sym val) (hashq-set! labels sym (lambda-vars val)))
230 (hashq-set! bound-vars proc
231 (append (reverse vars) (hashq-ref bound-vars proc)))
232 ;; Step into subexpressions.
235 ;; Since we're trying to label-allocate the lambda,
236 ;; pretend it's not a closure, and just recurse into its
237 ;; body directly. (Otherwise, recursing on a closure
238 ;; that references one of the fix's bound vars would
239 ;; prevent label allocation.)
242 ((<lambda> (lvars vars) body)
244 (let rev* ((lvars lvars) (out '()))
245 (cond ((null? lvars) out)
246 ((pair? lvars) (rev* (cdr lvars)
247 (cons (car lvars) out)))
248 (else (cons lvars out))))))
249 (hashq-set! bound-vars x locally-bound)
250 ;; recur/labels, the difference from the closure case
251 (let* ((referenced (recur/labels body x vars))
252 (free (lset-difference eq? referenced locally-bound))
253 (all-bound (reverse! (hashq-ref bound-vars x))))
254 (hashq-set! bound-vars x all-bound)
255 (hashq-set! free-vars x free)
258 (vars-with-refs (map cons vars var-refs))
259 (body-refs (recur/labels body proc vars)))
260 (define (delabel-dependents! sym)
261 (let ((refs (assq-ref vars-with-refs sym)))
263 (for-each (lambda (sym)
264 (if (hashq-ref labels sym)
266 (hashq-set! labels sym #f)
267 (delabel-dependents! sym))))
269 ;; Stepping into the lambdas and the body might have made some
270 ;; procedures not label-allocatable -- which might have
271 ;; knock-on effects. For example:
272 ;; (fix ((a (lambda () (b)))
273 ;; (b (lambda () a)))
275 ;; As far as `a' is concerned, both `a' and `b' are
276 ;; label-allocatable. But `b' references `a' not in a proc-tail
277 ;; position, which makes `a' not label-allocatable. The
278 ;; knock-on effect is that, when back-propagating this
279 ;; information to `a', `b' will also become not
280 ;; label-allocatable, as it is referenced within `a', which is
281 ;; allocated as a closure. This is a transitive relationship.
282 (for-each (lambda (sym)
283 (if (not (hashq-ref labels sym))
284 (delabel-dependents! sym)))
286 ;; Now lift bound variables with label-allocated lambdas to the
290 (if (hashq-ref labels sym)
291 ;; Remove traces of the label-bound lambda. The free
292 ;; vars will propagate up via the return val.
294 (hashq-set! bound-vars proc
295 (append (hashq-ref bound-vars val)
296 (hashq-ref bound-vars proc)))
297 (hashq-remove! bound-vars val)
298 (hashq-remove! free-vars val))))
301 (apply lset-union eq? body-refs var-refs)
304 ((<let-values> vars exp body)
305 (let ((bound (let lp ((out (hashq-ref bound-vars proc)) (in vars))
307 (lp (cons (car in) out) (cdr in))
308 (if (null? in) out (cons in out))))))
309 (hashq-set! bound-vars proc bound)
311 (lset-union eq? (step exp) (step-tail body))
316 ;; allocation: sym -> {lambda -> address}
317 ;; lambda -> (nlocs labels . free-locs)
318 (define allocation (make-hash-table))
320 (define (allocate! x proc n)
321 (define (recur y) (allocate! y proc n))
323 ((<application> proc args)
324 (apply max (recur proc) (map recur args)))
326 ((<conditional> test then else)
327 (max (recur test) (recur then) (recur else)))
329 ((<lexical-set> name gensym exp)
332 ((<module-set> mod name public? exp)
335 ((<toplevel-set> name exp)
338 ((<toplevel-define> name exp)
342 (apply max (map recur exps)))
344 ((<lambda> vars meta body)
345 ;; allocate closure vars in order
346 (let lp ((c (hashq-ref free-vars x)) (n 0))
349 (hashq-set! (hashq-ref allocation (car c))
351 `(#f ,(hashq-ref assigned (car c)) . ,n))
352 (lp (cdr c) (1+ n)))))
355 (let lp ((vars vars) (n 0))
356 (if (not (null? vars))
358 (let ((v (if (pair? vars) (car vars) vars)))
359 (hashq-set! allocation v
361 x `(#t ,(hashq-ref assigned v) . ,n)))
362 (lp (if (pair? vars) (cdr vars) '()) (1+ n)))
363 ;; allocate body, return number of additional locals
364 (- (allocate! body x n) n))))
367 (hashq-ref (hashq-ref allocation v) proc))
368 (hashq-ref free-vars x)))
371 (cons sym (hashq-ref labels sym)))
372 (hashq-ref bound-vars x)))))
373 ;; set procedure allocations
374 (hashq-set! allocation x (cons* nlocs labels free-addresses)))
377 ((<let> vars vals body)
378 (let ((nmax (apply max (map recur vals))))
381 ((and (conditional? body)
383 (let ((v (car vars)))
384 (and (not (hashq-ref assigned v))
385 (= (hashq-ref refcounts v 0) 2)
386 (lexical-ref? (conditional-test body))
387 (eq? (lexical-ref-gensym (conditional-test body)) v)
388 (lexical-ref? (conditional-then body))
389 (eq? (lexical-ref-gensym (conditional-then body)) v))))
390 (hashq-set! allocation (car vars)
391 (make-hashq proc `(#t #f . ,n)))
392 ;; the 1+ for this var
393 (max nmax (1+ n) (allocate! (conditional-else body) proc n)))
395 (let lp ((vars vars) (n n))
397 (max nmax (allocate! body proc n))
398 (let ((v (car vars)))
402 `(#t ,(hashq-ref assigned v) . ,n)))
403 (lp (cdr vars) (1+ n)))))))))
405 ((<letrec> vars vals body)
406 (let lp ((vars vars) (n n))
408 (let ((nmax (apply max
410 (allocate! x proc n))
412 (max nmax (allocate! body proc n)))
413 (let ((v (car vars)))
417 `(#t ,(hashq-ref assigned v) . ,n)))
418 (lp (cdr vars) (1+ n))))))
420 ((<fix> vars vals body)
421 (let lp ((in vars) (n n))
423 (let lp ((vars vars) (vals vals) (nmax n))
426 (max nmax (allocate! body proc n)))
427 ((hashq-ref labels (car vars))
428 ;; allocate label bindings & body inline to proc
431 (record-case (car vals)
432 ((<lambda> vars body)
433 (let lp ((vars vars) (n n))
434 (if (not (null? vars))
436 (let ((v (if (pair? vars) (car vars) vars)))
440 proc `(#t ,(hashq-ref assigned v) . ,n)))
441 (lp (if (pair? vars) (cdr vars) '()) (1+ n)))
443 (max nmax (allocate! body proc n))))))))
448 (max nmax (allocate! (car vals) proc n))))))
452 ((hashq-ref assigned v)
453 (error "fixpoint procedures may not be assigned" x))
454 ((hashq-ref labels v)
455 ;; no binding, it's a label
458 ;; allocate closure binding
459 (hashq-set! allocation v (make-hashq proc `(#t #f . ,n)))
460 (lp (cdr in) (1+ n))))))))
462 ((<let-values> vars exp body)
463 (let ((nmax (recur exp)))
464 (let lp ((vars vars) (n n))
467 (max nmax (allocate! body proc n)))
469 (hashq-set! allocation vars
471 `(#t ,(hashq-ref assigned vars) . ,n)))
472 ;; the 1+ for this var
473 (max nmax (allocate! body proc (1+ n))))
475 (let ((v (car vars)))
479 `(#t ,(hashq-ref assigned v) . ,n)))
480 (lp (cdr vars) (1+ n))))))))
484 (analyze! x #f '() #t #f)
491 ;;; Unused variable analysis.
494 ;; <binding-info> records are used during tree traversals in
495 ;; `report-unused-variables'. They contain a list of the local vars
496 ;; currently in scope, a list of locals vars that have been referenced, and a
497 ;; "location stack" (the stack of `tree-il-src' values for each parent tree).
498 (define-record-type <binding-info>
499 (make-binding-info vars refs locs)
501 (vars binding-info-vars) ;; ((GENSYM NAME LOCATION) ...)
502 (refs binding-info-refs) ;; (GENSYM ...)
503 (locs binding-info-locs)) ;; (LOCATION ...)
505 (define (report-unused-variables tree)
506 "Report about unused variables in TREE. Return TREE."
508 (define (dotless-list lst)
509 ;; If LST is a dotted list, return a proper list equal to LST except that
510 ;; the very last element is a pair; otherwise return LST.
516 (loop (cdr lst) (cons (car lst) result)))
518 (loop '() (cons lst result))))))
520 (tree-il-fold (lambda (x info)
521 ;; X is a leaf: extend INFO's refs accordingly.
522 (let ((refs (binding-info-refs info))
523 (vars (binding-info-vars info))
524 (locs (binding-info-locs info)))
526 ((<lexical-ref> gensym)
527 (make-binding-info vars (cons gensym refs) locs))
531 ;; Going down into X: extend INFO's variable list
533 (let ((refs (binding-info-refs info))
534 (vars (binding-info-vars info))
535 (locs (binding-info-locs info))
536 (src (tree-il-src x)))
537 (define (extend inner-vars inner-names)
538 (append (map (lambda (var name)
544 ((<lexical-set> gensym)
545 (make-binding-info vars (cons gensym refs)
547 ((<lambda> vars names)
548 (let ((vars (dotless-list vars))
549 (names (dotless-list names)))
550 (make-binding-info (extend vars names) refs
553 (make-binding-info (extend vars names) refs
555 ((<letrec> vars names)
556 (make-binding-info (extend vars names) refs
559 (make-binding-info (extend vars names) refs
561 ((<let-values> vars names)
562 (make-binding-info (extend vars names) refs
567 ;; Leaving X's scope: shrink INFO's variable list
568 ;; accordingly and reported unused nested variables.
569 (let ((refs (binding-info-refs info))
570 (vars (binding-info-vars info))
571 (locs (binding-info-locs info)))
572 (define (shrink inner-vars refs)
573 (for-each (lambda (var)
574 (let ((gensym (car var)))
575 ;; Don't report lambda parameters as
577 (if (and (not (memq gensym refs))
578 (not (and (lambda? x)
581 (let ((name (cadr var))
582 ;; We can get approximate
583 ;; source location by going up
584 ;; the LOCS location stack.
587 (warning 'unused-variable loc name)))))
588 (filter (lambda (var)
589 (memq (car var) inner-vars))
591 (fold alist-delete vars inner-vars))
593 ;; For simplicity, we leave REFS untouched, i.e., with
594 ;; names of variables that are now going out of scope.
595 ;; It doesn't hurt as these are unique names, it just
596 ;; makes REFS unnecessarily fat.
599 (let ((vars (dotless-list vars)))
600 (make-binding-info (shrink vars refs) refs
603 (make-binding-info (shrink vars refs) refs
606 (make-binding-info (shrink vars refs) refs
609 (make-binding-info (shrink vars refs) refs
612 (make-binding-info (shrink vars refs) refs
615 (make-binding-info '() '() '())