1 ;;; TREE-IL -> GLIL compiler
3 ;; Copyright (C) 2001,2008,2009 Free Software Foundation, Inc.
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 (define-module (language tree-il analyze)
22 #:use-module (srfi srfi-1)
23 #:use-module (srfi srfi-9)
24 #:use-module (system base syntax)
25 #:use-module (system base message)
26 #:use-module (language tree-il)
27 #:export (analyze-lexicals
28 report-unused-variables
29 report-possibly-unbound-variables))
31 ;; Allocation is the process of assigning storage locations for lexical
32 ;; variables. A lexical variable has a distinct "address", or storage
33 ;; location, for each procedure in which it is referenced.
35 ;; A variable is "local", i.e., allocated on the stack, if it is
36 ;; referenced from within the procedure that defined it. Otherwise it is
37 ;; a "closure" variable. For example:
39 ;; (lambda (a) a) ; a will be local
40 ;; `a' is local to the procedure.
42 ;; (lambda (a) (lambda () a))
43 ;; `a' is local to the outer procedure, but a closure variable with
44 ;; respect to the inner procedure.
46 ;; If a variable is ever assigned, it needs to be heap-allocated
47 ;; ("boxed"). This is so that closures and continuations capture the
48 ;; variable's identity, not just one of the values it may have over the
49 ;; course of program execution. If the variable is never assigned, there
50 ;; is no distinction between value and identity, so closing over its
51 ;; identity (whether through closures or continuations) can make a copy
52 ;; of its value instead.
54 ;; Local variables are stored on the stack within a procedure's call
55 ;; frame. Their index into the stack is determined from their linear
56 ;; postion within a procedure's binding path:
63 ;; This algorithm has the problem that variables are only allocated
64 ;; indices at the end of the binding path. If variables bound early in
65 ;; the path are not used in later portions of the path, their indices
66 ;; will not be recycled. This problem is particularly egregious in the
70 ;; -> (let ((a x)) (if a a (let ((b y)) (if b b z))))
72 ;; As you can see, the `a' binding is only used in the ephemeral `then'
73 ;; clause of the first `if', but its index would be reserved for the
74 ;; whole of the `or' expansion. So we have a hack for this specific
75 ;; case. A proper solution would be some sort of liveness analysis, and
76 ;; not our linear allocation algorithm.
78 ;; Closure variables are captured when a closure is created, and stored
79 ;; in a vector. Each closure variable has a unique index into that
82 ;; There is one more complication. Procedures bound by <fix> may, in
83 ;; some cases, be rendered inline to their parent procedure. That is to
86 ;; (letrec ((lp (lambda () (lp)))) (lp))
87 ;; => (fix ((lp (lambda () (lp)))) (lp))
88 ;; => goto FIX-BODY; LP: goto LP; FIX-BODY: goto LP;
89 ;; ^ jump over the loop ^ the fixpoint lp ^ starting off the loop
91 ;; The upshot is that we don't have to allocate any space for the `lp'
92 ;; closure at all, as it can be rendered inline as a loop. So there is
93 ;; another kind of allocation, "label allocation", in which the
94 ;; procedure is simply a label, placed at the start of the lambda body.
95 ;; The label is the gensym under which the lambda expression is bound.
97 ;; The analyzer checks to see that the label is called with the correct
98 ;; number of arguments. Calls to labels compile to rename + goto.
99 ;; Lambda, the ultimate goto!
102 ;; The return value of `analyze-lexicals' is a hash table, the
105 ;; The allocation maps gensyms -- recall that each lexically bound
106 ;; variable has a unique gensym -- to storage locations ("addresses").
107 ;; Since one gensym may have many storage locations, if it is referenced
108 ;; in many procedures, it is a two-level map.
110 ;; The allocation also stored information on how many local variables
111 ;; need to be allocated for each procedure, lexicals that have been
112 ;; translated into labels, and information on what free variables to
113 ;; capture from its lexical parent procedure.
117 ;; sym -> {lambda -> address}
118 ;; lambda -> (nlocs labels . free-locs)
120 ;; address ::= (local? boxed? . index)
121 ;; labels ::= ((sym . lambda-vars) ...)
122 ;; free-locs ::= ((sym0 . address0) (sym1 . address1) ...)
123 ;; free variable addresses are relative to parent proc.
125 (define (make-hashq k v)
126 (let ((res (make-hash-table)))
130 (define (analyze-lexicals x)
131 ;; bound-vars: lambda -> (sym ...)
132 ;; all identifiers bound within a lambda
133 (define bound-vars (make-hash-table))
134 ;; free-vars: lambda -> (sym ...)
135 ;; all identifiers referenced in a lambda, but not bound
136 ;; NB, this includes identifiers referenced by contained lambdas
137 (define free-vars (make-hash-table))
138 ;; assigned: sym -> #t
139 ;; variables that are assigned
140 (define assigned (make-hash-table))
141 ;; refcounts: sym -> count
142 ;; allows us to detect the or-expansion in O(1) time
143 (define refcounts (make-hash-table))
144 ;; labels: sym -> lambda-vars
145 ;; for determining if fixed-point procedures can be rendered as
146 ;; labels. lambda-vars may be an improper list.
147 (define labels (make-hash-table))
149 ;; returns variables referenced in expr
150 (define (analyze! x proc labels-in-proc tail? tail-call-args)
151 (define (step y) (analyze! y proc labels-in-proc #f #f))
152 (define (step-tail y) (analyze! y proc labels-in-proc tail? #f))
153 (define (step-tail-call y args) (analyze! y proc labels-in-proc #f
155 (define (recur/labels x new-proc labels)
156 (analyze! x new-proc (append labels labels-in-proc) #t #f))
157 (define (recur x new-proc) (analyze! x new-proc '() tail? #f))
159 ((<application> proc args)
160 (apply lset-union eq? (step-tail-call proc args)
163 ((<conditional> test then else)
164 (lset-union eq? (step test) (step-tail then) (step-tail else)))
166 ((<lexical-ref> gensym)
167 (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
168 (if (not (and tail-call-args
169 (memq gensym labels-in-proc)
170 (let ((args (hashq-ref labels gensym)))
172 (= (length args) (length tail-call-args))))))
173 (hashq-set! labels gensym #f))
176 ((<lexical-set> gensym exp)
177 (hashq-set! assigned gensym #t)
178 (hashq-set! labels gensym #f)
179 (lset-adjoin eq? (step exp) gensym))
184 ((<toplevel-set> exp)
187 ((<toplevel-define> exp)
191 (let lp ((exps exps) (ret '()))
192 (cond ((null? exps) '())
194 (lset-union eq? ret (step-tail (car exps))))
196 (lp (cdr exps) (lset-union eq? ret (step (car exps))))))))
198 ((<lambda> vars body)
199 (let ((locally-bound (let rev* ((vars vars) (out '()))
200 (cond ((null? vars) out)
201 ((pair? vars) (rev* (cdr vars)
202 (cons (car vars) out)))
203 (else (cons vars out))))))
204 (hashq-set! bound-vars x locally-bound)
205 (let* ((referenced (recur body x))
206 (free (lset-difference eq? referenced locally-bound))
207 (all-bound (reverse! (hashq-ref bound-vars x))))
208 (hashq-set! bound-vars x all-bound)
209 (hashq-set! free-vars x free)
212 ((<let> vars vals body)
213 (hashq-set! bound-vars proc
214 (append (reverse vars) (hashq-ref bound-vars proc)))
216 (apply lset-union eq? (step-tail body) (map step vals))
219 ((<letrec> vars vals body)
220 (hashq-set! bound-vars proc
221 (append (reverse vars) (hashq-ref bound-vars proc)))
222 (for-each (lambda (sym) (hashq-set! assigned sym #t)) vars)
224 (apply lset-union eq? (step-tail body) (map step vals))
227 ((<fix> vars vals body)
228 ;; Try to allocate these procedures as labels.
229 (for-each (lambda (sym val) (hashq-set! labels sym (lambda-vars val)))
231 (hashq-set! bound-vars proc
232 (append (reverse vars) (hashq-ref bound-vars proc)))
233 ;; Step into subexpressions.
236 ;; Since we're trying to label-allocate the lambda,
237 ;; pretend it's not a closure, and just recurse into its
238 ;; body directly. (Otherwise, recursing on a closure
239 ;; that references one of the fix's bound vars would
240 ;; prevent label allocation.)
243 ((<lambda> (lvars vars) body)
245 (let rev* ((lvars lvars) (out '()))
246 (cond ((null? lvars) out)
247 ((pair? lvars) (rev* (cdr lvars)
248 (cons (car lvars) out)))
249 (else (cons lvars out))))))
250 (hashq-set! bound-vars x locally-bound)
251 ;; recur/labels, the difference from the closure case
252 (let* ((referenced (recur/labels body x vars))
253 (free (lset-difference eq? referenced locally-bound))
254 (all-bound (reverse! (hashq-ref bound-vars x))))
255 (hashq-set! bound-vars x all-bound)
256 (hashq-set! free-vars x free)
259 (vars-with-refs (map cons vars var-refs))
260 (body-refs (recur/labels body proc vars)))
261 (define (delabel-dependents! sym)
262 (let ((refs (assq-ref vars-with-refs sym)))
264 (for-each (lambda (sym)
265 (if (hashq-ref labels sym)
267 (hashq-set! labels sym #f)
268 (delabel-dependents! sym))))
270 ;; Stepping into the lambdas and the body might have made some
271 ;; procedures not label-allocatable -- which might have
272 ;; knock-on effects. For example:
273 ;; (fix ((a (lambda () (b)))
274 ;; (b (lambda () a)))
276 ;; As far as `a' is concerned, both `a' and `b' are
277 ;; label-allocatable. But `b' references `a' not in a proc-tail
278 ;; position, which makes `a' not label-allocatable. The
279 ;; knock-on effect is that, when back-propagating this
280 ;; information to `a', `b' will also become not
281 ;; label-allocatable, as it is referenced within `a', which is
282 ;; allocated as a closure. This is a transitive relationship.
283 (for-each (lambda (sym)
284 (if (not (hashq-ref labels sym))
285 (delabel-dependents! sym)))
287 ;; Now lift bound variables with label-allocated lambdas to the
291 (if (hashq-ref labels sym)
292 ;; Remove traces of the label-bound lambda. The free
293 ;; vars will propagate up via the return val.
295 (hashq-set! bound-vars proc
296 (append (hashq-ref bound-vars val)
297 (hashq-ref bound-vars proc)))
298 (hashq-remove! bound-vars val)
299 (hashq-remove! free-vars val))))
302 (apply lset-union eq? body-refs var-refs)
305 ((<let-values> vars exp body)
306 (let ((bound (let lp ((out (hashq-ref bound-vars proc)) (in vars))
308 (lp (cons (car in) out) (cdr in))
309 (if (null? in) out (cons in out))))))
310 (hashq-set! bound-vars proc bound)
312 (lset-union eq? (step exp) (step-tail body))
317 ;; allocation: sym -> {lambda -> address}
318 ;; lambda -> (nlocs labels . free-locs)
319 (define allocation (make-hash-table))
321 (define (allocate! x proc n)
322 (define (recur y) (allocate! y proc n))
324 ((<application> proc args)
325 (apply max (recur proc) (map recur args)))
327 ((<conditional> test then else)
328 (max (recur test) (recur then) (recur else)))
336 ((<toplevel-set> exp)
339 ((<toplevel-define> exp)
343 (apply max (map recur exps)))
345 ((<lambda> vars body)
346 ;; allocate closure vars in order
347 (let lp ((c (hashq-ref free-vars x)) (n 0))
350 (hashq-set! (hashq-ref allocation (car c))
352 `(#f ,(hashq-ref assigned (car c)) . ,n))
353 (lp (cdr c) (1+ n)))))
356 (let lp ((vars vars) (n 0))
357 (if (not (null? vars))
359 (let ((v (if (pair? vars) (car vars) vars)))
360 (hashq-set! allocation v
362 x `(#t ,(hashq-ref assigned v) . ,n)))
363 (lp (if (pair? vars) (cdr vars) '()) (1+ n)))
364 ;; allocate body, return number of additional locals
365 (- (allocate! body x n) n))))
368 (hashq-ref (hashq-ref allocation v) proc))
369 (hashq-ref free-vars x)))
372 (cons sym (hashq-ref labels sym)))
373 (hashq-ref bound-vars x)))))
374 ;; set procedure allocations
375 (hashq-set! allocation x (cons* nlocs labels free-addresses)))
378 ((<let> vars vals body)
379 (let ((nmax (apply max (map recur vals))))
382 ((and (conditional? body)
384 (let ((v (car vars)))
385 (and (not (hashq-ref assigned v))
386 (= (hashq-ref refcounts v 0) 2)
387 (lexical-ref? (conditional-test body))
388 (eq? (lexical-ref-gensym (conditional-test body)) v)
389 (lexical-ref? (conditional-then body))
390 (eq? (lexical-ref-gensym (conditional-then body)) v))))
391 (hashq-set! allocation (car vars)
392 (make-hashq proc `(#t #f . ,n)))
393 ;; the 1+ for this var
394 (max nmax (1+ n) (allocate! (conditional-else body) proc n)))
396 (let lp ((vars vars) (n n))
398 (max nmax (allocate! body proc n))
399 (let ((v (car vars)))
403 `(#t ,(hashq-ref assigned v) . ,n)))
404 (lp (cdr vars) (1+ n)))))))))
406 ((<letrec> vars vals body)
407 (let lp ((vars vars) (n n))
409 (let ((nmax (apply max
411 (allocate! x proc n))
413 (max nmax (allocate! body proc n)))
414 (let ((v (car vars)))
418 `(#t ,(hashq-ref assigned v) . ,n)))
419 (lp (cdr vars) (1+ n))))))
421 ((<fix> vars vals body)
422 (let lp ((in vars) (n n))
424 (let lp ((vars vars) (vals vals) (nmax n))
427 (max nmax (allocate! body proc n)))
428 ((hashq-ref labels (car vars))
429 ;; allocate label bindings & body inline to proc
432 (record-case (car vals)
433 ((<lambda> vars body)
434 (let lp ((vars vars) (n n))
435 (if (not (null? vars))
437 (let ((v (if (pair? vars) (car vars) vars)))
441 proc `(#t ,(hashq-ref assigned v) . ,n)))
442 (lp (if (pair? vars) (cdr vars) '()) (1+ n)))
444 (max nmax (allocate! body proc n))))))))
449 (max nmax (allocate! (car vals) proc n))))))
453 ((hashq-ref assigned v)
454 (error "fixpoint procedures may not be assigned" x))
455 ((hashq-ref labels v)
456 ;; no binding, it's a label
459 ;; allocate closure binding
460 (hashq-set! allocation v (make-hashq proc `(#t #f . ,n)))
461 (lp (cdr in) (1+ n))))))))
463 ((<let-values> vars exp body)
464 (let ((nmax (recur exp)))
465 (let lp ((vars vars) (n n))
468 (max nmax (allocate! body proc n)))
470 (hashq-set! allocation vars
472 `(#t ,(hashq-ref assigned vars) . ,n)))
473 ;; the 1+ for this var
474 (max nmax (allocate! body proc (1+ n))))
476 (let ((v (car vars)))
480 `(#t ,(hashq-ref assigned v) . ,n)))
481 (lp (cdr vars) (1+ n))))))))
485 (analyze! x #f '() #t #f)
492 ;;; Unused variable analysis.
495 ;; <binding-info> records are used during tree traversals in
496 ;; `report-unused-variables'. They contain a list of the local vars
497 ;; currently in scope, a list of locals vars that have been referenced, and a
498 ;; "location stack" (the stack of `tree-il-src' values for each parent tree).
499 (define-record-type <binding-info>
500 (make-binding-info vars refs locs)
502 (vars binding-info-vars) ;; ((GENSYM NAME LOCATION) ...)
503 (refs binding-info-refs) ;; (GENSYM ...)
504 (locs binding-info-locs)) ;; (LOCATION ...)
506 (define (report-unused-variables tree env)
507 "Report about unused variables in TREE. Return TREE."
509 (define (dotless-list lst)
510 ;; If LST is a dotted list, return a proper list equal to LST except that
511 ;; the very last element is a pair; otherwise return LST.
517 (loop (cdr lst) (cons (car lst) result)))
519 (loop '() (cons lst result))))))
521 (tree-il-fold (lambda (x info)
522 ;; X is a leaf: extend INFO's refs accordingly.
523 (let ((refs (binding-info-refs info))
524 (vars (binding-info-vars info))
525 (locs (binding-info-locs info)))
527 ((<lexical-ref> gensym)
528 (make-binding-info vars (cons gensym refs) locs))
532 ;; Going down into X: extend INFO's variable list
534 (let ((refs (binding-info-refs info))
535 (vars (binding-info-vars info))
536 (locs (binding-info-locs info))
537 (src (tree-il-src x)))
538 (define (extend inner-vars inner-names)
539 (append (map (lambda (var name)
545 ((<lexical-set> gensym)
546 (make-binding-info vars (cons gensym refs)
548 ((<lambda> vars names)
549 (let ((vars (dotless-list vars))
550 (names (dotless-list names)))
551 (make-binding-info (extend vars names) refs
554 (make-binding-info (extend vars names) refs
556 ((<letrec> vars names)
557 (make-binding-info (extend vars names) refs
560 (make-binding-info (extend vars names) refs
562 ((<let-values> vars names)
563 (make-binding-info (extend vars names) refs
568 ;; Leaving X's scope: shrink INFO's variable list
569 ;; accordingly and reported unused nested variables.
570 (let ((refs (binding-info-refs info))
571 (vars (binding-info-vars info))
572 (locs (binding-info-locs info)))
573 (define (shrink inner-vars refs)
574 (for-each (lambda (var)
575 (let ((gensym (car var)))
576 ;; Don't report lambda parameters as
578 (if (and (not (memq gensym refs))
579 (not (and (lambda? x)
582 (let ((name (cadr var))
583 ;; We can get approximate
584 ;; source location by going up
585 ;; the LOCS location stack.
588 (warning 'unused-variable loc name)))))
589 (filter (lambda (var)
590 (memq (car var) inner-vars))
592 (fold alist-delete vars inner-vars))
594 ;; For simplicity, we leave REFS untouched, i.e., with
595 ;; names of variables that are now going out of scope.
596 ;; It doesn't hurt as these are unique names, it just
597 ;; makes REFS unnecessarily fat.
600 (let ((vars (dotless-list vars)))
601 (make-binding-info (shrink vars refs) refs
604 (make-binding-info (shrink vars refs) refs
607 (make-binding-info (shrink vars refs) refs
610 (make-binding-info (shrink vars refs) refs
613 (make-binding-info (shrink vars refs) refs
616 (make-binding-info '() '() '())
622 ;;; Unbound variable analysis.
625 ;; <toplevel-info> records are used during tree traversal in search of
626 ;; possibly unbound variable. They contain a list of references to
627 ;; potentially unbound top-level variables, a list of the top-level defines
628 ;; that have been encountered, and a "location stack" (see above).
629 (define-record-type <toplevel-info>
630 (make-toplevel-info refs defs locs)
632 (refs toplevel-info-refs) ;; ((VARIABLE-NAME . LOCATION) ...)
633 (defs toplevel-info-defs) ;; (VARIABLE-NAME ...)
634 (locs toplevel-info-locs)) ;; (LOCATION ...)
636 ;; TODO: Combine with `report-unused-variables' so we don't traverse the tree
637 ;; once for each warning type.
639 (define (report-possibly-unbound-variables tree env)
640 "Return possibly unbound variables in TREE. Return TREE."
642 (tree-il-fold (lambda (x info)
643 ;; X is a leaf: extend INFO's refs accordingly.
644 (let ((refs (toplevel-info-refs info))
645 (defs (toplevel-info-defs info))
646 (locs (toplevel-info-locs info)))
647 (define (bound? name)
648 (or (and (module? env)
649 (module-variable env name))
653 ((<toplevel-ref> name src)
656 (let ((src (or src (find pair? locs))))
657 (make-toplevel-info (alist-cons name src refs)
663 ;; Going down into X.
664 (let* ((refs (toplevel-info-refs info))
665 (defs (toplevel-info-defs info))
666 (src (tree-il-src x))
667 (locs (cons src (toplevel-info-locs info))))
668 (define (bound? name)
669 (or (and (module? env)
670 (module-variable env name))
674 ((<toplevel-set> name src)
676 (make-toplevel-info refs defs locs)
677 (let ((src (find pair? locs)))
678 (make-toplevel-info (alist-cons name src refs)
681 ((<toplevel-define> name)
682 (make-toplevel-info (alist-delete name refs eq?)
686 (make-toplevel-info refs defs locs)))))
689 ;; Leaving X's scope.
690 (let ((refs (toplevel-info-refs info))
691 (defs (toplevel-info-defs info))
692 (locs (toplevel-info-locs info)))
693 (make-toplevel-info refs defs (cdr locs))))
695 (make-toplevel-info '() '() '())
698 (for-each (lambda (name+loc)
699 (let ((name (car name+loc))
700 (loc (cdr name+loc)))
701 (warning 'unbound-variable loc name)))
702 (reverse (toplevel-info-refs toplevel)))