1 ;;; TREE-IL -> GLIL compiler
3 ;; Copyright (C) 2001, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 (define-module (language tree-il analyze)
22 #:use-module (srfi srfi-1)
23 #:use-module (srfi srfi-9)
24 #:use-module (srfi srfi-11)
25 #:use-module (ice-9 vlist)
26 #:use-module (ice-9 match)
27 #:use-module (system base syntax)
28 #:use-module (system base message)
29 #:use-module (system vm program)
30 #:use-module (language tree-il)
31 #:use-module (system base pmatch)
32 #:export (analyze-lexicals
34 unused-variable-analysis
35 unused-toplevel-analysis
36 unbound-variable-analysis
40 ;; Allocation is the process of assigning storage locations for lexical
41 ;; variables. A lexical variable has a distinct "address", or storage
42 ;; location, for each procedure in which it is referenced.
44 ;; A variable is "local", i.e., allocated on the stack, if it is
45 ;; referenced from within the procedure that defined it. Otherwise it is
46 ;; a "closure" variable. For example:
48 ;; (lambda (a) a) ; a will be local
49 ;; `a' is local to the procedure.
51 ;; (lambda (a) (lambda () a))
52 ;; `a' is local to the outer procedure, but a closure variable with
53 ;; respect to the inner procedure.
55 ;; If a variable is ever assigned, it needs to be heap-allocated
56 ;; ("boxed"). This is so that closures and continuations capture the
57 ;; variable's identity, not just one of the values it may have over the
58 ;; course of program execution. If the variable is never assigned, there
59 ;; is no distinction between value and identity, so closing over its
60 ;; identity (whether through closures or continuations) can make a copy
61 ;; of its value instead.
63 ;; Local variables are stored on the stack within a procedure's call
64 ;; frame. Their index into the stack is determined from their linear
65 ;; postion within a procedure's binding path:
72 ;; This algorithm has the problem that variables are only allocated
73 ;; indices at the end of the binding path. If variables bound early in
74 ;; the path are not used in later portions of the path, their indices
75 ;; will not be recycled. This problem is particularly egregious in the
79 ;; -> (let ((a x)) (if a a (let ((b y)) (if b b z))))
81 ;; As you can see, the `a' binding is only used in the ephemeral
82 ;; `consequent' clause of the first `if', but its index would be
83 ;; reserved for the whole of the `or' expansion. So we have a hack for
84 ;; this specific case. A proper solution would be some sort of liveness
85 ;; analysis, and not our linear allocation algorithm.
87 ;; Closure variables are captured when a closure is created, and stored in a
88 ;; vector inline to the closure object itself. Each closure variable has a
89 ;; unique index into that vector.
91 ;; There is one more complication. Procedures bound by <fix> may, in
92 ;; some cases, be rendered inline to their parent procedure. That is to
95 ;; (letrec ((lp (lambda () (lp)))) (lp))
96 ;; => (fix ((lp (lambda () (lp)))) (lp))
97 ;; => goto FIX-BODY; LP: goto LP; FIX-BODY: goto LP;
98 ;; ^ jump over the loop ^ the fixpoint lp ^ starting off the loop
100 ;; The upshot is that we don't have to allocate any space for the `lp'
101 ;; closure at all, as it can be rendered inline as a loop. So there is
102 ;; another kind of allocation, "label allocation", in which the
103 ;; procedure is simply a label, placed at the start of the lambda body.
104 ;; The label is the gensym under which the lambda expression is bound.
106 ;; The analyzer checks to see that the label is called with the correct
107 ;; number of arguments. Calls to labels compile to rename + goto.
108 ;; Lambda, the ultimate goto!
111 ;; The return value of `analyze-lexicals' is a hash table, the
114 ;; The allocation maps gensyms -- recall that each lexically bound
115 ;; variable has a unique gensym -- to storage locations ("addresses").
116 ;; Since one gensym may have many storage locations, if it is referenced
117 ;; in many procedures, it is a two-level map.
119 ;; The allocation also stored information on how many local variables
120 ;; need to be allocated for each procedure, lexicals that have been
121 ;; translated into labels, and information on what free variables to
122 ;; capture from its lexical parent procedure.
124 ;; In addition, we have a conflation: while we're traversing the code,
125 ;; recording information to pass to the compiler, we take the
126 ;; opportunity to generate labels for each lambda-case clause, so that
127 ;; generated code can skip argument checks at runtime if they match at
130 ;; Also, while we're a-traversing and an-allocating, we check prompt
131 ;; handlers to see if the "continuation" argument is used. If not, we
132 ;; mark the prompt as being "escape-only". This allows us to implement
133 ;; `catch' and `throw' using `prompt' and `control', but without causing
134 ;; a continuation to be reified. Heh heh.
138 ;; sym -> {lambda -> address}
139 ;; lambda -> (labels . free-locs)
140 ;; lambda-case -> (gensym . nlocs)
141 ;; prompt -> escape-only?
143 ;; address ::= (local? boxed? . index)
144 ;; labels ::= ((sym . lambda) ...)
145 ;; free-locs ::= ((sym0 . address0) (sym1 . address1) ...)
146 ;; free variable addresses are relative to parent proc.
148 (define (make-hashq k v)
149 (let ((res (make-hash-table)))
153 (define (analyze-lexicals x)
154 ;; bound-vars: lambda -> (sym ...)
155 ;; all identifiers bound within a lambda
156 (define bound-vars (make-hash-table))
157 ;; free-vars: lambda -> (sym ...)
158 ;; all identifiers referenced in a lambda, but not bound
159 ;; NB, this includes identifiers referenced by contained lambdas
160 (define free-vars (make-hash-table))
161 ;; assigned: sym -> #t
162 ;; variables that are assigned
163 (define assigned (make-hash-table))
164 ;; refcounts: sym -> count
165 ;; allows us to detect the or-expansion in O(1) time
166 (define refcounts (make-hash-table))
167 ;; labels: sym -> lambda
168 ;; for determining if fixed-point procedures can be rendered as
170 (define labels (make-hash-table))
172 ;; returns variables referenced in expr
173 (define (analyze! x proc labels-in-proc tail? tail-call-args)
174 (define (step y) (analyze! y proc '() #f #f))
175 (define (step-tail y) (analyze! y proc labels-in-proc tail? #f))
176 (define (step-tail-call y args) (analyze! y proc labels-in-proc #f
178 (define (recur/labels x new-proc labels)
179 (analyze! x new-proc (append labels labels-in-proc) #t #f))
180 (define (recur x new-proc) (analyze! x new-proc '() tail? #f))
183 (apply lset-union eq? (step-tail-call proc args)
187 (apply lset-union eq? (map step args)))
189 ((<conditional> test consequent alternate)
190 (lset-union eq? (step test) (step-tail consequent) (step-tail alternate)))
192 ((<lexical-ref> gensym)
193 (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
194 (if (not (and tail-call-args
195 (memq gensym labels-in-proc)
196 (let ((p (hashq-ref labels gensym)))
198 (let lp ((c (lambda-body p)))
199 (and c (lambda-case? c)
201 ;; for now prohibit optional &
202 ;; keyword arguments; can relax this
204 (and (= (length (lambda-case-req c))
205 (length tail-call-args))
206 (not (lambda-case-opt c))
207 (not (lambda-case-kw c))
208 (not (lambda-case-rest c)))
209 (lp (lambda-case-alternate c)))))))))
210 (hashq-set! labels gensym #f))
213 ((<lexical-set> gensym exp)
214 (hashq-set! assigned gensym #t)
215 (hashq-set! labels gensym #f)
216 (lset-adjoin eq? (step exp) gensym))
221 ((<toplevel-set> exp)
224 ((<toplevel-define> exp)
228 (lset-union eq? (step head) (step-tail tail)))
231 ;; order is important here
232 (hashq-set! bound-vars x '())
233 (let ((free (recur body x)))
234 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
235 (hashq-set! free-vars x free)
238 ((<lambda-case> opt kw inits gensyms body alternate)
239 (hashq-set! bound-vars proc
240 (append (reverse gensyms) (hashq-ref bound-vars proc)))
245 (apply lset-union eq? (map step inits))
248 (if alternate (step-tail alternate) '())))
250 ((<let> gensyms vals body)
251 (hashq-set! bound-vars proc
252 (append (reverse gensyms) (hashq-ref bound-vars proc)))
254 (apply lset-union eq? (step-tail body) (map step vals))
257 ((<letrec> gensyms vals body)
258 (hashq-set! bound-vars proc
259 (append (reverse gensyms) (hashq-ref bound-vars proc)))
260 (for-each (lambda (sym) (hashq-set! assigned sym #t)) gensyms)
262 (apply lset-union eq? (step-tail body) (map step vals))
265 ((<fix> gensyms vals body)
266 ;; Try to allocate these procedures as labels.
267 (for-each (lambda (sym val) (hashq-set! labels sym val))
269 (hashq-set! bound-vars proc
270 (append (reverse gensyms) (hashq-ref bound-vars proc)))
271 ;; Step into subexpressions.
274 ;; Since we're trying to label-allocate the lambda,
275 ;; pretend it's not a closure, and just recurse into its
276 ;; body directly. (Otherwise, recursing on a closure
277 ;; that references one of the fix's bound vars would
278 ;; prevent label allocation.)
282 ;; just like the closure case, except here we use
283 ;; recur/labels instead of recur
284 (hashq-set! bound-vars x '())
285 (let ((free (recur/labels body x gensyms)))
286 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
287 (hashq-set! free-vars x free)
290 (vars-with-refs (map cons gensyms var-refs))
291 (body-refs (recur/labels body proc gensyms)))
292 (define (delabel-dependents! sym)
293 (let ((refs (assq-ref vars-with-refs sym)))
295 (for-each (lambda (sym)
296 (if (hashq-ref labels sym)
298 (hashq-set! labels sym #f)
299 (delabel-dependents! sym))))
301 ;; Stepping into the lambdas and the body might have made some
302 ;; procedures not label-allocatable -- which might have
303 ;; knock-on effects. For example:
304 ;; (fix ((a (lambda () (b)))
305 ;; (b (lambda () a)))
307 ;; As far as `a' is concerned, both `a' and `b' are
308 ;; label-allocatable. But `b' references `a' not in a proc-tail
309 ;; position, which makes `a' not label-allocatable. The
310 ;; knock-on effect is that, when back-propagating this
311 ;; information to `a', `b' will also become not
312 ;; label-allocatable, as it is referenced within `a', which is
313 ;; allocated as a closure. This is a transitive relationship.
314 (for-each (lambda (sym)
315 (if (not (hashq-ref labels sym))
316 (delabel-dependents! sym)))
318 ;; Now lift bound variables with label-allocated lambdas to the
322 (if (hashq-ref labels sym)
323 ;; Remove traces of the label-bound lambda. The free
324 ;; vars will propagate up via the return val.
326 (hashq-set! bound-vars proc
327 (append (hashq-ref bound-vars val)
328 (hashq-ref bound-vars proc)))
329 (hashq-remove! bound-vars val)
330 (hashq-remove! free-vars val))))
333 (apply lset-union eq? body-refs var-refs)
336 ((<let-values> exp body)
337 (lset-union eq? (step exp) (step body)))
339 ((<dynwind> winder pre body post unwinder)
340 (lset-union eq? (step winder) (step pre)
342 (step post) (step unwinder)))
344 ((<dynlet> fluids vals body)
345 (apply lset-union eq? (step body) (map step (append fluids vals))))
350 ((<dynset> fluid exp)
351 (lset-union eq? (step fluid) (step exp)))
353 ((<prompt> tag body handler)
354 (lset-union eq? (step tag) (step body) (step-tail handler)))
356 ((<abort> tag args tail)
357 (apply lset-union eq? (step tag) (step tail) (map step args)))
361 ;; allocation: sym -> {lambda -> address}
362 ;; lambda -> (nlocs labels . free-locs)
363 (define allocation (make-hash-table))
365 (define (allocate! x proc n)
366 (define (recur y) (allocate! y proc n))
369 (apply max (recur proc) (map recur args)))
372 (apply max n (map recur args)))
374 ((<conditional> test consequent alternate)
375 (max (recur test) (recur consequent) (recur alternate)))
383 ((<toplevel-set> exp)
386 ((<toplevel-define> exp)
394 ;; allocate closure vars in order
395 (let lp ((c (hashq-ref free-vars x)) (n 0))
398 (hashq-set! (hashq-ref allocation (car c))
400 `(#f ,(hashq-ref assigned (car c)) . ,n))
401 (lp (cdr c) (1+ n)))))
403 (let ((nlocs (allocate! body x 0))
406 (hashq-ref (hashq-ref allocation v) proc))
407 (hashq-ref free-vars x)))
410 (cons sym (hashq-ref labels sym)))
411 (hashq-ref bound-vars x)))))
412 ;; set procedure allocations
413 (hashq-set! allocation x (cons labels free-addresses)))
416 ((<lambda-case> opt kw inits gensyms body alternate)
418 (let lp ((gensyms gensyms) (n n))
422 (allocate! body proc n)
423 ;; inits not logically at the end, but they
425 (map (lambda (x) (allocate! x proc n)) inits))))
426 ;; label and nlocs for the case
427 (hashq-set! allocation x (cons (gensym ":LCASE") nlocs))
430 (hashq-set! allocation (car gensyms)
432 proc `(#t ,(hashq-ref assigned (car gensyms)) . ,n)))
433 (lp (cdr gensyms) (1+ n)))))
434 (if alternate (allocate! alternate proc n) n)))
436 ((<let> gensyms vals body)
437 (let ((nmax (apply max (map recur vals))))
440 ((and (conditional? body)
441 (= (length gensyms) 1)
442 (let ((v (car gensyms)))
443 (and (not (hashq-ref assigned v))
444 (= (hashq-ref refcounts v 0) 2)
445 (lexical-ref? (conditional-test body))
446 (eq? (lexical-ref-gensym (conditional-test body)) v)
447 (lexical-ref? (conditional-consequent body))
448 (eq? (lexical-ref-gensym (conditional-consequent body)) v))))
449 (hashq-set! allocation (car gensyms)
450 (make-hashq proc `(#t #f . ,n)))
451 ;; the 1+ for this var
452 (max nmax (1+ n) (allocate! (conditional-alternate body) proc n)))
454 (let lp ((gensyms gensyms) (n n))
456 (max nmax (allocate! body proc n))
457 (let ((v (car gensyms)))
461 `(#t ,(hashq-ref assigned v) . ,n)))
462 (lp (cdr gensyms) (1+ n)))))))))
464 ((<letrec> gensyms vals body)
465 (let lp ((gensyms gensyms) (n n))
467 (let ((nmax (apply max
469 (allocate! x proc n))
471 (max nmax (allocate! body proc n)))
472 (let ((v (car gensyms)))
476 `(#t ,(hashq-ref assigned v) . ,n)))
477 (lp (cdr gensyms) (1+ n))))))
479 ((<fix> gensyms vals body)
480 (let lp ((in gensyms) (n n))
482 (let lp ((gensyms gensyms) (vals vals) (nmax n))
485 (max nmax (allocate! body proc n)))
486 ((hashq-ref labels (car gensyms))
487 ;; allocate lambda body inline to proc
490 (record-case (car vals)
492 (max nmax (allocate! body proc n))))))
497 (max nmax (allocate! (car vals) proc n))))))
501 ((hashq-ref assigned v)
502 (error "fixpoint procedures may not be assigned" x))
503 ((hashq-ref labels v)
504 ;; no binding, it's a label
507 ;; allocate closure binding
508 (hashq-set! allocation v (make-hashq proc `(#t #f . ,n)))
509 (lp (cdr in) (1+ n))))))))
511 ((<let-values> exp body)
512 (max (recur exp) (recur body)))
514 ((<dynwind> winder pre body post unwinder)
515 (max (recur winder) (recur pre)
517 (recur post) (recur unwinder)))
519 ((<dynlet> fluids vals body)
520 (apply max (recur body) (map recur (append fluids vals))))
525 ((<dynset> fluid exp)
526 (max (recur fluid) (recur exp)))
528 ((<prompt> tag body handler)
529 (let ((cont-var (and (lambda-case? handler)
530 (pair? (lambda-case-gensyms handler))
531 (car (lambda-case-gensyms handler)))))
532 (hashq-set! allocation x
533 (and cont-var (zero? (hashq-ref refcounts cont-var 0))))
534 (max (recur tag) (recur body) (recur handler))))
536 ((<abort> tag args tail)
537 (apply max (recur tag) (recur tail) (map recur args)))
541 (analyze! x #f '() #t #f)
548 ;;; Tree analyses for warnings.
551 (define-record-type <tree-analysis>
552 (make-tree-analysis leaf down up post init)
554 (leaf tree-analysis-leaf) ;; (lambda (x result env locs) ...)
555 (down tree-analysis-down) ;; (lambda (x result env locs) ...)
556 (up tree-analysis-up) ;; (lambda (x result env locs) ...)
557 (post tree-analysis-post) ;; (lambda (result env) ...)
558 (init tree-analysis-init)) ;; arbitrary value
560 (define (analyze-tree analyses tree env)
561 "Run all tree analyses listed in ANALYSES on TREE for ENV, using
562 `tree-il-fold'. Return TREE. The leaf/down/up procedures of each analysis are
563 passed a ``location stack', which is the stack of `tree-il-src' values for each
564 parent tree (a list); it can be used to approximate source location when
565 accurate information is missing from a given `tree-il' element."
567 (define (traverse proc update-locs)
568 ;; Return a tree traversing procedure that returns a list of analysis
569 ;; results prepended by the location stack.
571 (let ((locs (update-locs x (car results))))
572 (cons locs ;; the location stack
573 (map (lambda (analysis result)
574 ((proc analysis) x result env locs))
578 ;; Keeping/extending/shrinking the location stack.
579 (define (keep-locs x locs) locs)
580 (define (extend-locs x locs) (cons (tree-il-src x) locs))
581 (define (shrink-locs x locs) (cdr locs))
584 (tree-il-fold (traverse tree-analysis-leaf keep-locs)
585 (traverse tree-analysis-down extend-locs)
586 (traverse tree-analysis-up shrink-locs)
587 (cons '() ;; empty location stack
588 (map tree-analysis-init analyses))
591 (for-each (lambda (analysis result)
592 ((tree-analysis-post analysis) result env))
600 ;;; Unused variable analysis.
603 ;; <binding-info> records are used during tree traversals in
604 ;; `unused-variable-analysis'. They contain a list of the local vars
605 ;; currently in scope, and a list of locals vars that have been referenced.
606 (define-record-type <binding-info>
607 (make-binding-info vars refs)
609 (vars binding-info-vars) ;; ((GENSYM NAME LOCATION) ...)
610 (refs binding-info-refs)) ;; (GENSYM ...)
612 (define (gensym? sym)
613 ;; Return #t if SYM is (likely) a generated symbol.
614 (string-any #\space (symbol->string sym)))
616 (define unused-variable-analysis
617 ;; Report unused variables in the given tree.
619 (lambda (x info env locs)
620 ;; X is a leaf: extend INFO's refs accordingly.
621 (let ((refs (binding-info-refs info))
622 (vars (binding-info-vars info)))
624 ((<lexical-ref> gensym)
625 (make-binding-info vars (vhash-consq gensym #t refs)))
628 (lambda (x info env locs)
629 ;; Going down into X: extend INFO's variable list
631 (let ((refs (binding-info-refs info))
632 (vars (binding-info-vars info))
633 (src (tree-il-src x)))
634 (define (extend inner-vars inner-names)
635 (fold (lambda (var name vars)
636 (vhash-consq var (list name src) vars))
642 ((<lexical-set> gensym)
643 (make-binding-info vars (vhash-consq gensym #t refs)))
644 ((<lambda-case> req opt inits rest kw gensyms)
647 ,@(if rest (list rest) '())
648 ,@(if kw (map cadr (cdr kw)) '()))))
649 (make-binding-info (extend gensyms names) refs)))
650 ((<let> gensyms names)
651 (make-binding-info (extend gensyms names) refs))
652 ((<letrec> gensyms names)
653 (make-binding-info (extend gensyms names) refs))
654 ((<fix> gensyms names)
655 (make-binding-info (extend gensyms names) refs))
658 (lambda (x info env locs)
659 ;; Leaving X's scope: shrink INFO's variable list
660 ;; accordingly and reported unused nested variables.
661 (let ((refs (binding-info-refs info))
662 (vars (binding-info-vars info)))
663 (define (shrink inner-vars refs)
666 (let ((gensym (car var)))
667 ;; Don't report lambda parameters as unused.
668 (if (and (memq gensym inner-vars)
669 (not (vhash-assq gensym refs))
670 (not (lambda-case? x)))
671 (let ((name (cadr var))
672 ;; We can get approximate source location by going up
673 ;; the LOCS location stack.
676 (if (and (not (gensym? name))
678 (warning 'unused-variable loc name))))))
680 (vlist-drop vars (length inner-vars)))
682 ;; For simplicity, we leave REFS untouched, i.e., with
683 ;; names of variables that are now going out of scope.
684 ;; It doesn't hurt as these are unique names, it just
685 ;; makes REFS unnecessarily fat.
687 ((<lambda-case> gensyms)
688 (make-binding-info (shrink gensyms refs) refs))
690 (make-binding-info (shrink gensyms refs) refs))
692 (make-binding-info (shrink gensyms refs) refs))
694 (make-binding-info (shrink gensyms refs) refs))
697 (lambda (result env) #t)
698 (make-binding-info vlist-null vlist-null)))
702 ;;; Unused top-level variable analysis.
705 ;; <reference-graph> record top-level definitions that are made, references to
706 ;; top-level definitions and their context (the top-level definition in which
707 ;; the reference appears), as well as the current context (the top-level
708 ;; definition we're currently in). The second part (`refs' below) is
709 ;; effectively a graph from which we can determine unused top-level definitions.
710 (define-record-type <reference-graph>
711 (make-reference-graph refs defs toplevel-context)
713 (defs reference-graph-defs) ;; ((NAME . LOC) ...)
714 (refs reference-graph-refs) ;; ((REF-CONTEXT REF ...) ...)
715 (toplevel-context reference-graph-toplevel-context)) ;; NAME | #f
717 (define (graph-reachable-nodes root refs reachable)
718 ;; Add to REACHABLE the nodes reachable from ROOT in graph REFS. REFS is a
719 ;; vhash mapping nodes to the list of their children: for instance,
720 ;; ((A -> (B C)) (B -> (A)) (C -> ())) corresponds to
729 ;; REACHABLE is a vhash of nodes known to be otherwise reachable.
731 (let loop ((root root)
734 (if (or (vhash-assq root path)
735 (vhash-assq root result))
737 (let* ((children (or (and=> (vhash-assq root refs) cdr) '()))
738 (path (vhash-consq root #t path))
739 (result (fold (lambda (kid result)
740 (loop kid path result))
743 (fold (lambda (kid result)
744 (vhash-consq kid #t result))
748 (define (graph-reachable-nodes* roots refs)
749 ;; Return the list of nodes in REFS reachable from the nodes listed in ROOTS.
750 (vlist-fold (lambda (root+true result)
751 (let* ((root (car root+true))
752 (reachable (graph-reachable-nodes root refs result)))
753 (vhash-consq root #t reachable)))
757 (define (partition* pred vhash)
758 ;; Partition VHASH according to PRED. Return the two resulting vhashes.
760 (vlist-fold (lambda (k+v result)
766 (cons (vhash-consq k v r1) r2)
767 (cons r1 (vhash-consq k v r2)))))
768 (cons vlist-null vlist-null)
770 (values (car result) (cdr result))))
772 (define unused-toplevel-analysis
773 ;; Report unused top-level definitions that are not exported.
774 (let ((add-ref-from-context
776 ;; Add an edge CTX -> NAME in GRAPH.
777 (let* ((refs (reference-graph-refs graph))
778 (defs (reference-graph-defs graph))
779 (ctx (reference-graph-toplevel-context graph))
780 (ctx-refs (or (and=> (vhash-assq ctx refs) cdr) '())))
781 (make-reference-graph (vhash-consq ctx (cons name ctx-refs) refs)
783 (define (macro-variable? name env)
785 (let ((var (module-variable env name)))
786 (and var (variable-bound? var)
787 (macro? (variable-ref var))))))
790 (lambda (x graph env locs)
792 (let ((ctx (reference-graph-toplevel-context graph)))
794 ((<toplevel-ref> name src)
795 (add-ref-from-context graph name))
798 (lambda (x graph env locs)
799 ;; Going down into X.
800 (let ((ctx (reference-graph-toplevel-context graph))
801 (refs (reference-graph-refs graph))
802 (defs (reference-graph-defs graph)))
804 ((<toplevel-define> name src)
806 (defs (vhash-consq name (or src (find pair? locs))
808 (make-reference-graph refs defs name)))
809 ((<toplevel-set> name src)
810 (add-ref-from-context graph name))
813 (lambda (x graph env locs)
814 ;; Leaving X's scope.
817 (let ((refs (reference-graph-refs graph))
818 (defs (reference-graph-defs graph)))
819 (make-reference-graph refs defs #f)))
823 ;; Process the resulting reference graph: determine all private definitions
824 ;; not reachable from any public definition. Macros
825 ;; (syntax-transformers), which are globally bound, never considered
826 ;; unused since we can't tell whether a macro is actually used; in
827 ;; addition, macros are considered roots of the graph since they may use
828 ;; private bindings. FIXME: The `make-syntax-transformer' calls don't
829 ;; contain any literal `toplevel-ref' of the global bindings they use so
830 ;; this strategy fails.
831 (define (exported? name)
833 (module-variable (module-public-interface env) name)
836 (let-values (((public-defs private-defs)
837 (partition* (lambda (name)
839 (macro-variable? name env)))
840 (reference-graph-defs graph))))
841 (let* ((roots (vhash-consq #f #t public-defs))
842 (refs (reference-graph-refs graph))
843 (reachable (graph-reachable-nodes* roots refs))
844 (unused (vlist-filter (lambda (name+src)
845 (not (vhash-assq (car name+src)
848 (vlist-for-each (lambda (name+loc)
849 (let ((name (car name+loc))
850 (loc (cdr name+loc)))
851 (if (not (gensym? name))
852 (warning 'unused-toplevel loc name))))
855 (make-reference-graph vlist-null vlist-null #f))))
859 ;;; Unbound variable analysis.
862 ;; <toplevel-info> records are used during tree traversal in search of
863 ;; possibly unbound variable. They contain a list of references to
864 ;; potentially unbound top-level variables, and a list of the top-level
865 ;; defines that have been encountered.
866 (define-record-type <toplevel-info>
867 (make-toplevel-info refs defs)
869 (refs toplevel-info-refs) ;; ((VARIABLE-NAME . LOCATION) ...)
870 (defs toplevel-info-defs)) ;; (VARIABLE-NAME ...)
872 (define (goops-toplevel-definition proc args env)
873 ;; If call of PROC to ARGS is a GOOPS top-level definition, return
874 ;; the name of the variable being defined; otherwise return #f. This
875 ;; assumes knowledge of the current implementation of `define-class' et al.
876 (define (toplevel-define-arg args)
878 ((($ <const> _ (and (? symbol?) exp)) _)
883 (($ <module-ref> _ '(oop goops) 'toplevel-define! #f)
884 (toplevel-define-arg args))
885 (($ <toplevel-ref> _ 'toplevel-define!)
886 ;; This may be the result of expanding one of the GOOPS macros within
888 (and (eq? env (resolve-module '(oop goops)))
889 (toplevel-define-arg args)))
892 (define unbound-variable-analysis
893 ;; Report possibly unbound variables in the given tree.
895 (lambda (x info env locs)
896 ;; X is a leaf: extend INFO's refs accordingly.
897 (let ((refs (toplevel-info-refs info))
898 (defs (toplevel-info-defs info)))
899 (define (bound? name)
900 (or (and (module? env)
901 (module-variable env name))
902 (vhash-assq name defs)))
905 ((<toplevel-ref> name src)
908 (let ((src (or src (find pair? locs))))
909 (make-toplevel-info (vhash-consq name src refs)
913 (lambda (x info env locs)
914 ;; Going down into X.
915 (let* ((refs (toplevel-info-refs info))
916 (defs (toplevel-info-defs info))
917 (src (tree-il-src x)))
918 (define (bound? name)
919 (or (and (module? env)
920 (module-variable env name))
921 (vhash-assq name defs)))
924 ((<toplevel-set> name src)
926 (make-toplevel-info refs defs)
927 (let ((src (find pair? locs)))
928 (make-toplevel-info (vhash-consq name src refs)
930 ((<toplevel-define> name)
931 (make-toplevel-info (vhash-delq name refs)
932 (vhash-consq name #t defs)))
935 ;; Check for a dynamic top-level definition, as is
936 ;; done by code expanded from GOOPS macros.
937 (let ((name (goops-toplevel-definition proc args
940 (make-toplevel-info (vhash-delq name refs)
941 (vhash-consq name #t defs))
942 (make-toplevel-info refs defs))))
944 (make-toplevel-info refs defs)))))
946 (lambda (x info env locs)
947 ;; Leaving X's scope.
950 (lambda (toplevel env)
951 ;; Post-process the result.
952 (vlist-for-each (lambda (name+loc)
953 (let ((name (car name+loc))
954 (loc (cdr name+loc)))
955 (warning 'unbound-variable loc name)))
956 (vlist-reverse (toplevel-info-refs toplevel))))
958 (make-toplevel-info vlist-null vlist-null)))
965 ;; <arity-info> records contain information about lexical definitions of
966 ;; procedures currently in scope, top-level procedure definitions that have
967 ;; been encountered, and calls to top-level procedures that have been
969 (define-record-type <arity-info>
970 (make-arity-info toplevel-calls lexical-lambdas toplevel-lambdas)
972 (toplevel-calls toplevel-procedure-calls) ;; ((NAME . CALL) ...)
973 (lexical-lambdas lexical-lambdas) ;; ((GENSYM . DEFINITION) ...)
974 (toplevel-lambdas toplevel-lambdas)) ;; ((NAME . DEFINITION) ...)
976 (define (validate-arity proc call lexical?)
977 ;; Validate the argument count of CALL, a tree-il call of
978 ;; PROC, emitting a warning in case of argument count mismatch.
980 (define (filter-keyword-args keywords allow-other-keys? args)
981 ;; Filter keyword arguments from ARGS and return the resulting list.
982 ;; KEYWORDS is the list of allowed keywords, and ALLOW-OTHER-KEYS?
983 ;; specified whethere keywords not listed in KEYWORDS are allowed.
984 (let loop ((args args)
988 (let ((arg (car args)))
989 (if (and (const? arg)
990 (or (memq (const-exp arg) keywords)
991 (and allow-other-keys?
992 (keyword? (const-exp arg)))))
993 (loop (if (pair? (cdr args))
998 (cons arg result)))))))
1000 (define (arities proc)
1001 ;; Return the arities of PROC, which can be either a tree-il or a
1004 (or (and (or (null? x) (pair? x))
1007 (cond ((program? proc)
1008 (values (procedure-name proc)
1010 (list (arity:nreq a) (arity:nopt a) (arity:rest? a)
1011 (map car (arity:kw a))
1012 (arity:allow-other-keys? a)))
1013 (program-arities proc))))
1015 (let ((arity (procedure-minimum-arity proc)))
1016 (values (procedure-name proc)
1017 (list (list (car arity) (cadr arity) (caddr arity)
1020 (let loop ((name #f)
1024 (values name (reverse arities))
1026 ((<lambda-case> req opt rest kw alternate)
1027 (loop name alternate
1028 (cons (list (len req) (len opt) rest
1029 (and (pair? kw) (map car (cdr kw)))
1030 (and (pair? kw) (car kw)))
1032 ((<lambda> meta body)
1033 (loop (assoc-ref meta 'name) body arities))
1035 (values #f #f))))))))
1037 (let ((args (call-args call))
1038 (src (tree-il-src call)))
1039 (call-with-values (lambda () (arities proc))
1040 (lambda (name arities)
1042 (find (lambda (arity)
1044 ((,req ,opt ,rest? ,kw ,aok?)
1045 (let ((args (if (pair? kw)
1046 (filter-keyword-args kw aok? args)
1049 (let ((count (length args)))
1052 (<= count (+ req opt)))))
1058 (warning 'arity-mismatch src
1059 (or name (with-output-to-string (lambda () (write proc))))
1063 (define arity-analysis
1064 ;; Report arity mismatches in the given tree.
1066 (lambda (x info env locs)
1069 (lambda (x info env locs)
1071 (define (extend lexical-name val info)
1072 ;; If VAL is a lambda, add NAME to the lexical-lambdas of INFO.
1073 (let ((toplevel-calls (toplevel-procedure-calls info))
1074 (lexical-lambdas (lexical-lambdas info))
1075 (toplevel-lambdas (toplevel-lambdas info)))
1078 (make-arity-info toplevel-calls
1079 (vhash-consq lexical-name val
1082 ((<lexical-ref> gensym)
1084 (let ((val* (vhash-assq gensym lexical-lambdas)))
1086 (extend lexical-name (cdr val*) info)
1088 ((<toplevel-ref> name)
1090 (make-arity-info toplevel-calls
1091 (vhash-consq lexical-name val
1096 (let ((toplevel-calls (toplevel-procedure-calls info))
1097 (lexical-lambdas (lexical-lambdas info))
1098 (toplevel-lambdas (toplevel-lambdas info)))
1101 ((<toplevel-define> name exp)
1104 (make-arity-info toplevel-calls
1106 (vhash-consq name exp toplevel-lambdas)))
1107 ((<toplevel-ref> name)
1108 ;; alias for another toplevel
1109 (let ((proc (vhash-assq name toplevel-lambdas)))
1110 (make-arity-info toplevel-calls
1112 (vhash-consq (toplevel-define-name x)
1116 toplevel-lambdas))))
1118 ((<let> gensyms vals)
1119 (fold extend info gensyms vals))
1120 ((<letrec> gensyms vals)
1121 (fold extend info gensyms vals))
1122 ((<fix> gensyms vals)
1123 (fold extend info gensyms vals))
1125 ((<call> proc args src)
1128 (validate-arity proc x #t)
1130 ((<toplevel-ref> name)
1131 (make-arity-info (vhash-consq name x toplevel-calls)
1134 ((<lexical-ref> gensym)
1135 (let ((proc (vhash-assq gensym lexical-lambdas)))
1137 (record-case (cdr proc)
1138 ((<toplevel-ref> name)
1139 ;; alias to toplevel
1140 (make-arity-info (vhash-consq name x toplevel-calls)
1144 (validate-arity (cdr proc) x #t)
1147 ;; If GENSYM wasn't found, it may be because it's an
1148 ;; argument of the procedure being compiled.
1153 (lambda (x info env locs)
1155 (define (shrink name val info)
1156 ;; Remove NAME from the lexical-lambdas of INFO.
1157 (let ((toplevel-calls (toplevel-procedure-calls info))
1158 (lexical-lambdas (lexical-lambdas info))
1159 (toplevel-lambdas (toplevel-lambdas info)))
1160 (make-arity-info toplevel-calls
1161 (if (vhash-assq name lexical-lambdas)
1162 (vlist-tail lexical-lambdas)
1166 (let ((toplevel-calls (toplevel-procedure-calls info))
1167 (lexical-lambdas (lexical-lambdas info))
1168 (toplevel-lambdas (toplevel-lambdas info)))
1170 ((<let> gensyms vals)
1171 (fold shrink info gensyms vals))
1172 ((<letrec> gensyms vals)
1173 (fold shrink info gensyms vals))
1174 ((<fix> gensyms vals)
1175 (fold shrink info gensyms vals))
1179 (lambda (result env)
1180 ;; Post-processing: check all top-level procedure calls that have been
1182 (let ((toplevel-calls (toplevel-procedure-calls result))
1183 (toplevel-lambdas (toplevel-lambdas result)))
1186 (let* ((name (car name+call))
1187 (call (cdr name+call))
1189 (or (and=> (vhash-assq name toplevel-lambdas) cdr)
1192 (module-ref env name)))))
1194 ;; handle toplevel aliases
1195 (if (toplevel-ref? proc)
1196 (let ((name (toplevel-ref-name proc)))
1199 (module-ref env name))))
1201 (if (or (lambda? proc*) (procedure? proc*))
1202 (validate-arity proc* call (lambda? proc*)))))
1205 (make-arity-info vlist-null vlist-null vlist-null)))
1209 ;;; `format' argument analysis.
1212 (define &syntax-error
1213 ;; The `throw' key for syntax errors.
1214 (gensym "format-string-syntax-error"))
1216 (define (format-string-argument-count fmt)
1217 ;; Return the minimum and maxium number of arguments that should
1218 ;; follow format string FMT (or, ahem, a good estimate thereof) or
1219 ;; `any' if the format string can be followed by any number of
1222 (define (drop-group chars end)
1223 ;; Drop characters from CHARS until "~END" is encountered.
1224 (let loop ((chars chars)
1227 (throw &syntax-error 'unterminated-iteration)
1229 (if (eq? (car chars) end)
1231 (loop (cdr chars) #f))
1232 (if (eq? (car chars) #\~)
1233 (loop (cdr chars) #t)
1234 (loop (cdr chars) #f))))))
1236 (define (digit? char)
1237 ;; Return true if CHAR is a digit, #f otherwise.
1238 (memq char '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)))
1240 (define (previous-number chars)
1241 ;; Return the previous series of digits found in CHARS.
1242 (let ((numbers (take-while digit? chars)))
1243 (and (not (null? numbers))
1244 (string->number (list->string (reverse numbers))))))
1246 (let loop ((chars (string->list fmt))
1255 (throw &syntax-error 'unterminated-conditional)
1256 (values min-count max-count))
1260 ((#\~ #\% #\& #\t #\_ #\newline #\( #\))
1261 (loop (cdr chars) 'literal '()
1262 conditions end-group
1263 min-count max-count))
1264 ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\, #\: #\@)
1266 'tilde (cons (car chars) params)
1267 conditions end-group
1268 min-count max-count))
1269 ((#\v #\V) (loop (cdr chars)
1270 'tilde (cons (car chars) params)
1271 conditions end-group
1275 (loop chars 'literal '() '()
1276 (let ((selector (previous-number params))
1277 (at? (memq #\@ params)))
1278 (lambda (chars conds)
1280 (let ((mins (map car conds))
1281 (maxs (map cdr conds))
1283 (< selector (length conds)))))
1284 (if (and (every number? mins)
1285 (every number? maxs))
1286 (loop chars 'literal '() conditions end-group
1289 (car (list-ref conds selector))
1293 (apply min mins)))))
1296 (cdr (list-ref conds selector))
1300 (apply max maxs))))))
1301 (values 'any 'any))))) ;; XXX: approximation
1305 (loop (cdr chars) 'literal '()
1306 (cons (cons min-count max-count) conditions)
1309 (throw &syntax-error 'unexpected-semicolon)))
1312 (end-group (cdr chars)
1313 (reverse (cons (cons min-count max-count)
1315 (throw &syntax-error 'unexpected-conditional-termination)))
1316 ((#\{) (if (memq #\@ params)
1317 (values min-count 'any)
1318 (loop (drop-group (cdr chars) #\})
1320 conditions end-group
1321 (+ 1 min-count) (+ 1 max-count))))
1322 ((#\*) (if (memq #\@ params)
1323 (values 'any 'any) ;; it's unclear what to do here
1326 conditions end-group
1327 (+ (or (previous-number params) 1)
1329 (+ (or (previous-number params) 1)
1332 ;; We don't have enough info to determine the exact number
1333 ;; of args, but we could determine a lower bound (TODO).
1335 (else (loop (cdr chars) 'literal '()
1336 conditions end-group
1337 (+ 1 min-count) (+ 1 max-count)))))
1340 ((#\~) (loop (cdr chars) 'tilde '()
1341 conditions end-group
1342 min-count max-count))
1343 (else (loop (cdr chars) 'literal '()
1344 conditions end-group
1345 min-count max-count))))
1346 (else (error "computer bought the farm" state))))))
1348 (define (const-fmt x)
1349 ;; Return the literal format pattern for X, or #f.
1354 (or ($ <toplevel-ref> _ '_) ($ <module-ref> _ '_))
1355 (($ <const> _ (and (? string?) fmt))))
1356 ;; Gettexted literals, like `(_ "foo")'.
1360 (define format-analysis
1361 ;; Report arity mismatches in the given tree.
1363 (lambda (x _ env locs)
1367 (lambda (x _ env locs)
1369 (define (check-format-args args loc)
1371 ((,port ,fmt . ,rest)
1372 (guard (const-fmt fmt))
1373 (if (and (const? port)
1374 (not (boolean? (const-exp port))))
1375 (warning 'format loc 'wrong-port (const-exp port)))
1376 (let ((fmt (const-fmt fmt))
1377 (count (length rest)))
1379 (catch &syntax-error
1381 (let-values (((min max)
1382 (format-string-argument-count fmt)))
1384 (or (and (or (eq? min 'any) (>= count min))
1385 (or (eq? max 'any) (<= count max)))
1386 (warning 'format loc 'wrong-format-arg-count
1387 fmt min max count)))))
1389 (warning 'format loc 'syntax-error key fmt)))
1390 (warning 'format loc 'wrong-format-string fmt))))
1391 ((,port ,fmt . ,rest)
1392 (if (and (const? port)
1393 (not (boolean? (const-exp port))))
1394 (warn 'format loc 'wrong-port (const-exp port)))
1395 ;; Warn on non-literal format strings, unless they refer to a
1396 ;; lexical variable named "fmt".
1397 (if (record-case fmt
1398 ((<lexical-ref> name)
1399 (not (eq? name 'fmt)))
1401 (warning 'format loc 'non-literal-format-string)))
1403 (warning 'format loc 'wrong-num-args (length args)))))
1405 (define (resolve-toplevel name)
1407 (false-if-exception (module-ref env name))))
1410 (($ <call> src ($ <toplevel-ref> _ name) args)
1411 (let ((proc (resolve-toplevel name)))
1412 (and (or (eq? proc format)
1413 (eq? proc (@ (ice-9 format) format)))
1414 (check-format-args args (or src (find pair? locs))))))
1418 (lambda (x _ env locs)