1 ;;; TREE-IL -> GLIL compiler
3 ;; Copyright (C) 2001, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 (define-module (language tree-il analyze)
22 #:use-module (srfi srfi-1)
23 #:use-module (srfi srfi-9)
24 #:use-module (srfi srfi-11)
25 #:use-module (srfi srfi-26)
26 #:use-module (ice-9 vlist)
27 #:use-module (ice-9 match)
28 #:use-module (system base syntax)
29 #:use-module (system base message)
30 #:use-module (system vm program)
31 #:use-module (language tree-il)
32 #:use-module (system base pmatch)
33 #:export (analyze-lexicals
35 unused-variable-analysis
36 unused-toplevel-analysis
37 unbound-variable-analysis
41 ;; Allocation is the process of assigning storage locations for lexical
42 ;; variables. A lexical variable has a distinct "address", or storage
43 ;; location, for each procedure in which it is referenced.
45 ;; A variable is "local", i.e., allocated on the stack, if it is
46 ;; referenced from within the procedure that defined it. Otherwise it is
47 ;; a "closure" variable. For example:
49 ;; (lambda (a) a) ; a will be local
50 ;; `a' is local to the procedure.
52 ;; (lambda (a) (lambda () a))
53 ;; `a' is local to the outer procedure, but a closure variable with
54 ;; respect to the inner procedure.
56 ;; If a variable is ever assigned, it needs to be heap-allocated
57 ;; ("boxed"). This is so that closures and continuations capture the
58 ;; variable's identity, not just one of the values it may have over the
59 ;; course of program execution. If the variable is never assigned, there
60 ;; is no distinction between value and identity, so closing over its
61 ;; identity (whether through closures or continuations) can make a copy
62 ;; of its value instead.
64 ;; Local variables are stored on the stack within a procedure's call
65 ;; frame. Their index into the stack is determined from their linear
66 ;; postion within a procedure's binding path:
73 ;; This algorithm has the problem that variables are only allocated
74 ;; indices at the end of the binding path. If variables bound early in
75 ;; the path are not used in later portions of the path, their indices
76 ;; will not be recycled. This problem is particularly egregious in the
80 ;; -> (let ((a x)) (if a a (let ((b y)) (if b b z))))
82 ;; As you can see, the `a' binding is only used in the ephemeral
83 ;; `consequent' clause of the first `if', but its index would be
84 ;; reserved for the whole of the `or' expansion. So we have a hack for
85 ;; this specific case. A proper solution would be some sort of liveness
86 ;; analysis, and not our linear allocation algorithm.
88 ;; Closure variables are captured when a closure is created, and stored in a
89 ;; vector inline to the closure object itself. Each closure variable has a
90 ;; unique index into that vector.
92 ;; There is one more complication. Procedures bound by <fix> may, in
93 ;; some cases, be rendered inline to their parent procedure. That is to
96 ;; (letrec ((lp (lambda () (lp)))) (lp))
97 ;; => (fix ((lp (lambda () (lp)))) (lp))
98 ;; => goto FIX-BODY; LP: goto LP; FIX-BODY: goto LP;
99 ;; ^ jump over the loop ^ the fixpoint lp ^ starting off the loop
101 ;; The upshot is that we don't have to allocate any space for the `lp'
102 ;; closure at all, as it can be rendered inline as a loop. So there is
103 ;; another kind of allocation, "label allocation", in which the
104 ;; procedure is simply a label, placed at the start of the lambda body.
105 ;; The label is the gensym under which the lambda expression is bound.
107 ;; The analyzer checks to see that the label is called with the correct
108 ;; number of arguments. Calls to labels compile to rename + goto.
109 ;; Lambda, the ultimate goto!
112 ;; The return value of `analyze-lexicals' is a hash table, the
115 ;; The allocation maps gensyms -- recall that each lexically bound
116 ;; variable has a unique gensym -- to storage locations ("addresses").
117 ;; Since one gensym may have many storage locations, if it is referenced
118 ;; in many procedures, it is a two-level map.
120 ;; The allocation also stored information on how many local variables
121 ;; need to be allocated for each procedure, lexicals that have been
122 ;; translated into labels, and information on what free variables to
123 ;; capture from its lexical parent procedure.
125 ;; In addition, we have a conflation: while we're traversing the code,
126 ;; recording information to pass to the compiler, we take the
127 ;; opportunity to generate labels for each lambda-case clause, so that
128 ;; generated code can skip argument checks at runtime if they match at
131 ;; Also, while we're a-traversing and an-allocating, we check prompt
132 ;; handlers to see if the "continuation" argument is used. If not, we
133 ;; mark the prompt as being "escape-only". This allows us to implement
134 ;; `catch' and `throw' using `prompt' and `control', but without causing
135 ;; a continuation to be reified. Heh heh.
139 ;; sym -> {lambda -> address}
140 ;; lambda -> (labels . free-locs)
141 ;; lambda-case -> (gensym . nlocs)
142 ;; prompt -> escape-only?
144 ;; address ::= (local? boxed? . index)
145 ;; labels ::= ((sym . lambda) ...)
146 ;; free-locs ::= ((sym0 . address0) (sym1 . address1) ...)
147 ;; free variable addresses are relative to parent proc.
149 (define (make-hashq k v)
150 (let ((res (make-hash-table)))
154 (define (analyze-lexicals x)
155 ;; bound-vars: lambda -> (sym ...)
156 ;; all identifiers bound within a lambda
157 (define bound-vars (make-hash-table))
158 ;; free-vars: lambda -> (sym ...)
159 ;; all identifiers referenced in a lambda, but not bound
160 ;; NB, this includes identifiers referenced by contained lambdas
161 (define free-vars (make-hash-table))
162 ;; assigned: sym -> #t
163 ;; variables that are assigned
164 (define assigned (make-hash-table))
165 ;; refcounts: sym -> count
166 ;; allows us to detect the or-expansion in O(1) time
167 (define refcounts (make-hash-table))
168 ;; labels: sym -> lambda
169 ;; for determining if fixed-point procedures can be rendered as
171 (define labels (make-hash-table))
173 ;; returns variables referenced in expr
174 (define (analyze! x proc labels-in-proc tail? tail-call-args)
175 (define (step y) (analyze! y proc '() #f #f))
176 (define (step-tail y) (analyze! y proc labels-in-proc tail? #f))
177 (define (step-tail-call y args) (analyze! y proc labels-in-proc #f
179 (define (recur/labels x new-proc labels)
180 (analyze! x new-proc (append labels labels-in-proc) #t #f))
181 (define (recur x new-proc) (analyze! x new-proc '() tail? #f))
184 (apply lset-union eq? (step-tail-call proc args)
188 (apply lset-union eq? (map step args)))
190 ((<conditional> test consequent alternate)
191 (lset-union eq? (step test) (step-tail consequent) (step-tail alternate)))
193 ((<lexical-ref> gensym)
194 (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
195 (if (not (and tail-call-args
196 (memq gensym labels-in-proc)
197 (let ((p (hashq-ref labels gensym)))
199 (let lp ((c (lambda-body p)))
200 (and c (lambda-case? c)
202 ;; for now prohibit optional &
203 ;; keyword arguments; can relax this
205 (and (= (length (lambda-case-req c))
206 (length tail-call-args))
207 (not (lambda-case-opt c))
208 (not (lambda-case-kw c))
209 (not (lambda-case-rest c)))
210 (lp (lambda-case-alternate c)))))))))
211 (hashq-set! labels gensym #f))
214 ((<lexical-set> gensym exp)
215 (hashq-set! assigned gensym #t)
216 (hashq-set! labels gensym #f)
217 (lset-adjoin eq? (step exp) gensym))
222 ((<toplevel-set> exp)
225 ((<toplevel-define> exp)
229 (lset-union eq? (step head) (step-tail tail)))
232 ;; order is important here
233 (hashq-set! bound-vars x '())
234 (let ((free (recur body x)))
235 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
236 (hashq-set! free-vars x free)
239 ((<lambda-case> opt kw inits gensyms body alternate)
240 (hashq-set! bound-vars proc
241 (append (reverse gensyms) (hashq-ref bound-vars proc)))
246 (apply lset-union eq? (map step inits))
249 (if alternate (step-tail alternate) '())))
251 ((<let> gensyms vals body)
252 (hashq-set! bound-vars proc
253 (append (reverse gensyms) (hashq-ref bound-vars proc)))
255 (apply lset-union eq? (step-tail body) (map step vals))
258 ((<letrec> gensyms vals body)
259 (hashq-set! bound-vars proc
260 (append (reverse gensyms) (hashq-ref bound-vars proc)))
261 (for-each (lambda (sym) (hashq-set! assigned sym #t)) gensyms)
263 (apply lset-union eq? (step-tail body) (map step vals))
266 ((<fix> gensyms vals body)
267 ;; Try to allocate these procedures as labels.
268 (for-each (lambda (sym val) (hashq-set! labels sym val))
270 (hashq-set! bound-vars proc
271 (append (reverse gensyms) (hashq-ref bound-vars proc)))
272 ;; Step into subexpressions.
275 ;; Since we're trying to label-allocate the lambda,
276 ;; pretend it's not a closure, and just recurse into its
277 ;; body directly. (Otherwise, recursing on a closure
278 ;; that references one of the fix's bound vars would
279 ;; prevent label allocation.)
283 ;; just like the closure case, except here we use
284 ;; recur/labels instead of recur
285 (hashq-set! bound-vars x '())
286 (let ((free (recur/labels body x gensyms)))
287 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
288 (hashq-set! free-vars x free)
291 (vars-with-refs (map cons gensyms var-refs))
292 (body-refs (recur/labels body proc gensyms)))
293 (define (delabel-dependents! sym)
294 (let ((refs (assq-ref vars-with-refs sym)))
296 (for-each (lambda (sym)
297 (if (hashq-ref labels sym)
299 (hashq-set! labels sym #f)
300 (delabel-dependents! sym))))
302 ;; Stepping into the lambdas and the body might have made some
303 ;; procedures not label-allocatable -- which might have
304 ;; knock-on effects. For example:
305 ;; (fix ((a (lambda () (b)))
306 ;; (b (lambda () a)))
308 ;; As far as `a' is concerned, both `a' and `b' are
309 ;; label-allocatable. But `b' references `a' not in a proc-tail
310 ;; position, which makes `a' not label-allocatable. The
311 ;; knock-on effect is that, when back-propagating this
312 ;; information to `a', `b' will also become not
313 ;; label-allocatable, as it is referenced within `a', which is
314 ;; allocated as a closure. This is a transitive relationship.
315 (for-each (lambda (sym)
316 (if (not (hashq-ref labels sym))
317 (delabel-dependents! sym)))
319 ;; Now lift bound variables with label-allocated lambdas to the
323 (if (hashq-ref labels sym)
324 ;; Remove traces of the label-bound lambda. The free
325 ;; vars will propagate up via the return val.
327 (hashq-set! bound-vars proc
328 (append (hashq-ref bound-vars val)
329 (hashq-ref bound-vars proc)))
330 (hashq-remove! bound-vars val)
331 (hashq-remove! free-vars val))))
334 (apply lset-union eq? body-refs var-refs)
337 ((<let-values> exp body)
338 (lset-union eq? (step exp) (step body)))
340 ((<prompt> escape-only? tag body handler)
342 (($ <lambda> _ _ handler)
343 (lset-union eq? (step tag) (step body) (step-tail handler)))))
345 ((<abort> tag args tail)
346 (apply lset-union eq? (step tag) (step tail) (map step args)))
350 ;; allocation: sym -> {lambda -> address}
351 ;; lambda -> (labels . free-locs)
352 ;; lambda-case -> (gensym . nlocs)
353 (define allocation (make-hash-table))
355 (define (allocate! x proc n)
356 (define (recur y) (allocate! y proc n))
359 (apply max (recur proc) (map recur args)))
362 (apply max n (map recur args)))
364 ((<conditional> test consequent alternate)
365 (max (recur test) (recur consequent) (recur alternate)))
373 ((<toplevel-set> exp)
376 ((<toplevel-define> exp)
384 ;; allocate closure vars in order
385 (let lp ((c (hashq-ref free-vars x)) (n 0))
388 (hashq-set! (hashq-ref allocation (car c))
390 `(#f ,(hashq-ref assigned (car c)) . ,n))
391 (lp (cdr c) (1+ n)))))
393 (let ((nlocs (allocate! body x 0))
396 (hashq-ref (hashq-ref allocation v) proc))
397 (hashq-ref free-vars x)))
400 (cons sym (hashq-ref labels sym)))
401 (hashq-ref bound-vars x)))))
402 ;; set procedure allocations
403 (hashq-set! allocation x (cons labels free-addresses)))
406 ((<lambda-case> opt kw inits gensyms body alternate)
408 (let lp ((gensyms gensyms) (n n))
412 (allocate! body proc n)
413 ;; inits not logically at the end, but they
415 (map (lambda (x) (allocate! x proc n)) inits))))
416 ;; label and nlocs for the case
417 (hashq-set! allocation x (cons (gensym ":LCASE") nlocs))
420 (hashq-set! allocation (car gensyms)
422 proc `(#t ,(hashq-ref assigned (car gensyms)) . ,n)))
423 (lp (cdr gensyms) (1+ n)))))
424 (if alternate (allocate! alternate proc n) n)))
426 ((<let> gensyms vals body)
427 (let ((nmax (apply max (map recur vals))))
430 ((and (conditional? body)
431 (= (length gensyms) 1)
432 (let ((v (car gensyms)))
433 (and (not (hashq-ref assigned v))
434 (= (hashq-ref refcounts v 0) 2)
435 (lexical-ref? (conditional-test body))
436 (eq? (lexical-ref-gensym (conditional-test body)) v)
437 (lexical-ref? (conditional-consequent body))
438 (eq? (lexical-ref-gensym (conditional-consequent body)) v))))
439 (hashq-set! allocation (car gensyms)
440 (make-hashq proc `(#t #f . ,n)))
441 ;; the 1+ for this var
442 (max nmax (1+ n) (allocate! (conditional-alternate body) proc n)))
444 (let lp ((gensyms gensyms) (n n))
446 (max nmax (allocate! body proc n))
447 (let ((v (car gensyms)))
451 `(#t ,(hashq-ref assigned v) . ,n)))
452 (lp (cdr gensyms) (1+ n)))))))))
454 ((<letrec> gensyms vals body)
455 (let lp ((gensyms gensyms) (n n))
457 (let ((nmax (apply max
459 (allocate! x proc n))
461 (max nmax (allocate! body proc n)))
462 (let ((v (car gensyms)))
466 `(#t ,(hashq-ref assigned v) . ,n)))
467 (lp (cdr gensyms) (1+ n))))))
469 ((<fix> gensyms vals body)
470 (let lp ((in gensyms) (n n))
472 (let lp ((gensyms gensyms) (vals vals) (nmax n))
475 (max nmax (allocate! body proc n)))
476 ((hashq-ref labels (car gensyms))
477 ;; allocate lambda body inline to proc
480 (record-case (car vals)
482 (max nmax (allocate! body proc n))))))
487 (max nmax (allocate! (car vals) proc n))))))
491 ((hashq-ref assigned v)
492 (error "fixpoint procedures may not be assigned" x))
493 ((hashq-ref labels v)
494 ;; no binding, it's a label
497 ;; allocate closure binding
498 (hashq-set! allocation v (make-hashq proc `(#t #f . ,n)))
499 (lp (cdr in) (1+ n))))))))
501 ((<let-values> exp body)
502 (max (recur exp) (recur body)))
504 ((<prompt> escape-only? tag body handler)
506 (($ <lambda> _ _ handler)
507 (max (recur tag) (recur body) (recur handler)))))
509 ((<abort> tag args tail)
510 (apply max (recur tag) (recur tail) (map recur args)))
514 (analyze! x #f '() #t #f)
521 ;;; Tree analyses for warnings.
524 (define-record-type <tree-analysis>
525 (make-tree-analysis down up post init)
527 (down tree-analysis-down) ;; (lambda (x result env locs) ...)
528 (up tree-analysis-up) ;; (lambda (x result env locs) ...)
529 (post tree-analysis-post) ;; (lambda (result env) ...)
530 (init tree-analysis-init)) ;; arbitrary value
532 (define (analyze-tree analyses tree env)
533 "Run all tree analyses listed in ANALYSES on TREE for ENV, using
534 `tree-il-fold'. Return TREE. The down and up procedures of each
535 analysis are passed a ``location stack', which is the stack of
536 `tree-il-src' values for each parent tree (a list); it can be used to
537 approximate source location when accurate information is missing from a
538 given `tree-il' element."
540 (define (traverse proc update-locs)
541 ;; Return a tree traversing procedure that returns a list of analysis
542 ;; results prepended by the location stack.
544 (let ((locs (update-locs x (car results))))
545 (cons locs ;; the location stack
546 (map (lambda (analysis result)
547 ((proc analysis) x result env locs))
551 ;; Extending and shrinking the location stack.
552 (define (extend-locs x locs) (cons (tree-il-src x) locs))
553 (define (shrink-locs x locs) (cdr locs))
556 (tree-il-fold (traverse tree-analysis-down extend-locs)
557 (traverse tree-analysis-up shrink-locs)
558 (cons '() ;; empty location stack
559 (map tree-analysis-init analyses))
562 (for-each (lambda (analysis result)
563 ((tree-analysis-post analysis) result env))
571 ;;; Unused variable analysis.
574 ;; <binding-info> records are used during tree traversals in
575 ;; `unused-variable-analysis'. They contain a list of the local vars
576 ;; currently in scope, and a list of locals vars that have been referenced.
577 (define-record-type <binding-info>
578 (make-binding-info vars refs)
580 (vars binding-info-vars) ;; ((GENSYM NAME LOCATION) ...)
581 (refs binding-info-refs)) ;; (GENSYM ...)
583 (define (gensym? sym)
584 ;; Return #t if SYM is (likely) a generated symbol.
585 (string-any #\space (symbol->string sym)))
587 (define unused-variable-analysis
588 ;; Report unused variables in the given tree.
590 (lambda (x info env locs)
591 ;; Going down into X: extend INFO's variable list
593 (let ((refs (binding-info-refs info))
594 (vars (binding-info-vars info))
595 (src (tree-il-src x)))
596 (define (extend inner-vars inner-names)
597 (fold (lambda (var name vars)
598 (vhash-consq var (list name src) vars))
604 ((<lexical-ref> gensym)
605 (make-binding-info vars (vhash-consq gensym #t refs)))
606 ((<lexical-set> gensym)
607 (make-binding-info vars (vhash-consq gensym #t refs)))
608 ((<lambda-case> req opt inits rest kw gensyms)
611 ,@(if rest (list rest) '())
612 ,@(if kw (map cadr (cdr kw)) '()))))
613 (make-binding-info (extend gensyms names) refs)))
614 ((<let> gensyms names)
615 (make-binding-info (extend gensyms names) refs))
616 ((<letrec> gensyms names)
617 (make-binding-info (extend gensyms names) refs))
618 ((<fix> gensyms names)
619 (make-binding-info (extend gensyms names) refs))
622 (lambda (x info env locs)
623 ;; Leaving X's scope: shrink INFO's variable list
624 ;; accordingly and reported unused nested variables.
625 (let ((refs (binding-info-refs info))
626 (vars (binding-info-vars info)))
627 (define (shrink inner-vars refs)
630 (let ((gensym (car var)))
631 ;; Don't report lambda parameters as unused.
632 (if (and (memq gensym inner-vars)
633 (not (vhash-assq gensym refs))
634 (not (lambda-case? x)))
635 (let ((name (cadr var))
636 ;; We can get approximate source location by going up
637 ;; the LOCS location stack.
640 (if (and (not (gensym? name))
642 (warning 'unused-variable loc name))))))
644 (vlist-drop vars (length inner-vars)))
646 ;; For simplicity, we leave REFS untouched, i.e., with
647 ;; names of variables that are now going out of scope.
648 ;; It doesn't hurt as these are unique names, it just
649 ;; makes REFS unnecessarily fat.
651 ((<lambda-case> gensyms)
652 (make-binding-info (shrink gensyms refs) refs))
654 (make-binding-info (shrink gensyms refs) refs))
656 (make-binding-info (shrink gensyms refs) refs))
658 (make-binding-info (shrink gensyms refs) refs))
661 (lambda (result env) #t)
662 (make-binding-info vlist-null vlist-null)))
666 ;;; Unused top-level variable analysis.
669 ;; <reference-graph> record top-level definitions that are made, references to
670 ;; top-level definitions and their context (the top-level definition in which
671 ;; the reference appears), as well as the current context (the top-level
672 ;; definition we're currently in). The second part (`refs' below) is
673 ;; effectively a graph from which we can determine unused top-level definitions.
674 (define-record-type <reference-graph>
675 (make-reference-graph refs defs toplevel-context)
677 (defs reference-graph-defs) ;; ((NAME . LOC) ...)
678 (refs reference-graph-refs) ;; ((REF-CONTEXT REF ...) ...)
679 (toplevel-context reference-graph-toplevel-context)) ;; NAME | #f
681 (define (graph-reachable-nodes root refs reachable)
682 ;; Add to REACHABLE the nodes reachable from ROOT in graph REFS. REFS is a
683 ;; vhash mapping nodes to the list of their children: for instance,
684 ;; ((A -> (B C)) (B -> (A)) (C -> ())) corresponds to
693 ;; REACHABLE is a vhash of nodes known to be otherwise reachable.
695 (let loop ((root root)
698 (if (or (vhash-assq root path)
699 (vhash-assq root result))
701 (let* ((children (or (and=> (vhash-assq root refs) cdr) '()))
702 (path (vhash-consq root #t path))
703 (result (fold (lambda (kid result)
704 (loop kid path result))
707 (fold (lambda (kid result)
708 (vhash-consq kid #t result))
712 (define (graph-reachable-nodes* roots refs)
713 ;; Return the list of nodes in REFS reachable from the nodes listed in ROOTS.
714 (vlist-fold (lambda (root+true result)
715 (let* ((root (car root+true))
716 (reachable (graph-reachable-nodes root refs result)))
717 (vhash-consq root #t reachable)))
721 (define (partition* pred vhash)
722 ;; Partition VHASH according to PRED. Return the two resulting vhashes.
724 (vlist-fold (lambda (k+v result)
730 (cons (vhash-consq k v r1) r2)
731 (cons r1 (vhash-consq k v r2)))))
732 (cons vlist-null vlist-null)
734 (values (car result) (cdr result))))
736 (define unused-toplevel-analysis
737 ;; Report unused top-level definitions that are not exported.
738 (let ((add-ref-from-context
740 ;; Add an edge CTX -> NAME in GRAPH.
741 (let* ((refs (reference-graph-refs graph))
742 (defs (reference-graph-defs graph))
743 (ctx (reference-graph-toplevel-context graph))
744 (ctx-refs (or (and=> (vhash-assq ctx refs) cdr) '())))
745 (make-reference-graph (vhash-consq ctx (cons name ctx-refs) refs)
747 (define (macro-variable? name env)
749 (let ((var (module-variable env name)))
750 (and var (variable-bound? var)
751 (macro? (variable-ref var))))))
754 (lambda (x graph env locs)
755 ;; Going down into X.
756 (let ((ctx (reference-graph-toplevel-context graph))
757 (refs (reference-graph-refs graph))
758 (defs (reference-graph-defs graph)))
760 ((<toplevel-ref> name src)
761 (add-ref-from-context graph name))
762 ((<toplevel-define> name src)
764 (defs (vhash-consq name (or src (find pair? locs))
766 (make-reference-graph refs defs name)))
767 ((<toplevel-set> name src)
768 (add-ref-from-context graph name))
771 (lambda (x graph env locs)
772 ;; Leaving X's scope.
775 (let ((refs (reference-graph-refs graph))
776 (defs (reference-graph-defs graph)))
777 (make-reference-graph refs defs #f)))
781 ;; Process the resulting reference graph: determine all private definitions
782 ;; not reachable from any public definition. Macros
783 ;; (syntax-transformers), which are globally bound, never considered
784 ;; unused since we can't tell whether a macro is actually used; in
785 ;; addition, macros are considered roots of the graph since they may use
786 ;; private bindings. FIXME: The `make-syntax-transformer' calls don't
787 ;; contain any literal `toplevel-ref' of the global bindings they use so
788 ;; this strategy fails.
789 (define (exported? name)
791 (module-variable (module-public-interface env) name)
794 (let-values (((public-defs private-defs)
795 (partition* (lambda (name)
797 (macro-variable? name env)))
798 (reference-graph-defs graph))))
799 (let* ((roots (vhash-consq #f #t public-defs))
800 (refs (reference-graph-refs graph))
801 (reachable (graph-reachable-nodes* roots refs))
802 (unused (vlist-filter (lambda (name+src)
803 (not (vhash-assq (car name+src)
806 (vlist-for-each (lambda (name+loc)
807 (let ((name (car name+loc))
808 (loc (cdr name+loc)))
809 (if (not (gensym? name))
810 (warning 'unused-toplevel loc name))))
813 (make-reference-graph vlist-null vlist-null #f))))
817 ;;; Unbound variable analysis.
820 ;; <toplevel-info> records are used during tree traversal in search of
821 ;; possibly unbound variable. They contain a list of references to
822 ;; potentially unbound top-level variables, and a list of the top-level
823 ;; defines that have been encountered.
824 (define-record-type <toplevel-info>
825 (make-toplevel-info refs defs)
827 (refs toplevel-info-refs) ;; ((VARIABLE-NAME . LOCATION) ...)
828 (defs toplevel-info-defs)) ;; (VARIABLE-NAME ...)
830 (define (goops-toplevel-definition proc args env)
831 ;; If call of PROC to ARGS is a GOOPS top-level definition, return
832 ;; the name of the variable being defined; otherwise return #f. This
833 ;; assumes knowledge of the current implementation of `define-class' et al.
834 (define (toplevel-define-arg args)
836 ((($ <const> _ (and (? symbol?) exp)) _)
841 (($ <module-ref> _ '(oop goops) 'toplevel-define! #f)
842 (toplevel-define-arg args))
843 (($ <toplevel-ref> _ 'toplevel-define!)
844 ;; This may be the result of expanding one of the GOOPS macros within
846 (and (eq? env (resolve-module '(oop goops)))
847 (toplevel-define-arg args)))
850 (define unbound-variable-analysis
851 ;; Report possibly unbound variables in the given tree.
853 (lambda (x info env locs)
854 ;; Going down into X.
855 (let* ((refs (toplevel-info-refs info))
856 (defs (toplevel-info-defs info))
857 (src (tree-il-src x)))
858 (define (bound? name)
859 (or (and (module? env)
860 (module-variable env name))
861 (vhash-assq name defs)))
864 ((<toplevel-ref> name src)
867 (let ((src (or src (find pair? locs))))
868 (make-toplevel-info (vhash-consq name src refs)
870 ((<toplevel-set> name src)
872 (make-toplevel-info refs defs)
873 (let ((src (find pair? locs)))
874 (make-toplevel-info (vhash-consq name src refs)
876 ((<toplevel-define> name)
877 (make-toplevel-info (vhash-delq name refs)
878 (vhash-consq name #t defs)))
881 ;; Check for a dynamic top-level definition, as is
882 ;; done by code expanded from GOOPS macros.
883 (let ((name (goops-toplevel-definition proc args
886 (make-toplevel-info (vhash-delq name refs)
887 (vhash-consq name #t defs))
888 (make-toplevel-info refs defs))))
890 (make-toplevel-info refs defs)))))
892 (lambda (x info env locs)
893 ;; Leaving X's scope.
896 (lambda (toplevel env)
897 ;; Post-process the result.
898 (vlist-for-each (lambda (name+loc)
899 (let ((name (car name+loc))
900 (loc (cdr name+loc)))
901 (warning 'unbound-variable loc name)))
902 (vlist-reverse (toplevel-info-refs toplevel))))
904 (make-toplevel-info vlist-null vlist-null)))
911 ;; <arity-info> records contain information about lexical definitions of
912 ;; procedures currently in scope, top-level procedure definitions that have
913 ;; been encountered, and calls to top-level procedures that have been
915 (define-record-type <arity-info>
916 (make-arity-info toplevel-calls lexical-lambdas toplevel-lambdas)
918 (toplevel-calls toplevel-procedure-calls) ;; ((NAME . CALL) ...)
919 (lexical-lambdas lexical-lambdas) ;; ((GENSYM . DEFINITION) ...)
920 (toplevel-lambdas toplevel-lambdas)) ;; ((NAME . DEFINITION) ...)
922 (define (validate-arity proc call lexical?)
923 ;; Validate the argument count of CALL, a tree-il call of
924 ;; PROC, emitting a warning in case of argument count mismatch.
926 (define (filter-keyword-args keywords allow-other-keys? args)
927 ;; Filter keyword arguments from ARGS and return the resulting list.
928 ;; KEYWORDS is the list of allowed keywords, and ALLOW-OTHER-KEYS?
929 ;; specified whethere keywords not listed in KEYWORDS are allowed.
930 (let loop ((args args)
934 (let ((arg (car args)))
935 (if (and (const? arg)
936 (or (memq (const-exp arg) keywords)
937 (and allow-other-keys?
938 (keyword? (const-exp arg)))))
939 (loop (if (pair? (cdr args))
944 (cons arg result)))))))
946 (define (arities proc)
947 ;; Return the arities of PROC, which can be either a tree-il or a
950 (or (and (or (null? x) (pair? x))
953 (cond ((program? proc)
954 (values (procedure-name proc)
956 (list (length (or (assq-ref a 'required) '()))
957 (length (or (assq-ref a 'optional) '()))
958 (and (assq-ref a 'rest) #t)
959 (map car (or (assq-ref a 'keyword) '()))
960 (assq-ref a 'allow-other-keys?)))
961 (program-arguments-alists proc))))
964 ;; An applicable struct.
965 (arities (struct-ref proc 0))
966 ;; An applicable smob.
967 (let ((arity (procedure-minimum-arity proc)))
968 (values (procedure-name proc)
969 (list (list (car arity) (cadr arity) (caddr arity)
976 (values name (reverse arities))
978 ((<lambda-case> req opt rest kw alternate)
980 (cons (list (len req) (len opt) rest
981 (and (pair? kw) (map car (cdr kw)))
982 (and (pair? kw) (car kw)))
984 ((<lambda> meta body)
985 (loop (assoc-ref meta 'name) body arities))
987 (values #f #f))))))))
989 (let ((args (call-args call))
990 (src (tree-il-src call)))
991 (call-with-values (lambda () (arities proc))
992 (lambda (name arities)
994 (find (lambda (arity)
996 ((,req ,opt ,rest? ,kw ,aok?)
997 (let ((args (if (pair? kw)
998 (filter-keyword-args kw aok? args)
1001 (let ((count (length args)))
1004 (<= count (+ req opt)))))
1010 (warning 'arity-mismatch src
1011 (or name (with-output-to-string (lambda () (write proc))))
1015 (define arity-analysis
1016 ;; Report arity mismatches in the given tree.
1018 (lambda (x info env locs)
1020 (define (extend lexical-name val info)
1021 ;; If VAL is a lambda, add NAME to the lexical-lambdas of INFO.
1022 (let ((toplevel-calls (toplevel-procedure-calls info))
1023 (lexical-lambdas (lexical-lambdas info))
1024 (toplevel-lambdas (toplevel-lambdas info)))
1027 (make-arity-info toplevel-calls
1028 (vhash-consq lexical-name val
1031 ((<lexical-ref> gensym)
1033 (let ((val* (vhash-assq gensym lexical-lambdas)))
1035 (extend lexical-name (cdr val*) info)
1037 ((<toplevel-ref> name)
1039 (make-arity-info toplevel-calls
1040 (vhash-consq lexical-name val
1045 (let ((toplevel-calls (toplevel-procedure-calls info))
1046 (lexical-lambdas (lexical-lambdas info))
1047 (toplevel-lambdas (toplevel-lambdas info)))
1050 ((<toplevel-define> name exp)
1053 (make-arity-info toplevel-calls
1055 (vhash-consq name exp toplevel-lambdas)))
1056 ((<toplevel-ref> name)
1057 ;; alias for another toplevel
1058 (let ((proc (vhash-assq name toplevel-lambdas)))
1059 (make-arity-info toplevel-calls
1061 (vhash-consq (toplevel-define-name x)
1065 toplevel-lambdas))))
1067 ((<let> gensyms vals)
1068 (fold extend info gensyms vals))
1069 ((<letrec> gensyms vals)
1070 (fold extend info gensyms vals))
1071 ((<fix> gensyms vals)
1072 (fold extend info gensyms vals))
1074 ((<call> proc args src)
1077 (validate-arity proc x #t)
1079 ((<toplevel-ref> name)
1080 (make-arity-info (vhash-consq name x toplevel-calls)
1083 ((<lexical-ref> gensym)
1084 (let ((proc (vhash-assq gensym lexical-lambdas)))
1086 (record-case (cdr proc)
1087 ((<toplevel-ref> name)
1088 ;; alias to toplevel
1089 (make-arity-info (vhash-consq name x toplevel-calls)
1093 (validate-arity (cdr proc) x #t)
1096 ;; If GENSYM wasn't found, it may be because it's an
1097 ;; argument of the procedure being compiled.
1102 (lambda (x info env locs)
1104 (define (shrink name val info)
1105 ;; Remove NAME from the lexical-lambdas of INFO.
1106 (let ((toplevel-calls (toplevel-procedure-calls info))
1107 (lexical-lambdas (lexical-lambdas info))
1108 (toplevel-lambdas (toplevel-lambdas info)))
1109 (make-arity-info toplevel-calls
1110 (if (vhash-assq name lexical-lambdas)
1111 (vlist-tail lexical-lambdas)
1115 (let ((toplevel-calls (toplevel-procedure-calls info))
1116 (lexical-lambdas (lexical-lambdas info))
1117 (toplevel-lambdas (toplevel-lambdas info)))
1119 ((<let> gensyms vals)
1120 (fold shrink info gensyms vals))
1121 ((<letrec> gensyms vals)
1122 (fold shrink info gensyms vals))
1123 ((<fix> gensyms vals)
1124 (fold shrink info gensyms vals))
1128 (lambda (result env)
1129 ;; Post-processing: check all top-level procedure calls that have been
1131 (let ((toplevel-calls (toplevel-procedure-calls result))
1132 (toplevel-lambdas (toplevel-lambdas result)))
1135 (let* ((name (car name+call))
1136 (call (cdr name+call))
1138 (or (and=> (vhash-assq name toplevel-lambdas) cdr)
1141 (module-ref env name)))))
1143 ;; handle toplevel aliases
1144 (if (toplevel-ref? proc)
1145 (let ((name (toplevel-ref-name proc)))
1148 (module-ref env name))))
1150 (cond ((lambda? proc*)
1151 (validate-arity proc* call #t))
1153 (validate-arity proc* call #f)))))
1156 (make-arity-info vlist-null vlist-null vlist-null)))
1160 ;;; `format' argument analysis.
1163 (define &syntax-error
1164 ;; The `throw' key for syntax errors.
1165 (gensym "format-string-syntax-error"))
1167 (define (format-string-argument-count fmt)
1168 ;; Return the minimum and maxium number of arguments that should
1169 ;; follow format string FMT (or, ahem, a good estimate thereof) or
1170 ;; `any' if the format string can be followed by any number of
1173 (define (drop-group chars end)
1174 ;; Drop characters from CHARS until "~END" is encountered.
1175 (let loop ((chars chars)
1178 (throw &syntax-error 'unterminated-iteration)
1180 (if (eq? (car chars) end)
1182 (loop (cdr chars) #f))
1183 (if (eq? (car chars) #\~)
1184 (loop (cdr chars) #t)
1185 (loop (cdr chars) #f))))))
1187 (define (digit? char)
1188 ;; Return true if CHAR is a digit, #f otherwise.
1189 (memq char '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)))
1191 (define (previous-number chars)
1192 ;; Return the previous series of digits found in CHARS.
1193 (let ((numbers (take-while digit? chars)))
1194 (and (not (null? numbers))
1195 (string->number (list->string (reverse numbers))))))
1197 (let loop ((chars (string->list fmt))
1206 (throw &syntax-error 'unterminated-conditional)
1207 (values min-count max-count))
1211 ((#\~ #\% #\& #\t #\T #\_ #\newline #\( #\) #\! #\| #\/ #\q #\Q)
1212 (loop (cdr chars) 'literal '()
1213 conditions end-group
1214 min-count max-count))
1215 ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\, #\: #\@ #\+ #\- #\#)
1217 'tilde (cons (car chars) params)
1218 conditions end-group
1219 min-count max-count))
1220 ((#\v #\V) (loop (cdr chars)
1221 'tilde (cons (car chars) params)
1222 conditions end-group
1226 (loop chars 'literal '() '()
1227 (let ((selector (previous-number params))
1228 (at? (memq #\@ params)))
1229 (lambda (chars conds)
1231 (let ((mins (map car conds))
1232 (maxs (map cdr conds))
1234 (< selector (length conds)))))
1235 (if (and (every number? mins)
1236 (every number? maxs))
1237 (loop chars 'literal '() conditions end-group
1240 (car (list-ref conds selector))
1244 (apply min mins)))))
1247 (cdr (list-ref conds selector))
1251 (apply max maxs))))))
1252 (values 'any 'any))))) ;; XXX: approximation
1256 (loop (cdr chars) 'literal '()
1257 (cons (cons min-count max-count) conditions)
1260 (throw &syntax-error 'unexpected-semicolon)))
1263 (end-group (cdr chars)
1264 (reverse (cons (cons min-count max-count)
1266 (throw &syntax-error 'unexpected-conditional-termination)))
1267 ((#\{) (if (memq #\@ params)
1268 (values min-count 'any)
1269 (loop (drop-group (cdr chars) #\})
1271 conditions end-group
1272 (+ 1 min-count) (+ 1 max-count))))
1273 ((#\*) (if (memq #\@ params)
1274 (values 'any 'any) ;; it's unclear what to do here
1277 conditions end-group
1278 (+ (or (previous-number params) 1)
1280 (+ (or (previous-number params) 1)
1283 ;; We don't have enough info to determine the exact number
1284 ;; of args, but we could determine a lower bound (TODO).
1287 (values min-count 'any))
1289 (let ((argc (if (memq #\: params) 2 1)))
1290 (loop (cdr chars) 'literal '()
1291 conditions end-group
1293 (+ argc max-count))))
1295 (if (null? (cdr chars))
1296 (throw &syntax-error 'unexpected-termination)
1297 (loop (cddr chars) 'tilde (cons (cadr chars) params)
1298 conditions end-group min-count max-count)))
1299 (else (loop (cdr chars) 'literal '()
1300 conditions end-group
1301 (+ 1 min-count) (+ 1 max-count)))))
1304 ((#\~) (loop (cdr chars) 'tilde '()
1305 conditions end-group
1306 min-count max-count))
1307 (else (loop (cdr chars) 'literal '()
1308 conditions end-group
1309 min-count max-count))))
1310 (else (error "computer bought the farm" state))))))
1312 (define (proc-ref? exp proc special-name env)
1313 "Return #t when EXP designates procedure PROC in ENV. As a last
1314 resort, return #t when EXP refers to the global variable SPECIAL-NAME."
1317 (cut eq? <> special-name))
1320 (($ <toplevel-ref> _ (? special?))
1321 ;; Allow top-levels like: (define _ (cut gettext <> "my-domain")).
1323 (($ <toplevel-ref> _ name)
1324 (let ((var (module-variable env name)))
1325 (and var (variable-bound? var)
1326 (eq? (variable-ref var) proc))))
1327 (($ <module-ref> _ _ (? special?))
1329 (($ <module-ref> _ module name public?)
1330 (let* ((mod (if public?
1331 (false-if-exception (resolve-interface module))
1332 (resolve-module module #:ensure #f)))
1333 (var (and mod (module-variable mod name))))
1334 (and var (variable-bound? var) (eq? (variable-ref var) proc))))
1335 (($ <lexical-ref> _ (? special?))
1339 (define gettext? (cut proc-ref? <> gettext '_ <>))
1340 (define ngettext? (cut proc-ref? <> ngettext 'N_ <>))
1342 (define (const-fmt x env)
1343 ;; Return the literal format string for X, or #f.
1345 (($ <const> _ (? string? exp))
1347 (($ <call> _ (? (cut gettext? <> env))
1348 (($ <const> _ (? string? fmt))))
1349 ;; Gettexted literals, like `(_ "foo")'.
1351 (($ <call> _ (? (cut ngettext? <> env))
1352 (($ <const> _ (? string? fmt)) ($ <const> _ (? string?)) _ ..1))
1353 ;; Plural gettextized literals, like `(N_ "singular" "plural" n)'.
1355 ;; TODO: Check whether the singular and plural strings have the
1356 ;; same format escapes.
1360 (define format-analysis
1361 ;; Report arity mismatches in the given tree.
1363 (lambda (x _ env locs)
1365 (define (check-format-args args loc)
1367 ((,port ,fmt . ,rest)
1368 (guard (const-fmt fmt env))
1369 (if (and (const? port)
1370 (not (boolean? (const-exp port))))
1371 (warning 'format loc 'wrong-port (const-exp port)))
1372 (let ((fmt (const-fmt fmt env))
1373 (count (length rest)))
1374 (catch &syntax-error
1376 (let-values (((min max)
1377 (format-string-argument-count fmt)))
1379 (or (and (or (eq? min 'any) (>= count min))
1380 (or (eq? max 'any) (<= count max)))
1381 (warning 'format loc 'wrong-format-arg-count
1382 fmt min max count)))))
1384 (warning 'format loc 'syntax-error key fmt)))))
1385 ((,port ,fmt . ,rest)
1386 (if (and (const? port)
1387 (not (boolean? (const-exp port))))
1388 (warning 'format loc 'wrong-port (const-exp port)))
1391 (($ <const> loc* (? (negate string?) fmt))
1392 (warning 'format (or loc* loc) 'wrong-format-string fmt))
1394 ;; Warn on non-literal format strings, unless they refer to
1395 ;; a lexical variable named "fmt".
1396 (($ <lexical-ref> _ fmt)
1398 ((? (negate const?))
1399 (warning 'format loc 'non-literal-format-string))))
1401 (warning 'format loc 'wrong-num-args (length args)))))
1403 (define (check-simple-format-args args loc)
1404 ;; Check the arguments to the `simple-format' procedure, which is
1405 ;; less capable than that of (ice-9 format).
1407 (define allowed-chars
1408 '(#\A #\S #\a #\s #\~ #\%))
1410 (define (format-chars fmt)
1411 (let loop ((chars (string->list fmt))
1417 (loop rest (cons opt result)))
1419 (loop rest result)))))
1422 ((port ($ <const> _ (? string? fmt)) _ ...)
1423 (let ((opts (format-chars fmt)))
1424 (or (every (cut memq <> allowed-chars) opts)
1426 (warning 'format loc 'simple-format fmt
1427 (find (negate (cut memq <> allowed-chars)) opts))
1429 ((port (= (cut const-fmt <> env) (? string? fmt)) args ...)
1430 (check-simple-format-args `(,port ,(make-const loc fmt) ,args) loc))
1433 (define (resolve-toplevel name)
1435 (false-if-exception (module-ref env name))))
1438 (($ <call> src ($ <toplevel-ref> _ name) args)
1439 (let ((proc (resolve-toplevel name)))
1440 (if (or (and (eq? proc (@ (guile) simple-format))
1441 (check-simple-format-args args
1442 (or src (find pair? locs))))
1443 (eq? proc (@ (ice-9 format) format)))
1444 (check-format-args args (or src (find pair? locs))))))
1445 (($ <call> src ($ <module-ref> _ '(ice-9 format) 'format) args)
1446 (check-format-args args (or src (find pair? locs))))
1447 (($ <call> src ($ <module-ref> _ '(guile)
1448 (or 'format 'simple-format))
1450 (and (check-simple-format-args args
1451 (or src (find pair? locs)))
1452 (check-format-args args (or src (find pair? locs)))))
1456 (lambda (x _ env locs)