Use vhashes in `unbound-variable-analysis'.
[bpt/guile.git] / module / language / tree-il / analyze.scm
1 ;;; TREE-IL -> GLIL compiler
2
3 ;; Copyright (C) 2001,2008,2009,2010 Free Software Foundation, Inc.
4
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
9 ;;;;
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
14 ;;;;
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Code:
20
21 (define-module (language tree-il analyze)
22 #:use-module (srfi srfi-1)
23 #:use-module (srfi srfi-9)
24 #:use-module (srfi srfi-11)
25 #:use-module (ice-9 vlist)
26 #:use-module (system base syntax)
27 #:use-module (system base message)
28 #:use-module (system vm program)
29 #:use-module (language tree-il)
30 #:use-module (system base pmatch)
31 #:export (analyze-lexicals
32 analyze-tree
33 unused-variable-analysis
34 unused-toplevel-analysis
35 unbound-variable-analysis
36 arity-analysis))
37
38 ;; Allocation is the process of assigning storage locations for lexical
39 ;; variables. A lexical variable has a distinct "address", or storage
40 ;; location, for each procedure in which it is referenced.
41 ;;
42 ;; A variable is "local", i.e., allocated on the stack, if it is
43 ;; referenced from within the procedure that defined it. Otherwise it is
44 ;; a "closure" variable. For example:
45 ;;
46 ;; (lambda (a) a) ; a will be local
47 ;; `a' is local to the procedure.
48 ;;
49 ;; (lambda (a) (lambda () a))
50 ;; `a' is local to the outer procedure, but a closure variable with
51 ;; respect to the inner procedure.
52 ;;
53 ;; If a variable is ever assigned, it needs to be heap-allocated
54 ;; ("boxed"). This is so that closures and continuations capture the
55 ;; variable's identity, not just one of the values it may have over the
56 ;; course of program execution. If the variable is never assigned, there
57 ;; is no distinction between value and identity, so closing over its
58 ;; identity (whether through closures or continuations) can make a copy
59 ;; of its value instead.
60 ;;
61 ;; Local variables are stored on the stack within a procedure's call
62 ;; frame. Their index into the stack is determined from their linear
63 ;; position within a procedure's binding path:
64 ;; (let (0 1)
65 ;; (let (2 3) ...)
66 ;; (let (2) ...))
67 ;; (let (2 3 4) ...))
68 ;; etc.
69 ;;
70 ;; This algorithm has the problem that variables are only allocated
71 ;; indices at the end of the binding path. If variables bound early in
72 ;; the path are not used in later portions of the path, their indices
73 ;; will not be recycled. This problem is particularly egregious in the
74 ;; expansion of `or':
75 ;;
76 ;; (or x y z)
77 ;; -> (let ((a x)) (if a a (let ((b y)) (if b b z))))
78 ;;
79 ;; As you can see, the `a' binding is only used in the ephemeral
80 ;; `consequent' clause of the first `if', but its index would be
81 ;; reserved for the whole of the `or' expansion. So we have a hack for
82 ;; this specific case. A proper solution would be some sort of liveness
83 ;; analysis, and not our linear allocation algorithm.
84 ;;
85 ;; Closure variables are captured when a closure is created, and stored in a
86 ;; vector inline to the closure object itself. Each closure variable has a
87 ;; unique index into that vector.
88 ;;
89 ;; There is one more complication. Procedures bound by <fix> may, in
90 ;; some cases, be rendered inline to their parent procedure. That is to
91 ;; say,
92 ;;
93 ;; (letrec ((lp (lambda () (lp)))) (lp))
94 ;; => (fix ((lp (lambda () (lp)))) (lp))
95 ;; => goto FIX-BODY; LP: goto LP; FIX-BODY: goto LP;
96 ;; ^ jump over the loop ^ the fixpoint lp ^ starting off the loop
97 ;;
98 ;; The upshot is that we don't have to allocate any space for the `lp'
99 ;; closure at all, as it can be rendered inline as a loop. So there is
100 ;; another kind of allocation, "label allocation", in which the
101 ;; procedure is simply a label, placed at the start of the lambda body.
102 ;; The label is the gensym under which the lambda expression is bound.
103 ;;
104 ;; The analyzer checks to see that the label is called with the correct
105 ;; number of arguments. Calls to labels compile to rename + goto.
106 ;; Lambda, the ultimate goto!
107 ;;
108 ;;
109 ;; The return value of `analyze-lexicals' is a hash table, the
110 ;; "allocation".
111 ;;
112 ;; The allocation maps gensyms -- recall that each lexically bound
113 ;; variable has a unique gensym -- to storage locations ("addresses").
114 ;; Since one gensym may have many storage locations, if it is referenced
115 ;; in many procedures, it is a two-level map.
116 ;;
117 ;; The allocation also stores information on how many local variables
118 ;; need to be allocated for each procedure, lexicals that have been
119 ;; translated into labels, and information on what free variables to
120 ;; capture from its lexical parent procedure.
121 ;;
122 ;; In addition, we have a conflation: while we're traversing the code,
123 ;; recording information to pass to the compiler, we take the
124 ;; opportunity to generate labels for each lambda-case clause, so that
125 ;; generated code can skip argument checks at runtime if they match at
126 ;; compile-time.
127 ;;
128 ;; Also, while we're a-traversing and an-allocating, we check prompt
129 ;; handlers to see if the "continuation" argument is used. If not, we
130 ;; mark the prompt as being "escape-only". This allows us to implement
131 ;; `catch' and `throw' using `prompt' and `control', but without causing
132 ;; a continuation to be reified. Heh heh.
133 ;;
134 ;; That is:
135 ;;
136 ;; sym -> {lambda -> address}
137 ;; lambda -> (labels . free-locs)
138 ;; lambda-case -> (gensym . nlocs)
139 ;; prompt -> escape-only?
140 ;;
141 ;; address ::= (local? boxed? . index)
142 ;; labels ::= ((sym . lambda) ...)
143 ;; free-locs ::= ((sym0 . address0) (sym1 . address1) ...)
144 ;; free variable addresses are relative to parent proc.
145
(define (make-hashq k v)
  ;; Return a freshly allocated hash table whose only entry maps K to V,
  ;; stored with `hashq-set!' (i.e., keyed by identity).
  (let ((table (make-hash-table)))
    (hashq-set! table k v)
    table))
150
(define (analyze-lexicals x)
  ;; Analyze the tree-il expression X and return its "allocation", a hash
  ;; table (see the layout described in the commentary above).  Works in
  ;; two passes over X: `analyze!' gathers bound/free variables,
  ;; assignments, reference counts and label candidates; `allocate!' then
  ;; assigns addresses and fills in the `allocation' table.

  ;; bound-vars: lambda -> (sym ...)
  ;;  all identifiers bound within a lambda
  (define bound-vars (make-hash-table))
  ;; free-vars: lambda -> (sym ...)
  ;;  all identifiers referenced in a lambda, but not bound
  ;;  NB, this includes identifiers referenced by contained lambdas
  (define free-vars (make-hash-table))
  ;; assigned: sym -> #t
  ;;  variables that are assigned
  (define assigned (make-hash-table))
  ;; refcounts: sym -> count
  ;;  allows us to detect the or-expansion in O(1) time
  (define refcounts (make-hash-table))
  ;; labels: sym -> lambda
  ;;  for determining if fixed-point procedures can be rendered as
  ;;  labels.
  (define labels (make-hash-table))

  ;; returns variables referenced in expr
  ;;
  ;; PROC is the enclosing lambda (or #f at top level), LABELS-IN-PROC
  ;; the fix-bound gensyms that may still be label-allocated in PROC,
  ;; TAIL? whether X is in tail position, and TAIL-CALL-ARGS the argument
  ;; list when X is the operator of a tail call (#f otherwise).
  (define (analyze! x proc labels-in-proc tail? tail-call-args)
    (define (step y) (analyze! y proc labels-in-proc #f #f))
    (define (step-tail y) (analyze! y proc labels-in-proc tail? #f))
    (define (step-tail-call y args) (analyze! y proc labels-in-proc #f
                                              (and tail? args)))
    (define (recur/labels x new-proc labels)
      (analyze! x new-proc (append labels labels-in-proc) #t #f))
    (define (recur x new-proc) (analyze! x new-proc '() tail? #f))
    (record-case x
      ((<application> proc args)
       (apply lset-union eq? (step-tail-call proc args)
              (map step args)))

      ((<conditional> test consequent alternate)
       (lset-union eq? (step test) (step-tail consequent) (step-tail alternate)))

      ((<lexical-ref> gensym)
       (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
       ;; Unless this reference is the operator of a tail call to a label
       ;; candidate with a matching simple arity, the procedure cannot
       ;; be label-allocated.
       (if (not (and tail-call-args
                     (memq gensym labels-in-proc)
                     (let ((p (hashq-ref labels gensym)))
                       (and p
                            (let lp ((c (lambda-body p)))
                              (and c (lambda-case? c)
                                   (or
                                    ;; for now prohibit optional &
                                    ;; keyword arguments; can relax this
                                    ;; restriction later
                                    (and (= (length (lambda-case-req c))
                                            (length tail-call-args))
                                         (not (lambda-case-opt c))
                                         (not (lambda-case-kw c))
                                         (not (lambda-case-rest c)))
                                    (lp (lambda-case-alternate c)))))))))
           (hashq-set! labels gensym #f))
       (list gensym))

      ((<lexical-set> gensym exp)
       (hashq-set! assigned gensym #t)
       (hashq-set! labels gensym #f)
       (lset-adjoin eq? (step exp) gensym))

      ((<module-set> exp)
       (step exp))

      ((<toplevel-set> exp)
       (step exp))

      ((<toplevel-define> exp)
       (step exp))

      ((<sequence> exps)
       (let lp ((exps exps) (ret '()))
         (cond ((null? exps) '())
               ((null? (cdr exps))
                (lset-union eq? ret (step-tail (car exps))))
               (else
                (lp (cdr exps) (lset-union eq? ret (step (car exps))))))))

      ((<lambda> body)
       ;; order is important here
       (hashq-set! bound-vars x '())
       (let ((free (recur body x)))
         (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
         (hashq-set! free-vars x free)
         free))

      ((<lambda-case> opt kw inits vars body alternate)
       (hashq-set! bound-vars proc
                   (append (reverse vars) (hashq-ref bound-vars proc)))
       (lset-union
        eq?
        (lset-difference eq?
                         (lset-union eq?
                                     (apply lset-union eq? (map step inits))
                                     (step-tail body))
                         vars)
        (if alternate (step-tail alternate) '())))

      ((<let> vars vals body)
       (hashq-set! bound-vars proc
                   (append (reverse vars) (hashq-ref bound-vars proc)))
       (lset-difference eq?
                        (apply lset-union eq? (step-tail body) (map step vals))
                        vars))

      ((<letrec> vars vals body)
       (hashq-set! bound-vars proc
                   (append (reverse vars) (hashq-ref bound-vars proc)))
       (for-each (lambda (sym) (hashq-set! assigned sym #t)) vars)
       (lset-difference eq?
                        (apply lset-union eq? (step-tail body) (map step vals))
                        vars))

      ((<fix> vars vals body)
       ;; Try to allocate these procedures as labels.
       (for-each (lambda (sym val) (hashq-set! labels sym val))
                 vars vals)
       (hashq-set! bound-vars proc
                   (append (reverse vars) (hashq-ref bound-vars proc)))
       ;; Step into subexpressions.
       (let* ((var-refs
               (map
                ;; Since we're trying to label-allocate the lambda,
                ;; pretend it's not a closure, and just recurse into its
                ;; body directly. (Otherwise, recursing on a closure
                ;; that references one of the fix's bound vars would
                ;; prevent label allocation.)
                (lambda (x)
                  (record-case x
                    ((<lambda> body)
                     ;; just like the closure case, except here we use
                     ;; recur/labels instead of recur
                     (hashq-set! bound-vars x '())
                     (let ((free (recur/labels body x vars)))
                       (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
                       (hashq-set! free-vars x free)
                       free))))
                vals))
              (vars-with-refs (map cons vars var-refs))
              (body-refs (recur/labels body proc vars)))
         (define (delabel-dependents! sym)
           (let ((refs (assq-ref vars-with-refs sym)))
             (if refs
                 (for-each (lambda (sym)
                             (if (hashq-ref labels sym)
                                 (begin
                                   (hashq-set! labels sym #f)
                                   (delabel-dependents! sym))))
                           refs))))
         ;; Stepping into the lambdas and the body might have made some
         ;; procedures not label-allocatable -- which might have
         ;; knock-on effects. For example:
         ;;   (fix ((a (lambda () (b)))
         ;;         (b (lambda () a)))
         ;;     (a))
         ;; As far as `a' is concerned, both `a' and `b' are
         ;; label-allocatable. But `b' references `a' not in a proc-tail
         ;; position, which makes `a' not label-allocatable. The
         ;; knock-on effect is that, when back-propagating this
         ;; information to `a', `b' will also become not
         ;; label-allocatable, as it is referenced within `a', which is
         ;; allocated as a closure. This is a transitive relationship.
         (for-each (lambda (sym)
                     (if (not (hashq-ref labels sym))
                         (delabel-dependents! sym)))
                   vars)
         ;; Now lift bound variables with label-allocated lambdas to the
         ;; parent procedure.
         (for-each
          (lambda (sym val)
            (if (hashq-ref labels sym)
                ;; Remove traces of the label-bound lambda. The free
                ;; vars will propagate up via the return val.
                (begin
                  (hashq-set! bound-vars proc
                              (append (hashq-ref bound-vars val)
                                      (hashq-ref bound-vars proc)))
                  (hashq-remove! bound-vars val)
                  (hashq-remove! free-vars val))))
          vars vals)
         (lset-difference eq?
                          (apply lset-union eq? body-refs var-refs)
                          vars)))

      ((<let-values> exp body)
       (lset-union eq? (step exp) (step body)))

      ((<dynamic-wind> body winder unwinder)
       (lset-union eq? (step body) (step winder) (step unwinder)))

      ((<prompt> tag body handler pre-unwind-handler)
       ;; The prompt BODY has to be analyzed as well: `allocate!' recurses
       ;; into it, so lexical references made there must be counted,
       ;; reported as free variables, and must disqualify label
       ;; allocation (the body is not in tail position).
       (lset-union eq? (step tag) (step body) (step handler)
                   (if pre-unwind-handler (step pre-unwind-handler) '())))

      ((<control> tag type args)
       (apply lset-union eq? (step tag) (map step args)))

      (else '())))

  ;; allocation: sym -> {lambda -> address}
  ;;             lambda -> (labels . free-locs)
  ;;             lambda-case -> (gensym . nlocs)
  ;;             prompt -> escape-only?
  (define allocation (make-hash-table))

  ;; Assign addresses for the variables bound in X, which belongs to
  ;; procedure PROC; N is the next free local index.  Returns the number
  ;; of local slots needed by X.
  (define (allocate! x proc n)
    (define (recur y) (allocate! y proc n))
    (record-case x
      ((<application> proc args)
       (apply max (recur proc) (map recur args)))

      ((<conditional> test consequent alternate)
       (max (recur test) (recur consequent) (recur alternate)))

      ((<lexical-set> exp)
       (recur exp))

      ((<module-set> exp)
       (recur exp))

      ((<toplevel-set> exp)
       (recur exp))

      ((<toplevel-define> exp)
       (recur exp))

      ((<sequence> exps)
       (apply max (map recur exps)))

      ((<lambda> body)
       ;; allocate closure vars in order
       (let lp ((c (hashq-ref free-vars x)) (n 0))
         (if (pair? c)
             (begin
               (hashq-set! (hashq-ref allocation (car c))
                           x
                           `(#f ,(hashq-ref assigned (car c)) . ,n))
               (lp (cdr c) (1+ n)))))

       ;; NLOCS itself is unused here, but the call allocates the body.
       (let ((nlocs (allocate! body x 0))
             (free-addresses
              (map (lambda (v)
                     (hashq-ref (hashq-ref allocation v) proc))
                   (hashq-ref free-vars x)))
             (labels (filter cdr
                             (map (lambda (sym)
                                    (cons sym (hashq-ref labels sym)))
                                  (hashq-ref bound-vars x)))))
         ;; set procedure allocations
         (hashq-set! allocation x (cons labels free-addresses)))
       n)

      ((<lambda-case> opt kw inits vars body alternate)
       (max
        (let lp ((vars vars) (n n))
          (if (null? vars)
              (let ((nlocs (apply
                            max
                            (allocate! body proc n)
                            ;; inits not logically at the end, but they
                            ;; are the list...
                            (map (lambda (x) (allocate! x proc n)) inits))))
                ;; label and nlocs for the case
                (hashq-set! allocation x (cons (gensym ":LCASE") nlocs))
                nlocs)
              (begin
                (hashq-set! allocation (car vars)
                            (make-hashq
                             proc `(#t ,(hashq-ref assigned (car vars)) . ,n)))
                (lp (cdr vars) (1+ n)))))
        (if alternate (allocate! alternate proc n) n)))

      ((<let> vars vals body)
       (let ((nmax (apply max (map recur vals))))
         (cond
          ;; the `or' hack
          ((and (conditional? body)
                (= (length vars) 1)
                (let ((v (car vars)))
                  (and (not (hashq-ref assigned v))
                       (= (hashq-ref refcounts v 0) 2)
                       (lexical-ref? (conditional-test body))
                       (eq? (lexical-ref-gensym (conditional-test body)) v)
                       (lexical-ref? (conditional-consequent body))
                       (eq? (lexical-ref-gensym (conditional-consequent body)) v))))
           (hashq-set! allocation (car vars)
                       (make-hashq proc `(#t #f . ,n)))
           ;; the 1+ for this var
           (max nmax (1+ n) (allocate! (conditional-alternate body) proc n)))
          (else
           (let lp ((vars vars) (n n))
             (if (null? vars)
                 (max nmax (allocate! body proc n))
                 (let ((v (car vars)))
                   (hashq-set!
                    allocation v
                    (make-hashq proc
                                `(#t ,(hashq-ref assigned v) . ,n)))
                   (lp (cdr vars) (1+ n)))))))))

      ((<letrec> vars vals body)
       (let lp ((vars vars) (n n))
         (if (null? vars)
             (let ((nmax (apply max
                                (map (lambda (x)
                                       (allocate! x proc n))
                                     vals))))
               (max nmax (allocate! body proc n)))
             (let ((v (car vars)))
               (hashq-set!
                allocation v
                (make-hashq proc
                            `(#t ,(hashq-ref assigned v) . ,n)))
               (lp (cdr vars) (1+ n))))))

      ((<fix> vars vals body)
       (let lp ((in vars) (n n))
         (if (null? in)
             (let lp ((vars vars) (vals vals) (nmax n))
               (cond
                ((null? vars)
                 (max nmax (allocate! body proc n)))
                ((hashq-ref labels (car vars))
                 ;; allocate lambda body inline to proc
                 (lp (cdr vars)
                     (cdr vals)
                     (record-case (car vals)
                       ((<lambda> body)
                        (max nmax (allocate! body proc n))))))
                (else
                 ;; allocate closure
                 (lp (cdr vars)
                     (cdr vals)
                     (max nmax (allocate! (car vals) proc n))))))

             (let ((v (car in)))
               (cond
                ((hashq-ref assigned v)
                 (error "fixpoint procedures may not be assigned" x))
                ((hashq-ref labels v)
                 ;; no binding, it's a label
                 (lp (cdr in) n))
                (else
                 ;; allocate closure binding
                 (hashq-set! allocation v (make-hashq proc `(#t #f . ,n)))
                 (lp (cdr in) (1+ n))))))))

      ((<let-values> exp body)
       (max (recur exp) (recur body)))

      ((<dynamic-wind> body winder unwinder)
       (max (recur body) (recur winder) (recur unwinder)))

      ((<prompt> tag body handler pre-unwind-handler)
       ;; The prompt is "escape-only" when the handler's first variable
       ;; (the continuation) is never referenced.
       (let ((cont-var (and (lambda-case? handler)
                            (pair? (lambda-case-vars handler))
                            (car (lambda-case-vars handler)))))
         (hashq-set! allocation x
                     (and cont-var (zero? (hashq-ref refcounts cont-var 0))))
         (max (recur tag) (recur body) (recur handler)
              (if pre-unwind-handler (recur pre-unwind-handler) 0))))

      ((<control> tag type args)
       (apply max (recur tag) (map recur args)))

      (else n)))

  (analyze! x #f '() #t #f)
  (allocate! x #f 0)

  allocation)
521
522 \f
523 ;;;
524 ;;; Tree analyses for warnings.
525 ;;;
526
527 (define-record-type <tree-analysis> ;; one analysis pass, as run by `analyze-tree'
528 (make-tree-analysis leaf down up post init)
529 tree-analysis?
530 (leaf tree-analysis-leaf) ;; (lambda (x result env locs) ...), passed as the leaf procedure to `tree-il-fold'
531 (down tree-analysis-down) ;; (lambda (x result env locs) ...), passed as the down procedure; LOCS already includes X's source
532 (up tree-analysis-up) ;; (lambda (x result env locs) ...), passed as the up procedure; LOCS has X's source popped
533 (post tree-analysis-post) ;; (lambda (result env) ...), called once with the final result of the fold
534 (init tree-analysis-init)) ;; arbitrary value, used as this analysis' initial result
535
(define (analyze-tree analyses tree env)
  "Run every tree analysis listed in ANALYSES on TREE for ENV, in a single
`tree-il-fold' traversal, then call each analysis' post procedure on its
final result.  Return TREE.  The leaf/down/up procedures of each analysis
receive a `location stack': the list of `tree-il-src' values of the
enclosing trees, which can be used to approximate a source location when a
given `tree-il' element carries none."

  ;; The value threaded through the fold is (LOCS . RESULTS): the location
  ;; stack followed by one result per analysis, in the order of ANALYSES.
  (define (make-visitor accessor adjust-locs)
    ;; Build a fold procedure that first adjusts the location stack with
    ;; ADJUST-LOCS, then hands the node to the procedure ACCESSOR selects
    ;; from each analysis.
    (lambda (node state)
      (let* ((stack (adjust-locs node (car state)))
             (outcomes (map (lambda (analysis result)
                              ((accessor analysis) node result env stack))
                            analyses
                            (cdr state))))
        (cons stack outcomes))))

  ;; Leaves keep the stack, going down pushes, going up pops.
  (define (same-stack node stack) stack)
  (define (push-src node stack) (cons (tree-il-src node) stack))
  (define (pop-src node stack) (cdr stack))

  (let* ((seed (cons '() ;; empty location stack
                     (map tree-analysis-init analyses)))
         (final (tree-il-fold (make-visitor tree-analysis-leaf same-stack)
                              (make-visitor tree-analysis-down push-src)
                              (make-visitor tree-analysis-up pop-src)
                              seed
                              tree)))
    (for-each (lambda (analysis result)
                ((tree-analysis-post analysis) result env))
              analyses
              (cdr final)))

  tree)
573
574 \f
575 ;;;
576 ;;; Unused variable analysis.
577 ;;;
578
579 ;; <binding-info> records are used during tree traversals in
580 ;; `unused-variable-analysis'. They contain a list of the local vars
581 ;; currently in scope, and a list of local vars that have been referenced.
582 (define-record-type <binding-info>
583 (make-binding-info vars refs)
584 binding-info?
585 (vars binding-info-vars) ;; ((GENSYM NAME LOCATION) ...), most recently entered bindings first
586 (refs binding-info-refs)) ;; (GENSYM ...), gensyms seen in a <lexical-ref> or <lexical-set> so far
587
(define unused-variable-analysis
  ;; Tree analysis that warns about lexical variables which are bound but
  ;; never referenced or assigned.  The result threaded through the
  ;; traversal is a <binding-info>.
  (make-tree-analysis
   ;; Leaf: a <lexical-ref> marks its gensym as used.
   (lambda (x info env locs)
     (record-case x
       ((<lexical-ref> gensym)
        (make-binding-info (binding-info-vars info)
                           (cons gensym (binding-info-refs info))))
       (else info)))

   ;; Down: binding constructs push their variables onto the in-scope
   ;; list; a <lexical-set> counts as a use of its target.
   (lambda (x info env locs)
     (let ((seen (binding-info-refs info))
           (in-scope (binding-info-vars info))
           (src (tree-il-src x)))
       (define (bind gensyms names)
         ;; Enter GENSYMS, recorded as (GENSYM NAME SRC) entries in front
         ;; of the bindings already in scope.
         (make-binding-info
          (append (map (lambda (g name) (list g name src))
                       gensyms
                       names)
                  in-scope)
          seen))
       (record-case x
         ((<lexical-set> gensym)
          (make-binding-info in-scope (cons gensym seen)))
         ((<lambda-case> req opt inits rest kw vars)
          (bind vars
                (append req
                        (or opt '())
                        (if rest (list rest) '())
                        (if kw (map cadr (cdr kw)) '()))))
         ((<let> vars names)
          (bind vars names))
         ((<letrec> vars names)
          (bind vars names))
         ((<fix> vars names)
          (bind vars names))
         (else info))))

   ;; Up: on leaving a binding construct, warn about each of its
   ;; variables that was never used, then drop them from scope.
   (lambda (x info env locs)
     (let ((seen (binding-info-refs info))
           (in-scope (binding-info-vars info)))
       (define (leave-scope gensyms)
         (for-each
          (lambda (entry)
            (let ((g (car entry)))
              ;; Lambda parameters are never reported as unused.
              (if (not (or (memq g seen)
                           (and (lambda-case? x)
                                (memq g gensyms))))
                  ;; Fall back on the LOCS location stack when the
                  ;; binding has no source location of its own.
                  (warning 'unused-variable
                           (or (caddr entry) (find pair? locs))
                           (cadr entry)))))
          (filter (lambda (entry)
                    (memq (car entry) gensyms))
                  in-scope))
         ;; SEEN is deliberately left as is, even though it still names
         ;; variables going out of scope: gensyms are unique, so this
         ;; only makes the list longer than strictly needed.
         (make-binding-info (fold alist-delete in-scope gensyms) seen))

       (record-case x
         ((<lambda-case> vars)
          (leave-scope vars))
         ((<let> vars)
          (leave-scope vars))
         ((<letrec> vars)
          (leave-scope vars))
         ((<fix> vars)
          (leave-scope vars))
         (else info))))

   ;; Post: nothing left to do, warnings were emitted on the way up.
   (lambda (result env) #t)
   (make-binding-info '() '())))
672
673 \f
674 ;;;
675 ;;; Unused top-level variable analysis.
676 ;;;
677
678 ;; <reference-graph> records hold top-level definitions that are made, references to
679 ;; top-level definitions and their context (the top-level definition in which
680 ;; the reference appears), as well as the current context (the top-level
681 ;; definition we're currently in). The second part (`refs' below) is
682 ;; effectively a graph from which we can determine unused top-level definitions.
683 (define-record-type <reference-graph>
684 (make-reference-graph refs defs toplevel-context)
685 reference-graph?
686 (defs reference-graph-defs) ;; vhash: NAME -> LOC
687 (refs reference-graph-refs) ;; vhash: REF-CONTEXT -> (REF ...)
688 (toplevel-context reference-graph-toplevel-context)) ;; NAME | #f
689
(define (graph-reachable-nodes root refs reachable)
  ;; Return REACHABLE extended with the nodes that can be reached from
  ;; ROOT in graph REFS.  REFS is a vhash mapping each node to the list of
  ;; its children; e.g., ((A -> (B C)) (B -> (A)) (C -> ())) stands for
  ;;
  ;;       ,-------.
  ;;       v       |
  ;;       A ----> B
  ;;       |
  ;;       v
  ;;       C
  ;;
  ;; REACHABLE is a vhash of nodes already known to be reachable; a node
  ;; found there is not explored again.

  (define (children-of node)
    ;; Children of NODE according to REFS, or the empty list.
    (let ((entry (vhash-assq node refs)))
      (or (and entry (cdr entry)) '())))

  (define (mark node seen)
    (vhash-consq node #t seen))

  ;; ON-PATH holds the nodes currently being explored, so that cycles
  ;; terminate; SEEN accumulates the answer.
  (let visit ((node root)
              (on-path vlist-null)
              (seen reachable))
    (cond ((vhash-assq node on-path) seen)
          ((vhash-assq node seen) seen)
          (else
           (let* ((kids (children-of node))
                  (on-path (vhash-consq node #t on-path))
                  ;; First explore below every child...
                  (explored (fold (lambda (kid acc)
                                    (visit kid on-path acc))
                                  seen
                                  kids)))
             ;; ...then record the children themselves.
             (fold mark explored kids))))))
720
(define (graph-reachable-nodes* roots refs)
  ;; Return a vhash whose keys are the nodes listed in ROOTS (a vhash)
  ;; together with every node of REFS reachable from them.
  (define (visit-root entry seen)
    (let ((root (car entry)))
      (vhash-consq root #t (graph-reachable-nodes root refs seen))))
  (vlist-fold visit-root vlist-null roots))
729
(define (partition* pred vhash)
  ;; Split VHASH in two according to PRED, which is applied to each key.
  ;; Return two vhashes: the entries whose key satisfies PRED, and the
  ;; remaining ones.
  (let loop ((entries (vlist->list vhash))
             (accepted vlist-null)
             (rejected vlist-null))
    (if (null? entries)
        (values accepted rejected)
        (let ((key (caar entries))
              (val (cdar entries)))
          (if (pred key)
              (loop (cdr entries) (vhash-consq key val accepted) rejected)
              (loop (cdr entries) accepted (vhash-consq key val rejected)))))))
744
(define unused-toplevel-analysis
  ;; Tree analysis that warns about top-level definitions which are not
  ;; exported and cannot be reached from any exported definition.  The
  ;; result threaded through the traversal is a <reference-graph>.
  (let ()
    (define (add-ref-from-context graph name)
      ;; Return GRAPH with an extra edge CTX -> NAME, where CTX is the
      ;; current top-level context of GRAPH.
      (let* ((ctx (reference-graph-toplevel-context graph))
             (edges (reference-graph-refs graph))
             (known (vhash-assq ctx edges))
             (targets (cons name (or (and known (cdr known)) '()))))
        (make-reference-graph (vhash-consq ctx targets edges)
                              (reference-graph-defs graph)
                              ctx)))

    (define (macro-variable? name env)
      ;; True when ENV is a module in which NAME is bound to a macro.
      (and (module? env)
           (let ((var (module-variable env name)))
             (and var (variable-bound? var)
                  (macro? (variable-ref var))))))

    (make-tree-analysis
     ;; Leaf: a <toplevel-ref> is an edge from the current context.
     (lambda (x graph env locs)
       (record-case x
         ((<toplevel-ref> name src)
          (add-ref-from-context graph name))
         (else graph)))

     ;; Down: a <toplevel-define> records the definition and becomes the
     ;; current context; a <toplevel-set> counts as a reference.
     (lambda (x graph env locs)
       (record-case x
         ((<toplevel-define> name src)
          (make-reference-graph
           (reference-graph-refs graph)
           (vhash-consq name (or src (find pair? locs))
                        (reference-graph-defs graph))
           name))
         ((<toplevel-set> name src)
          (add-ref-from-context graph name))
         (else graph)))

     ;; Up: leaving a <toplevel-define> resets the context to #f.
     (lambda (x graph env locs)
       (record-case x
         ((<toplevel-define>)
          (make-reference-graph (reference-graph-refs graph)
                                (reference-graph-defs graph)
                                #f))
         (else graph)))

     ;; Post: walk the reference graph and report all private definitions
     ;; not reachable from any public definition.  Macros
     ;; (syntax-transformers), which are globally bound, are never
     ;; considered unused since we can't tell whether a macro is actually
     ;; used; in addition, macros are considered roots of the graph since
     ;; they may use private bindings.  FIXME: The
     ;; `make-syntax-transformer' calls don't contain any literal
     ;; `toplevel-ref' of the global bindings they use so this strategy
     ;; fails.
     (lambda (graph env)
       (define (exported? name)
         (if (module? env)
             (module-variable (module-public-interface env) name)
             #t))

       (define (root? name)
         (or (exported? name)
             (macro-variable? name env)))

       (call-with-values
           (lambda ()
             (partition* root? (reference-graph-defs graph)))
         (lambda (public-defs private-defs)
           ;; #f, the context of code outside any definition, is a root.
           (let ((reachable (graph-reachable-nodes*
                             (vhash-consq #f #t public-defs)
                             (reference-graph-refs graph))))
             (vlist-for-each
              (lambda (name+loc)
                (if (not (vhash-assq (car name+loc) reachable))
                    (warning 'unused-toplevel
                             (cdr name+loc)
                             (car name+loc))))
              private-defs)))))

     (make-reference-graph vlist-null vlist-null #f))))
828
829 \f
830 ;;;
831 ;;; Unbound variable analysis.
832 ;;;
833
834 ;; <toplevel-info> records are used during tree traversal in search of
835 ;; possibly unbound variables. They contain a vhash of references to
836 ;; potentially unbound top-level variables, and a vhash of the top-level
837 ;; defines that have been encountered.
838 (define-record-type <toplevel-info>
839 (make-toplevel-info refs defs)
840 toplevel-info?
841 (refs toplevel-info-refs) ;; vhash: VARIABLE-NAME -> LOCATION
842 (defs toplevel-info-defs)) ;; vhash: VARIABLE-NAME -> #t
843
(define (goops-toplevel-definition proc args env)
  ;; Decide whether applying PROC to ARGS is a GOOPS top-level
  ;; definition.  Return the symbol being defined if so, #f otherwise.
  ;; This relies on how `define-class' et al. are currently implemented.
  (define (defined-name)
    ;; ARGS must have exactly two elements, the first one being a
    ;; constant symbol: that symbol is the variable name.
    (and (pair? args)
         (pair? (cdr args))
         (null? (cddr args))
         (record-case (car args)
           ((<const> exp)
            (and (symbol? exp) exp))
           (else #f))))

  (record-case proc
    ((<module-ref> mod public? name)
     ;; A private reference to `toplevel-define!' in (oop goops).
     (and (equal? mod '(oop goops))
          (not public?)
          (eq? name 'toplevel-define!)
          (defined-name)))
    ((<toplevel-ref> name)
     ;; This may be the result of expanding one of the GOOPS macros
     ;; within `oop/goops.scm' itself.
     (and (eq? name 'toplevel-define!)
          (eq? env (resolve-module '(oop goops)))
          (defined-name)))
    (else #f)))
868
(define unbound-variable-analysis
  ;; Report possibly unbound variables in the given tree.  The result
  ;; threaded through the traversal is a <toplevel-info>: REFS collects
  ;; the references and assignments to top-level variables that were not
  ;; bound when encountered, DEFS the top-level definitions seen so far.
  (let ()
    (define (bound? name env defs)
      ;; True if NAME is bound in module ENV or was defined earlier in
      ;; the tree.
      (or (and (module? env)
               (module-variable env name))
          (vhash-assq name defs)))

    (define (forget-ref name refs)
      ;; Remove NAME from REFS.  REFS is built with `vhash-consq', so
      ;; the deletion has to use `hashq' too: `vhash-delete' defaults to
      ;; `hash', which does not match the way the entries were stored.
      (vhash-delete name refs eq? hashq))

    (make-tree-analysis
     (lambda (x info env locs)
       ;; X is a leaf: extend INFO's refs accordingly.
       (let ((refs (toplevel-info-refs info))
             (defs (toplevel-info-defs info)))
         (record-case x
           ((<toplevel-ref> name src)
            (if (bound? name env defs)
                info
                ;; Fall back on the location stack when X has no source
                ;; location of its own.
                (let ((src (or src (find pair? locs))))
                  (make-toplevel-info (vhash-consq name src refs)
                                      defs))))
           (else info))))

     (lambda (x info env locs)
       ;; Going down into X.
       (let ((refs (toplevel-info-refs info))
             (defs (toplevel-info-defs info)))
         (record-case x
           ((<toplevel-set> name src)
            (if (bound? name env defs)
                (make-toplevel-info refs defs)
                (let ((src (find pair? locs)))
                  (make-toplevel-info (vhash-consq name src refs)
                                      defs))))
           ((<toplevel-define> name)
            ;; NAME is now defined: earlier references to it were fine.
            (make-toplevel-info (forget-ref name refs)
                                (vhash-consq name #t defs)))

           ((<application> proc args)
            ;; Check for a dynamic top-level definition, as is
            ;; done by code expanded from GOOPS macros.
            (let ((name (goops-toplevel-definition proc args env)))
              (if (symbol? name)
                  (make-toplevel-info (forget-ref name refs)
                                      (vhash-consq name #t defs))
                  (make-toplevel-info refs defs))))
           (else
            (make-toplevel-info refs defs)))))

     (lambda (x info env locs)
       ;; Leaving X's scope.
       info)

     (lambda (toplevel env)
       ;; Post-process the result: warn about each remaining reference,
       ;; oldest first.
       (vlist-for-each (lambda (name+loc)
                         (let ((name (car name+loc))
                               (loc (cdr name+loc)))
                           (warning 'unbound-variable loc name)))
                       (vlist-reverse (toplevel-info-refs toplevel))))

     (make-toplevel-info vlist-null vlist-null))))
937
938 \f
939 ;;;
940 ;;; Arity analysis.
941 ;;;
942
;; An <arity-info> record is the state of the arity analysis.  It holds
;; the calls to top-level procedures encountered so far, the lexical
;; procedure definitions currently in scope, and the top-level procedure
;; definitions encountered so far.
(define-record-type <arity-info>
  (make-arity-info toplevel-calls lexical-lambdas toplevel-lambdas)
  arity-info?
  ;; Alist of the form ((NAME . APPLICATION) ...).
  (toplevel-calls   toplevel-procedure-calls)
  ;; Alist of the form ((GENSYM . DEFINITION) ...).
  (lexical-lambdas  lexical-lambdas)
  ;; Alist of the form ((NAME . DEFINITION) ...).
  (toplevel-lambdas toplevel-lambdas))
953
(define (validate-arity proc application lexical?)
  ;; Validate the argument count of APPLICATION, a tree-il application of
  ;; PROC, emitting an `arity-mismatch' warning when none of PROC's
  ;; arities accepts that count.  PROC is either a procedure object or a
  ;; tree-il lambda.  LEXICAL? is forwarded to the warning.  Always
  ;; returns #t.

  (define (filter-keyword-args keywords allow-other-keys? args)
    ;; Filter keyword arguments from ARGS and return the resulting list.
    ;; KEYWORDS is the list of allowed keywords, and ALLOW-OTHER-KEYS?
    ;; specifies whether keywords not listed in KEYWORDS are allowed.
    (let loop ((args   args)
               (result '()))
      (if (null? args)
          (reverse result)
          (let ((arg (car args)))
            (if (and (const? arg)
                     (or (memq (const-exp arg) keywords)
                         (and allow-other-keys?
                              (keyword? (const-exp arg)))))
                ;; Drop the keyword together with the argument that
                ;; follows it, if there is one.
                (loop (if (pair? (cdr args))
                          (cddr args)
                          '())
                      result)
                (loop (cdr args)
                      (cons arg result)))))))

  (define (arities proc)
    ;; Return the arities of PROC, which can be either a tree-il or a
    ;; procedure.  Two values are returned: the name of PROC (or #f) and
    ;; a list of arities, each of the form
    ;; (NREQ NOPT REST? KEYWORDS ALLOW-OTHER-KEYS?).  When PROC is not of
    ;; a recognized shape, both values are #f.
    (define (len x)
      ;; Length of X if it is a list, 0 otherwise (e.g., when it is #f).
      (or (and (or (null? x) (pair? x))
               (length x))
          0))
    (cond ((program? proc)
           (values (program-name proc)
                   (map (lambda (a)
                          (list (arity:nreq a) (arity:nopt a) (arity:rest? a)
                                (map car (arity:kw a))
                                (arity:allow-other-keys? a)))
                        (program-arities proc))))
          ((procedure? proc)
           (let ((arity (procedure-property proc 'arity)))
             (values (procedure-name proc)
                     (list (list (car arity) (cadr arity) (caddr arity)
                                 #f #f)))))
          (else
           ;; Tree-il: walk the chain of lambda cases, collecting one
           ;; arity per case.
           (let loop ((name    #f)
                      (proc    proc)
                      (arities '()))
             (if (not proc)
                 (values name (reverse arities))
                 (record-case proc
                   ((<lambda-case> req opt rest kw alternate)
                    (loop name alternate
                          (cons (list (len req) (len opt) rest
                                      (and (pair? kw) (map car (cdr kw)))
                                      (and (pair? kw) (car kw)))
                                arities)))
                   ((<lambda> meta body)
                    (loop (assoc-ref meta 'name) body arities))
                   (else
                    (values #f #f))))))))

  (let ((args (application-args application))
        (src  (tree-il-src application)))
    (call-with-values (lambda () (arities proc))
      (lambda (name arities)
        (define matches?
          ;; ARITIES is #f when PROC's arities could not be determined;
          ;; in that case nothing can be said about the call, so treat
          ;; it as matching instead of handing #f to `find'.
          (or (not arities)
              (find (lambda (arity)
                      (pmatch arity
                        ((,req ,opt ,rest? ,kw ,aok?)
                         (let ((args (if (pair? kw)
                                         (filter-keyword-args kw aok? args)
                                         args)))
                           (if (and req opt)
                               (let ((count (length args)))
                                 (and (>= count req)
                                      (or rest?
                                          (<= count (+ req opt)))))
                               #t)))
                        (else #t)))
                    arities)))

        (if (not matches?)
            (warning 'arity-mismatch src
                     (or name (with-output-to-string (lambda () (write proc))))
                     lexical?)))))
  #t)
1040
(define arity-analysis
  ;; Tree analysis that reports calls whose argument count does not match
  ;; the procedure being called.  The state is an arity-info record.
  (make-tree-analysis

   (lambda (x info env locs)
     ;; Leaves carry no information for this analysis.
     info)

   (lambda (x info env locs)
     ;; Entering X.

     (define (with-lexical var val acc)
       ;; ACC with the pair (VAR . VAL) added to its lexical lambdas.
       (make-arity-info (toplevel-procedure-calls acc)
                        (alist-cons var val (lexical-lambdas acc))
                        (toplevel-lambdas acc)))

     (define (with-toplevel name val acc)
       ;; ACC with the pair (NAME . VAL) added to its top-level lambdas.
       (make-arity-info (toplevel-procedure-calls acc)
                        (lexical-lambdas acc)
                        (alist-cons name val (toplevel-lambdas acc))))

     (define (with-call name call acc)
       ;; ACC with the pair (NAME . CALL) added to its top-level calls.
       (make-arity-info (alist-cons name call (toplevel-procedure-calls acc))
                        (lexical-lambdas acc)
                        (toplevel-lambdas acc)))

     (define (extend var val acc)
       ;; Record VAR in ACC when VAL is a lambda, a reference to a
       ;; top-level variable, or a lexical alias of something already
       ;; recorded; otherwise return ACC untouched.
       (record-case val
         ((<lambda> body)
          (with-lexical var val acc))
         ((<toplevel-ref> name)
          ;; Alias of a top-level variable: keep the reference itself.
          (with-lexical var val acc))
         ((<lexical-ref> gensym)
          ;; Alias of another lexical: follow it if it is known.
          (let ((aliased (assq gensym (lexical-lambdas acc))))
            (if (pair? aliased)
                (extend var (cdr aliased) acc)
                acc)))
         (else acc)))

     (record-case x
       ((<toplevel-define> name exp)
        (record-case exp
          ((<lambda> body)
           (with-toplevel name exp info))
          ((<toplevel-ref> name)
           ;; X defines an alias of the top-level NAME; store what NAME
           ;; is known to be, or the reference itself when unknown.
           (let ((known (assq name (toplevel-lambdas info))))
             (with-toplevel (toplevel-define-name x)
                            (if (pair? known)
                                (cdr known)
                                exp)
                            info)))
          (else info)))

       ((<let> vars vals)
        (fold extend info vars vals))
       ((<letrec> vars vals)
        (fold extend info vars vals))
       ((<fix> vars vals)
        (fold extend info vars vals))

       ((<application> proc)
        (record-case proc
          ((<lambda> body)
           ;; Direct application of a lambda: check it right away.
           (validate-arity proc x #t)
           info)
          ((<toplevel-ref> name)
           ;; Checked during post-processing.
           (with-call name x info))
          ((<lexical-ref> gensym)
           (let ((entry (assq gensym (lexical-lambdas info))))
             (if (pair? entry)
                 (record-case (cdr entry)
                   ((<toplevel-ref> name)
                    ;; The lexical is an alias of a top-level variable.
                    (with-call name x info))
                   (else
                    (validate-arity (cdr entry) x #t)
                    info))
                 ;; An unknown GENSYM may be an argument of the procedure
                 ;; being compiled; nothing to check.
                 info)))
          (else info)))

       (else info)))

   (lambda (x info env locs)
     ;; Leaving X.

     (define (shrink var val acc)
       ;; ACC without any lexical-lambdas entry for VAR.  VAL is unused;
       ;; it is only there so that `shrink' can be folded over two lists.
       (make-arity-info (toplevel-procedure-calls acc)
                        (alist-delete var (lexical-lambdas acc) eq?)
                        (toplevel-lambdas acc)))

     (record-case x
       ((<let> vars vals)
        (fold shrink info vars vals))
       ((<letrec> vars vals)
        (fold shrink info vars vals))
       ((<fix> vars vals)
        (fold shrink info vars vals))
       (else info)))

   (lambda (result env)
     ;; Post-processing: validate every recorded call to a top-level
     ;; procedure.

     (define definitions (toplevel-lambdas result))

     (define (module-value name)
       ;; The value of NAME in ENV, or #f when ENV is not a module or the
       ;; lookup raises an exception.
       (and (module? env)
            (false-if-exception (module-ref env name))))

     (define (check name+application)
       (let* ((name   (car name+application))
              (call   (cdr name+application))
              (target (or (assoc-ref definitions name)
                          (module-value name)))
              ;; Resolve one level of top-level aliasing.
              (callee (if (toplevel-ref? target)
                          (module-value (toplevel-ref-name target))
                          target)))
         (if (or (lambda? callee) (procedure? callee))
             (validate-arity callee call (lambda? callee)))))

     (for-each check (toplevel-procedure-calls result)))

   (make-arity-info '() '() '())))