abort always dispatches to VM bytecode, to detect same-invocation aborts
[bpt/guile.git] / module / language / tree-il / analyze.scm
1 ;;; TREE-IL -> GLIL compiler
2
3 ;; Copyright (C) 2001,2008,2009,2010 Free Software Foundation, Inc.
4
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
9 ;;;;
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
14 ;;;;
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Code:
20
21 (define-module (language tree-il analyze)
22 #:use-module (srfi srfi-1)
23 #:use-module (srfi srfi-9)
24 #:use-module (srfi srfi-11)
25 #:use-module (ice-9 vlist)
26 #:use-module (system base syntax)
27 #:use-module (system base message)
28 #:use-module (system vm program)
29 #:use-module (language tree-il)
30 #:use-module (system base pmatch)
31 #:export (analyze-lexicals
32 analyze-tree
33 unused-variable-analysis
34 unused-toplevel-analysis
35 unbound-variable-analysis
36 arity-analysis))
37
38 ;; Allocation is the process of assigning storage locations for lexical
39 ;; variables. A lexical variable has a distinct "address", or storage
40 ;; location, for each procedure in which it is referenced.
41 ;;
42 ;; A variable is "local", i.e., allocated on the stack, if it is
43 ;; referenced from within the procedure that defined it. Otherwise it is
44 ;; a "closure" variable. For example:
45 ;;
46 ;; (lambda (a) a) ; a will be local
47 ;; `a' is local to the procedure.
48 ;;
49 ;; (lambda (a) (lambda () a))
50 ;; `a' is local to the outer procedure, but a closure variable with
51 ;; respect to the inner procedure.
52 ;;
53 ;; If a variable is ever assigned, it needs to be heap-allocated
54 ;; ("boxed"). This is so that closures and continuations capture the
55 ;; variable's identity, not just one of the values it may have over the
56 ;; course of program execution. If the variable is never assigned, there
57 ;; is no distinction between value and identity, so closing over its
58 ;; identity (whether through closures or continuations) can make a copy
59 ;; of its value instead.
60 ;;
61 ;; Local variables are stored on the stack within a procedure's call
62 ;; frame. Their index into the stack is determined from their linear
63 ;; postion within a procedure's binding path:
64 ;; (let (0 1)
65 ;; (let (2 3) ...)
66 ;; (let (2) ...))
67 ;; (let (2 3 4) ...))
68 ;; etc.
69 ;;
70 ;; This algorithm has the problem that variables are only allocated
71 ;; indices at the end of the binding path. If variables bound early in
72 ;; the path are not used in later portions of the path, their indices
73 ;; will not be recycled. This problem is particularly egregious in the
74 ;; expansion of `or':
75 ;;
76 ;; (or x y z)
77 ;; -> (let ((a x)) (if a a (let ((b y)) (if b b z))))
78 ;;
79 ;; As you can see, the `a' binding is only used in the ephemeral
80 ;; `consequent' clause of the first `if', but its index would be
81 ;; reserved for the whole of the `or' expansion. So we have a hack for
82 ;; this specific case. A proper solution would be some sort of liveness
83 ;; analysis, and not our linear allocation algorithm.
84 ;;
85 ;; Closure variables are captured when a closure is created, and stored in a
86 ;; vector inline to the closure object itself. Each closure variable has a
87 ;; unique index into that vector.
88 ;;
89 ;; There is one more complication. Procedures bound by <fix> may, in
90 ;; some cases, be rendered inline to their parent procedure. That is to
91 ;; say,
92 ;;
93 ;; (letrec ((lp (lambda () (lp)))) (lp))
94 ;; => (fix ((lp (lambda () (lp)))) (lp))
95 ;; => goto FIX-BODY; LP: goto LP; FIX-BODY: goto LP;
96 ;; ^ jump over the loop ^ the fixpoint lp ^ starting off the loop
97 ;;
98 ;; The upshot is that we don't have to allocate any space for the `lp'
99 ;; closure at all, as it can be rendered inline as a loop. So there is
100 ;; another kind of allocation, "label allocation", in which the
101 ;; procedure is simply a label, placed at the start of the lambda body.
102 ;; The label is the gensym under which the lambda expression is bound.
103 ;;
104 ;; The analyzer checks to see that the label is called with the correct
105 ;; number of arguments. Calls to labels compile to rename + goto.
106 ;; Lambda, the ultimate goto!
107 ;;
108 ;;
109 ;; The return value of `analyze-lexicals' is a hash table, the
110 ;; "allocation".
111 ;;
112 ;; The allocation maps gensyms -- recall that each lexically bound
113 ;; variable has a unique gensym -- to storage locations ("addresses").
114 ;; Since one gensym may have many storage locations, if it is referenced
115 ;; in many procedures, it is a two-level map.
116 ;;
117 ;; The allocation also stored information on how many local variables
118 ;; need to be allocated for each procedure, lexicals that have been
119 ;; translated into labels, and information on what free variables to
120 ;; capture from its lexical parent procedure.
121 ;;
122 ;; In addition, we have a conflation: while we're traversing the code,
123 ;; recording information to pass to the compiler, we take the
124 ;; opportunity to generate labels for each lambda-case clause, so that
125 ;; generated code can skip argument checks at runtime if they match at
126 ;; compile-time.
127 ;;
128 ;; Also, while we're a-traversing and an-allocating, we check prompt
129 ;; handlers to see if the "continuation" argument is used. If not, we
130 ;; mark the prompt as being "escape-only". This allows us to implement
131 ;; `catch' and `throw' using `prompt' and `control', but without causing
132 ;; a continuation to be reified. Heh heh.
133 ;;
134 ;; That is:
135 ;;
136 ;; sym -> {lambda -> address}
137 ;; lambda -> (labels . free-locs)
138 ;; lambda-case -> (gensym . nlocs)
139 ;; prompt -> escape-only?
140 ;;
141 ;; address ::= (local? boxed? . index)
142 ;; labels ::= ((sym . lambda) ...)
143 ;; free-locs ::= ((sym0 . address0) (sym1 . address1) ...)
144 ;; free variable addresses are relative to parent proc.
145
146 (define (make-hashq k v)
147 (let ((res (make-hash-table)))
148 (hashq-set! res k v)
149 res))
150
151 (define (analyze-lexicals x)
152 ;; bound-vars: lambda -> (sym ...)
153 ;; all identifiers bound within a lambda
154 (define bound-vars (make-hash-table))
155 ;; free-vars: lambda -> (sym ...)
156 ;; all identifiers referenced in a lambda, but not bound
157 ;; NB, this includes identifiers referenced by contained lambdas
158 (define free-vars (make-hash-table))
159 ;; assigned: sym -> #t
160 ;; variables that are assigned
161 (define assigned (make-hash-table))
162 ;; refcounts: sym -> count
163 ;; allows us to detect the or-expansion in O(1) time
164 (define refcounts (make-hash-table))
165 ;; labels: sym -> lambda
166 ;; for determining if fixed-point procedures can be rendered as
167 ;; labels.
168 (define labels (make-hash-table))
169
170 ;; returns variables referenced in expr
171 (define (analyze! x proc labels-in-proc tail? tail-call-args)
172 (define (step y) (analyze! y proc labels-in-proc #f #f))
173 (define (step-tail y) (analyze! y proc labels-in-proc tail? #f))
174 (define (step-tail-call y args) (analyze! y proc labels-in-proc #f
175 (and tail? args)))
176 (define (recur/labels x new-proc labels)
177 (analyze! x new-proc (append labels labels-in-proc) #t #f))
178 (define (recur x new-proc) (analyze! x new-proc '() tail? #f))
179 (record-case x
180 ((<application> proc args)
181 (apply lset-union eq? (step-tail-call proc args)
182 (map step args)))
183
184 ((<conditional> test consequent alternate)
185 (lset-union eq? (step test) (step-tail consequent) (step-tail alternate)))
186
187 ((<lexical-ref> gensym)
188 (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
189 (if (not (and tail-call-args
190 (memq gensym labels-in-proc)
191 (let ((p (hashq-ref labels gensym)))
192 (and p
193 (let lp ((c (lambda-body p)))
194 (and c (lambda-case? c)
195 (or
196 ;; for now prohibit optional &
197 ;; keyword arguments; can relax this
198 ;; restriction later
199 (and (= (length (lambda-case-req c))
200 (length tail-call-args))
201 (not (lambda-case-opt c))
202 (not (lambda-case-kw c))
203 (not (lambda-case-rest c)))
204 (lp (lambda-case-alternate c)))))))))
205 (hashq-set! labels gensym #f))
206 (list gensym))
207
208 ((<lexical-set> gensym exp)
209 (hashq-set! assigned gensym #t)
210 (hashq-set! labels gensym #f)
211 (lset-adjoin eq? (step exp) gensym))
212
213 ((<module-set> exp)
214 (step exp))
215
216 ((<toplevel-set> exp)
217 (step exp))
218
219 ((<toplevel-define> exp)
220 (step exp))
221
222 ((<sequence> exps)
223 (let lp ((exps exps) (ret '()))
224 (cond ((null? exps) '())
225 ((null? (cdr exps))
226 (lset-union eq? ret (step-tail (car exps))))
227 (else
228 (lp (cdr exps) (lset-union eq? ret (step (car exps))))))))
229
230 ((<lambda> body)
231 ;; order is important here
232 (hashq-set! bound-vars x '())
233 (let ((free (recur body x)))
234 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
235 (hashq-set! free-vars x free)
236 free))
237
238 ((<lambda-case> opt kw inits vars body alternate)
239 (hashq-set! bound-vars proc
240 (append (reverse vars) (hashq-ref bound-vars proc)))
241 (lset-union
242 eq?
243 (lset-difference eq?
244 (lset-union eq?
245 (apply lset-union eq? (map step inits))
246 (step-tail body))
247 vars)
248 (if alternate (step-tail alternate) '())))
249
250 ((<let> vars vals body)
251 (hashq-set! bound-vars proc
252 (append (reverse vars) (hashq-ref bound-vars proc)))
253 (lset-difference eq?
254 (apply lset-union eq? (step-tail body) (map step vals))
255 vars))
256
257 ((<letrec> vars vals body)
258 (hashq-set! bound-vars proc
259 (append (reverse vars) (hashq-ref bound-vars proc)))
260 (for-each (lambda (sym) (hashq-set! assigned sym #t)) vars)
261 (lset-difference eq?
262 (apply lset-union eq? (step-tail body) (map step vals))
263 vars))
264
265 ((<fix> vars vals body)
266 ;; Try to allocate these procedures as labels.
267 (for-each (lambda (sym val) (hashq-set! labels sym val))
268 vars vals)
269 (hashq-set! bound-vars proc
270 (append (reverse vars) (hashq-ref bound-vars proc)))
271 ;; Step into subexpressions.
272 (let* ((var-refs
273 (map
274 ;; Since we're trying to label-allocate the lambda,
275 ;; pretend it's not a closure, and just recurse into its
276 ;; body directly. (Otherwise, recursing on a closure
277 ;; that references one of the fix's bound vars would
278 ;; prevent label allocation.)
279 (lambda (x)
280 (record-case x
281 ((<lambda> body)
282 ;; just like the closure case, except here we use
283 ;; recur/labels instead of recur
284 (hashq-set! bound-vars x '())
285 (let ((free (recur/labels body x vars)))
286 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
287 (hashq-set! free-vars x free)
288 free))))
289 vals))
290 (vars-with-refs (map cons vars var-refs))
291 (body-refs (recur/labels body proc vars)))
292 (define (delabel-dependents! sym)
293 (let ((refs (assq-ref vars-with-refs sym)))
294 (if refs
295 (for-each (lambda (sym)
296 (if (hashq-ref labels sym)
297 (begin
298 (hashq-set! labels sym #f)
299 (delabel-dependents! sym))))
300 refs))))
301 ;; Stepping into the lambdas and the body might have made some
302 ;; procedures not label-allocatable -- which might have
303 ;; knock-on effects. For example:
304 ;; (fix ((a (lambda () (b)))
305 ;; (b (lambda () a)))
306 ;; (a))
307 ;; As far as `a' is concerned, both `a' and `b' are
308 ;; label-allocatable. But `b' references `a' not in a proc-tail
309 ;; position, which makes `a' not label-allocatable. The
310 ;; knock-on effect is that, when back-propagating this
311 ;; information to `a', `b' will also become not
312 ;; label-allocatable, as it is referenced within `a', which is
313 ;; allocated as a closure. This is a transitive relationship.
314 (for-each (lambda (sym)
315 (if (not (hashq-ref labels sym))
316 (delabel-dependents! sym)))
317 vars)
318 ;; Now lift bound variables with label-allocated lambdas to the
319 ;; parent procedure.
320 (for-each
321 (lambda (sym val)
322 (if (hashq-ref labels sym)
323 ;; Remove traces of the label-bound lambda. The free
324 ;; vars will propagate up via the return val.
325 (begin
326 (hashq-set! bound-vars proc
327 (append (hashq-ref bound-vars val)
328 (hashq-ref bound-vars proc)))
329 (hashq-remove! bound-vars val)
330 (hashq-remove! free-vars val))))
331 vars vals)
332 (lset-difference eq?
333 (apply lset-union eq? body-refs var-refs)
334 vars)))
335
336 ((<let-values> exp body)
337 (lset-union eq? (step exp) (step body)))
338
339 ((<dynwind> body winder unwinder)
340 (lset-union eq? (step body) (step winder) (step unwinder)))
341
342 ((<dynlet> fluids vals body)
343 (apply lset-union eq? (step body) (map step (append fluids vals))))
344
345 ((<dynref> fluid)
346 (step fluid))
347
348 ((<dynset> fluid exp)
349 (lset-union eq? (step fluid) (step exp)))
350
351 ((<prompt> tag body handler)
352 (lset-union eq? (step tag) (step body) (step handler)))
353
354 ((<abort> tag args tail)
355 (apply lset-union eq? (step tag) (step tail) (map step args)))
356
357 (else '())))
358
359 ;; allocation: sym -> {lambda -> address}
360 ;; lambda -> (nlocs labels . free-locs)
361 (define allocation (make-hash-table))
362
363 (define (allocate! x proc n)
364 (define (recur y) (allocate! y proc n))
365 (record-case x
366 ((<application> proc args)
367 (apply max (recur proc) (map recur args)))
368
369 ((<conditional> test consequent alternate)
370 (max (recur test) (recur consequent) (recur alternate)))
371
372 ((<lexical-set> exp)
373 (recur exp))
374
375 ((<module-set> exp)
376 (recur exp))
377
378 ((<toplevel-set> exp)
379 (recur exp))
380
381 ((<toplevel-define> exp)
382 (recur exp))
383
384 ((<sequence> exps)
385 (apply max (map recur exps)))
386
387 ((<lambda> body)
388 ;; allocate closure vars in order
389 (let lp ((c (hashq-ref free-vars x)) (n 0))
390 (if (pair? c)
391 (begin
392 (hashq-set! (hashq-ref allocation (car c))
393 x
394 `(#f ,(hashq-ref assigned (car c)) . ,n))
395 (lp (cdr c) (1+ n)))))
396
397 (let ((nlocs (allocate! body x 0))
398 (free-addresses
399 (map (lambda (v)
400 (hashq-ref (hashq-ref allocation v) proc))
401 (hashq-ref free-vars x)))
402 (labels (filter cdr
403 (map (lambda (sym)
404 (cons sym (hashq-ref labels sym)))
405 (hashq-ref bound-vars x)))))
406 ;; set procedure allocations
407 (hashq-set! allocation x (cons labels free-addresses)))
408 n)
409
410 ((<lambda-case> opt kw inits vars body alternate)
411 (max
412 (let lp ((vars vars) (n n))
413 (if (null? vars)
414 (let ((nlocs (apply
415 max
416 (allocate! body proc n)
417 ;; inits not logically at the end, but they
418 ;; are the list...
419 (map (lambda (x) (allocate! x proc n)) inits))))
420 ;; label and nlocs for the case
421 (hashq-set! allocation x (cons (gensym ":LCASE") nlocs))
422 nlocs)
423 (begin
424 (hashq-set! allocation (car vars)
425 (make-hashq
426 proc `(#t ,(hashq-ref assigned (car vars)) . ,n)))
427 (lp (cdr vars) (1+ n)))))
428 (if alternate (allocate! alternate proc n) n)))
429
430 ((<let> vars vals body)
431 (let ((nmax (apply max (map recur vals))))
432 (cond
433 ;; the `or' hack
434 ((and (conditional? body)
435 (= (length vars) 1)
436 (let ((v (car vars)))
437 (and (not (hashq-ref assigned v))
438 (= (hashq-ref refcounts v 0) 2)
439 (lexical-ref? (conditional-test body))
440 (eq? (lexical-ref-gensym (conditional-test body)) v)
441 (lexical-ref? (conditional-consequent body))
442 (eq? (lexical-ref-gensym (conditional-consequent body)) v))))
443 (hashq-set! allocation (car vars)
444 (make-hashq proc `(#t #f . ,n)))
445 ;; the 1+ for this var
446 (max nmax (1+ n) (allocate! (conditional-alternate body) proc n)))
447 (else
448 (let lp ((vars vars) (n n))
449 (if (null? vars)
450 (max nmax (allocate! body proc n))
451 (let ((v (car vars)))
452 (hashq-set!
453 allocation v
454 (make-hashq proc
455 `(#t ,(hashq-ref assigned v) . ,n)))
456 (lp (cdr vars) (1+ n)))))))))
457
458 ((<letrec> vars vals body)
459 (let lp ((vars vars) (n n))
460 (if (null? vars)
461 (let ((nmax (apply max
462 (map (lambda (x)
463 (allocate! x proc n))
464 vals))))
465 (max nmax (allocate! body proc n)))
466 (let ((v (car vars)))
467 (hashq-set!
468 allocation v
469 (make-hashq proc
470 `(#t ,(hashq-ref assigned v) . ,n)))
471 (lp (cdr vars) (1+ n))))))
472
473 ((<fix> vars vals body)
474 (let lp ((in vars) (n n))
475 (if (null? in)
476 (let lp ((vars vars) (vals vals) (nmax n))
477 (cond
478 ((null? vars)
479 (max nmax (allocate! body proc n)))
480 ((hashq-ref labels (car vars))
481 ;; allocate lambda body inline to proc
482 (lp (cdr vars)
483 (cdr vals)
484 (record-case (car vals)
485 ((<lambda> body)
486 (max nmax (allocate! body proc n))))))
487 (else
488 ;; allocate closure
489 (lp (cdr vars)
490 (cdr vals)
491 (max nmax (allocate! (car vals) proc n))))))
492
493 (let ((v (car in)))
494 (cond
495 ((hashq-ref assigned v)
496 (error "fixpoint procedures may not be assigned" x))
497 ((hashq-ref labels v)
498 ;; no binding, it's a label
499 (lp (cdr in) n))
500 (else
501 ;; allocate closure binding
502 (hashq-set! allocation v (make-hashq proc `(#t #f . ,n)))
503 (lp (cdr in) (1+ n))))))))
504
505 ((<let-values> exp body)
506 (max (recur exp) (recur body)))
507
508 ((<dynwind> body winder unwinder)
509 (max (recur body) (recur winder) (recur unwinder)))
510
511 ((<dynlet> fluids vals body)
512 (apply max (recur body) (map recur (append fluids vals))))
513
514 ((<dynref> fluid)
515 (recur fluid))
516
517 ((<dynset> fluid exp)
518 (max (recur fluid) (recur exp)))
519
520 ((<prompt> tag body handler)
521 (let ((cont-var (and (lambda-case? handler)
522 (pair? (lambda-case-vars handler))
523 (car (lambda-case-vars handler)))))
524 (hashq-set! allocation x
525 (and cont-var (zero? (hashq-ref refcounts cont-var 0))))
526 (max (recur tag) (recur body) (recur handler))))
527
528 ((<abort> tag args tail)
529 (apply max (recur tag) (recur tail) (map recur args)))
530
531 (else n)))
532
533 (analyze! x #f '() #t #f)
534 (allocate! x #f 0)
535
536 allocation)
537
538 \f
539 ;;;
540 ;;; Tree analyses for warnings.
541 ;;;
542
543 (define-record-type <tree-analysis>
544 (make-tree-analysis leaf down up post init)
545 tree-analysis?
546 (leaf tree-analysis-leaf) ;; (lambda (x result env locs) ...)
547 (down tree-analysis-down) ;; (lambda (x result env locs) ...)
548 (up tree-analysis-up) ;; (lambda (x result env locs) ...)
549 (post tree-analysis-post) ;; (lambda (result env) ...)
550 (init tree-analysis-init)) ;; arbitrary value
551
552 (define (analyze-tree analyses tree env)
553 "Run all tree analyses listed in ANALYSES on TREE for ENV, using
554 `tree-il-fold'. Return TREE. The leaf/down/up procedures of each analysis are
555 passed a ``location stack', which is the stack of `tree-il-src' values for each
556 parent tree (a list); it can be used to approximate source location when
557 accurate information is missing from a given `tree-il' element."
558
559 (define (traverse proc update-locs)
560 ;; Return a tree traversing procedure that returns a list of analysis
561 ;; results prepended by the location stack.
562 (lambda (x results)
563 (let ((locs (update-locs x (car results))))
564 (cons locs ;; the location stack
565 (map (lambda (analysis result)
566 ((proc analysis) x result env locs))
567 analyses
568 (cdr results))))))
569
570 ;; Keeping/extending/shrinking the location stack.
571 (define (keep-locs x locs) locs)
572 (define (extend-locs x locs) (cons (tree-il-src x) locs))
573 (define (shrink-locs x locs) (cdr locs))
574
575 (let ((results
576 (tree-il-fold (traverse tree-analysis-leaf keep-locs)
577 (traverse tree-analysis-down extend-locs)
578 (traverse tree-analysis-up shrink-locs)
579 (cons '() ;; empty location stack
580 (map tree-analysis-init analyses))
581 tree)))
582
583 (for-each (lambda (analysis result)
584 ((tree-analysis-post analysis) result env))
585 analyses
586 (cdr results)))
587
588 tree)
589
590 \f
591 ;;;
592 ;;; Unused variable analysis.
593 ;;;
594
595 ;; <binding-info> records are used during tree traversals in
596 ;; `unused-variable-analysis'. They contain a list of the local vars
597 ;; currently in scope, and a list of locals vars that have been referenced.
598 (define-record-type <binding-info>
599 (make-binding-info vars refs)
600 binding-info?
601 (vars binding-info-vars) ;; ((GENSYM NAME LOCATION) ...)
602 (refs binding-info-refs)) ;; (GENSYM ...)
603
604 (define unused-variable-analysis
605 ;; Report unused variables in the given tree.
606 (make-tree-analysis
607 (lambda (x info env locs)
608 ;; X is a leaf: extend INFO's refs accordingly.
609 (let ((refs (binding-info-refs info))
610 (vars (binding-info-vars info)))
611 (record-case x
612 ((<lexical-ref> gensym)
613 (make-binding-info vars (vhash-consq gensym #t refs)))
614 (else info))))
615
616 (lambda (x info env locs)
617 ;; Going down into X: extend INFO's variable list
618 ;; accordingly.
619 (let ((refs (binding-info-refs info))
620 (vars (binding-info-vars info))
621 (src (tree-il-src x)))
622 (define (extend inner-vars inner-names)
623 (fold (lambda (var name vars)
624 (vhash-consq var (list name src) vars))
625 vars
626 inner-vars
627 inner-names))
628
629 (record-case x
630 ((<lexical-set> gensym)
631 (make-binding-info vars (vhash-consq gensym #t refs)))
632 ((<lambda-case> req opt inits rest kw vars)
633 (let ((names `(,@req
634 ,@(or opt '())
635 ,@(if rest (list rest) '())
636 ,@(if kw (map cadr (cdr kw)) '()))))
637 (make-binding-info (extend vars names) refs)))
638 ((<let> vars names)
639 (make-binding-info (extend vars names) refs))
640 ((<letrec> vars names)
641 (make-binding-info (extend vars names) refs))
642 ((<fix> vars names)
643 (make-binding-info (extend vars names) refs))
644 (else info))))
645
646 (lambda (x info env locs)
647 ;; Leaving X's scope: shrink INFO's variable list
648 ;; accordingly and reported unused nested variables.
649 (let ((refs (binding-info-refs info))
650 (vars (binding-info-vars info)))
651 (define (shrink inner-vars refs)
652 (vlist-for-each
653 (lambda (var)
654 (let ((gensym (car var)))
655 ;; Don't report lambda parameters as unused.
656 (if (and (memq gensym inner-vars)
657 (not (vhash-assq gensym refs))
658 (not (lambda-case? x)))
659 (let ((name (cadr var))
660 ;; We can get approximate source location by going up
661 ;; the LOCS location stack.
662 (loc (or (caddr var)
663 (find pair? locs))))
664 (warning 'unused-variable loc name)))))
665 vars)
666 (vlist-drop vars (length inner-vars)))
667
668 ;; For simplicity, we leave REFS untouched, i.e., with
669 ;; names of variables that are now going out of scope.
670 ;; It doesn't hurt as these are unique names, it just
671 ;; makes REFS unnecessarily fat.
672 (record-case x
673 ((<lambda-case> vars)
674 (make-binding-info (shrink vars refs) refs))
675 ((<let> vars)
676 (make-binding-info (shrink vars refs) refs))
677 ((<letrec> vars)
678 (make-binding-info (shrink vars refs) refs))
679 ((<fix> vars)
680 (make-binding-info (shrink vars refs) refs))
681 (else info))))
682
683 (lambda (result env) #t)
684 (make-binding-info vlist-null vlist-null)))
685
686 \f
687 ;;;
688 ;;; Unused top-level variable analysis.
689 ;;;
690
691 ;; <reference-graph> record top-level definitions that are made, references to
692 ;; top-level definitions and their context (the top-level definition in which
693 ;; the reference appears), as well as the current context (the top-level
694 ;; definition we're currently in). The second part (`refs' below) is
695 ;; effectively a graph from which we can determine unused top-level definitions.
696 (define-record-type <reference-graph>
697 (make-reference-graph refs defs toplevel-context)
698 reference-graph?
699 (defs reference-graph-defs) ;; ((NAME . LOC) ...)
700 (refs reference-graph-refs) ;; ((REF-CONTEXT REF ...) ...)
701 (toplevel-context reference-graph-toplevel-context)) ;; NAME | #f
702
703 (define (graph-reachable-nodes root refs reachable)
704 ;; Add to REACHABLE the nodes reachable from ROOT in graph REFS. REFS is a
705 ;; vhash mapping nodes to the list of their children: for instance,
706 ;; ((A -> (B C)) (B -> (A)) (C -> ())) corresponds to
707 ;;
708 ;; ,-------.
709 ;; v |
710 ;; A ----> B
711 ;; |
712 ;; v
713 ;; C
714 ;;
715 ;; REACHABLE is a vhash of nodes known to be otherwise reachable.
716
717 (let loop ((root root)
718 (path vlist-null)
719 (result reachable))
720 (if (or (vhash-assq root path)
721 (vhash-assq root result))
722 result
723 (let* ((children (or (and=> (vhash-assq root refs) cdr) '()))
724 (path (vhash-consq root #t path))
725 (result (fold (lambda (kid result)
726 (loop kid path result))
727 result
728 children)))
729 (fold (lambda (kid result)
730 (vhash-consq kid #t result))
731 result
732 children)))))
733
734 (define (graph-reachable-nodes* roots refs)
735 ;; Return the list of nodes in REFS reachable from the nodes listed in ROOTS.
736 (vlist-fold (lambda (root+true result)
737 (let* ((root (car root+true))
738 (reachable (graph-reachable-nodes root refs result)))
739 (vhash-consq root #t reachable)))
740 vlist-null
741 roots))
742
743 (define (partition* pred vhash)
744 ;; Partition VHASH according to PRED. Return the two resulting vhashes.
745 (let ((result
746 (vlist-fold (lambda (k+v result)
747 (let ((k (car k+v))
748 (v (cdr k+v))
749 (r1 (car result))
750 (r2 (cdr result)))
751 (if (pred k)
752 (cons (vhash-consq k v r1) r2)
753 (cons r1 (vhash-consq k v r2)))))
754 (cons vlist-null vlist-null)
755 vhash)))
756 (values (car result) (cdr result))))
757
758 (define unused-toplevel-analysis
759 ;; Report unused top-level definitions that are not exported.
760 (let ((add-ref-from-context
761 (lambda (graph name)
762 ;; Add an edge CTX -> NAME in GRAPH.
763 (let* ((refs (reference-graph-refs graph))
764 (defs (reference-graph-defs graph))
765 (ctx (reference-graph-toplevel-context graph))
766 (ctx-refs (or (and=> (vhash-assq ctx refs) cdr) '())))
767 (make-reference-graph (vhash-consq ctx (cons name ctx-refs) refs)
768 defs ctx)))))
769 (define (macro-variable? name env)
770 (and (module? env)
771 (let ((var (module-variable env name)))
772 (and var (variable-bound? var)
773 (macro? (variable-ref var))))))
774
775 (make-tree-analysis
776 (lambda (x graph env locs)
777 ;; X is a leaf.
778 (let ((ctx (reference-graph-toplevel-context graph)))
779 (record-case x
780 ((<toplevel-ref> name src)
781 (add-ref-from-context graph name))
782 (else graph))))
783
784 (lambda (x graph env locs)
785 ;; Going down into X.
786 (let ((ctx (reference-graph-toplevel-context graph))
787 (refs (reference-graph-refs graph))
788 (defs (reference-graph-defs graph)))
789 (record-case x
790 ((<toplevel-define> name src)
791 (let ((refs refs)
792 (defs (vhash-consq name (or src (find pair? locs))
793 defs)))
794 (make-reference-graph refs defs name)))
795 ((<toplevel-set> name src)
796 (add-ref-from-context graph name))
797 (else graph))))
798
799 (lambda (x graph env locs)
800 ;; Leaving X's scope.
801 (record-case x
802 ((<toplevel-define>)
803 (let ((refs (reference-graph-refs graph))
804 (defs (reference-graph-defs graph)))
805 (make-reference-graph refs defs #f)))
806 (else graph)))
807
808 (lambda (graph env)
809 ;; Process the resulting reference graph: determine all private definitions
810 ;; not reachable from any public definition. Macros
811 ;; (syntax-transformers), which are globally bound, never considered
812 ;; unused since we can't tell whether a macro is actually used; in
813 ;; addition, macros are considered roots of the graph since they may use
814 ;; private bindings. FIXME: The `make-syntax-transformer' calls don't
815 ;; contain any literal `toplevel-ref' of the global bindings they use so
816 ;; this strategy fails.
817 (define (exported? name)
818 (if (module? env)
819 (module-variable (module-public-interface env) name)
820 #t))
821
822 (let-values (((public-defs private-defs)
823 (partition* (lambda (name)
824 (or (exported? name)
825 (macro-variable? name env)))
826 (reference-graph-defs graph))))
827 (let* ((roots (vhash-consq #f #t public-defs))
828 (refs (reference-graph-refs graph))
829 (reachable (graph-reachable-nodes* roots refs))
830 (unused (vlist-filter (lambda (name+src)
831 (not (vhash-assq (car name+src)
832 reachable)))
833 private-defs)))
834 (vlist-for-each (lambda (name+loc)
835 (let ((name (car name+loc))
836 (loc (cdr name+loc)))
837 (warning 'unused-toplevel loc name)))
838 unused))))
839
840 (make-reference-graph vlist-null vlist-null #f))))
841
842 \f
843 ;;;
844 ;;; Unbound variable analysis.
845 ;;;
846
847 ;; <toplevel-info> records are used during tree traversal in search of
848 ;; possibly unbound variable. They contain a list of references to
849 ;; potentially unbound top-level variables, and a list of the top-level
850 ;; defines that have been encountered.
851 (define-record-type <toplevel-info>
852 (make-toplevel-info refs defs)
853 toplevel-info?
854 (refs toplevel-info-refs) ;; ((VARIABLE-NAME . LOCATION) ...)
855 (defs toplevel-info-defs)) ;; (VARIABLE-NAME ...)
856
857 (define (goops-toplevel-definition proc args env)
858 ;; If application of PROC to ARGS is a GOOPS top-level definition, return
859 ;; the name of the variable being defined; otherwise return #f. This
860 ;; assumes knowledge of the current implementation of `define-class' et al.
861 (define (toplevel-define-arg args)
862 (and (pair? args) (pair? (cdr args)) (null? (cddr args))
863 (record-case (car args)
864 ((<const> exp)
865 (and (symbol? exp) exp))
866 (else #f))))
867
868 (record-case proc
869 ((<module-ref> mod public? name)
870 (and (equal? mod '(oop goops))
871 (not public?)
872 (eq? name 'toplevel-define!)
873 (toplevel-define-arg args)))
874 ((<toplevel-ref> name)
875 ;; This may be the result of expanding one of the GOOPS macros within
876 ;; `oop/goops.scm'.
877 (and (eq? name 'toplevel-define!)
878 (eq? env (resolve-module '(oop goops)))
879 (toplevel-define-arg args)))
880 (else #f)))
881
882 (define unbound-variable-analysis
883 ;; Report possibly unbound variables in the given tree.
884 (make-tree-analysis
885 (lambda (x info env locs)
886 ;; X is a leaf: extend INFO's refs accordingly.
887 (let ((refs (toplevel-info-refs info))
888 (defs (toplevel-info-defs info)))
889 (define (bound? name)
890 (or (and (module? env)
891 (module-variable env name))
892 (vhash-assq name defs)))
893
894 (record-case x
895 ((<toplevel-ref> name src)
896 (if (bound? name)
897 info
898 (let ((src (or src (find pair? locs))))
899 (make-toplevel-info (vhash-consq name src refs)
900 defs))))
901 (else info))))
902
903 (lambda (x info env locs)
904 ;; Going down into X.
905 (let* ((refs (toplevel-info-refs info))
906 (defs (toplevel-info-defs info))
907 (src (tree-il-src x)))
908 (define (bound? name)
909 (or (and (module? env)
910 (module-variable env name))
911 (vhash-assq name defs)))
912
913 (record-case x
914 ((<toplevel-set> name src)
915 (if (bound? name)
916 (make-toplevel-info refs defs)
917 (let ((src (find pair? locs)))
918 (make-toplevel-info (vhash-consq name src refs)
919 defs))))
920 ((<toplevel-define> name)
921 (make-toplevel-info (vhash-delete name refs eq?)
922 (vhash-consq name #t defs)))
923
924 ((<application> proc args)
925 ;; Check for a dynamic top-level definition, as is
926 ;; done by code expanded from GOOPS macros.
927 (let ((name (goops-toplevel-definition proc args
928 env)))
929 (if (symbol? name)
930 (make-toplevel-info (vhash-delete name refs
931 eq?)
932 (vhash-consq name #t defs))
933 (make-toplevel-info refs defs))))
934 (else
935 (make-toplevel-info refs defs)))))
936
937 (lambda (x info env locs)
938 ;; Leaving X's scope.
939 info)
940
941 (lambda (toplevel env)
942 ;; Post-process the result.
943 (vlist-for-each (lambda (name+loc)
944 (let ((name (car name+loc))
945 (loc (cdr name+loc)))
946 (warning 'unbound-variable loc name)))
947 (vlist-reverse (toplevel-info-refs toplevel))))
948
949 (make-toplevel-info vlist-null vlist-null)))
950
951 \f
952 ;;;
953 ;;; Arity analysis.
954 ;;;
955
956 ;; <arity-info> records contain information about lexical definitions of
957 ;; procedures currently in scope, top-level procedure definitions that have
958 ;; been encountered, and calls to top-level procedures that have been
959 ;; encountered.
960 (define-record-type <arity-info>
961 (make-arity-info toplevel-calls lexical-lambdas toplevel-lambdas)
962 arity-info?
963 (toplevel-calls toplevel-procedure-calls) ;; ((NAME . APPLICATION) ...)
964 (lexical-lambdas lexical-lambdas) ;; ((GENSYM . DEFINITION) ...)
965 (toplevel-lambdas toplevel-lambdas)) ;; ((NAME . DEFINITION) ...)
966
967 (define (validate-arity proc application lexical?)
968 ;; Validate the argument count of APPLICATION, a tree-il application of
969 ;; PROC, emitting a warning in case of argument count mismatch.
970
971 (define (filter-keyword-args keywords allow-other-keys? args)
972 ;; Filter keyword arguments from ARGS and return the resulting list.
973 ;; KEYWORDS is the list of allowed keywords, and ALLOW-OTHER-KEYS?
974 ;; specified whethere keywords not listed in KEYWORDS are allowed.
975 (let loop ((args args)
976 (result '()))
977 (if (null? args)
978 (reverse result)
979 (let ((arg (car args)))
980 (if (and (const? arg)
981 (or (memq (const-exp arg) keywords)
982 (and allow-other-keys?
983 (keyword? (const-exp arg)))))
984 (loop (if (pair? (cdr args))
985 (cddr args)
986 '())
987 result)
988 (loop (cdr args)
989 (cons arg result)))))))
990
991 (define (arities proc)
992 ;; Return the arities of PROC, which can be either a tree-il or a
993 ;; procedure.
994 (define (len x)
995 (or (and (or (null? x) (pair? x))
996 (length x))
997 0))
998 (cond ((program? proc)
999 (values (program-name proc)
1000 (map (lambda (a)
1001 (list (arity:nreq a) (arity:nopt a) (arity:rest? a)
1002 (map car (arity:kw a))
1003 (arity:allow-other-keys? a)))
1004 (program-arities proc))))
1005 ((procedure? proc)
1006 (let ((arity (procedure-property proc 'arity)))
1007 (values (procedure-name proc)
1008 (list (list (car arity) (cadr arity) (caddr arity)
1009 #f #f)))))
1010 (else
1011 (let loop ((name #f)
1012 (proc proc)
1013 (arities '()))
1014 (if (not proc)
1015 (values name (reverse arities))
1016 (record-case proc
1017 ((<lambda-case> req opt rest kw alternate)
1018 (loop name alternate
1019 (cons (list (len req) (len opt) rest
1020 (and (pair? kw) (map car (cdr kw)))
1021 (and (pair? kw) (car kw)))
1022 arities)))
1023 ((<lambda> meta body)
1024 (loop (assoc-ref meta 'name) body arities))
1025 (else
1026 (values #f #f))))))))
1027
1028 (let ((args (application-args application))
1029 (src (tree-il-src application)))
1030 (call-with-values (lambda () (arities proc))
1031 (lambda (name arities)
1032 (define matches?
1033 (find (lambda (arity)
1034 (pmatch arity
1035 ((,req ,opt ,rest? ,kw ,aok?)
1036 (let ((args (if (pair? kw)
1037 (filter-keyword-args kw aok? args)
1038 args)))
1039 (if (and req opt)
1040 (let ((count (length args)))
1041 (and (>= count req)
1042 (or rest?
1043 (<= count (+ req opt)))))
1044 #t)))
1045 (else #t)))
1046 arities))
1047
1048 (if (not matches?)
1049 (warning 'arity-mismatch src
1050 (or name (with-output-to-string (lambda () (write proc))))
1051 lexical?)))))
1052 #t)
1053
1054 (define arity-analysis
1055 ;; Report arity mismatches in the given tree.
1056 (make-tree-analysis
1057 (lambda (x info env locs)
1058 ;; X is a leaf.
1059 info)
1060 (lambda (x info env locs)
1061 ;; Down into X.
1062 (define (extend lexical-name val info)
1063 ;; If VAL is a lambda, add NAME to the lexical-lambdas of INFO.
1064 (let ((toplevel-calls (toplevel-procedure-calls info))
1065 (lexical-lambdas (lexical-lambdas info))
1066 (toplevel-lambdas (toplevel-lambdas info)))
1067 (record-case val
1068 ((<lambda> body)
1069 (make-arity-info toplevel-calls
1070 (vhash-consq lexical-name val
1071 lexical-lambdas)
1072 toplevel-lambdas))
1073 ((<lexical-ref> gensym)
1074 ;; lexical alias
1075 (let ((val* (vhash-assq gensym lexical-lambdas)))
1076 (if (pair? val*)
1077 (extend lexical-name (cdr val*) info)
1078 info)))
1079 ((<toplevel-ref> name)
1080 ;; top-level alias
1081 (make-arity-info toplevel-calls
1082 (vhash-consq lexical-name val
1083 lexical-lambdas)
1084 toplevel-lambdas))
1085 (else info))))
1086
1087 (let ((toplevel-calls (toplevel-procedure-calls info))
1088 (lexical-lambdas (lexical-lambdas info))
1089 (toplevel-lambdas (toplevel-lambdas info)))
1090
1091 (record-case x
1092 ((<toplevel-define> name exp)
1093 (record-case exp
1094 ((<lambda> body)
1095 (make-arity-info toplevel-calls
1096 lexical-lambdas
1097 (vhash-consq name exp toplevel-lambdas)))
1098 ((<toplevel-ref> name)
1099 ;; alias for another toplevel
1100 (let ((proc (vhash-assq name toplevel-lambdas)))
1101 (make-arity-info toplevel-calls
1102 lexical-lambdas
1103 (vhash-consq (toplevel-define-name x)
1104 (if (pair? proc)
1105 (cdr proc)
1106 exp)
1107 toplevel-lambdas))))
1108 (else info)))
1109 ((<let> vars vals)
1110 (fold extend info vars vals))
1111 ((<letrec> vars vals)
1112 (fold extend info vars vals))
1113 ((<fix> vars vals)
1114 (fold extend info vars vals))
1115
1116 ((<application> proc args src)
1117 (record-case proc
1118 ((<lambda> body)
1119 (validate-arity proc x #t)
1120 info)
1121 ((<toplevel-ref> name)
1122 (make-arity-info (vhash-consq name x toplevel-calls)
1123 lexical-lambdas
1124 toplevel-lambdas))
1125 ((<lexical-ref> gensym)
1126 (let ((proc (vhash-assq gensym lexical-lambdas)))
1127 (if (pair? proc)
1128 (record-case (cdr proc)
1129 ((<toplevel-ref> name)
1130 ;; alias to toplevel
1131 (make-arity-info (vhash-consq name x toplevel-calls)
1132 lexical-lambdas
1133 toplevel-lambdas))
1134 (else
1135 (validate-arity (cdr proc) x #t)
1136 info))
1137
1138 ;; If GENSYM wasn't found, it may be because it's an
1139 ;; argument of the procedure being compiled.
1140 info)))
1141 (else info)))
1142 (else info))))
1143
1144 (lambda (x info env locs)
1145 ;; Up from X.
1146 (define (shrink name val info)
1147 ;; Remove NAME from the lexical-lambdas of INFO.
1148 (let ((toplevel-calls (toplevel-procedure-calls info))
1149 (lexical-lambdas (lexical-lambdas info))
1150 (toplevel-lambdas (toplevel-lambdas info)))
1151 (make-arity-info toplevel-calls
1152 (if (vhash-assq name lexical-lambdas)
1153 (vlist-tail lexical-lambdas)
1154 lexical-lambdas)
1155 toplevel-lambdas)))
1156
1157 (let ((toplevel-calls (toplevel-procedure-calls info))
1158 (lexical-lambdas (lexical-lambdas info))
1159 (toplevel-lambdas (toplevel-lambdas info)))
1160 (record-case x
1161 ((<let> vars vals)
1162 (fold shrink info vars vals))
1163 ((<letrec> vars vals)
1164 (fold shrink info vars vals))
1165 ((<fix> vars vals)
1166 (fold shrink info vars vals))
1167
1168 (else info))))
1169
1170 (lambda (result env)
1171 ;; Post-processing: check all top-level procedure calls that have been
1172 ;; encountered.
1173 (let ((toplevel-calls (toplevel-procedure-calls result))
1174 (toplevel-lambdas (toplevel-lambdas result)))
1175 (vlist-for-each
1176 (lambda (name+application)
1177 (let* ((name (car name+application))
1178 (application (cdr name+application))
1179 (proc
1180 (or (and=> (vhash-assq name toplevel-lambdas) cdr)
1181 (and (module? env)
1182 (false-if-exception
1183 (module-ref env name)))))
1184 (proc*
1185 ;; handle toplevel aliases
1186 (if (toplevel-ref? proc)
1187 (let ((name (toplevel-ref-name proc)))
1188 (and (module? env)
1189 (false-if-exception
1190 (module-ref env name))))
1191 proc)))
1192 (if (or (lambda? proc*) (procedure? proc*))
1193 (validate-arity proc* application (lambda? proc*)))))
1194 toplevel-calls)))
1195
1196 (make-arity-info vlist-null vlist-null vlist-null)))