Merge remote-tracking branch 'origin/stable-2.0'
[bpt/guile.git] / module / language / tree-il / analyze.scm
1 ;;; TREE-IL -> GLIL compiler
2
3 ;; Copyright (C) 2001, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
9 ;;;;
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
14 ;;;;
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Code:
20
21 (define-module (language tree-il analyze)
22 #:use-module (srfi srfi-1)
23 #:use-module (srfi srfi-9)
24 #:use-module (srfi srfi-11)
25 #:use-module (ice-9 vlist)
26 #:use-module (ice-9 match)
27 #:use-module (system base syntax)
28 #:use-module (system base message)
29 #:use-module (system vm program)
30 #:use-module (language tree-il)
31 #:use-module (system base pmatch)
32 #:export (analyze-lexicals
33 analyze-tree
34 unused-variable-analysis
35 unused-toplevel-analysis
36 unbound-variable-analysis
37 arity-analysis
38 format-analysis))
39
40 ;; Allocation is the process of assigning storage locations for lexical
41 ;; variables. A lexical variable has a distinct "address", or storage
42 ;; location, for each procedure in which it is referenced.
43 ;;
44 ;; A variable is "local", i.e., allocated on the stack, if it is
45 ;; referenced from within the procedure that defined it. Otherwise it is
46 ;; a "closure" variable. For example:
47 ;;
48 ;; (lambda (a) a) ; a will be local
49 ;; `a' is local to the procedure.
50 ;;
51 ;; (lambda (a) (lambda () a))
52 ;; `a' is local to the outer procedure, but a closure variable with
53 ;; respect to the inner procedure.
54 ;;
55 ;; If a variable is ever assigned, it needs to be heap-allocated
56 ;; ("boxed"). This is so that closures and continuations capture the
57 ;; variable's identity, not just one of the values it may have over the
58 ;; course of program execution. If the variable is never assigned, there
59 ;; is no distinction between value and identity, so closing over its
60 ;; identity (whether through closures or continuations) can make a copy
61 ;; of its value instead.
62 ;;
63 ;; Local variables are stored on the stack within a procedure's call
64 ;; frame. Their index into the stack is determined from their linear
65 ;; postion within a procedure's binding path:
66 ;; (let (0 1)
67 ;; (let (2 3) ...)
68 ;; (let (2) ...))
69 ;; (let (2 3 4) ...))
70 ;; etc.
71 ;;
72 ;; This algorithm has the problem that variables are only allocated
73 ;; indices at the end of the binding path. If variables bound early in
74 ;; the path are not used in later portions of the path, their indices
75 ;; will not be recycled. This problem is particularly egregious in the
76 ;; expansion of `or':
77 ;;
78 ;; (or x y z)
79 ;; -> (let ((a x)) (if a a (let ((b y)) (if b b z))))
80 ;;
81 ;; As you can see, the `a' binding is only used in the ephemeral
82 ;; `consequent' clause of the first `if', but its index would be
83 ;; reserved for the whole of the `or' expansion. So we have a hack for
84 ;; this specific case. A proper solution would be some sort of liveness
85 ;; analysis, and not our linear allocation algorithm.
86 ;;
87 ;; Closure variables are captured when a closure is created, and stored in a
88 ;; vector inline to the closure object itself. Each closure variable has a
89 ;; unique index into that vector.
90 ;;
91 ;; There is one more complication. Procedures bound by <fix> may, in
92 ;; some cases, be rendered inline to their parent procedure. That is to
93 ;; say,
94 ;;
95 ;; (letrec ((lp (lambda () (lp)))) (lp))
96 ;; => (fix ((lp (lambda () (lp)))) (lp))
97 ;; => goto FIX-BODY; LP: goto LP; FIX-BODY: goto LP;
98 ;; ^ jump over the loop ^ the fixpoint lp ^ starting off the loop
99 ;;
100 ;; The upshot is that we don't have to allocate any space for the `lp'
101 ;; closure at all, as it can be rendered inline as a loop. So there is
102 ;; another kind of allocation, "label allocation", in which the
103 ;; procedure is simply a label, placed at the start of the lambda body.
104 ;; The label is the gensym under which the lambda expression is bound.
105 ;;
106 ;; The analyzer checks to see that the label is called with the correct
107 ;; number of arguments. Calls to labels compile to rename + goto.
108 ;; Lambda, the ultimate goto!
109 ;;
110 ;;
111 ;; The return value of `analyze-lexicals' is a hash table, the
112 ;; "allocation".
113 ;;
114 ;; The allocation maps gensyms -- recall that each lexically bound
115 ;; variable has a unique gensym -- to storage locations ("addresses").
116 ;; Since one gensym may have many storage locations, if it is referenced
117 ;; in many procedures, it is a two-level map.
118 ;;
119 ;; The allocation also stored information on how many local variables
120 ;; need to be allocated for each procedure, lexicals that have been
121 ;; translated into labels, and information on what free variables to
122 ;; capture from its lexical parent procedure.
123 ;;
124 ;; In addition, we have a conflation: while we're traversing the code,
125 ;; recording information to pass to the compiler, we take the
126 ;; opportunity to generate labels for each lambda-case clause, so that
127 ;; generated code can skip argument checks at runtime if they match at
128 ;; compile-time.
129 ;;
130 ;; Also, while we're a-traversing and an-allocating, we check prompt
131 ;; handlers to see if the "continuation" argument is used. If not, we
132 ;; mark the prompt as being "escape-only". This allows us to implement
133 ;; `catch' and `throw' using `prompt' and `control', but without causing
134 ;; a continuation to be reified. Heh heh.
135 ;;
136 ;; That is:
137 ;;
138 ;; sym -> {lambda -> address}
139 ;; lambda -> (labels . free-locs)
140 ;; lambda-case -> (gensym . nlocs)
141 ;; prompt -> escape-only?
142 ;;
143 ;; address ::= (local? boxed? . index)
144 ;; labels ::= ((sym . lambda) ...)
145 ;; free-locs ::= ((sym0 . address0) (sym1 . address1) ...)
146 ;; free variable addresses are relative to parent proc.
147
148 (define (make-hashq k v)
149 (let ((res (make-hash-table)))
150 (hashq-set! res k v)
151 res))
152
153 (define (analyze-lexicals x)
154 ;; bound-vars: lambda -> (sym ...)
155 ;; all identifiers bound within a lambda
156 (define bound-vars (make-hash-table))
157 ;; free-vars: lambda -> (sym ...)
158 ;; all identifiers referenced in a lambda, but not bound
159 ;; NB, this includes identifiers referenced by contained lambdas
160 (define free-vars (make-hash-table))
161 ;; assigned: sym -> #t
162 ;; variables that are assigned
163 (define assigned (make-hash-table))
164 ;; refcounts: sym -> count
165 ;; allows us to detect the or-expansion in O(1) time
166 (define refcounts (make-hash-table))
167 ;; labels: sym -> lambda
168 ;; for determining if fixed-point procedures can be rendered as
169 ;; labels.
170 (define labels (make-hash-table))
171
172 ;; returns variables referenced in expr
173 (define (analyze! x proc labels-in-proc tail? tail-call-args)
174 (define (step y) (analyze! y proc '() #f #f))
175 (define (step-tail y) (analyze! y proc labels-in-proc tail? #f))
176 (define (step-tail-call y args) (analyze! y proc labels-in-proc #f
177 (and tail? args)))
178 (define (recur/labels x new-proc labels)
179 (analyze! x new-proc (append labels labels-in-proc) #t #f))
180 (define (recur x new-proc) (analyze! x new-proc '() tail? #f))
181 (record-case x
182 ((<call> proc args)
183 (apply lset-union eq? (step-tail-call proc args)
184 (map step args)))
185
186 ((<primcall> args)
187 (apply lset-union eq? (map step args)))
188
189 ((<conditional> test consequent alternate)
190 (lset-union eq? (step test) (step-tail consequent) (step-tail alternate)))
191
192 ((<lexical-ref> gensym)
193 (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
194 (if (not (and tail-call-args
195 (memq gensym labels-in-proc)
196 (let ((p (hashq-ref labels gensym)))
197 (and p
198 (let lp ((c (lambda-body p)))
199 (and c (lambda-case? c)
200 (or
201 ;; for now prohibit optional &
202 ;; keyword arguments; can relax this
203 ;; restriction later
204 (and (= (length (lambda-case-req c))
205 (length tail-call-args))
206 (not (lambda-case-opt c))
207 (not (lambda-case-kw c))
208 (not (lambda-case-rest c)))
209 (lp (lambda-case-alternate c)))))))))
210 (hashq-set! labels gensym #f))
211 (list gensym))
212
213 ((<lexical-set> gensym exp)
214 (hashq-set! assigned gensym #t)
215 (hashq-set! labels gensym #f)
216 (lset-adjoin eq? (step exp) gensym))
217
218 ((<module-set> exp)
219 (step exp))
220
221 ((<toplevel-set> exp)
222 (step exp))
223
224 ((<toplevel-define> exp)
225 (step exp))
226
227 ((<seq> head tail)
228 (lset-union eq? (step head) (step-tail tail)))
229
230 ((<lambda> body)
231 ;; order is important here
232 (hashq-set! bound-vars x '())
233 (let ((free (recur body x)))
234 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
235 (hashq-set! free-vars x free)
236 free))
237
238 ((<lambda-case> opt kw inits gensyms body alternate)
239 (hashq-set! bound-vars proc
240 (append (reverse gensyms) (hashq-ref bound-vars proc)))
241 (lset-union
242 eq?
243 (lset-difference eq?
244 (lset-union eq?
245 (apply lset-union eq? (map step inits))
246 (step-tail body))
247 gensyms)
248 (if alternate (step-tail alternate) '())))
249
250 ((<let> gensyms vals body)
251 (hashq-set! bound-vars proc
252 (append (reverse gensyms) (hashq-ref bound-vars proc)))
253 (lset-difference eq?
254 (apply lset-union eq? (step-tail body) (map step vals))
255 gensyms))
256
257 ((<letrec> gensyms vals body)
258 (hashq-set! bound-vars proc
259 (append (reverse gensyms) (hashq-ref bound-vars proc)))
260 (for-each (lambda (sym) (hashq-set! assigned sym #t)) gensyms)
261 (lset-difference eq?
262 (apply lset-union eq? (step-tail body) (map step vals))
263 gensyms))
264
265 ((<fix> gensyms vals body)
266 ;; Try to allocate these procedures as labels.
267 (for-each (lambda (sym val) (hashq-set! labels sym val))
268 gensyms vals)
269 (hashq-set! bound-vars proc
270 (append (reverse gensyms) (hashq-ref bound-vars proc)))
271 ;; Step into subexpressions.
272 (let* ((var-refs
273 (map
274 ;; Since we're trying to label-allocate the lambda,
275 ;; pretend it's not a closure, and just recurse into its
276 ;; body directly. (Otherwise, recursing on a closure
277 ;; that references one of the fix's bound vars would
278 ;; prevent label allocation.)
279 (lambda (x)
280 (record-case x
281 ((<lambda> body)
282 ;; just like the closure case, except here we use
283 ;; recur/labels instead of recur
284 (hashq-set! bound-vars x '())
285 (let ((free (recur/labels body x gensyms)))
286 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
287 (hashq-set! free-vars x free)
288 free))))
289 vals))
290 (vars-with-refs (map cons gensyms var-refs))
291 (body-refs (recur/labels body proc gensyms)))
292 (define (delabel-dependents! sym)
293 (let ((refs (assq-ref vars-with-refs sym)))
294 (if refs
295 (for-each (lambda (sym)
296 (if (hashq-ref labels sym)
297 (begin
298 (hashq-set! labels sym #f)
299 (delabel-dependents! sym))))
300 refs))))
301 ;; Stepping into the lambdas and the body might have made some
302 ;; procedures not label-allocatable -- which might have
303 ;; knock-on effects. For example:
304 ;; (fix ((a (lambda () (b)))
305 ;; (b (lambda () a)))
306 ;; (a))
307 ;; As far as `a' is concerned, both `a' and `b' are
308 ;; label-allocatable. But `b' references `a' not in a proc-tail
309 ;; position, which makes `a' not label-allocatable. The
310 ;; knock-on effect is that, when back-propagating this
311 ;; information to `a', `b' will also become not
312 ;; label-allocatable, as it is referenced within `a', which is
313 ;; allocated as a closure. This is a transitive relationship.
314 (for-each (lambda (sym)
315 (if (not (hashq-ref labels sym))
316 (delabel-dependents! sym)))
317 gensyms)
318 ;; Now lift bound variables with label-allocated lambdas to the
319 ;; parent procedure.
320 (for-each
321 (lambda (sym val)
322 (if (hashq-ref labels sym)
323 ;; Remove traces of the label-bound lambda. The free
324 ;; vars will propagate up via the return val.
325 (begin
326 (hashq-set! bound-vars proc
327 (append (hashq-ref bound-vars val)
328 (hashq-ref bound-vars proc)))
329 (hashq-remove! bound-vars val)
330 (hashq-remove! free-vars val))))
331 gensyms vals)
332 (lset-difference eq?
333 (apply lset-union eq? body-refs var-refs)
334 gensyms)))
335
336 ((<let-values> exp body)
337 (lset-union eq? (step exp) (step body)))
338
339 ((<dynwind> winder pre body post unwinder)
340 (lset-union eq? (step winder) (step pre)
341 (step body)
342 (step post) (step unwinder)))
343
344 ((<dynlet> fluids vals body)
345 (apply lset-union eq? (step body) (map step (append fluids vals))))
346
347 ((<dynref> fluid)
348 (step fluid))
349
350 ((<dynset> fluid exp)
351 (lset-union eq? (step fluid) (step exp)))
352
353 ((<prompt> tag body handler)
354 (lset-union eq? (step tag) (step body) (step-tail handler)))
355
356 ((<abort> tag args tail)
357 (apply lset-union eq? (step tag) (step tail) (map step args)))
358
359 (else '())))
360
361 ;; allocation: sym -> {lambda -> address}
362 ;; lambda -> (nlocs labels . free-locs)
363 (define allocation (make-hash-table))
364
365 (define (allocate! x proc n)
366 (define (recur y) (allocate! y proc n))
367 (record-case x
368 ((<call> proc args)
369 (apply max (recur proc) (map recur args)))
370
371 ((<primcall> args)
372 (apply max n (map recur args)))
373
374 ((<conditional> test consequent alternate)
375 (max (recur test) (recur consequent) (recur alternate)))
376
377 ((<lexical-set> exp)
378 (recur exp))
379
380 ((<module-set> exp)
381 (recur exp))
382
383 ((<toplevel-set> exp)
384 (recur exp))
385
386 ((<toplevel-define> exp)
387 (recur exp))
388
389 ((<seq> head tail)
390 (max (recur head)
391 (recur tail)))
392
393 ((<lambda> body)
394 ;; allocate closure vars in order
395 (let lp ((c (hashq-ref free-vars x)) (n 0))
396 (if (pair? c)
397 (begin
398 (hashq-set! (hashq-ref allocation (car c))
399 x
400 `(#f ,(hashq-ref assigned (car c)) . ,n))
401 (lp (cdr c) (1+ n)))))
402
403 (let ((nlocs (allocate! body x 0))
404 (free-addresses
405 (map (lambda (v)
406 (hashq-ref (hashq-ref allocation v) proc))
407 (hashq-ref free-vars x)))
408 (labels (filter cdr
409 (map (lambda (sym)
410 (cons sym (hashq-ref labels sym)))
411 (hashq-ref bound-vars x)))))
412 ;; set procedure allocations
413 (hashq-set! allocation x (cons labels free-addresses)))
414 n)
415
416 ((<lambda-case> opt kw inits gensyms body alternate)
417 (max
418 (let lp ((gensyms gensyms) (n n))
419 (if (null? gensyms)
420 (let ((nlocs (apply
421 max
422 (allocate! body proc n)
423 ;; inits not logically at the end, but they
424 ;; are the list...
425 (map (lambda (x) (allocate! x proc n)) inits))))
426 ;; label and nlocs for the case
427 (hashq-set! allocation x (cons (gensym ":LCASE") nlocs))
428 nlocs)
429 (begin
430 (hashq-set! allocation (car gensyms)
431 (make-hashq
432 proc `(#t ,(hashq-ref assigned (car gensyms)) . ,n)))
433 (lp (cdr gensyms) (1+ n)))))
434 (if alternate (allocate! alternate proc n) n)))
435
436 ((<let> gensyms vals body)
437 (let ((nmax (apply max (map recur vals))))
438 (cond
439 ;; the `or' hack
440 ((and (conditional? body)
441 (= (length gensyms) 1)
442 (let ((v (car gensyms)))
443 (and (not (hashq-ref assigned v))
444 (= (hashq-ref refcounts v 0) 2)
445 (lexical-ref? (conditional-test body))
446 (eq? (lexical-ref-gensym (conditional-test body)) v)
447 (lexical-ref? (conditional-consequent body))
448 (eq? (lexical-ref-gensym (conditional-consequent body)) v))))
449 (hashq-set! allocation (car gensyms)
450 (make-hashq proc `(#t #f . ,n)))
451 ;; the 1+ for this var
452 (max nmax (1+ n) (allocate! (conditional-alternate body) proc n)))
453 (else
454 (let lp ((gensyms gensyms) (n n))
455 (if (null? gensyms)
456 (max nmax (allocate! body proc n))
457 (let ((v (car gensyms)))
458 (hashq-set!
459 allocation v
460 (make-hashq proc
461 `(#t ,(hashq-ref assigned v) . ,n)))
462 (lp (cdr gensyms) (1+ n)))))))))
463
464 ((<letrec> gensyms vals body)
465 (let lp ((gensyms gensyms) (n n))
466 (if (null? gensyms)
467 (let ((nmax (apply max
468 (map (lambda (x)
469 (allocate! x proc n))
470 vals))))
471 (max nmax (allocate! body proc n)))
472 (let ((v (car gensyms)))
473 (hashq-set!
474 allocation v
475 (make-hashq proc
476 `(#t ,(hashq-ref assigned v) . ,n)))
477 (lp (cdr gensyms) (1+ n))))))
478
479 ((<fix> gensyms vals body)
480 (let lp ((in gensyms) (n n))
481 (if (null? in)
482 (let lp ((gensyms gensyms) (vals vals) (nmax n))
483 (cond
484 ((null? gensyms)
485 (max nmax (allocate! body proc n)))
486 ((hashq-ref labels (car gensyms))
487 ;; allocate lambda body inline to proc
488 (lp (cdr gensyms)
489 (cdr vals)
490 (record-case (car vals)
491 ((<lambda> body)
492 (max nmax (allocate! body proc n))))))
493 (else
494 ;; allocate closure
495 (lp (cdr gensyms)
496 (cdr vals)
497 (max nmax (allocate! (car vals) proc n))))))
498
499 (let ((v (car in)))
500 (cond
501 ((hashq-ref assigned v)
502 (error "fixpoint procedures may not be assigned" x))
503 ((hashq-ref labels v)
504 ;; no binding, it's a label
505 (lp (cdr in) n))
506 (else
507 ;; allocate closure binding
508 (hashq-set! allocation v (make-hashq proc `(#t #f . ,n)))
509 (lp (cdr in) (1+ n))))))))
510
511 ((<let-values> exp body)
512 (max (recur exp) (recur body)))
513
514 ((<dynwind> winder pre body post unwinder)
515 (max (recur winder) (recur pre)
516 (recur body)
517 (recur post) (recur unwinder)))
518
519 ((<dynlet> fluids vals body)
520 (apply max (recur body) (map recur (append fluids vals))))
521
522 ((<dynref> fluid)
523 (recur fluid))
524
525 ((<dynset> fluid exp)
526 (max (recur fluid) (recur exp)))
527
528 ((<prompt> tag body handler)
529 (let ((cont-var (and (lambda-case? handler)
530 (pair? (lambda-case-gensyms handler))
531 (car (lambda-case-gensyms handler)))))
532 (hashq-set! allocation x
533 (and cont-var (zero? (hashq-ref refcounts cont-var 0))))
534 (max (recur tag) (recur body) (recur handler))))
535
536 ((<abort> tag args tail)
537 (apply max (recur tag) (recur tail) (map recur args)))
538
539 (else n)))
540
541 (analyze! x #f '() #t #f)
542 (allocate! x #f 0)
543
544 allocation)
545
546 \f
547 ;;;
548 ;;; Tree analyses for warnings.
549 ;;;
550
551 (define-record-type <tree-analysis>
552 (make-tree-analysis leaf down up post init)
553 tree-analysis?
554 (leaf tree-analysis-leaf) ;; (lambda (x result env locs) ...)
555 (down tree-analysis-down) ;; (lambda (x result env locs) ...)
556 (up tree-analysis-up) ;; (lambda (x result env locs) ...)
557 (post tree-analysis-post) ;; (lambda (result env) ...)
558 (init tree-analysis-init)) ;; arbitrary value
559
560 (define (analyze-tree analyses tree env)
561 "Run all tree analyses listed in ANALYSES on TREE for ENV, using
562 `tree-il-fold'. Return TREE. The leaf/down/up procedures of each analysis are
563 passed a ``location stack', which is the stack of `tree-il-src' values for each
564 parent tree (a list); it can be used to approximate source location when
565 accurate information is missing from a given `tree-il' element."
566
567 (define (traverse proc update-locs)
568 ;; Return a tree traversing procedure that returns a list of analysis
569 ;; results prepended by the location stack.
570 (lambda (x results)
571 (let ((locs (update-locs x (car results))))
572 (cons locs ;; the location stack
573 (map (lambda (analysis result)
574 ((proc analysis) x result env locs))
575 analyses
576 (cdr results))))))
577
578 ;; Keeping/extending/shrinking the location stack.
579 (define (keep-locs x locs) locs)
580 (define (extend-locs x locs) (cons (tree-il-src x) locs))
581 (define (shrink-locs x locs) (cdr locs))
582
583 (let ((results
584 (tree-il-fold (traverse tree-analysis-leaf keep-locs)
585 (traverse tree-analysis-down extend-locs)
586 (traverse tree-analysis-up shrink-locs)
587 (cons '() ;; empty location stack
588 (map tree-analysis-init analyses))
589 tree)))
590
591 (for-each (lambda (analysis result)
592 ((tree-analysis-post analysis) result env))
593 analyses
594 (cdr results)))
595
596 tree)
597
598 \f
599 ;;;
600 ;;; Unused variable analysis.
601 ;;;
602
603 ;; <binding-info> records are used during tree traversals in
604 ;; `unused-variable-analysis'. They contain a list of the local vars
605 ;; currently in scope, and a list of locals vars that have been referenced.
606 (define-record-type <binding-info>
607 (make-binding-info vars refs)
608 binding-info?
609 (vars binding-info-vars) ;; ((GENSYM NAME LOCATION) ...)
610 (refs binding-info-refs)) ;; (GENSYM ...)
611
612 (define (gensym? sym)
613 ;; Return #t if SYM is (likely) a generated symbol.
614 (string-any #\space (symbol->string sym)))
615
616 (define unused-variable-analysis
617 ;; Report unused variables in the given tree.
618 (make-tree-analysis
619 (lambda (x info env locs)
620 ;; X is a leaf: extend INFO's refs accordingly.
621 (let ((refs (binding-info-refs info))
622 (vars (binding-info-vars info)))
623 (record-case x
624 ((<lexical-ref> gensym)
625 (make-binding-info vars (vhash-consq gensym #t refs)))
626 (else info))))
627
628 (lambda (x info env locs)
629 ;; Going down into X: extend INFO's variable list
630 ;; accordingly.
631 (let ((refs (binding-info-refs info))
632 (vars (binding-info-vars info))
633 (src (tree-il-src x)))
634 (define (extend inner-vars inner-names)
635 (fold (lambda (var name vars)
636 (vhash-consq var (list name src) vars))
637 vars
638 inner-vars
639 inner-names))
640
641 (record-case x
642 ((<lexical-set> gensym)
643 (make-binding-info vars (vhash-consq gensym #t refs)))
644 ((<lambda-case> req opt inits rest kw gensyms)
645 (let ((names `(,@req
646 ,@(or opt '())
647 ,@(if rest (list rest) '())
648 ,@(if kw (map cadr (cdr kw)) '()))))
649 (make-binding-info (extend gensyms names) refs)))
650 ((<let> gensyms names)
651 (make-binding-info (extend gensyms names) refs))
652 ((<letrec> gensyms names)
653 (make-binding-info (extend gensyms names) refs))
654 ((<fix> gensyms names)
655 (make-binding-info (extend gensyms names) refs))
656 (else info))))
657
658 (lambda (x info env locs)
659 ;; Leaving X's scope: shrink INFO's variable list
660 ;; accordingly and reported unused nested variables.
661 (let ((refs (binding-info-refs info))
662 (vars (binding-info-vars info)))
663 (define (shrink inner-vars refs)
664 (vlist-for-each
665 (lambda (var)
666 (let ((gensym (car var)))
667 ;; Don't report lambda parameters as unused.
668 (if (and (memq gensym inner-vars)
669 (not (vhash-assq gensym refs))
670 (not (lambda-case? x)))
671 (let ((name (cadr var))
672 ;; We can get approximate source location by going up
673 ;; the LOCS location stack.
674 (loc (or (caddr var)
675 (find pair? locs))))
676 (if (and (not (gensym? name))
677 (not (eq? name '_)))
678 (warning 'unused-variable loc name))))))
679 vars)
680 (vlist-drop vars (length inner-vars)))
681
682 ;; For simplicity, we leave REFS untouched, i.e., with
683 ;; names of variables that are now going out of scope.
684 ;; It doesn't hurt as these are unique names, it just
685 ;; makes REFS unnecessarily fat.
686 (record-case x
687 ((<lambda-case> gensyms)
688 (make-binding-info (shrink gensyms refs) refs))
689 ((<let> gensyms)
690 (make-binding-info (shrink gensyms refs) refs))
691 ((<letrec> gensyms)
692 (make-binding-info (shrink gensyms refs) refs))
693 ((<fix> gensyms)
694 (make-binding-info (shrink gensyms refs) refs))
695 (else info))))
696
697 (lambda (result env) #t)
698 (make-binding-info vlist-null vlist-null)))
699
700 \f
701 ;;;
702 ;;; Unused top-level variable analysis.
703 ;;;
704
705 ;; <reference-graph> record top-level definitions that are made, references to
706 ;; top-level definitions and their context (the top-level definition in which
707 ;; the reference appears), as well as the current context (the top-level
708 ;; definition we're currently in). The second part (`refs' below) is
709 ;; effectively a graph from which we can determine unused top-level definitions.
710 (define-record-type <reference-graph>
711 (make-reference-graph refs defs toplevel-context)
712 reference-graph?
713 (defs reference-graph-defs) ;; ((NAME . LOC) ...)
714 (refs reference-graph-refs) ;; ((REF-CONTEXT REF ...) ...)
715 (toplevel-context reference-graph-toplevel-context)) ;; NAME | #f
716
717 (define (graph-reachable-nodes root refs reachable)
718 ;; Add to REACHABLE the nodes reachable from ROOT in graph REFS. REFS is a
719 ;; vhash mapping nodes to the list of their children: for instance,
720 ;; ((A -> (B C)) (B -> (A)) (C -> ())) corresponds to
721 ;;
722 ;; ,-------.
723 ;; v |
724 ;; A ----> B
725 ;; |
726 ;; v
727 ;; C
728 ;;
729 ;; REACHABLE is a vhash of nodes known to be otherwise reachable.
730
731 (let loop ((root root)
732 (path vlist-null)
733 (result reachable))
734 (if (or (vhash-assq root path)
735 (vhash-assq root result))
736 result
737 (let* ((children (or (and=> (vhash-assq root refs) cdr) '()))
738 (path (vhash-consq root #t path))
739 (result (fold (lambda (kid result)
740 (loop kid path result))
741 result
742 children)))
743 (fold (lambda (kid result)
744 (vhash-consq kid #t result))
745 result
746 children)))))
747
748 (define (graph-reachable-nodes* roots refs)
749 ;; Return the list of nodes in REFS reachable from the nodes listed in ROOTS.
750 (vlist-fold (lambda (root+true result)
751 (let* ((root (car root+true))
752 (reachable (graph-reachable-nodes root refs result)))
753 (vhash-consq root #t reachable)))
754 vlist-null
755 roots))
756
757 (define (partition* pred vhash)
758 ;; Partition VHASH according to PRED. Return the two resulting vhashes.
759 (let ((result
760 (vlist-fold (lambda (k+v result)
761 (let ((k (car k+v))
762 (v (cdr k+v))
763 (r1 (car result))
764 (r2 (cdr result)))
765 (if (pred k)
766 (cons (vhash-consq k v r1) r2)
767 (cons r1 (vhash-consq k v r2)))))
768 (cons vlist-null vlist-null)
769 vhash)))
770 (values (car result) (cdr result))))
771
772 (define unused-toplevel-analysis
773 ;; Report unused top-level definitions that are not exported.
774 (let ((add-ref-from-context
775 (lambda (graph name)
776 ;; Add an edge CTX -> NAME in GRAPH.
777 (let* ((refs (reference-graph-refs graph))
778 (defs (reference-graph-defs graph))
779 (ctx (reference-graph-toplevel-context graph))
780 (ctx-refs (or (and=> (vhash-assq ctx refs) cdr) '())))
781 (make-reference-graph (vhash-consq ctx (cons name ctx-refs) refs)
782 defs ctx)))))
783 (define (macro-variable? name env)
784 (and (module? env)
785 (let ((var (module-variable env name)))
786 (and var (variable-bound? var)
787 (macro? (variable-ref var))))))
788
789 (make-tree-analysis
790 (lambda (x graph env locs)
791 ;; X is a leaf.
792 (let ((ctx (reference-graph-toplevel-context graph)))
793 (record-case x
794 ((<toplevel-ref> name src)
795 (add-ref-from-context graph name))
796 (else graph))))
797
798 (lambda (x graph env locs)
799 ;; Going down into X.
800 (let ((ctx (reference-graph-toplevel-context graph))
801 (refs (reference-graph-refs graph))
802 (defs (reference-graph-defs graph)))
803 (record-case x
804 ((<toplevel-define> name src)
805 (let ((refs refs)
806 (defs (vhash-consq name (or src (find pair? locs))
807 defs)))
808 (make-reference-graph refs defs name)))
809 ((<toplevel-set> name src)
810 (add-ref-from-context graph name))
811 (else graph))))
812
813 (lambda (x graph env locs)
814 ;; Leaving X's scope.
815 (record-case x
816 ((<toplevel-define>)
817 (let ((refs (reference-graph-refs graph))
818 (defs (reference-graph-defs graph)))
819 (make-reference-graph refs defs #f)))
820 (else graph)))
821
822 (lambda (graph env)
823 ;; Process the resulting reference graph: determine all private definitions
824 ;; not reachable from any public definition. Macros
825 ;; (syntax-transformers), which are globally bound, never considered
826 ;; unused since we can't tell whether a macro is actually used; in
827 ;; addition, macros are considered roots of the graph since they may use
828 ;; private bindings. FIXME: The `make-syntax-transformer' calls don't
829 ;; contain any literal `toplevel-ref' of the global bindings they use so
830 ;; this strategy fails.
831 (define (exported? name)
832 (if (module? env)
833 (module-variable (module-public-interface env) name)
834 #t))
835
836 (let-values (((public-defs private-defs)
837 (partition* (lambda (name)
838 (or (exported? name)
839 (macro-variable? name env)))
840 (reference-graph-defs graph))))
841 (let* ((roots (vhash-consq #f #t public-defs))
842 (refs (reference-graph-refs graph))
843 (reachable (graph-reachable-nodes* roots refs))
844 (unused (vlist-filter (lambda (name+src)
845 (not (vhash-assq (car name+src)
846 reachable)))
847 private-defs)))
848 (vlist-for-each (lambda (name+loc)
849 (let ((name (car name+loc))
850 (loc (cdr name+loc)))
851 (if (not (gensym? name))
852 (warning 'unused-toplevel loc name))))
853 unused))))
854
855 (make-reference-graph vlist-null vlist-null #f))))
856
857 \f
858 ;;;
859 ;;; Unbound variable analysis.
860 ;;;
861
862 ;; <toplevel-info> records are used during tree traversal in search of
863 ;; possibly unbound variable. They contain a list of references to
864 ;; potentially unbound top-level variables, and a list of the top-level
865 ;; defines that have been encountered.
866 (define-record-type <toplevel-info>
867 (make-toplevel-info refs defs)
868 toplevel-info?
869 (refs toplevel-info-refs) ;; ((VARIABLE-NAME . LOCATION) ...)
870 (defs toplevel-info-defs)) ;; (VARIABLE-NAME ...)
871
872 (define (goops-toplevel-definition proc args env)
873 ;; If call of PROC to ARGS is a GOOPS top-level definition, return
874 ;; the name of the variable being defined; otherwise return #f. This
875 ;; assumes knowledge of the current implementation of `define-class' et al.
876 (define (toplevel-define-arg args)
877 (match args
878 ((($ <const> _ (and (? symbol?) exp)) _)
879 exp)
880 (_ #f)))
881
882 (match proc
883 (($ <module-ref> _ '(oop goops) 'toplevel-define! #f)
884 (toplevel-define-arg args))
885 (($ <toplevel-ref> _ 'toplevel-define!)
886 ;; This may be the result of expanding one of the GOOPS macros within
887 ;; `oop/goops.scm'.
888 (and (eq? env (resolve-module '(oop goops)))
889 (toplevel-define-arg args)))
890 (_ #f)))
891
892 (define unbound-variable-analysis
893 ;; Report possibly unbound variables in the given tree.
894 (make-tree-analysis
895 (lambda (x info env locs)
896 ;; X is a leaf: extend INFO's refs accordingly.
897 (let ((refs (toplevel-info-refs info))
898 (defs (toplevel-info-defs info)))
899 (define (bound? name)
900 (or (and (module? env)
901 (module-variable env name))
902 (vhash-assq name defs)))
903
904 (record-case x
905 ((<toplevel-ref> name src)
906 (if (bound? name)
907 info
908 (let ((src (or src (find pair? locs))))
909 (make-toplevel-info (vhash-consq name src refs)
910 defs))))
911 (else info))))
912
913 (lambda (x info env locs)
914 ;; Going down into X.
915 (let* ((refs (toplevel-info-refs info))
916 (defs (toplevel-info-defs info))
917 (src (tree-il-src x)))
918 (define (bound? name)
919 (or (and (module? env)
920 (module-variable env name))
921 (vhash-assq name defs)))
922
923 (record-case x
924 ((<toplevel-set> name src)
925 (if (bound? name)
926 (make-toplevel-info refs defs)
927 (let ((src (find pair? locs)))
928 (make-toplevel-info (vhash-consq name src refs)
929 defs))))
930 ((<toplevel-define> name)
931 (make-toplevel-info (vhash-delq name refs)
932 (vhash-consq name #t defs)))
933
934 ((<call> proc args)
935 ;; Check for a dynamic top-level definition, as is
936 ;; done by code expanded from GOOPS macros.
937 (let ((name (goops-toplevel-definition proc args
938 env)))
939 (if (symbol? name)
940 (make-toplevel-info (vhash-delq name refs)
941 (vhash-consq name #t defs))
942 (make-toplevel-info refs defs))))
943 (else
944 (make-toplevel-info refs defs)))))
945
946 (lambda (x info env locs)
947 ;; Leaving X's scope.
948 info)
949
950 (lambda (toplevel env)
951 ;; Post-process the result.
952 (vlist-for-each (lambda (name+loc)
953 (let ((name (car name+loc))
954 (loc (cdr name+loc)))
955 (warning 'unbound-variable loc name)))
956 (vlist-reverse (toplevel-info-refs toplevel))))
957
958 (make-toplevel-info vlist-null vlist-null)))
959
960 \f
961 ;;;
962 ;;; Arity analysis.
963 ;;;
964
965 ;; <arity-info> records contain information about lexical definitions of
966 ;; procedures currently in scope, top-level procedure definitions that have
967 ;; been encountered, and calls to top-level procedures that have been
968 ;; encountered.
969 (define-record-type <arity-info>
970 (make-arity-info toplevel-calls lexical-lambdas toplevel-lambdas)
971 arity-info?
972 (toplevel-calls toplevel-procedure-calls) ;; ((NAME . CALL) ...)
973 (lexical-lambdas lexical-lambdas) ;; ((GENSYM . DEFINITION) ...)
974 (toplevel-lambdas toplevel-lambdas)) ;; ((NAME . DEFINITION) ...)
975
976 (define (validate-arity proc call lexical?)
977 ;; Validate the argument count of CALL, a tree-il call of
978 ;; PROC, emitting a warning in case of argument count mismatch.
979
980 (define (filter-keyword-args keywords allow-other-keys? args)
981 ;; Filter keyword arguments from ARGS and return the resulting list.
982 ;; KEYWORDS is the list of allowed keywords, and ALLOW-OTHER-KEYS?
983 ;; specified whethere keywords not listed in KEYWORDS are allowed.
984 (let loop ((args args)
985 (result '()))
986 (if (null? args)
987 (reverse result)
988 (let ((arg (car args)))
989 (if (and (const? arg)
990 (or (memq (const-exp arg) keywords)
991 (and allow-other-keys?
992 (keyword? (const-exp arg)))))
993 (loop (if (pair? (cdr args))
994 (cddr args)
995 '())
996 result)
997 (loop (cdr args)
998 (cons arg result)))))))
999
1000 (define (arities proc)
1001 ;; Return the arities of PROC, which can be either a tree-il or a
1002 ;; procedure.
1003 (define (len x)
1004 (or (and (or (null? x) (pair? x))
1005 (length x))
1006 0))
1007 (cond ((program? proc)
1008 (values (procedure-name proc)
1009 (map (lambda (a)
1010 (list (arity:nreq a) (arity:nopt a) (arity:rest? a)
1011 (map car (arity:kw a))
1012 (arity:allow-other-keys? a)))
1013 (program-arities proc))))
1014 ((procedure? proc)
1015 (let ((arity (procedure-minimum-arity proc)))
1016 (values (procedure-name proc)
1017 (list (list (car arity) (cadr arity) (caddr arity)
1018 #f #f)))))
1019 (else
1020 (let loop ((name #f)
1021 (proc proc)
1022 (arities '()))
1023 (if (not proc)
1024 (values name (reverse arities))
1025 (record-case proc
1026 ((<lambda-case> req opt rest kw alternate)
1027 (loop name alternate
1028 (cons (list (len req) (len opt) rest
1029 (and (pair? kw) (map car (cdr kw)))
1030 (and (pair? kw) (car kw)))
1031 arities)))
1032 ((<lambda> meta body)
1033 (loop (assoc-ref meta 'name) body arities))
1034 (else
1035 (values #f #f))))))))
1036
1037 (let ((args (call-args call))
1038 (src (tree-il-src call)))
1039 (call-with-values (lambda () (arities proc))
1040 (lambda (name arities)
1041 (define matches?
1042 (find (lambda (arity)
1043 (pmatch arity
1044 ((,req ,opt ,rest? ,kw ,aok?)
1045 (let ((args (if (pair? kw)
1046 (filter-keyword-args kw aok? args)
1047 args)))
1048 (if (and req opt)
1049 (let ((count (length args)))
1050 (and (>= count req)
1051 (or rest?
1052 (<= count (+ req opt)))))
1053 #t)))
1054 (else #t)))
1055 arities))
1056
1057 (if (not matches?)
1058 (warning 'arity-mismatch src
1059 (or name (with-output-to-string (lambda () (write proc))))
1060 lexical?)))))
1061 #t)
1062
1063 (define arity-analysis
1064 ;; Report arity mismatches in the given tree.
1065 (make-tree-analysis
1066 (lambda (x info env locs)
1067 ;; X is a leaf.
1068 info)
1069 (lambda (x info env locs)
1070 ;; Down into X.
1071 (define (extend lexical-name val info)
1072 ;; If VAL is a lambda, add NAME to the lexical-lambdas of INFO.
1073 (let ((toplevel-calls (toplevel-procedure-calls info))
1074 (lexical-lambdas (lexical-lambdas info))
1075 (toplevel-lambdas (toplevel-lambdas info)))
1076 (record-case val
1077 ((<lambda> body)
1078 (make-arity-info toplevel-calls
1079 (vhash-consq lexical-name val
1080 lexical-lambdas)
1081 toplevel-lambdas))
1082 ((<lexical-ref> gensym)
1083 ;; lexical alias
1084 (let ((val* (vhash-assq gensym lexical-lambdas)))
1085 (if (pair? val*)
1086 (extend lexical-name (cdr val*) info)
1087 info)))
1088 ((<toplevel-ref> name)
1089 ;; top-level alias
1090 (make-arity-info toplevel-calls
1091 (vhash-consq lexical-name val
1092 lexical-lambdas)
1093 toplevel-lambdas))
1094 (else info))))
1095
1096 (let ((toplevel-calls (toplevel-procedure-calls info))
1097 (lexical-lambdas (lexical-lambdas info))
1098 (toplevel-lambdas (toplevel-lambdas info)))
1099
1100 (record-case x
1101 ((<toplevel-define> name exp)
1102 (record-case exp
1103 ((<lambda> body)
1104 (make-arity-info toplevel-calls
1105 lexical-lambdas
1106 (vhash-consq name exp toplevel-lambdas)))
1107 ((<toplevel-ref> name)
1108 ;; alias for another toplevel
1109 (let ((proc (vhash-assq name toplevel-lambdas)))
1110 (make-arity-info toplevel-calls
1111 lexical-lambdas
1112 (vhash-consq (toplevel-define-name x)
1113 (if (pair? proc)
1114 (cdr proc)
1115 exp)
1116 toplevel-lambdas))))
1117 (else info)))
1118 ((<let> gensyms vals)
1119 (fold extend info gensyms vals))
1120 ((<letrec> gensyms vals)
1121 (fold extend info gensyms vals))
1122 ((<fix> gensyms vals)
1123 (fold extend info gensyms vals))
1124
1125 ((<call> proc args src)
1126 (record-case proc
1127 ((<lambda> body)
1128 (validate-arity proc x #t)
1129 info)
1130 ((<toplevel-ref> name)
1131 (make-arity-info (vhash-consq name x toplevel-calls)
1132 lexical-lambdas
1133 toplevel-lambdas))
1134 ((<lexical-ref> gensym)
1135 (let ((proc (vhash-assq gensym lexical-lambdas)))
1136 (if (pair? proc)
1137 (record-case (cdr proc)
1138 ((<toplevel-ref> name)
1139 ;; alias to toplevel
1140 (make-arity-info (vhash-consq name x toplevel-calls)
1141 lexical-lambdas
1142 toplevel-lambdas))
1143 (else
1144 (validate-arity (cdr proc) x #t)
1145 info))
1146
1147 ;; If GENSYM wasn't found, it may be because it's an
1148 ;; argument of the procedure being compiled.
1149 info)))
1150 (else info)))
1151 (else info))))
1152
1153 (lambda (x info env locs)
1154 ;; Up from X.
1155 (define (shrink name val info)
1156 ;; Remove NAME from the lexical-lambdas of INFO.
1157 (let ((toplevel-calls (toplevel-procedure-calls info))
1158 (lexical-lambdas (lexical-lambdas info))
1159 (toplevel-lambdas (toplevel-lambdas info)))
1160 (make-arity-info toplevel-calls
1161 (if (vhash-assq name lexical-lambdas)
1162 (vlist-tail lexical-lambdas)
1163 lexical-lambdas)
1164 toplevel-lambdas)))
1165
1166 (let ((toplevel-calls (toplevel-procedure-calls info))
1167 (lexical-lambdas (lexical-lambdas info))
1168 (toplevel-lambdas (toplevel-lambdas info)))
1169 (record-case x
1170 ((<let> gensyms vals)
1171 (fold shrink info gensyms vals))
1172 ((<letrec> gensyms vals)
1173 (fold shrink info gensyms vals))
1174 ((<fix> gensyms vals)
1175 (fold shrink info gensyms vals))
1176
1177 (else info))))
1178
1179 (lambda (result env)
1180 ;; Post-processing: check all top-level procedure calls that have been
1181 ;; encountered.
1182 (let ((toplevel-calls (toplevel-procedure-calls result))
1183 (toplevel-lambdas (toplevel-lambdas result)))
1184 (vlist-for-each
1185 (lambda (name+call)
1186 (let* ((name (car name+call))
1187 (call (cdr name+call))
1188 (proc
1189 (or (and=> (vhash-assq name toplevel-lambdas) cdr)
1190 (and (module? env)
1191 (false-if-exception
1192 (module-ref env name)))))
1193 (proc*
1194 ;; handle toplevel aliases
1195 (if (toplevel-ref? proc)
1196 (let ((name (toplevel-ref-name proc)))
1197 (and (module? env)
1198 (false-if-exception
1199 (module-ref env name))))
1200 proc)))
1201 (if (or (lambda? proc*) (procedure? proc*))
1202 (validate-arity proc* call (lambda? proc*)))))
1203 toplevel-calls)))
1204
1205 (make-arity-info vlist-null vlist-null vlist-null)))
1206
1207 \f
1208 ;;;
1209 ;;; `format' argument analysis.
1210 ;;;
1211
1212 (define &syntax-error
1213 ;; The `throw' key for syntax errors.
1214 (gensym "format-string-syntax-error"))
1215
1216 (define (format-string-argument-count fmt)
1217 ;; Return the minimum and maxium number of arguments that should
1218 ;; follow format string FMT (or, ahem, a good estimate thereof) or
1219 ;; `any' if the format string can be followed by any number of
1220 ;; arguments.
1221
1222 (define (drop-group chars end)
1223 ;; Drop characters from CHARS until "~END" is encountered.
1224 (let loop ((chars chars)
1225 (tilde? #f))
1226 (if (null? chars)
1227 (throw &syntax-error 'unterminated-iteration)
1228 (if tilde?
1229 (if (eq? (car chars) end)
1230 (cdr chars)
1231 (loop (cdr chars) #f))
1232 (if (eq? (car chars) #\~)
1233 (loop (cdr chars) #t)
1234 (loop (cdr chars) #f))))))
1235
1236 (define (digit? char)
1237 ;; Return true if CHAR is a digit, #f otherwise.
1238 (memq char '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)))
1239
1240 (define (previous-number chars)
1241 ;; Return the previous series of digits found in CHARS.
1242 (let ((numbers (take-while digit? chars)))
1243 (and (not (null? numbers))
1244 (string->number (list->string (reverse numbers))))))
1245
1246 (let loop ((chars (string->list fmt))
1247 (state 'literal)
1248 (params '())
1249 (conditions '())
1250 (end-group #f)
1251 (min-count 0)
1252 (max-count 0))
1253 (if (null? chars)
1254 (if end-group
1255 (throw &syntax-error 'unterminated-conditional)
1256 (values min-count max-count))
1257 (case state
1258 ((tilde)
1259 (case (car chars)
1260 ((#\~ #\% #\& #\t #\_ #\newline #\( #\))
1261 (loop (cdr chars) 'literal '()
1262 conditions end-group
1263 min-count max-count))
1264 ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\, #\: #\@)
1265 (loop (cdr chars)
1266 'tilde (cons (car chars) params)
1267 conditions end-group
1268 min-count max-count))
1269 ((#\v #\V) (loop (cdr chars)
1270 'tilde (cons (car chars) params)
1271 conditions end-group
1272 (+ 1 min-count)
1273 (+ 1 max-count)))
1274 ((#\[)
1275 (loop chars 'literal '() '()
1276 (let ((selector (previous-number params))
1277 (at? (memq #\@ params)))
1278 (lambda (chars conds)
1279 ;; end of group
1280 (let ((mins (map car conds))
1281 (maxs (map cdr conds))
1282 (sel? (and selector
1283 (< selector (length conds)))))
1284 (if (and (every number? mins)
1285 (every number? maxs))
1286 (loop chars 'literal '() conditions end-group
1287 (+ min-count
1288 (if sel?
1289 (car (list-ref conds selector))
1290 (+ (if at? 0 1)
1291 (if (null? mins)
1292 0
1293 (apply min mins)))))
1294 (+ max-count
1295 (if sel?
1296 (cdr (list-ref conds selector))
1297 (+ (if at? 0 1)
1298 (if (null? maxs)
1299 0
1300 (apply max maxs))))))
1301 (values 'any 'any))))) ;; XXX: approximation
1302 0 0))
1303 ((#\;)
1304 (if end-group
1305 (loop (cdr chars) 'literal '()
1306 (cons (cons min-count max-count) conditions)
1307 end-group
1308 0 0)
1309 (throw &syntax-error 'unexpected-semicolon)))
1310 ((#\])
1311 (if end-group
1312 (end-group (cdr chars)
1313 (reverse (cons (cons min-count max-count)
1314 conditions)))
1315 (throw &syntax-error 'unexpected-conditional-termination)))
1316 ((#\{) (if (memq #\@ params)
1317 (values min-count 'any)
1318 (loop (drop-group (cdr chars) #\})
1319 'literal '()
1320 conditions end-group
1321 (+ 1 min-count) (+ 1 max-count))))
1322 ((#\*) (if (memq #\@ params)
1323 (values 'any 'any) ;; it's unclear what to do here
1324 (loop (cdr chars)
1325 'literal '()
1326 conditions end-group
1327 (+ (or (previous-number params) 1)
1328 min-count)
1329 (+ (or (previous-number params) 1)
1330 max-count))))
1331 ((#\? #\k)
1332 ;; We don't have enough info to determine the exact number
1333 ;; of args, but we could determine a lower bound (TODO).
1334 (values 'any 'any))
1335 (else (loop (cdr chars) 'literal '()
1336 conditions end-group
1337 (+ 1 min-count) (+ 1 max-count)))))
1338 ((literal)
1339 (case (car chars)
1340 ((#\~) (loop (cdr chars) 'tilde '()
1341 conditions end-group
1342 min-count max-count))
1343 (else (loop (cdr chars) 'literal '()
1344 conditions end-group
1345 min-count max-count))))
1346 (else (error "computer bought the farm" state))))))
1347
1348 (define (const-fmt x)
1349 ;; Return the literal format pattern for X, or #f.
1350 (match x
1351 (($ <const> _ exp)
1352 exp)
1353 (($ <call> _
1354 (or ($ <toplevel-ref> _ '_) ($ <module-ref> _ '_))
1355 (($ <const> _ (and (? string?) fmt))))
1356 ;; Gettexted literals, like `(_ "foo")'.
1357 fmt)
1358 (_ #f)))
1359
1360 (define format-analysis
1361 ;; Report arity mismatches in the given tree.
1362 (make-tree-analysis
1363 (lambda (x _ env locs)
1364 ;; X is a leaf.
1365 #t)
1366
1367 (lambda (x _ env locs)
1368 ;; Down into X.
1369 (define (check-format-args args loc)
1370 (pmatch args
1371 ((,port ,fmt . ,rest)
1372 (guard (const-fmt fmt))
1373 (if (and (const? port)
1374 (not (boolean? (const-exp port))))
1375 (warning 'format loc 'wrong-port (const-exp port)))
1376 (let ((fmt (const-fmt fmt))
1377 (count (length rest)))
1378 (if (string? fmt)
1379 (catch &syntax-error
1380 (lambda ()
1381 (let-values (((min max)
1382 (format-string-argument-count fmt)))
1383 (and min max
1384 (or (and (or (eq? min 'any) (>= count min))
1385 (or (eq? max 'any) (<= count max)))
1386 (warning 'format loc 'wrong-format-arg-count
1387 fmt min max count)))))
1388 (lambda (_ key)
1389 (warning 'format loc 'syntax-error key fmt)))
1390 (warning 'format loc 'wrong-format-string fmt))))
1391 ((,port ,fmt . ,rest)
1392 (if (and (const? port)
1393 (not (boolean? (const-exp port))))
1394 (warn 'format loc 'wrong-port (const-exp port)))
1395 ;; Warn on non-literal format strings, unless they refer to a
1396 ;; lexical variable named "fmt".
1397 (if (record-case fmt
1398 ((<lexical-ref> name)
1399 (not (eq? name 'fmt)))
1400 (else #t))
1401 (warning 'format loc 'non-literal-format-string)))
1402 (else
1403 (warning 'format loc 'wrong-num-args (length args)))))
1404
1405 (define (resolve-toplevel name)
1406 (and (module? env)
1407 (false-if-exception (module-ref env name))))
1408
1409 (match x
1410 (($ <call> src ($ <toplevel-ref> _ name) args)
1411 (let ((proc (resolve-toplevel name)))
1412 (and (or (eq? proc format)
1413 (eq? proc (@ (ice-9 format) format)))
1414 (check-format-args args (or src (find pair? locs))))))
1415 (_ #t))
1416 #t)
1417
1418 (lambda (x _ env locs)
1419 ;; Up from X.
1420 #t)
1421
1422 (lambda (_ env)
1423 ;; Post-processing.
1424 #t)
1425
1426 #t))