Merge commit '58147d67806e1f54c447d7eabac35b1a5086c3a6'
[bpt/guile.git] / module / language / tree-il / analyze.scm
1 ;;; TREE-IL -> GLIL compiler
2
3 ;; Copyright (C) 2001, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
9 ;;;;
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
14 ;;;;
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Code:
20
21 (define-module (language tree-il analyze)
22 #:use-module (srfi srfi-1)
23 #:use-module (srfi srfi-9)
24 #:use-module (srfi srfi-11)
25 #:use-module (srfi srfi-26)
26 #:use-module (ice-9 vlist)
27 #:use-module (ice-9 match)
28 #:use-module (system base syntax)
29 #:use-module (system base message)
30 #:use-module (system vm program)
31 #:use-module (language tree-il)
32 #:use-module (system base pmatch)
33 #:export (analyze-lexicals
34 analyze-tree
35 unused-variable-analysis
36 unused-toplevel-analysis
37 unbound-variable-analysis
38 arity-analysis
39 format-analysis))
40
41 ;; Allocation is the process of assigning storage locations for lexical
42 ;; variables. A lexical variable has a distinct "address", or storage
43 ;; location, for each procedure in which it is referenced.
44 ;;
45 ;; A variable is "local", i.e., allocated on the stack, if it is
46 ;; referenced from within the procedure that defined it. Otherwise it is
47 ;; a "closure" variable. For example:
48 ;;
49 ;; (lambda (a) a) ; a will be local
50 ;; `a' is local to the procedure.
51 ;;
52 ;; (lambda (a) (lambda () a))
53 ;; `a' is local to the outer procedure, but a closure variable with
54 ;; respect to the inner procedure.
55 ;;
56 ;; If a variable is ever assigned, it needs to be heap-allocated
57 ;; ("boxed"). This is so that closures and continuations capture the
58 ;; variable's identity, not just one of the values it may have over the
59 ;; course of program execution. If the variable is never assigned, there
60 ;; is no distinction between value and identity, so closing over its
61 ;; identity (whether through closures or continuations) can make a copy
62 ;; of its value instead.
63 ;;
64 ;; Local variables are stored on the stack within a procedure's call
65 ;; frame. Their index into the stack is determined from their linear
66 ;; postion within a procedure's binding path:
67 ;; (let (0 1)
68 ;; (let (2 3) ...)
69 ;; (let (2) ...))
70 ;; (let (2 3 4) ...))
71 ;; etc.
72 ;;
73 ;; This algorithm has the problem that variables are only allocated
74 ;; indices at the end of the binding path. If variables bound early in
75 ;; the path are not used in later portions of the path, their indices
76 ;; will not be recycled. This problem is particularly egregious in the
77 ;; expansion of `or':
78 ;;
79 ;; (or x y z)
80 ;; -> (let ((a x)) (if a a (let ((b y)) (if b b z))))
81 ;;
82 ;; As you can see, the `a' binding is only used in the ephemeral
83 ;; `consequent' clause of the first `if', but its index would be
84 ;; reserved for the whole of the `or' expansion. So we have a hack for
85 ;; this specific case. A proper solution would be some sort of liveness
86 ;; analysis, and not our linear allocation algorithm.
87 ;;
88 ;; Closure variables are captured when a closure is created, and stored in a
89 ;; vector inline to the closure object itself. Each closure variable has a
90 ;; unique index into that vector.
91 ;;
92 ;; There is one more complication. Procedures bound by <fix> may, in
93 ;; some cases, be rendered inline to their parent procedure. That is to
94 ;; say,
95 ;;
96 ;; (letrec ((lp (lambda () (lp)))) (lp))
97 ;; => (fix ((lp (lambda () (lp)))) (lp))
98 ;; => goto FIX-BODY; LP: goto LP; FIX-BODY: goto LP;
99 ;; ^ jump over the loop ^ the fixpoint lp ^ starting off the loop
100 ;;
101 ;; The upshot is that we don't have to allocate any space for the `lp'
102 ;; closure at all, as it can be rendered inline as a loop. So there is
103 ;; another kind of allocation, "label allocation", in which the
104 ;; procedure is simply a label, placed at the start of the lambda body.
105 ;; The label is the gensym under which the lambda expression is bound.
106 ;;
107 ;; The analyzer checks to see that the label is called with the correct
108 ;; number of arguments. Calls to labels compile to rename + goto.
109 ;; Lambda, the ultimate goto!
110 ;;
111 ;;
112 ;; The return value of `analyze-lexicals' is a hash table, the
113 ;; "allocation".
114 ;;
115 ;; The allocation maps gensyms -- recall that each lexically bound
116 ;; variable has a unique gensym -- to storage locations ("addresses").
117 ;; Since one gensym may have many storage locations, if it is referenced
118 ;; in many procedures, it is a two-level map.
119 ;;
120 ;; The allocation also stored information on how many local variables
121 ;; need to be allocated for each procedure, lexicals that have been
122 ;; translated into labels, and information on what free variables to
123 ;; capture from its lexical parent procedure.
124 ;;
125 ;; In addition, we have a conflation: while we're traversing the code,
126 ;; recording information to pass to the compiler, we take the
127 ;; opportunity to generate labels for each lambda-case clause, so that
128 ;; generated code can skip argument checks at runtime if they match at
129 ;; compile-time.
130 ;;
131 ;; Also, while we're a-traversing and an-allocating, we check prompt
132 ;; handlers to see if the "continuation" argument is used. If not, we
133 ;; mark the prompt as being "escape-only". This allows us to implement
134 ;; `catch' and `throw' using `prompt' and `control', but without causing
135 ;; a continuation to be reified. Heh heh.
136 ;;
137 ;; That is:
138 ;;
139 ;; sym -> {lambda -> address}
140 ;; lambda -> (labels . free-locs)
141 ;; lambda-case -> (gensym . nlocs)
142 ;; prompt -> escape-only?
143 ;;
144 ;; address ::= (local? boxed? . index)
145 ;; labels ::= ((sym . lambda) ...)
146 ;; free-locs ::= ((sym0 . address0) (sym1 . address1) ...)
147 ;; free variable addresses are relative to parent proc.
148
149 (define (make-hashq k v)
150 (let ((res (make-hash-table)))
151 (hashq-set! res k v)
152 res))
153
154 (define (analyze-lexicals x)
155 ;; bound-vars: lambda -> (sym ...)
156 ;; all identifiers bound within a lambda
157 (define bound-vars (make-hash-table))
158 ;; free-vars: lambda -> (sym ...)
159 ;; all identifiers referenced in a lambda, but not bound
160 ;; NB, this includes identifiers referenced by contained lambdas
161 (define free-vars (make-hash-table))
162 ;; assigned: sym -> #t
163 ;; variables that are assigned
164 (define assigned (make-hash-table))
165 ;; refcounts: sym -> count
166 ;; allows us to detect the or-expansion in O(1) time
167 (define refcounts (make-hash-table))
168 ;; labels: sym -> lambda
169 ;; for determining if fixed-point procedures can be rendered as
170 ;; labels.
171 (define labels (make-hash-table))
172
173 ;; returns variables referenced in expr
174 (define (analyze! x proc labels-in-proc tail? tail-call-args)
175 (define (step y) (analyze! y proc '() #f #f))
176 (define (step-tail y) (analyze! y proc labels-in-proc tail? #f))
177 (define (step-tail-call y args) (analyze! y proc labels-in-proc #f
178 (and tail? args)))
179 (define (recur/labels x new-proc labels)
180 (analyze! x new-proc (append labels labels-in-proc) #t #f))
181 (define (recur x new-proc) (analyze! x new-proc '() tail? #f))
182 (record-case x
183 ((<call> proc args)
184 (apply lset-union eq? (step-tail-call proc args)
185 (map step args)))
186
187 ((<primcall> args)
188 (apply lset-union eq? (map step args)))
189
190 ((<conditional> test consequent alternate)
191 (lset-union eq? (step test) (step-tail consequent) (step-tail alternate)))
192
193 ((<lexical-ref> gensym)
194 (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
195 (if (not (and tail-call-args
196 (memq gensym labels-in-proc)
197 (let ((p (hashq-ref labels gensym)))
198 (and p
199 (let lp ((c (lambda-body p)))
200 (and c (lambda-case? c)
201 (or
202 ;; for now prohibit optional &
203 ;; keyword arguments; can relax this
204 ;; restriction later
205 (and (= (length (lambda-case-req c))
206 (length tail-call-args))
207 (not (lambda-case-opt c))
208 (not (lambda-case-kw c))
209 (not (lambda-case-rest c)))
210 (lp (lambda-case-alternate c)))))))))
211 (hashq-set! labels gensym #f))
212 (list gensym))
213
214 ((<lexical-set> gensym exp)
215 (hashq-set! assigned gensym #t)
216 (hashq-set! labels gensym #f)
217 (lset-adjoin eq? (step exp) gensym))
218
219 ((<module-set> exp)
220 (step exp))
221
222 ((<toplevel-set> exp)
223 (step exp))
224
225 ((<toplevel-define> exp)
226 (step exp))
227
228 ((<seq> head tail)
229 (lset-union eq? (step head) (step-tail tail)))
230
231 ((<lambda> body)
232 ;; order is important here
233 (hashq-set! bound-vars x '())
234 (let ((free (recur body x)))
235 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
236 (hashq-set! free-vars x free)
237 free))
238
239 ((<lambda-case> opt kw inits gensyms body alternate)
240 (hashq-set! bound-vars proc
241 (append (reverse gensyms) (hashq-ref bound-vars proc)))
242 (lset-union
243 eq?
244 (lset-difference eq?
245 (lset-union eq?
246 (apply lset-union eq? (map step inits))
247 (step-tail body))
248 gensyms)
249 (if alternate (step-tail alternate) '())))
250
251 ((<let> gensyms vals body)
252 (hashq-set! bound-vars proc
253 (append (reverse gensyms) (hashq-ref bound-vars proc)))
254 (lset-difference eq?
255 (apply lset-union eq? (step-tail body) (map step vals))
256 gensyms))
257
258 ((<letrec> gensyms vals body)
259 (hashq-set! bound-vars proc
260 (append (reverse gensyms) (hashq-ref bound-vars proc)))
261 (for-each (lambda (sym) (hashq-set! assigned sym #t)) gensyms)
262 (lset-difference eq?
263 (apply lset-union eq? (step-tail body) (map step vals))
264 gensyms))
265
266 ((<fix> gensyms vals body)
267 ;; Try to allocate these procedures as labels.
268 (for-each (lambda (sym val) (hashq-set! labels sym val))
269 gensyms vals)
270 (hashq-set! bound-vars proc
271 (append (reverse gensyms) (hashq-ref bound-vars proc)))
272 ;; Step into subexpressions.
273 (let* ((var-refs
274 (map
275 ;; Since we're trying to label-allocate the lambda,
276 ;; pretend it's not a closure, and just recurse into its
277 ;; body directly. (Otherwise, recursing on a closure
278 ;; that references one of the fix's bound vars would
279 ;; prevent label allocation.)
280 (lambda (x)
281 (record-case x
282 ((<lambda> body)
283 ;; just like the closure case, except here we use
284 ;; recur/labels instead of recur
285 (hashq-set! bound-vars x '())
286 (let ((free (recur/labels body x gensyms)))
287 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
288 (hashq-set! free-vars x free)
289 free))))
290 vals))
291 (vars-with-refs (map cons gensyms var-refs))
292 (body-refs (recur/labels body proc gensyms)))
293 (define (delabel-dependents! sym)
294 (let ((refs (assq-ref vars-with-refs sym)))
295 (if refs
296 (for-each (lambda (sym)
297 (if (hashq-ref labels sym)
298 (begin
299 (hashq-set! labels sym #f)
300 (delabel-dependents! sym))))
301 refs))))
302 ;; Stepping into the lambdas and the body might have made some
303 ;; procedures not label-allocatable -- which might have
304 ;; knock-on effects. For example:
305 ;; (fix ((a (lambda () (b)))
306 ;; (b (lambda () a)))
307 ;; (a))
308 ;; As far as `a' is concerned, both `a' and `b' are
309 ;; label-allocatable. But `b' references `a' not in a proc-tail
310 ;; position, which makes `a' not label-allocatable. The
311 ;; knock-on effect is that, when back-propagating this
312 ;; information to `a', `b' will also become not
313 ;; label-allocatable, as it is referenced within `a', which is
314 ;; allocated as a closure. This is a transitive relationship.
315 (for-each (lambda (sym)
316 (if (not (hashq-ref labels sym))
317 (delabel-dependents! sym)))
318 gensyms)
319 ;; Now lift bound variables with label-allocated lambdas to the
320 ;; parent procedure.
321 (for-each
322 (lambda (sym val)
323 (if (hashq-ref labels sym)
324 ;; Remove traces of the label-bound lambda. The free
325 ;; vars will propagate up via the return val.
326 (begin
327 (hashq-set! bound-vars proc
328 (append (hashq-ref bound-vars val)
329 (hashq-ref bound-vars proc)))
330 (hashq-remove! bound-vars val)
331 (hashq-remove! free-vars val))))
332 gensyms vals)
333 (lset-difference eq?
334 (apply lset-union eq? body-refs var-refs)
335 gensyms)))
336
337 ((<let-values> exp body)
338 (lset-union eq? (step exp) (step body)))
339
340 ((<prompt> escape-only? tag body handler)
341 (match handler
342 (($ <lambda> _ _ handler)
343 (lset-union eq? (step tag) (step body) (step-tail handler)))))
344
345 ((<abort> tag args tail)
346 (apply lset-union eq? (step tag) (step tail) (map step args)))
347
348 (else '())))
349
350 ;; allocation: sym -> {lambda -> address}
351 ;; lambda -> (labels . free-locs)
352 ;; lambda-case -> (gensym . nlocs)
353 (define allocation (make-hash-table))
354
355 (define (allocate! x proc n)
356 (define (recur y) (allocate! y proc n))
357 (record-case x
358 ((<call> proc args)
359 (apply max (recur proc) (map recur args)))
360
361 ((<primcall> args)
362 (apply max n (map recur args)))
363
364 ((<conditional> test consequent alternate)
365 (max (recur test) (recur consequent) (recur alternate)))
366
367 ((<lexical-set> exp)
368 (recur exp))
369
370 ((<module-set> exp)
371 (recur exp))
372
373 ((<toplevel-set> exp)
374 (recur exp))
375
376 ((<toplevel-define> exp)
377 (recur exp))
378
379 ((<seq> head tail)
380 (max (recur head)
381 (recur tail)))
382
383 ((<lambda> body)
384 ;; allocate closure vars in order
385 (let lp ((c (hashq-ref free-vars x)) (n 0))
386 (if (pair? c)
387 (begin
388 (hashq-set! (hashq-ref allocation (car c))
389 x
390 `(#f ,(hashq-ref assigned (car c)) . ,n))
391 (lp (cdr c) (1+ n)))))
392
393 (let ((nlocs (allocate! body x 0))
394 (free-addresses
395 (map (lambda (v)
396 (hashq-ref (hashq-ref allocation v) proc))
397 (hashq-ref free-vars x)))
398 (labels (filter cdr
399 (map (lambda (sym)
400 (cons sym (hashq-ref labels sym)))
401 (hashq-ref bound-vars x)))))
402 ;; set procedure allocations
403 (hashq-set! allocation x (cons labels free-addresses)))
404 n)
405
406 ((<lambda-case> opt kw inits gensyms body alternate)
407 (max
408 (let lp ((gensyms gensyms) (n n))
409 (if (null? gensyms)
410 (let ((nlocs (apply
411 max
412 (allocate! body proc n)
413 ;; inits not logically at the end, but they
414 ;; are the list...
415 (map (lambda (x) (allocate! x proc n)) inits))))
416 ;; label and nlocs for the case
417 (hashq-set! allocation x (cons (gensym ":LCASE") nlocs))
418 nlocs)
419 (begin
420 (hashq-set! allocation (car gensyms)
421 (make-hashq
422 proc `(#t ,(hashq-ref assigned (car gensyms)) . ,n)))
423 (lp (cdr gensyms) (1+ n)))))
424 (if alternate (allocate! alternate proc n) n)))
425
426 ((<let> gensyms vals body)
427 (let ((nmax (apply max (map recur vals))))
428 (cond
429 ;; the `or' hack
430 ((and (conditional? body)
431 (= (length gensyms) 1)
432 (let ((v (car gensyms)))
433 (and (not (hashq-ref assigned v))
434 (= (hashq-ref refcounts v 0) 2)
435 (lexical-ref? (conditional-test body))
436 (eq? (lexical-ref-gensym (conditional-test body)) v)
437 (lexical-ref? (conditional-consequent body))
438 (eq? (lexical-ref-gensym (conditional-consequent body)) v))))
439 (hashq-set! allocation (car gensyms)
440 (make-hashq proc `(#t #f . ,n)))
441 ;; the 1+ for this var
442 (max nmax (1+ n) (allocate! (conditional-alternate body) proc n)))
443 (else
444 (let lp ((gensyms gensyms) (n n))
445 (if (null? gensyms)
446 (max nmax (allocate! body proc n))
447 (let ((v (car gensyms)))
448 (hashq-set!
449 allocation v
450 (make-hashq proc
451 `(#t ,(hashq-ref assigned v) . ,n)))
452 (lp (cdr gensyms) (1+ n)))))))))
453
454 ((<letrec> gensyms vals body)
455 (let lp ((gensyms gensyms) (n n))
456 (if (null? gensyms)
457 (let ((nmax (apply max
458 (map (lambda (x)
459 (allocate! x proc n))
460 vals))))
461 (max nmax (allocate! body proc n)))
462 (let ((v (car gensyms)))
463 (hashq-set!
464 allocation v
465 (make-hashq proc
466 `(#t ,(hashq-ref assigned v) . ,n)))
467 (lp (cdr gensyms) (1+ n))))))
468
469 ((<fix> gensyms vals body)
470 (let lp ((in gensyms) (n n))
471 (if (null? in)
472 (let lp ((gensyms gensyms) (vals vals) (nmax n))
473 (cond
474 ((null? gensyms)
475 (max nmax (allocate! body proc n)))
476 ((hashq-ref labels (car gensyms))
477 ;; allocate lambda body inline to proc
478 (lp (cdr gensyms)
479 (cdr vals)
480 (record-case (car vals)
481 ((<lambda> body)
482 (max nmax (allocate! body proc n))))))
483 (else
484 ;; allocate closure
485 (lp (cdr gensyms)
486 (cdr vals)
487 (max nmax (allocate! (car vals) proc n))))))
488
489 (let ((v (car in)))
490 (cond
491 ((hashq-ref assigned v)
492 (error "fixpoint procedures may not be assigned" x))
493 ((hashq-ref labels v)
494 ;; no binding, it's a label
495 (lp (cdr in) n))
496 (else
497 ;; allocate closure binding
498 (hashq-set! allocation v (make-hashq proc `(#t #f . ,n)))
499 (lp (cdr in) (1+ n))))))))
500
501 ((<let-values> exp body)
502 (max (recur exp) (recur body)))
503
504 ((<prompt> escape-only? tag body handler)
505 (match handler
506 (($ <lambda> _ _ handler)
507 (max (recur tag) (recur body) (recur handler)))))
508
509 ((<abort> tag args tail)
510 (apply max (recur tag) (recur tail) (map recur args)))
511
512 (else n)))
513
514 (analyze! x #f '() #t #f)
515 (allocate! x #f 0)
516
517 allocation)
518
519 \f
520 ;;;
521 ;;; Tree analyses for warnings.
522 ;;;
523
524 (define-record-type <tree-analysis>
525 (make-tree-analysis down up post init)
526 tree-analysis?
527 (down tree-analysis-down) ;; (lambda (x result env locs) ...)
528 (up tree-analysis-up) ;; (lambda (x result env locs) ...)
529 (post tree-analysis-post) ;; (lambda (result env) ...)
530 (init tree-analysis-init)) ;; arbitrary value
531
532 (define (analyze-tree analyses tree env)
533 "Run all tree analyses listed in ANALYSES on TREE for ENV, using
534 `tree-il-fold'. Return TREE. The down and up procedures of each
535 analysis are passed a ``location stack', which is the stack of
536 `tree-il-src' values for each parent tree (a list); it can be used to
537 approximate source location when accurate information is missing from a
538 given `tree-il' element."
539
540 (define (traverse proc update-locs)
541 ;; Return a tree traversing procedure that returns a list of analysis
542 ;; results prepended by the location stack.
543 (lambda (x results)
544 (let ((locs (update-locs x (car results))))
545 (cons locs ;; the location stack
546 (map (lambda (analysis result)
547 ((proc analysis) x result env locs))
548 analyses
549 (cdr results))))))
550
551 ;; Extending and shrinking the location stack.
552 (define (extend-locs x locs) (cons (tree-il-src x) locs))
553 (define (shrink-locs x locs) (cdr locs))
554
555 (let ((results
556 (tree-il-fold (traverse tree-analysis-down extend-locs)
557 (traverse tree-analysis-up shrink-locs)
558 (cons '() ;; empty location stack
559 (map tree-analysis-init analyses))
560 tree)))
561
562 (for-each (lambda (analysis result)
563 ((tree-analysis-post analysis) result env))
564 analyses
565 (cdr results)))
566
567 tree)
568
569 \f
570 ;;;
571 ;;; Unused variable analysis.
572 ;;;
573
574 ;; <binding-info> records are used during tree traversals in
575 ;; `unused-variable-analysis'. They contain a list of the local vars
576 ;; currently in scope, and a list of locals vars that have been referenced.
577 (define-record-type <binding-info>
578 (make-binding-info vars refs)
579 binding-info?
580 (vars binding-info-vars) ;; ((GENSYM NAME LOCATION) ...)
581 (refs binding-info-refs)) ;; (GENSYM ...)
582
583 (define (gensym? sym)
584 ;; Return #t if SYM is (likely) a generated symbol.
585 (string-any #\space (symbol->string sym)))
586
587 (define unused-variable-analysis
588 ;; Report unused variables in the given tree.
589 (make-tree-analysis
590 (lambda (x info env locs)
591 ;; Going down into X: extend INFO's variable list
592 ;; accordingly.
593 (let ((refs (binding-info-refs info))
594 (vars (binding-info-vars info))
595 (src (tree-il-src x)))
596 (define (extend inner-vars inner-names)
597 (fold (lambda (var name vars)
598 (vhash-consq var (list name src) vars))
599 vars
600 inner-vars
601 inner-names))
602
603 (record-case x
604 ((<lexical-ref> gensym)
605 (make-binding-info vars (vhash-consq gensym #t refs)))
606 ((<lexical-set> gensym)
607 (make-binding-info vars (vhash-consq gensym #t refs)))
608 ((<lambda-case> req opt inits rest kw gensyms)
609 (let ((names `(,@req
610 ,@(or opt '())
611 ,@(if rest (list rest) '())
612 ,@(if kw (map cadr (cdr kw)) '()))))
613 (make-binding-info (extend gensyms names) refs)))
614 ((<let> gensyms names)
615 (make-binding-info (extend gensyms names) refs))
616 ((<letrec> gensyms names)
617 (make-binding-info (extend gensyms names) refs))
618 ((<fix> gensyms names)
619 (make-binding-info (extend gensyms names) refs))
620 (else info))))
621
622 (lambda (x info env locs)
623 ;; Leaving X's scope: shrink INFO's variable list
624 ;; accordingly and reported unused nested variables.
625 (let ((refs (binding-info-refs info))
626 (vars (binding-info-vars info)))
627 (define (shrink inner-vars refs)
628 (vlist-for-each
629 (lambda (var)
630 (let ((gensym (car var)))
631 ;; Don't report lambda parameters as unused.
632 (if (and (memq gensym inner-vars)
633 (not (vhash-assq gensym refs))
634 (not (lambda-case? x)))
635 (let ((name (cadr var))
636 ;; We can get approximate source location by going up
637 ;; the LOCS location stack.
638 (loc (or (caddr var)
639 (find pair? locs))))
640 (if (and (not (gensym? name))
641 (not (eq? name '_)))
642 (warning 'unused-variable loc name))))))
643 vars)
644 (vlist-drop vars (length inner-vars)))
645
646 ;; For simplicity, we leave REFS untouched, i.e., with
647 ;; names of variables that are now going out of scope.
648 ;; It doesn't hurt as these are unique names, it just
649 ;; makes REFS unnecessarily fat.
650 (record-case x
651 ((<lambda-case> gensyms)
652 (make-binding-info (shrink gensyms refs) refs))
653 ((<let> gensyms)
654 (make-binding-info (shrink gensyms refs) refs))
655 ((<letrec> gensyms)
656 (make-binding-info (shrink gensyms refs) refs))
657 ((<fix> gensyms)
658 (make-binding-info (shrink gensyms refs) refs))
659 (else info))))
660
661 (lambda (result env) #t)
662 (make-binding-info vlist-null vlist-null)))
663
664 \f
665 ;;;
666 ;;; Unused top-level variable analysis.
667 ;;;
668
669 ;; <reference-graph> record top-level definitions that are made, references to
670 ;; top-level definitions and their context (the top-level definition in which
671 ;; the reference appears), as well as the current context (the top-level
672 ;; definition we're currently in). The second part (`refs' below) is
673 ;; effectively a graph from which we can determine unused top-level definitions.
674 (define-record-type <reference-graph>
675 (make-reference-graph refs defs toplevel-context)
676 reference-graph?
677 (defs reference-graph-defs) ;; ((NAME . LOC) ...)
678 (refs reference-graph-refs) ;; ((REF-CONTEXT REF ...) ...)
679 (toplevel-context reference-graph-toplevel-context)) ;; NAME | #f
680
681 (define (graph-reachable-nodes root refs reachable)
682 ;; Add to REACHABLE the nodes reachable from ROOT in graph REFS. REFS is a
683 ;; vhash mapping nodes to the list of their children: for instance,
684 ;; ((A -> (B C)) (B -> (A)) (C -> ())) corresponds to
685 ;;
686 ;; ,-------.
687 ;; v |
688 ;; A ----> B
689 ;; |
690 ;; v
691 ;; C
692 ;;
693 ;; REACHABLE is a vhash of nodes known to be otherwise reachable.
694
695 (let loop ((root root)
696 (path vlist-null)
697 (result reachable))
698 (if (or (vhash-assq root path)
699 (vhash-assq root result))
700 result
701 (let* ((children (or (and=> (vhash-assq root refs) cdr) '()))
702 (path (vhash-consq root #t path))
703 (result (fold (lambda (kid result)
704 (loop kid path result))
705 result
706 children)))
707 (fold (lambda (kid result)
708 (vhash-consq kid #t result))
709 result
710 children)))))
711
712 (define (graph-reachable-nodes* roots refs)
713 ;; Return the list of nodes in REFS reachable from the nodes listed in ROOTS.
714 (vlist-fold (lambda (root+true result)
715 (let* ((root (car root+true))
716 (reachable (graph-reachable-nodes root refs result)))
717 (vhash-consq root #t reachable)))
718 vlist-null
719 roots))
720
721 (define (partition* pred vhash)
722 ;; Partition VHASH according to PRED. Return the two resulting vhashes.
723 (let ((result
724 (vlist-fold (lambda (k+v result)
725 (let ((k (car k+v))
726 (v (cdr k+v))
727 (r1 (car result))
728 (r2 (cdr result)))
729 (if (pred k)
730 (cons (vhash-consq k v r1) r2)
731 (cons r1 (vhash-consq k v r2)))))
732 (cons vlist-null vlist-null)
733 vhash)))
734 (values (car result) (cdr result))))
735
736 (define unused-toplevel-analysis
737 ;; Report unused top-level definitions that are not exported.
738 (let ((add-ref-from-context
739 (lambda (graph name)
740 ;; Add an edge CTX -> NAME in GRAPH.
741 (let* ((refs (reference-graph-refs graph))
742 (defs (reference-graph-defs graph))
743 (ctx (reference-graph-toplevel-context graph))
744 (ctx-refs (or (and=> (vhash-assq ctx refs) cdr) '())))
745 (make-reference-graph (vhash-consq ctx (cons name ctx-refs) refs)
746 defs ctx)))))
747 (define (macro-variable? name env)
748 (and (module? env)
749 (let ((var (module-variable env name)))
750 (and var (variable-bound? var)
751 (macro? (variable-ref var))))))
752
753 (make-tree-analysis
754 (lambda (x graph env locs)
755 ;; Going down into X.
756 (let ((ctx (reference-graph-toplevel-context graph))
757 (refs (reference-graph-refs graph))
758 (defs (reference-graph-defs graph)))
759 (record-case x
760 ((<toplevel-ref> name src)
761 (add-ref-from-context graph name))
762 ((<toplevel-define> name src)
763 (let ((refs refs)
764 (defs (vhash-consq name (or src (find pair? locs))
765 defs)))
766 (make-reference-graph refs defs name)))
767 ((<toplevel-set> name src)
768 (add-ref-from-context graph name))
769 (else graph))))
770
771 (lambda (x graph env locs)
772 ;; Leaving X's scope.
773 (record-case x
774 ((<toplevel-define>)
775 (let ((refs (reference-graph-refs graph))
776 (defs (reference-graph-defs graph)))
777 (make-reference-graph refs defs #f)))
778 (else graph)))
779
780 (lambda (graph env)
781 ;; Process the resulting reference graph: determine all private definitions
782 ;; not reachable from any public definition. Macros
783 ;; (syntax-transformers), which are globally bound, never considered
784 ;; unused since we can't tell whether a macro is actually used; in
785 ;; addition, macros are considered roots of the graph since they may use
786 ;; private bindings. FIXME: The `make-syntax-transformer' calls don't
787 ;; contain any literal `toplevel-ref' of the global bindings they use so
788 ;; this strategy fails.
789 (define (exported? name)
790 (if (module? env)
791 (module-variable (module-public-interface env) name)
792 #t))
793
794 (let-values (((public-defs private-defs)
795 (partition* (lambda (name)
796 (or (exported? name)
797 (macro-variable? name env)))
798 (reference-graph-defs graph))))
799 (let* ((roots (vhash-consq #f #t public-defs))
800 (refs (reference-graph-refs graph))
801 (reachable (graph-reachable-nodes* roots refs))
802 (unused (vlist-filter (lambda (name+src)
803 (not (vhash-assq (car name+src)
804 reachable)))
805 private-defs)))
806 (vlist-for-each (lambda (name+loc)
807 (let ((name (car name+loc))
808 (loc (cdr name+loc)))
809 (if (not (gensym? name))
810 (warning 'unused-toplevel loc name))))
811 unused))))
812
813 (make-reference-graph vlist-null vlist-null #f))))
814
815 \f
816 ;;;
817 ;;; Unbound variable analysis.
818 ;;;
819
820 ;; <toplevel-info> records are used during tree traversal in search of
821 ;; possibly unbound variable. They contain a list of references to
822 ;; potentially unbound top-level variables, and a list of the top-level
823 ;; defines that have been encountered.
824 (define-record-type <toplevel-info>
825 (make-toplevel-info refs defs)
826 toplevel-info?
827 (refs toplevel-info-refs) ;; ((VARIABLE-NAME . LOCATION) ...)
828 (defs toplevel-info-defs)) ;; (VARIABLE-NAME ...)
829
830 (define (goops-toplevel-definition proc args env)
831 ;; If call of PROC to ARGS is a GOOPS top-level definition, return
832 ;; the name of the variable being defined; otherwise return #f. This
833 ;; assumes knowledge of the current implementation of `define-class' et al.
834 (define (toplevel-define-arg args)
835 (match args
836 ((($ <const> _ (and (? symbol?) exp)) _)
837 exp)
838 (_ #f)))
839
840 (match proc
841 (($ <module-ref> _ '(oop goops) 'toplevel-define! #f)
842 (toplevel-define-arg args))
843 (($ <toplevel-ref> _ 'toplevel-define!)
844 ;; This may be the result of expanding one of the GOOPS macros within
845 ;; `oop/goops.scm'.
846 (and (eq? env (resolve-module '(oop goops)))
847 (toplevel-define-arg args)))
848 (_ #f)))
849
850 (define unbound-variable-analysis
851 ;; Report possibly unbound variables in the given tree.
852 (make-tree-analysis
853 (lambda (x info env locs)
854 ;; Going down into X.
855 (let* ((refs (toplevel-info-refs info))
856 (defs (toplevel-info-defs info))
857 (src (tree-il-src x)))
858 (define (bound? name)
859 (or (and (module? env)
860 (module-variable env name))
861 (vhash-assq name defs)))
862
863 (record-case x
864 ((<toplevel-ref> name src)
865 (if (bound? name)
866 info
867 (let ((src (or src (find pair? locs))))
868 (make-toplevel-info (vhash-consq name src refs)
869 defs))))
870 ((<toplevel-set> name src)
871 (if (bound? name)
872 (make-toplevel-info refs defs)
873 (let ((src (find pair? locs)))
874 (make-toplevel-info (vhash-consq name src refs)
875 defs))))
876 ((<toplevel-define> name)
877 (make-toplevel-info (vhash-delq name refs)
878 (vhash-consq name #t defs)))
879
880 ((<call> proc args)
881 ;; Check for a dynamic top-level definition, as is
882 ;; done by code expanded from GOOPS macros.
883 (let ((name (goops-toplevel-definition proc args
884 env)))
885 (if (symbol? name)
886 (make-toplevel-info (vhash-delq name refs)
887 (vhash-consq name #t defs))
888 (make-toplevel-info refs defs))))
889 (else
890 (make-toplevel-info refs defs)))))
891
892 (lambda (x info env locs)
893 ;; Leaving X's scope.
894 info)
895
896 (lambda (toplevel env)
897 ;; Post-process the result.
898 (vlist-for-each (lambda (name+loc)
899 (let ((name (car name+loc))
900 (loc (cdr name+loc)))
901 (warning 'unbound-variable loc name)))
902 (vlist-reverse (toplevel-info-refs toplevel))))
903
904 (make-toplevel-info vlist-null vlist-null)))
905
906 \f
907 ;;;
908 ;;; Arity analysis.
909 ;;;
910
911 ;; <arity-info> records contain information about lexical definitions of
912 ;; procedures currently in scope, top-level procedure definitions that have
913 ;; been encountered, and calls to top-level procedures that have been
914 ;; encountered.
915 (define-record-type <arity-info>
916 (make-arity-info toplevel-calls lexical-lambdas toplevel-lambdas)
917 arity-info?
918 (toplevel-calls toplevel-procedure-calls) ;; ((NAME . CALL) ...)
919 (lexical-lambdas lexical-lambdas) ;; ((GENSYM . DEFINITION) ...)
920 (toplevel-lambdas toplevel-lambdas)) ;; ((NAME . DEFINITION) ...)
921
922 (define (validate-arity proc call lexical?)
923 ;; Validate the argument count of CALL, a tree-il call of
924 ;; PROC, emitting a warning in case of argument count mismatch.
925
926 (define (filter-keyword-args keywords allow-other-keys? args)
927 ;; Filter keyword arguments from ARGS and return the resulting list.
928 ;; KEYWORDS is the list of allowed keywords, and ALLOW-OTHER-KEYS?
929 ;; specified whethere keywords not listed in KEYWORDS are allowed.
930 (let loop ((args args)
931 (result '()))
932 (if (null? args)
933 (reverse result)
934 (let ((arg (car args)))
935 (if (and (const? arg)
936 (or (memq (const-exp arg) keywords)
937 (and allow-other-keys?
938 (keyword? (const-exp arg)))))
939 (loop (if (pair? (cdr args))
940 (cddr args)
941 '())
942 result)
943 (loop (cdr args)
944 (cons arg result)))))))
945
946 (define (arities proc)
947 ;; Return the arities of PROC, which can be either a tree-il or a
948 ;; procedure.
949 (define (len x)
950 (or (and (or (null? x) (pair? x))
951 (length x))
952 0))
953 (cond ((program? proc)
954 (values (procedure-name proc)
955 (map (lambda (a)
956 (list (length (or (assq-ref a 'required) '()))
957 (length (or (assq-ref a 'optional) '()))
958 (and (assq-ref a 'rest) #t)
959 (map car (or (assq-ref a 'keyword) '()))
960 (assq-ref a 'allow-other-keys?)))
961 (program-arguments-alists proc))))
962 ((procedure? proc)
963 (if (struct? proc)
964 ;; An applicable struct.
965 (arities (struct-ref proc 0))
966 ;; An applicable smob.
967 (let ((arity (procedure-minimum-arity proc)))
968 (values (procedure-name proc)
969 (list (list (car arity) (cadr arity) (caddr arity)
970 #f #f))))))
971 (else
972 (let loop ((name #f)
973 (proc proc)
974 (arities '()))
975 (if (not proc)
976 (values name (reverse arities))
977 (record-case proc
978 ((<lambda-case> req opt rest kw alternate)
979 (loop name alternate
980 (cons (list (len req) (len opt) rest
981 (and (pair? kw) (map car (cdr kw)))
982 (and (pair? kw) (car kw)))
983 arities)))
984 ((<lambda> meta body)
985 (loop (assoc-ref meta 'name) body arities))
986 (else
987 (values #f #f))))))))
988
989 (let ((args (call-args call))
990 (src (tree-il-src call)))
991 (call-with-values (lambda () (arities proc))
992 (lambda (name arities)
993 (define matches?
994 (find (lambda (arity)
995 (pmatch arity
996 ((,req ,opt ,rest? ,kw ,aok?)
997 (let ((args (if (pair? kw)
998 (filter-keyword-args kw aok? args)
999 args)))
1000 (if (and req opt)
1001 (let ((count (length args)))
1002 (and (>= count req)
1003 (or rest?
1004 (<= count (+ req opt)))))
1005 #t)))
1006 (else #t)))
1007 arities))
1008
1009 (if (not matches?)
1010 (warning 'arity-mismatch src
1011 (or name (with-output-to-string (lambda () (write proc))))
1012 lexical?)))))
1013 #t)
1014
1015 (define arity-analysis
1016 ;; Report arity mismatches in the given tree.
1017 (make-tree-analysis
1018 (lambda (x info env locs)
1019 ;; Down into X.
1020 (define (extend lexical-name val info)
1021 ;; If VAL is a lambda, add NAME to the lexical-lambdas of INFO.
1022 (let ((toplevel-calls (toplevel-procedure-calls info))
1023 (lexical-lambdas (lexical-lambdas info))
1024 (toplevel-lambdas (toplevel-lambdas info)))
1025 (record-case val
1026 ((<lambda> body)
1027 (make-arity-info toplevel-calls
1028 (vhash-consq lexical-name val
1029 lexical-lambdas)
1030 toplevel-lambdas))
1031 ((<lexical-ref> gensym)
1032 ;; lexical alias
1033 (let ((val* (vhash-assq gensym lexical-lambdas)))
1034 (if (pair? val*)
1035 (extend lexical-name (cdr val*) info)
1036 info)))
1037 ((<toplevel-ref> name)
1038 ;; top-level alias
1039 (make-arity-info toplevel-calls
1040 (vhash-consq lexical-name val
1041 lexical-lambdas)
1042 toplevel-lambdas))
1043 (else info))))
1044
1045 (let ((toplevel-calls (toplevel-procedure-calls info))
1046 (lexical-lambdas (lexical-lambdas info))
1047 (toplevel-lambdas (toplevel-lambdas info)))
1048
1049 (record-case x
1050 ((<toplevel-define> name exp)
1051 (record-case exp
1052 ((<lambda> body)
1053 (make-arity-info toplevel-calls
1054 lexical-lambdas
1055 (vhash-consq name exp toplevel-lambdas)))
1056 ((<toplevel-ref> name)
1057 ;; alias for another toplevel
1058 (let ((proc (vhash-assq name toplevel-lambdas)))
1059 (make-arity-info toplevel-calls
1060 lexical-lambdas
1061 (vhash-consq (toplevel-define-name x)
1062 (if (pair? proc)
1063 (cdr proc)
1064 exp)
1065 toplevel-lambdas))))
1066 (else info)))
1067 ((<let> gensyms vals)
1068 (fold extend info gensyms vals))
1069 ((<letrec> gensyms vals)
1070 (fold extend info gensyms vals))
1071 ((<fix> gensyms vals)
1072 (fold extend info gensyms vals))
1073
1074 ((<call> proc args src)
1075 (record-case proc
1076 ((<lambda> body)
1077 (validate-arity proc x #t)
1078 info)
1079 ((<toplevel-ref> name)
1080 (make-arity-info (vhash-consq name x toplevel-calls)
1081 lexical-lambdas
1082 toplevel-lambdas))
1083 ((<lexical-ref> gensym)
1084 (let ((proc (vhash-assq gensym lexical-lambdas)))
1085 (if (pair? proc)
1086 (record-case (cdr proc)
1087 ((<toplevel-ref> name)
1088 ;; alias to toplevel
1089 (make-arity-info (vhash-consq name x toplevel-calls)
1090 lexical-lambdas
1091 toplevel-lambdas))
1092 (else
1093 (validate-arity (cdr proc) x #t)
1094 info))
1095
1096 ;; If GENSYM wasn't found, it may be because it's an
1097 ;; argument of the procedure being compiled.
1098 info)))
1099 (else info)))
1100 (else info))))
1101
1102 (lambda (x info env locs)
1103 ;; Up from X.
1104 (define (shrink name val info)
1105 ;; Remove NAME from the lexical-lambdas of INFO.
1106 (let ((toplevel-calls (toplevel-procedure-calls info))
1107 (lexical-lambdas (lexical-lambdas info))
1108 (toplevel-lambdas (toplevel-lambdas info)))
1109 (make-arity-info toplevel-calls
1110 (if (vhash-assq name lexical-lambdas)
1111 (vlist-tail lexical-lambdas)
1112 lexical-lambdas)
1113 toplevel-lambdas)))
1114
1115 (let ((toplevel-calls (toplevel-procedure-calls info))
1116 (lexical-lambdas (lexical-lambdas info))
1117 (toplevel-lambdas (toplevel-lambdas info)))
1118 (record-case x
1119 ((<let> gensyms vals)
1120 (fold shrink info gensyms vals))
1121 ((<letrec> gensyms vals)
1122 (fold shrink info gensyms vals))
1123 ((<fix> gensyms vals)
1124 (fold shrink info gensyms vals))
1125
1126 (else info))))
1127
1128 (lambda (result env)
1129 ;; Post-processing: check all top-level procedure calls that have been
1130 ;; encountered.
1131 (let ((toplevel-calls (toplevel-procedure-calls result))
1132 (toplevel-lambdas (toplevel-lambdas result)))
1133 (vlist-for-each
1134 (lambda (name+call)
1135 (let* ((name (car name+call))
1136 (call (cdr name+call))
1137 (proc
1138 (or (and=> (vhash-assq name toplevel-lambdas) cdr)
1139 (and (module? env)
1140 (false-if-exception
1141 (module-ref env name)))))
1142 (proc*
1143 ;; handle toplevel aliases
1144 (if (toplevel-ref? proc)
1145 (let ((name (toplevel-ref-name proc)))
1146 (and (module? env)
1147 (false-if-exception
1148 (module-ref env name))))
1149 proc)))
1150 (cond ((lambda? proc*)
1151 (validate-arity proc* call #t))
1152 ((procedure? proc*)
1153 (validate-arity proc* call #f)))))
1154 toplevel-calls)))
1155
1156 (make-arity-info vlist-null vlist-null vlist-null)))
1157
1158 \f
1159 ;;;
1160 ;;; `format' argument analysis.
1161 ;;;
1162
1163 (define &syntax-error
1164 ;; The `throw' key for syntax errors.
1165 (gensym "format-string-syntax-error"))
1166
1167 (define (format-string-argument-count fmt)
1168 ;; Return the minimum and maxium number of arguments that should
1169 ;; follow format string FMT (or, ahem, a good estimate thereof) or
1170 ;; `any' if the format string can be followed by any number of
1171 ;; arguments.
1172
1173 (define (drop-group chars end)
1174 ;; Drop characters from CHARS until "~END" is encountered.
1175 (let loop ((chars chars)
1176 (tilde? #f))
1177 (if (null? chars)
1178 (throw &syntax-error 'unterminated-iteration)
1179 (if tilde?
1180 (if (eq? (car chars) end)
1181 (cdr chars)
1182 (loop (cdr chars) #f))
1183 (if (eq? (car chars) #\~)
1184 (loop (cdr chars) #t)
1185 (loop (cdr chars) #f))))))
1186
1187 (define (digit? char)
1188 ;; Return true if CHAR is a digit, #f otherwise.
1189 (memq char '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)))
1190
1191 (define (previous-number chars)
1192 ;; Return the previous series of digits found in CHARS.
1193 (let ((numbers (take-while digit? chars)))
1194 (and (not (null? numbers))
1195 (string->number (list->string (reverse numbers))))))
1196
1197 (let loop ((chars (string->list fmt))
1198 (state 'literal)
1199 (params '())
1200 (conditions '())
1201 (end-group #f)
1202 (min-count 0)
1203 (max-count 0))
1204 (if (null? chars)
1205 (if end-group
1206 (throw &syntax-error 'unterminated-conditional)
1207 (values min-count max-count))
1208 (case state
1209 ((tilde)
1210 (case (car chars)
1211 ((#\~ #\% #\& #\t #\T #\_ #\newline #\( #\) #\! #\| #\/ #\q #\Q)
1212 (loop (cdr chars) 'literal '()
1213 conditions end-group
1214 min-count max-count))
1215 ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\, #\: #\@ #\+ #\- #\#)
1216 (loop (cdr chars)
1217 'tilde (cons (car chars) params)
1218 conditions end-group
1219 min-count max-count))
1220 ((#\v #\V) (loop (cdr chars)
1221 'tilde (cons (car chars) params)
1222 conditions end-group
1223 (+ 1 min-count)
1224 (+ 1 max-count)))
1225 ((#\[)
1226 (loop chars 'literal '() '()
1227 (let ((selector (previous-number params))
1228 (at? (memq #\@ params)))
1229 (lambda (chars conds)
1230 ;; end of group
1231 (let ((mins (map car conds))
1232 (maxs (map cdr conds))
1233 (sel? (and selector
1234 (< selector (length conds)))))
1235 (if (and (every number? mins)
1236 (every number? maxs))
1237 (loop chars 'literal '() conditions end-group
1238 (+ min-count
1239 (if sel?
1240 (car (list-ref conds selector))
1241 (+ (if at? 0 1)
1242 (if (null? mins)
1243 0
1244 (apply min mins)))))
1245 (+ max-count
1246 (if sel?
1247 (cdr (list-ref conds selector))
1248 (+ (if at? 0 1)
1249 (if (null? maxs)
1250 0
1251 (apply max maxs))))))
1252 (values 'any 'any))))) ;; XXX: approximation
1253 0 0))
1254 ((#\;)
1255 (if end-group
1256 (loop (cdr chars) 'literal '()
1257 (cons (cons min-count max-count) conditions)
1258 end-group
1259 0 0)
1260 (throw &syntax-error 'unexpected-semicolon)))
1261 ((#\])
1262 (if end-group
1263 (end-group (cdr chars)
1264 (reverse (cons (cons min-count max-count)
1265 conditions)))
1266 (throw &syntax-error 'unexpected-conditional-termination)))
1267 ((#\{) (if (memq #\@ params)
1268 (values min-count 'any)
1269 (loop (drop-group (cdr chars) #\})
1270 'literal '()
1271 conditions end-group
1272 (+ 1 min-count) (+ 1 max-count))))
1273 ((#\*) (if (memq #\@ params)
1274 (values 'any 'any) ;; it's unclear what to do here
1275 (loop (cdr chars)
1276 'literal '()
1277 conditions end-group
1278 (+ (or (previous-number params) 1)
1279 min-count)
1280 (+ (or (previous-number params) 1)
1281 max-count))))
1282 ((#\? #\k #\K)
1283 ;; We don't have enough info to determine the exact number
1284 ;; of args, but we could determine a lower bound (TODO).
1285 (values 'any 'any))
1286 ((#\^)
1287 (values min-count 'any))
1288 ((#\h #\H)
1289 (let ((argc (if (memq #\: params) 2 1)))
1290 (loop (cdr chars) 'literal '()
1291 conditions end-group
1292 (+ argc min-count)
1293 (+ argc max-count))))
1294 ((#\')
1295 (if (null? (cdr chars))
1296 (throw &syntax-error 'unexpected-termination)
1297 (loop (cddr chars) 'tilde (cons (cadr chars) params)
1298 conditions end-group min-count max-count)))
1299 (else (loop (cdr chars) 'literal '()
1300 conditions end-group
1301 (+ 1 min-count) (+ 1 max-count)))))
1302 ((literal)
1303 (case (car chars)
1304 ((#\~) (loop (cdr chars) 'tilde '()
1305 conditions end-group
1306 min-count max-count))
1307 (else (loop (cdr chars) 'literal '()
1308 conditions end-group
1309 min-count max-count))))
1310 (else (error "computer bought the farm" state))))))
1311
1312 (define (proc-ref? exp proc special-name env)
1313 "Return #t when EXP designates procedure PROC in ENV. As a last
1314 resort, return #t when EXP refers to the global variable SPECIAL-NAME."
1315
1316 (define special?
1317 (cut eq? <> special-name))
1318
1319 (match exp
1320 (($ <toplevel-ref> _ (? special?))
1321 ;; Allow top-levels like: (define _ (cut gettext <> "my-domain")).
1322 #t)
1323 (($ <toplevel-ref> _ name)
1324 (let ((var (module-variable env name)))
1325 (and var (variable-bound? var)
1326 (eq? (variable-ref var) proc))))
1327 (($ <module-ref> _ _ (? special?))
1328 #t)
1329 (($ <module-ref> _ module name public?)
1330 (let* ((mod (if public?
1331 (false-if-exception (resolve-interface module))
1332 (resolve-module module #:ensure #f)))
1333 (var (and mod (module-variable mod name))))
1334 (and var (variable-bound? var) (eq? (variable-ref var) proc))))
1335 (($ <lexical-ref> _ (? special?))
1336 #t)
1337 (_ #f)))
1338
1339 (define gettext? (cut proc-ref? <> gettext '_ <>))
1340 (define ngettext? (cut proc-ref? <> ngettext 'N_ <>))
1341
1342 (define (const-fmt x env)
1343 ;; Return the literal format string for X, or #f.
1344 (match x
1345 (($ <const> _ (? string? exp))
1346 exp)
1347 (($ <call> _ (? (cut gettext? <> env))
1348 (($ <const> _ (? string? fmt))))
1349 ;; Gettexted literals, like `(_ "foo")'.
1350 fmt)
1351 (($ <call> _ (? (cut ngettext? <> env))
1352 (($ <const> _ (? string? fmt)) ($ <const> _ (? string?)) _ ..1))
1353 ;; Plural gettextized literals, like `(N_ "singular" "plural" n)'.
1354
1355 ;; TODO: Check whether the singular and plural strings have the
1356 ;; same format escapes.
1357 fmt)
1358 (_ #f)))
1359
1360 (define format-analysis
1361 ;; Report arity mismatches in the given tree.
1362 (make-tree-analysis
1363 (lambda (x _ env locs)
1364 ;; Down into X.
1365 (define (check-format-args args loc)
1366 (pmatch args
1367 ((,port ,fmt . ,rest)
1368 (guard (const-fmt fmt env))
1369 (if (and (const? port)
1370 (not (boolean? (const-exp port))))
1371 (warning 'format loc 'wrong-port (const-exp port)))
1372 (let ((fmt (const-fmt fmt env))
1373 (count (length rest)))
1374 (catch &syntax-error
1375 (lambda ()
1376 (let-values (((min max)
1377 (format-string-argument-count fmt)))
1378 (and min max
1379 (or (and (or (eq? min 'any) (>= count min))
1380 (or (eq? max 'any) (<= count max)))
1381 (warning 'format loc 'wrong-format-arg-count
1382 fmt min max count)))))
1383 (lambda (_ key)
1384 (warning 'format loc 'syntax-error key fmt)))))
1385 ((,port ,fmt . ,rest)
1386 (if (and (const? port)
1387 (not (boolean? (const-exp port))))
1388 (warning 'format loc 'wrong-port (const-exp port)))
1389
1390 (match fmt
1391 (($ <const> loc* (? (negate string?) fmt))
1392 (warning 'format (or loc* loc) 'wrong-format-string fmt))
1393
1394 ;; Warn on non-literal format strings, unless they refer to
1395 ;; a lexical variable named "fmt".
1396 (($ <lexical-ref> _ fmt)
1397 #t)
1398 ((? (negate const?))
1399 (warning 'format loc 'non-literal-format-string))))
1400 (else
1401 (warning 'format loc 'wrong-num-args (length args)))))
1402
1403 (define (check-simple-format-args args loc)
1404 ;; Check the arguments to the `simple-format' procedure, which is
1405 ;; less capable than that of (ice-9 format).
1406
1407 (define allowed-chars
1408 '(#\A #\S #\a #\s #\~ #\%))
1409
1410 (define (format-chars fmt)
1411 (let loop ((chars (string->list fmt))
1412 (result '()))
1413 (match chars
1414 (()
1415 (reverse result))
1416 ((#\~ opt rest ...)
1417 (loop rest (cons opt result)))
1418 ((_ rest ...)
1419 (loop rest result)))))
1420
1421 (match args
1422 ((port ($ <const> _ (? string? fmt)) _ ...)
1423 (let ((opts (format-chars fmt)))
1424 (or (every (cut memq <> allowed-chars) opts)
1425 (begin
1426 (warning 'format loc 'simple-format fmt
1427 (find (negate (cut memq <> allowed-chars)) opts))
1428 #f))))
1429 ((port (= (cut const-fmt <> env) (? string? fmt)) args ...)
1430 (check-simple-format-args `(,port ,(make-const loc fmt) ,args) loc))
1431 (_ #t)))
1432
1433 (define (resolve-toplevel name)
1434 (and (module? env)
1435 (false-if-exception (module-ref env name))))
1436
1437 (match x
1438 (($ <call> src ($ <toplevel-ref> _ name) args)
1439 (let ((proc (resolve-toplevel name)))
1440 (if (or (and (eq? proc (@ (guile) simple-format))
1441 (check-simple-format-args args
1442 (or src (find pair? locs))))
1443 (eq? proc (@ (ice-9 format) format)))
1444 (check-format-args args (or src (find pair? locs))))))
1445 (($ <call> src ($ <module-ref> _ '(ice-9 format) 'format) args)
1446 (check-format-args args (or src (find pair? locs))))
1447 (($ <call> src ($ <module-ref> _ '(guile)
1448 (or 'format 'simple-format))
1449 args)
1450 (and (check-simple-format-args args
1451 (or src (find pair? locs)))
1452 (check-format-args args (or src (find pair? locs)))))
1453 (_ #t))
1454 #t)
1455
1456 (lambda (x _ env locs)
1457 ;; Up from X.
1458 #t)
1459
1460 (lambda (_ env)
1461 ;; Post-processing.
1462 #t)
1463
1464 #t))