allow case-lambda expressions with no clauses
[bpt/guile.git] / module / language / tree-il / analyze.scm
CommitLineData
cf10678f
AW
1;;; TREE-IL -> GLIL compiler
2
b4af80a4 3;; Copyright (C) 2001, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
cf10678f 4
53befeb7
NJ
5;;;; This library is free software; you can redistribute it and/or
6;;;; modify it under the terms of the GNU Lesser General Public
7;;;; License as published by the Free Software Foundation; either
8;;;; version 3 of the License, or (at your option) any later version.
bcae9a98 9;;;;
53befeb7
NJ
10;;;; This library is distributed in the hope that it will be useful,
11;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13;;;; Lesser General Public License for more details.
bcae9a98 14;;;;
53befeb7
NJ
15;;;; You should have received a copy of the GNU Lesser General Public
16;;;; License along with this library; if not, write to the Free Software
17;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
cf10678f
AW
18
19;;; Code:
20
21(define-module (language tree-il analyze)
66d3e9a3 22 #:use-module (srfi srfi-1)
4b856371 23 #:use-module (srfi srfi-9)
bcae9a98 24 #:use-module (srfi srfi-11)
60273407 25 #:use-module (srfi srfi-26)
5cbf2e1d 26 #:use-module (ice-9 vlist)
16a3b316 27 #:use-module (ice-9 match)
cf10678f 28 #:use-module (system base syntax)
4b856371 29 #:use-module (system base message)
af5ed549 30 #:use-module (system vm program)
cf10678f 31 #:use-module (language tree-il)
99480e11 32 #:use-module (system base pmatch)
4b856371 33 #:export (analyze-lexicals
48b1db75
LC
34 analyze-tree
35 unused-variable-analysis
bcae9a98 36 unused-toplevel-analysis
ae03cf1f 37 unbound-variable-analysis
75365375
LC
38 arity-analysis
39 format-analysis))
cf10678f 40
66d3e9a3
AW
41;; Allocation is the process of assigning storage locations for lexical
42;; variables. A lexical variable has a distinct "address", or storage
43;; location, for each procedure in which it is referenced.
44;;
45;; A variable is "local", i.e., allocated on the stack, if it is
46;; referenced from within the procedure that defined it. Otherwise it is
47;; a "closure" variable. For example:
48;;
49;; (lambda (a) a) ; a will be local
50;; `a' is local to the procedure.
51;;
52;; (lambda (a) (lambda () a))
53;; `a' is local to the outer procedure, but a closure variable with
54;; respect to the inner procedure.
55;;
56;; If a variable is ever assigned, it needs to be heap-allocated
57;; ("boxed"). This is so that closures and continuations capture the
58;; variable's identity, not just one of the values it may have over the
59;; course of program execution. If the variable is never assigned, there
60;; is no distinction between value and identity, so closing over its
61;; identity (whether through closures or continuations) can make a copy
62;; of its value instead.
63;;
64;; Local variables are stored on the stack within a procedure's call
65;; frame. Their index into the stack is determined from their linear
66;; postion within a procedure's binding path:
cf10678f
AW
67;; (let (0 1)
68;; (let (2 3) ...)
69;; (let (2) ...))
70;; (let (2 3 4) ...))
71;; etc.
72;;
5af166bd
AW
73;; This algorithm has the problem that variables are only allocated
74;; indices at the end of the binding path. If variables bound early in
75;; the path are not used in later portions of the path, their indices
76;; will not be recycled. This problem is particularly egregious in the
77;; expansion of `or':
78;;
79;; (or x y z)
80;; -> (let ((a x)) (if a a (let ((b y)) (if b b z))))
81;;
b6d93b11
AW
82;; As you can see, the `a' binding is only used in the ephemeral
83;; `consequent' clause of the first `if', but its index would be
84;; reserved for the whole of the `or' expansion. So we have a hack for
85;; this specific case. A proper solution would be some sort of liveness
86;; analysis, and not our linear allocation algorithm.
5af166bd 87;;
282d128c
AW
88;; Closure variables are captured when a closure is created, and stored in a
89;; vector inline to the closure object itself. Each closure variable has a
90;; unique index into that vector.
66d3e9a3 91;;
9059993f
AW
92;; There is one more complication. Procedures bound by <fix> may, in
93;; some cases, be rendered inline to their parent procedure. That is to
94;; say,
95;;
96;; (letrec ((lp (lambda () (lp)))) (lp))
97;; => (fix ((lp (lambda () (lp)))) (lp))
98;; => goto FIX-BODY; LP: goto LP; FIX-BODY: goto LP;
99;; ^ jump over the loop ^ the fixpoint lp ^ starting off the loop
100;;
101;; The upshot is that we don't have to allocate any space for the `lp'
102;; closure at all, as it can be rendered inline as a loop. So there is
103;; another kind of allocation, "label allocation", in which the
104;; procedure is simply a label, placed at the start of the lambda body.
105;; The label is the gensym under which the lambda expression is bound.
106;;
107;; The analyzer checks to see that the label is called with the correct
108;; number of arguments. Calls to labels compile to rename + goto.
109;; Lambda, the ultimate goto!
110;;
66d3e9a3
AW
111;;
112;; The return value of `analyze-lexicals' is a hash table, the
113;; "allocation".
114;;
115;; The allocation maps gensyms -- recall that each lexically bound
116;; variable has a unique gensym -- to storage locations ("addresses").
117;; Since one gensym may have many storage locations, if it is referenced
118;; in many procedures, it is a two-level map.
119;;
120;; The allocation also stored information on how many local variables
9059993f
AW
121;; need to be allocated for each procedure, lexicals that have been
122;; translated into labels, and information on what free variables to
123;; capture from its lexical parent procedure.
66d3e9a3 124;;
8a4ca0ea
AW
125;; In addition, we have a conflation: while we're traversing the code,
126;; recording information to pass to the compiler, we take the
127;; opportunity to generate labels for each lambda-case clause, so that
128;; generated code can skip argument checks at runtime if they match at
129;; compile-time.
130;;
282d128c
AW
131;; Also, while we're a-traversing and an-allocating, we check prompt
132;; handlers to see if the "continuation" argument is used. If not, we
133;; mark the prompt as being "escape-only". This allows us to implement
134;; `catch' and `throw' using `prompt' and `control', but without causing
135;; a continuation to be reified. Heh heh.
136;;
66d3e9a3
AW
137;; That is:
138;;
139;; sym -> {lambda -> address}
8a4ca0ea
AW
140;; lambda -> (labels . free-locs)
141;; lambda-case -> (gensym . nlocs)
282d128c 142;; prompt -> escape-only?
66d3e9a3 143;;
9059993f 144;; address ::= (local? boxed? . index)
8a4ca0ea 145;; labels ::= ((sym . lambda) ...)
66d3e9a3
AW
146;; free-locs ::= ((sym0 . address0) (sym1 . address1) ...)
147;; free variable addresses are relative to parent proc.
148
149(define (make-hashq k v)
150 (let ((res (make-hash-table)))
151 (hashq-set! res k v)
152 res))
cf10678f
AW
153
154(define (analyze-lexicals x)
66d3e9a3
AW
155 ;; bound-vars: lambda -> (sym ...)
156 ;; all identifiers bound within a lambda
9059993f 157 (define bound-vars (make-hash-table))
66d3e9a3
AW
158 ;; free-vars: lambda -> (sym ...)
159 ;; all identifiers referenced in a lambda, but not bound
160 ;; NB, this includes identifiers referenced by contained lambdas
9059993f 161 (define free-vars (make-hash-table))
66d3e9a3
AW
162 ;; assigned: sym -> #t
163 ;; variables that are assigned
d97b69d9 164 (define assigned (make-hash-table))
5af166bd 165 ;; refcounts: sym -> count
66d3e9a3 166 ;; allows us to detect the or-expansion in O(1) time
9059993f 167 (define refcounts (make-hash-table))
8a4ca0ea 168 ;; labels: sym -> lambda
9059993f 169 ;; for determining if fixed-point procedures can be rendered as
8a4ca0ea 170 ;; labels.
9059993f
AW
171 (define labels (make-hash-table))
172
66d3e9a3 173 ;; returns variables referenced in expr
d97b69d9 174 (define (analyze! x proc labels-in-proc tail? tail-call-args)
aa9c1985 175 (define (step y) (analyze! y proc '() #f #f))
d97b69d9
AW
176 (define (step-tail y) (analyze! y proc labels-in-proc tail? #f))
177 (define (step-tail-call y args) (analyze! y proc labels-in-proc #f
178 (and tail? args)))
179 (define (recur/labels x new-proc labels)
180 (analyze! x new-proc (append labels labels-in-proc) #t #f))
181 (define (recur x new-proc) (analyze! x new-proc '() tail? #f))
cf10678f
AW
182 (record-case x
183 ((<application> proc args)
d97b69d9
AW
184 (apply lset-union eq? (step-tail-call proc args)
185 (map step args)))
cf10678f 186
b6d93b11
AW
187 ((<conditional> test consequent alternate)
188 (lset-union eq? (step test) (step-tail consequent) (step-tail alternate)))
cf10678f 189
e5f5113c 190 ((<lexical-ref> gensym)
5af166bd 191 (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
d97b69d9
AW
192 (if (not (and tail-call-args
193 (memq gensym labels-in-proc)
8a4ca0ea
AW
194 (let ((p (hashq-ref labels gensym)))
195 (and p
196 (let lp ((c (lambda-body p)))
197 (and c (lambda-case? c)
198 (or
199 ;; for now prohibit optional &
200 ;; keyword arguments; can relax this
201 ;; restriction later
202 (and (= (length (lambda-case-req c))
203 (length tail-call-args))
204 (not (lambda-case-opt c))
205 (not (lambda-case-kw c))
1e2a8edb 206 (not (lambda-case-rest c)))
3a88cb3b 207 (lp (lambda-case-alternate c)))))))))
d97b69d9 208 (hashq-set! labels gensym #f))
66d3e9a3 209 (list gensym))
cf10678f 210
e5f5113c 211 ((<lexical-set> gensym exp)
66d3e9a3 212 (hashq-set! assigned gensym #t)
d97b69d9 213 (hashq-set! labels gensym #f)
66d3e9a3 214 (lset-adjoin eq? (step exp) gensym))
cf10678f 215
e5f5113c 216 ((<module-set> exp)
cf10678f
AW
217 (step exp))
218
e5f5113c 219 ((<toplevel-set> exp)
cf10678f
AW
220 (step exp))
221
e5f5113c 222 ((<toplevel-define> exp)
cf10678f
AW
223 (step exp))
224
225 ((<sequence> exps)
d97b69d9
AW
226 (let lp ((exps exps) (ret '()))
227 (cond ((null? exps) '())
228 ((null? (cdr exps))
229 (lset-union eq? ret (step-tail (car exps))))
230 (else
231 (lp (cdr exps) (lset-union eq? ret (step (car exps))))))))
cf10678f 232
8a4ca0ea
AW
233 ((<lambda> body)
234 ;; order is important here
235 (hashq-set! bound-vars x '())
236 (let ((free (recur body x)))
237 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
238 (hashq-set! free-vars x free)
239 free))
240
93f63467 241 ((<lambda-case> opt kw inits gensyms body alternate)
8a4ca0ea 242 (hashq-set! bound-vars proc
93f63467 243 (append (reverse gensyms) (hashq-ref bound-vars proc)))
8a4ca0ea
AW
244 (lset-union
245 eq?
246 (lset-difference eq?
b0c8c187
AW
247 (lset-union eq?
248 (apply lset-union eq? (map step inits))
8a4ca0ea 249 (step-tail body))
93f63467 250 gensyms)
3a88cb3b 251 (if alternate (step-tail alternate) '())))
66d3e9a3 252
93f63467 253 ((<let> gensyms vals body)
66d3e9a3 254 (hashq-set! bound-vars proc
93f63467 255 (append (reverse gensyms) (hashq-ref bound-vars proc)))
66d3e9a3 256 (lset-difference eq?
d97b69d9 257 (apply lset-union eq? (step-tail body) (map step vals))
93f63467 258 gensyms))
cf10678f 259
93f63467 260 ((<letrec> gensyms vals body)
66d3e9a3 261 (hashq-set! bound-vars proc
93f63467
AW
262 (append (reverse gensyms) (hashq-ref bound-vars proc)))
263 (for-each (lambda (sym) (hashq-set! assigned sym #t)) gensyms)
66d3e9a3 264 (lset-difference eq?
d97b69d9 265 (apply lset-union eq? (step-tail body) (map step vals))
93f63467 266 gensyms))
66d3e9a3 267
93f63467 268 ((<fix> gensyms vals body)
d97b69d9 269 ;; Try to allocate these procedures as labels.
8a4ca0ea 270 (for-each (lambda (sym val) (hashq-set! labels sym val))
93f63467 271 gensyms vals)
c21c89b1 272 (hashq-set! bound-vars proc
93f63467 273 (append (reverse gensyms) (hashq-ref bound-vars proc)))
d97b69d9
AW
274 ;; Step into subexpressions.
275 (let* ((var-refs
276 (map
277 ;; Since we're trying to label-allocate the lambda,
278 ;; pretend it's not a closure, and just recurse into its
279 ;; body directly. (Otherwise, recursing on a closure
280 ;; that references one of the fix's bound vars would
281 ;; prevent label allocation.)
282 (lambda (x)
283 (record-case x
8a4ca0ea
AW
284 ((<lambda> body)
285 ;; just like the closure case, except here we use
286 ;; recur/labels instead of recur
287 (hashq-set! bound-vars x '())
93f63467 288 (let ((free (recur/labels body x gensyms)))
8a4ca0ea
AW
289 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
290 (hashq-set! free-vars x free)
291 free))))
d97b69d9 292 vals))
93f63467
AW
293 (vars-with-refs (map cons gensyms var-refs))
294 (body-refs (recur/labels body proc gensyms)))
d97b69d9
AW
295 (define (delabel-dependents! sym)
296 (let ((refs (assq-ref vars-with-refs sym)))
297 (if refs
298 (for-each (lambda (sym)
299 (if (hashq-ref labels sym)
300 (begin
301 (hashq-set! labels sym #f)
302 (delabel-dependents! sym))))
303 refs))))
304 ;; Stepping into the lambdas and the body might have made some
305 ;; procedures not label-allocatable -- which might have
306 ;; knock-on effects. For example:
307 ;; (fix ((a (lambda () (b)))
308 ;; (b (lambda () a)))
309 ;; (a))
310 ;; As far as `a' is concerned, both `a' and `b' are
311 ;; label-allocatable. But `b' references `a' not in a proc-tail
312 ;; position, which makes `a' not label-allocatable. The
313 ;; knock-on effect is that, when back-propagating this
314 ;; information to `a', `b' will also become not
315 ;; label-allocatable, as it is referenced within `a', which is
316 ;; allocated as a closure. This is a transitive relationship.
317 (for-each (lambda (sym)
318 (if (not (hashq-ref labels sym))
319 (delabel-dependents! sym)))
93f63467 320 gensyms)
d97b69d9
AW
321 ;; Now lift bound variables with label-allocated lambdas to the
322 ;; parent procedure.
323 (for-each
324 (lambda (sym val)
325 (if (hashq-ref labels sym)
326 ;; Remove traces of the label-bound lambda. The free
327 ;; vars will propagate up via the return val.
328 (begin
329 (hashq-set! bound-vars proc
330 (append (hashq-ref bound-vars val)
331 (hashq-ref bound-vars proc)))
332 (hashq-remove! bound-vars val)
333 (hashq-remove! free-vars val))))
93f63467 334 gensyms vals)
d97b69d9
AW
335 (lset-difference eq?
336 (apply lset-union eq? body-refs var-refs)
93f63467 337 gensyms)))
c21c89b1 338
8a4ca0ea
AW
339 ((<let-values> exp body)
340 (lset-union eq? (step exp) (step body)))
66d3e9a3 341
8da6ab34 342 ((<dynwind> body winder unwinder)
282d128c
AW
343 (lset-union eq? (step body) (step winder) (step unwinder)))
344
d7c53a86
AW
345 ((<dynlet> fluids vals body)
346 (apply lset-union eq? (step body) (map step (append fluids vals))))
347
706a705e
AW
348 ((<dynref> fluid)
349 (step fluid))
350
351 ((<dynset> fluid exp)
352 (lset-union eq? (step fluid) (step exp)))
353
07a0c7d5 354 ((<prompt> tag body handler)
b9204185 355 (lset-union eq? (step tag) (step body) (step-tail handler)))
282d128c 356
2d026f04
AW
357 ((<abort> tag args tail)
358 (apply lset-union eq? (step tag) (step tail) (map step args)))
282d128c 359
66d3e9a3
AW
360 (else '())))
361
9059993f 362 ;; allocation: sym -> {lambda -> address}
3004fe26
MW
363 ;; lambda -> (labels . free-locs)
364 ;; lambda-case -> (gensym . nlocs)
9059993f
AW
365 (define allocation (make-hash-table))
366
66d3e9a3
AW
367 (define (allocate! x proc n)
368 (define (recur y) (allocate! y proc n))
369 (record-case x
370 ((<application> proc args)
371 (apply max (recur proc) (map recur args)))
cf10678f 372
b6d93b11
AW
373 ((<conditional> test consequent alternate)
374 (max (recur test) (recur consequent) (recur alternate)))
cf10678f 375
e5f5113c 376 ((<lexical-set> exp)
66d3e9a3
AW
377 (recur exp))
378
e5f5113c 379 ((<module-set> exp)
66d3e9a3
AW
380 (recur exp))
381
e5f5113c 382 ((<toplevel-set> exp)
66d3e9a3
AW
383 (recur exp))
384
e5f5113c 385 ((<toplevel-define> exp)
66d3e9a3
AW
386 (recur exp))
387
388 ((<sequence> exps)
389 (apply max (map recur exps)))
390
8a4ca0ea 391 ((<lambda> body)
66d3e9a3
AW
392 ;; allocate closure vars in order
393 (let lp ((c (hashq-ref free-vars x)) (n 0))
394 (if (pair? c)
395 (begin
396 (hashq-set! (hashq-ref allocation (car c))
397 x
398 `(#f ,(hashq-ref assigned (car c)) . ,n))
399 (lp (cdr c) (1+ n)))))
400
8a4ca0ea 401 (let ((nlocs (allocate! body x 0))
66d3e9a3
AW
402 (free-addresses
403 (map (lambda (v)
404 (hashq-ref (hashq-ref allocation v) proc))
9059993f
AW
405 (hashq-ref free-vars x)))
406 (labels (filter cdr
407 (map (lambda (sym)
408 (cons sym (hashq-ref labels sym)))
409 (hashq-ref bound-vars x)))))
66d3e9a3 410 ;; set procedure allocations
8a4ca0ea 411 (hashq-set! allocation x (cons labels free-addresses)))
66d3e9a3 412 n)
cf10678f 413
93f63467 414 ((<lambda-case> opt kw inits gensyms body alternate)
8a4ca0ea 415 (max
93f63467
AW
416 (let lp ((gensyms gensyms) (n n))
417 (if (null? gensyms)
b0c8c187
AW
418 (let ((nlocs (apply
419 max
b0c8c187
AW
420 (allocate! body proc n)
421 ;; inits not logically at the end, but they
422 ;; are the list...
9a9d82c2 423 (map (lambda (x) (allocate! x proc n)) inits))))
8a4ca0ea
AW
424 ;; label and nlocs for the case
425 (hashq-set! allocation x (cons (gensym ":LCASE") nlocs))
426 nlocs)
427 (begin
93f63467 428 (hashq-set! allocation (car gensyms)
8a4ca0ea 429 (make-hashq
93f63467
AW
430 proc `(#t ,(hashq-ref assigned (car gensyms)) . ,n)))
431 (lp (cdr gensyms) (1+ n)))))
3a88cb3b 432 (if alternate (allocate! alternate proc n) n)))
8a4ca0ea 433
93f63467 434 ((<let> gensyms vals body)
66d3e9a3
AW
435 (let ((nmax (apply max (map recur vals))))
436 (cond
437 ;; the `or' hack
438 ((and (conditional? body)
93f63467
AW
439 (= (length gensyms) 1)
440 (let ((v (car gensyms)))
66d3e9a3
AW
441 (and (not (hashq-ref assigned v))
442 (= (hashq-ref refcounts v 0) 2)
443 (lexical-ref? (conditional-test body))
444 (eq? (lexical-ref-gensym (conditional-test body)) v)
b6d93b11
AW
445 (lexical-ref? (conditional-consequent body))
446 (eq? (lexical-ref-gensym (conditional-consequent body)) v))))
93f63467 447 (hashq-set! allocation (car gensyms)
66d3e9a3
AW
448 (make-hashq proc `(#t #f . ,n)))
449 ;; the 1+ for this var
b6d93b11 450 (max nmax (1+ n) (allocate! (conditional-alternate body) proc n)))
66d3e9a3 451 (else
93f63467
AW
452 (let lp ((gensyms gensyms) (n n))
453 (if (null? gensyms)
66d3e9a3 454 (max nmax (allocate! body proc n))
93f63467 455 (let ((v (car gensyms)))
cf10678f
AW
456 (hashq-set!
457 allocation v
66d3e9a3
AW
458 (make-hashq proc
459 `(#t ,(hashq-ref assigned v) . ,n)))
93f63467 460 (lp (cdr gensyms) (1+ n)))))))))
66d3e9a3 461
93f63467
AW
462 ((<letrec> gensyms vals body)
463 (let lp ((gensyms gensyms) (n n))
464 (if (null? gensyms)
66d3e9a3
AW
465 (let ((nmax (apply max
466 (map (lambda (x)
467 (allocate! x proc n))
468 vals))))
469 (max nmax (allocate! body proc n)))
93f63467 470 (let ((v (car gensyms)))
66d3e9a3
AW
471 (hashq-set!
472 allocation v
473 (make-hashq proc
474 `(#t ,(hashq-ref assigned v) . ,n)))
93f63467 475 (lp (cdr gensyms) (1+ n))))))
cf10678f 476
93f63467
AW
477 ((<fix> gensyms vals body)
478 (let lp ((in gensyms) (n n))
d97b69d9 479 (if (null? in)
93f63467 480 (let lp ((gensyms gensyms) (vals vals) (nmax n))
d97b69d9 481 (cond
93f63467 482 ((null? gensyms)
d97b69d9 483 (max nmax (allocate! body proc n)))
93f63467 484 ((hashq-ref labels (car gensyms))
8a4ca0ea 485 ;; allocate lambda body inline to proc
93f63467 486 (lp (cdr gensyms)
d97b69d9
AW
487 (cdr vals)
488 (record-case (car vals)
8a4ca0ea
AW
489 ((<lambda> body)
490 (max nmax (allocate! body proc n))))))
d97b69d9
AW
491 (else
492 ;; allocate closure
93f63467 493 (lp (cdr gensyms)
d97b69d9
AW
494 (cdr vals)
495 (max nmax (allocate! (car vals) proc n))))))
496
497 (let ((v (car in)))
498 (cond
499 ((hashq-ref assigned v)
500 (error "fixpoint procedures may not be assigned" x))
501 ((hashq-ref labels v)
502 ;; no binding, it's a label
503 (lp (cdr in) n))
504 (else
505 ;; allocate closure binding
506 (hashq-set! allocation v (make-hashq proc `(#t #f . ,n)))
507 (lp (cdr in) (1+ n))))))))
c21c89b1 508
8a4ca0ea
AW
509 ((<let-values> exp body)
510 (max (recur exp) (recur body)))
66d3e9a3 511
8da6ab34 512 ((<dynwind> body winder unwinder)
282d128c
AW
513 (max (recur body) (recur winder) (recur unwinder)))
514
d7c53a86
AW
515 ((<dynlet> fluids vals body)
516 (apply max (recur body) (map recur (append fluids vals))))
517
706a705e
AW
518 ((<dynref> fluid)
519 (recur fluid))
520
521 ((<dynset> fluid exp)
522 (max (recur fluid) (recur exp)))
523
07a0c7d5 524 ((<prompt> tag body handler)
282d128c 525 (let ((cont-var (and (lambda-case? handler)
93f63467
AW
526 (pair? (lambda-case-gensyms handler))
527 (car (lambda-case-gensyms handler)))))
282d128c
AW
528 (hashq-set! allocation x
529 (and cont-var (zero? (hashq-ref refcounts cont-var 0))))
07a0c7d5 530 (max (recur tag) (recur body) (recur handler))))
282d128c 531
2d026f04
AW
532 ((<abort> tag args tail)
533 (apply max (recur tag) (recur tail) (map recur args)))
282d128c 534
66d3e9a3 535 (else n)))
cf10678f 536
d97b69d9 537 (analyze! x #f '() #t #f)
66d3e9a3 538 (allocate! x #f 0)
cf10678f
AW
539
540 allocation)
4b856371
LC
541
542\f
48b1db75
LC
543;;;
544;;; Tree analyses for warnings.
545;;;
546
547(define-record-type <tree-analysis>
548 (make-tree-analysis leaf down up post init)
549 tree-analysis?
795ab688
LC
550 (leaf tree-analysis-leaf) ;; (lambda (x result env locs) ...)
551 (down tree-analysis-down) ;; (lambda (x result env locs) ...)
552 (up tree-analysis-up) ;; (lambda (x result env locs) ...)
48b1db75
LC
553 (post tree-analysis-post) ;; (lambda (result env) ...)
554 (init tree-analysis-init)) ;; arbitrary value
555
556(define (analyze-tree analyses tree env)
557 "Run all tree analyses listed in ANALYSES on TREE for ENV, using
795ab688
LC
558`tree-il-fold'. Return TREE. The leaf/down/up procedures of each analysis are
559passed a ``location stack', which is the stack of `tree-il-src' values for each
560parent tree (a list); it can be used to approximate source location when
561accurate information is missing from a given `tree-il' element."
562
563 (define (traverse proc update-locs)
564 ;; Return a tree traversing procedure that returns a list of analysis
565 ;; results prepended by the location stack.
48b1db75 566 (lambda (x results)
795ab688
LC
567 (let ((locs (update-locs x (car results))))
568 (cons locs ;; the location stack
569 (map (lambda (analysis result)
570 ((proc analysis) x result env locs))
571 analyses
572 (cdr results))))))
573
574 ;; Keeping/extending/shrinking the location stack.
575 (define (keep-locs x locs) locs)
576 (define (extend-locs x locs) (cons (tree-il-src x) locs))
577 (define (shrink-locs x locs) (cdr locs))
48b1db75
LC
578
579 (let ((results
795ab688
LC
580 (tree-il-fold (traverse tree-analysis-leaf keep-locs)
581 (traverse tree-analysis-down extend-locs)
582 (traverse tree-analysis-up shrink-locs)
583 (cons '() ;; empty location stack
584 (map tree-analysis-init analyses))
48b1db75
LC
585 tree)))
586
587 (for-each (lambda (analysis result)
588 ((tree-analysis-post analysis) result env))
589 analyses
795ab688 590 (cdr results)))
48b1db75
LC
591
592 tree)
593
594\f
4b856371
LC
595;;;
596;;; Unused variable analysis.
597;;;
598
599;; <binding-info> records are used during tree traversals in
795ab688
LC
600;; `unused-variable-analysis'. They contain a list of the local vars
601;; currently in scope, and a list of locals vars that have been referenced.
4b856371 602(define-record-type <binding-info>
795ab688 603 (make-binding-info vars refs)
4b856371
LC
604 binding-info?
605 (vars binding-info-vars) ;; ((GENSYM NAME LOCATION) ...)
795ab688 606 (refs binding-info-refs)) ;; (GENSYM ...)
4b856371 607
3a1a883b
LC
608(define (gensym? sym)
609 ;; Return #t if SYM is (likely) a generated symbol.
610 (string-any #\space (symbol->string sym)))
611
48b1db75 612(define unused-variable-analysis
ae03cf1f 613 ;; Report unused variables in the given tree.
48b1db75 614 (make-tree-analysis
795ab688 615 (lambda (x info env locs)
48b1db75
LC
616 ;; X is a leaf: extend INFO's refs accordingly.
617 (let ((refs (binding-info-refs info))
795ab688 618 (vars (binding-info-vars info)))
48b1db75
LC
619 (record-case x
620 ((<lexical-ref> gensym)
a670e672 621 (make-binding-info vars (vhash-consq gensym #t refs)))
48b1db75
LC
622 (else info))))
623
795ab688 624 (lambda (x info env locs)
48b1db75
LC
625 ;; Going down into X: extend INFO's variable list
626 ;; accordingly.
627 (let ((refs (binding-info-refs info))
628 (vars (binding-info-vars info))
48b1db75
LC
629 (src (tree-il-src x)))
630 (define (extend inner-vars inner-names)
a670e672
LC
631 (fold (lambda (var name vars)
632 (vhash-consq var (list name src) vars))
633 vars
634 inner-vars
635 inner-names))
636
48b1db75
LC
637 (record-case x
638 ((<lexical-set> gensym)
a670e672 639 (make-binding-info vars (vhash-consq gensym #t refs)))
93f63467 640 ((<lambda-case> req opt inits rest kw gensyms)
48b1db75 641 (let ((names `(,@req
632e7c32 642 ,@(or opt '())
48b1db75
LC
643 ,@(if rest (list rest) '())
644 ,@(if kw (map cadr (cdr kw)) '()))))
93f63467
AW
645 (make-binding-info (extend gensyms names) refs)))
646 ((<let> gensyms names)
647 (make-binding-info (extend gensyms names) refs))
648 ((<letrec> gensyms names)
649 (make-binding-info (extend gensyms names) refs))
650 ((<fix> gensyms names)
651 (make-binding-info (extend gensyms names) refs))
48b1db75
LC
652 (else info))))
653
795ab688 654 (lambda (x info env locs)
48b1db75
LC
655 ;; Leaving X's scope: shrink INFO's variable list
656 ;; accordingly and reported unused nested variables.
657 (let ((refs (binding-info-refs info))
795ab688 658 (vars (binding-info-vars info)))
48b1db75 659 (define (shrink inner-vars refs)
a670e672
LC
660 (vlist-for-each
661 (lambda (var)
662 (let ((gensym (car var)))
663 ;; Don't report lambda parameters as unused.
664 (if (and (memq gensym inner-vars)
665 (not (vhash-assq gensym refs))
666 (not (lambda-case? x)))
667 (let ((name (cadr var))
668 ;; We can get approximate source location by going up
669 ;; the LOCS location stack.
670 (loc (or (caddr var)
671 (find pair? locs))))
3a1a883b
LC
672 (if (and (not (gensym? name))
673 (not (eq? name '_)))
674 (warning 'unused-variable loc name))))))
a670e672
LC
675 vars)
676 (vlist-drop vars (length inner-vars)))
48b1db75
LC
677
678 ;; For simplicity, we leave REFS untouched, i.e., with
679 ;; names of variables that are now going out of scope.
680 ;; It doesn't hurt as these are unique names, it just
681 ;; makes REFS unnecessarily fat.
682 (record-case x
93f63467
AW
683 ((<lambda-case> gensyms)
684 (make-binding-info (shrink gensyms refs) refs))
685 ((<let> gensyms)
686 (make-binding-info (shrink gensyms refs) refs))
687 ((<letrec> gensyms)
688 (make-binding-info (shrink gensyms refs) refs))
689 ((<fix> gensyms)
690 (make-binding-info (shrink gensyms refs) refs))
48b1db75
LC
691 (else info))))
692
693 (lambda (result env) #t)
a670e672 694 (make-binding-info vlist-null vlist-null)))
f67ddf9d
LC
695
696\f
bcae9a98
LC
697;;;
698;;; Unused top-level variable analysis.
699;;;
700
628ddb80 701;; <reference-graph> record top-level definitions that are made, references to
bcae9a98
LC
702;; top-level definitions and their context (the top-level definition in which
703;; the reference appears), as well as the current context (the top-level
704;; definition we're currently in). The second part (`refs' below) is
628ddb80
LC
705;; effectively a graph from which we can determine unused top-level definitions.
706(define-record-type <reference-graph>
707 (make-reference-graph refs defs toplevel-context)
708 reference-graph?
709 (defs reference-graph-defs) ;; ((NAME . LOC) ...)
710 (refs reference-graph-refs) ;; ((REF-CONTEXT REF ...) ...)
711 (toplevel-context reference-graph-toplevel-context)) ;; NAME | #f
712
5cbf2e1d
LC
713(define (graph-reachable-nodes root refs reachable)
714 ;; Add to REACHABLE the nodes reachable from ROOT in graph REFS. REFS is a
715 ;; vhash mapping nodes to the list of their children: for instance,
716 ;; ((A -> (B C)) (B -> (A)) (C -> ())) corresponds to
bcae9a98
LC
717 ;;
718 ;; ,-------.
719 ;; v |
720 ;; A ----> B
721 ;; |
722 ;; v
723 ;; C
5cbf2e1d
LC
724 ;;
725 ;; REACHABLE is a vhash of nodes known to be otherwise reachable.
bcae9a98
LC
726
727 (let loop ((root root)
5cbf2e1d
LC
728 (path vlist-null)
729 (result reachable))
730 (if (or (vhash-assq root path)
731 (vhash-assq root result))
bcae9a98 732 result
5cbf2e1d
LC
733 (let* ((children (or (and=> (vhash-assq root refs) cdr) '()))
734 (path (vhash-consq root #t path))
735 (result (fold (lambda (kid result)
736 (loop kid path result))
737 result
738 children)))
739 (fold (lambda (kid result)
740 (vhash-consq kid #t result))
741 result
742 children)))))
bcae9a98 743
628ddb80 744(define (graph-reachable-nodes* roots refs)
bcae9a98 745 ;; Return the list of nodes in REFS reachable from the nodes listed in ROOTS.
5cbf2e1d
LC
746 (vlist-fold (lambda (root+true result)
747 (let* ((root (car root+true))
748 (reachable (graph-reachable-nodes root refs result)))
749 (vhash-consq root #t reachable)))
750 vlist-null
751 roots))
752
753(define (partition* pred vhash)
754 ;; Partition VHASH according to PRED. Return the two resulting vhashes.
755 (let ((result
756 (vlist-fold (lambda (k+v result)
757 (let ((k (car k+v))
758 (v (cdr k+v))
759 (r1 (car result))
760 (r2 (cdr result)))
761 (if (pred k)
762 (cons (vhash-consq k v r1) r2)
763 (cons r1 (vhash-consq k v r2)))))
764 (cons vlist-null vlist-null)
765 vhash)))
766 (values (car result) (cdr result))))
bcae9a98
LC
767
768(define unused-toplevel-analysis
769 ;; Report unused top-level definitions that are not exported.
770 (let ((add-ref-from-context
628ddb80
LC
771 (lambda (graph name)
772 ;; Add an edge CTX -> NAME in GRAPH.
773 (let* ((refs (reference-graph-refs graph))
774 (defs (reference-graph-defs graph))
775 (ctx (reference-graph-toplevel-context graph))
5cbf2e1d
LC
776 (ctx-refs (or (and=> (vhash-assq ctx refs) cdr) '())))
777 (make-reference-graph (vhash-consq ctx (cons name ctx-refs) refs)
628ddb80 778 defs ctx)))))
bcae9a98
LC
779 (define (macro-variable? name env)
780 (and (module? env)
781 (let ((var (module-variable env name)))
782 (and var (variable-bound? var)
783 (macro? (variable-ref var))))))
784
785 (make-tree-analysis
628ddb80 786 (lambda (x graph env locs)
bcae9a98 787 ;; X is a leaf.
628ddb80 788 (let ((ctx (reference-graph-toplevel-context graph)))
bcae9a98
LC
789 (record-case x
790 ((<toplevel-ref> name src)
628ddb80
LC
791 (add-ref-from-context graph name))
792 (else graph))))
bcae9a98 793
628ddb80 794 (lambda (x graph env locs)
bcae9a98 795 ;; Going down into X.
628ddb80
LC
796 (let ((ctx (reference-graph-toplevel-context graph))
797 (refs (reference-graph-refs graph))
798 (defs (reference-graph-defs graph)))
bcae9a98
LC
799 (record-case x
800 ((<toplevel-define> name src)
801 (let ((refs refs)
5cbf2e1d
LC
802 (defs (vhash-consq name (or src (find pair? locs))
803 defs)))
628ddb80 804 (make-reference-graph refs defs name)))
bcae9a98 805 ((<toplevel-set> name src)
628ddb80
LC
806 (add-ref-from-context graph name))
807 (else graph))))
bcae9a98 808
628ddb80 809 (lambda (x graph env locs)
bcae9a98
LC
810 ;; Leaving X's scope.
811 (record-case x
812 ((<toplevel-define>)
628ddb80
LC
813 (let ((refs (reference-graph-refs graph))
814 (defs (reference-graph-defs graph)))
815 (make-reference-graph refs defs #f)))
816 (else graph)))
bcae9a98 817
628ddb80
LC
818 (lambda (graph env)
819 ;; Process the resulting reference graph: determine all private definitions
bcae9a98
LC
820 ;; not reachable from any public definition. Macros
821 ;; (syntax-transformers), which are globally bound, never considered
822 ;; unused since we can't tell whether a macro is actually used; in
628ddb80 823 ;; addition, macros are considered roots of the graph since they may use
bcae9a98
LC
824 ;; private bindings. FIXME: The `make-syntax-transformer' calls don't
825 ;; contain any literal `toplevel-ref' of the global bindings they use so
826 ;; this strategy fails.
827 (define (exported? name)
828 (if (module? env)
829 (module-variable (module-public-interface env) name)
830 #t))
831
832 (let-values (((public-defs private-defs)
5cbf2e1d
LC
833 (partition* (lambda (name)
834 (or (exported? name)
835 (macro-variable? name env)))
836 (reference-graph-defs graph))))
837 (let* ((roots (vhash-consq #f #t public-defs))
628ddb80
LC
838 (refs (reference-graph-refs graph))
839 (reachable (graph-reachable-nodes* roots refs))
5cbf2e1d
LC
840 (unused (vlist-filter (lambda (name+src)
841 (not (vhash-assq (car name+src)
842 reachable)))
843 private-defs)))
844 (vlist-for-each (lambda (name+loc)
845 (let ((name (car name+loc))
846 (loc (cdr name+loc)))
3a1a883b
LC
847 (if (not (gensym? name))
848 (warning 'unused-toplevel loc name))))
5cbf2e1d
LC
849 unused))))
850
851 (make-reference-graph vlist-null vlist-null #f))))
bcae9a98
LC
852
853\f
f67ddf9d
LC
854;;;
855;;; Unbound variable analysis.
856;;;
857
858;; <toplevel-info> records are used during tree traversal in search of
859;; possibly unbound variable. They contain a list of references to
795ab688
LC
860;; potentially unbound top-level variables, and a list of the top-level
861;; defines that have been encountered.
f67ddf9d 862(define-record-type <toplevel-info>
795ab688 863 (make-toplevel-info refs defs)
f67ddf9d
LC
864 toplevel-info?
865 (refs toplevel-info-refs) ;; ((VARIABLE-NAME . LOCATION) ...)
795ab688 866 (defs toplevel-info-defs)) ;; (VARIABLE-NAME ...)
f67ddf9d 867
6bb891dc 868(define (goops-toplevel-definition proc args env)
b6d2306d
LC
869 ;; If application of PROC to ARGS is a GOOPS top-level definition, return
870 ;; the name of the variable being defined; otherwise return #f. This
871 ;; assumes knowledge of the current implementation of `define-class' et al.
6bb891dc 872 (define (toplevel-define-arg args)
16a3b316
LC
873 (match args
874 ((($ <const> _ (and (? symbol?) exp)) _)
875 exp)
876 (_ #f)))
877
878 (match proc
879 (($ <module-ref> _ '(oop goops) 'toplevel-define! #f)
880 (toplevel-define-arg args))
881 (($ <toplevel-ref> _ 'toplevel-define!)
6bb891dc
LC
882 ;; This may be the result of expanding one of the GOOPS macros within
883 ;; `oop/goops.scm'.
16a3b316 884 (and (eq? env (resolve-module '(oop goops)))
6bb891dc 885 (toplevel-define-arg args)))
16a3b316 886 (_ #f)))
b6d2306d 887
48b1db75 888(define unbound-variable-analysis
ae03cf1f 889 ;; Report possibly unbound variables in the given tree.
48b1db75 890 (make-tree-analysis
795ab688 891 (lambda (x info env locs)
48b1db75
LC
892 ;; X is a leaf: extend INFO's refs accordingly.
893 (let ((refs (toplevel-info-refs info))
795ab688 894 (defs (toplevel-info-defs info)))
48b1db75
LC
895 (define (bound? name)
896 (or (and (module? env)
897 (module-variable env name))
04ea6fb5 898 (vhash-assq name defs)))
48b1db75
LC
899
900 (record-case x
901 ((<toplevel-ref> name src)
902 (if (bound? name)
903 info
904 (let ((src (or src (find pair? locs))))
04ea6fb5 905 (make-toplevel-info (vhash-consq name src refs)
795ab688 906 defs))))
48b1db75
LC
907 (else info))))
908
795ab688 909 (lambda (x info env locs)
48b1db75
LC
910 ;; Going down into X.
911 (let* ((refs (toplevel-info-refs info))
912 (defs (toplevel-info-defs info))
795ab688 913 (src (tree-il-src x)))
48b1db75
LC
914 (define (bound? name)
915 (or (and (module? env)
916 (module-variable env name))
04ea6fb5 917 (vhash-assq name defs)))
48b1db75
LC
918
919 (record-case x
920 ((<toplevel-set> name src)
921 (if (bound? name)
795ab688 922 (make-toplevel-info refs defs)
48b1db75 923 (let ((src (find pair? locs)))
04ea6fb5 924 (make-toplevel-info (vhash-consq name src refs)
795ab688 925 defs))))
48b1db75 926 ((<toplevel-define> name)
1e1808c9 927 (make-toplevel-info (vhash-delq name refs)
04ea6fb5 928 (vhash-consq name #t defs)))
48b1db75
LC
929
930 ((<application> proc args)
931 ;; Check for a dynamic top-level definition, as is
932 ;; done by code expanded from GOOPS macros.
933 (let ((name (goops-toplevel-definition proc args
934 env)))
935 (if (symbol? name)
1e1808c9 936 (make-toplevel-info (vhash-delq name refs)
04ea6fb5 937 (vhash-consq name #t defs))
795ab688 938 (make-toplevel-info refs defs))))
48b1db75 939 (else
795ab688 940 (make-toplevel-info refs defs)))))
48b1db75 941
795ab688 942 (lambda (x info env locs)
48b1db75 943 ;; Leaving X's scope.
bcae9a98 944 info)
48b1db75
LC
945
946 (lambda (toplevel env)
947 ;; Post-process the result.
04ea6fb5
LC
948 (vlist-for-each (lambda (name+loc)
949 (let ((name (car name+loc))
950 (loc (cdr name+loc)))
951 (warning 'unbound-variable loc name)))
952 (vlist-reverse (toplevel-info-refs toplevel))))
48b1db75 953
04ea6fb5 954 (make-toplevel-info vlist-null vlist-null)))
ae03cf1f
LC
955
956\f
957;;;
958;;; Arity analysis.
959;;;
960
af5ed549 961;; <arity-info> records contain information about lexical definitions of
ae03cf1f
LC
962;; procedures currently in scope, top-level procedure definitions that have
963;; been encountered, and calls to top-level procedures that have been
964;; encountered.
965(define-record-type <arity-info>
966 (make-arity-info toplevel-calls lexical-lambdas toplevel-lambdas)
967 arity-info?
968 (toplevel-calls toplevel-procedure-calls) ;; ((NAME . APPLICATION) ...)
969 (lexical-lambdas lexical-lambdas) ;; ((GENSYM . DEFINITION) ...)
970 (toplevel-lambdas toplevel-lambdas)) ;; ((NAME . DEFINITION) ...)
971
972(define (validate-arity proc application lexical?)
973 ;; Validate the argument count of APPLICATION, a tree-il application of
974 ;; PROC, emitting a warning in case of argument count mismatch.
975
af5ed549
LC
976 (define (filter-keyword-args keywords allow-other-keys? args)
977 ;; Filter keyword arguments from ARGS and return the resulting list.
978 ;; KEYWORDS is the list of allowed keywords, and ALLOW-OTHER-KEYS?
979 ;; specified whethere keywords not listed in KEYWORDS are allowed.
980 (let loop ((args args)
981 (result '()))
982 (if (null? args)
983 (reverse result)
984 (let ((arg (car args)))
985 (if (and (const? arg)
986 (or (memq (const-exp arg) keywords)
987 (and allow-other-keys?
988 (keyword? (const-exp arg)))))
989 (loop (if (pair? (cdr args))
990 (cddr args)
991 '())
992 result)
993 (loop (cdr args)
994 (cons arg result)))))))
995
99480e11
LC
996 (define (arities proc)
997 ;; Return the arities of PROC, which can be either a tree-il or a
ae03cf1f
LC
998 ;; procedure.
999 (define (len x)
1000 (or (and (or (null? x) (pair? x))
1001 (length x))
1002 0))
af5ed549 1003 (cond ((program? proc)
1e23b461 1004 (values (procedure-name proc)
99480e11
LC
1005 (map (lambda (a)
1006 (list (arity:nreq a) (arity:nopt a) (arity:rest? a)
1007 (map car (arity:kw a))
1008 (arity:allow-other-keys? a)))
1009 (program-arities proc))))
ae03cf1f 1010 ((procedure? proc)
15bb587f
AW
1011 (if (struct? proc)
1012 ;; An applicable struct.
1013 (arities (struct-ref proc 0))
1014 ;; An applicable smob.
1015 (let ((arity (procedure-minimum-arity proc)))
1016 (values (procedure-name proc)
1017 (list (list (car arity) (cadr arity) (caddr arity)
1018 #f #f))))))
ae03cf1f 1019 (else
99480e11
LC
1020 (let loop ((name #f)
1021 (proc proc)
1022 (arities '()))
1023 (if (not proc)
1024 (values name (reverse arities))
1025 (record-case proc
3a88cb3b
AW
1026 ((<lambda-case> req opt rest kw alternate)
1027 (loop name alternate
99480e11
LC
1028 (cons (list (len req) (len opt) rest
1029 (and (pair? kw) (map car (cdr kw)))
1030 (and (pair? kw) (car kw)))
1031 arities)))
1032 ((<lambda> meta body)
1033 (loop (assoc-ref meta 'name) body arities))
1034 (else
1035 (values #f #f))))))))
ae03cf1f
LC
1036
1037 (let ((args (application-args application))
1038 (src (tree-il-src application)))
99480e11
LC
1039 (call-with-values (lambda () (arities proc))
1040 (lambda (name arities)
1041 (define matches?
1042 (find (lambda (arity)
1043 (pmatch arity
1044 ((,req ,opt ,rest? ,kw ,aok?)
1045 (let ((args (if (pair? kw)
1046 (filter-keyword-args kw aok? args)
1047 args)))
1048 (if (and req opt)
1049 (let ((count (length args)))
1050 (and (>= count req)
1051 (or rest?
1052 (<= count (+ req opt)))))
1053 #t)))
1054 (else #t)))
1055 arities))
1056
1057 (if (not matches?)
1058 (warning 'arity-mismatch src
1059 (or name (with-output-to-string (lambda () (write proc))))
1060 lexical?)))))
ae03cf1f
LC
1061 #t)
1062
1063(define arity-analysis
1064 ;; Report arity mismatches in the given tree.
1065 (make-tree-analysis
795ab688 1066 (lambda (x info env locs)
ae03cf1f
LC
1067 ;; X is a leaf.
1068 info)
795ab688 1069 (lambda (x info env locs)
ae03cf1f
LC
1070 ;; Down into X.
1071 (define (extend lexical-name val info)
1072 ;; If VAL is a lambda, add NAME to the lexical-lambdas of INFO.
1073 (let ((toplevel-calls (toplevel-procedure-calls info))
1074 (lexical-lambdas (lexical-lambdas info))
1075 (toplevel-lambdas (toplevel-lambdas info)))
1076 (record-case val
1077 ((<lambda> body)
1078 (make-arity-info toplevel-calls
df685ee4
LC
1079 (vhash-consq lexical-name val
1080 lexical-lambdas)
ae03cf1f
LC
1081 toplevel-lambdas))
1082 ((<lexical-ref> gensym)
1083 ;; lexical alias
df685ee4 1084 (let ((val* (vhash-assq gensym lexical-lambdas)))
ae03cf1f
LC
1085 (if (pair? val*)
1086 (extend lexical-name (cdr val*) info)
1087 info)))
1088 ((<toplevel-ref> name)
1089 ;; top-level alias
1090 (make-arity-info toplevel-calls
df685ee4
LC
1091 (vhash-consq lexical-name val
1092 lexical-lambdas)
ae03cf1f
LC
1093 toplevel-lambdas))
1094 (else info))))
1095
1096 (let ((toplevel-calls (toplevel-procedure-calls info))
1097 (lexical-lambdas (lexical-lambdas info))
1098 (toplevel-lambdas (toplevel-lambdas info)))
1099
1100 (record-case x
1101 ((<toplevel-define> name exp)
1102 (record-case exp
1103 ((<lambda> body)
1104 (make-arity-info toplevel-calls
1105 lexical-lambdas
df685ee4 1106 (vhash-consq name exp toplevel-lambdas)))
ae03cf1f
LC
1107 ((<toplevel-ref> name)
1108 ;; alias for another toplevel
df685ee4 1109 (let ((proc (vhash-assq name toplevel-lambdas)))
ae03cf1f
LC
1110 (make-arity-info toplevel-calls
1111 lexical-lambdas
df685ee4
LC
1112 (vhash-consq (toplevel-define-name x)
1113 (if (pair? proc)
1114 (cdr proc)
1115 exp)
1116 toplevel-lambdas))))
ae03cf1f 1117 (else info)))
93f63467
AW
1118 ((<let> gensyms vals)
1119 (fold extend info gensyms vals))
1120 ((<letrec> gensyms vals)
1121 (fold extend info gensyms vals))
1122 ((<fix> gensyms vals)
1123 (fold extend info gensyms vals))
ae03cf1f
LC
1124
1125 ((<application> proc args src)
1126 (record-case proc
1127 ((<lambda> body)
1128 (validate-arity proc x #t)
1129 info)
1130 ((<toplevel-ref> name)
df685ee4 1131 (make-arity-info (vhash-consq name x toplevel-calls)
ae03cf1f
LC
1132 lexical-lambdas
1133 toplevel-lambdas))
1134 ((<lexical-ref> gensym)
df685ee4 1135 (let ((proc (vhash-assq gensym lexical-lambdas)))
ae03cf1f
LC
1136 (if (pair? proc)
1137 (record-case (cdr proc)
1138 ((<toplevel-ref> name)
1139 ;; alias to toplevel
df685ee4 1140 (make-arity-info (vhash-consq name x toplevel-calls)
ae03cf1f
LC
1141 lexical-lambdas
1142 toplevel-lambdas))
1143 (else
1144 (validate-arity (cdr proc) x #t)
1145 info))
1146
1147 ;; If GENSYM wasn't found, it may be because it's an
1148 ;; argument of the procedure being compiled.
1149 info)))
1150 (else info)))
1151 (else info))))
1152
795ab688 1153 (lambda (x info env locs)
ae03cf1f
LC
1154 ;; Up from X.
1155 (define (shrink name val info)
1156 ;; Remove NAME from the lexical-lambdas of INFO.
1157 (let ((toplevel-calls (toplevel-procedure-calls info))
1158 (lexical-lambdas (lexical-lambdas info))
1159 (toplevel-lambdas (toplevel-lambdas info)))
1160 (make-arity-info toplevel-calls
df685ee4
LC
1161 (if (vhash-assq name lexical-lambdas)
1162 (vlist-tail lexical-lambdas)
1163 lexical-lambdas)
ae03cf1f
LC
1164 toplevel-lambdas)))
1165
1166 (let ((toplevel-calls (toplevel-procedure-calls info))
1167 (lexical-lambdas (lexical-lambdas info))
1168 (toplevel-lambdas (toplevel-lambdas info)))
1169 (record-case x
93f63467
AW
1170 ((<let> gensyms vals)
1171 (fold shrink info gensyms vals))
1172 ((<letrec> gensyms vals)
1173 (fold shrink info gensyms vals))
1174 ((<fix> gensyms vals)
1175 (fold shrink info gensyms vals))
ae03cf1f
LC
1176
1177 (else info))))
1178
1179 (lambda (result env)
1180 ;; Post-processing: check all top-level procedure calls that have been
1181 ;; encountered.
1182 (let ((toplevel-calls (toplevel-procedure-calls result))
1183 (toplevel-lambdas (toplevel-lambdas result)))
df685ee4
LC
1184 (vlist-for-each
1185 (lambda (name+application)
1186 (let* ((name (car name+application))
1187 (application (cdr name+application))
1188 (proc
1189 (or (and=> (vhash-assq name toplevel-lambdas) cdr)
1190 (and (module? env)
1191 (false-if-exception
1192 (module-ref env name)))))
1193 (proc*
1194 ;; handle toplevel aliases
1195 (if (toplevel-ref? proc)
1196 (let ((name (toplevel-ref-name proc)))
1197 (and (module? env)
1198 (false-if-exception
1199 (module-ref env name))))
1200 proc)))
2c5f0bdb
LC
1201 (cond ((lambda? proc*)
1202 (validate-arity proc* application #t))
2c5f0bdb
LC
1203 ((procedure? proc*)
1204 (validate-arity proc* application #f)))))
df685ee4
LC
1205 toplevel-calls)))
1206
1207 (make-arity-info vlist-null vlist-null vlist-null)))
75365375
LC
1208
1209\f
1210;;;
1211;;; `format' argument analysis.
1212;;;
1213
8e6c15a6
LC
1214(define &syntax-error
1215 ;; The `throw' key for syntax errors.
1216 (gensym "format-string-syntax-error"))
1217
75365375 1218(define (format-string-argument-count fmt)
e0697241
LC
1219 ;; Return the minimum and maxium number of arguments that should
1220 ;; follow format string FMT (or, ahem, a good estimate thereof) or
1221 ;; `any' if the format string can be followed by any number of
1222 ;; arguments.
1223
1224 (define (drop-group chars end)
1225 ;; Drop characters from CHARS until "~END" is encountered.
1226 (let loop ((chars chars)
1227 (tilde? #f))
1228 (if (null? chars)
8e6c15a6 1229 (throw &syntax-error 'unterminated-iteration)
e0697241
LC
1230 (if tilde?
1231 (if (eq? (car chars) end)
1232 (cdr chars)
1233 (loop (cdr chars) #f))
1234 (if (eq? (car chars) #\~)
1235 (loop (cdr chars) #t)
1236 (loop (cdr chars) #f))))))
1237
1238 (define (digit? char)
1239 ;; Return true if CHAR is a digit, #f otherwise.
1240 (memq char '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)))
1241
1242 (define (previous-number chars)
1243 ;; Return the previous series of digits found in CHARS.
1244 (let ((numbers (take-while digit? chars)))
1245 (and (not (null? numbers))
1246 (string->number (list->string (reverse numbers))))))
1247
1248 (let loop ((chars (string->list fmt))
1249 (state 'literal)
1250 (params '())
1251 (conditions '())
1252 (end-group #f)
1253 (min-count 0)
1254 (max-count 0))
75365375 1255 (if (null? chars)
e0697241 1256 (if end-group
8e6c15a6 1257 (throw &syntax-error 'unterminated-conditional)
e0697241
LC
1258 (values min-count max-count))
1259 (case state
1260 ((tilde)
1261 (case (car chars)
90baf8cd 1262 ((#\~ #\% #\& #\t #\T #\_ #\newline #\( #\) #\! #\| #\/ #\q #\Q)
e0697241
LC
1263 (loop (cdr chars) 'literal '()
1264 conditions end-group
1265 min-count max-count))
90baf8cd 1266 ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\, #\: #\@ #\+ #\- #\#)
e0697241
LC
1267 (loop (cdr chars)
1268 'tilde (cons (car chars) params)
1269 conditions end-group
1270 min-count max-count))
1271 ((#\v #\V) (loop (cdr chars)
1272 'tilde (cons (car chars) params)
1273 conditions end-group
1274 (+ 1 min-count)
1275 (+ 1 max-count)))
1276 ((#\[)
1277 (loop chars 'literal '() '()
1278 (let ((selector (previous-number params))
1279 (at? (memq #\@ params)))
1280 (lambda (chars conds)
1281 ;; end of group
1282 (let ((mins (map car conds))
1283 (maxs (map cdr conds))
1284 (sel? (and selector
1285 (< selector (length conds)))))
1286 (if (and (every number? mins)
1287 (every number? maxs))
1288 (loop chars 'literal '() conditions end-group
1289 (+ min-count
1290 (if sel?
1291 (car (list-ref conds selector))
1292 (+ (if at? 0 1)
1293 (if (null? mins)
1294 0
1295 (apply min mins)))))
1296 (+ max-count
1297 (if sel?
1298 (cdr (list-ref conds selector))
1299 (+ (if at? 0 1)
1300 (if (null? maxs)
1301 0
1302 (apply max maxs))))))
8e6c15a6 1303 (values 'any 'any))))) ;; XXX: approximation
e0697241
LC
1304 0 0))
1305 ((#\;)
8e6c15a6
LC
1306 (if end-group
1307 (loop (cdr chars) 'literal '()
1308 (cons (cons min-count max-count) conditions)
1309 end-group
1310 0 0)
1311 (throw &syntax-error 'unexpected-semicolon)))
e0697241
LC
1312 ((#\])
1313 (if end-group
1314 (end-group (cdr chars)
1315 (reverse (cons (cons min-count max-count)
1316 conditions)))
8e6c15a6 1317 (throw &syntax-error 'unexpected-conditional-termination)))
e0697241
LC
1318 ((#\{) (if (memq #\@ params)
1319 (values min-count 'any)
1320 (loop (drop-group (cdr chars) #\})
1321 'literal '()
1322 conditions end-group
1323 (+ 1 min-count) (+ 1 max-count))))
1324 ((#\*) (if (memq #\@ params)
1325 (values 'any 'any) ;; it's unclear what to do here
1326 (loop (cdr chars)
1327 'literal '()
1328 conditions end-group
1329 (+ (or (previous-number params) 1)
1330 min-count)
1331 (+ (or (previous-number params) 1)
1332 max-count))))
90baf8cd 1333 ((#\? #\k #\K)
e0697241
LC
1334 ;; We don't have enough info to determine the exact number
1335 ;; of args, but we could determine a lower bound (TODO).
1336 (values 'any 'any))
90baf8cd
IP
1337 ((#\^)
1338 (values min-count 'any))
b4af80a4
LC
1339 ((#\h #\H)
1340 (let ((argc (if (memq #\: params) 2 1)))
1341 (loop (cdr chars) 'literal '()
1342 conditions end-group
1343 (+ argc min-count)
1344 (+ argc max-count))))
90baf8cd
IP
1345 ((#\')
1346 (if (null? (cdr chars))
1347 (throw &syntax-error 'unexpected-termination)
1348 (loop (cddr chars) 'tilde (cons (cadr chars) params)
1349 conditions end-group min-count max-count)))
e0697241
LC
1350 (else (loop (cdr chars) 'literal '()
1351 conditions end-group
1352 (+ 1 min-count) (+ 1 max-count)))))
1353 ((literal)
1354 (case (car chars)
1355 ((#\~) (loop (cdr chars) 'tilde '()
1356 conditions end-group
1357 min-count max-count))
1358 (else (loop (cdr chars) 'literal '()
1359 conditions end-group
1360 min-count max-count))))
1361 (else (error "computer bought the farm" state))))))
75365375 1362
98385ed2
LC
1363(define (proc-ref? exp proc special-name env)
1364 "Return #t when EXP designates procedure PROC in ENV. As a last
1365resort, return #t when EXP refers to the global variable SPECIAL-NAME."
8a74ffe8
LC
1366
1367 (define special?
1368 (cut eq? <> special-name))
1369
98385ed2 1370 (match exp
8a74ffe8
LC
1371 (($ <toplevel-ref> _ (? special?))
1372 ;; Allow top-levels like: (define _ (cut gettext <> "my-domain")).
1373 #t)
afc98031 1374 (($ <toplevel-ref> _ name)
dab48cc5 1375 (let ((var (module-variable env name)))
8a74ffe8
LC
1376 (and var (variable-bound? var)
1377 (eq? (variable-ref var) proc))))
1378 (($ <module-ref> _ _ (? special?))
1379 #t)
afc98031 1380 (($ <module-ref> _ module name public?)
dab48cc5
AW
1381 (let* ((mod (if public?
1382 (false-if-exception (resolve-interface module))
4c984747 1383 (resolve-module module #:ensure #f)))
dab48cc5 1384 (var (and mod (module-variable mod name))))
8a74ffe8
LC
1385 (and var (variable-bound? var) (eq? (variable-ref var) proc))))
1386 (($ <lexical-ref> _ (? special?))
4c984747 1387 #t)
afc98031
LC
1388 (_ #f)))
1389
98385ed2
LC
1390(define gettext? (cut proc-ref? <> gettext '_ <>))
1391(define ngettext? (cut proc-ref? <> ngettext 'N_ <>))
1392
afc98031 1393(define (const-fmt x env)
98385ed2 1394 ;; Return the literal format string for X, or #f.
16a3b316 1395 (match x
98385ed2 1396 (($ <const> _ (? string? exp))
3936cebc 1397 exp)
afc98031
LC
1398 (($ <application> _ (? (cut gettext? <> env))
1399 (($ <const> _ (? string? fmt))))
3936cebc 1400 ;; Gettexted literals, like `(_ "foo")'.
16a3b316 1401 fmt)
98385ed2
LC
1402 (($ <application> _ (? (cut ngettext? <> env))
1403 (($ <const> _ (? string? fmt)) ($ <const> _ (? string?)) _ ..1))
1404 ;; Plural gettextized literals, like `(N_ "singular" "plural" n)'.
1405
1406 ;; TODO: Check whether the singular and plural strings have the
1407 ;; same format escapes.
1408 fmt)
16a3b316 1409 (_ #f)))
56e31389 1410
75365375
LC
1411(define format-analysis
1412 ;; Report arity mismatches in the given tree.
1413 (make-tree-analysis
1414 (lambda (x _ env locs)
1415 ;; X is a leaf.
1416 #t)
1417
1418 (lambda (x _ env locs)
1419 ;; Down into X.
1420 (define (check-format-args args loc)
1421 (pmatch args
1422 ((,port ,fmt . ,rest)
afc98031 1423 (guard (const-fmt fmt env))
60f01304
LC
1424 (if (and (const? port)
1425 (not (boolean? (const-exp port))))
1426 (warning 'format loc 'wrong-port (const-exp port)))
afc98031 1427 (let ((fmt (const-fmt fmt env))
e0697241 1428 (count (length rest)))
98385ed2
LC
1429 (catch &syntax-error
1430 (lambda ()
1431 (let-values (((min max)
1432 (format-string-argument-count fmt)))
1433 (and min max
1434 (or (and (or (eq? min 'any) (>= count min))
1435 (or (eq? max 'any) (<= count max)))
1436 (warning 'format loc 'wrong-format-arg-count
1437 fmt min max count)))))
1438 (lambda (_ key)
1439 (warning 'format loc 'syntax-error key fmt)))))
60f01304 1440 ((,port ,fmt . ,rest)
56e31389
AW
1441 (if (and (const? port)
1442 (not (boolean? (const-exp port))))
3a822fff 1443 (warning 'format loc 'wrong-port (const-exp port)))
98385ed2
LC
1444
1445 (match fmt
1446 (($ <const> loc* (? (negate string?) fmt))
1447 (warning 'format (or loc* loc) 'wrong-format-string fmt))
1448
1449 ;; Warn on non-literal format strings, unless they refer to
1450 ;; a lexical variable named "fmt".
1451 (($ <lexical-ref> _ fmt)
1452 #t)
1453 ((? (negate const?))
1454 (warning 'format loc 'non-literal-format-string))))
60f01304
LC
1455 (else
1456 (warning 'format loc 'wrong-num-args (length args)))))
75365375 1457
60273407
LC
1458 (define (check-simple-format-args args loc)
1459 ;; Check the arguments to the `simple-format' procedure, which is
1460 ;; less capable than that of (ice-9 format).
1461
1462 (define allowed-chars
1463 '(#\A #\S #\a #\s #\~ #\%))
1464
1465 (define (format-chars fmt)
1466 (let loop ((chars (string->list fmt))
1467 (result '()))
1468 (match chars
1469 (()
1470 (reverse result))
1471 ((#\~ opt rest ...)
1472 (loop rest (cons opt result)))
1473 ((_ rest ...)
1474 (loop rest result)))))
1475
1476 (match args
1477 ((port ($ <const> _ (? string? fmt)) _ ...)
1478 (let ((opts (format-chars fmt)))
1479 (or (every (cut memq <> allowed-chars) opts)
1480 (begin
1481 (warning 'format loc 'simple-format fmt
1482 (find (negate (cut memq <> allowed-chars)) opts))
1483 #f))))
98385ed2
LC
1484 ((port (= (cut const-fmt <> env) (? string? fmt)) args ...)
1485 (check-simple-format-args `(,port ,(make-const loc fmt) ,args) loc))
60273407
LC
1486 (_ #t)))
1487
75365375
LC
1488 (define (resolve-toplevel name)
1489 (and (module? env)
1490 (false-if-exception (module-ref env name))))
1491
16a3b316
LC
1492 (match x
1493 (($ <application> src ($ <toplevel-ref> _ name) args)
1494 (let ((proc (resolve-toplevel name)))
60273407
LC
1495 (if (or (and (eq? proc (@ (guile) simple-format))
1496 (check-simple-format-args args
1497 (or src (find pair? locs))))
1498 (eq? proc (@ (ice-9 format) format)))
1499 (check-format-args args (or src (find pair? locs))))))
1500 (($ <application> src ($ <module-ref> _ '(ice-9 format) 'format) args)
1501 (check-format-args args (or src (find pair? locs))))
1502 (($ <application> src ($ <module-ref> _ '(guile)
1503 (or 'format 'simple-format))
1504 args)
1505 (and (check-simple-format-args args
1506 (or src (find pair? locs)))
1507 (check-format-args args (or src (find pair? locs)))))
16a3b316 1508 (_ #t))
75365375
LC
1509 #t)
1510
1511 (lambda (x _ env locs)
1512 ;; Up from X.
1513 #t)
1514
1515 (lambda (_ env)
1516 ;; Post-processing.
1517 #t)
1518
1519 #t))