Optimize Equality Primitives
[bpt/guile.git] / module / language / tree-il / analyze.scm
CommitLineData
cf10678f
AW
1;;; TREE-IL -> GLIL compiler
2
b4af80a4 3;; Copyright (C) 2001, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
cf10678f 4
53befeb7
NJ
5;;;; This library is free software; you can redistribute it and/or
6;;;; modify it under the terms of the GNU Lesser General Public
7;;;; License as published by the Free Software Foundation; either
8;;;; version 3 of the License, or (at your option) any later version.
bcae9a98 9;;;;
53befeb7
NJ
10;;;; This library is distributed in the hope that it will be useful,
11;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13;;;; Lesser General Public License for more details.
bcae9a98 14;;;;
53befeb7
NJ
15;;;; You should have received a copy of the GNU Lesser General Public
16;;;; License along with this library; if not, write to the Free Software
17;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
cf10678f
AW
18
19;;; Code:
20
21(define-module (language tree-il analyze)
66d3e9a3 22 #:use-module (srfi srfi-1)
4b856371 23 #:use-module (srfi srfi-9)
bcae9a98 24 #:use-module (srfi srfi-11)
60273407 25 #:use-module (srfi srfi-26)
5cbf2e1d 26 #:use-module (ice-9 vlist)
16a3b316 27 #:use-module (ice-9 match)
cf10678f 28 #:use-module (system base syntax)
4b856371 29 #:use-module (system base message)
af5ed549 30 #:use-module (system vm program)
cf10678f 31 #:use-module (language tree-il)
99480e11 32 #:use-module (system base pmatch)
4b856371 33 #:export (analyze-lexicals
48b1db75
LC
34 analyze-tree
35 unused-variable-analysis
bcae9a98 36 unused-toplevel-analysis
ae03cf1f 37 unbound-variable-analysis
75365375
LC
38 arity-analysis
39 format-analysis))
cf10678f 40
66d3e9a3
AW
41;; Allocation is the process of assigning storage locations for lexical
42;; variables. A lexical variable has a distinct "address", or storage
43;; location, for each procedure in which it is referenced.
44;;
45;; A variable is "local", i.e., allocated on the stack, if it is
46;; referenced from within the procedure that defined it. Otherwise it is
47;; a "closure" variable. For example:
48;;
49;; (lambda (a) a) ; a will be local
50;; `a' is local to the procedure.
51;;
52;; (lambda (a) (lambda () a))
53;; `a' is local to the outer procedure, but a closure variable with
54;; respect to the inner procedure.
55;;
56;; If a variable is ever assigned, it needs to be heap-allocated
57;; ("boxed"). This is so that closures and continuations capture the
58;; variable's identity, not just one of the values it may have over the
59;; course of program execution. If the variable is never assigned, there
60;; is no distinction between value and identity, so closing over its
61;; identity (whether through closures or continuations) can make a copy
62;; of its value instead.
63;;
64;; Local variables are stored on the stack within a procedure's call
65;; frame. Their index into the stack is determined from their linear
66;; postion within a procedure's binding path:
cf10678f
AW
67;; (let (0 1)
68;; (let (2 3) ...)
69;; (let (2) ...))
70;; (let (2 3 4) ...))
71;; etc.
72;;
5af166bd
AW
73;; This algorithm has the problem that variables are only allocated
74;; indices at the end of the binding path. If variables bound early in
75;; the path are not used in later portions of the path, their indices
76;; will not be recycled. This problem is particularly egregious in the
77;; expansion of `or':
78;;
79;; (or x y z)
80;; -> (let ((a x)) (if a a (let ((b y)) (if b b z))))
81;;
b6d93b11
AW
82;; As you can see, the `a' binding is only used in the ephemeral
83;; `consequent' clause of the first `if', but its index would be
84;; reserved for the whole of the `or' expansion. So we have a hack for
85;; this specific case. A proper solution would be some sort of liveness
86;; analysis, and not our linear allocation algorithm.
5af166bd 87;;
282d128c
AW
88;; Closure variables are captured when a closure is created, and stored in a
89;; vector inline to the closure object itself. Each closure variable has a
90;; unique index into that vector.
66d3e9a3 91;;
9059993f
AW
92;; There is one more complication. Procedures bound by <fix> may, in
93;; some cases, be rendered inline to their parent procedure. That is to
94;; say,
95;;
96;; (letrec ((lp (lambda () (lp)))) (lp))
97;; => (fix ((lp (lambda () (lp)))) (lp))
98;; => goto FIX-BODY; LP: goto LP; FIX-BODY: goto LP;
99;; ^ jump over the loop ^ the fixpoint lp ^ starting off the loop
100;;
101;; The upshot is that we don't have to allocate any space for the `lp'
102;; closure at all, as it can be rendered inline as a loop. So there is
103;; another kind of allocation, "label allocation", in which the
104;; procedure is simply a label, placed at the start of the lambda body.
105;; The label is the gensym under which the lambda expression is bound.
106;;
107;; The analyzer checks to see that the label is called with the correct
108;; number of arguments. Calls to labels compile to rename + goto.
109;; Lambda, the ultimate goto!
110;;
66d3e9a3
AW
111;;
112;; The return value of `analyze-lexicals' is a hash table, the
113;; "allocation".
114;;
115;; The allocation maps gensyms -- recall that each lexically bound
116;; variable has a unique gensym -- to storage locations ("addresses").
117;; Since one gensym may have many storage locations, if it is referenced
118;; in many procedures, it is a two-level map.
119;;
120;; The allocation also stored information on how many local variables
9059993f
AW
121;; need to be allocated for each procedure, lexicals that have been
122;; translated into labels, and information on what free variables to
123;; capture from its lexical parent procedure.
66d3e9a3 124;;
8a4ca0ea
AW
125;; In addition, we have a conflation: while we're traversing the code,
126;; recording information to pass to the compiler, we take the
127;; opportunity to generate labels for each lambda-case clause, so that
128;; generated code can skip argument checks at runtime if they match at
129;; compile-time.
130;;
282d128c
AW
131;; Also, while we're a-traversing and an-allocating, we check prompt
132;; handlers to see if the "continuation" argument is used. If not, we
133;; mark the prompt as being "escape-only". This allows us to implement
134;; `catch' and `throw' using `prompt' and `control', but without causing
135;; a continuation to be reified. Heh heh.
136;;
66d3e9a3
AW
137;; That is:
138;;
139;; sym -> {lambda -> address}
8a4ca0ea
AW
140;; lambda -> (labels . free-locs)
141;; lambda-case -> (gensym . nlocs)
282d128c 142;; prompt -> escape-only?
66d3e9a3 143;;
9059993f 144;; address ::= (local? boxed? . index)
8a4ca0ea 145;; labels ::= ((sym . lambda) ...)
66d3e9a3
AW
146;; free-locs ::= ((sym0 . address0) (sym1 . address1) ...)
147;; free variable addresses are relative to parent proc.
148
149(define (make-hashq k v)
150 (let ((res (make-hash-table)))
151 (hashq-set! res k v)
152 res))
cf10678f
AW
153
154(define (analyze-lexicals x)
66d3e9a3
AW
155 ;; bound-vars: lambda -> (sym ...)
156 ;; all identifiers bound within a lambda
9059993f 157 (define bound-vars (make-hash-table))
66d3e9a3
AW
158 ;; free-vars: lambda -> (sym ...)
159 ;; all identifiers referenced in a lambda, but not bound
160 ;; NB, this includes identifiers referenced by contained lambdas
9059993f 161 (define free-vars (make-hash-table))
66d3e9a3
AW
162 ;; assigned: sym -> #t
163 ;; variables that are assigned
d97b69d9 164 (define assigned (make-hash-table))
5af166bd 165 ;; refcounts: sym -> count
66d3e9a3 166 ;; allows us to detect the or-expansion in O(1) time
9059993f 167 (define refcounts (make-hash-table))
8a4ca0ea 168 ;; labels: sym -> lambda
9059993f 169 ;; for determining if fixed-point procedures can be rendered as
8a4ca0ea 170 ;; labels.
9059993f
AW
171 (define labels (make-hash-table))
172
66d3e9a3 173 ;; returns variables referenced in expr
d97b69d9 174 (define (analyze! x proc labels-in-proc tail? tail-call-args)
aa9c1985 175 (define (step y) (analyze! y proc '() #f #f))
d97b69d9
AW
176 (define (step-tail y) (analyze! y proc labels-in-proc tail? #f))
177 (define (step-tail-call y args) (analyze! y proc labels-in-proc #f
178 (and tail? args)))
179 (define (recur/labels x new-proc labels)
180 (analyze! x new-proc (append labels labels-in-proc) #t #f))
181 (define (recur x new-proc) (analyze! x new-proc '() tail? #f))
cf10678f 182 (record-case x
7081d4f9 183 ((<call> proc args)
d97b69d9
AW
184 (apply lset-union eq? (step-tail-call proc args)
185 (map step args)))
cf10678f 186
a881a4ae
AW
187 ((<primcall> args)
188 (apply lset-union eq? (map step args)))
189
b6d93b11
AW
190 ((<conditional> test consequent alternate)
191 (lset-union eq? (step test) (step-tail consequent) (step-tail alternate)))
cf10678f 192
e5f5113c 193 ((<lexical-ref> gensym)
5af166bd 194 (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
d97b69d9
AW
195 (if (not (and tail-call-args
196 (memq gensym labels-in-proc)
8a4ca0ea
AW
197 (let ((p (hashq-ref labels gensym)))
198 (and p
199 (let lp ((c (lambda-body p)))
200 (and c (lambda-case? c)
201 (or
202 ;; for now prohibit optional &
203 ;; keyword arguments; can relax this
204 ;; restriction later
205 (and (= (length (lambda-case-req c))
206 (length tail-call-args))
207 (not (lambda-case-opt c))
208 (not (lambda-case-kw c))
1e2a8edb 209 (not (lambda-case-rest c)))
3a88cb3b 210 (lp (lambda-case-alternate c)))))))))
d97b69d9 211 (hashq-set! labels gensym #f))
66d3e9a3 212 (list gensym))
cf10678f 213
e5f5113c 214 ((<lexical-set> gensym exp)
66d3e9a3 215 (hashq-set! assigned gensym #t)
d97b69d9 216 (hashq-set! labels gensym #f)
66d3e9a3 217 (lset-adjoin eq? (step exp) gensym))
cf10678f 218
e5f5113c 219 ((<module-set> exp)
cf10678f
AW
220 (step exp))
221
e5f5113c 222 ((<toplevel-set> exp)
cf10678f
AW
223 (step exp))
224
e5f5113c 225 ((<toplevel-define> exp)
cf10678f
AW
226 (step exp))
227
6fc3eae4
AW
228 ((<seq> head tail)
229 (lset-union eq? (step head) (step-tail tail)))
cf10678f 230
8a4ca0ea
AW
231 ((<lambda> body)
232 ;; order is important here
233 (hashq-set! bound-vars x '())
234 (let ((free (recur body x)))
235 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
236 (hashq-set! free-vars x free)
237 free))
238
93f63467 239 ((<lambda-case> opt kw inits gensyms body alternate)
8a4ca0ea 240 (hashq-set! bound-vars proc
93f63467 241 (append (reverse gensyms) (hashq-ref bound-vars proc)))
8a4ca0ea
AW
242 (lset-union
243 eq?
244 (lset-difference eq?
b0c8c187
AW
245 (lset-union eq?
246 (apply lset-union eq? (map step inits))
8a4ca0ea 247 (step-tail body))
93f63467 248 gensyms)
3a88cb3b 249 (if alternate (step-tail alternate) '())))
66d3e9a3 250
93f63467 251 ((<let> gensyms vals body)
66d3e9a3 252 (hashq-set! bound-vars proc
93f63467 253 (append (reverse gensyms) (hashq-ref bound-vars proc)))
66d3e9a3 254 (lset-difference eq?
d97b69d9 255 (apply lset-union eq? (step-tail body) (map step vals))
93f63467 256 gensyms))
cf10678f 257
93f63467 258 ((<letrec> gensyms vals body)
66d3e9a3 259 (hashq-set! bound-vars proc
93f63467
AW
260 (append (reverse gensyms) (hashq-ref bound-vars proc)))
261 (for-each (lambda (sym) (hashq-set! assigned sym #t)) gensyms)
66d3e9a3 262 (lset-difference eq?
d97b69d9 263 (apply lset-union eq? (step-tail body) (map step vals))
93f63467 264 gensyms))
66d3e9a3 265
93f63467 266 ((<fix> gensyms vals body)
d97b69d9 267 ;; Try to allocate these procedures as labels.
8a4ca0ea 268 (for-each (lambda (sym val) (hashq-set! labels sym val))
93f63467 269 gensyms vals)
c21c89b1 270 (hashq-set! bound-vars proc
93f63467 271 (append (reverse gensyms) (hashq-ref bound-vars proc)))
d97b69d9
AW
272 ;; Step into subexpressions.
273 (let* ((var-refs
274 (map
275 ;; Since we're trying to label-allocate the lambda,
276 ;; pretend it's not a closure, and just recurse into its
277 ;; body directly. (Otherwise, recursing on a closure
278 ;; that references one of the fix's bound vars would
279 ;; prevent label allocation.)
280 (lambda (x)
281 (record-case x
8a4ca0ea
AW
282 ((<lambda> body)
283 ;; just like the closure case, except here we use
284 ;; recur/labels instead of recur
285 (hashq-set! bound-vars x '())
93f63467 286 (let ((free (recur/labels body x gensyms)))
8a4ca0ea
AW
287 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
288 (hashq-set! free-vars x free)
289 free))))
d97b69d9 290 vals))
93f63467
AW
291 (vars-with-refs (map cons gensyms var-refs))
292 (body-refs (recur/labels body proc gensyms)))
d97b69d9
AW
293 (define (delabel-dependents! sym)
294 (let ((refs (assq-ref vars-with-refs sym)))
295 (if refs
296 (for-each (lambda (sym)
297 (if (hashq-ref labels sym)
298 (begin
299 (hashq-set! labels sym #f)
300 (delabel-dependents! sym))))
301 refs))))
302 ;; Stepping into the lambdas and the body might have made some
303 ;; procedures not label-allocatable -- which might have
304 ;; knock-on effects. For example:
305 ;; (fix ((a (lambda () (b)))
306 ;; (b (lambda () a)))
307 ;; (a))
308 ;; As far as `a' is concerned, both `a' and `b' are
309 ;; label-allocatable. But `b' references `a' not in a proc-tail
310 ;; position, which makes `a' not label-allocatable. The
311 ;; knock-on effect is that, when back-propagating this
312 ;; information to `a', `b' will also become not
313 ;; label-allocatable, as it is referenced within `a', which is
314 ;; allocated as a closure. This is a transitive relationship.
315 (for-each (lambda (sym)
316 (if (not (hashq-ref labels sym))
317 (delabel-dependents! sym)))
93f63467 318 gensyms)
d97b69d9
AW
319 ;; Now lift bound variables with label-allocated lambdas to the
320 ;; parent procedure.
321 (for-each
322 (lambda (sym val)
323 (if (hashq-ref labels sym)
324 ;; Remove traces of the label-bound lambda. The free
325 ;; vars will propagate up via the return val.
326 (begin
327 (hashq-set! bound-vars proc
328 (append (hashq-ref bound-vars val)
329 (hashq-ref bound-vars proc)))
330 (hashq-remove! bound-vars val)
331 (hashq-remove! free-vars val))))
93f63467 332 gensyms vals)
d97b69d9
AW
333 (lset-difference eq?
334 (apply lset-union eq? body-refs var-refs)
93f63467 335 gensyms)))
c21c89b1 336
8a4ca0ea
AW
337 ((<let-values> exp body)
338 (lset-union eq? (step exp) (step body)))
66d3e9a3 339
880e7948
AW
340 ((<dynwind> winder pre body post unwinder)
341 (lset-union eq? (step winder) (step pre)
342 (step body)
343 (step post) (step unwinder)))
282d128c 344
d7c53a86
AW
345 ((<dynlet> fluids vals body)
346 (apply lset-union eq? (step body) (map step (append fluids vals))))
347
706a705e
AW
348 ((<dynref> fluid)
349 (step fluid))
350
351 ((<dynset> fluid exp)
352 (lset-union eq? (step fluid) (step exp)))
353
07a0c7d5 354 ((<prompt> tag body handler)
b9204185 355 (lset-union eq? (step tag) (step body) (step-tail handler)))
282d128c 356
2d026f04
AW
357 ((<abort> tag args tail)
358 (apply lset-union eq? (step tag) (step tail) (map step args)))
282d128c 359
66d3e9a3
AW
360 (else '())))
361
9059993f 362 ;; allocation: sym -> {lambda -> address}
3004fe26
MW
363 ;; lambda -> (labels . free-locs)
364 ;; lambda-case -> (gensym . nlocs)
9059993f
AW
365 (define allocation (make-hash-table))
366
66d3e9a3
AW
367 (define (allocate! x proc n)
368 (define (recur y) (allocate! y proc n))
369 (record-case x
7081d4f9 370 ((<call> proc args)
66d3e9a3 371 (apply max (recur proc) (map recur args)))
cf10678f 372
a881a4ae
AW
373 ((<primcall> args)
374 (apply max n (map recur args)))
375
b6d93b11
AW
376 ((<conditional> test consequent alternate)
377 (max (recur test) (recur consequent) (recur alternate)))
cf10678f 378
e5f5113c 379 ((<lexical-set> exp)
66d3e9a3
AW
380 (recur exp))
381
e5f5113c 382 ((<module-set> exp)
66d3e9a3
AW
383 (recur exp))
384
e5f5113c 385 ((<toplevel-set> exp)
66d3e9a3
AW
386 (recur exp))
387
e5f5113c 388 ((<toplevel-define> exp)
66d3e9a3
AW
389 (recur exp))
390
6fc3eae4
AW
391 ((<seq> head tail)
392 (max (recur head)
393 (recur tail)))
66d3e9a3 394
8a4ca0ea 395 ((<lambda> body)
66d3e9a3
AW
396 ;; allocate closure vars in order
397 (let lp ((c (hashq-ref free-vars x)) (n 0))
398 (if (pair? c)
399 (begin
400 (hashq-set! (hashq-ref allocation (car c))
401 x
402 `(#f ,(hashq-ref assigned (car c)) . ,n))
403 (lp (cdr c) (1+ n)))))
404
8a4ca0ea 405 (let ((nlocs (allocate! body x 0))
66d3e9a3
AW
406 (free-addresses
407 (map (lambda (v)
408 (hashq-ref (hashq-ref allocation v) proc))
9059993f
AW
409 (hashq-ref free-vars x)))
410 (labels (filter cdr
411 (map (lambda (sym)
412 (cons sym (hashq-ref labels sym)))
413 (hashq-ref bound-vars x)))))
66d3e9a3 414 ;; set procedure allocations
8a4ca0ea 415 (hashq-set! allocation x (cons labels free-addresses)))
66d3e9a3 416 n)
cf10678f 417
93f63467 418 ((<lambda-case> opt kw inits gensyms body alternate)
8a4ca0ea 419 (max
93f63467
AW
420 (let lp ((gensyms gensyms) (n n))
421 (if (null? gensyms)
b0c8c187
AW
422 (let ((nlocs (apply
423 max
b0c8c187
AW
424 (allocate! body proc n)
425 ;; inits not logically at the end, but they
426 ;; are the list...
9a9d82c2 427 (map (lambda (x) (allocate! x proc n)) inits))))
8a4ca0ea
AW
428 ;; label and nlocs for the case
429 (hashq-set! allocation x (cons (gensym ":LCASE") nlocs))
430 nlocs)
431 (begin
93f63467 432 (hashq-set! allocation (car gensyms)
8a4ca0ea 433 (make-hashq
93f63467
AW
434 proc `(#t ,(hashq-ref assigned (car gensyms)) . ,n)))
435 (lp (cdr gensyms) (1+ n)))))
3a88cb3b 436 (if alternate (allocate! alternate proc n) n)))
8a4ca0ea 437
93f63467 438 ((<let> gensyms vals body)
66d3e9a3
AW
439 (let ((nmax (apply max (map recur vals))))
440 (cond
441 ;; the `or' hack
442 ((and (conditional? body)
93f63467
AW
443 (= (length gensyms) 1)
444 (let ((v (car gensyms)))
66d3e9a3
AW
445 (and (not (hashq-ref assigned v))
446 (= (hashq-ref refcounts v 0) 2)
447 (lexical-ref? (conditional-test body))
448 (eq? (lexical-ref-gensym (conditional-test body)) v)
b6d93b11
AW
449 (lexical-ref? (conditional-consequent body))
450 (eq? (lexical-ref-gensym (conditional-consequent body)) v))))
93f63467 451 (hashq-set! allocation (car gensyms)
66d3e9a3
AW
452 (make-hashq proc `(#t #f . ,n)))
453 ;; the 1+ for this var
b6d93b11 454 (max nmax (1+ n) (allocate! (conditional-alternate body) proc n)))
66d3e9a3 455 (else
93f63467
AW
456 (let lp ((gensyms gensyms) (n n))
457 (if (null? gensyms)
66d3e9a3 458 (max nmax (allocate! body proc n))
93f63467 459 (let ((v (car gensyms)))
cf10678f
AW
460 (hashq-set!
461 allocation v
66d3e9a3
AW
462 (make-hashq proc
463 `(#t ,(hashq-ref assigned v) . ,n)))
93f63467 464 (lp (cdr gensyms) (1+ n)))))))))
66d3e9a3 465
93f63467
AW
466 ((<letrec> gensyms vals body)
467 (let lp ((gensyms gensyms) (n n))
468 (if (null? gensyms)
66d3e9a3
AW
469 (let ((nmax (apply max
470 (map (lambda (x)
471 (allocate! x proc n))
472 vals))))
473 (max nmax (allocate! body proc n)))
93f63467 474 (let ((v (car gensyms)))
66d3e9a3
AW
475 (hashq-set!
476 allocation v
477 (make-hashq proc
478 `(#t ,(hashq-ref assigned v) . ,n)))
93f63467 479 (lp (cdr gensyms) (1+ n))))))
cf10678f 480
93f63467
AW
481 ((<fix> gensyms vals body)
482 (let lp ((in gensyms) (n n))
d97b69d9 483 (if (null? in)
93f63467 484 (let lp ((gensyms gensyms) (vals vals) (nmax n))
d97b69d9 485 (cond
93f63467 486 ((null? gensyms)
d97b69d9 487 (max nmax (allocate! body proc n)))
93f63467 488 ((hashq-ref labels (car gensyms))
8a4ca0ea 489 ;; allocate lambda body inline to proc
93f63467 490 (lp (cdr gensyms)
d97b69d9
AW
491 (cdr vals)
492 (record-case (car vals)
8a4ca0ea
AW
493 ((<lambda> body)
494 (max nmax (allocate! body proc n))))))
d97b69d9
AW
495 (else
496 ;; allocate closure
93f63467 497 (lp (cdr gensyms)
d97b69d9
AW
498 (cdr vals)
499 (max nmax (allocate! (car vals) proc n))))))
500
501 (let ((v (car in)))
502 (cond
503 ((hashq-ref assigned v)
504 (error "fixpoint procedures may not be assigned" x))
505 ((hashq-ref labels v)
506 ;; no binding, it's a label
507 (lp (cdr in) n))
508 (else
509 ;; allocate closure binding
510 (hashq-set! allocation v (make-hashq proc `(#t #f . ,n)))
511 (lp (cdr in) (1+ n))))))))
c21c89b1 512
8a4ca0ea
AW
513 ((<let-values> exp body)
514 (max (recur exp) (recur body)))
66d3e9a3 515
880e7948
AW
516 ((<dynwind> winder pre body post unwinder)
517 (max (recur winder) (recur pre)
518 (recur body)
519 (recur post) (recur unwinder)))
282d128c 520
d7c53a86
AW
521 ((<dynlet> fluids vals body)
522 (apply max (recur body) (map recur (append fluids vals))))
523
706a705e
AW
524 ((<dynref> fluid)
525 (recur fluid))
526
527 ((<dynset> fluid exp)
528 (max (recur fluid) (recur exp)))
529
07a0c7d5 530 ((<prompt> tag body handler)
282d128c 531 (let ((cont-var (and (lambda-case? handler)
93f63467
AW
532 (pair? (lambda-case-gensyms handler))
533 (car (lambda-case-gensyms handler)))))
282d128c
AW
534 (hashq-set! allocation x
535 (and cont-var (zero? (hashq-ref refcounts cont-var 0))))
07a0c7d5 536 (max (recur tag) (recur body) (recur handler))))
282d128c 537
2d026f04
AW
538 ((<abort> tag args tail)
539 (apply max (recur tag) (recur tail) (map recur args)))
282d128c 540
66d3e9a3 541 (else n)))
cf10678f 542
d97b69d9 543 (analyze! x #f '() #t #f)
66d3e9a3 544 (allocate! x #f 0)
cf10678f
AW
545
546 allocation)
4b856371
LC
547
548\f
48b1db75
LC
549;;;
550;;; Tree analyses for warnings.
551;;;
552
553(define-record-type <tree-analysis>
554 (make-tree-analysis leaf down up post init)
555 tree-analysis?
795ab688
LC
556 (leaf tree-analysis-leaf) ;; (lambda (x result env locs) ...)
557 (down tree-analysis-down) ;; (lambda (x result env locs) ...)
558 (up tree-analysis-up) ;; (lambda (x result env locs) ...)
48b1db75
LC
559 (post tree-analysis-post) ;; (lambda (result env) ...)
560 (init tree-analysis-init)) ;; arbitrary value
561
562(define (analyze-tree analyses tree env)
563 "Run all tree analyses listed in ANALYSES on TREE for ENV, using
795ab688
LC
564`tree-il-fold'. Return TREE. The leaf/down/up procedures of each analysis are
565passed a ``location stack', which is the stack of `tree-il-src' values for each
566parent tree (a list); it can be used to approximate source location when
567accurate information is missing from a given `tree-il' element."
568
569 (define (traverse proc update-locs)
570 ;; Return a tree traversing procedure that returns a list of analysis
571 ;; results prepended by the location stack.
48b1db75 572 (lambda (x results)
795ab688
LC
573 (let ((locs (update-locs x (car results))))
574 (cons locs ;; the location stack
575 (map (lambda (analysis result)
576 ((proc analysis) x result env locs))
577 analyses
578 (cdr results))))))
579
580 ;; Keeping/extending/shrinking the location stack.
581 (define (keep-locs x locs) locs)
582 (define (extend-locs x locs) (cons (tree-il-src x) locs))
583 (define (shrink-locs x locs) (cdr locs))
48b1db75
LC
584
585 (let ((results
795ab688
LC
586 (tree-il-fold (traverse tree-analysis-leaf keep-locs)
587 (traverse tree-analysis-down extend-locs)
588 (traverse tree-analysis-up shrink-locs)
589 (cons '() ;; empty location stack
590 (map tree-analysis-init analyses))
48b1db75
LC
591 tree)))
592
593 (for-each (lambda (analysis result)
594 ((tree-analysis-post analysis) result env))
595 analyses
795ab688 596 (cdr results)))
48b1db75
LC
597
598 tree)
599
600\f
4b856371
LC
601;;;
602;;; Unused variable analysis.
603;;;
604
605;; <binding-info> records are used during tree traversals in
795ab688
LC
606;; `unused-variable-analysis'. They contain a list of the local vars
607;; currently in scope, and a list of locals vars that have been referenced.
4b856371 608(define-record-type <binding-info>
795ab688 609 (make-binding-info vars refs)
4b856371
LC
610 binding-info?
611 (vars binding-info-vars) ;; ((GENSYM NAME LOCATION) ...)
795ab688 612 (refs binding-info-refs)) ;; (GENSYM ...)
4b856371 613
3a1a883b
LC
614(define (gensym? sym)
615 ;; Return #t if SYM is (likely) a generated symbol.
616 (string-any #\space (symbol->string sym)))
617
48b1db75 618(define unused-variable-analysis
ae03cf1f 619 ;; Report unused variables in the given tree.
48b1db75 620 (make-tree-analysis
795ab688 621 (lambda (x info env locs)
48b1db75
LC
622 ;; X is a leaf: extend INFO's refs accordingly.
623 (let ((refs (binding-info-refs info))
795ab688 624 (vars (binding-info-vars info)))
48b1db75
LC
625 (record-case x
626 ((<lexical-ref> gensym)
a670e672 627 (make-binding-info vars (vhash-consq gensym #t refs)))
48b1db75
LC
628 (else info))))
629
795ab688 630 (lambda (x info env locs)
48b1db75
LC
631 ;; Going down into X: extend INFO's variable list
632 ;; accordingly.
633 (let ((refs (binding-info-refs info))
634 (vars (binding-info-vars info))
48b1db75
LC
635 (src (tree-il-src x)))
636 (define (extend inner-vars inner-names)
a670e672
LC
637 (fold (lambda (var name vars)
638 (vhash-consq var (list name src) vars))
639 vars
640 inner-vars
641 inner-names))
642
48b1db75
LC
643 (record-case x
644 ((<lexical-set> gensym)
a670e672 645 (make-binding-info vars (vhash-consq gensym #t refs)))
93f63467 646 ((<lambda-case> req opt inits rest kw gensyms)
48b1db75 647 (let ((names `(,@req
632e7c32 648 ,@(or opt '())
48b1db75
LC
649 ,@(if rest (list rest) '())
650 ,@(if kw (map cadr (cdr kw)) '()))))
93f63467
AW
651 (make-binding-info (extend gensyms names) refs)))
652 ((<let> gensyms names)
653 (make-binding-info (extend gensyms names) refs))
654 ((<letrec> gensyms names)
655 (make-binding-info (extend gensyms names) refs))
656 ((<fix> gensyms names)
657 (make-binding-info (extend gensyms names) refs))
48b1db75
LC
658 (else info))))
659
795ab688 660 (lambda (x info env locs)
48b1db75
LC
661 ;; Leaving X's scope: shrink INFO's variable list
662 ;; accordingly and reported unused nested variables.
663 (let ((refs (binding-info-refs info))
795ab688 664 (vars (binding-info-vars info)))
48b1db75 665 (define (shrink inner-vars refs)
a670e672
LC
666 (vlist-for-each
667 (lambda (var)
668 (let ((gensym (car var)))
669 ;; Don't report lambda parameters as unused.
670 (if (and (memq gensym inner-vars)
671 (not (vhash-assq gensym refs))
672 (not (lambda-case? x)))
673 (let ((name (cadr var))
674 ;; We can get approximate source location by going up
675 ;; the LOCS location stack.
676 (loc (or (caddr var)
677 (find pair? locs))))
3a1a883b
LC
678 (if (and (not (gensym? name))
679 (not (eq? name '_)))
680 (warning 'unused-variable loc name))))))
a670e672
LC
681 vars)
682 (vlist-drop vars (length inner-vars)))
48b1db75
LC
683
684 ;; For simplicity, we leave REFS untouched, i.e., with
685 ;; names of variables that are now going out of scope.
686 ;; It doesn't hurt as these are unique names, it just
687 ;; makes REFS unnecessarily fat.
688 (record-case x
93f63467
AW
689 ((<lambda-case> gensyms)
690 (make-binding-info (shrink gensyms refs) refs))
691 ((<let> gensyms)
692 (make-binding-info (shrink gensyms refs) refs))
693 ((<letrec> gensyms)
694 (make-binding-info (shrink gensyms refs) refs))
695 ((<fix> gensyms)
696 (make-binding-info (shrink gensyms refs) refs))
48b1db75
LC
697 (else info))))
698
699 (lambda (result env) #t)
a670e672 700 (make-binding-info vlist-null vlist-null)))
f67ddf9d
LC
701
702\f
bcae9a98
LC
703;;;
704;;; Unused top-level variable analysis.
705;;;
706
628ddb80 707;; <reference-graph> record top-level definitions that are made, references to
bcae9a98
LC
708;; top-level definitions and their context (the top-level definition in which
709;; the reference appears), as well as the current context (the top-level
710;; definition we're currently in). The second part (`refs' below) is
628ddb80
LC
711;; effectively a graph from which we can determine unused top-level definitions.
712(define-record-type <reference-graph>
713 (make-reference-graph refs defs toplevel-context)
714 reference-graph?
715 (defs reference-graph-defs) ;; ((NAME . LOC) ...)
716 (refs reference-graph-refs) ;; ((REF-CONTEXT REF ...) ...)
717 (toplevel-context reference-graph-toplevel-context)) ;; NAME | #f
718
5cbf2e1d
LC
719(define (graph-reachable-nodes root refs reachable)
720 ;; Add to REACHABLE the nodes reachable from ROOT in graph REFS. REFS is a
721 ;; vhash mapping nodes to the list of their children: for instance,
722 ;; ((A -> (B C)) (B -> (A)) (C -> ())) corresponds to
bcae9a98
LC
723 ;;
724 ;; ,-------.
725 ;; v |
726 ;; A ----> B
727 ;; |
728 ;; v
729 ;; C
5cbf2e1d
LC
730 ;;
731 ;; REACHABLE is a vhash of nodes known to be otherwise reachable.
bcae9a98
LC
732
733 (let loop ((root root)
5cbf2e1d
LC
734 (path vlist-null)
735 (result reachable))
736 (if (or (vhash-assq root path)
737 (vhash-assq root result))
bcae9a98 738 result
5cbf2e1d
LC
739 (let* ((children (or (and=> (vhash-assq root refs) cdr) '()))
740 (path (vhash-consq root #t path))
741 (result (fold (lambda (kid result)
742 (loop kid path result))
743 result
744 children)))
745 (fold (lambda (kid result)
746 (vhash-consq kid #t result))
747 result
748 children)))))
bcae9a98 749
628ddb80 750(define (graph-reachable-nodes* roots refs)
bcae9a98 751 ;; Return the list of nodes in REFS reachable from the nodes listed in ROOTS.
5cbf2e1d
LC
752 (vlist-fold (lambda (root+true result)
753 (let* ((root (car root+true))
754 (reachable (graph-reachable-nodes root refs result)))
755 (vhash-consq root #t reachable)))
756 vlist-null
757 roots))
758
759(define (partition* pred vhash)
760 ;; Partition VHASH according to PRED. Return the two resulting vhashes.
761 (let ((result
762 (vlist-fold (lambda (k+v result)
763 (let ((k (car k+v))
764 (v (cdr k+v))
765 (r1 (car result))
766 (r2 (cdr result)))
767 (if (pred k)
768 (cons (vhash-consq k v r1) r2)
769 (cons r1 (vhash-consq k v r2)))))
770 (cons vlist-null vlist-null)
771 vhash)))
772 (values (car result) (cdr result))))
bcae9a98
LC
773
774(define unused-toplevel-analysis
775 ;; Report unused top-level definitions that are not exported.
776 (let ((add-ref-from-context
628ddb80
LC
777 (lambda (graph name)
778 ;; Add an edge CTX -> NAME in GRAPH.
779 (let* ((refs (reference-graph-refs graph))
780 (defs (reference-graph-defs graph))
781 (ctx (reference-graph-toplevel-context graph))
5cbf2e1d
LC
782 (ctx-refs (or (and=> (vhash-assq ctx refs) cdr) '())))
783 (make-reference-graph (vhash-consq ctx (cons name ctx-refs) refs)
628ddb80 784 defs ctx)))))
bcae9a98
LC
785 (define (macro-variable? name env)
786 (and (module? env)
787 (let ((var (module-variable env name)))
788 (and var (variable-bound? var)
789 (macro? (variable-ref var))))))
790
791 (make-tree-analysis
628ddb80 792 (lambda (x graph env locs)
bcae9a98 793 ;; X is a leaf.
628ddb80 794 (let ((ctx (reference-graph-toplevel-context graph)))
bcae9a98
LC
795 (record-case x
796 ((<toplevel-ref> name src)
628ddb80
LC
797 (add-ref-from-context graph name))
798 (else graph))))
bcae9a98 799
628ddb80 800 (lambda (x graph env locs)
bcae9a98 801 ;; Going down into X.
628ddb80
LC
802 (let ((ctx (reference-graph-toplevel-context graph))
803 (refs (reference-graph-refs graph))
804 (defs (reference-graph-defs graph)))
bcae9a98
LC
805 (record-case x
806 ((<toplevel-define> name src)
807 (let ((refs refs)
5cbf2e1d
LC
808 (defs (vhash-consq name (or src (find pair? locs))
809 defs)))
628ddb80 810 (make-reference-graph refs defs name)))
bcae9a98 811 ((<toplevel-set> name src)
628ddb80
LC
812 (add-ref-from-context graph name))
813 (else graph))))
bcae9a98 814
628ddb80 815 (lambda (x graph env locs)
bcae9a98
LC
816 ;; Leaving X's scope.
817 (record-case x
818 ((<toplevel-define>)
628ddb80
LC
819 (let ((refs (reference-graph-refs graph))
820 (defs (reference-graph-defs graph)))
821 (make-reference-graph refs defs #f)))
822 (else graph)))
bcae9a98 823
628ddb80
LC
824 (lambda (graph env)
825 ;; Process the resulting reference graph: determine all private definitions
bcae9a98
LC
826 ;; not reachable from any public definition. Macros
827 ;; (syntax-transformers), which are globally bound, never considered
828 ;; unused since we can't tell whether a macro is actually used; in
628ddb80 829 ;; addition, macros are considered roots of the graph since they may use
bcae9a98
LC
830 ;; private bindings. FIXME: The `make-syntax-transformer' calls don't
831 ;; contain any literal `toplevel-ref' of the global bindings they use so
832 ;; this strategy fails.
833 (define (exported? name)
834 (if (module? env)
835 (module-variable (module-public-interface env) name)
836 #t))
837
838 (let-values (((public-defs private-defs)
5cbf2e1d
LC
839 (partition* (lambda (name)
840 (or (exported? name)
841 (macro-variable? name env)))
842 (reference-graph-defs graph))))
843 (let* ((roots (vhash-consq #f #t public-defs))
628ddb80
LC
844 (refs (reference-graph-refs graph))
845 (reachable (graph-reachable-nodes* roots refs))
5cbf2e1d
LC
846 (unused (vlist-filter (lambda (name+src)
847 (not (vhash-assq (car name+src)
848 reachable)))
849 private-defs)))
850 (vlist-for-each (lambda (name+loc)
851 (let ((name (car name+loc))
852 (loc (cdr name+loc)))
3a1a883b
LC
853 (if (not (gensym? name))
854 (warning 'unused-toplevel loc name))))
5cbf2e1d
LC
855 unused))))
856
857 (make-reference-graph vlist-null vlist-null #f))))
bcae9a98
LC
858
859\f
f67ddf9d
LC
860;;;
861;;; Unbound variable analysis.
862;;;
863
864;; <toplevel-info> records are used during tree traversal in search of
865;; possibly unbound variable. They contain a list of references to
795ab688
LC
866;; potentially unbound top-level variables, and a list of the top-level
867;; defines that have been encountered.
f67ddf9d 868(define-record-type <toplevel-info>
795ab688 869 (make-toplevel-info refs defs)
f67ddf9d
LC
870 toplevel-info?
871 (refs toplevel-info-refs) ;; ((VARIABLE-NAME . LOCATION) ...)
795ab688 872 (defs toplevel-info-defs)) ;; (VARIABLE-NAME ...)
f67ddf9d 873
6bb891dc 874(define (goops-toplevel-definition proc args env)
7081d4f9 875 ;; If call of PROC to ARGS is a GOOPS top-level definition, return
b6d2306d
LC
876 ;; the name of the variable being defined; otherwise return #f. This
877 ;; assumes knowledge of the current implementation of `define-class' et al.
6bb891dc 878 (define (toplevel-define-arg args)
16a3b316
LC
879 (match args
880 ((($ <const> _ (and (? symbol?) exp)) _)
881 exp)
882 (_ #f)))
883
884 (match proc
885 (($ <module-ref> _ '(oop goops) 'toplevel-define! #f)
886 (toplevel-define-arg args))
887 (($ <toplevel-ref> _ 'toplevel-define!)
6bb891dc
LC
888 ;; This may be the result of expanding one of the GOOPS macros within
889 ;; `oop/goops.scm'.
16a3b316 890 (and (eq? env (resolve-module '(oop goops)))
6bb891dc 891 (toplevel-define-arg args)))
16a3b316 892 (_ #f)))
b6d2306d 893
48b1db75 894(define unbound-variable-analysis
ae03cf1f 895 ;; Report possibly unbound variables in the given tree.
48b1db75 896 (make-tree-analysis
795ab688 897 (lambda (x info env locs)
48b1db75
LC
898 ;; X is a leaf: extend INFO's refs accordingly.
899 (let ((refs (toplevel-info-refs info))
795ab688 900 (defs (toplevel-info-defs info)))
48b1db75
LC
901 (define (bound? name)
902 (or (and (module? env)
903 (module-variable env name))
04ea6fb5 904 (vhash-assq name defs)))
48b1db75
LC
905
906 (record-case x
907 ((<toplevel-ref> name src)
908 (if (bound? name)
909 info
910 (let ((src (or src (find pair? locs))))
04ea6fb5 911 (make-toplevel-info (vhash-consq name src refs)
795ab688 912 defs))))
48b1db75
LC
913 (else info))))
914
795ab688 915 (lambda (x info env locs)
48b1db75
LC
916 ;; Going down into X.
917 (let* ((refs (toplevel-info-refs info))
918 (defs (toplevel-info-defs info))
795ab688 919 (src (tree-il-src x)))
48b1db75
LC
920 (define (bound? name)
921 (or (and (module? env)
922 (module-variable env name))
04ea6fb5 923 (vhash-assq name defs)))
48b1db75
LC
924
925 (record-case x
926 ((<toplevel-set> name src)
927 (if (bound? name)
795ab688 928 (make-toplevel-info refs defs)
48b1db75 929 (let ((src (find pair? locs)))
04ea6fb5 930 (make-toplevel-info (vhash-consq name src refs)
795ab688 931 defs))))
48b1db75 932 ((<toplevel-define> name)
1e1808c9 933 (make-toplevel-info (vhash-delq name refs)
04ea6fb5 934 (vhash-consq name #t defs)))
48b1db75 935
7081d4f9 936 ((<call> proc args)
48b1db75
LC
937 ;; Check for a dynamic top-level definition, as is
938 ;; done by code expanded from GOOPS macros.
939 (let ((name (goops-toplevel-definition proc args
940 env)))
941 (if (symbol? name)
1e1808c9 942 (make-toplevel-info (vhash-delq name refs)
04ea6fb5 943 (vhash-consq name #t defs))
795ab688 944 (make-toplevel-info refs defs))))
48b1db75 945 (else
795ab688 946 (make-toplevel-info refs defs)))))
48b1db75 947
795ab688 948 (lambda (x info env locs)
48b1db75 949 ;; Leaving X's scope.
bcae9a98 950 info)
48b1db75
LC
951
952 (lambda (toplevel env)
953 ;; Post-process the result.
04ea6fb5
LC
954 (vlist-for-each (lambda (name+loc)
955 (let ((name (car name+loc))
956 (loc (cdr name+loc)))
957 (warning 'unbound-variable loc name)))
958 (vlist-reverse (toplevel-info-refs toplevel))))
48b1db75 959
04ea6fb5 960 (make-toplevel-info vlist-null vlist-null)))
ae03cf1f
LC
961
962\f
963;;;
964;;; Arity analysis.
965;;;
966
af5ed549 967;; <arity-info> records contain information about lexical definitions of
ae03cf1f
LC
968;; procedures currently in scope, top-level procedure definitions that have
969;; been encountered, and calls to top-level procedures that have been
970;; encountered.
971(define-record-type <arity-info>
972 (make-arity-info toplevel-calls lexical-lambdas toplevel-lambdas)
973 arity-info?
7081d4f9 974 (toplevel-calls toplevel-procedure-calls) ;; ((NAME . CALL) ...)
ae03cf1f
LC
975 (lexical-lambdas lexical-lambdas) ;; ((GENSYM . DEFINITION) ...)
976 (toplevel-lambdas toplevel-lambdas)) ;; ((NAME . DEFINITION) ...)
977
7081d4f9
AW
978(define (validate-arity proc call lexical?)
979 ;; Validate the argument count of CALL, a tree-il call of
ae03cf1f
LC
980 ;; PROC, emitting a warning in case of argument count mismatch.
981
af5ed549
LC
982 (define (filter-keyword-args keywords allow-other-keys? args)
983 ;; Filter keyword arguments from ARGS and return the resulting list.
984 ;; KEYWORDS is the list of allowed keywords, and ALLOW-OTHER-KEYS?
985 ;; specified whethere keywords not listed in KEYWORDS are allowed.
986 (let loop ((args args)
987 (result '()))
988 (if (null? args)
989 (reverse result)
990 (let ((arg (car args)))
991 (if (and (const? arg)
992 (or (memq (const-exp arg) keywords)
993 (and allow-other-keys?
994 (keyword? (const-exp arg)))))
995 (loop (if (pair? (cdr args))
996 (cddr args)
997 '())
998 result)
999 (loop (cdr args)
1000 (cons arg result)))))))
1001
99480e11
LC
1002 (define (arities proc)
1003 ;; Return the arities of PROC, which can be either a tree-il or a
ae03cf1f
LC
1004 ;; procedure.
1005 (define (len x)
1006 (or (and (or (null? x) (pair? x))
1007 (length x))
1008 0))
af5ed549 1009 (cond ((program? proc)
1e23b461 1010 (values (procedure-name proc)
99480e11
LC
1011 (map (lambda (a)
1012 (list (arity:nreq a) (arity:nopt a) (arity:rest? a)
1013 (map car (arity:kw a))
1014 (arity:allow-other-keys? a)))
1015 (program-arities proc))))
ae03cf1f 1016 ((procedure? proc)
3fc7e2c1 1017 (let ((arity (procedure-minimum-arity proc)))
ae03cf1f 1018 (values (procedure-name proc)
99480e11
LC
1019 (list (list (car arity) (cadr arity) (caddr arity)
1020 #f #f)))))
ae03cf1f 1021 (else
99480e11
LC
1022 (let loop ((name #f)
1023 (proc proc)
1024 (arities '()))
1025 (if (not proc)
1026 (values name (reverse arities))
1027 (record-case proc
3a88cb3b
AW
1028 ((<lambda-case> req opt rest kw alternate)
1029 (loop name alternate
99480e11
LC
1030 (cons (list (len req) (len opt) rest
1031 (and (pair? kw) (map car (cdr kw)))
1032 (and (pair? kw) (car kw)))
1033 arities)))
1034 ((<lambda> meta body)
1035 (loop (assoc-ref meta 'name) body arities))
1036 (else
1037 (values #f #f))))))))
ae03cf1f 1038
7081d4f9
AW
1039 (let ((args (call-args call))
1040 (src (tree-il-src call)))
99480e11
LC
1041 (call-with-values (lambda () (arities proc))
1042 (lambda (name arities)
1043 (define matches?
1044 (find (lambda (arity)
1045 (pmatch arity
1046 ((,req ,opt ,rest? ,kw ,aok?)
1047 (let ((args (if (pair? kw)
1048 (filter-keyword-args kw aok? args)
1049 args)))
1050 (if (and req opt)
1051 (let ((count (length args)))
1052 (and (>= count req)
1053 (or rest?
1054 (<= count (+ req opt)))))
1055 #t)))
1056 (else #t)))
1057 arities))
1058
1059 (if (not matches?)
1060 (warning 'arity-mismatch src
1061 (or name (with-output-to-string (lambda () (write proc))))
1062 lexical?)))))
ae03cf1f
LC
1063 #t)
1064
1065(define arity-analysis
1066 ;; Report arity mismatches in the given tree.
1067 (make-tree-analysis
795ab688 1068 (lambda (x info env locs)
ae03cf1f
LC
1069 ;; X is a leaf.
1070 info)
795ab688 1071 (lambda (x info env locs)
ae03cf1f
LC
1072 ;; Down into X.
1073 (define (extend lexical-name val info)
1074 ;; If VAL is a lambda, add NAME to the lexical-lambdas of INFO.
1075 (let ((toplevel-calls (toplevel-procedure-calls info))
1076 (lexical-lambdas (lexical-lambdas info))
1077 (toplevel-lambdas (toplevel-lambdas info)))
1078 (record-case val
1079 ((<lambda> body)
1080 (make-arity-info toplevel-calls
df685ee4
LC
1081 (vhash-consq lexical-name val
1082 lexical-lambdas)
ae03cf1f
LC
1083 toplevel-lambdas))
1084 ((<lexical-ref> gensym)
1085 ;; lexical alias
df685ee4 1086 (let ((val* (vhash-assq gensym lexical-lambdas)))
ae03cf1f
LC
1087 (if (pair? val*)
1088 (extend lexical-name (cdr val*) info)
1089 info)))
1090 ((<toplevel-ref> name)
1091 ;; top-level alias
1092 (make-arity-info toplevel-calls
df685ee4
LC
1093 (vhash-consq lexical-name val
1094 lexical-lambdas)
ae03cf1f
LC
1095 toplevel-lambdas))
1096 (else info))))
1097
1098 (let ((toplevel-calls (toplevel-procedure-calls info))
1099 (lexical-lambdas (lexical-lambdas info))
1100 (toplevel-lambdas (toplevel-lambdas info)))
1101
1102 (record-case x
1103 ((<toplevel-define> name exp)
1104 (record-case exp
1105 ((<lambda> body)
1106 (make-arity-info toplevel-calls
1107 lexical-lambdas
df685ee4 1108 (vhash-consq name exp toplevel-lambdas)))
ae03cf1f
LC
1109 ((<toplevel-ref> name)
1110 ;; alias for another toplevel
df685ee4 1111 (let ((proc (vhash-assq name toplevel-lambdas)))
ae03cf1f
LC
1112 (make-arity-info toplevel-calls
1113 lexical-lambdas
df685ee4
LC
1114 (vhash-consq (toplevel-define-name x)
1115 (if (pair? proc)
1116 (cdr proc)
1117 exp)
1118 toplevel-lambdas))))
ae03cf1f 1119 (else info)))
93f63467
AW
1120 ((<let> gensyms vals)
1121 (fold extend info gensyms vals))
1122 ((<letrec> gensyms vals)
1123 (fold extend info gensyms vals))
1124 ((<fix> gensyms vals)
1125 (fold extend info gensyms vals))
ae03cf1f 1126
7081d4f9 1127 ((<call> proc args src)
ae03cf1f
LC
1128 (record-case proc
1129 ((<lambda> body)
1130 (validate-arity proc x #t)
1131 info)
1132 ((<toplevel-ref> name)
df685ee4 1133 (make-arity-info (vhash-consq name x toplevel-calls)
ae03cf1f
LC
1134 lexical-lambdas
1135 toplevel-lambdas))
1136 ((<lexical-ref> gensym)
df685ee4 1137 (let ((proc (vhash-assq gensym lexical-lambdas)))
ae03cf1f
LC
1138 (if (pair? proc)
1139 (record-case (cdr proc)
1140 ((<toplevel-ref> name)
1141 ;; alias to toplevel
df685ee4 1142 (make-arity-info (vhash-consq name x toplevel-calls)
ae03cf1f
LC
1143 lexical-lambdas
1144 toplevel-lambdas))
1145 (else
1146 (validate-arity (cdr proc) x #t)
1147 info))
1148
1149 ;; If GENSYM wasn't found, it may be because it's an
1150 ;; argument of the procedure being compiled.
1151 info)))
1152 (else info)))
1153 (else info))))
1154
795ab688 1155 (lambda (x info env locs)
ae03cf1f
LC
1156 ;; Up from X.
1157 (define (shrink name val info)
1158 ;; Remove NAME from the lexical-lambdas of INFO.
1159 (let ((toplevel-calls (toplevel-procedure-calls info))
1160 (lexical-lambdas (lexical-lambdas info))
1161 (toplevel-lambdas (toplevel-lambdas info)))
1162 (make-arity-info toplevel-calls
df685ee4
LC
1163 (if (vhash-assq name lexical-lambdas)
1164 (vlist-tail lexical-lambdas)
1165 lexical-lambdas)
ae03cf1f
LC
1166 toplevel-lambdas)))
1167
1168 (let ((toplevel-calls (toplevel-procedure-calls info))
1169 (lexical-lambdas (lexical-lambdas info))
1170 (toplevel-lambdas (toplevel-lambdas info)))
1171 (record-case x
93f63467
AW
1172 ((<let> gensyms vals)
1173 (fold shrink info gensyms vals))
1174 ((<letrec> gensyms vals)
1175 (fold shrink info gensyms vals))
1176 ((<fix> gensyms vals)
1177 (fold shrink info gensyms vals))
ae03cf1f
LC
1178
1179 (else info))))
1180
1181 (lambda (result env)
1182 ;; Post-processing: check all top-level procedure calls that have been
1183 ;; encountered.
1184 (let ((toplevel-calls (toplevel-procedure-calls result))
1185 (toplevel-lambdas (toplevel-lambdas result)))
df685ee4 1186 (vlist-for-each
7081d4f9
AW
1187 (lambda (name+call)
1188 (let* ((name (car name+call))
1189 (call (cdr name+call))
df685ee4
LC
1190 (proc
1191 (or (and=> (vhash-assq name toplevel-lambdas) cdr)
1192 (and (module? env)
1193 (false-if-exception
1194 (module-ref env name)))))
1195 (proc*
1196 ;; handle toplevel aliases
1197 (if (toplevel-ref? proc)
1198 (let ((name (toplevel-ref-name proc)))
1199 (and (module? env)
1200 (false-if-exception
1201 (module-ref env name))))
1202 proc)))
1203 (if (or (lambda? proc*) (procedure? proc*))
7081d4f9 1204 (validate-arity proc* call (lambda? proc*)))))
df685ee4
LC
1205 toplevel-calls)))
1206
1207 (make-arity-info vlist-null vlist-null vlist-null)))
75365375
LC
1208
1209\f
1210;;;
1211;;; `format' argument analysis.
1212;;;
1213
8e6c15a6
LC
1214(define &syntax-error
1215 ;; The `throw' key for syntax errors.
1216 (gensym "format-string-syntax-error"))
1217
75365375 1218(define (format-string-argument-count fmt)
e0697241
LC
1219 ;; Return the minimum and maxium number of arguments that should
1220 ;; follow format string FMT (or, ahem, a good estimate thereof) or
1221 ;; `any' if the format string can be followed by any number of
1222 ;; arguments.
1223
1224 (define (drop-group chars end)
1225 ;; Drop characters from CHARS until "~END" is encountered.
1226 (let loop ((chars chars)
1227 (tilde? #f))
1228 (if (null? chars)
8e6c15a6 1229 (throw &syntax-error 'unterminated-iteration)
e0697241
LC
1230 (if tilde?
1231 (if (eq? (car chars) end)
1232 (cdr chars)
1233 (loop (cdr chars) #f))
1234 (if (eq? (car chars) #\~)
1235 (loop (cdr chars) #t)
1236 (loop (cdr chars) #f))))))
1237
1238 (define (digit? char)
1239 ;; Return true if CHAR is a digit, #f otherwise.
1240 (memq char '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)))
1241
1242 (define (previous-number chars)
1243 ;; Return the previous series of digits found in CHARS.
1244 (let ((numbers (take-while digit? chars)))
1245 (and (not (null? numbers))
1246 (string->number (list->string (reverse numbers))))))
1247
1248 (let loop ((chars (string->list fmt))
1249 (state 'literal)
1250 (params '())
1251 (conditions '())
1252 (end-group #f)
1253 (min-count 0)
1254 (max-count 0))
75365375 1255 (if (null? chars)
e0697241 1256 (if end-group
8e6c15a6 1257 (throw &syntax-error 'unterminated-conditional)
e0697241
LC
1258 (values min-count max-count))
1259 (case state
1260 ((tilde)
1261 (case (car chars)
1262 ((#\~ #\% #\& #\t #\_ #\newline #\( #\))
1263 (loop (cdr chars) 'literal '()
1264 conditions end-group
1265 min-count max-count))
1266 ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\, #\: #\@)
1267 (loop (cdr chars)
1268 'tilde (cons (car chars) params)
1269 conditions end-group
1270 min-count max-count))
1271 ((#\v #\V) (loop (cdr chars)
1272 'tilde (cons (car chars) params)
1273 conditions end-group
1274 (+ 1 min-count)
1275 (+ 1 max-count)))
1276 ((#\[)
1277 (loop chars 'literal '() '()
1278 (let ((selector (previous-number params))
1279 (at? (memq #\@ params)))
1280 (lambda (chars conds)
1281 ;; end of group
1282 (let ((mins (map car conds))
1283 (maxs (map cdr conds))
1284 (sel? (and selector
1285 (< selector (length conds)))))
1286 (if (and (every number? mins)
1287 (every number? maxs))
1288 (loop chars 'literal '() conditions end-group
1289 (+ min-count
1290 (if sel?
1291 (car (list-ref conds selector))
1292 (+ (if at? 0 1)
1293 (if (null? mins)
1294 0
1295 (apply min mins)))))
1296 (+ max-count
1297 (if sel?
1298 (cdr (list-ref conds selector))
1299 (+ (if at? 0 1)
1300 (if (null? maxs)
1301 0
1302 (apply max maxs))))))
8e6c15a6 1303 (values 'any 'any))))) ;; XXX: approximation
e0697241
LC
1304 0 0))
1305 ((#\;)
8e6c15a6
LC
1306 (if end-group
1307 (loop (cdr chars) 'literal '()
1308 (cons (cons min-count max-count) conditions)
1309 end-group
1310 0 0)
1311 (throw &syntax-error 'unexpected-semicolon)))
e0697241
LC
1312 ((#\])
1313 (if end-group
1314 (end-group (cdr chars)
1315 (reverse (cons (cons min-count max-count)
1316 conditions)))
8e6c15a6 1317 (throw &syntax-error 'unexpected-conditional-termination)))
e0697241
LC
1318 ((#\{) (if (memq #\@ params)
1319 (values min-count 'any)
1320 (loop (drop-group (cdr chars) #\})
1321 'literal '()
1322 conditions end-group
1323 (+ 1 min-count) (+ 1 max-count))))
1324 ((#\*) (if (memq #\@ params)
1325 (values 'any 'any) ;; it's unclear what to do here
1326 (loop (cdr chars)
1327 'literal '()
1328 conditions end-group
1329 (+ (or (previous-number params) 1)
1330 min-count)
1331 (+ (or (previous-number params) 1)
1332 max-count))))
1333 ((#\? #\k)
1334 ;; We don't have enough info to determine the exact number
1335 ;; of args, but we could determine a lower bound (TODO).
1336 (values 'any 'any))
b4af80a4
LC
1337 ((#\h #\H)
1338 (let ((argc (if (memq #\: params) 2 1)))
1339 (loop (cdr chars) 'literal '()
1340 conditions end-group
1341 (+ argc min-count)
1342 (+ argc max-count))))
e0697241
LC
1343 (else (loop (cdr chars) 'literal '()
1344 conditions end-group
1345 (+ 1 min-count) (+ 1 max-count)))))
1346 ((literal)
1347 (case (car chars)
1348 ((#\~) (loop (cdr chars) 'tilde '()
1349 conditions end-group
1350 min-count max-count))
1351 (else (loop (cdr chars) 'literal '()
1352 conditions end-group
1353 min-count max-count))))
1354 (else (error "computer bought the farm" state))))))
75365375 1355
56e31389 1356(define (const-fmt x)
16a3b316
LC
1357 ;; Return the literal format pattern for X, or #f.
1358 (match x
1359 (($ <const> _ exp)
3936cebc 1360 exp)
ca128245 1361 (($ <call> _
16a3b316
LC
1362 (or ($ <toplevel-ref> _ '_) ($ <module-ref> _ '_))
1363 (($ <const> _ (and (? string?) fmt))))
3936cebc 1364 ;; Gettexted literals, like `(_ "foo")'.
16a3b316
LC
1365 fmt)
1366 (_ #f)))
56e31389 1367
75365375
LC
1368(define format-analysis
1369 ;; Report arity mismatches in the given tree.
1370 (make-tree-analysis
1371 (lambda (x _ env locs)
1372 ;; X is a leaf.
1373 #t)
1374
1375 (lambda (x _ env locs)
1376 ;; Down into X.
1377 (define (check-format-args args loc)
1378 (pmatch args
1379 ((,port ,fmt . ,rest)
56e31389 1380 (guard (const-fmt fmt))
60f01304
LC
1381 (if (and (const? port)
1382 (not (boolean? (const-exp port))))
1383 (warning 'format loc 'wrong-port (const-exp port)))
56e31389 1384 (let ((fmt (const-fmt fmt))
e0697241 1385 (count (length rest)))
60f01304 1386 (if (string? fmt)
8e6c15a6
LC
1387 (catch &syntax-error
1388 (lambda ()
1389 (let-values (((min max)
1390 (format-string-argument-count fmt)))
1391 (and min max
1392 (or (and (or (eq? min 'any) (>= count min))
1393 (or (eq? max 'any) (<= count max)))
1394 (warning 'format loc 'wrong-format-arg-count
1395 fmt min max count)))))
1396 (lambda (_ key)
1397 (warning 'format loc 'syntax-error key fmt)))
60f01304
LC
1398 (warning 'format loc 'wrong-format-string fmt))))
1399 ((,port ,fmt . ,rest)
56e31389
AW
1400 (if (and (const? port)
1401 (not (boolean? (const-exp port))))
3a822fff 1402 (warning 'format loc 'wrong-port (const-exp port)))
5414d333
AW
1403 ;; Warn on non-literal format strings, unless they refer to a
1404 ;; lexical variable named "fmt".
1405 (if (record-case fmt
1406 ((<lexical-ref> name)
1407 (not (eq? name 'fmt)))
1408 (else #t))
1409 (warning 'format loc 'non-literal-format-string)))
60f01304
LC
1410 (else
1411 (warning 'format loc 'wrong-num-args (length args)))))
75365375 1412
60273407
LC
1413 (define (check-simple-format-args args loc)
1414 ;; Check the arguments to the `simple-format' procedure, which is
1415 ;; less capable than that of (ice-9 format).
1416
1417 (define allowed-chars
1418 '(#\A #\S #\a #\s #\~ #\%))
1419
1420 (define (format-chars fmt)
1421 (let loop ((chars (string->list fmt))
1422 (result '()))
1423 (match chars
1424 (()
1425 (reverse result))
1426 ((#\~ opt rest ...)
1427 (loop rest (cons opt result)))
1428 ((_ rest ...)
1429 (loop rest result)))))
1430
1431 (match args
1432 ((port ($ <const> _ (? string? fmt)) _ ...)
1433 (let ((opts (format-chars fmt)))
1434 (or (every (cut memq <> allowed-chars) opts)
1435 (begin
1436 (warning 'format loc 'simple-format fmt
1437 (find (negate (cut memq <> allowed-chars)) opts))
1438 #f))))
1439 ((port (($ <const> _ '_) fmt) args ...)
1440 (check-simple-format-args `(,port ,fmt ,args) loc))
1441 (_ #t)))
1442
75365375
LC
1443 (define (resolve-toplevel name)
1444 (and (module? env)
1445 (false-if-exception (module-ref env name))))
1446
16a3b316 1447 (match x
ca128245 1448 (($ <call> src ($ <toplevel-ref> _ name) args)
16a3b316 1449 (let ((proc (resolve-toplevel name)))
60273407
LC
1450 (if (or (and (eq? proc (@ (guile) simple-format))
1451 (check-simple-format-args args
1452 (or src (find pair? locs))))
1453 (eq? proc (@ (ice-9 format) format)))
1454 (check-format-args args (or src (find pair? locs))))))
dfadcf85 1455 (($ <call> src ($ <module-ref> _ '(ice-9 format) 'format) args)
60273407 1456 (check-format-args args (or src (find pair? locs))))
dfadcf85
AW
1457 (($ <call> src ($ <module-ref> _ '(guile)
1458 (or 'format 'simple-format))
60273407
LC
1459 args)
1460 (and (check-simple-format-args args
1461 (or src (find pair? locs)))
1462 (check-format-args args (or src (find pair? locs)))))
16a3b316 1463 (_ #t))
75365375
LC
1464 #t)
1465
1466 (lambda (x _ env locs)
1467 ;; Up from X.
1468 #t)
1469
1470 (lambda (_ env)
1471 ;; Post-processing.
1472 #t)
1473
1474 #t))