Merge remote-tracking branch 'origin/master'
[bpt/guile.git] / module / language / tree-il / analyze.scm
CommitLineData
cf10678f
AW
1;;; TREE-IL -> GLIL compiler
2
b4af80a4 3;; Copyright (C) 2001, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
cf10678f 4
53befeb7
NJ
5;;;; This library is free software; you can redistribute it and/or
6;;;; modify it under the terms of the GNU Lesser General Public
7;;;; License as published by the Free Software Foundation; either
8;;;; version 3 of the License, or (at your option) any later version.
bcae9a98 9;;;;
53befeb7
NJ
10;;;; This library is distributed in the hope that it will be useful,
11;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13;;;; Lesser General Public License for more details.
bcae9a98 14;;;;
53befeb7
NJ
15;;;; You should have received a copy of the GNU Lesser General Public
16;;;; License along with this library; if not, write to the Free Software
17;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
cf10678f
AW
18
19;;; Code:
20
21(define-module (language tree-il analyze)
66d3e9a3 22 #:use-module (srfi srfi-1)
4b856371 23 #:use-module (srfi srfi-9)
bcae9a98 24 #:use-module (srfi srfi-11)
60273407 25 #:use-module (srfi srfi-26)
5cbf2e1d 26 #:use-module (ice-9 vlist)
16a3b316 27 #:use-module (ice-9 match)
cf10678f 28 #:use-module (system base syntax)
4b856371 29 #:use-module (system base message)
af5ed549 30 #:use-module (system vm program)
cf10678f 31 #:use-module (language tree-il)
99480e11 32 #:use-module (system base pmatch)
4b856371 33 #:export (analyze-lexicals
48b1db75
LC
34 analyze-tree
35 unused-variable-analysis
bcae9a98 36 unused-toplevel-analysis
ae03cf1f 37 unbound-variable-analysis
75365375
LC
38 arity-analysis
39 format-analysis))
cf10678f 40
66d3e9a3
AW
41;; Allocation is the process of assigning storage locations for lexical
42;; variables. A lexical variable has a distinct "address", or storage
43;; location, for each procedure in which it is referenced.
44;;
45;; A variable is "local", i.e., allocated on the stack, if it is
46;; referenced from within the procedure that defined it. Otherwise it is
47;; a "closure" variable. For example:
48;;
49;; (lambda (a) a) ; a will be local
50;; `a' is local to the procedure.
51;;
52;; (lambda (a) (lambda () a))
53;; `a' is local to the outer procedure, but a closure variable with
54;; respect to the inner procedure.
55;;
56;; If a variable is ever assigned, it needs to be heap-allocated
57;; ("boxed"). This is so that closures and continuations capture the
58;; variable's identity, not just one of the values it may have over the
59;; course of program execution. If the variable is never assigned, there
60;; is no distinction between value and identity, so closing over its
61;; identity (whether through closures or continuations) can make a copy
62;; of its value instead.
63;;
64;; Local variables are stored on the stack within a procedure's call
65;; frame. Their index into the stack is determined from their linear
66;; postion within a procedure's binding path:
cf10678f
AW
67;; (let (0 1)
68;; (let (2 3) ...)
69;; (let (2) ...))
70;; (let (2 3 4) ...))
71;; etc.
72;;
5af166bd
AW
73;; This algorithm has the problem that variables are only allocated
74;; indices at the end of the binding path. If variables bound early in
75;; the path are not used in later portions of the path, their indices
76;; will not be recycled. This problem is particularly egregious in the
77;; expansion of `or':
78;;
79;; (or x y z)
80;; -> (let ((a x)) (if a a (let ((b y)) (if b b z))))
81;;
b6d93b11
AW
82;; As you can see, the `a' binding is only used in the ephemeral
83;; `consequent' clause of the first `if', but its index would be
84;; reserved for the whole of the `or' expansion. So we have a hack for
85;; this specific case. A proper solution would be some sort of liveness
86;; analysis, and not our linear allocation algorithm.
5af166bd 87;;
282d128c
AW
88;; Closure variables are captured when a closure is created, and stored in a
89;; vector inline to the closure object itself. Each closure variable has a
90;; unique index into that vector.
66d3e9a3 91;;
9059993f
AW
92;; There is one more complication. Procedures bound by <fix> may, in
93;; some cases, be rendered inline to their parent procedure. That is to
94;; say,
95;;
96;; (letrec ((lp (lambda () (lp)))) (lp))
97;; => (fix ((lp (lambda () (lp)))) (lp))
98;; => goto FIX-BODY; LP: goto LP; FIX-BODY: goto LP;
99;; ^ jump over the loop ^ the fixpoint lp ^ starting off the loop
100;;
101;; The upshot is that we don't have to allocate any space for the `lp'
102;; closure at all, as it can be rendered inline as a loop. So there is
103;; another kind of allocation, "label allocation", in which the
104;; procedure is simply a label, placed at the start of the lambda body.
105;; The label is the gensym under which the lambda expression is bound.
106;;
107;; The analyzer checks to see that the label is called with the correct
108;; number of arguments. Calls to labels compile to rename + goto.
109;; Lambda, the ultimate goto!
110;;
66d3e9a3
AW
111;;
112;; The return value of `analyze-lexicals' is a hash table, the
113;; "allocation".
114;;
115;; The allocation maps gensyms -- recall that each lexically bound
116;; variable has a unique gensym -- to storage locations ("addresses").
117;; Since one gensym may have many storage locations, if it is referenced
118;; in many procedures, it is a two-level map.
119;;
120;; The allocation also stored information on how many local variables
9059993f
AW
121;; need to be allocated for each procedure, lexicals that have been
122;; translated into labels, and information on what free variables to
123;; capture from its lexical parent procedure.
66d3e9a3 124;;
8a4ca0ea
AW
125;; In addition, we have a conflation: while we're traversing the code,
126;; recording information to pass to the compiler, we take the
127;; opportunity to generate labels for each lambda-case clause, so that
128;; generated code can skip argument checks at runtime if they match at
129;; compile-time.
130;;
282d128c
AW
131;; Also, while we're a-traversing and an-allocating, we check prompt
132;; handlers to see if the "continuation" argument is used. If not, we
133;; mark the prompt as being "escape-only". This allows us to implement
134;; `catch' and `throw' using `prompt' and `control', but without causing
135;; a continuation to be reified. Heh heh.
136;;
66d3e9a3
AW
137;; That is:
138;;
139;; sym -> {lambda -> address}
8a4ca0ea
AW
140;; lambda -> (labels . free-locs)
141;; lambda-case -> (gensym . nlocs)
282d128c 142;; prompt -> escape-only?
66d3e9a3 143;;
9059993f 144;; address ::= (local? boxed? . index)
8a4ca0ea 145;; labels ::= ((sym . lambda) ...)
66d3e9a3
AW
146;; free-locs ::= ((sym0 . address0) (sym1 . address1) ...)
147;; free variable addresses are relative to parent proc.
148
149(define (make-hashq k v)
150 (let ((res (make-hash-table)))
151 (hashq-set! res k v)
152 res))
cf10678f
AW
153
154(define (analyze-lexicals x)
66d3e9a3
AW
155 ;; bound-vars: lambda -> (sym ...)
156 ;; all identifiers bound within a lambda
9059993f 157 (define bound-vars (make-hash-table))
66d3e9a3
AW
158 ;; free-vars: lambda -> (sym ...)
159 ;; all identifiers referenced in a lambda, but not bound
160 ;; NB, this includes identifiers referenced by contained lambdas
9059993f 161 (define free-vars (make-hash-table))
66d3e9a3
AW
162 ;; assigned: sym -> #t
163 ;; variables that are assigned
d97b69d9 164 (define assigned (make-hash-table))
5af166bd 165 ;; refcounts: sym -> count
66d3e9a3 166 ;; allows us to detect the or-expansion in O(1) time
9059993f 167 (define refcounts (make-hash-table))
8a4ca0ea 168 ;; labels: sym -> lambda
9059993f 169 ;; for determining if fixed-point procedures can be rendered as
8a4ca0ea 170 ;; labels.
9059993f
AW
171 (define labels (make-hash-table))
172
66d3e9a3 173 ;; returns variables referenced in expr
d97b69d9 174 (define (analyze! x proc labels-in-proc tail? tail-call-args)
aa9c1985 175 (define (step y) (analyze! y proc '() #f #f))
d97b69d9
AW
176 (define (step-tail y) (analyze! y proc labels-in-proc tail? #f))
177 (define (step-tail-call y args) (analyze! y proc labels-in-proc #f
178 (and tail? args)))
179 (define (recur/labels x new-proc labels)
180 (analyze! x new-proc (append labels labels-in-proc) #t #f))
181 (define (recur x new-proc) (analyze! x new-proc '() tail? #f))
cf10678f 182 (record-case x
7081d4f9 183 ((<call> proc args)
d97b69d9
AW
184 (apply lset-union eq? (step-tail-call proc args)
185 (map step args)))
cf10678f 186
a881a4ae
AW
187 ((<primcall> args)
188 (apply lset-union eq? (map step args)))
189
b6d93b11
AW
190 ((<conditional> test consequent alternate)
191 (lset-union eq? (step test) (step-tail consequent) (step-tail alternate)))
cf10678f 192
e5f5113c 193 ((<lexical-ref> gensym)
5af166bd 194 (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
d97b69d9
AW
195 (if (not (and tail-call-args
196 (memq gensym labels-in-proc)
8a4ca0ea
AW
197 (let ((p (hashq-ref labels gensym)))
198 (and p
199 (let lp ((c (lambda-body p)))
200 (and c (lambda-case? c)
201 (or
202 ;; for now prohibit optional &
203 ;; keyword arguments; can relax this
204 ;; restriction later
205 (and (= (length (lambda-case-req c))
206 (length tail-call-args))
207 (not (lambda-case-opt c))
208 (not (lambda-case-kw c))
1e2a8edb 209 (not (lambda-case-rest c)))
3a88cb3b 210 (lp (lambda-case-alternate c)))))))))
d97b69d9 211 (hashq-set! labels gensym #f))
66d3e9a3 212 (list gensym))
cf10678f 213
e5f5113c 214 ((<lexical-set> gensym exp)
66d3e9a3 215 (hashq-set! assigned gensym #t)
d97b69d9 216 (hashq-set! labels gensym #f)
66d3e9a3 217 (lset-adjoin eq? (step exp) gensym))
cf10678f 218
e5f5113c 219 ((<module-set> exp)
cf10678f
AW
220 (step exp))
221
e5f5113c 222 ((<toplevel-set> exp)
cf10678f
AW
223 (step exp))
224
e5f5113c 225 ((<toplevel-define> exp)
cf10678f
AW
226 (step exp))
227
6fc3eae4
AW
228 ((<seq> head tail)
229 (lset-union eq? (step head) (step-tail tail)))
cf10678f 230
8a4ca0ea
AW
231 ((<lambda> body)
232 ;; order is important here
233 (hashq-set! bound-vars x '())
234 (let ((free (recur body x)))
235 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
236 (hashq-set! free-vars x free)
237 free))
238
93f63467 239 ((<lambda-case> opt kw inits gensyms body alternate)
8a4ca0ea 240 (hashq-set! bound-vars proc
93f63467 241 (append (reverse gensyms) (hashq-ref bound-vars proc)))
8a4ca0ea
AW
242 (lset-union
243 eq?
244 (lset-difference eq?
b0c8c187
AW
245 (lset-union eq?
246 (apply lset-union eq? (map step inits))
8a4ca0ea 247 (step-tail body))
93f63467 248 gensyms)
3a88cb3b 249 (if alternate (step-tail alternate) '())))
66d3e9a3 250
93f63467 251 ((<let> gensyms vals body)
66d3e9a3 252 (hashq-set! bound-vars proc
93f63467 253 (append (reverse gensyms) (hashq-ref bound-vars proc)))
66d3e9a3 254 (lset-difference eq?
d97b69d9 255 (apply lset-union eq? (step-tail body) (map step vals))
93f63467 256 gensyms))
cf10678f 257
93f63467 258 ((<letrec> gensyms vals body)
66d3e9a3 259 (hashq-set! bound-vars proc
93f63467
AW
260 (append (reverse gensyms) (hashq-ref bound-vars proc)))
261 (for-each (lambda (sym) (hashq-set! assigned sym #t)) gensyms)
66d3e9a3 262 (lset-difference eq?
d97b69d9 263 (apply lset-union eq? (step-tail body) (map step vals))
93f63467 264 gensyms))
66d3e9a3 265
93f63467 266 ((<fix> gensyms vals body)
d97b69d9 267 ;; Try to allocate these procedures as labels.
8a4ca0ea 268 (for-each (lambda (sym val) (hashq-set! labels sym val))
93f63467 269 gensyms vals)
c21c89b1 270 (hashq-set! bound-vars proc
93f63467 271 (append (reverse gensyms) (hashq-ref bound-vars proc)))
d97b69d9
AW
272 ;; Step into subexpressions.
273 (let* ((var-refs
274 (map
275 ;; Since we're trying to label-allocate the lambda,
276 ;; pretend it's not a closure, and just recurse into its
277 ;; body directly. (Otherwise, recursing on a closure
278 ;; that references one of the fix's bound vars would
279 ;; prevent label allocation.)
280 (lambda (x)
281 (record-case x
8a4ca0ea
AW
282 ((<lambda> body)
283 ;; just like the closure case, except here we use
284 ;; recur/labels instead of recur
285 (hashq-set! bound-vars x '())
93f63467 286 (let ((free (recur/labels body x gensyms)))
8a4ca0ea
AW
287 (hashq-set! bound-vars x (reverse! (hashq-ref bound-vars x)))
288 (hashq-set! free-vars x free)
289 free))))
d97b69d9 290 vals))
93f63467
AW
291 (vars-with-refs (map cons gensyms var-refs))
292 (body-refs (recur/labels body proc gensyms)))
d97b69d9
AW
293 (define (delabel-dependents! sym)
294 (let ((refs (assq-ref vars-with-refs sym)))
295 (if refs
296 (for-each (lambda (sym)
297 (if (hashq-ref labels sym)
298 (begin
299 (hashq-set! labels sym #f)
300 (delabel-dependents! sym))))
301 refs))))
302 ;; Stepping into the lambdas and the body might have made some
303 ;; procedures not label-allocatable -- which might have
304 ;; knock-on effects. For example:
305 ;; (fix ((a (lambda () (b)))
306 ;; (b (lambda () a)))
307 ;; (a))
308 ;; As far as `a' is concerned, both `a' and `b' are
309 ;; label-allocatable. But `b' references `a' not in a proc-tail
310 ;; position, which makes `a' not label-allocatable. The
311 ;; knock-on effect is that, when back-propagating this
312 ;; information to `a', `b' will also become not
313 ;; label-allocatable, as it is referenced within `a', which is
314 ;; allocated as a closure. This is a transitive relationship.
315 (for-each (lambda (sym)
316 (if (not (hashq-ref labels sym))
317 (delabel-dependents! sym)))
93f63467 318 gensyms)
d97b69d9
AW
319 ;; Now lift bound variables with label-allocated lambdas to the
320 ;; parent procedure.
321 (for-each
322 (lambda (sym val)
323 (if (hashq-ref labels sym)
324 ;; Remove traces of the label-bound lambda. The free
325 ;; vars will propagate up via the return val.
326 (begin
327 (hashq-set! bound-vars proc
328 (append (hashq-ref bound-vars val)
329 (hashq-ref bound-vars proc)))
330 (hashq-remove! bound-vars val)
331 (hashq-remove! free-vars val))))
93f63467 332 gensyms vals)
d97b69d9
AW
333 (lset-difference eq?
334 (apply lset-union eq? body-refs var-refs)
93f63467 335 gensyms)))
c21c89b1 336
8a4ca0ea
AW
337 ((<let-values> exp body)
338 (lset-union eq? (step exp) (step body)))
66d3e9a3 339
880e7948
AW
340 ((<dynwind> winder pre body post unwinder)
341 (lset-union eq? (step winder) (step pre)
342 (step body)
343 (step post) (step unwinder)))
282d128c 344
d7c53a86
AW
345 ((<dynlet> fluids vals body)
346 (apply lset-union eq? (step body) (map step (append fluids vals))))
347
706a705e
AW
348 ((<dynref> fluid)
349 (step fluid))
350
351 ((<dynset> fluid exp)
352 (lset-union eq? (step fluid) (step exp)))
353
07a0c7d5 354 ((<prompt> tag body handler)
b9204185 355 (lset-union eq? (step tag) (step body) (step-tail handler)))
282d128c 356
2d026f04
AW
357 ((<abort> tag args tail)
358 (apply lset-union eq? (step tag) (step tail) (map step args)))
282d128c 359
66d3e9a3
AW
360 (else '())))
361
9059993f 362 ;; allocation: sym -> {lambda -> address}
3004fe26
MW
363 ;; lambda -> (labels . free-locs)
364 ;; lambda-case -> (gensym . nlocs)
9059993f
AW
365 (define allocation (make-hash-table))
366
66d3e9a3
AW
367 (define (allocate! x proc n)
368 (define (recur y) (allocate! y proc n))
369 (record-case x
7081d4f9 370 ((<call> proc args)
66d3e9a3 371 (apply max (recur proc) (map recur args)))
cf10678f 372
a881a4ae
AW
373 ((<primcall> args)
374 (apply max n (map recur args)))
375
b6d93b11
AW
376 ((<conditional> test consequent alternate)
377 (max (recur test) (recur consequent) (recur alternate)))
cf10678f 378
e5f5113c 379 ((<lexical-set> exp)
66d3e9a3
AW
380 (recur exp))
381
e5f5113c 382 ((<module-set> exp)
66d3e9a3
AW
383 (recur exp))
384
e5f5113c 385 ((<toplevel-set> exp)
66d3e9a3
AW
386 (recur exp))
387
e5f5113c 388 ((<toplevel-define> exp)
66d3e9a3
AW
389 (recur exp))
390
6fc3eae4
AW
391 ((<seq> head tail)
392 (max (recur head)
393 (recur tail)))
66d3e9a3 394
8a4ca0ea 395 ((<lambda> body)
66d3e9a3
AW
396 ;; allocate closure vars in order
397 (let lp ((c (hashq-ref free-vars x)) (n 0))
398 (if (pair? c)
399 (begin
400 (hashq-set! (hashq-ref allocation (car c))
401 x
402 `(#f ,(hashq-ref assigned (car c)) . ,n))
403 (lp (cdr c) (1+ n)))))
404
8a4ca0ea 405 (let ((nlocs (allocate! body x 0))
66d3e9a3
AW
406 (free-addresses
407 (map (lambda (v)
408 (hashq-ref (hashq-ref allocation v) proc))
9059993f
AW
409 (hashq-ref free-vars x)))
410 (labels (filter cdr
411 (map (lambda (sym)
412 (cons sym (hashq-ref labels sym)))
413 (hashq-ref bound-vars x)))))
66d3e9a3 414 ;; set procedure allocations
8a4ca0ea 415 (hashq-set! allocation x (cons labels free-addresses)))
66d3e9a3 416 n)
cf10678f 417
93f63467 418 ((<lambda-case> opt kw inits gensyms body alternate)
8a4ca0ea 419 (max
93f63467
AW
420 (let lp ((gensyms gensyms) (n n))
421 (if (null? gensyms)
b0c8c187
AW
422 (let ((nlocs (apply
423 max
b0c8c187
AW
424 (allocate! body proc n)
425 ;; inits not logically at the end, but they
426 ;; are the list...
9a9d82c2 427 (map (lambda (x) (allocate! x proc n)) inits))))
8a4ca0ea
AW
428 ;; label and nlocs for the case
429 (hashq-set! allocation x (cons (gensym ":LCASE") nlocs))
430 nlocs)
431 (begin
93f63467 432 (hashq-set! allocation (car gensyms)
8a4ca0ea 433 (make-hashq
93f63467
AW
434 proc `(#t ,(hashq-ref assigned (car gensyms)) . ,n)))
435 (lp (cdr gensyms) (1+ n)))))
3a88cb3b 436 (if alternate (allocate! alternate proc n) n)))
8a4ca0ea 437
93f63467 438 ((<let> gensyms vals body)
66d3e9a3
AW
439 (let ((nmax (apply max (map recur vals))))
440 (cond
441 ;; the `or' hack
442 ((and (conditional? body)
93f63467
AW
443 (= (length gensyms) 1)
444 (let ((v (car gensyms)))
66d3e9a3
AW
445 (and (not (hashq-ref assigned v))
446 (= (hashq-ref refcounts v 0) 2)
447 (lexical-ref? (conditional-test body))
448 (eq? (lexical-ref-gensym (conditional-test body)) v)
b6d93b11
AW
449 (lexical-ref? (conditional-consequent body))
450 (eq? (lexical-ref-gensym (conditional-consequent body)) v))))
93f63467 451 (hashq-set! allocation (car gensyms)
66d3e9a3
AW
452 (make-hashq proc `(#t #f . ,n)))
453 ;; the 1+ for this var
b6d93b11 454 (max nmax (1+ n) (allocate! (conditional-alternate body) proc n)))
66d3e9a3 455 (else
93f63467
AW
456 (let lp ((gensyms gensyms) (n n))
457 (if (null? gensyms)
66d3e9a3 458 (max nmax (allocate! body proc n))
93f63467 459 (let ((v (car gensyms)))
cf10678f
AW
460 (hashq-set!
461 allocation v
66d3e9a3
AW
462 (make-hashq proc
463 `(#t ,(hashq-ref assigned v) . ,n)))
93f63467 464 (lp (cdr gensyms) (1+ n)))))))))
66d3e9a3 465
93f63467
AW
466 ((<letrec> gensyms vals body)
467 (let lp ((gensyms gensyms) (n n))
468 (if (null? gensyms)
66d3e9a3
AW
469 (let ((nmax (apply max
470 (map (lambda (x)
471 (allocate! x proc n))
472 vals))))
473 (max nmax (allocate! body proc n)))
93f63467 474 (let ((v (car gensyms)))
66d3e9a3
AW
475 (hashq-set!
476 allocation v
477 (make-hashq proc
478 `(#t ,(hashq-ref assigned v) . ,n)))
93f63467 479 (lp (cdr gensyms) (1+ n))))))
cf10678f 480
93f63467
AW
481 ((<fix> gensyms vals body)
482 (let lp ((in gensyms) (n n))
d97b69d9 483 (if (null? in)
93f63467 484 (let lp ((gensyms gensyms) (vals vals) (nmax n))
d97b69d9 485 (cond
93f63467 486 ((null? gensyms)
d97b69d9 487 (max nmax (allocate! body proc n)))
93f63467 488 ((hashq-ref labels (car gensyms))
8a4ca0ea 489 ;; allocate lambda body inline to proc
93f63467 490 (lp (cdr gensyms)
d97b69d9
AW
491 (cdr vals)
492 (record-case (car vals)
8a4ca0ea
AW
493 ((<lambda> body)
494 (max nmax (allocate! body proc n))))))
d97b69d9
AW
495 (else
496 ;; allocate closure
93f63467 497 (lp (cdr gensyms)
d97b69d9
AW
498 (cdr vals)
499 (max nmax (allocate! (car vals) proc n))))))
500
501 (let ((v (car in)))
502 (cond
503 ((hashq-ref assigned v)
504 (error "fixpoint procedures may not be assigned" x))
505 ((hashq-ref labels v)
506 ;; no binding, it's a label
507 (lp (cdr in) n))
508 (else
509 ;; allocate closure binding
510 (hashq-set! allocation v (make-hashq proc `(#t #f . ,n)))
511 (lp (cdr in) (1+ n))))))))
c21c89b1 512
8a4ca0ea
AW
513 ((<let-values> exp body)
514 (max (recur exp) (recur body)))
66d3e9a3 515
880e7948
AW
516 ((<dynwind> winder pre body post unwinder)
517 (max (recur winder) (recur pre)
518 (recur body)
519 (recur post) (recur unwinder)))
282d128c 520
d7c53a86
AW
521 ((<dynlet> fluids vals body)
522 (apply max (recur body) (map recur (append fluids vals))))
523
706a705e
AW
524 ((<dynref> fluid)
525 (recur fluid))
526
527 ((<dynset> fluid exp)
528 (max (recur fluid) (recur exp)))
529
07a0c7d5 530 ((<prompt> tag body handler)
282d128c 531 (let ((cont-var (and (lambda-case? handler)
93f63467
AW
532 (pair? (lambda-case-gensyms handler))
533 (car (lambda-case-gensyms handler)))))
282d128c
AW
534 (hashq-set! allocation x
535 (and cont-var (zero? (hashq-ref refcounts cont-var 0))))
07a0c7d5 536 (max (recur tag) (recur body) (recur handler))))
282d128c 537
2d026f04
AW
538 ((<abort> tag args tail)
539 (apply max (recur tag) (recur tail) (map recur args)))
282d128c 540
66d3e9a3 541 (else n)))
cf10678f 542
d97b69d9 543 (analyze! x #f '() #t #f)
66d3e9a3 544 (allocate! x #f 0)
cf10678f
AW
545
546 allocation)
4b856371
LC
547
548\f
48b1db75
LC
549;;;
550;;; Tree analyses for warnings.
551;;;
552
553(define-record-type <tree-analysis>
554 (make-tree-analysis leaf down up post init)
555 tree-analysis?
795ab688
LC
556 (leaf tree-analysis-leaf) ;; (lambda (x result env locs) ...)
557 (down tree-analysis-down) ;; (lambda (x result env locs) ...)
558 (up tree-analysis-up) ;; (lambda (x result env locs) ...)
48b1db75
LC
559 (post tree-analysis-post) ;; (lambda (result env) ...)
560 (init tree-analysis-init)) ;; arbitrary value
561
562(define (analyze-tree analyses tree env)
563 "Run all tree analyses listed in ANALYSES on TREE for ENV, using
795ab688
LC
564`tree-il-fold'. Return TREE. The leaf/down/up procedures of each analysis are
565passed a ``location stack', which is the stack of `tree-il-src' values for each
566parent tree (a list); it can be used to approximate source location when
567accurate information is missing from a given `tree-il' element."
568
569 (define (traverse proc update-locs)
570 ;; Return a tree traversing procedure that returns a list of analysis
571 ;; results prepended by the location stack.
48b1db75 572 (lambda (x results)
795ab688
LC
573 (let ((locs (update-locs x (car results))))
574 (cons locs ;; the location stack
575 (map (lambda (analysis result)
576 ((proc analysis) x result env locs))
577 analyses
578 (cdr results))))))
579
580 ;; Keeping/extending/shrinking the location stack.
581 (define (keep-locs x locs) locs)
582 (define (extend-locs x locs) (cons (tree-il-src x) locs))
583 (define (shrink-locs x locs) (cdr locs))
48b1db75
LC
584
585 (let ((results
795ab688
LC
586 (tree-il-fold (traverse tree-analysis-leaf keep-locs)
587 (traverse tree-analysis-down extend-locs)
588 (traverse tree-analysis-up shrink-locs)
589 (cons '() ;; empty location stack
590 (map tree-analysis-init analyses))
48b1db75
LC
591 tree)))
592
593 (for-each (lambda (analysis result)
594 ((tree-analysis-post analysis) result env))
595 analyses
795ab688 596 (cdr results)))
48b1db75
LC
597
598 tree)
599
600\f
4b856371
LC
601;;;
602;;; Unused variable analysis.
603;;;
604
605;; <binding-info> records are used during tree traversals in
795ab688
LC
606;; `unused-variable-analysis'. They contain a list of the local vars
607;; currently in scope, and a list of locals vars that have been referenced.
4b856371 608(define-record-type <binding-info>
795ab688 609 (make-binding-info vars refs)
4b856371
LC
610 binding-info?
611 (vars binding-info-vars) ;; ((GENSYM NAME LOCATION) ...)
795ab688 612 (refs binding-info-refs)) ;; (GENSYM ...)
4b856371 613
3a1a883b
LC
614(define (gensym? sym)
615 ;; Return #t if SYM is (likely) a generated symbol.
616 (string-any #\space (symbol->string sym)))
617
48b1db75 618(define unused-variable-analysis
ae03cf1f 619 ;; Report unused variables in the given tree.
48b1db75 620 (make-tree-analysis
795ab688 621 (lambda (x info env locs)
48b1db75
LC
622 ;; X is a leaf: extend INFO's refs accordingly.
623 (let ((refs (binding-info-refs info))
795ab688 624 (vars (binding-info-vars info)))
48b1db75
LC
625 (record-case x
626 ((<lexical-ref> gensym)
a670e672 627 (make-binding-info vars (vhash-consq gensym #t refs)))
48b1db75
LC
628 (else info))))
629
795ab688 630 (lambda (x info env locs)
48b1db75
LC
631 ;; Going down into X: extend INFO's variable list
632 ;; accordingly.
633 (let ((refs (binding-info-refs info))
634 (vars (binding-info-vars info))
48b1db75
LC
635 (src (tree-il-src x)))
636 (define (extend inner-vars inner-names)
a670e672
LC
637 (fold (lambda (var name vars)
638 (vhash-consq var (list name src) vars))
639 vars
640 inner-vars
641 inner-names))
642
48b1db75
LC
643 (record-case x
644 ((<lexical-set> gensym)
a670e672 645 (make-binding-info vars (vhash-consq gensym #t refs)))
93f63467 646 ((<lambda-case> req opt inits rest kw gensyms)
48b1db75 647 (let ((names `(,@req
632e7c32 648 ,@(or opt '())
48b1db75
LC
649 ,@(if rest (list rest) '())
650 ,@(if kw (map cadr (cdr kw)) '()))))
93f63467
AW
651 (make-binding-info (extend gensyms names) refs)))
652 ((<let> gensyms names)
653 (make-binding-info (extend gensyms names) refs))
654 ((<letrec> gensyms names)
655 (make-binding-info (extend gensyms names) refs))
656 ((<fix> gensyms names)
657 (make-binding-info (extend gensyms names) refs))
48b1db75
LC
658 (else info))))
659
795ab688 660 (lambda (x info env locs)
48b1db75
LC
661 ;; Leaving X's scope: shrink INFO's variable list
662 ;; accordingly and reported unused nested variables.
663 (let ((refs (binding-info-refs info))
795ab688 664 (vars (binding-info-vars info)))
48b1db75 665 (define (shrink inner-vars refs)
a670e672
LC
666 (vlist-for-each
667 (lambda (var)
668 (let ((gensym (car var)))
669 ;; Don't report lambda parameters as unused.
670 (if (and (memq gensym inner-vars)
671 (not (vhash-assq gensym refs))
672 (not (lambda-case? x)))
673 (let ((name (cadr var))
674 ;; We can get approximate source location by going up
675 ;; the LOCS location stack.
676 (loc (or (caddr var)
677 (find pair? locs))))
3a1a883b
LC
678 (if (and (not (gensym? name))
679 (not (eq? name '_)))
680 (warning 'unused-variable loc name))))))
a670e672
LC
681 vars)
682 (vlist-drop vars (length inner-vars)))
48b1db75
LC
683
684 ;; For simplicity, we leave REFS untouched, i.e., with
685 ;; names of variables that are now going out of scope.
686 ;; It doesn't hurt as these are unique names, it just
687 ;; makes REFS unnecessarily fat.
688 (record-case x
93f63467
AW
689 ((<lambda-case> gensyms)
690 (make-binding-info (shrink gensyms refs) refs))
691 ((<let> gensyms)
692 (make-binding-info (shrink gensyms refs) refs))
693 ((<letrec> gensyms)
694 (make-binding-info (shrink gensyms refs) refs))
695 ((<fix> gensyms)
696 (make-binding-info (shrink gensyms refs) refs))
48b1db75
LC
697 (else info))))
698
699 (lambda (result env) #t)
a670e672 700 (make-binding-info vlist-null vlist-null)))
f67ddf9d
LC
701
702\f
bcae9a98
LC
703;;;
704;;; Unused top-level variable analysis.
705;;;
706
628ddb80 707;; <reference-graph> record top-level definitions that are made, references to
bcae9a98
LC
708;; top-level definitions and their context (the top-level definition in which
709;; the reference appears), as well as the current context (the top-level
710;; definition we're currently in). The second part (`refs' below) is
628ddb80
LC
711;; effectively a graph from which we can determine unused top-level definitions.
712(define-record-type <reference-graph>
713 (make-reference-graph refs defs toplevel-context)
714 reference-graph?
715 (defs reference-graph-defs) ;; ((NAME . LOC) ...)
716 (refs reference-graph-refs) ;; ((REF-CONTEXT REF ...) ...)
717 (toplevel-context reference-graph-toplevel-context)) ;; NAME | #f
718
5cbf2e1d
LC
719(define (graph-reachable-nodes root refs reachable)
720 ;; Add to REACHABLE the nodes reachable from ROOT in graph REFS. REFS is a
721 ;; vhash mapping nodes to the list of their children: for instance,
722 ;; ((A -> (B C)) (B -> (A)) (C -> ())) corresponds to
bcae9a98
LC
723 ;;
724 ;; ,-------.
725 ;; v |
726 ;; A ----> B
727 ;; |
728 ;; v
729 ;; C
5cbf2e1d
LC
730 ;;
731 ;; REACHABLE is a vhash of nodes known to be otherwise reachable.
bcae9a98
LC
732
733 (let loop ((root root)
5cbf2e1d
LC
734 (path vlist-null)
735 (result reachable))
736 (if (or (vhash-assq root path)
737 (vhash-assq root result))
bcae9a98 738 result
5cbf2e1d
LC
739 (let* ((children (or (and=> (vhash-assq root refs) cdr) '()))
740 (path (vhash-consq root #t path))
741 (result (fold (lambda (kid result)
742 (loop kid path result))
743 result
744 children)))
745 (fold (lambda (kid result)
746 (vhash-consq kid #t result))
747 result
748 children)))))
bcae9a98 749
628ddb80 750(define (graph-reachable-nodes* roots refs)
bcae9a98 751 ;; Return the list of nodes in REFS reachable from the nodes listed in ROOTS.
5cbf2e1d
LC
752 (vlist-fold (lambda (root+true result)
753 (let* ((root (car root+true))
754 (reachable (graph-reachable-nodes root refs result)))
755 (vhash-consq root #t reachable)))
756 vlist-null
757 roots))
758
759(define (partition* pred vhash)
760 ;; Partition VHASH according to PRED. Return the two resulting vhashes.
761 (let ((result
762 (vlist-fold (lambda (k+v result)
763 (let ((k (car k+v))
764 (v (cdr k+v))
765 (r1 (car result))
766 (r2 (cdr result)))
767 (if (pred k)
768 (cons (vhash-consq k v r1) r2)
769 (cons r1 (vhash-consq k v r2)))))
770 (cons vlist-null vlist-null)
771 vhash)))
772 (values (car result) (cdr result))))
bcae9a98
LC
773
774(define unused-toplevel-analysis
775 ;; Report unused top-level definitions that are not exported.
776 (let ((add-ref-from-context
628ddb80
LC
777 (lambda (graph name)
778 ;; Add an edge CTX -> NAME in GRAPH.
779 (let* ((refs (reference-graph-refs graph))
780 (defs (reference-graph-defs graph))
781 (ctx (reference-graph-toplevel-context graph))
5cbf2e1d
LC
782 (ctx-refs (or (and=> (vhash-assq ctx refs) cdr) '())))
783 (make-reference-graph (vhash-consq ctx (cons name ctx-refs) refs)
628ddb80 784 defs ctx)))))
bcae9a98
LC
785 (define (macro-variable? name env)
786 (and (module? env)
787 (let ((var (module-variable env name)))
788 (and var (variable-bound? var)
789 (macro? (variable-ref var))))))
790
791 (make-tree-analysis
628ddb80 792 (lambda (x graph env locs)
bcae9a98 793 ;; X is a leaf.
628ddb80 794 (let ((ctx (reference-graph-toplevel-context graph)))
bcae9a98
LC
795 (record-case x
796 ((<toplevel-ref> name src)
628ddb80
LC
797 (add-ref-from-context graph name))
798 (else graph))))
bcae9a98 799
628ddb80 800 (lambda (x graph env locs)
bcae9a98 801 ;; Going down into X.
628ddb80
LC
802 (let ((ctx (reference-graph-toplevel-context graph))
803 (refs (reference-graph-refs graph))
804 (defs (reference-graph-defs graph)))
bcae9a98
LC
805 (record-case x
806 ((<toplevel-define> name src)
807 (let ((refs refs)
5cbf2e1d
LC
808 (defs (vhash-consq name (or src (find pair? locs))
809 defs)))
628ddb80 810 (make-reference-graph refs defs name)))
bcae9a98 811 ((<toplevel-set> name src)
628ddb80
LC
812 (add-ref-from-context graph name))
813 (else graph))))
bcae9a98 814
628ddb80 815 (lambda (x graph env locs)
bcae9a98
LC
816 ;; Leaving X's scope.
817 (record-case x
818 ((<toplevel-define>)
628ddb80
LC
819 (let ((refs (reference-graph-refs graph))
820 (defs (reference-graph-defs graph)))
821 (make-reference-graph refs defs #f)))
822 (else graph)))
bcae9a98 823
628ddb80
LC
824 (lambda (graph env)
825 ;; Process the resulting reference graph: determine all private definitions
bcae9a98
LC
826 ;; not reachable from any public definition. Macros
827 ;; (syntax-transformers), which are globally bound, never considered
828 ;; unused since we can't tell whether a macro is actually used; in
628ddb80 829 ;; addition, macros are considered roots of the graph since they may use
bcae9a98
LC
830 ;; private bindings. FIXME: The `make-syntax-transformer' calls don't
831 ;; contain any literal `toplevel-ref' of the global bindings they use so
832 ;; this strategy fails.
833 (define (exported? name)
834 (if (module? env)
835 (module-variable (module-public-interface env) name)
836 #t))
837
838 (let-values (((public-defs private-defs)
5cbf2e1d
LC
839 (partition* (lambda (name)
840 (or (exported? name)
841 (macro-variable? name env)))
842 (reference-graph-defs graph))))
843 (let* ((roots (vhash-consq #f #t public-defs))
628ddb80
LC
844 (refs (reference-graph-refs graph))
845 (reachable (graph-reachable-nodes* roots refs))
5cbf2e1d
LC
846 (unused (vlist-filter (lambda (name+src)
847 (not (vhash-assq (car name+src)
848 reachable)))
849 private-defs)))
850 (vlist-for-each (lambda (name+loc)
851 (let ((name (car name+loc))
852 (loc (cdr name+loc)))
3a1a883b
LC
853 (if (not (gensym? name))
854 (warning 'unused-toplevel loc name))))
5cbf2e1d
LC
855 unused))))
856
857 (make-reference-graph vlist-null vlist-null #f))))
bcae9a98
LC
858
859\f
f67ddf9d
LC
860;;;
861;;; Unbound variable analysis.
862;;;
863
864;; <toplevel-info> records are used during tree traversal in search of
865;; possibly unbound variable. They contain a list of references to
795ab688
LC
866;; potentially unbound top-level variables, and a list of the top-level
867;; defines that have been encountered.
f67ddf9d 868(define-record-type <toplevel-info>
795ab688 869 (make-toplevel-info refs defs)
f67ddf9d
LC
870 toplevel-info?
871 (refs toplevel-info-refs) ;; ((VARIABLE-NAME . LOCATION) ...)
795ab688 872 (defs toplevel-info-defs)) ;; (VARIABLE-NAME ...)
f67ddf9d 873
6bb891dc 874(define (goops-toplevel-definition proc args env)
7081d4f9 875 ;; If call of PROC to ARGS is a GOOPS top-level definition, return
b6d2306d
LC
876 ;; the name of the variable being defined; otherwise return #f. This
877 ;; assumes knowledge of the current implementation of `define-class' et al.
6bb891dc 878 (define (toplevel-define-arg args)
16a3b316
LC
879 (match args
880 ((($ <const> _ (and (? symbol?) exp)) _)
881 exp)
882 (_ #f)))
883
884 (match proc
885 (($ <module-ref> _ '(oop goops) 'toplevel-define! #f)
886 (toplevel-define-arg args))
887 (($ <toplevel-ref> _ 'toplevel-define!)
6bb891dc
LC
888 ;; This may be the result of expanding one of the GOOPS macros within
889 ;; `oop/goops.scm'.
16a3b316 890 (and (eq? env (resolve-module '(oop goops)))
6bb891dc 891 (toplevel-define-arg args)))
16a3b316 892 (_ #f)))
b6d2306d 893
48b1db75 894(define unbound-variable-analysis
ae03cf1f 895 ;; Report possibly unbound variables in the given tree.
48b1db75 896 (make-tree-analysis
795ab688 897 (lambda (x info env locs)
48b1db75
LC
898 ;; X is a leaf: extend INFO's refs accordingly.
899 (let ((refs (toplevel-info-refs info))
795ab688 900 (defs (toplevel-info-defs info)))
48b1db75
LC
901 (define (bound? name)
902 (or (and (module? env)
903 (module-variable env name))
04ea6fb5 904 (vhash-assq name defs)))
48b1db75
LC
905
906 (record-case x
907 ((<toplevel-ref> name src)
908 (if (bound? name)
909 info
910 (let ((src (or src (find pair? locs))))
04ea6fb5 911 (make-toplevel-info (vhash-consq name src refs)
795ab688 912 defs))))
48b1db75
LC
913 (else info))))
914
795ab688 915 (lambda (x info env locs)
48b1db75
LC
916 ;; Going down into X.
917 (let* ((refs (toplevel-info-refs info))
918 (defs (toplevel-info-defs info))
795ab688 919 (src (tree-il-src x)))
48b1db75
LC
920 (define (bound? name)
921 (or (and (module? env)
922 (module-variable env name))
04ea6fb5 923 (vhash-assq name defs)))
48b1db75
LC
924
925 (record-case x
926 ((<toplevel-set> name src)
927 (if (bound? name)
795ab688 928 (make-toplevel-info refs defs)
48b1db75 929 (let ((src (find pair? locs)))
04ea6fb5 930 (make-toplevel-info (vhash-consq name src refs)
795ab688 931 defs))))
48b1db75 932 ((<toplevel-define> name)
1e1808c9 933 (make-toplevel-info (vhash-delq name refs)
04ea6fb5 934 (vhash-consq name #t defs)))
48b1db75 935
7081d4f9 936 ((<call> proc args)
48b1db75
LC
937 ;; Check for a dynamic top-level definition, as is
938 ;; done by code expanded from GOOPS macros.
939 (let ((name (goops-toplevel-definition proc args
940 env)))
941 (if (symbol? name)
1e1808c9 942 (make-toplevel-info (vhash-delq name refs)
04ea6fb5 943 (vhash-consq name #t defs))
795ab688 944 (make-toplevel-info refs defs))))
48b1db75 945 (else
795ab688 946 (make-toplevel-info refs defs)))))
48b1db75 947
795ab688 948 (lambda (x info env locs)
48b1db75 949 ;; Leaving X's scope.
bcae9a98 950 info)
48b1db75
LC
951
952 (lambda (toplevel env)
953 ;; Post-process the result.
04ea6fb5
LC
954 (vlist-for-each (lambda (name+loc)
955 (let ((name (car name+loc))
956 (loc (cdr name+loc)))
957 (warning 'unbound-variable loc name)))
958 (vlist-reverse (toplevel-info-refs toplevel))))
48b1db75 959
04ea6fb5 960 (make-toplevel-info vlist-null vlist-null)))
ae03cf1f
LC
961
962\f
963;;;
964;;; Arity analysis.
965;;;
966
af5ed549 967;; <arity-info> records contain information about lexical definitions of
ae03cf1f
LC
968;; procedures currently in scope, top-level procedure definitions that have
969;; been encountered, and calls to top-level procedures that have been
970;; encountered.
971(define-record-type <arity-info>
972 (make-arity-info toplevel-calls lexical-lambdas toplevel-lambdas)
973 arity-info?
7081d4f9 974 (toplevel-calls toplevel-procedure-calls) ;; ((NAME . CALL) ...)
ae03cf1f
LC
975 (lexical-lambdas lexical-lambdas) ;; ((GENSYM . DEFINITION) ...)
976 (toplevel-lambdas toplevel-lambdas)) ;; ((NAME . DEFINITION) ...)
977
7081d4f9
AW
978(define (validate-arity proc call lexical?)
979 ;; Validate the argument count of CALL, a tree-il call of
ae03cf1f
LC
980 ;; PROC, emitting a warning in case of argument count mismatch.
981
af5ed549
LC
982 (define (filter-keyword-args keywords allow-other-keys? args)
983 ;; Filter keyword arguments from ARGS and return the resulting list.
984 ;; KEYWORDS is the list of allowed keywords, and ALLOW-OTHER-KEYS?
985 ;; specified whethere keywords not listed in KEYWORDS are allowed.
986 (let loop ((args args)
987 (result '()))
988 (if (null? args)
989 (reverse result)
990 (let ((arg (car args)))
991 (if (and (const? arg)
992 (or (memq (const-exp arg) keywords)
993 (and allow-other-keys?
994 (keyword? (const-exp arg)))))
995 (loop (if (pair? (cdr args))
996 (cddr args)
997 '())
998 result)
999 (loop (cdr args)
1000 (cons arg result)))))))
1001
99480e11
LC
1002 (define (arities proc)
1003 ;; Return the arities of PROC, which can be either a tree-il or a
ae03cf1f
LC
1004 ;; procedure.
1005 (define (len x)
1006 (or (and (or (null? x) (pair? x))
1007 (length x))
1008 0))
af5ed549 1009 (cond ((program? proc)
1e23b461 1010 (values (procedure-name proc)
99480e11
LC
1011 (map (lambda (a)
1012 (list (arity:nreq a) (arity:nopt a) (arity:rest? a)
1013 (map car (arity:kw a))
1014 (arity:allow-other-keys? a)))
1015 (program-arities proc))))
ae03cf1f 1016 ((procedure? proc)
15bb587f
AW
1017 (if (struct? proc)
1018 ;; An applicable struct.
1019 (arities (struct-ref proc 0))
1020 ;; An applicable smob.
1021 (let ((arity (procedure-minimum-arity proc)))
1022 (values (procedure-name proc)
1023 (list (list (car arity) (cadr arity) (caddr arity)
1024 #f #f))))))
ae03cf1f 1025 (else
99480e11
LC
1026 (let loop ((name #f)
1027 (proc proc)
1028 (arities '()))
1029 (if (not proc)
1030 (values name (reverse arities))
1031 (record-case proc
3a88cb3b
AW
1032 ((<lambda-case> req opt rest kw alternate)
1033 (loop name alternate
99480e11
LC
1034 (cons (list (len req) (len opt) rest
1035 (and (pair? kw) (map car (cdr kw)))
1036 (and (pair? kw) (car kw)))
1037 arities)))
1038 ((<lambda> meta body)
1039 (loop (assoc-ref meta 'name) body arities))
1040 (else
1041 (values #f #f))))))))
ae03cf1f 1042
7081d4f9
AW
1043 (let ((args (call-args call))
1044 (src (tree-il-src call)))
99480e11
LC
1045 (call-with-values (lambda () (arities proc))
1046 (lambda (name arities)
1047 (define matches?
1048 (find (lambda (arity)
1049 (pmatch arity
1050 ((,req ,opt ,rest? ,kw ,aok?)
1051 (let ((args (if (pair? kw)
1052 (filter-keyword-args kw aok? args)
1053 args)))
1054 (if (and req opt)
1055 (let ((count (length args)))
1056 (and (>= count req)
1057 (or rest?
1058 (<= count (+ req opt)))))
1059 #t)))
1060 (else #t)))
1061 arities))
1062
1063 (if (not matches?)
1064 (warning 'arity-mismatch src
1065 (or name (with-output-to-string (lambda () (write proc))))
1066 lexical?)))))
ae03cf1f
LC
1067 #t)
1068
1069(define arity-analysis
1070 ;; Report arity mismatches in the given tree.
1071 (make-tree-analysis
795ab688 1072 (lambda (x info env locs)
ae03cf1f
LC
1073 ;; X is a leaf.
1074 info)
795ab688 1075 (lambda (x info env locs)
ae03cf1f
LC
1076 ;; Down into X.
1077 (define (extend lexical-name val info)
1078 ;; If VAL is a lambda, add NAME to the lexical-lambdas of INFO.
1079 (let ((toplevel-calls (toplevel-procedure-calls info))
1080 (lexical-lambdas (lexical-lambdas info))
1081 (toplevel-lambdas (toplevel-lambdas info)))
1082 (record-case val
1083 ((<lambda> body)
1084 (make-arity-info toplevel-calls
df685ee4
LC
1085 (vhash-consq lexical-name val
1086 lexical-lambdas)
ae03cf1f
LC
1087 toplevel-lambdas))
1088 ((<lexical-ref> gensym)
1089 ;; lexical alias
df685ee4 1090 (let ((val* (vhash-assq gensym lexical-lambdas)))
ae03cf1f
LC
1091 (if (pair? val*)
1092 (extend lexical-name (cdr val*) info)
1093 info)))
1094 ((<toplevel-ref> name)
1095 ;; top-level alias
1096 (make-arity-info toplevel-calls
df685ee4
LC
1097 (vhash-consq lexical-name val
1098 lexical-lambdas)
ae03cf1f
LC
1099 toplevel-lambdas))
1100 (else info))))
1101
1102 (let ((toplevel-calls (toplevel-procedure-calls info))
1103 (lexical-lambdas (lexical-lambdas info))
1104 (toplevel-lambdas (toplevel-lambdas info)))
1105
1106 (record-case x
1107 ((<toplevel-define> name exp)
1108 (record-case exp
1109 ((<lambda> body)
1110 (make-arity-info toplevel-calls
1111 lexical-lambdas
df685ee4 1112 (vhash-consq name exp toplevel-lambdas)))
ae03cf1f
LC
1113 ((<toplevel-ref> name)
1114 ;; alias for another toplevel
df685ee4 1115 (let ((proc (vhash-assq name toplevel-lambdas)))
ae03cf1f
LC
1116 (make-arity-info toplevel-calls
1117 lexical-lambdas
df685ee4
LC
1118 (vhash-consq (toplevel-define-name x)
1119 (if (pair? proc)
1120 (cdr proc)
1121 exp)
1122 toplevel-lambdas))))
ae03cf1f 1123 (else info)))
93f63467
AW
1124 ((<let> gensyms vals)
1125 (fold extend info gensyms vals))
1126 ((<letrec> gensyms vals)
1127 (fold extend info gensyms vals))
1128 ((<fix> gensyms vals)
1129 (fold extend info gensyms vals))
ae03cf1f 1130
7081d4f9 1131 ((<call> proc args src)
ae03cf1f
LC
1132 (record-case proc
1133 ((<lambda> body)
1134 (validate-arity proc x #t)
1135 info)
1136 ((<toplevel-ref> name)
df685ee4 1137 (make-arity-info (vhash-consq name x toplevel-calls)
ae03cf1f
LC
1138 lexical-lambdas
1139 toplevel-lambdas))
1140 ((<lexical-ref> gensym)
df685ee4 1141 (let ((proc (vhash-assq gensym lexical-lambdas)))
ae03cf1f
LC
1142 (if (pair? proc)
1143 (record-case (cdr proc)
1144 ((<toplevel-ref> name)
1145 ;; alias to toplevel
df685ee4 1146 (make-arity-info (vhash-consq name x toplevel-calls)
ae03cf1f
LC
1147 lexical-lambdas
1148 toplevel-lambdas))
1149 (else
1150 (validate-arity (cdr proc) x #t)
1151 info))
1152
1153 ;; If GENSYM wasn't found, it may be because it's an
1154 ;; argument of the procedure being compiled.
1155 info)))
1156 (else info)))
1157 (else info))))
1158
795ab688 1159 (lambda (x info env locs)
ae03cf1f
LC
1160 ;; Up from X.
1161 (define (shrink name val info)
1162 ;; Remove NAME from the lexical-lambdas of INFO.
1163 (let ((toplevel-calls (toplevel-procedure-calls info))
1164 (lexical-lambdas (lexical-lambdas info))
1165 (toplevel-lambdas (toplevel-lambdas info)))
1166 (make-arity-info toplevel-calls
df685ee4
LC
1167 (if (vhash-assq name lexical-lambdas)
1168 (vlist-tail lexical-lambdas)
1169 lexical-lambdas)
ae03cf1f
LC
1170 toplevel-lambdas)))
1171
1172 (let ((toplevel-calls (toplevel-procedure-calls info))
1173 (lexical-lambdas (lexical-lambdas info))
1174 (toplevel-lambdas (toplevel-lambdas info)))
1175 (record-case x
93f63467
AW
1176 ((<let> gensyms vals)
1177 (fold shrink info gensyms vals))
1178 ((<letrec> gensyms vals)
1179 (fold shrink info gensyms vals))
1180 ((<fix> gensyms vals)
1181 (fold shrink info gensyms vals))
ae03cf1f
LC
1182
1183 (else info))))
1184
1185 (lambda (result env)
1186 ;; Post-processing: check all top-level procedure calls that have been
1187 ;; encountered.
1188 (let ((toplevel-calls (toplevel-procedure-calls result))
1189 (toplevel-lambdas (toplevel-lambdas result)))
df685ee4 1190 (vlist-for-each
7081d4f9
AW
1191 (lambda (name+call)
1192 (let* ((name (car name+call))
1193 (call (cdr name+call))
df685ee4
LC
1194 (proc
1195 (or (and=> (vhash-assq name toplevel-lambdas) cdr)
1196 (and (module? env)
1197 (false-if-exception
1198 (module-ref env name)))))
1199 (proc*
1200 ;; handle toplevel aliases
1201 (if (toplevel-ref? proc)
1202 (let ((name (toplevel-ref-name proc)))
1203 (and (module? env)
1204 (false-if-exception
1205 (module-ref env name))))
1206 proc)))
2c5f0bdb 1207 (cond ((lambda? proc*)
74bbb994 1208 (validate-arity proc* call #t))
2c5f0bdb 1209 ((procedure? proc*)
74bbb994 1210 (validate-arity proc* call #f)))))
df685ee4
LC
1211 toplevel-calls)))
1212
1213 (make-arity-info vlist-null vlist-null vlist-null)))
75365375
LC
1214
1215\f
1216;;;
1217;;; `format' argument analysis.
1218;;;
1219
8e6c15a6
LC
1220(define &syntax-error
1221 ;; The `throw' key for syntax errors.
1222 (gensym "format-string-syntax-error"))
1223
75365375 1224(define (format-string-argument-count fmt)
e0697241
LC
1225 ;; Return the minimum and maxium number of arguments that should
1226 ;; follow format string FMT (or, ahem, a good estimate thereof) or
1227 ;; `any' if the format string can be followed by any number of
1228 ;; arguments.
1229
1230 (define (drop-group chars end)
1231 ;; Drop characters from CHARS until "~END" is encountered.
1232 (let loop ((chars chars)
1233 (tilde? #f))
1234 (if (null? chars)
8e6c15a6 1235 (throw &syntax-error 'unterminated-iteration)
e0697241
LC
1236 (if tilde?
1237 (if (eq? (car chars) end)
1238 (cdr chars)
1239 (loop (cdr chars) #f))
1240 (if (eq? (car chars) #\~)
1241 (loop (cdr chars) #t)
1242 (loop (cdr chars) #f))))))
1243
1244 (define (digit? char)
1245 ;; Return true if CHAR is a digit, #f otherwise.
1246 (memq char '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)))
1247
1248 (define (previous-number chars)
1249 ;; Return the previous series of digits found in CHARS.
1250 (let ((numbers (take-while digit? chars)))
1251 (and (not (null? numbers))
1252 (string->number (list->string (reverse numbers))))))
1253
1254 (let loop ((chars (string->list fmt))
1255 (state 'literal)
1256 (params '())
1257 (conditions '())
1258 (end-group #f)
1259 (min-count 0)
1260 (max-count 0))
75365375 1261 (if (null? chars)
e0697241 1262 (if end-group
8e6c15a6 1263 (throw &syntax-error 'unterminated-conditional)
e0697241
LC
1264 (values min-count max-count))
1265 (case state
1266 ((tilde)
1267 (case (car chars)
1268 ((#\~ #\% #\& #\t #\_ #\newline #\( #\))
1269 (loop (cdr chars) 'literal '()
1270 conditions end-group
1271 min-count max-count))
1272 ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\, #\: #\@)
1273 (loop (cdr chars)
1274 'tilde (cons (car chars) params)
1275 conditions end-group
1276 min-count max-count))
1277 ((#\v #\V) (loop (cdr chars)
1278 'tilde (cons (car chars) params)
1279 conditions end-group
1280 (+ 1 min-count)
1281 (+ 1 max-count)))
1282 ((#\[)
1283 (loop chars 'literal '() '()
1284 (let ((selector (previous-number params))
1285 (at? (memq #\@ params)))
1286 (lambda (chars conds)
1287 ;; end of group
1288 (let ((mins (map car conds))
1289 (maxs (map cdr conds))
1290 (sel? (and selector
1291 (< selector (length conds)))))
1292 (if (and (every number? mins)
1293 (every number? maxs))
1294 (loop chars 'literal '() conditions end-group
1295 (+ min-count
1296 (if sel?
1297 (car (list-ref conds selector))
1298 (+ (if at? 0 1)
1299 (if (null? mins)
1300 0
1301 (apply min mins)))))
1302 (+ max-count
1303 (if sel?
1304 (cdr (list-ref conds selector))
1305 (+ (if at? 0 1)
1306 (if (null? maxs)
1307 0
1308 (apply max maxs))))))
8e6c15a6 1309 (values 'any 'any))))) ;; XXX: approximation
e0697241
LC
1310 0 0))
1311 ((#\;)
8e6c15a6
LC
1312 (if end-group
1313 (loop (cdr chars) 'literal '()
1314 (cons (cons min-count max-count) conditions)
1315 end-group
1316 0 0)
1317 (throw &syntax-error 'unexpected-semicolon)))
e0697241
LC
1318 ((#\])
1319 (if end-group
1320 (end-group (cdr chars)
1321 (reverse (cons (cons min-count max-count)
1322 conditions)))
8e6c15a6 1323 (throw &syntax-error 'unexpected-conditional-termination)))
e0697241
LC
1324 ((#\{) (if (memq #\@ params)
1325 (values min-count 'any)
1326 (loop (drop-group (cdr chars) #\})
1327 'literal '()
1328 conditions end-group
1329 (+ 1 min-count) (+ 1 max-count))))
1330 ((#\*) (if (memq #\@ params)
1331 (values 'any 'any) ;; it's unclear what to do here
1332 (loop (cdr chars)
1333 'literal '()
1334 conditions end-group
1335 (+ (or (previous-number params) 1)
1336 min-count)
1337 (+ (or (previous-number params) 1)
1338 max-count))))
1339 ((#\? #\k)
1340 ;; We don't have enough info to determine the exact number
1341 ;; of args, but we could determine a lower bound (TODO).
1342 (values 'any 'any))
b4af80a4
LC
1343 ((#\h #\H)
1344 (let ((argc (if (memq #\: params) 2 1)))
1345 (loop (cdr chars) 'literal '()
1346 conditions end-group
1347 (+ argc min-count)
1348 (+ argc max-count))))
e0697241
LC
1349 (else (loop (cdr chars) 'literal '()
1350 conditions end-group
1351 (+ 1 min-count) (+ 1 max-count)))))
1352 ((literal)
1353 (case (car chars)
1354 ((#\~) (loop (cdr chars) 'tilde '()
1355 conditions end-group
1356 min-count max-count))
1357 (else (loop (cdr chars) 'literal '()
1358 conditions end-group
1359 min-count max-count))))
1360 (else (error "computer bought the farm" state))))))
75365375 1361
98385ed2
LC
1362(define (proc-ref? exp proc special-name env)
1363 "Return #t when EXP designates procedure PROC in ENV. As a last
1364resort, return #t when EXP refers to the global variable SPECIAL-NAME."
8a74ffe8
LC
1365
1366 (define special?
1367 (cut eq? <> special-name))
1368
98385ed2 1369 (match exp
8a74ffe8
LC
1370 (($ <toplevel-ref> _ (? special?))
1371 ;; Allow top-levels like: (define _ (cut gettext <> "my-domain")).
1372 #t)
afc98031 1373 (($ <toplevel-ref> _ name)
dab48cc5 1374 (let ((var (module-variable env name)))
8a74ffe8
LC
1375 (and var (variable-bound? var)
1376 (eq? (variable-ref var) proc))))
1377 (($ <module-ref> _ _ (? special?))
1378 #t)
afc98031 1379 (($ <module-ref> _ module name public?)
dab48cc5
AW
1380 (let* ((mod (if public?
1381 (false-if-exception (resolve-interface module))
4c984747 1382 (resolve-module module #:ensure #f)))
dab48cc5
AW
1383 (var (and mod (module-variable mod name))))
1384 (and var (variable-bound? var) (eq? (variable-ref var) proc))))
8a74ffe8 1385 (($ <lexical-ref> _ (? special?))
4c984747 1386 #t)
afc98031
LC
1387 (_ #f)))
1388
98385ed2
LC
1389(define gettext? (cut proc-ref? <> gettext '_ <>))
1390(define ngettext? (cut proc-ref? <> ngettext 'N_ <>))
1391
afc98031 1392(define (const-fmt x env)
98385ed2 1393 ;; Return the literal format string for X, or #f.
16a3b316 1394 (match x
98385ed2 1395 (($ <const> _ (? string? exp))
3936cebc 1396 exp)
9d15db65 1397 (($ <call> _ (? (cut gettext? <> env))
afc98031 1398 (($ <const> _ (? string? fmt))))
3936cebc 1399 ;; Gettexted literals, like `(_ "foo")'.
16a3b316 1400 fmt)
9d15db65 1401 (($ <call> _ (? (cut ngettext? <> env))
98385ed2
LC
1402 (($ <const> _ (? string? fmt)) ($ <const> _ (? string?)) _ ..1))
1403 ;; Plural gettextized literals, like `(N_ "singular" "plural" n)'.
1404
1405 ;; TODO: Check whether the singular and plural strings have the
1406 ;; same format escapes.
1407 fmt)
16a3b316 1408 (_ #f)))
56e31389 1409
75365375
LC
1410(define format-analysis
1411 ;; Report arity mismatches in the given tree.
1412 (make-tree-analysis
1413 (lambda (x _ env locs)
1414 ;; X is a leaf.
1415 #t)
1416
1417 (lambda (x _ env locs)
1418 ;; Down into X.
1419 (define (check-format-args args loc)
1420 (pmatch args
1421 ((,port ,fmt . ,rest)
afc98031 1422 (guard (const-fmt fmt env))
60f01304
LC
1423 (if (and (const? port)
1424 (not (boolean? (const-exp port))))
1425 (warning 'format loc 'wrong-port (const-exp port)))
afc98031 1426 (let ((fmt (const-fmt fmt env))
e0697241 1427 (count (length rest)))
98385ed2
LC
1428 (catch &syntax-error
1429 (lambda ()
1430 (let-values (((min max)
1431 (format-string-argument-count fmt)))
1432 (and min max
1433 (or (and (or (eq? min 'any) (>= count min))
1434 (or (eq? max 'any) (<= count max)))
1435 (warning 'format loc 'wrong-format-arg-count
1436 fmt min max count)))))
1437 (lambda (_ key)
1438 (warning 'format loc 'syntax-error key fmt)))))
60f01304 1439 ((,port ,fmt . ,rest)
56e31389
AW
1440 (if (and (const? port)
1441 (not (boolean? (const-exp port))))
3a822fff 1442 (warning 'format loc 'wrong-port (const-exp port)))
98385ed2
LC
1443
1444 (match fmt
1445 (($ <const> loc* (? (negate string?) fmt))
1446 (warning 'format (or loc* loc) 'wrong-format-string fmt))
1447
1448 ;; Warn on non-literal format strings, unless they refer to
1449 ;; a lexical variable named "fmt".
1450 (($ <lexical-ref> _ fmt)
1451 #t)
1452 ((? (negate const?))
1453 (warning 'format loc 'non-literal-format-string))))
60f01304
LC
1454 (else
1455 (warning 'format loc 'wrong-num-args (length args)))))
75365375 1456
60273407
LC
1457 (define (check-simple-format-args args loc)
1458 ;; Check the arguments to the `simple-format' procedure, which is
1459 ;; less capable than that of (ice-9 format).
1460
1461 (define allowed-chars
1462 '(#\A #\S #\a #\s #\~ #\%))
1463
1464 (define (format-chars fmt)
1465 (let loop ((chars (string->list fmt))
1466 (result '()))
1467 (match chars
1468 (()
1469 (reverse result))
1470 ((#\~ opt rest ...)
1471 (loop rest (cons opt result)))
1472 ((_ rest ...)
1473 (loop rest result)))))
1474
1475 (match args
1476 ((port ($ <const> _ (? string? fmt)) _ ...)
1477 (let ((opts (format-chars fmt)))
1478 (or (every (cut memq <> allowed-chars) opts)
1479 (begin
1480 (warning 'format loc 'simple-format fmt
1481 (find (negate (cut memq <> allowed-chars)) opts))
1482 #f))))
98385ed2
LC
1483 ((port (= (cut const-fmt <> env) (? string? fmt)) args ...)
1484 (check-simple-format-args `(,port ,(make-const loc fmt) ,args) loc))
60273407
LC
1485 (_ #t)))
1486
75365375
LC
1487 (define (resolve-toplevel name)
1488 (and (module? env)
1489 (false-if-exception (module-ref env name))))
1490
16a3b316 1491 (match x
ca128245 1492 (($ <call> src ($ <toplevel-ref> _ name) args)
16a3b316 1493 (let ((proc (resolve-toplevel name)))
60273407
LC
1494 (if (or (and (eq? proc (@ (guile) simple-format))
1495 (check-simple-format-args args
1496 (or src (find pair? locs))))
1497 (eq? proc (@ (ice-9 format) format)))
1498 (check-format-args args (or src (find pair? locs))))))
dfadcf85 1499 (($ <call> src ($ <module-ref> _ '(ice-9 format) 'format) args)
60273407 1500 (check-format-args args (or src (find pair? locs))))
dfadcf85
AW
1501 (($ <call> src ($ <module-ref> _ '(guile)
1502 (or 'format 'simple-format))
60273407
LC
1503 args)
1504 (and (check-simple-format-args args
1505 (or src (find pair? locs)))
1506 (check-format-args args (or src (find pair? locs)))))
16a3b316 1507 (_ #t))
75365375
LC
1508 #t)
1509
1510 (lambda (x _ env locs)
1511 ;; Up from X.
1512 #t)
1513
1514 (lambda (_ env)
1515 ;; Post-processing.
1516 #t)
1517
1518 #t))