Merge commit 'origin/master'
[bpt/guile.git] / module / language / tree-il / analyze.scm
1 ;;; TREE-IL -> GLIL compiler
2
3 ;; Copyright (C) 2001,2008,2009 Free Software Foundation, Inc.
4
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
9 ;;;;
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
14 ;;;;
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Code:
20
21 (define-module (language tree-il analyze)
22 #:use-module (srfi srfi-1)
23 #:use-module (srfi srfi-9)
24 #:use-module (system base syntax)
25 #:use-module (system base message)
26 #:use-module (language tree-il)
27 #:export (analyze-lexicals
28 report-unused-variables))
29
30 ;; Allocation is the process of assigning storage locations for lexical
31 ;; variables. A lexical variable has a distinct "address", or storage
32 ;; location, for each procedure in which it is referenced.
33 ;;
34 ;; A variable is "local", i.e., allocated on the stack, if it is
35 ;; referenced from within the procedure that defined it. Otherwise it is
36 ;; a "closure" variable. For example:
37 ;;
38 ;; (lambda (a) a) ; a will be local
39 ;; `a' is local to the procedure.
40 ;;
41 ;; (lambda (a) (lambda () a))
42 ;; `a' is local to the outer procedure, but a closure variable with
43 ;; respect to the inner procedure.
44 ;;
45 ;; If a variable is ever assigned, it needs to be heap-allocated
46 ;; ("boxed"). This is so that closures and continuations capture the
47 ;; variable's identity, not just one of the values it may have over the
48 ;; course of program execution. If the variable is never assigned, there
49 ;; is no distinction between value and identity, so closing over its
50 ;; identity (whether through closures or continuations) can make a copy
51 ;; of its value instead.
52 ;;
53 ;; Local variables are stored on the stack within a procedure's call
54 ;; frame. Their index into the stack is determined from their linear
55 ;; postion within a procedure's binding path:
56 ;; (let (0 1)
57 ;; (let (2 3) ...)
58 ;; (let (2) ...))
59 ;; (let (2 3 4) ...))
60 ;; etc.
61 ;;
62 ;; This algorithm has the problem that variables are only allocated
63 ;; indices at the end of the binding path. If variables bound early in
64 ;; the path are not used in later portions of the path, their indices
65 ;; will not be recycled. This problem is particularly egregious in the
66 ;; expansion of `or':
67 ;;
68 ;; (or x y z)
69 ;; -> (let ((a x)) (if a a (let ((b y)) (if b b z))))
70 ;;
71 ;; As you can see, the `a' binding is only used in the ephemeral `then'
72 ;; clause of the first `if', but its index would be reserved for the
73 ;; whole of the `or' expansion. So we have a hack for this specific
74 ;; case. A proper solution would be some sort of liveness analysis, and
75 ;; not our linear allocation algorithm.
76 ;;
77 ;; Closure variables are captured when a closure is created, and stored
78 ;; in a vector. Each closure variable has a unique index into that
79 ;; vector.
80 ;;
81 ;; There is one more complication. Procedures bound by <fix> may, in
82 ;; some cases, be rendered inline to their parent procedure. That is to
83 ;; say,
84 ;;
85 ;; (letrec ((lp (lambda () (lp)))) (lp))
86 ;; => (fix ((lp (lambda () (lp)))) (lp))
87 ;; => goto FIX-BODY; LP: goto LP; FIX-BODY: goto LP;
88 ;; ^ jump over the loop ^ the fixpoint lp ^ starting off the loop
89 ;;
90 ;; The upshot is that we don't have to allocate any space for the `lp'
91 ;; closure at all, as it can be rendered inline as a loop. So there is
92 ;; another kind of allocation, "label allocation", in which the
93 ;; procedure is simply a label, placed at the start of the lambda body.
94 ;; The label is the gensym under which the lambda expression is bound.
95 ;;
96 ;; The analyzer checks to see that the label is called with the correct
97 ;; number of arguments. Calls to labels compile to rename + goto.
98 ;; Lambda, the ultimate goto!
99 ;;
100 ;;
101 ;; The return value of `analyze-lexicals' is a hash table, the
102 ;; "allocation".
103 ;;
104 ;; The allocation maps gensyms -- recall that each lexically bound
105 ;; variable has a unique gensym -- to storage locations ("addresses").
106 ;; Since one gensym may have many storage locations, if it is referenced
107 ;; in many procedures, it is a two-level map.
108 ;;
109 ;; The allocation also stored information on how many local variables
110 ;; need to be allocated for each procedure, lexicals that have been
111 ;; translated into labels, and information on what free variables to
112 ;; capture from its lexical parent procedure.
113 ;;
114 ;; That is:
115 ;;
116 ;; sym -> {lambda -> address}
117 ;; lambda -> (nlocs labels . free-locs)
118 ;;
119 ;; address ::= (local? boxed? . index)
120 ;; labels ::= ((sym . lambda-vars) ...)
121 ;; free-locs ::= ((sym0 . address0) (sym1 . address1) ...)
122 ;; free variable addresses are relative to parent proc.
123
124 (define (make-hashq k v)
125 (let ((res (make-hash-table)))
126 (hashq-set! res k v)
127 res))
128
129 (define (analyze-lexicals x)
130 ;; bound-vars: lambda -> (sym ...)
131 ;; all identifiers bound within a lambda
132 (define bound-vars (make-hash-table))
133 ;; free-vars: lambda -> (sym ...)
134 ;; all identifiers referenced in a lambda, but not bound
135 ;; NB, this includes identifiers referenced by contained lambdas
136 (define free-vars (make-hash-table))
137 ;; assigned: sym -> #t
138 ;; variables that are assigned
139 (define assigned (make-hash-table))
140 ;; refcounts: sym -> count
141 ;; allows us to detect the or-expansion in O(1) time
142 (define refcounts (make-hash-table))
143 ;; labels: sym -> lambda-vars
144 ;; for determining if fixed-point procedures can be rendered as
145 ;; labels. lambda-vars may be an improper list.
146 (define labels (make-hash-table))
147
148 ;; returns variables referenced in expr
149 (define (analyze! x proc labels-in-proc tail? tail-call-args)
150 (define (step y) (analyze! y proc labels-in-proc #f #f))
151 (define (step-tail y) (analyze! y proc labels-in-proc tail? #f))
152 (define (step-tail-call y args) (analyze! y proc labels-in-proc #f
153 (and tail? args)))
154 (define (recur/labels x new-proc labels)
155 (analyze! x new-proc (append labels labels-in-proc) #t #f))
156 (define (recur x new-proc) (analyze! x new-proc '() tail? #f))
157 (record-case x
158 ((<application> proc args)
159 (apply lset-union eq? (step-tail-call proc args)
160 (map step args)))
161
162 ((<conditional> test then else)
163 (lset-union eq? (step test) (step-tail then) (step-tail else)))
164
165 ((<lexical-ref> name gensym)
166 (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
167 (if (not (and tail-call-args
168 (memq gensym labels-in-proc)
169 (let ((args (hashq-ref labels gensym)))
170 (and (list? args)
171 (= (length args) (length tail-call-args))))))
172 (hashq-set! labels gensym #f))
173 (list gensym))
174
175 ((<lexical-set> name gensym exp)
176 (hashq-set! assigned gensym #t)
177 (hashq-set! labels gensym #f)
178 (lset-adjoin eq? (step exp) gensym))
179
180 ((<module-set> mod name public? exp)
181 (step exp))
182
183 ((<toplevel-set> name exp)
184 (step exp))
185
186 ((<toplevel-define> name exp)
187 (step exp))
188
189 ((<sequence> exps)
190 (let lp ((exps exps) (ret '()))
191 (cond ((null? exps) '())
192 ((null? (cdr exps))
193 (lset-union eq? ret (step-tail (car exps))))
194 (else
195 (lp (cdr exps) (lset-union eq? ret (step (car exps))))))))
196
197 ((<lambda> vars meta body)
198 (let ((locally-bound (let rev* ((vars vars) (out '()))
199 (cond ((null? vars) out)
200 ((pair? vars) (rev* (cdr vars)
201 (cons (car vars) out)))
202 (else (cons vars out))))))
203 (hashq-set! bound-vars x locally-bound)
204 (let* ((referenced (recur body x))
205 (free (lset-difference eq? referenced locally-bound))
206 (all-bound (reverse! (hashq-ref bound-vars x))))
207 (hashq-set! bound-vars x all-bound)
208 (hashq-set! free-vars x free)
209 free)))
210
211 ((<let> vars vals body)
212 (hashq-set! bound-vars proc
213 (append (reverse vars) (hashq-ref bound-vars proc)))
214 (lset-difference eq?
215 (apply lset-union eq? (step-tail body) (map step vals))
216 vars))
217
218 ((<letrec> vars vals body)
219 (hashq-set! bound-vars proc
220 (append (reverse vars) (hashq-ref bound-vars proc)))
221 (for-each (lambda (sym) (hashq-set! assigned sym #t)) vars)
222 (lset-difference eq?
223 (apply lset-union eq? (step-tail body) (map step vals))
224 vars))
225
226 ((<fix> vars vals body)
227 ;; Try to allocate these procedures as labels.
228 (for-each (lambda (sym val) (hashq-set! labels sym (lambda-vars val)))
229 vars vals)
230 (hashq-set! bound-vars proc
231 (append (reverse vars) (hashq-ref bound-vars proc)))
232 ;; Step into subexpressions.
233 (let* ((var-refs
234 (map
235 ;; Since we're trying to label-allocate the lambda,
236 ;; pretend it's not a closure, and just recurse into its
237 ;; body directly. (Otherwise, recursing on a closure
238 ;; that references one of the fix's bound vars would
239 ;; prevent label allocation.)
240 (lambda (x)
241 (record-case x
242 ((<lambda> (lvars vars) body)
243 (let ((locally-bound
244 (let rev* ((lvars lvars) (out '()))
245 (cond ((null? lvars) out)
246 ((pair? lvars) (rev* (cdr lvars)
247 (cons (car lvars) out)))
248 (else (cons lvars out))))))
249 (hashq-set! bound-vars x locally-bound)
250 ;; recur/labels, the difference from the closure case
251 (let* ((referenced (recur/labels body x vars))
252 (free (lset-difference eq? referenced locally-bound))
253 (all-bound (reverse! (hashq-ref bound-vars x))))
254 (hashq-set! bound-vars x all-bound)
255 (hashq-set! free-vars x free)
256 free)))))
257 vals))
258 (vars-with-refs (map cons vars var-refs))
259 (body-refs (recur/labels body proc vars)))
260 (define (delabel-dependents! sym)
261 (let ((refs (assq-ref vars-with-refs sym)))
262 (if refs
263 (for-each (lambda (sym)
264 (if (hashq-ref labels sym)
265 (begin
266 (hashq-set! labels sym #f)
267 (delabel-dependents! sym))))
268 refs))))
269 ;; Stepping into the lambdas and the body might have made some
270 ;; procedures not label-allocatable -- which might have
271 ;; knock-on effects. For example:
272 ;; (fix ((a (lambda () (b)))
273 ;; (b (lambda () a)))
274 ;; (a))
275 ;; As far as `a' is concerned, both `a' and `b' are
276 ;; label-allocatable. But `b' references `a' not in a proc-tail
277 ;; position, which makes `a' not label-allocatable. The
278 ;; knock-on effect is that, when back-propagating this
279 ;; information to `a', `b' will also become not
280 ;; label-allocatable, as it is referenced within `a', which is
281 ;; allocated as a closure. This is a transitive relationship.
282 (for-each (lambda (sym)
283 (if (not (hashq-ref labels sym))
284 (delabel-dependents! sym)))
285 vars)
286 ;; Now lift bound variables with label-allocated lambdas to the
287 ;; parent procedure.
288 (for-each
289 (lambda (sym val)
290 (if (hashq-ref labels sym)
291 ;; Remove traces of the label-bound lambda. The free
292 ;; vars will propagate up via the return val.
293 (begin
294 (hashq-set! bound-vars proc
295 (append (hashq-ref bound-vars val)
296 (hashq-ref bound-vars proc)))
297 (hashq-remove! bound-vars val)
298 (hashq-remove! free-vars val))))
299 vars vals)
300 (lset-difference eq?
301 (apply lset-union eq? body-refs var-refs)
302 vars)))
303
304 ((<let-values> vars exp body)
305 (let ((bound (let lp ((out (hashq-ref bound-vars proc)) (in vars))
306 (if (pair? in)
307 (lp (cons (car in) out) (cdr in))
308 (if (null? in) out (cons in out))))))
309 (hashq-set! bound-vars proc bound)
310 (lset-difference eq?
311 (lset-union eq? (step exp) (step-tail body))
312 bound)))
313
314 (else '())))
315
316 ;; allocation: sym -> {lambda -> address}
317 ;; lambda -> (nlocs labels . free-locs)
318 (define allocation (make-hash-table))
319
320 (define (allocate! x proc n)
321 (define (recur y) (allocate! y proc n))
322 (record-case x
323 ((<application> proc args)
324 (apply max (recur proc) (map recur args)))
325
326 ((<conditional> test then else)
327 (max (recur test) (recur then) (recur else)))
328
329 ((<lexical-set> name gensym exp)
330 (recur exp))
331
332 ((<module-set> mod name public? exp)
333 (recur exp))
334
335 ((<toplevel-set> name exp)
336 (recur exp))
337
338 ((<toplevel-define> name exp)
339 (recur exp))
340
341 ((<sequence> exps)
342 (apply max (map recur exps)))
343
344 ((<lambda> vars meta body)
345 ;; allocate closure vars in order
346 (let lp ((c (hashq-ref free-vars x)) (n 0))
347 (if (pair? c)
348 (begin
349 (hashq-set! (hashq-ref allocation (car c))
350 x
351 `(#f ,(hashq-ref assigned (car c)) . ,n))
352 (lp (cdr c) (1+ n)))))
353
354 (let ((nlocs
355 (let lp ((vars vars) (n 0))
356 (if (not (null? vars))
357 ;; allocate args
358 (let ((v (if (pair? vars) (car vars) vars)))
359 (hashq-set! allocation v
360 (make-hashq
361 x `(#t ,(hashq-ref assigned v) . ,n)))
362 (lp (if (pair? vars) (cdr vars) '()) (1+ n)))
363 ;; allocate body, return number of additional locals
364 (- (allocate! body x n) n))))
365 (free-addresses
366 (map (lambda (v)
367 (hashq-ref (hashq-ref allocation v) proc))
368 (hashq-ref free-vars x)))
369 (labels (filter cdr
370 (map (lambda (sym)
371 (cons sym (hashq-ref labels sym)))
372 (hashq-ref bound-vars x)))))
373 ;; set procedure allocations
374 (hashq-set! allocation x (cons* nlocs labels free-addresses)))
375 n)
376
377 ((<let> vars vals body)
378 (let ((nmax (apply max (map recur vals))))
379 (cond
380 ;; the `or' hack
381 ((and (conditional? body)
382 (= (length vars) 1)
383 (let ((v (car vars)))
384 (and (not (hashq-ref assigned v))
385 (= (hashq-ref refcounts v 0) 2)
386 (lexical-ref? (conditional-test body))
387 (eq? (lexical-ref-gensym (conditional-test body)) v)
388 (lexical-ref? (conditional-then body))
389 (eq? (lexical-ref-gensym (conditional-then body)) v))))
390 (hashq-set! allocation (car vars)
391 (make-hashq proc `(#t #f . ,n)))
392 ;; the 1+ for this var
393 (max nmax (1+ n) (allocate! (conditional-else body) proc n)))
394 (else
395 (let lp ((vars vars) (n n))
396 (if (null? vars)
397 (max nmax (allocate! body proc n))
398 (let ((v (car vars)))
399 (hashq-set!
400 allocation v
401 (make-hashq proc
402 `(#t ,(hashq-ref assigned v) . ,n)))
403 (lp (cdr vars) (1+ n)))))))))
404
405 ((<letrec> vars vals body)
406 (let lp ((vars vars) (n n))
407 (if (null? vars)
408 (let ((nmax (apply max
409 (map (lambda (x)
410 (allocate! x proc n))
411 vals))))
412 (max nmax (allocate! body proc n)))
413 (let ((v (car vars)))
414 (hashq-set!
415 allocation v
416 (make-hashq proc
417 `(#t ,(hashq-ref assigned v) . ,n)))
418 (lp (cdr vars) (1+ n))))))
419
420 ((<fix> vars vals body)
421 (let lp ((in vars) (n n))
422 (if (null? in)
423 (let lp ((vars vars) (vals vals) (nmax n))
424 (cond
425 ((null? vars)
426 (max nmax (allocate! body proc n)))
427 ((hashq-ref labels (car vars))
428 ;; allocate label bindings & body inline to proc
429 (lp (cdr vars)
430 (cdr vals)
431 (record-case (car vals)
432 ((<lambda> vars body)
433 (let lp ((vars vars) (n n))
434 (if (not (null? vars))
435 ;; allocate bindings
436 (let ((v (if (pair? vars) (car vars) vars)))
437 (hashq-set!
438 allocation v
439 (make-hashq
440 proc `(#t ,(hashq-ref assigned v) . ,n)))
441 (lp (if (pair? vars) (cdr vars) '()) (1+ n)))
442 ;; allocate body
443 (max nmax (allocate! body proc n))))))))
444 (else
445 ;; allocate closure
446 (lp (cdr vars)
447 (cdr vals)
448 (max nmax (allocate! (car vals) proc n))))))
449
450 (let ((v (car in)))
451 (cond
452 ((hashq-ref assigned v)
453 (error "fixpoint procedures may not be assigned" x))
454 ((hashq-ref labels v)
455 ;; no binding, it's a label
456 (lp (cdr in) n))
457 (else
458 ;; allocate closure binding
459 (hashq-set! allocation v (make-hashq proc `(#t #f . ,n)))
460 (lp (cdr in) (1+ n))))))))
461
462 ((<let-values> vars exp body)
463 (let ((nmax (recur exp)))
464 (let lp ((vars vars) (n n))
465 (cond
466 ((null? vars)
467 (max nmax (allocate! body proc n)))
468 ((not (pair? vars))
469 (hashq-set! allocation vars
470 (make-hashq proc
471 `(#t ,(hashq-ref assigned vars) . ,n)))
472 ;; the 1+ for this var
473 (max nmax (allocate! body proc (1+ n))))
474 (else
475 (let ((v (car vars)))
476 (hashq-set!
477 allocation v
478 (make-hashq proc
479 `(#t ,(hashq-ref assigned v) . ,n)))
480 (lp (cdr vars) (1+ n))))))))
481
482 (else n)))
483
484 (analyze! x #f '() #t #f)
485 (allocate! x #f 0)
486
487 allocation)
488
489 \f
490 ;;;
491 ;;; Unused variable analysis.
492 ;;;
493
494 ;; <binding-info> records are used during tree traversals in
495 ;; `report-unused-variables'. They contain a list of the local vars
496 ;; currently in scope, a list of locals vars that have been referenced, and a
497 ;; "location stack" (the stack of `tree-il-src' values for each parent tree).
498 (define-record-type <binding-info>
499 (make-binding-info vars refs locs)
500 binding-info?
501 (vars binding-info-vars) ;; ((GENSYM NAME LOCATION) ...)
502 (refs binding-info-refs) ;; (GENSYM ...)
503 (locs binding-info-locs)) ;; (LOCATION ...)
504
505 (define (report-unused-variables tree)
506 "Report about unused variables in TREE. Return TREE."
507
508 (define (dotless-list lst)
509 ;; If LST is a dotted list, return a proper list equal to LST except that
510 ;; the very last element is a pair; otherwise return LST.
511 (let loop ((lst lst)
512 (result '()))
513 (cond ((null? lst)
514 (reverse result))
515 ((pair? lst)
516 (loop (cdr lst) (cons (car lst) result)))
517 (else
518 (loop '() (cons lst result))))))
519
520 (tree-il-fold (lambda (x info)
521 ;; X is a leaf: extend INFO's refs accordingly.
522 (let ((refs (binding-info-refs info))
523 (vars (binding-info-vars info))
524 (locs (binding-info-locs info)))
525 (record-case x
526 ((<lexical-ref> gensym)
527 (make-binding-info vars (cons gensym refs) locs))
528 (else info))))
529
530 (lambda (x info)
531 ;; Going down into X: extend INFO's variable list
532 ;; accordingly.
533 (let ((refs (binding-info-refs info))
534 (vars (binding-info-vars info))
535 (locs (binding-info-locs info))
536 (src (tree-il-src x)))
537 (define (extend inner-vars inner-names)
538 (append (map (lambda (var name)
539 (list var name src))
540 inner-vars
541 inner-names)
542 vars))
543 (record-case x
544 ((<lexical-set> gensym)
545 (make-binding-info vars (cons gensym refs)
546 (cons src locs)))
547 ((<lambda> vars names)
548 (let ((vars (dotless-list vars))
549 (names (dotless-list names)))
550 (make-binding-info (extend vars names) refs
551 (cons src locs))))
552 ((<let> vars names)
553 (make-binding-info (extend vars names) refs
554 (cons src locs)))
555 ((<letrec> vars names)
556 (make-binding-info (extend vars names) refs
557 (cons src locs)))
558 ((<fix> vars names)
559 (make-binding-info (extend vars names) refs
560 (cons src locs)))
561 ((<let-values> vars names)
562 (make-binding-info (extend vars names) refs
563 (cons src locs)))
564 (else info))))
565
566 (lambda (x info)
567 ;; Leaving X's scope: shrink INFO's variable list
568 ;; accordingly and reported unused nested variables.
569 (let ((refs (binding-info-refs info))
570 (vars (binding-info-vars info))
571 (locs (binding-info-locs info)))
572 (define (shrink inner-vars refs)
573 (for-each (lambda (var)
574 (let ((gensym (car var)))
575 ;; Don't report lambda parameters as
576 ;; unused.
577 (if (and (not (memq gensym refs))
578 (not (and (lambda? x)
579 (memq gensym
580 inner-vars))))
581 (let ((name (cadr var))
582 ;; We can get approximate
583 ;; source location by going up
584 ;; the LOCS location stack.
585 (loc (or (caddr var)
586 (find pair? locs))))
587 (warning 'unused-variable loc name)))))
588 (filter (lambda (var)
589 (memq (car var) inner-vars))
590 vars))
591 (fold alist-delete vars inner-vars))
592
593 ;; For simplicity, we leave REFS untouched, i.e., with
594 ;; names of variables that are now going out of scope.
595 ;; It doesn't hurt as these are unique names, it just
596 ;; makes REFS unnecessarily fat.
597 (record-case x
598 ((<lambda> vars)
599 (let ((vars (dotless-list vars)))
600 (make-binding-info (shrink vars refs) refs
601 (cdr locs))))
602 ((<let> vars)
603 (make-binding-info (shrink vars refs) refs
604 (cdr locs)))
605 ((<letrec> vars)
606 (make-binding-info (shrink vars refs) refs
607 (cdr locs)))
608 ((<fix> vars)
609 (make-binding-info (shrink vars refs) refs
610 (cdr locs)))
611 ((<let-values> vars)
612 (make-binding-info (shrink vars refs) refs
613 (cdr locs)))
614 (else info))))
615 (make-binding-info '() '() '())
616 tree)
617 tree)