add label alist to lambda allocations in tree-il->glil compiler
[bpt/guile.git] / module / language / tree-il / analyze.scm
CommitLineData
cf10678f
AW
1;;; TREE-IL -> GLIL compiler
2
3;; Copyright (C) 2001,2008,2009 Free Software Foundation, Inc.
4
53befeb7
NJ
5;;;; This library is free software; you can redistribute it and/or
6;;;; modify it under the terms of the GNU Lesser General Public
7;;;; License as published by the Free Software Foundation; either
8;;;; version 3 of the License, or (at your option) any later version.
9;;;;
10;;;; This library is distributed in the hope that it will be useful,
11;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13;;;; Lesser General Public License for more details.
14;;;;
15;;;; You should have received a copy of the GNU Lesser General Public
16;;;; License along with this library; if not, write to the Free Software
17;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
cf10678f
AW
18
19;;; Code:
20
21(define-module (language tree-il analyze)
66d3e9a3 22 #:use-module (srfi srfi-1)
4b856371 23 #:use-module (srfi srfi-9)
cf10678f 24 #:use-module (system base syntax)
4b856371 25 #:use-module (system base message)
cf10678f 26 #:use-module (language tree-il)
4b856371
LC
27 #:export (analyze-lexicals
28 report-unused-variables))
cf10678f 29
66d3e9a3
AW
30;; Allocation is the process of assigning storage locations for lexical
31;; variables. A lexical variable has a distinct "address", or storage
32;; location, for each procedure in which it is referenced.
33;;
34;; A variable is "local", i.e., allocated on the stack, if it is
35;; referenced from within the procedure that defined it. Otherwise it is
36;; a "closure" variable. For example:
37;;
38;; (lambda (a) a) ; a will be local
39;; `a' is local to the procedure.
40;;
41;; (lambda (a) (lambda () a))
42;; `a' is local to the outer procedure, but a closure variable with
43;; respect to the inner procedure.
44;;
45;; If a variable is ever assigned, it needs to be heap-allocated
46;; ("boxed"). This is so that closures and continuations capture the
47;; variable's identity, not just one of the values it may have over the
48;; course of program execution. If the variable is never assigned, there
49;; is no distinction between value and identity, so closing over its
50;; identity (whether through closures or continuations) can make a copy
51;; of its value instead.
52;;
53;; Local variables are stored on the stack within a procedure's call
54;; frame. Their index into the stack is determined from their linear
55;; postion within a procedure's binding path:
cf10678f
AW
56;; (let (0 1)
57;; (let (2 3) ...)
58;; (let (2) ...))
59;; (let (2 3 4) ...))
60;; etc.
61;;
5af166bd
AW
62;; This algorithm has the problem that variables are only allocated
63;; indices at the end of the binding path. If variables bound early in
64;; the path are not used in later portions of the path, their indices
65;; will not be recycled. This problem is particularly egregious in the
66;; expansion of `or':
67;;
68;; (or x y z)
69;; -> (let ((a x)) (if a a (let ((b y)) (if b b z))))
70;;
71;; As you can see, the `a' binding is only used in the ephemeral `then'
72;; clause of the first `if', but its index would be reserved for the
73;; whole of the `or' expansion. So we have a hack for this specific
74;; case. A proper solution would be some sort of liveness analysis, and
75;; not our linear allocation algorithm.
76;;
66d3e9a3
AW
77;; Closure variables are captured when a closure is created, and stored
78;; in a vector. Each closure variable has a unique index into that
79;; vector.
80;;
9059993f
AW
81;; There is one more complication. Procedures bound by <fix> may, in
82;; some cases, be rendered inline to their parent procedure. That is to
83;; say,
84;;
85;; (letrec ((lp (lambda () (lp)))) (lp))
86;; => (fix ((lp (lambda () (lp)))) (lp))
87;; => goto FIX-BODY; LP: goto LP; FIX-BODY: goto LP;
88;; ^ jump over the loop ^ the fixpoint lp ^ starting off the loop
89;;
90;; The upshot is that we don't have to allocate any space for the `lp'
91;; closure at all, as it can be rendered inline as a loop. So there is
92;; another kind of allocation, "label allocation", in which the
93;; procedure is simply a label, placed at the start of the lambda body.
94;; The label is the gensym under which the lambda expression is bound.
95;;
96;; The analyzer checks to see that the label is called with the correct
97;; number of arguments. Calls to labels compile to rename + goto.
98;; Lambda, the ultimate goto!
99;;
66d3e9a3
AW
100;;
101;; The return value of `analyze-lexicals' is a hash table, the
102;; "allocation".
103;;
104;; The allocation maps gensyms -- recall that each lexically bound
105;; variable has a unique gensym -- to storage locations ("addresses").
106;; Since one gensym may have many storage locations, if it is referenced
107;; in many procedures, it is a two-level map.
108;;
109;; The allocation also stored information on how many local variables
9059993f
AW
110;; need to be allocated for each procedure, lexicals that have been
111;; translated into labels, and information on what free variables to
112;; capture from its lexical parent procedure.
66d3e9a3
AW
113;;
114;; That is:
115;;
116;; sym -> {lambda -> address}
9059993f 117;; lambda -> (nlocs labels . free-locs)
66d3e9a3 118;;
9059993f
AW
119;; address ::= (local? boxed? . index)
120;; labels ::= ((sym . lambda-vars) ...)
66d3e9a3
AW
121;; free-locs ::= ((sym0 . address0) (sym1 . address1) ...)
122;; free variable addresses are relative to parent proc.
123
124(define (make-hashq k v)
125 (let ((res (make-hash-table)))
126 (hashq-set! res k v)
127 res))
cf10678f
AW
128
129(define (analyze-lexicals x)
66d3e9a3
AW
130 ;; bound-vars: lambda -> (sym ...)
131 ;; all identifiers bound within a lambda
9059993f 132 (define bound-vars (make-hash-table))
66d3e9a3
AW
133 ;; free-vars: lambda -> (sym ...)
134 ;; all identifiers referenced in a lambda, but not bound
135 ;; NB, this includes identifiers referenced by contained lambdas
9059993f 136 (define free-vars (make-hash-table))
66d3e9a3 137 ;; assigned: sym -> #t
9059993f 138 (define assigned (make-hash-table))
66d3e9a3 139 ;; variables that are assigned
5af166bd 140 ;; refcounts: sym -> count
66d3e9a3 141 ;; allows us to detect the or-expansion in O(1) time
9059993f
AW
142 (define refcounts (make-hash-table))
143 ;; labels: sym -> lambda-vars
144 ;; for determining if fixed-point procedures can be rendered as
145 ;; labels. lambda-vars may be an improper list.
146 (define labels (make-hash-table))
147
66d3e9a3
AW
148 ;; returns variables referenced in expr
149 (define (analyze! x proc)
150 (define (step y) (analyze! y proc))
151 (define (recur x new-proc) (analyze! x new-proc))
cf10678f
AW
152 (record-case x
153 ((<application> proc args)
66d3e9a3 154 (apply lset-union eq? (step proc) (map step args)))
cf10678f
AW
155
156 ((<conditional> test then else)
66d3e9a3 157 (lset-union eq? (step test) (step then) (step else)))
cf10678f
AW
158
159 ((<lexical-ref> name gensym)
5af166bd 160 (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
66d3e9a3 161 (list gensym))
cf10678f
AW
162
163 ((<lexical-set> name gensym exp)
66d3e9a3
AW
164 (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
165 (hashq-set! assigned gensym #t)
166 (lset-adjoin eq? (step exp) gensym))
cf10678f
AW
167
168 ((<module-set> mod name public? exp)
169 (step exp))
170
171 ((<toplevel-set> name exp)
172 (step exp))
173
174 ((<toplevel-define> name exp)
175 (step exp))
176
177 ((<sequence> exps)
66d3e9a3 178 (apply lset-union eq? (map step exps)))
cf10678f
AW
179
180 ((<lambda> vars meta body)
66d3e9a3
AW
181 (let ((locally-bound (let rev* ((vars vars) (out '()))
182 (cond ((null? vars) out)
183 ((pair? vars) (rev* (cdr vars)
184 (cons (car vars) out)))
185 (else (cons vars out))))))
186 (hashq-set! bound-vars x locally-bound)
187 (let* ((referenced (recur body x))
188 (free (lset-difference eq? referenced locally-bound))
189 (all-bound (reverse! (hashq-ref bound-vars x))))
190 (hashq-set! bound-vars x all-bound)
191 (hashq-set! free-vars x free)
192 free)))
193
f4aa8d53 194 ((<let> vars vals body)
66d3e9a3
AW
195 (hashq-set! bound-vars proc
196 (append (reverse vars) (hashq-ref bound-vars proc)))
197 (lset-difference eq?
198 (apply lset-union eq? (step body) (map step vals))
199 vars))
cf10678f 200
f4aa8d53 201 ((<letrec> vars vals body)
66d3e9a3
AW
202 (hashq-set! bound-vars proc
203 (append (reverse vars) (hashq-ref bound-vars proc)))
204 (for-each (lambda (sym) (hashq-set! assigned sym #t)) vars)
205 (lset-difference eq?
206 (apply lset-union eq? (step body) (map step vals))
207 vars))
208
c21c89b1
AW
209 ((<fix> vars vals body)
210 (hashq-set! bound-vars proc
211 (append (reverse vars) (hashq-ref bound-vars proc)))
212 (lset-difference eq?
213 (apply lset-union eq? (step body) (map step vals))
214 vars))
215
f4aa8d53 216 ((<let-values> vars exp body)
bca488f1
AW
217 (let ((bound (let lp ((out (hashq-ref bound-vars proc)) (in vars))
218 (if (pair? in)
219 (lp (cons (car in) out) (cdr in))
220 (if (null? in) out (cons in out))))))
221 (hashq-set! bound-vars proc bound)
222 (lset-difference eq?
223 (lset-union eq? (step exp) (step body))
224 bound)))
66d3e9a3
AW
225
226 (else '())))
227
9059993f
AW
228 ;; allocation: sym -> {lambda -> address}
229 ;; lambda -> (nlocs labels . free-locs)
230 (define allocation (make-hash-table))
231
66d3e9a3
AW
232 (define (allocate! x proc n)
233 (define (recur y) (allocate! y proc n))
234 (record-case x
235 ((<application> proc args)
236 (apply max (recur proc) (map recur args)))
cf10678f 237
66d3e9a3
AW
238 ((<conditional> test then else)
239 (max (recur test) (recur then) (recur else)))
cf10678f 240
66d3e9a3
AW
241 ((<lexical-set> name gensym exp)
242 (recur exp))
243
244 ((<module-set> mod name public? exp)
245 (recur exp))
246
247 ((<toplevel-set> name exp)
248 (recur exp))
249
250 ((<toplevel-define> name exp)
251 (recur exp))
252
253 ((<sequence> exps)
254 (apply max (map recur exps)))
255
256 ((<lambda> vars meta body)
257 ;; allocate closure vars in order
258 (let lp ((c (hashq-ref free-vars x)) (n 0))
259 (if (pair? c)
260 (begin
261 (hashq-set! (hashq-ref allocation (car c))
262 x
263 `(#f ,(hashq-ref assigned (car c)) . ,n))
264 (lp (cdr c) (1+ n)))))
265
266 (let ((nlocs
267 (let lp ((vars vars) (n 0))
268 (if (not (null? vars))
269 ;; allocate args
270 (let ((v (if (pair? vars) (car vars) vars)))
271 (hashq-set! allocation v
272 (make-hashq
273 x `(#t ,(hashq-ref assigned v) . ,n)))
274 (lp (if (pair? vars) (cdr vars) '()) (1+ n)))
275 ;; allocate body, return number of additional locals
276 (- (allocate! body x n) n))))
277 (free-addresses
278 (map (lambda (v)
279 (hashq-ref (hashq-ref allocation v) proc))
9059993f
AW
280 (hashq-ref free-vars x)))
281 (labels (filter cdr
282 (map (lambda (sym)
283 (cons sym (hashq-ref labels sym)))
284 (hashq-ref bound-vars x)))))
66d3e9a3 285 ;; set procedure allocations
9059993f 286 (hashq-set! allocation x (cons* nlocs labels free-addresses)))
66d3e9a3 287 n)
cf10678f 288
66d3e9a3
AW
289 ((<let> vars vals body)
290 (let ((nmax (apply max (map recur vals))))
291 (cond
292 ;; the `or' hack
293 ((and (conditional? body)
294 (= (length vars) 1)
295 (let ((v (car vars)))
296 (and (not (hashq-ref assigned v))
297 (= (hashq-ref refcounts v 0) 2)
298 (lexical-ref? (conditional-test body))
299 (eq? (lexical-ref-gensym (conditional-test body)) v)
300 (lexical-ref? (conditional-then body))
301 (eq? (lexical-ref-gensym (conditional-then body)) v))))
302 (hashq-set! allocation (car vars)
303 (make-hashq proc `(#t #f . ,n)))
304 ;; the 1+ for this var
305 (max nmax (1+ n) (allocate! (conditional-else body) proc n)))
306 (else
307 (let lp ((vars vars) (n n))
308 (if (null? vars)
309 (max nmax (allocate! body proc n))
310 (let ((v (car vars)))
cf10678f
AW
311 (hashq-set!
312 allocation v
66d3e9a3
AW
313 (make-hashq proc
314 `(#t ,(hashq-ref assigned v) . ,n)))
315 (lp (cdr vars) (1+ n)))))))))
316
317 ((<letrec> vars vals body)
318 (let lp ((vars vars) (n n))
319 (if (null? vars)
320 (let ((nmax (apply max
321 (map (lambda (x)
322 (allocate! x proc n))
323 vals))))
324 (max nmax (allocate! body proc n)))
325 (let ((v (car vars)))
326 (hashq-set!
327 allocation v
328 (make-hashq proc
329 `(#t ,(hashq-ref assigned v) . ,n)))
330 (lp (cdr vars) (1+ n))))))
cf10678f 331
c21c89b1
AW
332 ((<fix> vars vals body)
333 (let lp ((vars vars) (n n))
334 (if (null? vars)
335 (let ((nmax (apply max
336 (map (lambda (x)
337 (allocate! x proc n))
338 vals))))
339 (max nmax (allocate! body proc n)))
340 (let ((v (car vars)))
341 (if (hashq-ref assigned v)
342 (error "fixpoint procedures may not be assigned" x))
343 (hashq-set! allocation v (make-hashq proc `(#t #f . ,n)))
344 (lp (cdr vars) (1+ n))))))
345
66d3e9a3
AW
346 ((<let-values> vars exp body)
347 (let ((nmax (recur exp)))
cf10678f 348 (let lp ((vars vars) (n n))
bca488f1
AW
349 (cond
350 ((null? vars)
351 (max nmax (allocate! body proc n)))
352 ((not (pair? vars))
353 (hashq-set! allocation vars
354 (make-hashq proc
355 `(#t ,(hashq-ref assigned vars) . ,n)))
356 ;; the 1+ for this var
357 (max nmax (allocate! body proc (1+ n))))
358 (else
80af1168
AW
359 (let ((v (car vars)))
360 (hashq-set!
361 allocation v
362 (make-hashq proc
363 `(#t ,(hashq-ref assigned v) . ,n)))
364 (lp (cdr vars) (1+ n))))))))
66d3e9a3
AW
365
366 (else n)))
cf10678f 367
66d3e9a3
AW
368 (analyze! x #f)
369 (allocate! x #f 0)
cf10678f
AW
370
371 allocation)
4b856371
LC
372
373\f
374;;;
375;;; Unused variable analysis.
376;;;
377
378;; <binding-info> records are used during tree traversals in
379;; `report-unused-variables'. They contain a list of the local vars
380;; currently in scope, a list of locals vars that have been referenced, and a
381;; "location stack" (the stack of `tree-il-src' values for each parent tree).
382(define-record-type <binding-info>
383 (make-binding-info vars refs locs)
384 binding-info?
385 (vars binding-info-vars) ;; ((GENSYM NAME LOCATION) ...)
386 (refs binding-info-refs) ;; (GENSYM ...)
387 (locs binding-info-locs)) ;; (LOCATION ...)
388
389(define (report-unused-variables tree)
390 "Report about unused variables in TREE. Return TREE."
391
392 (define (dotless-list lst)
393 ;; If LST is a dotted list, return a proper list equal to LST except that
394 ;; the very last element is a pair; otherwise return LST.
395 (let loop ((lst lst)
396 (result '()))
397 (cond ((null? lst)
398 (reverse result))
399 ((pair? lst)
400 (loop (cdr lst) (cons (car lst) result)))
401 (else
402 (loop '() (cons lst result))))))
403
404 (tree-il-fold (lambda (x info)
405 ;; X is a leaf: extend INFO's refs accordingly.
406 (let ((refs (binding-info-refs info))
407 (vars (binding-info-vars info))
408 (locs (binding-info-locs info)))
409 (record-case x
410 ((<lexical-ref> gensym)
411 (make-binding-info vars (cons gensym refs) locs))
412 (else info))))
413
414 (lambda (x info)
415 ;; Going down into X: extend INFO's variable list
416 ;; accordingly.
417 (let ((refs (binding-info-refs info))
418 (vars (binding-info-vars info))
419 (locs (binding-info-locs info))
420 (src (tree-il-src x)))
421 (define (extend inner-vars inner-names)
422 (append (map (lambda (var name)
423 (list var name src))
424 inner-vars
425 inner-names)
426 vars))
427 (record-case x
428 ((<lexical-set> gensym)
429 (make-binding-info vars (cons gensym refs)
430 (cons src locs)))
431 ((<lambda> vars names)
432 (let ((vars (dotless-list vars))
433 (names (dotless-list names)))
434 (make-binding-info (extend vars names) refs
435 (cons src locs))))
436 ((<let> vars names)
437 (make-binding-info (extend vars names) refs
438 (cons src locs)))
439 ((<letrec> vars names)
440 (make-binding-info (extend vars names) refs
441 (cons src locs)))
c21c89b1
AW
442 ((<fix> vars names)
443 (make-binding-info (extend vars names) refs
444 (cons src locs)))
4b856371
LC
445 ((<let-values> vars names)
446 (make-binding-info (extend vars names) refs
447 (cons src locs)))
448 (else info))))
449
450 (lambda (x info)
451 ;; Leaving X's scope: shrink INFO's variable list
452 ;; accordingly and reported unused nested variables.
453 (let ((refs (binding-info-refs info))
454 (vars (binding-info-vars info))
455 (locs (binding-info-locs info)))
456 (define (shrink inner-vars refs)
457 (for-each (lambda (var)
458 (let ((gensym (car var)))
459 ;; Don't report lambda parameters as
460 ;; unused.
461 (if (and (not (memq gensym refs))
462 (not (and (lambda? x)
463 (memq gensym
464 inner-vars))))
465 (let ((name (cadr var))
466 ;; We can get approximate
467 ;; source location by going up
468 ;; the LOCS location stack.
469 (loc (or (caddr var)
470 (find pair? locs))))
471 (warning 'unused-variable loc name)))))
472 (filter (lambda (var)
473 (memq (car var) inner-vars))
474 vars))
475 (fold alist-delete vars inner-vars))
476
477 ;; For simplicity, we leave REFS untouched, i.e., with
478 ;; names of variables that are now going out of scope.
479 ;; It doesn't hurt as these are unique names, it just
480 ;; makes REFS unnecessarily fat.
481 (record-case x
482 ((<lambda> vars)
483 (let ((vars (dotless-list vars)))
484 (make-binding-info (shrink vars refs) refs
485 (cdr locs))))
486 ((<let> vars)
487 (make-binding-info (shrink vars refs) refs
488 (cdr locs)))
489 ((<letrec> vars)
490 (make-binding-info (shrink vars refs) refs
491 (cdr locs)))
c21c89b1
AW
492 ((<fix> vars)
493 (make-binding-info (shrink vars refs) refs
494 (cdr locs)))
4b856371
LC
495 ((<let-values> vars)
496 (make-binding-info (shrink vars refs) refs
497 (cdr locs)))
498 (else info))))
499 (make-binding-info '() '() '())
500 tree)
501 tree)