Merge branch 'master' of git://git.savannah.gnu.org/guile into elisp
[bpt/guile.git] / module / language / tree-il / analyze.scm
1 ;;; TREE-IL -> GLIL compiler
2
3 ;; Copyright (C) 2001,2008,2009 Free Software Foundation, Inc.
4
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
9 ;;;;
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
14 ;;;;
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Code:
20
21 (define-module (language tree-il analyze)
22 #:use-module (srfi srfi-1)
23 #:use-module (srfi srfi-9)
24 #:use-module (system base syntax)
25 #:use-module (system base message)
26 #:use-module (language tree-il)
27 #:export (analyze-lexicals
28 report-unused-variables))
29
30 ;; Allocation is the process of assigning storage locations for lexical
31 ;; variables. A lexical variable has a distinct "address", or storage
32 ;; location, for each procedure in which it is referenced.
33 ;;
34 ;; A variable is "local", i.e., allocated on the stack, if it is
35 ;; referenced from within the procedure that defined it. Otherwise it is
36 ;; a "closure" variable. For example:
37 ;;
38 ;; (lambda (a) a) ; a will be local
39 ;; `a' is local to the procedure.
40 ;;
41 ;; (lambda (a) (lambda () a))
42 ;; `a' is local to the outer procedure, but a closure variable with
43 ;; respect to the inner procedure.
44 ;;
45 ;; If a variable is ever assigned, it needs to be heap-allocated
46 ;; ("boxed"). This is so that closures and continuations capture the
47 ;; variable's identity, not just one of the values it may have over the
48 ;; course of program execution. If the variable is never assigned, there
49 ;; is no distinction between value and identity, so closing over its
50 ;; identity (whether through closures or continuations) can make a copy
51 ;; of its value instead.
52 ;;
53 ;; Local variables are stored on the stack within a procedure's call
54 ;; frame. Their index into the stack is determined from their linear
55 ;; postion within a procedure's binding path:
56 ;; (let (0 1)
57 ;; (let (2 3) ...)
58 ;; (let (2) ...))
59 ;; (let (2 3 4) ...))
60 ;; etc.
61 ;;
62 ;; This algorithm has the problem that variables are only allocated
63 ;; indices at the end of the binding path. If variables bound early in
64 ;; the path are not used in later portions of the path, their indices
65 ;; will not be recycled. This problem is particularly egregious in the
66 ;; expansion of `or':
67 ;;
68 ;; (or x y z)
69 ;; -> (let ((a x)) (if a a (let ((b y)) (if b b z))))
70 ;;
71 ;; As you can see, the `a' binding is only used in the ephemeral `then'
72 ;; clause of the first `if', but its index would be reserved for the
73 ;; whole of the `or' expansion. So we have a hack for this specific
74 ;; case. A proper solution would be some sort of liveness analysis, and
75 ;; not our linear allocation algorithm.
76 ;;
77 ;; Closure variables are captured when a closure is created, and stored
78 ;; in a vector. Each closure variable has a unique index into that
79 ;; vector.
80 ;;
81 ;;
82 ;; The return value of `analyze-lexicals' is a hash table, the
83 ;; "allocation".
84 ;;
85 ;; The allocation maps gensyms -- recall that each lexically bound
86 ;; variable has a unique gensym -- to storage locations ("addresses").
87 ;; Since one gensym may have many storage locations, if it is referenced
88 ;; in many procedures, it is a two-level map.
89 ;;
90 ;; The allocation also stored information on how many local variables
91 ;; need to be allocated for each procedure, and information on what free
92 ;; variables to capture from its lexical parent procedure.
93 ;;
94 ;; That is:
95 ;;
96 ;; sym -> {lambda -> address}
97 ;; lambda -> (nlocs . free-locs)
98 ;;
99 ;; address := (local? boxed? . index)
100 ;; free-locs ::= ((sym0 . address0) (sym1 . address1) ...)
101 ;; free variable addresses are relative to parent proc.
102
103 (define (make-hashq k v)
104 (let ((res (make-hash-table)))
105 (hashq-set! res k v)
106 res))
107
108 (define (analyze-lexicals x)
109 ;; bound-vars: lambda -> (sym ...)
110 ;; all identifiers bound within a lambda
111 ;; free-vars: lambda -> (sym ...)
112 ;; all identifiers referenced in a lambda, but not bound
113 ;; NB, this includes identifiers referenced by contained lambdas
114 ;; assigned: sym -> #t
115 ;; variables that are assigned
116 ;; refcounts: sym -> count
117 ;; allows us to detect the or-expansion in O(1) time
118
119 ;; returns variables referenced in expr
120 (define (analyze! x proc)
121 (define (step y) (analyze! y proc))
122 (define (recur x new-proc) (analyze! x new-proc))
123 (record-case x
124 ((<application> proc args)
125 (apply lset-union eq? (step proc) (map step args)))
126
127 ((<conditional> test then else)
128 (lset-union eq? (step test) (step then) (step else)))
129
130 ((<lexical-ref> name gensym)
131 (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
132 (list gensym))
133
134 ((<lexical-set> name gensym exp)
135 (hashq-set! refcounts gensym (1+ (hashq-ref refcounts gensym 0)))
136 (hashq-set! assigned gensym #t)
137 (lset-adjoin eq? (step exp) gensym))
138
139 ((<module-set> mod name public? exp)
140 (step exp))
141
142 ((<toplevel-set> name exp)
143 (step exp))
144
145 ((<toplevel-define> name exp)
146 (step exp))
147
148 ((<sequence> exps)
149 (apply lset-union eq? (map step exps)))
150
151 ((<lambda> vars meta body)
152 (let ((locally-bound (let rev* ((vars vars) (out '()))
153 (cond ((null? vars) out)
154 ((pair? vars) (rev* (cdr vars)
155 (cons (car vars) out)))
156 (else (cons vars out))))))
157 (hashq-set! bound-vars x locally-bound)
158 (let* ((referenced (recur body x))
159 (free (lset-difference eq? referenced locally-bound))
160 (all-bound (reverse! (hashq-ref bound-vars x))))
161 (hashq-set! bound-vars x all-bound)
162 (hashq-set! free-vars x free)
163 free)))
164
165 ((<let> vars vals body)
166 (hashq-set! bound-vars proc
167 (append (reverse vars) (hashq-ref bound-vars proc)))
168 (lset-difference eq?
169 (apply lset-union eq? (step body) (map step vals))
170 vars))
171
172 ((<letrec> vars vals body)
173 (hashq-set! bound-vars proc
174 (append (reverse vars) (hashq-ref bound-vars proc)))
175 (for-each (lambda (sym) (hashq-set! assigned sym #t)) vars)
176 (lset-difference eq?
177 (apply lset-union eq? (step body) (map step vals))
178 vars))
179
180 ((<let-values> vars exp body)
181 (hashq-set! bound-vars proc
182 (let lp ((out (hashq-ref bound-vars proc)) (in vars))
183 (if (pair? in)
184 (lp (cons (car in) out) (cdr in))
185 (if (null? in) out (cons in out)))))
186 (lset-difference eq?
187 (lset-union eq? (step exp) (step body))
188 vars))
189
190 (else '())))
191
192 (define (allocate! x proc n)
193 (define (recur y) (allocate! y proc n))
194 (record-case x
195 ((<application> proc args)
196 (apply max (recur proc) (map recur args)))
197
198 ((<conditional> test then else)
199 (max (recur test) (recur then) (recur else)))
200
201 ((<lexical-set> name gensym exp)
202 (recur exp))
203
204 ((<module-set> mod name public? exp)
205 (recur exp))
206
207 ((<toplevel-set> name exp)
208 (recur exp))
209
210 ((<toplevel-define> name exp)
211 (recur exp))
212
213 ((<sequence> exps)
214 (apply max (map recur exps)))
215
216 ((<lambda> vars meta body)
217 ;; allocate closure vars in order
218 (let lp ((c (hashq-ref free-vars x)) (n 0))
219 (if (pair? c)
220 (begin
221 (hashq-set! (hashq-ref allocation (car c))
222 x
223 `(#f ,(hashq-ref assigned (car c)) . ,n))
224 (lp (cdr c) (1+ n)))))
225
226 (let ((nlocs
227 (let lp ((vars vars) (n 0))
228 (if (not (null? vars))
229 ;; allocate args
230 (let ((v (if (pair? vars) (car vars) vars)))
231 (hashq-set! allocation v
232 (make-hashq
233 x `(#t ,(hashq-ref assigned v) . ,n)))
234 (lp (if (pair? vars) (cdr vars) '()) (1+ n)))
235 ;; allocate body, return number of additional locals
236 (- (allocate! body x n) n))))
237 (free-addresses
238 (map (lambda (v)
239 (hashq-ref (hashq-ref allocation v) proc))
240 (hashq-ref free-vars x))))
241 ;; set procedure allocations
242 (hashq-set! allocation x (cons nlocs free-addresses)))
243 n)
244
245 ((<let> vars vals body)
246 (let ((nmax (apply max (map recur vals))))
247 (cond
248 ;; the `or' hack
249 ((and (conditional? body)
250 (= (length vars) 1)
251 (let ((v (car vars)))
252 (and (not (hashq-ref assigned v))
253 (= (hashq-ref refcounts v 0) 2)
254 (lexical-ref? (conditional-test body))
255 (eq? (lexical-ref-gensym (conditional-test body)) v)
256 (lexical-ref? (conditional-then body))
257 (eq? (lexical-ref-gensym (conditional-then body)) v))))
258 (hashq-set! allocation (car vars)
259 (make-hashq proc `(#t #f . ,n)))
260 ;; the 1+ for this var
261 (max nmax (1+ n) (allocate! (conditional-else body) proc n)))
262 (else
263 (let lp ((vars vars) (n n))
264 (if (null? vars)
265 (max nmax (allocate! body proc n))
266 (let ((v (car vars)))
267 (hashq-set!
268 allocation v
269 (make-hashq proc
270 `(#t ,(hashq-ref assigned v) . ,n)))
271 (lp (cdr vars) (1+ n)))))))))
272
273 ((<letrec> vars vals body)
274 (let lp ((vars vars) (n n))
275 (if (null? vars)
276 (let ((nmax (apply max
277 (map (lambda (x)
278 (allocate! x proc n))
279 vals))))
280 (max nmax (allocate! body proc n)))
281 (let ((v (car vars)))
282 (hashq-set!
283 allocation v
284 (make-hashq proc
285 `(#t ,(hashq-ref assigned v) . ,n)))
286 (lp (cdr vars) (1+ n))))))
287
288 ((<let-values> vars exp body)
289 (let ((nmax (recur exp)))
290 (let lp ((vars vars) (n n))
291 (if (null? vars)
292 (max nmax (allocate! body proc n))
293 (let ((v (if (pair? vars) (car vars) vars)))
294 (let ((v (car vars)))
295 (hashq-set!
296 allocation v
297 (make-hashq proc
298 `(#t ,(hashq-ref assigned v) . ,n)))
299 (lp (cdr vars) (1+ n))))))))
300
301 (else n)))
302
303 (define bound-vars (make-hash-table))
304 (define free-vars (make-hash-table))
305 (define assigned (make-hash-table))
306 (define refcounts (make-hash-table))
307
308 (define allocation (make-hash-table))
309
310 (analyze! x #f)
311 (allocate! x #f 0)
312
313 allocation)
314
315 \f
316 ;;;
317 ;;; Unused variable analysis.
318 ;;;
319
320 ;; <binding-info> records are used during tree traversals in
321 ;; `report-unused-variables'. They contain a list of the local vars
322 ;; currently in scope, a list of locals vars that have been referenced, and a
323 ;; "location stack" (the stack of `tree-il-src' values for each parent tree).
324 (define-record-type <binding-info>
325 (make-binding-info vars refs locs)
326 binding-info?
327 (vars binding-info-vars) ;; ((GENSYM NAME LOCATION) ...)
328 (refs binding-info-refs) ;; (GENSYM ...)
329 (locs binding-info-locs)) ;; (LOCATION ...)
330
331 (define (report-unused-variables tree)
332 "Report about unused variables in TREE. Return TREE."
333
334 (define (dotless-list lst)
335 ;; If LST is a dotted list, return a proper list equal to LST except that
336 ;; the very last element is a pair; otherwise return LST.
337 (let loop ((lst lst)
338 (result '()))
339 (cond ((null? lst)
340 (reverse result))
341 ((pair? lst)
342 (loop (cdr lst) (cons (car lst) result)))
343 (else
344 (loop '() (cons lst result))))))
345
346 (tree-il-fold (lambda (x info)
347 ;; X is a leaf: extend INFO's refs accordingly.
348 (let ((refs (binding-info-refs info))
349 (vars (binding-info-vars info))
350 (locs (binding-info-locs info)))
351 (record-case x
352 ((<lexical-ref> gensym)
353 (make-binding-info vars (cons gensym refs) locs))
354 (else info))))
355
356 (lambda (x info)
357 ;; Going down into X: extend INFO's variable list
358 ;; accordingly.
359 (let ((refs (binding-info-refs info))
360 (vars (binding-info-vars info))
361 (locs (binding-info-locs info))
362 (src (tree-il-src x)))
363 (define (extend inner-vars inner-names)
364 (append (map (lambda (var name)
365 (list var name src))
366 inner-vars
367 inner-names)
368 vars))
369 (record-case x
370 ((<lexical-set> gensym)
371 (make-binding-info vars (cons gensym refs)
372 (cons src locs)))
373 ((<lambda> vars names)
374 (let ((vars (dotless-list vars))
375 (names (dotless-list names)))
376 (make-binding-info (extend vars names) refs
377 (cons src locs))))
378 ((<let> vars names)
379 (make-binding-info (extend vars names) refs
380 (cons src locs)))
381 ((<letrec> vars names)
382 (make-binding-info (extend vars names) refs
383 (cons src locs)))
384 ((<let-values> vars names)
385 (make-binding-info (extend vars names) refs
386 (cons src locs)))
387 (else info))))
388
389 (lambda (x info)
390 ;; Leaving X's scope: shrink INFO's variable list
391 ;; accordingly and reported unused nested variables.
392 (let ((refs (binding-info-refs info))
393 (vars (binding-info-vars info))
394 (locs (binding-info-locs info)))
395 (define (shrink inner-vars refs)
396 (for-each (lambda (var)
397 (let ((gensym (car var)))
398 ;; Don't report lambda parameters as
399 ;; unused.
400 (if (and (not (memq gensym refs))
401 (not (and (lambda? x)
402 (memq gensym
403 inner-vars))))
404 (let ((name (cadr var))
405 ;; We can get approximate
406 ;; source location by going up
407 ;; the LOCS location stack.
408 (loc (or (caddr var)
409 (find pair? locs))))
410 (warning 'unused-variable loc name)))))
411 (filter (lambda (var)
412 (memq (car var) inner-vars))
413 vars))
414 (fold alist-delete vars inner-vars))
415
416 ;; For simplicity, we leave REFS untouched, i.e., with
417 ;; names of variables that are now going out of scope.
418 ;; It doesn't hurt as these are unique names, it just
419 ;; makes REFS unnecessarily fat.
420 (record-case x
421 ((<lambda> vars)
422 (let ((vars (dotless-list vars)))
423 (make-binding-info (shrink vars refs) refs
424 (cdr locs))))
425 ((<let> vars)
426 (make-binding-info (shrink vars refs) refs
427 (cdr locs)))
428 ((<letrec> vars)
429 (make-binding-info (shrink vars refs) refs
430 (cdr locs)))
431 ((<let-values> vars)
432 (make-binding-info (shrink vars refs) refs
433 (cdr locs)))
434 (else info))))
435 (make-binding-info '() '() '())
436 tree)
437 tree)