Commit | Line | Data |
---|---|---|
6e8ad823 AW |
1 | ;;; Continuation-passing style (CPS) intermediate language (IL) |
2 | ||
7ab76a83 | 3 | ;; Copyright (C) 2013, 2014 Free Software Foundation, Inc. |
6e8ad823 AW |
4 | |
5 | ;;;; This library is free software; you can redistribute it and/or | |
6 | ;;;; modify it under the terms of the GNU Lesser General Public | |
7 | ;;;; License as published by the Free Software Foundation; either | |
8 | ;;;; version 3 of the License, or (at your option) any later version. | |
9 | ;;;; | |
10 | ;;;; This library is distributed in the hope that it will be useful, | |
11 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | ;;;; Lesser General Public License for more details. | |
14 | ;;;; | |
15 | ;;;; You should have received a copy of the GNU Lesser General Public | |
16 | ;;;; License along with this library; if not, write to the Free Software | |
17 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 | ||
19 | ;;; Commentary: | |
20 | ;;; | |
21 | ;;; A module to assign stack slots to variables in a CPS term. | |
22 | ;;; | |
23 | ;;; Code: | |
24 | ||
25 | (define-module (language cps slot-allocation) | |
26 | #:use-module (ice-9 match) | |
27 | #:use-module (srfi srfi-1) | |
28 | #:use-module (srfi srfi-9) | |
29 | #:use-module (srfi srfi-26) | |
30 | #:use-module (language cps) | |
31 | #:use-module (language cps dfg) | |
32 | #:export (allocate-slots | |
33 | lookup-slot | |
987c1f5f | 34 | lookup-maybe-slot |
6e8ad823 AW |
35 | lookup-constant-value |
36 | lookup-maybe-constant-value | |
37 | lookup-nlocals | |
38 | lookup-call-proc-slot | |
02c624fc AW |
39 | lookup-parallel-moves |
40 | lookup-dead-slot-map)) | |
6e8ad823 | 41 | |
6e8ad823 | 42 | (define-record-type $allocation |
987c1f5f AW |
43 | (make-allocation dfa slots |
44 | has-constv constant-values | |
45 | call-allocations | |
46 | nlocals) | |
6e8ad823 | 47 | allocation? |
987c1f5f AW |
48 | |
49 | ;; A DFA records all variables bound in a function, and assigns them | |
50 | ;; indices. The slot in which a variable is stored at runtime can be | |
51 | ;; had by indexing into the SLOTS vector with the variable's index. | |
52 | ;; | |
53 | (dfa allocation-dfa) | |
54 | (slots allocation-slots) | |
55 | ||
56 | ;; Not all variables have slots allocated. Variables that are | |
57 | ;; constant and that are only used by primcalls that can accept | |
58 | ;; constants directly are not allocated to slots, and their SLOT value | |
59 | ;; is false. Likewise constants that are only used by calls are not | |
60 | ;; allocated into slots, to avoid needless copying. If a variable is | |
61 | ;; constant, its constant value is set in the CONSTANT-VALUES vector | |
62 | ;; and the corresponding bit in the HAS-CONSTV bitvector is set. | |
63 | ;; | |
64 | (has-constv allocation-has-constv) | |
65 | (constant-values allocation-constant-values) | |
66 | ||
67 | ;; Some continuations have additional associated information. This | |
68 | ;; addition information is a /call allocation/. Call allocations | |
69 | ;; record the way that functions are passed values, and how their | |
70 | ;; return values are rebound to local variables. | |
71 | ;; | |
02c624fc AW |
72 | ;; A call allocation contains three pieces of information: the call's |
73 | ;; /proc slot/, a set of /parallel moves/, and a /dead slot map/. The | |
74 | ;; proc slot indicates the slot of a procedure in a procedure call, or | |
75 | ;; where the procedure would be in a multiple-value return. The | |
76 | ;; parallel moves shuffle locals into position for a call, or shuffle | |
77 | ;; returned values back into place. Though they use the same slot, | |
78 | ;; moves for a call are called "call moves", and moves to handle a | |
79 | ;; return are "return moves". The dead slot map indicates, for a | |
80 | ;; call, what slots should be ignored by GC when marking the frame. | |
987c1f5f | 81 | ;; |
36527695 | 82 | ;; $kreceive continuations record a proc slot and a set of return moves |
987c1f5f AW |
83 | ;; to adapt multiple values from the stack to local variables. |
84 | ;; | |
85 | ;; Tail calls record arg moves, but no proc slot. | |
86 | ;; | |
02c624fc AW |
87 | ;; Non-tail calls record arg moves, a call slot, and a dead slot map. |
88 | ;; Multiple-valued returns will have an associated $kreceive | |
89 | ;; continuation, which records the same proc slot, but has return | |
90 | ;; moves and no dead slot map. | |
987c1f5f | 91 | ;; |
36527695 | 92 | ;; $prompt handlers are $kreceive continuations like any other. |
987c1f5f AW |
93 | ;; |
94 | ;; $values expressions with more than 1 value record moves but have no | |
02c624fc | 95 | ;; proc slot or dead slot map. |
8d59d55e | 96 | ;; |
987c1f5f AW |
97 | ;; A set of moves is expressed as an ordered list of (SRC . DST) |
98 | ;; moves, where SRC and DST are slots. This may involve a temporary | |
02c624fc | 99 | ;; variable. A dead slot map is a bitfield, as an integer. |
6e8ad823 | 100 | ;; |
987c1f5f AW |
101 | (call-allocations allocation-call-allocations) |
102 | ||
103 | ;; The number of locals for a $kclause. | |
104 | ;; | |
105 | (nlocals allocation-nlocals)) | |
106 | ||
107 | (define-record-type $call-allocation | |
02c624fc | 108 | (make-call-allocation proc-slot moves dead-slot-map) |
987c1f5f AW |
109 | call-allocation? |
110 | (proc-slot call-allocation-proc-slot) | |
02c624fc AW |
111 | (moves call-allocation-moves) |
112 | (dead-slot-map call-allocation-dead-slot-map)) | |
6e8ad823 AW |
113 | |
114 | (define (find-first-zero n) | |
115 | ;; Naive implementation. | |
116 | (let lp ((slot 0)) | |
117 | (if (logbit? slot n) | |
118 | (lp (1+ slot)) | |
119 | slot))) | |
120 | ||
987c1f5f AW |
121 | (define (find-first-trailing-zero n) |
122 | (let lp ((slot (let lp ((count 2)) | |
123 | (if (< n (ash 1 (1- count))) | |
124 | count | |
125 | ;; Grow upper bound slower than factor 2 to avoid | |
126 | ;; needless bignum allocation on 32-bit systems | |
127 | ;; when there are more than 16 locals. | |
128 | (lp (+ count (ash count -1))))))) | |
6e8ad823 AW |
129 | (if (or (zero? slot) (logbit? (1- slot) n)) |
130 | slot | |
131 | (lp (1- slot))))) | |
132 | ||
987c1f5f AW |
133 | (define (lookup-maybe-slot sym allocation) |
134 | (match allocation | |
135 | (($ $allocation dfa slots) | |
136 | (vector-ref slots (dfa-var-idx dfa sym))))) | |
6e8ad823 AW |
137 | |
138 | (define (lookup-slot sym allocation) | |
987c1f5f AW |
139 | (or (lookup-maybe-slot sym allocation) |
140 | (error "Variable not allocated to a slot" sym))) | |
6e8ad823 AW |
141 | |
142 | (define (lookup-constant-value sym allocation) | |
987c1f5f AW |
143 | (match allocation |
144 | (($ $allocation dfa slots has-constv constant-values) | |
145 | (let ((idx (dfa-var-idx dfa sym))) | |
146 | (if (bitvector-ref has-constv idx) | |
147 | (vector-ref constant-values idx) | |
148 | (error "Variable does not have constant value" sym)))))) | |
6e8ad823 AW |
149 | |
150 | (define (lookup-maybe-constant-value sym allocation) | |
987c1f5f AW |
151 | (match allocation |
152 | (($ $allocation dfa slots has-constv constant-values) | |
153 | (let ((idx (dfa-var-idx dfa sym))) | |
154 | (values (bitvector-ref has-constv idx) | |
155 | (vector-ref constant-values idx)))))) | |
6e8ad823 | 156 | |
987c1f5f AW |
157 | (define (lookup-call-allocation k allocation) |
158 | (or (hashq-ref (allocation-call-allocations allocation) k) | |
159 | (error "Continuation not a call" k))) | |
6e8ad823 | 160 | |
987c1f5f AW |
161 | (define (lookup-call-proc-slot k allocation) |
162 | (or (call-allocation-proc-slot (lookup-call-allocation k allocation)) | |
163 | (error "Call has no proc slot" k))) | |
6e8ad823 AW |
164 | |
165 | (define (lookup-parallel-moves k allocation) | |
987c1f5f AW |
166 | (or (call-allocation-moves (lookup-call-allocation k allocation)) |
167 | (error "Call has no use parallel moves slot" k))) | |
168 | ||
02c624fc AW |
169 | (define (lookup-dead-slot-map k allocation) |
170 | (or (call-allocation-dead-slot-map (lookup-call-allocation k allocation)) | |
171 | (error "Call has no dead slot map" k))) | |
172 | ||
987c1f5f AW |
173 | (define (lookup-nlocals k allocation) |
174 | (or (hashq-ref (allocation-nlocals allocation) k) | |
175 | (error "Not a clause continuation" k))) | |
6e8ad823 AW |
176 | |
177 | (define (solve-parallel-move src dst tmp) | |
178 | "Solve the parallel move problem between src and dst slot lists, which | |
179 | are comparable with eqv?. A tmp slot may be used." | |
180 | ||
181 | ;; This algorithm is taken from: "Tilting at windmills with Coq: | |
182 | ;; formal verification of a compilation algorithm for parallel moves" | |
183 | ;; by Laurence Rideau, Bernard Paul Serpette, and Xavier Leroy | |
184 | ;; <http://gallium.inria.fr/~xleroy/publi/parallel-move.pdf> | |
185 | ||
186 | (define (split-move moves reg) | |
187 | (let loop ((revhead '()) (tail moves)) | |
188 | (match tail | |
189 | (((and s+d (s . d)) . rest) | |
190 | (if (eqv? s reg) | |
191 | (cons d (append-reverse revhead rest)) | |
192 | (loop (cons s+d revhead) rest))) | |
193 | (_ #f)))) | |
194 | ||
195 | (define (replace-last-source reg moves) | |
196 | (match moves | |
197 | ((moves ... (s . d)) | |
198 | (append moves (list (cons reg d)))))) | |
199 | ||
200 | (let loop ((to-move (map cons src dst)) | |
201 | (being-moved '()) | |
202 | (moved '()) | |
203 | (last-source #f)) | |
204 | ;; 'last-source' should always be equivalent to: | |
205 | ;; (and (pair? being-moved) (car (last being-moved))) | |
206 | (match being-moved | |
207 | (() (match to-move | |
208 | (() (reverse moved)) | |
209 | (((and s+d (s . d)) . t1) | |
210 | (if (or (eqv? s d) ; idempotent | |
211 | (not s)) ; src is a constant and can be loaded directly | |
212 | (loop t1 '() moved #f) | |
213 | (loop t1 (list s+d) moved s))))) | |
214 | (((and s+d (s . d)) . b) | |
215 | (match (split-move to-move d) | |
216 | ((r . t1) (loop t1 (acons d r being-moved) moved last-source)) | |
217 | (#f (match b | |
218 | (() (loop to-move '() (cons s+d moved) #f)) | |
219 | (_ (if (eqv? d last-source) | |
220 | (loop to-move | |
221 | (replace-last-source tmp b) | |
222 | (cons s+d (acons d tmp moved)) | |
223 | tmp) | |
224 | (loop to-move b (cons s+d moved) last-source)))))))))) | |
225 | ||
863034a8 AW |
226 | (define (dead-after-def? k-idx v-idx dfa) |
227 | (not (bitvector-ref (dfa-k-in dfa k-idx) v-idx))) | |
e636f424 | 228 | |
863034a8 AW |
229 | (define (dead-after-use? k-idx v-idx dfa) |
230 | (not (bitvector-ref (dfa-k-out dfa k-idx) v-idx))) | |
e636f424 | 231 | |
d258fccc | 232 | (define (allocate-slots fun dfg) |
987c1f5f | 233 | (let* ((dfa (compute-live-variables fun dfg)) |
7dbf40ea AW |
234 | (min-label (dfg-min-label dfg)) |
235 | (label-count (dfg-label-count dfg)) | |
236 | (usev (make-vector label-count '())) | |
237 | (defv (make-vector label-count '())) | |
987c1f5f AW |
238 | (slots (make-vector (dfa-var-count dfa) #f)) |
239 | (constant-values (make-vector (dfa-var-count dfa) #f)) | |
240 | (has-constv (make-bitvector (dfa-var-count dfa) #f)) | |
241 | (has-slotv (make-bitvector (dfa-var-count dfa) #t)) | |
242 | (needs-slotv (make-bitvector (dfa-var-count dfa) #t)) | |
0c247a2f | 243 | (needs-hintv (make-bitvector (dfa-var-count dfa) #f)) |
987c1f5f AW |
244 | (call-allocations (make-hash-table)) |
245 | (nlocals 0) ; Mutable. It pains me. | |
246 | (nlocals-table (make-hash-table))) | |
247 | ||
7dbf40ea AW |
248 | (define (label->idx label) (- label min-label)) |
249 | (define (idx->label idx) (+ idx min-label)) | |
250 | ||
987c1f5f AW |
251 | (define (bump-nlocals! nlocals*) |
252 | (when (< nlocals nlocals*) | |
253 | (set! nlocals nlocals*))) | |
254 | ||
255 | (define (empty-live-slots) | |
256 | #b0) | |
257 | ||
258 | (define (add-live-slot slot live-slots) | |
259 | (logior live-slots (ash 1 slot))) | |
260 | ||
261 | (define (kill-dead-slot slot live-slots) | |
262 | (logand live-slots (lognot (ash 1 slot)))) | |
263 | ||
264 | (define (compute-slot live-slots hint) | |
265 | (if (and hint (not (logbit? hint live-slots))) | |
266 | hint | |
267 | (find-first-zero live-slots))) | |
268 | ||
269 | (define (compute-call-proc-slot live-slots) | |
f8085163 | 270 | (+ 2 (find-first-trailing-zero live-slots))) |
987c1f5f AW |
271 | |
272 | (define (compute-prompt-handler-proc-slot live-slots) | |
273 | (1- (find-first-trailing-zero live-slots))) | |
274 | ||
275 | (define (recompute-live-slots k nargs) | |
863034a8 | 276 | (let ((in (dfa-k-in dfa (label->idx k)))) |
987c1f5f AW |
277 | (let lp ((v 0) (live-slots (1- (ash 1 (1+ nargs))))) |
278 | (let ((v (bit-position #t in v))) | |
279 | (if v | |
280 | (let ((slot (vector-ref slots v))) | |
281 | (lp (1+ v) | |
282 | (if slot | |
283 | (add-live-slot slot live-slots) | |
284 | live-slots))) | |
285 | live-slots))))) | |
286 | ||
287 | (define* (allocate! var-idx hint live) | |
288 | (cond | |
289 | ((not (bitvector-ref needs-slotv var-idx)) live) | |
290 | ((vector-ref slots var-idx) => (cut add-live-slot <> live)) | |
c79f873e | 291 | ((and (not hint) (bitvector-ref needs-hintv var-idx)) live) |
987c1f5f AW |
292 | (else |
293 | (let ((slot (compute-slot live hint))) | |
294 | (bump-nlocals! (1+ slot)) | |
295 | (vector-set! slots var-idx slot) | |
296 | (add-live-slot slot live))))) | |
297 | ||
298 | ;; Although some parallel moves may proceed without a temporary | |
299 | ;; slot, in general one is needed. That temporary slot must not be | |
300 | ;; part of the source or destination sets, and that slot should not | |
301 | ;; correspond to a live variable. Usually the source and | |
302 | ;; destination sets are a subset of the union of the live sets | |
303 | ;; before and after the move. However for stack slots that don't | |
304 | ;; have names -- those slots that correspond to function arguments | |
305 | ;; or to function return values -- it could be that they are out of | |
306 | ;; the computed live set. In that case they need to be adjoined to | |
307 | ;; the live set, used when choosing a temporary slot. | |
308 | (define (compute-tmp-slot live stack-slots) | |
309 | (find-first-zero (fold add-live-slot live stack-slots))) | |
310 | ||
311 | (define (parallel-move src-slots dst-slots tmp-slot) | |
312 | (let ((moves (solve-parallel-move src-slots dst-slots tmp-slot))) | |
313 | (when (assv tmp-slot moves) | |
314 | (bump-nlocals! (1+ tmp-slot))) | |
315 | moves)) | |
316 | ||
317 | ;; Find variables that are actually constant, and determine which | |
318 | ;; of those can avoid slot allocation. | |
319 | (define (compute-constants!) | |
320 | (let lp ((n 0)) | |
321 | (when (< n (vector-length constant-values)) | |
322 | (let ((sym (dfa-var-sym dfa n))) | |
323 | (call-with-values (lambda () (find-constant-value sym dfg)) | |
324 | (lambda (has-const? const) | |
325 | (when has-const? | |
326 | (bitvector-set! has-constv n has-const?) | |
327 | (vector-set! constant-values n const) | |
328 | (when (not (constant-needs-allocation? sym const dfg)) | |
329 | (bitvector-set! needs-slotv n #f))) | |
330 | (lp (1+ n)))))))) | |
331 | ||
332 | ;; Record uses and defs, as lists of variable indexes, indexed by | |
7dbf40ea | 333 | ;; label index. |
987c1f5f AW |
334 | (define (compute-uses-and-defs!) |
335 | (let lp ((n 0)) | |
336 | (when (< n (vector-length usev)) | |
7dbf40ea | 337 | (match (lookup-cont (idx->label n) dfg) |
8320f504 | 338 | (($ $kfun src meta self) |
987c1f5f AW |
339 | (vector-set! defv n (list (dfa-var-idx dfa self)))) |
340 | (($ $kargs names syms body) | |
341 | (vector-set! defv n (map (cut dfa-var-idx dfa <>) syms)) | |
342 | (vector-set! usev n | |
343 | (map (cut dfa-var-idx dfa <>) | |
344 | (match (find-expression body) | |
345 | (($ $call proc args) | |
346 | (cons proc args)) | |
b3ae2b50 AW |
347 | (($ $callk k proc args) |
348 | (cons proc args)) | |
987c1f5f AW |
349 | (($ $primcall name args) |
350 | args) | |
351 | (($ $values args) | |
352 | args) | |
7ab76a83 | 353 | (($ $prompt escape? tag handler) |
987c1f5f AW |
354 | (list tag)) |
355 | (_ '()))))) | |
356 | (_ #f)) | |
357 | (lp (1+ n))))) | |
358 | ||
fa48a2f7 AW |
359 | ;; Results of function calls that are not used don't need to be |
360 | ;; allocated to slots. | |
361 | (define (compute-unused-results!) | |
7dbf40ea AW |
362 | (define (kreceive-get-kargs kreceive) |
363 | (match (lookup-cont kreceive dfg) | |
364 | (($ $kreceive arity kargs) kargs) | |
fa48a2f7 | 365 | (_ #f))) |
7dbf40ea | 366 | (let ((candidates (make-bitvector label-count #f))) |
36527695 | 367 | ;; Find all $kargs that are the successors of $kreceive nodes. |
fa48a2f7 | 368 | (let lp ((n 0)) |
7dbf40ea AW |
369 | (when (< n label-count) |
370 | (and=> (kreceive-get-kargs (idx->label n)) | |
fa48a2f7 | 371 | (lambda (kargs) |
7dbf40ea | 372 | (bitvector-set! candidates (label->idx kargs) #t))) |
fa48a2f7 | 373 | (lp (1+ n)))) |
36527695 | 374 | ;; For $kargs that only have $kreceive predecessors, remove unused |
fa48a2f7 AW |
375 | ;; variables from the needs-slotv set. |
376 | (let lp ((n 0)) | |
377 | (let ((n (bit-position #t candidates n))) | |
378 | (when n | |
7dbf40ea | 379 | (match (lookup-predecessors (idx->label n) dfg) |
36527695 | 380 | ;; At least one kreceive is in the predecessor set, so we |
fa48a2f7 AW |
381 | ;; only need to do the check for nodes with >1 |
382 | ;; predecessor. | |
36527695 | 383 | ((or (_) ((? kreceive-get-kargs) ...)) |
fa48a2f7 | 384 | (for-each (lambda (var) |
863034a8 | 385 | (when (dead-after-def? n var dfa) |
fa48a2f7 AW |
386 | (bitvector-set! needs-slotv var #f))) |
387 | (vector-ref defv n))) | |
388 | (_ #f)) | |
389 | (lp (1+ n))))))) | |
390 | ||
0c247a2f AW |
391 | ;; Compute the set of variables whose allocation should be delayed |
392 | ;; until a "hint" is known about where to allocate them. This is | |
393 | ;; the case for some procedure arguments. | |
394 | ;; | |
395 | ;; This algorithm used is a conservative approximation of what | |
396 | ;; really should happen, which would be eager allocation of call | |
397 | ;; frames as soon as it's known that a call will happen. It would | |
398 | ;; be nice to recast this as a proper data-flow problem. | |
399 | (define (compute-needs-hint!) | |
0c247a2f | 400 | (define (live-before n) |
863034a8 | 401 | (dfa-k-in dfa n)) |
0c247a2f | 402 | (define (live-after n) |
863034a8 | 403 | (dfa-k-out dfa n)) |
0c247a2f AW |
404 | |
405 | ;; Walk backwards. At a call, compute the set of variables that | |
406 | ;; have allocated slots and are live before but not after. This | |
407 | ;; set contains candidates for needs-hintv. | |
408 | (define (scan-for-call n) | |
409 | (when (<= 0 n) | |
7dbf40ea | 410 | (match (lookup-cont (idx->label n) dfg) |
0c247a2f AW |
411 | (($ $kargs names syms body) |
412 | (match (find-expression body) | |
b3ae2b50 | 413 | ((or ($ $call) ($ $callk)) |
0c247a2f AW |
414 | (let ((args (make-bitvector (bitvector-length needs-slotv) #f))) |
415 | (bit-set*! args (live-before n) #t) | |
416 | (bit-set*! args (live-after n) #f) | |
417 | (bit-set*! args no-slot-needed #f) | |
418 | (if (bit-position #t args 0) | |
419 | (scan-for-hints (1- n) args) | |
420 | (scan-for-call (1- n))))) | |
421 | (_ (scan-for-call (1- n))))) | |
422 | (_ (scan-for-call (1- n)))))) | |
423 | ||
424 | ;; Walk backwards in the current basic block. Stop when the block | |
425 | ;; ends, we reach a call, or when an expression kills a value. | |
426 | (define (scan-for-hints n args) | |
427 | (when (< 0 n) | |
7dbf40ea | 428 | (match (lookup-cont (idx->label n) dfg) |
0c247a2f | 429 | (($ $kargs names syms body) |
7dbf40ea AW |
430 | (match (lookup-predecessors (idx->label (1+ n)) dfg) |
431 | (((? (cut eqv? <> (idx->label n)))) | |
0c247a2f AW |
432 | ;; If we are indeed in the same basic block, then if we |
433 | ;; are finished with the scan, we kill uses of the | |
434 | ;; terminator, but leave its definitions. | |
435 | (match (find-expression body) | |
cf8bb037 | 436 | ((or ($ $void) ($ $const) ($ $prim) ($ $closure) |
f4092958 AW |
437 | ($ $primcall) ($ $prompt) |
438 | ;; If $values has more than one argument, it may | |
439 | ;; use a temporary, which would invalidate our | |
440 | ;; assumptions that slots not allocated are not | |
441 | ;; used. | |
442 | ($ $values (or () (_)))) | |
0c247a2f AW |
443 | (let ((dead (make-bitvector (bitvector-length args) #f))) |
444 | (bit-set*! dead (live-before n) #t) | |
445 | (bit-set*! dead (live-after n) #f) | |
446 | (bit-set*! dead no-slot-needed #f) | |
447 | (if (bit-position #t dead 0) | |
448 | (finish-hints n (live-before n) args) | |
449 | (scan-for-hints (1- n) args)))) | |
b3ae2b50 | 450 | ((or ($ $call) ($ $callk) ($ $values)) |
0c247a2f AW |
451 | (finish-hints n (live-before n) args)))) |
452 | ;; Otherwise we kill uses of the block entry. | |
453 | (_ (finish-hints n (live-before (1+ n)) args)))) | |
454 | (_ (finish-hints n (live-before (1+ n)) args))))) | |
455 | ||
456 | ;; Add definitions ARGS minus KILL to NEED-HINTS, and go back to | |
457 | ;; looking for calls. | |
458 | (define (finish-hints n kill args) | |
459 | (bit-invert! args) | |
460 | (bit-set*! args kill #t) | |
461 | (bit-invert! args) | |
462 | (bit-set*! needs-hintv args #t) | |
463 | (scan-for-call n)) | |
464 | ||
465 | (define no-slot-needed | |
466 | (make-bitvector (bitvector-length needs-slotv) #f)) | |
467 | ||
468 | (bit-set*! no-slot-needed needs-slotv #t) | |
469 | (bit-invert! no-slot-needed) | |
7dbf40ea | 470 | (scan-for-call (1- label-count))) |
0c247a2f | 471 | |
987c1f5f | 472 | (define (allocate-call label k uses pre-live post-live) |
7dbf40ea | 473 | (match (lookup-cont k dfg) |
987c1f5f AW |
474 | (($ $ktail) |
475 | (let* ((tail-nlocals (length uses)) | |
476 | (tail-slots (iota tail-nlocals)) | |
0c247a2f | 477 | (pre-live (fold allocate! pre-live uses tail-slots)) |
987c1f5f AW |
478 | (moves (parallel-move (map (cut vector-ref slots <>) uses) |
479 | tail-slots | |
480 | (compute-tmp-slot pre-live tail-slots)))) | |
481 | (bump-nlocals! tail-nlocals) | |
482 | (hashq-set! call-allocations label | |
02c624fc | 483 | (make-call-allocation #f moves #f)))) |
36527695 | 484 | (($ $kreceive arity kargs) |
987c1f5f AW |
485 | (let* ((proc-slot (compute-call-proc-slot post-live)) |
486 | (call-slots (map (cut + proc-slot <>) (iota (length uses)))) | |
0c247a2f | 487 | (pre-live (fold allocate! pre-live uses call-slots)) |
987c1f5f AW |
488 | (arg-moves (parallel-move (map (cut vector-ref slots <>) uses) |
489 | call-slots | |
490 | (compute-tmp-slot pre-live | |
491 | call-slots))) | |
7dbf40ea | 492 | (result-vars (vector-ref defv (label->idx kargs))) |
987c1f5f AW |
493 | (value-slots (map (cut + proc-slot 1 <>) |
494 | (iota (length result-vars)))) | |
ad4f6be1 AW |
495 | ;; Shuffle the first result down to the lowest slot, and |
496 | ;; leave any remaining results where they are. This | |
497 | ;; strikes a balance between avoiding shuffling, | |
498 | ;; especially for unused extra values, and avoiding | |
499 | ;; frame size growth due to sparse locals. | |
500 | (result-live (match (cons result-vars value-slots) | |
501 | ((() . ()) post-live) | |
502 | (((var . vars) . (slot . slots)) | |
503 | (fold allocate! | |
504 | (allocate! var #f post-live) | |
505 | vars slots)))) | |
987c1f5f | 506 | (result-slots (map (cut vector-ref slots <>) result-vars)) |
fa48a2f7 AW |
507 | ;; Filter out unused results. |
508 | (value-slots (filter-map (lambda (val result) (and result val)) | |
509 | value-slots result-slots)) | |
510 | (result-slots (filter (lambda (x) x) result-slots)) | |
987c1f5f AW |
511 | (result-moves (parallel-move value-slots |
512 | result-slots | |
513 | (compute-tmp-slot result-live | |
02c624fc AW |
514 | value-slots))) |
515 | (dead-slot-map (logand (1- (ash 1 (- proc-slot 2))) | |
516 | (lognot post-live)))) | |
987c1f5f AW |
517 | (bump-nlocals! (+ proc-slot (length uses))) |
518 | (hashq-set! call-allocations label | |
02c624fc | 519 | (make-call-allocation proc-slot arg-moves dead-slot-map)) |
987c1f5f | 520 | (hashq-set! call-allocations k |
02c624fc | 521 | (make-call-allocation proc-slot result-moves #f)))) |
987c1f5f | 522 | |
6e8ad823 | 523 | (_ |
987c1f5f AW |
524 | (let* ((proc-slot (compute-call-proc-slot post-live)) |
525 | (call-slots (map (cut + proc-slot <>) (iota (length uses)))) | |
0c247a2f | 526 | (pre-live (fold allocate! pre-live uses call-slots)) |
987c1f5f AW |
527 | (arg-moves (parallel-move (map (cut vector-ref slots <>) uses) |
528 | call-slots | |
529 | (compute-tmp-slot pre-live | |
530 | call-slots)))) | |
531 | (bump-nlocals! (+ proc-slot (length uses))) | |
532 | (hashq-set! call-allocations label | |
02c624fc | 533 | (make-call-allocation proc-slot arg-moves #f)))))) |
987c1f5f AW |
534 | |
535 | (define (allocate-values label k uses pre-live post-live) | |
7dbf40ea | 536 | (match (lookup-cont k dfg) |
8a2d420f AW |
537 | (($ $ktail) |
538 | (let* ((src-slots (map (cut vector-ref slots <>) uses)) | |
539 | (tail-nlocals (1+ (length uses))) | |
540 | (dst-slots (cdr (iota tail-nlocals))) | |
541 | (moves (parallel-move src-slots dst-slots | |
542 | (compute-tmp-slot pre-live dst-slots)))) | |
543 | (bump-nlocals! tail-nlocals) | |
544 | (hashq-set! call-allocations label | |
02c624fc | 545 | (make-call-allocation #f moves #f)))) |
8a2d420f AW |
546 | (($ $kargs (_) (_)) |
547 | ;; When there is only one value in play, we allow the dst to be | |
548 | ;; hinted (see scan-for-hints). If the src doesn't have a | |
549 | ;; slot, then the actual slot for the dst would end up being | |
550 | ;; decided by the call that uses it. Because we don't know the | |
551 | ;; slot, we can't really compute the parallel moves in that | |
552 | ;; case, so just bail and rely on the bytecode emitter to | |
553 | ;; handle the one-value case specially. | |
7dbf40ea | 554 | (match (cons uses (vector-ref defv (label->idx k))) |
8a2d420f AW |
555 | (((src) . (dst)) |
556 | (allocate! dst (vector-ref slots src) post-live)))) | |
557 | (($ $kargs) | |
558 | (let* ((src-slots (map (cut vector-ref slots <>) uses)) | |
7dbf40ea | 559 | (dst-vars (vector-ref defv (label->idx k))) |
8a2d420f AW |
560 | (result-live (fold allocate! post-live dst-vars src-slots)) |
561 | (dst-slots (map (cut vector-ref slots <>) dst-vars)) | |
562 | (moves (parallel-move src-slots dst-slots | |
563 | (compute-tmp-slot (logior pre-live result-live) | |
564 | '())))) | |
565 | (hashq-set! call-allocations label | |
02c624fc | 566 | (make-call-allocation #f moves #f)))) |
8a2d420f | 567 | (($ $kif) #f))) |
987c1f5f AW |
568 | |
569 | (define (allocate-prompt label k handler nargs) | |
7dbf40ea | 570 | (match (lookup-cont handler dfg) |
36527695 | 571 | (($ $kreceive arity kargs) |
987c1f5f AW |
572 | (let* ((handler-live (recompute-live-slots handler nargs)) |
573 | (proc-slot (compute-prompt-handler-proc-slot handler-live)) | |
7dbf40ea | 574 | (result-vars (vector-ref defv (label->idx kargs))) |
987c1f5f AW |
575 | (value-slots (map (cut + proc-slot 1 <>) |
576 | (iota (length result-vars)))) | |
577 | (result-live (fold allocate! | |
578 | handler-live result-vars value-slots)) | |
579 | (result-slots (map (cut vector-ref slots <>) result-vars)) | |
fa48a2f7 AW |
580 | ;; Filter out unused results. |
581 | (value-slots (filter-map (lambda (val result) (and result val)) | |
582 | value-slots result-slots)) | |
583 | (result-slots (filter (lambda (x) x) result-slots)) | |
987c1f5f AW |
584 | (moves (parallel-move value-slots |
585 | result-slots | |
586 | (compute-tmp-slot result-live | |
587 | value-slots)))) | |
588 | (bump-nlocals! (+ proc-slot 1 (length result-vars))) | |
589 | (hashq-set! call-allocations handler | |
02c624fc | 590 | (make-call-allocation proc-slot moves #f)))))) |
987c1f5f AW |
591 | |
592 | (define (allocate-defs! n live) | |
593 | (fold (cut allocate! <> #f <>) live (vector-ref defv n))) | |
594 | ||
595 | ;; This traversal will visit definitions before uses, as | |
596 | ;; definitions dominate uses and a block's dominator will appear | |
597 | ;; before it, in reverse post-order. | |
598 | (define (visit-clause n nargs live) | |
599 | (let lp ((n n) (live live)) | |
7dbf40ea | 600 | (define (kill-dead live vars-by-label-idx pred) |
987c1f5f AW |
601 | (fold (lambda (v live) |
602 | (let ((slot (vector-ref slots v))) | |
603 | (if (and slot | |
604 | (> slot nargs) | |
863034a8 | 605 | (pred n v dfa)) |
987c1f5f AW |
606 | (kill-dead-slot slot live) |
607 | live))) | |
608 | live | |
7dbf40ea | 609 | (vector-ref vars-by-label-idx n))) |
987c1f5f AW |
610 | (define (kill-dead-defs live) |
611 | (kill-dead live defv dead-after-def?)) | |
612 | (define (kill-dead-uses live) | |
613 | (kill-dead live usev dead-after-use?)) | |
7dbf40ea | 614 | (if (= n label-count) |
987c1f5f | 615 | n |
7dbf40ea | 616 | (let* ((label (idx->label n)) |
987c1f5f AW |
617 | (live (if (control-point? label dfg) |
618 | (recompute-live-slots label nargs) | |
619 | live)) | |
620 | (live (kill-dead-defs (allocate-defs! n live))) | |
621 | (post-live (kill-dead-uses live))) | |
622 | ;; LIVE are the live slots coming into the term. | |
623 | ;; POST-LIVE is the subset that is still live after the | |
624 | ;; term uses its inputs. | |
7dbf40ea | 625 | (match (lookup-cont (idx->label n) dfg) |
987c1f5f AW |
626 | (($ $kclause) n) |
627 | (($ $kargs names syms body) | |
628 | (let ((uses (vector-ref usev n))) | |
629 | (match (find-call body) | |
b3ae2b50 | 630 | (($ $continue k src (or ($ $call) ($ $callk))) |
987c1f5f AW |
631 | (allocate-call label k uses live post-live)) |
632 | (($ $continue k src ($ $primcall)) #t) | |
8a2d420f | 633 | (($ $continue k src ($ $values)) |
987c1f5f | 634 | (allocate-values label k uses live post-live)) |
7ab76a83 | 635 | (($ $continue k src ($ $prompt escape? tag handler)) |
987c1f5f AW |
636 | (allocate-prompt label k handler nargs)) |
637 | (_ #f))) | |
638 | (lp (1+ n) post-live)) | |
36527695 | 639 | ((or ($ $kreceive) ($ $kif) ($ $ktail)) |
987c1f5f AW |
640 | (lp (1+ n) post-live))))))) |
641 | ||
642 | (define (visit-entry) | |
643 | (define (visit-clauses n live) | |
644 | (unless (eqv? live (add-live-slot 0 (empty-live-slots))) | |
645 | (error "Unexpected clause live set")) | |
646 | (set! nlocals 1) | |
7dbf40ea | 647 | (match (lookup-cont (idx->label n) dfg) |
90dce16d | 648 | (($ $kclause arity ($ $cont kbody ($ $kargs names)) alternate) |
7dbf40ea AW |
649 | (unless (eq? (idx->label (1+ n)) kbody) |
650 | (error "Unexpected label order")) | |
0c247a2f AW |
651 | (let* ((nargs (length names)) |
652 | (next (visit-clause (1+ n) | |
653 | nargs | |
654 | (fold allocate! live | |
655 | (vector-ref defv (1+ n)) | |
656 | (cdr (iota (1+ nargs))))))) | |
7dbf40ea AW |
657 | (hashq-set! nlocals-table (idx->label n) nlocals) |
658 | (when (< next label-count) | |
90dce16d AW |
659 | (match alternate |
660 | (($ $cont kalt) | |
7dbf40ea | 661 | (unless (eq? kalt (idx->label next)) |
90dce16d | 662 | (error "Unexpected clause order")))) |
0c247a2f | 663 | (visit-clauses next live)))))) |
7dbf40ea | 664 | (match (lookup-cont (idx->label 0) dfg) |
8320f504 | 665 | (($ $kfun src meta self) |
987c1f5f AW |
666 | (visit-clauses 1 (allocate-defs! 0 (empty-live-slots)))))) |
667 | ||
668 | (compute-constants!) | |
669 | (compute-uses-and-defs!) | |
fa48a2f7 | 670 | (compute-unused-results!) |
0c247a2f | 671 | (compute-needs-hint!) |
987c1f5f AW |
672 | (visit-entry) |
673 | ||
674 | (make-allocation dfa slots | |
675 | has-constv constant-values | |
676 | call-allocations | |
677 | nlocals-table))) |