Commit | Line | Data |
---|---|---|
6e8ad823 AW |
1 | ;;; Continuation-passing style (CPS) intermediate language (IL) |
2 | ||
7ab76a83 | 3 | ;; Copyright (C) 2013, 2014 Free Software Foundation, Inc. |
6e8ad823 AW |
4 | |
5 | ;;;; This library is free software; you can redistribute it and/or | |
6 | ;;;; modify it under the terms of the GNU Lesser General Public | |
7 | ;;;; License as published by the Free Software Foundation; either | |
8 | ;;;; version 3 of the License, or (at your option) any later version. | |
9 | ;;;; | |
10 | ;;;; This library is distributed in the hope that it will be useful, | |
11 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | ;;;; Lesser General Public License for more details. | |
14 | ;;;; | |
15 | ;;;; You should have received a copy of the GNU Lesser General Public | |
16 | ;;;; License along with this library; if not, write to the Free Software | |
17 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 | ||
19 | ;;; Commentary: | |
20 | ;;; | |
21 | ;;; A module to assign stack slots to variables in a CPS term. | |
22 | ;;; | |
23 | ;;; Code: | |
24 | ||
25 | (define-module (language cps slot-allocation) | |
26 | #:use-module (ice-9 match) | |
27 | #:use-module (srfi srfi-1) | |
28 | #:use-module (srfi srfi-9) | |
29 | #:use-module (srfi srfi-26) | |
30 | #:use-module (language cps) | |
31 | #:use-module (language cps dfg) | |
32 | #:export (allocate-slots | |
33 | lookup-slot | |
987c1f5f | 34 | lookup-maybe-slot |
6e8ad823 AW |
35 | lookup-constant-value |
36 | lookup-maybe-constant-value | |
37 | lookup-nlocals | |
38 | lookup-call-proc-slot | |
02c624fc AW |
39 | lookup-parallel-moves |
40 | lookup-dead-slot-map)) | |
6e8ad823 | 41 | |
6e8ad823 | 42 | (define-record-type $allocation |
987c1f5f AW |
43 | (make-allocation dfa slots |
44 | has-constv constant-values | |
45 | call-allocations | |
46 | nlocals) | |
6e8ad823 | 47 | allocation? |
987c1f5f AW |
48 | |
49 | ;; A DFA records all variables bound in a function, and assigns them | |
50 | ;; indices. The slot in which a variable is stored at runtime can be | |
51 | ;; had by indexing into the SLOTS vector with the variable's index. | |
52 | ;; | |
53 | (dfa allocation-dfa) | |
54 | (slots allocation-slots) | |
55 | ||
56 | ;; Not all variables have slots allocated. Variables that are | |
57 | ;; constant and that are only used by primcalls that can accept | |
58 | ;; constants directly are not allocated to slots, and their SLOT value | |
59 | ;; is false. Likewise constants that are only used by calls are not | |
60 | ;; allocated into slots, to avoid needless copying. If a variable is | |
61 | ;; constant, its constant value is set in the CONSTANT-VALUES vector | |
62 | ;; and the corresponding bit in the HAS-CONSTV bitvector is set. | |
63 | ;; | |
64 | (has-constv allocation-has-constv) | |
65 | (constant-values allocation-constant-values) | |
66 | ||
67 | ;; Some continuations have additional associated information. This | |
68 | ;; addition information is a /call allocation/. Call allocations | |
69 | ;; record the way that functions are passed values, and how their | |
70 | ;; return values are rebound to local variables. | |
71 | ;; | |
02c624fc AW |
72 | ;; A call allocation contains three pieces of information: the call's |
73 | ;; /proc slot/, a set of /parallel moves/, and a /dead slot map/. The | |
74 | ;; proc slot indicates the slot of a procedure in a procedure call, or | |
75 | ;; where the procedure would be in a multiple-value return. The | |
76 | ;; parallel moves shuffle locals into position for a call, or shuffle | |
77 | ;; returned values back into place. Though they use the same slot, | |
78 | ;; moves for a call are called "call moves", and moves to handle a | |
79 | ;; return are "return moves". The dead slot map indicates, for a | |
80 | ;; call, what slots should be ignored by GC when marking the frame. | |
987c1f5f | 81 | ;; |
36527695 | 82 | ;; $kreceive continuations record a proc slot and a set of return moves |
987c1f5f AW |
83 | ;; to adapt multiple values from the stack to local variables. |
84 | ;; | |
85 | ;; Tail calls record arg moves, but no proc slot. | |
86 | ;; | |
02c624fc AW |
87 | ;; Non-tail calls record arg moves, a call slot, and a dead slot map. |
88 | ;; Multiple-valued returns will have an associated $kreceive | |
89 | ;; continuation, which records the same proc slot, but has return | |
90 | ;; moves and no dead slot map. | |
987c1f5f | 91 | ;; |
36527695 | 92 | ;; $prompt handlers are $kreceive continuations like any other. |
987c1f5f AW |
93 | ;; |
94 | ;; $values expressions with more than 1 value record moves but have no | |
02c624fc | 95 | ;; proc slot or dead slot map. |
8d59d55e | 96 | ;; |
987c1f5f AW |
97 | ;; A set of moves is expressed as an ordered list of (SRC . DST) |
98 | ;; moves, where SRC and DST are slots. This may involve a temporary | |
02c624fc | 99 | ;; variable. A dead slot map is a bitfield, as an integer. |
6e8ad823 | 100 | ;; |
987c1f5f AW |
101 | (call-allocations allocation-call-allocations) |
102 | ||
103 | ;; The number of locals for a $kclause. | |
104 | ;; | |
105 | (nlocals allocation-nlocals)) | |
106 | ||
107 | (define-record-type $call-allocation | |
02c624fc | 108 | (make-call-allocation proc-slot moves dead-slot-map) |
987c1f5f AW |
109 | call-allocation? |
110 | (proc-slot call-allocation-proc-slot) | |
02c624fc AW |
111 | (moves call-allocation-moves) |
112 | (dead-slot-map call-allocation-dead-slot-map)) | |
6e8ad823 AW |
113 | |
114 | (define (find-first-zero n) | |
115 | ;; Naive implementation. | |
116 | (let lp ((slot 0)) | |
117 | (if (logbit? slot n) | |
118 | (lp (1+ slot)) | |
119 | slot))) | |
120 | ||
987c1f5f AW |
121 | (define (find-first-trailing-zero n) |
122 | (let lp ((slot (let lp ((count 2)) | |
123 | (if (< n (ash 1 (1- count))) | |
124 | count | |
125 | ;; Grow upper bound slower than factor 2 to avoid | |
126 | ;; needless bignum allocation on 32-bit systems | |
127 | ;; when there are more than 16 locals. | |
128 | (lp (+ count (ash count -1))))))) | |
6e8ad823 AW |
129 | (if (or (zero? slot) (logbit? (1- slot) n)) |
130 | slot | |
131 | (lp (1- slot))))) | |
132 | ||
987c1f5f AW |
133 | (define (lookup-maybe-slot sym allocation) |
134 | (match allocation | |
135 | (($ $allocation dfa slots) | |
136 | (vector-ref slots (dfa-var-idx dfa sym))))) | |
6e8ad823 AW |
137 | |
138 | (define (lookup-slot sym allocation) | |
987c1f5f AW |
139 | (or (lookup-maybe-slot sym allocation) |
140 | (error "Variable not allocated to a slot" sym))) | |
6e8ad823 AW |
141 | |
142 | (define (lookup-constant-value sym allocation) | |
987c1f5f AW |
143 | (match allocation |
144 | (($ $allocation dfa slots has-constv constant-values) | |
145 | (let ((idx (dfa-var-idx dfa sym))) | |
146 | (if (bitvector-ref has-constv idx) | |
147 | (vector-ref constant-values idx) | |
148 | (error "Variable does not have constant value" sym)))))) | |
6e8ad823 AW |
149 | |
150 | (define (lookup-maybe-constant-value sym allocation) | |
987c1f5f AW |
151 | (match allocation |
152 | (($ $allocation dfa slots has-constv constant-values) | |
153 | (let ((idx (dfa-var-idx dfa sym))) | |
154 | (values (bitvector-ref has-constv idx) | |
155 | (vector-ref constant-values idx)))))) | |
6e8ad823 | 156 | |
987c1f5f AW |
157 | (define (lookup-call-allocation k allocation) |
158 | (or (hashq-ref (allocation-call-allocations allocation) k) | |
159 | (error "Continuation not a call" k))) | |
6e8ad823 | 160 | |
987c1f5f AW |
161 | (define (lookup-call-proc-slot k allocation) |
162 | (or (call-allocation-proc-slot (lookup-call-allocation k allocation)) | |
163 | (error "Call has no proc slot" k))) | |
6e8ad823 AW |
164 | |
165 | (define (lookup-parallel-moves k allocation) | |
987c1f5f AW |
166 | (or (call-allocation-moves (lookup-call-allocation k allocation)) |
167 | (error "Call has no use parallel moves slot" k))) | |
168 | ||
02c624fc AW |
169 | (define (lookup-dead-slot-map k allocation) |
170 | (or (call-allocation-dead-slot-map (lookup-call-allocation k allocation)) | |
171 | (error "Call has no dead slot map" k))) | |
172 | ||
987c1f5f AW |
173 | (define (lookup-nlocals k allocation) |
174 | (or (hashq-ref (allocation-nlocals allocation) k) | |
175 | (error "Not a clause continuation" k))) | |
6e8ad823 AW |
176 | |
177 | (define (solve-parallel-move src dst tmp) | |
178 | "Solve the parallel move problem between src and dst slot lists, which | |
179 | are comparable with eqv?. A tmp slot may be used." | |
180 | ||
181 | ;; This algorithm is taken from: "Tilting at windmills with Coq: | |
182 | ;; formal verification of a compilation algorithm for parallel moves" | |
183 | ;; by Laurence Rideau, Bernard Paul Serpette, and Xavier Leroy | |
184 | ;; <http://gallium.inria.fr/~xleroy/publi/parallel-move.pdf> | |
185 | ||
186 | (define (split-move moves reg) | |
187 | (let loop ((revhead '()) (tail moves)) | |
188 | (match tail | |
189 | (((and s+d (s . d)) . rest) | |
190 | (if (eqv? s reg) | |
191 | (cons d (append-reverse revhead rest)) | |
192 | (loop (cons s+d revhead) rest))) | |
193 | (_ #f)))) | |
194 | ||
195 | (define (replace-last-source reg moves) | |
196 | (match moves | |
197 | ((moves ... (s . d)) | |
198 | (append moves (list (cons reg d)))))) | |
199 | ||
200 | (let loop ((to-move (map cons src dst)) | |
201 | (being-moved '()) | |
202 | (moved '()) | |
203 | (last-source #f)) | |
204 | ;; 'last-source' should always be equivalent to: | |
205 | ;; (and (pair? being-moved) (car (last being-moved))) | |
206 | (match being-moved | |
207 | (() (match to-move | |
208 | (() (reverse moved)) | |
209 | (((and s+d (s . d)) . t1) | |
210 | (if (or (eqv? s d) ; idempotent | |
211 | (not s)) ; src is a constant and can be loaded directly | |
212 | (loop t1 '() moved #f) | |
213 | (loop t1 (list s+d) moved s))))) | |
214 | (((and s+d (s . d)) . b) | |
215 | (match (split-move to-move d) | |
216 | ((r . t1) (loop t1 (acons d r being-moved) moved last-source)) | |
217 | (#f (match b | |
218 | (() (loop to-move '() (cons s+d moved) #f)) | |
219 | (_ (if (eqv? d last-source) | |
220 | (loop to-move | |
221 | (replace-last-source tmp b) | |
222 | (cons s+d (acons d tmp moved)) | |
223 | tmp) | |
224 | (loop to-move b (cons s+d moved) last-source)))))))))) | |
225 | ||
e636f424 AW |
226 | (define (dead-after-def? def-k v-idx dfa) |
227 | (let ((l (dfa-k-idx dfa def-k))) | |
228 | (not (bitvector-ref (dfa-k-in dfa l) v-idx)))) | |
229 | ||
230 | (define (dead-after-use? use-k v-idx dfa) | |
231 | (let ((l (dfa-k-idx dfa use-k))) | |
232 | (not (bitvector-ref (dfa-k-out dfa l) v-idx)))) | |
233 | ||
d258fccc | 234 | (define (allocate-slots fun dfg) |
987c1f5f | 235 | (let* ((dfa (compute-live-variables fun dfg)) |
7dbf40ea AW |
236 | (min-label (dfg-min-label dfg)) |
237 | (label-count (dfg-label-count dfg)) | |
238 | (usev (make-vector label-count '())) | |
239 | (defv (make-vector label-count '())) | |
987c1f5f AW |
240 | (slots (make-vector (dfa-var-count dfa) #f)) |
241 | (constant-values (make-vector (dfa-var-count dfa) #f)) | |
242 | (has-constv (make-bitvector (dfa-var-count dfa) #f)) | |
243 | (has-slotv (make-bitvector (dfa-var-count dfa) #t)) | |
244 | (needs-slotv (make-bitvector (dfa-var-count dfa) #t)) | |
0c247a2f | 245 | (needs-hintv (make-bitvector (dfa-var-count dfa) #f)) |
987c1f5f AW |
246 | (call-allocations (make-hash-table)) |
247 | (nlocals 0) ; Mutable. It pains me. | |
248 | (nlocals-table (make-hash-table))) | |
249 | ||
7dbf40ea AW |
250 | (define (label->idx label) (- label min-label)) |
251 | (define (idx->label idx) (+ idx min-label)) | |
252 | ||
987c1f5f AW |
253 | (define (bump-nlocals! nlocals*) |
254 | (when (< nlocals nlocals*) | |
255 | (set! nlocals nlocals*))) | |
256 | ||
257 | (define (empty-live-slots) | |
258 | #b0) | |
259 | ||
260 | (define (add-live-slot slot live-slots) | |
261 | (logior live-slots (ash 1 slot))) | |
262 | ||
263 | (define (kill-dead-slot slot live-slots) | |
264 | (logand live-slots (lognot (ash 1 slot)))) | |
265 | ||
266 | (define (compute-slot live-slots hint) | |
267 | (if (and hint (not (logbit? hint live-slots))) | |
268 | hint | |
269 | (find-first-zero live-slots))) | |
270 | ||
271 | (define (compute-call-proc-slot live-slots) | |
f8085163 | 272 | (+ 2 (find-first-trailing-zero live-slots))) |
987c1f5f AW |
273 | |
274 | (define (compute-prompt-handler-proc-slot live-slots) | |
275 | (1- (find-first-trailing-zero live-slots))) | |
276 | ||
277 | (define (recompute-live-slots k nargs) | |
278 | (let ((in (dfa-k-in dfa (dfa-k-idx dfa k)))) | |
279 | (let lp ((v 0) (live-slots (1- (ash 1 (1+ nargs))))) | |
280 | (let ((v (bit-position #t in v))) | |
281 | (if v | |
282 | (let ((slot (vector-ref slots v))) | |
283 | (lp (1+ v) | |
284 | (if slot | |
285 | (add-live-slot slot live-slots) | |
286 | live-slots))) | |
287 | live-slots))))) | |
288 | ||
289 | (define* (allocate! var-idx hint live) | |
290 | (cond | |
291 | ((not (bitvector-ref needs-slotv var-idx)) live) | |
292 | ((vector-ref slots var-idx) => (cut add-live-slot <> live)) | |
c79f873e | 293 | ((and (not hint) (bitvector-ref needs-hintv var-idx)) live) |
987c1f5f AW |
294 | (else |
295 | (let ((slot (compute-slot live hint))) | |
296 | (bump-nlocals! (1+ slot)) | |
297 | (vector-set! slots var-idx slot) | |
298 | (add-live-slot slot live))))) | |
299 | ||
300 | ;; Although some parallel moves may proceed without a temporary | |
301 | ;; slot, in general one is needed. That temporary slot must not be | |
302 | ;; part of the source or destination sets, and that slot should not | |
303 | ;; correspond to a live variable. Usually the source and | |
304 | ;; destination sets are a subset of the union of the live sets | |
305 | ;; before and after the move. However for stack slots that don't | |
306 | ;; have names -- those slots that correspond to function arguments | |
307 | ;; or to function return values -- it could be that they are out of | |
308 | ;; the computed live set. In that case they need to be adjoined to | |
309 | ;; the live set, used when choosing a temporary slot. | |
310 | (define (compute-tmp-slot live stack-slots) | |
311 | (find-first-zero (fold add-live-slot live stack-slots))) | |
312 | ||
313 | (define (parallel-move src-slots dst-slots tmp-slot) | |
314 | (let ((moves (solve-parallel-move src-slots dst-slots tmp-slot))) | |
315 | (when (assv tmp-slot moves) | |
316 | (bump-nlocals! (1+ tmp-slot))) | |
317 | moves)) | |
318 | ||
319 | ;; Find variables that are actually constant, and determine which | |
320 | ;; of those can avoid slot allocation. | |
321 | (define (compute-constants!) | |
322 | (let lp ((n 0)) | |
323 | (when (< n (vector-length constant-values)) | |
324 | (let ((sym (dfa-var-sym dfa n))) | |
325 | (call-with-values (lambda () (find-constant-value sym dfg)) | |
326 | (lambda (has-const? const) | |
327 | (when has-const? | |
328 | (bitvector-set! has-constv n has-const?) | |
329 | (vector-set! constant-values n const) | |
330 | (when (not (constant-needs-allocation? sym const dfg)) | |
331 | (bitvector-set! needs-slotv n #f))) | |
332 | (lp (1+ n)))))))) | |
333 | ||
334 | ;; Record uses and defs, as lists of variable indexes, indexed by | |
7dbf40ea | 335 | ;; label index. |
987c1f5f AW |
336 | (define (compute-uses-and-defs!) |
337 | (let lp ((n 0)) | |
338 | (when (< n (vector-length usev)) | |
7dbf40ea | 339 | (match (lookup-cont (idx->label n) dfg) |
24b611e8 | 340 | (($ $kentry src meta self) |
987c1f5f AW |
341 | (vector-set! defv n (list (dfa-var-idx dfa self)))) |
342 | (($ $kargs names syms body) | |
343 | (vector-set! defv n (map (cut dfa-var-idx dfa <>) syms)) | |
344 | (vector-set! usev n | |
345 | (map (cut dfa-var-idx dfa <>) | |
346 | (match (find-expression body) | |
347 | (($ $call proc args) | |
348 | (cons proc args)) | |
b3ae2b50 AW |
349 | (($ $callk k proc args) |
350 | (cons proc args)) | |
987c1f5f AW |
351 | (($ $primcall name args) |
352 | args) | |
353 | (($ $values args) | |
354 | args) | |
7ab76a83 | 355 | (($ $prompt escape? tag handler) |
987c1f5f AW |
356 | (list tag)) |
357 | (_ '()))))) | |
358 | (_ #f)) | |
359 | (lp (1+ n))))) | |
360 | ||
fa48a2f7 AW |
361 | ;; Results of function calls that are not used don't need to be |
362 | ;; allocated to slots. | |
363 | (define (compute-unused-results!) | |
7dbf40ea AW |
364 | (define (kreceive-get-kargs kreceive) |
365 | (match (lookup-cont kreceive dfg) | |
366 | (($ $kreceive arity kargs) kargs) | |
fa48a2f7 | 367 | (_ #f))) |
7dbf40ea | 368 | (let ((candidates (make-bitvector label-count #f))) |
36527695 | 369 | ;; Find all $kargs that are the successors of $kreceive nodes. |
fa48a2f7 | 370 | (let lp ((n 0)) |
7dbf40ea AW |
371 | (when (< n label-count) |
372 | (and=> (kreceive-get-kargs (idx->label n)) | |
fa48a2f7 | 373 | (lambda (kargs) |
7dbf40ea | 374 | (bitvector-set! candidates (label->idx kargs) #t))) |
fa48a2f7 | 375 | (lp (1+ n)))) |
36527695 | 376 | ;; For $kargs that only have $kreceive predecessors, remove unused |
fa48a2f7 AW |
377 | ;; variables from the needs-slotv set. |
378 | (let lp ((n 0)) | |
379 | (let ((n (bit-position #t candidates n))) | |
380 | (when n | |
7dbf40ea | 381 | (match (lookup-predecessors (idx->label n) dfg) |
36527695 | 382 | ;; At least one kreceive is in the predecessor set, so we |
fa48a2f7 AW |
383 | ;; only need to do the check for nodes with >1 |
384 | ;; predecessor. | |
36527695 | 385 | ((or (_) ((? kreceive-get-kargs) ...)) |
fa48a2f7 | 386 | (for-each (lambda (var) |
7dbf40ea | 387 | (when (dead-after-def? (idx->label n) var dfa) |
fa48a2f7 AW |
388 | (bitvector-set! needs-slotv var #f))) |
389 | (vector-ref defv n))) | |
390 | (_ #f)) | |
391 | (lp (1+ n))))))) | |
392 | ||
0c247a2f AW |
393 | ;; Compute the set of variables whose allocation should be delayed |
394 | ;; until a "hint" is known about where to allocate them. This is | |
395 | ;; the case for some procedure arguments. | |
396 | ;; | |
397 | ;; This algorithm used is a conservative approximation of what | |
398 | ;; really should happen, which would be eager allocation of call | |
399 | ;; frames as soon as it's known that a call will happen. It would | |
400 | ;; be nice to recast this as a proper data-flow problem. | |
401 | (define (compute-needs-hint!) | |
402 | ;; We traverse the graph using reverse-post-order on a forward | |
403 | ;; control-flow graph, but we did the live variable analysis in | |
404 | ;; the opposite direction -- so the continuation numbers don't | |
405 | ;; correspond. This helper adapts them. | |
7dbf40ea AW |
406 | (define (label-idx->dfa-k-idx n) |
407 | (dfa-k-idx dfa (idx->label n))) | |
0c247a2f AW |
408 | |
409 | (define (live-before n) | |
7dbf40ea | 410 | (dfa-k-in dfa (label-idx->dfa-k-idx n))) |
0c247a2f | 411 | (define (live-after n) |
7dbf40ea | 412 | (dfa-k-out dfa (label-idx->dfa-k-idx n))) |
0c247a2f AW |
413 | |
414 | ;; Walk backwards. At a call, compute the set of variables that | |
415 | ;; have allocated slots and are live before but not after. This | |
416 | ;; set contains candidates for needs-hintv. | |
417 | (define (scan-for-call n) | |
418 | (when (<= 0 n) | |
7dbf40ea | 419 | (match (lookup-cont (idx->label n) dfg) |
0c247a2f AW |
420 | (($ $kargs names syms body) |
421 | (match (find-expression body) | |
b3ae2b50 | 422 | ((or ($ $call) ($ $callk)) |
0c247a2f AW |
423 | (let ((args (make-bitvector (bitvector-length needs-slotv) #f))) |
424 | (bit-set*! args (live-before n) #t) | |
425 | (bit-set*! args (live-after n) #f) | |
426 | (bit-set*! args no-slot-needed #f) | |
427 | (if (bit-position #t args 0) | |
428 | (scan-for-hints (1- n) args) | |
429 | (scan-for-call (1- n))))) | |
430 | (_ (scan-for-call (1- n))))) | |
431 | (_ (scan-for-call (1- n)))))) | |
432 | ||
433 | ;; Walk backwards in the current basic block. Stop when the block | |
434 | ;; ends, we reach a call, or when an expression kills a value. | |
435 | (define (scan-for-hints n args) | |
436 | (when (< 0 n) | |
7dbf40ea | 437 | (match (lookup-cont (idx->label n) dfg) |
0c247a2f | 438 | (($ $kargs names syms body) |
7dbf40ea AW |
439 | (match (lookup-predecessors (idx->label (1+ n)) dfg) |
440 | (((? (cut eqv? <> (idx->label n)))) | |
0c247a2f AW |
441 | ;; If we are indeed in the same basic block, then if we |
442 | ;; are finished with the scan, we kill uses of the | |
443 | ;; terminator, but leave its definitions. | |
444 | (match (find-expression body) | |
445 | ((or ($ $void) ($ $const) ($ $prim) ($ $fun) | |
f4092958 AW |
446 | ($ $primcall) ($ $prompt) |
447 | ;; If $values has more than one argument, it may | |
448 | ;; use a temporary, which would invalidate our | |
449 | ;; assumptions that slots not allocated are not | |
450 | ;; used. | |
451 | ($ $values (or () (_)))) | |
0c247a2f AW |
452 | (let ((dead (make-bitvector (bitvector-length args) #f))) |
453 | (bit-set*! dead (live-before n) #t) | |
454 | (bit-set*! dead (live-after n) #f) | |
455 | (bit-set*! dead no-slot-needed #f) | |
456 | (if (bit-position #t dead 0) | |
457 | (finish-hints n (live-before n) args) | |
458 | (scan-for-hints (1- n) args)))) | |
b3ae2b50 | 459 | ((or ($ $call) ($ $callk) ($ $values)) |
0c247a2f AW |
460 | (finish-hints n (live-before n) args)))) |
461 | ;; Otherwise we kill uses of the block entry. | |
462 | (_ (finish-hints n (live-before (1+ n)) args)))) | |
463 | (_ (finish-hints n (live-before (1+ n)) args))))) | |
464 | ||
465 | ;; Add definitions ARGS minus KILL to NEED-HINTS, and go back to | |
466 | ;; looking for calls. | |
467 | (define (finish-hints n kill args) | |
468 | (bit-invert! args) | |
469 | (bit-set*! args kill #t) | |
470 | (bit-invert! args) | |
471 | (bit-set*! needs-hintv args #t) | |
472 | (scan-for-call n)) | |
473 | ||
474 | (define no-slot-needed | |
475 | (make-bitvector (bitvector-length needs-slotv) #f)) | |
476 | ||
477 | (bit-set*! no-slot-needed needs-slotv #t) | |
478 | (bit-invert! no-slot-needed) | |
7dbf40ea | 479 | (scan-for-call (1- label-count))) |
0c247a2f | 480 | |
987c1f5f | 481 | (define (allocate-call label k uses pre-live post-live) |
7dbf40ea | 482 | (match (lookup-cont k dfg) |
987c1f5f AW |
483 | (($ $ktail) |
484 | (let* ((tail-nlocals (length uses)) | |
485 | (tail-slots (iota tail-nlocals)) | |
0c247a2f | 486 | (pre-live (fold allocate! pre-live uses tail-slots)) |
987c1f5f AW |
487 | (moves (parallel-move (map (cut vector-ref slots <>) uses) |
488 | tail-slots | |
489 | (compute-tmp-slot pre-live tail-slots)))) | |
490 | (bump-nlocals! tail-nlocals) | |
491 | (hashq-set! call-allocations label | |
02c624fc | 492 | (make-call-allocation #f moves #f)))) |
36527695 | 493 | (($ $kreceive arity kargs) |
987c1f5f AW |
494 | (let* ((proc-slot (compute-call-proc-slot post-live)) |
495 | (call-slots (map (cut + proc-slot <>) (iota (length uses)))) | |
0c247a2f | 496 | (pre-live (fold allocate! pre-live uses call-slots)) |
987c1f5f AW |
497 | (arg-moves (parallel-move (map (cut vector-ref slots <>) uses) |
498 | call-slots | |
499 | (compute-tmp-slot pre-live | |
500 | call-slots))) | |
7dbf40ea | 501 | (result-vars (vector-ref defv (label->idx kargs))) |
987c1f5f AW |
502 | (value-slots (map (cut + proc-slot 1 <>) |
503 | (iota (length result-vars)))) | |
ad4f6be1 AW |
504 | ;; Shuffle the first result down to the lowest slot, and |
505 | ;; leave any remaining results where they are. This | |
506 | ;; strikes a balance between avoiding shuffling, | |
507 | ;; especially for unused extra values, and avoiding | |
508 | ;; frame size growth due to sparse locals. | |
509 | (result-live (match (cons result-vars value-slots) | |
510 | ((() . ()) post-live) | |
511 | (((var . vars) . (slot . slots)) | |
512 | (fold allocate! | |
513 | (allocate! var #f post-live) | |
514 | vars slots)))) | |
987c1f5f | 515 | (result-slots (map (cut vector-ref slots <>) result-vars)) |
fa48a2f7 AW |
516 | ;; Filter out unused results. |
517 | (value-slots (filter-map (lambda (val result) (and result val)) | |
518 | value-slots result-slots)) | |
519 | (result-slots (filter (lambda (x) x) result-slots)) | |
987c1f5f AW |
520 | (result-moves (parallel-move value-slots |
521 | result-slots | |
522 | (compute-tmp-slot result-live | |
02c624fc AW |
523 | value-slots))) |
524 | (dead-slot-map (logand (1- (ash 1 (- proc-slot 2))) | |
525 | (lognot post-live)))) | |
987c1f5f AW |
526 | (bump-nlocals! (+ proc-slot (length uses))) |
527 | (hashq-set! call-allocations label | |
02c624fc | 528 | (make-call-allocation proc-slot arg-moves dead-slot-map)) |
987c1f5f | 529 | (hashq-set! call-allocations k |
02c624fc | 530 | (make-call-allocation proc-slot result-moves #f)))) |
987c1f5f | 531 | |
6e8ad823 | 532 | (_ |
987c1f5f AW |
533 | (let* ((proc-slot (compute-call-proc-slot post-live)) |
534 | (call-slots (map (cut + proc-slot <>) (iota (length uses)))) | |
0c247a2f | 535 | (pre-live (fold allocate! pre-live uses call-slots)) |
987c1f5f AW |
536 | (arg-moves (parallel-move (map (cut vector-ref slots <>) uses) |
537 | call-slots | |
538 | (compute-tmp-slot pre-live | |
539 | call-slots)))) | |
540 | (bump-nlocals! (+ proc-slot (length uses))) | |
541 | (hashq-set! call-allocations label | |
02c624fc | 542 | (make-call-allocation proc-slot arg-moves #f)))))) |
987c1f5f AW |
543 | |
544 | (define (allocate-values label k uses pre-live post-live) | |
7dbf40ea | 545 | (match (lookup-cont k dfg) |
8a2d420f AW |
546 | (($ $ktail) |
547 | (let* ((src-slots (map (cut vector-ref slots <>) uses)) | |
548 | (tail-nlocals (1+ (length uses))) | |
549 | (dst-slots (cdr (iota tail-nlocals))) | |
550 | (moves (parallel-move src-slots dst-slots | |
551 | (compute-tmp-slot pre-live dst-slots)))) | |
552 | (bump-nlocals! tail-nlocals) | |
553 | (hashq-set! call-allocations label | |
02c624fc | 554 | (make-call-allocation #f moves #f)))) |
8a2d420f AW |
555 | (($ $kargs (_) (_)) |
556 | ;; When there is only one value in play, we allow the dst to be | |
557 | ;; hinted (see scan-for-hints). If the src doesn't have a | |
558 | ;; slot, then the actual slot for the dst would end up being | |
559 | ;; decided by the call that uses it. Because we don't know the | |
560 | ;; slot, we can't really compute the parallel moves in that | |
561 | ;; case, so just bail and rely on the bytecode emitter to | |
562 | ;; handle the one-value case specially. | |
7dbf40ea | 563 | (match (cons uses (vector-ref defv (label->idx k))) |
8a2d420f AW |
564 | (((src) . (dst)) |
565 | (allocate! dst (vector-ref slots src) post-live)))) | |
566 | (($ $kargs) | |
567 | (let* ((src-slots (map (cut vector-ref slots <>) uses)) | |
7dbf40ea | 568 | (dst-vars (vector-ref defv (label->idx k))) |
8a2d420f AW |
569 | (result-live (fold allocate! post-live dst-vars src-slots)) |
570 | (dst-slots (map (cut vector-ref slots <>) dst-vars)) | |
571 | (moves (parallel-move src-slots dst-slots | |
572 | (compute-tmp-slot (logior pre-live result-live) | |
573 | '())))) | |
574 | (hashq-set! call-allocations label | |
02c624fc | 575 | (make-call-allocation #f moves #f)))) |
8a2d420f | 576 | (($ $kif) #f))) |
987c1f5f AW |
577 | |
578 | (define (allocate-prompt label k handler nargs) | |
7dbf40ea | 579 | (match (lookup-cont handler dfg) |
36527695 | 580 | (($ $kreceive arity kargs) |
987c1f5f AW |
581 | (let* ((handler-live (recompute-live-slots handler nargs)) |
582 | (proc-slot (compute-prompt-handler-proc-slot handler-live)) | |
7dbf40ea | 583 | (result-vars (vector-ref defv (label->idx kargs))) |
987c1f5f AW |
584 | (value-slots (map (cut + proc-slot 1 <>) |
585 | (iota (length result-vars)))) | |
586 | (result-live (fold allocate! | |
587 | handler-live result-vars value-slots)) | |
588 | (result-slots (map (cut vector-ref slots <>) result-vars)) | |
fa48a2f7 AW |
589 | ;; Filter out unused results. |
590 | (value-slots (filter-map (lambda (val result) (and result val)) | |
591 | value-slots result-slots)) | |
592 | (result-slots (filter (lambda (x) x) result-slots)) | |
987c1f5f AW |
593 | (moves (parallel-move value-slots |
594 | result-slots | |
595 | (compute-tmp-slot result-live | |
596 | value-slots)))) | |
597 | (bump-nlocals! (+ proc-slot 1 (length result-vars))) | |
598 | (hashq-set! call-allocations handler | |
02c624fc | 599 | (make-call-allocation proc-slot moves #f)))))) |
987c1f5f AW |
600 | |
601 | (define (allocate-defs! n live) | |
602 | (fold (cut allocate! <> #f <>) live (vector-ref defv n))) | |
603 | ||
604 | ;; This traversal will visit definitions before uses, as | |
605 | ;; definitions dominate uses and a block's dominator will appear | |
606 | ;; before it, in reverse post-order. | |
607 | (define (visit-clause n nargs live) | |
608 | (let lp ((n n) (live live)) | |
7dbf40ea | 609 | (define (kill-dead live vars-by-label-idx pred) |
987c1f5f AW |
610 | (fold (lambda (v live) |
611 | (let ((slot (vector-ref slots v))) | |
612 | (if (and slot | |
613 | (> slot nargs) | |
7dbf40ea | 614 | (pred (idx->label n) v dfa)) |
987c1f5f AW |
615 | (kill-dead-slot slot live) |
616 | live))) | |
617 | live | |
7dbf40ea | 618 | (vector-ref vars-by-label-idx n))) |
987c1f5f AW |
619 | (define (kill-dead-defs live) |
620 | (kill-dead live defv dead-after-def?)) | |
621 | (define (kill-dead-uses live) | |
622 | (kill-dead live usev dead-after-use?)) | |
7dbf40ea | 623 | (if (= n label-count) |
987c1f5f | 624 | n |
7dbf40ea | 625 | (let* ((label (idx->label n)) |
987c1f5f AW |
626 | (live (if (control-point? label dfg) |
627 | (recompute-live-slots label nargs) | |
628 | live)) | |
629 | (live (kill-dead-defs (allocate-defs! n live))) | |
630 | (post-live (kill-dead-uses live))) | |
631 | ;; LIVE are the live slots coming into the term. | |
632 | ;; POST-LIVE is the subset that is still live after the | |
633 | ;; term uses its inputs. | |
7dbf40ea | 634 | (match (lookup-cont (idx->label n) dfg) |
987c1f5f AW |
635 | (($ $kclause) n) |
636 | (($ $kargs names syms body) | |
637 | (let ((uses (vector-ref usev n))) | |
638 | (match (find-call body) | |
b3ae2b50 | 639 | (($ $continue k src (or ($ $call) ($ $callk))) |
987c1f5f AW |
640 | (allocate-call label k uses live post-live)) |
641 | (($ $continue k src ($ $primcall)) #t) | |
8a2d420f | 642 | (($ $continue k src ($ $values)) |
987c1f5f | 643 | (allocate-values label k uses live post-live)) |
7ab76a83 | 644 | (($ $continue k src ($ $prompt escape? tag handler)) |
987c1f5f AW |
645 | (allocate-prompt label k handler nargs)) |
646 | (_ #f))) | |
647 | (lp (1+ n) post-live)) | |
36527695 | 648 | ((or ($ $kreceive) ($ $kif) ($ $ktail)) |
987c1f5f AW |
649 | (lp (1+ n) post-live))))))) |
650 | ||
651 | (define (visit-entry) | |
652 | (define (visit-clauses n live) | |
653 | (unless (eqv? live (add-live-slot 0 (empty-live-slots))) | |
654 | (error "Unexpected clause live set")) | |
655 | (set! nlocals 1) | |
7dbf40ea | 656 | (match (lookup-cont (idx->label n) dfg) |
90dce16d | 657 | (($ $kclause arity ($ $cont kbody ($ $kargs names)) alternate) |
7dbf40ea AW |
658 | (unless (eq? (idx->label (1+ n)) kbody) |
659 | (error "Unexpected label order")) | |
0c247a2f AW |
660 | (let* ((nargs (length names)) |
661 | (next (visit-clause (1+ n) | |
662 | nargs | |
663 | (fold allocate! live | |
664 | (vector-ref defv (1+ n)) | |
665 | (cdr (iota (1+ nargs))))))) | |
7dbf40ea AW |
666 | (hashq-set! nlocals-table (idx->label n) nlocals) |
667 | (when (< next label-count) | |
90dce16d AW |
668 | (match alternate |
669 | (($ $cont kalt) | |
7dbf40ea | 670 | (unless (eq? kalt (idx->label next)) |
90dce16d | 671 | (error "Unexpected clause order")))) |
0c247a2f | 672 | (visit-clauses next live)))))) |
7dbf40ea | 673 | (match (lookup-cont (idx->label 0) dfg) |
24b611e8 | 674 | (($ $kentry src meta self) |
987c1f5f AW |
675 | (visit-clauses 1 (allocate-defs! 0 (empty-live-slots)))))) |
676 | ||
677 | (compute-constants!) | |
678 | (compute-uses-and-defs!) | |
fa48a2f7 | 679 | (compute-unused-results!) |
0c247a2f | 680 | (compute-needs-hint!) |
987c1f5f AW |
681 | (visit-entry) |
682 | ||
683 | (make-allocation dfa slots | |
684 | has-constv constant-values | |
685 | call-allocations | |
686 | nlocals-table))) |