Commit | Line | Data |
---|---|---|
6e8ad823 AW |
1 | ;;; Continuation-passing style (CPS) intermediate language (IL) |
2 | ||
a9ec16f9 | 3 | ;; Copyright (C) 2013, 2014, 2015 Free Software Foundation, Inc. |
6e8ad823 AW |
4 | |
5 | ;;;; This library is free software; you can redistribute it and/or | |
6 | ;;;; modify it under the terms of the GNU Lesser General Public | |
7 | ;;;; License as published by the Free Software Foundation; either | |
8 | ;;;; version 3 of the License, or (at your option) any later version. | |
9 | ;;;; | |
10 | ;;;; This library is distributed in the hope that it will be useful, | |
11 | ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | ;;;; Lesser General Public License for more details. | |
14 | ;;;; | |
15 | ;;;; You should have received a copy of the GNU Lesser General Public | |
16 | ;;;; License along with this library; if not, write to the Free Software | |
17 | ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 | ||
19 | ;;; Commentary: | |
20 | ;;; | |
21 | ;;; A module to assign stack slots to variables in a CPS term. | |
22 | ;;; | |
23 | ;;; Code: | |
24 | ||
25 | (define-module (language cps slot-allocation) | |
26 | #:use-module (ice-9 match) | |
27 | #:use-module (srfi srfi-1) | |
28 | #:use-module (srfi srfi-9) | |
29 | #:use-module (srfi srfi-26) | |
30 | #:use-module (language cps) | |
31 | #:use-module (language cps dfg) | |
5ded8498 | 32 | #:use-module (language cps intset) |
6e8ad823 AW |
33 | #:export (allocate-slots |
34 | lookup-slot | |
987c1f5f | 35 | lookup-maybe-slot |
6e8ad823 AW |
36 | lookup-constant-value |
37 | lookup-maybe-constant-value | |
38 | lookup-nlocals | |
39 | lookup-call-proc-slot | |
02c624fc AW |
40 | lookup-parallel-moves |
41 | lookup-dead-slot-map)) | |
6e8ad823 | 42 | |
6e8ad823 | 43 | (define-record-type $allocation |
987c1f5f AW |
44 | (make-allocation dfa slots |
45 | has-constv constant-values | |
46 | call-allocations | |
47 | nlocals) | |
6e8ad823 | 48 | allocation? |
987c1f5f AW |
49 | |
50 | ;; A DFA records all variables bound in a function, and assigns them | |
51 | ;; indices. The slot in which a variable is stored at runtime can be | |
52 | ;; had by indexing into the SLOTS vector with the variable's index. | |
53 | ;; | |
54 | (dfa allocation-dfa) | |
55 | (slots allocation-slots) | |
56 | ||
57 | ;; Not all variables have slots allocated. Variables that are | |
58 | ;; constant and that are only used by primcalls that can accept | |
59 | ;; constants directly are not allocated to slots, and their SLOT value | |
60 | ;; is false. Likewise constants that are only used by calls are not | |
61 | ;; allocated into slots, to avoid needless copying. If a variable is | |
62 | ;; constant, its constant value is set in the CONSTANT-VALUES vector | |
63 | ;; and the corresponding bit in the HAS-CONSTV bitvector is set. | |
64 | ;; | |
65 | (has-constv allocation-has-constv) | |
66 | (constant-values allocation-constant-values) | |
67 | ||
68 | ;; Some continuations have additional associated information. This | |
69 | ;; addition information is a /call allocation/. Call allocations | |
70 | ;; record the way that functions are passed values, and how their | |
71 | ;; return values are rebound to local variables. | |
72 | ;; | |
02c624fc AW |
73 | ;; A call allocation contains three pieces of information: the call's |
74 | ;; /proc slot/, a set of /parallel moves/, and a /dead slot map/. The | |
75 | ;; proc slot indicates the slot of a procedure in a procedure call, or | |
76 | ;; where the procedure would be in a multiple-value return. The | |
77 | ;; parallel moves shuffle locals into position for a call, or shuffle | |
78 | ;; returned values back into place. Though they use the same slot, | |
79 | ;; moves for a call are called "call moves", and moves to handle a | |
80 | ;; return are "return moves". The dead slot map indicates, for a | |
81 | ;; call, what slots should be ignored by GC when marking the frame. | |
987c1f5f | 82 | ;; |
36527695 | 83 | ;; $kreceive continuations record a proc slot and a set of return moves |
987c1f5f AW |
84 | ;; to adapt multiple values from the stack to local variables. |
85 | ;; | |
86 | ;; Tail calls record arg moves, but no proc slot. | |
87 | ;; | |
02c624fc AW |
88 | ;; Non-tail calls record arg moves, a call slot, and a dead slot map. |
89 | ;; Multiple-valued returns will have an associated $kreceive | |
90 | ;; continuation, which records the same proc slot, but has return | |
91 | ;; moves and no dead slot map. | |
987c1f5f | 92 | ;; |
36527695 | 93 | ;; $prompt handlers are $kreceive continuations like any other. |
987c1f5f AW |
94 | ;; |
95 | ;; $values expressions with more than 1 value record moves but have no | |
02c624fc | 96 | ;; proc slot or dead slot map. |
8d59d55e | 97 | ;; |
987c1f5f AW |
98 | ;; A set of moves is expressed as an ordered list of (SRC . DST) |
99 | ;; moves, where SRC and DST are slots. This may involve a temporary | |
02c624fc | 100 | ;; variable. A dead slot map is a bitfield, as an integer. |
6e8ad823 | 101 | ;; |
987c1f5f AW |
102 | (call-allocations allocation-call-allocations) |
103 | ||
104 | ;; The number of locals for a $kclause. | |
105 | ;; | |
106 | (nlocals allocation-nlocals)) | |
107 | ||
108 | (define-record-type $call-allocation | |
02c624fc | 109 | (make-call-allocation proc-slot moves dead-slot-map) |
987c1f5f AW |
110 | call-allocation? |
111 | (proc-slot call-allocation-proc-slot) | |
02c624fc AW |
112 | (moves call-allocation-moves) |
113 | (dead-slot-map call-allocation-dead-slot-map)) | |
6e8ad823 AW |
114 | |
115 | (define (find-first-zero n) | |
116 | ;; Naive implementation. | |
117 | (let lp ((slot 0)) | |
118 | (if (logbit? slot n) | |
119 | (lp (1+ slot)) | |
120 | slot))) | |
121 | ||
987c1f5f AW |
122 | (define (find-first-trailing-zero n) |
123 | (let lp ((slot (let lp ((count 2)) | |
124 | (if (< n (ash 1 (1- count))) | |
125 | count | |
126 | ;; Grow upper bound slower than factor 2 to avoid | |
127 | ;; needless bignum allocation on 32-bit systems | |
128 | ;; when there are more than 16 locals. | |
129 | (lp (+ count (ash count -1))))))) | |
6e8ad823 AW |
130 | (if (or (zero? slot) (logbit? (1- slot) n)) |
131 | slot | |
132 | (lp (1- slot))))) | |
133 | ||
987c1f5f AW |
134 | (define (lookup-maybe-slot sym allocation) |
135 | (match allocation | |
136 | (($ $allocation dfa slots) | |
137 | (vector-ref slots (dfa-var-idx dfa sym))))) | |
6e8ad823 AW |
138 | |
139 | (define (lookup-slot sym allocation) | |
987c1f5f AW |
140 | (or (lookup-maybe-slot sym allocation) |
141 | (error "Variable not allocated to a slot" sym))) | |
6e8ad823 AW |
142 | |
143 | (define (lookup-constant-value sym allocation) | |
987c1f5f AW |
144 | (match allocation |
145 | (($ $allocation dfa slots has-constv constant-values) | |
146 | (let ((idx (dfa-var-idx dfa sym))) | |
147 | (if (bitvector-ref has-constv idx) | |
148 | (vector-ref constant-values idx) | |
149 | (error "Variable does not have constant value" sym)))))) | |
6e8ad823 AW |
150 | |
151 | (define (lookup-maybe-constant-value sym allocation) | |
987c1f5f AW |
152 | (match allocation |
153 | (($ $allocation dfa slots has-constv constant-values) | |
154 | (let ((idx (dfa-var-idx dfa sym))) | |
155 | (values (bitvector-ref has-constv idx) | |
156 | (vector-ref constant-values idx)))))) | |
6e8ad823 | 157 | |
987c1f5f AW |
158 | (define (lookup-call-allocation k allocation) |
159 | (or (hashq-ref (allocation-call-allocations allocation) k) | |
160 | (error "Continuation not a call" k))) | |
6e8ad823 | 161 | |
987c1f5f AW |
162 | (define (lookup-call-proc-slot k allocation) |
163 | (or (call-allocation-proc-slot (lookup-call-allocation k allocation)) | |
164 | (error "Call has no proc slot" k))) | |
6e8ad823 AW |
165 | |
166 | (define (lookup-parallel-moves k allocation) | |
987c1f5f AW |
167 | (or (call-allocation-moves (lookup-call-allocation k allocation)) |
168 | (error "Call has no use parallel moves slot" k))) | |
169 | ||
02c624fc AW |
170 | (define (lookup-dead-slot-map k allocation) |
171 | (or (call-allocation-dead-slot-map (lookup-call-allocation k allocation)) | |
172 | (error "Call has no dead slot map" k))) | |
173 | ||
987c1f5f AW |
174 | (define (lookup-nlocals k allocation) |
175 | (or (hashq-ref (allocation-nlocals allocation) k) | |
176 | (error "Not a clause continuation" k))) | |
6e8ad823 AW |
177 | |
178 | (define (solve-parallel-move src dst tmp) | |
179 | "Solve the parallel move problem between src and dst slot lists, which | |
180 | are comparable with eqv?. A tmp slot may be used." | |
181 | ||
182 | ;; This algorithm is taken from: "Tilting at windmills with Coq: | |
183 | ;; formal verification of a compilation algorithm for parallel moves" | |
184 | ;; by Laurence Rideau, Bernard Paul Serpette, and Xavier Leroy | |
185 | ;; <http://gallium.inria.fr/~xleroy/publi/parallel-move.pdf> | |
186 | ||
187 | (define (split-move moves reg) | |
188 | (let loop ((revhead '()) (tail moves)) | |
189 | (match tail | |
190 | (((and s+d (s . d)) . rest) | |
191 | (if (eqv? s reg) | |
192 | (cons d (append-reverse revhead rest)) | |
193 | (loop (cons s+d revhead) rest))) | |
194 | (_ #f)))) | |
195 | ||
196 | (define (replace-last-source reg moves) | |
197 | (match moves | |
198 | ((moves ... (s . d)) | |
199 | (append moves (list (cons reg d)))))) | |
200 | ||
201 | (let loop ((to-move (map cons src dst)) | |
202 | (being-moved '()) | |
203 | (moved '()) | |
204 | (last-source #f)) | |
205 | ;; 'last-source' should always be equivalent to: | |
206 | ;; (and (pair? being-moved) (car (last being-moved))) | |
207 | (match being-moved | |
208 | (() (match to-move | |
209 | (() (reverse moved)) | |
210 | (((and s+d (s . d)) . t1) | |
211 | (if (or (eqv? s d) ; idempotent | |
212 | (not s)) ; src is a constant and can be loaded directly | |
213 | (loop t1 '() moved #f) | |
214 | (loop t1 (list s+d) moved s))))) | |
215 | (((and s+d (s . d)) . b) | |
216 | (match (split-move to-move d) | |
217 | ((r . t1) (loop t1 (acons d r being-moved) moved last-source)) | |
218 | (#f (match b | |
219 | (() (loop to-move '() (cons s+d moved) #f)) | |
220 | (_ (if (eqv? d last-source) | |
221 | (loop to-move | |
222 | (replace-last-source tmp b) | |
223 | (cons s+d (acons d tmp moved)) | |
224 | tmp) | |
225 | (loop to-move b (cons s+d moved) last-source)))))))))) | |
226 | ||
863034a8 | 227 | (define (dead-after-def? k-idx v-idx dfa) |
5ded8498 | 228 | (not (intset-ref (dfa-k-in dfa k-idx) v-idx))) |
e636f424 | 229 | |
863034a8 | 230 | (define (dead-after-use? k-idx v-idx dfa) |
5ded8498 | 231 | (not (intset-ref (dfa-k-out dfa k-idx) v-idx))) |
e636f424 | 232 | |
d258fccc | 233 | (define (allocate-slots fun dfg) |
987c1f5f | 234 | (let* ((dfa (compute-live-variables fun dfg)) |
7dbf40ea AW |
235 | (min-label (dfg-min-label dfg)) |
236 | (label-count (dfg-label-count dfg)) | |
237 | (usev (make-vector label-count '())) | |
238 | (defv (make-vector label-count '())) | |
987c1f5f AW |
239 | (slots (make-vector (dfa-var-count dfa) #f)) |
240 | (constant-values (make-vector (dfa-var-count dfa) #f)) | |
241 | (has-constv (make-bitvector (dfa-var-count dfa) #f)) | |
242 | (has-slotv (make-bitvector (dfa-var-count dfa) #t)) | |
243 | (needs-slotv (make-bitvector (dfa-var-count dfa) #t)) | |
0c247a2f | 244 | (needs-hintv (make-bitvector (dfa-var-count dfa) #f)) |
987c1f5f AW |
245 | (call-allocations (make-hash-table)) |
246 | (nlocals 0) ; Mutable. It pains me. | |
247 | (nlocals-table (make-hash-table))) | |
248 | ||
7dbf40ea AW |
249 | (define (label->idx label) (- label min-label)) |
250 | (define (idx->label idx) (+ idx min-label)) | |
251 | ||
987c1f5f AW |
252 | (define (bump-nlocals! nlocals*) |
253 | (when (< nlocals nlocals*) | |
254 | (set! nlocals nlocals*))) | |
255 | ||
256 | (define (empty-live-slots) | |
257 | #b0) | |
258 | ||
259 | (define (add-live-slot slot live-slots) | |
260 | (logior live-slots (ash 1 slot))) | |
261 | ||
262 | (define (kill-dead-slot slot live-slots) | |
263 | (logand live-slots (lognot (ash 1 slot)))) | |
264 | ||
265 | (define (compute-slot live-slots hint) | |
d4b3a36d AW |
266 | ;; Slots 253-255 are reserved for shuffling; see comments in |
267 | ;; assembler.scm. | |
268 | (if (and hint (not (logbit? hint live-slots)) | |
269 | (or (< hint 253) (> hint 255))) | |
987c1f5f | 270 | hint |
d4b3a36d AW |
271 | (let ((slot (find-first-zero live-slots))) |
272 | (if (or (< slot 253) (> slot 255)) | |
273 | slot | |
274 | (+ 256 (find-first-zero (ash live-slots -256))))))) | |
987c1f5f AW |
275 | |
276 | (define (compute-call-proc-slot live-slots) | |
f8085163 | 277 | (+ 2 (find-first-trailing-zero live-slots))) |
987c1f5f AW |
278 | |
279 | (define (compute-prompt-handler-proc-slot live-slots) | |
f5765cc2 AW |
280 | (if (zero? live-slots) |
281 | 0 | |
282 | (1- (find-first-trailing-zero live-slots)))) | |
987c1f5f AW |
283 | |
284 | (define (recompute-live-slots k nargs) | |
863034a8 | 285 | (let ((in (dfa-k-in dfa (label->idx k)))) |
f5765cc2 | 286 | (let lp ((v 0) (live-slots 0)) |
5ded8498 | 287 | (let ((v (intset-next in v))) |
987c1f5f AW |
288 | (if v |
289 | (let ((slot (vector-ref slots v))) | |
290 | (lp (1+ v) | |
291 | (if slot | |
292 | (add-live-slot slot live-slots) | |
293 | live-slots))) | |
294 | live-slots))))) | |
295 | ||
296 | (define* (allocate! var-idx hint live) | |
297 | (cond | |
298 | ((not (bitvector-ref needs-slotv var-idx)) live) | |
299 | ((vector-ref slots var-idx) => (cut add-live-slot <> live)) | |
c79f873e | 300 | ((and (not hint) (bitvector-ref needs-hintv var-idx)) live) |
987c1f5f AW |
301 | (else |
302 | (let ((slot (compute-slot live hint))) | |
303 | (bump-nlocals! (1+ slot)) | |
304 | (vector-set! slots var-idx slot) | |
305 | (add-live-slot slot live))))) | |
306 | ||
307 | ;; Although some parallel moves may proceed without a temporary | |
308 | ;; slot, in general one is needed. That temporary slot must not be | |
309 | ;; part of the source or destination sets, and that slot should not | |
310 | ;; correspond to a live variable. Usually the source and | |
311 | ;; destination sets are a subset of the union of the live sets | |
312 | ;; before and after the move. However for stack slots that don't | |
313 | ;; have names -- those slots that correspond to function arguments | |
314 | ;; or to function return values -- it could be that they are out of | |
315 | ;; the computed live set. In that case they need to be adjoined to | |
316 | ;; the live set, used when choosing a temporary slot. | |
d4b3a36d AW |
317 | ;; |
318 | ;; Note that although we reserve slots 253-255 for shuffling | |
319 | ;; operands that address less than the full 24-bit range of locals, | |
320 | ;; that reservation doesn't apply here, because this temporary | |
321 | ;; itself is used while doing parallel assignment via "mov", and | |
322 | ;; "mov" does not need shuffling. | |
987c1f5f AW |
323 | (define (compute-tmp-slot live stack-slots) |
324 | (find-first-zero (fold add-live-slot live stack-slots))) | |
325 | ||
326 | (define (parallel-move src-slots dst-slots tmp-slot) | |
327 | (let ((moves (solve-parallel-move src-slots dst-slots tmp-slot))) | |
328 | (when (assv tmp-slot moves) | |
329 | (bump-nlocals! (1+ tmp-slot))) | |
330 | moves)) | |
331 | ||
332 | ;; Find variables that are actually constant, and determine which | |
333 | ;; of those can avoid slot allocation. | |
334 | (define (compute-constants!) | |
335 | (let lp ((n 0)) | |
336 | (when (< n (vector-length constant-values)) | |
337 | (let ((sym (dfa-var-sym dfa n))) | |
338 | (call-with-values (lambda () (find-constant-value sym dfg)) | |
339 | (lambda (has-const? const) | |
340 | (when has-const? | |
341 | (bitvector-set! has-constv n has-const?) | |
342 | (vector-set! constant-values n const) | |
343 | (when (not (constant-needs-allocation? sym const dfg)) | |
344 | (bitvector-set! needs-slotv n #f))) | |
345 | (lp (1+ n)))))))) | |
346 | ||
347 | ;; Record uses and defs, as lists of variable indexes, indexed by | |
7dbf40ea | 348 | ;; label index. |
987c1f5f AW |
349 | (define (compute-uses-and-defs!) |
350 | (let lp ((n 0)) | |
351 | (when (< n (vector-length usev)) | |
7dbf40ea | 352 | (match (lookup-cont (idx->label n) dfg) |
8320f504 | 353 | (($ $kfun src meta self) |
987c1f5f AW |
354 | (vector-set! defv n (list (dfa-var-idx dfa self)))) |
355 | (($ $kargs names syms body) | |
356 | (vector-set! defv n (map (cut dfa-var-idx dfa <>) syms)) | |
357 | (vector-set! usev n | |
358 | (map (cut dfa-var-idx dfa <>) | |
359 | (match (find-expression body) | |
360 | (($ $call proc args) | |
361 | (cons proc args)) | |
b3ae2b50 AW |
362 | (($ $callk k proc args) |
363 | (cons proc args)) | |
987c1f5f AW |
364 | (($ $primcall name args) |
365 | args) | |
92805e21 AW |
366 | (($ $branch kt ($ $primcall name args)) |
367 | args) | |
368 | (($ $branch kt ($ $values args)) | |
369 | args) | |
987c1f5f AW |
370 | (($ $values args) |
371 | args) | |
7ab76a83 | 372 | (($ $prompt escape? tag handler) |
987c1f5f AW |
373 | (list tag)) |
374 | (_ '()))))) | |
375 | (_ #f)) | |
376 | (lp (1+ n))))) | |
377 | ||
fa48a2f7 AW |
378 | ;; Results of function calls that are not used don't need to be |
379 | ;; allocated to slots. | |
380 | (define (compute-unused-results!) | |
7dbf40ea AW |
381 | (define (kreceive-get-kargs kreceive) |
382 | (match (lookup-cont kreceive dfg) | |
383 | (($ $kreceive arity kargs) kargs) | |
fa48a2f7 | 384 | (_ #f))) |
7dbf40ea | 385 | (let ((candidates (make-bitvector label-count #f))) |
36527695 | 386 | ;; Find all $kargs that are the successors of $kreceive nodes. |
fa48a2f7 | 387 | (let lp ((n 0)) |
7dbf40ea AW |
388 | (when (< n label-count) |
389 | (and=> (kreceive-get-kargs (idx->label n)) | |
fa48a2f7 | 390 | (lambda (kargs) |
7dbf40ea | 391 | (bitvector-set! candidates (label->idx kargs) #t))) |
fa48a2f7 | 392 | (lp (1+ n)))) |
36527695 | 393 | ;; For $kargs that only have $kreceive predecessors, remove unused |
fa48a2f7 AW |
394 | ;; variables from the needs-slotv set. |
395 | (let lp ((n 0)) | |
396 | (let ((n (bit-position #t candidates n))) | |
397 | (when n | |
7dbf40ea | 398 | (match (lookup-predecessors (idx->label n) dfg) |
36527695 | 399 | ;; At least one kreceive is in the predecessor set, so we |
fa48a2f7 AW |
400 | ;; only need to do the check for nodes with >1 |
401 | ;; predecessor. | |
36527695 | 402 | ((or (_) ((? kreceive-get-kargs) ...)) |
fa48a2f7 | 403 | (for-each (lambda (var) |
863034a8 | 404 | (when (dead-after-def? n var dfa) |
fa48a2f7 AW |
405 | (bitvector-set! needs-slotv var #f))) |
406 | (vector-ref defv n))) | |
407 | (_ #f)) | |
408 | (lp (1+ n))))))) | |
409 | ||
0c247a2f AW |
410 | ;; Compute the set of variables whose allocation should be delayed |
411 | ;; until a "hint" is known about where to allocate them. This is | |
412 | ;; the case for some procedure arguments. | |
413 | ;; | |
414 | ;; This algorithm used is a conservative approximation of what | |
415 | ;; really should happen, which would be eager allocation of call | |
416 | ;; frames as soon as it's known that a call will happen. It would | |
417 | ;; be nice to recast this as a proper data-flow problem. | |
418 | (define (compute-needs-hint!) | |
0c247a2f | 419 | (define (live-before n) |
863034a8 | 420 | (dfa-k-in dfa n)) |
0c247a2f | 421 | (define (live-after n) |
863034a8 | 422 | (dfa-k-out dfa n)) |
5ded8498 AW |
423 | (define needs-slot |
424 | (bitvector->intset needs-slotv)) | |
0c247a2f AW |
425 | |
426 | ;; Walk backwards. At a call, compute the set of variables that | |
427 | ;; have allocated slots and are live before but not after. This | |
428 | ;; set contains candidates for needs-hintv. | |
429 | (define (scan-for-call n) | |
430 | (when (<= 0 n) | |
7dbf40ea | 431 | (match (lookup-cont (idx->label n) dfg) |
0c247a2f AW |
432 | (($ $kargs names syms body) |
433 | (match (find-expression body) | |
b3ae2b50 | 434 | ((or ($ $call) ($ $callk)) |
5ded8498 AW |
435 | (let* ((args (intset-subtract (live-before n) (live-after n))) |
436 | (args-needing-slots (intset-intersect args needs-slot))) | |
437 | (if (intset-next args-needing-slots #f) | |
438 | (scan-for-hints (1- n) args-needing-slots) | |
0c247a2f AW |
439 | (scan-for-call (1- n))))) |
440 | (_ (scan-for-call (1- n))))) | |
441 | (_ (scan-for-call (1- n)))))) | |
442 | ||
443 | ;; Walk backwards in the current basic block. Stop when the block | |
444 | ;; ends, we reach a call, or when an expression kills a value. | |
445 | (define (scan-for-hints n args) | |
446 | (when (< 0 n) | |
7dbf40ea | 447 | (match (lookup-cont (idx->label n) dfg) |
0c247a2f | 448 | (($ $kargs names syms body) |
7dbf40ea AW |
449 | (match (lookup-predecessors (idx->label (1+ n)) dfg) |
450 | (((? (cut eqv? <> (idx->label n)))) | |
0c247a2f AW |
451 | ;; If we are indeed in the same basic block, then if we |
452 | ;; are finished with the scan, we kill uses of the | |
453 | ;; terminator, but leave its definitions. | |
454 | (match (find-expression body) | |
a9ec16f9 | 455 | ((or ($ $const) ($ $prim) ($ $closure) |
f4092958 AW |
456 | ($ $primcall) ($ $prompt) |
457 | ;; If $values has more than one argument, it may | |
458 | ;; use a temporary, which would invalidate our | |
459 | ;; assumptions that slots not allocated are not | |
460 | ;; used. | |
461 | ($ $values (or () (_)))) | |
5ded8498 AW |
462 | (let ((killed (intset-subtract (live-before n) (live-after n)))) |
463 | (if (intset-next (intset-intersect killed needs-slot) #f) | |
0c247a2f AW |
464 | (finish-hints n (live-before n) args) |
465 | (scan-for-hints (1- n) args)))) | |
92805e21 | 466 | ((or ($ $call) ($ $callk) ($ $values) ($ $branch)) |
0c247a2f AW |
467 | (finish-hints n (live-before n) args)))) |
468 | ;; Otherwise we kill uses of the block entry. | |
469 | (_ (finish-hints n (live-before (1+ n)) args)))) | |
470 | (_ (finish-hints n (live-before (1+ n)) args))))) | |
471 | ||
472 | ;; Add definitions ARGS minus KILL to NEED-HINTS, and go back to | |
473 | ;; looking for calls. | |
474 | (define (finish-hints n kill args) | |
5ded8498 AW |
475 | (let ((new-hints (intset-subtract args kill))) |
476 | (let lp ((n 0)) | |
477 | (let ((n (intset-next new-hints n))) | |
478 | (when n | |
479 | (bitvector-set! needs-hintv n #t) | |
480 | (lp (1+ n)))))) | |
0c247a2f AW |
481 | (scan-for-call n)) |
482 | ||
7dbf40ea | 483 | (scan-for-call (1- label-count))) |
0c247a2f | 484 | |
987c1f5f | 485 | (define (allocate-call label k uses pre-live post-live) |
7dbf40ea | 486 | (match (lookup-cont k dfg) |
987c1f5f AW |
487 | (($ $ktail) |
488 | (let* ((tail-nlocals (length uses)) | |
489 | (tail-slots (iota tail-nlocals)) | |
0c247a2f | 490 | (pre-live (fold allocate! pre-live uses tail-slots)) |
987c1f5f AW |
491 | (moves (parallel-move (map (cut vector-ref slots <>) uses) |
492 | tail-slots | |
493 | (compute-tmp-slot pre-live tail-slots)))) | |
494 | (bump-nlocals! tail-nlocals) | |
495 | (hashq-set! call-allocations label | |
02c624fc | 496 | (make-call-allocation #f moves #f)))) |
36527695 | 497 | (($ $kreceive arity kargs) |
987c1f5f AW |
498 | (let* ((proc-slot (compute-call-proc-slot post-live)) |
499 | (call-slots (map (cut + proc-slot <>) (iota (length uses)))) | |
0c247a2f | 500 | (pre-live (fold allocate! pre-live uses call-slots)) |
987c1f5f AW |
501 | (arg-moves (parallel-move (map (cut vector-ref slots <>) uses) |
502 | call-slots | |
503 | (compute-tmp-slot pre-live | |
504 | call-slots))) | |
7dbf40ea | 505 | (result-vars (vector-ref defv (label->idx kargs))) |
987c1f5f AW |
506 | (value-slots (map (cut + proc-slot 1 <>) |
507 | (iota (length result-vars)))) | |
ad4f6be1 AW |
508 | ;; Shuffle the first result down to the lowest slot, and |
509 | ;; leave any remaining results where they are. This | |
510 | ;; strikes a balance between avoiding shuffling, | |
511 | ;; especially for unused extra values, and avoiding | |
512 | ;; frame size growth due to sparse locals. | |
513 | (result-live (match (cons result-vars value-slots) | |
514 | ((() . ()) post-live) | |
515 | (((var . vars) . (slot . slots)) | |
516 | (fold allocate! | |
517 | (allocate! var #f post-live) | |
518 | vars slots)))) | |
987c1f5f | 519 | (result-slots (map (cut vector-ref slots <>) result-vars)) |
fa48a2f7 AW |
520 | ;; Filter out unused results. |
521 | (value-slots (filter-map (lambda (val result) (and result val)) | |
522 | value-slots result-slots)) | |
523 | (result-slots (filter (lambda (x) x) result-slots)) | |
987c1f5f AW |
524 | (result-moves (parallel-move value-slots |
525 | result-slots | |
526 | (compute-tmp-slot result-live | |
02c624fc AW |
527 | value-slots))) |
528 | (dead-slot-map (logand (1- (ash 1 (- proc-slot 2))) | |
529 | (lognot post-live)))) | |
987c1f5f AW |
530 | (bump-nlocals! (+ proc-slot (length uses))) |
531 | (hashq-set! call-allocations label | |
02c624fc | 532 | (make-call-allocation proc-slot arg-moves dead-slot-map)) |
987c1f5f | 533 | (hashq-set! call-allocations k |
02c624fc | 534 | (make-call-allocation proc-slot result-moves #f)))) |
987c1f5f | 535 | |
6e8ad823 | 536 | (_ |
987c1f5f AW |
537 | (let* ((proc-slot (compute-call-proc-slot post-live)) |
538 | (call-slots (map (cut + proc-slot <>) (iota (length uses)))) | |
0c247a2f | 539 | (pre-live (fold allocate! pre-live uses call-slots)) |
987c1f5f AW |
540 | (arg-moves (parallel-move (map (cut vector-ref slots <>) uses) |
541 | call-slots | |
542 | (compute-tmp-slot pre-live | |
543 | call-slots)))) | |
544 | (bump-nlocals! (+ proc-slot (length uses))) | |
545 | (hashq-set! call-allocations label | |
02c624fc | 546 | (make-call-allocation proc-slot arg-moves #f)))))) |
987c1f5f AW |
547 | |
548 | (define (allocate-values label k uses pre-live post-live) | |
7dbf40ea | 549 | (match (lookup-cont k dfg) |
8a2d420f AW |
550 | (($ $ktail) |
551 | (let* ((src-slots (map (cut vector-ref slots <>) uses)) | |
552 | (tail-nlocals (1+ (length uses))) | |
553 | (dst-slots (cdr (iota tail-nlocals))) | |
554 | (moves (parallel-move src-slots dst-slots | |
555 | (compute-tmp-slot pre-live dst-slots)))) | |
556 | (bump-nlocals! tail-nlocals) | |
557 | (hashq-set! call-allocations label | |
02c624fc | 558 | (make-call-allocation #f moves #f)))) |
8a2d420f AW |
559 | (($ $kargs (_) (_)) |
560 | ;; When there is only one value in play, we allow the dst to be | |
561 | ;; hinted (see scan-for-hints). If the src doesn't have a | |
562 | ;; slot, then the actual slot for the dst would end up being | |
563 | ;; decided by the call that uses it. Because we don't know the | |
564 | ;; slot, we can't really compute the parallel moves in that | |
565 | ;; case, so just bail and rely on the bytecode emitter to | |
566 | ;; handle the one-value case specially. | |
7dbf40ea | 567 | (match (cons uses (vector-ref defv (label->idx k))) |
8a2d420f AW |
568 | (((src) . (dst)) |
569 | (allocate! dst (vector-ref slots src) post-live)))) | |
570 | (($ $kargs) | |
571 | (let* ((src-slots (map (cut vector-ref slots <>) uses)) | |
7dbf40ea | 572 | (dst-vars (vector-ref defv (label->idx k))) |
8a2d420f AW |
573 | (result-live (fold allocate! post-live dst-vars src-slots)) |
574 | (dst-slots (map (cut vector-ref slots <>) dst-vars)) | |
575 | (moves (parallel-move src-slots dst-slots | |
576 | (compute-tmp-slot (logior pre-live result-live) | |
577 | '())))) | |
578 | (hashq-set! call-allocations label | |
59258f7c | 579 | (make-call-allocation #f moves #f)))))) |
987c1f5f AW |
580 | |
581 | (define (allocate-prompt label k handler nargs) | |
7dbf40ea | 582 | (match (lookup-cont handler dfg) |
36527695 | 583 | (($ $kreceive arity kargs) |
987c1f5f AW |
584 | (let* ((handler-live (recompute-live-slots handler nargs)) |
585 | (proc-slot (compute-prompt-handler-proc-slot handler-live)) | |
7dbf40ea | 586 | (result-vars (vector-ref defv (label->idx kargs))) |
987c1f5f AW |
587 | (value-slots (map (cut + proc-slot 1 <>) |
588 | (iota (length result-vars)))) | |
589 | (result-live (fold allocate! | |
590 | handler-live result-vars value-slots)) | |
591 | (result-slots (map (cut vector-ref slots <>) result-vars)) | |
fa48a2f7 AW |
592 | ;; Filter out unused results. |
593 | (value-slots (filter-map (lambda (val result) (and result val)) | |
594 | value-slots result-slots)) | |
595 | (result-slots (filter (lambda (x) x) result-slots)) | |
987c1f5f AW |
596 | (moves (parallel-move value-slots |
597 | result-slots | |
598 | (compute-tmp-slot result-live | |
599 | value-slots)))) | |
600 | (bump-nlocals! (+ proc-slot 1 (length result-vars))) | |
601 | (hashq-set! call-allocations handler | |
02c624fc | 602 | (make-call-allocation proc-slot moves #f)))))) |
987c1f5f AW |
603 | |
604 | (define (allocate-defs! n live) | |
605 | (fold (cut allocate! <> #f <>) live (vector-ref defv n))) | |
606 | ||
607 | ;; This traversal will visit definitions before uses, as | |
608 | ;; definitions dominate uses and a block's dominator will appear | |
609 | ;; before it, in reverse post-order. | |
610 | (define (visit-clause n nargs live) | |
f5765cc2 | 611 | (let lp ((n n) (live (recompute-live-slots (idx->label n) nargs))) |
7dbf40ea | 612 | (define (kill-dead live vars-by-label-idx pred) |
987c1f5f AW |
613 | (fold (lambda (v live) |
614 | (let ((slot (vector-ref slots v))) | |
f5765cc2 | 615 | (if (and slot (pred n v dfa)) |
987c1f5f AW |
616 | (kill-dead-slot slot live) |
617 | live))) | |
618 | live | |
7dbf40ea | 619 | (vector-ref vars-by-label-idx n))) |
987c1f5f AW |
620 | (define (kill-dead-defs live) |
621 | (kill-dead live defv dead-after-def?)) | |
622 | (define (kill-dead-uses live) | |
623 | (kill-dead live usev dead-after-use?)) | |
7dbf40ea | 624 | (if (= n label-count) |
987c1f5f | 625 | n |
7dbf40ea | 626 | (let* ((label (idx->label n)) |
987c1f5f AW |
627 | (live (if (control-point? label dfg) |
628 | (recompute-live-slots label nargs) | |
629 | live)) | |
630 | (live (kill-dead-defs (allocate-defs! n live))) | |
631 | (post-live (kill-dead-uses live))) | |
632 | ;; LIVE are the live slots coming into the term. | |
633 | ;; POST-LIVE is the subset that is still live after the | |
634 | ;; term uses its inputs. | |
7dbf40ea | 635 | (match (lookup-cont (idx->label n) dfg) |
987c1f5f AW |
636 | (($ $kclause) n) |
637 | (($ $kargs names syms body) | |
ebca3afe AW |
638 | (define (compute-k-live k) |
639 | (match (lookup-predecessors k dfg) | |
640 | ((_) post-live) | |
641 | (_ (recompute-live-slots k nargs)))) | |
987c1f5f AW |
642 | (let ((uses (vector-ref usev n))) |
643 | (match (find-call body) | |
b3ae2b50 | 644 | (($ $continue k src (or ($ $call) ($ $callk))) |
ebca3afe | 645 | (allocate-call label k uses live (compute-k-live k))) |
987c1f5f | 646 | (($ $continue k src ($ $primcall)) #t) |
8a2d420f | 647 | (($ $continue k src ($ $values)) |
ebca3afe | 648 | (allocate-values label k uses live (compute-k-live k))) |
7ab76a83 | 649 | (($ $continue k src ($ $prompt escape? tag handler)) |
987c1f5f AW |
650 | (allocate-prompt label k handler nargs)) |
651 | (_ #f))) | |
652 | (lp (1+ n) post-live)) | |
59258f7c | 653 | ((or ($ $kreceive) ($ $ktail)) |
987c1f5f AW |
654 | (lp (1+ n) post-live))))))) |
655 | ||
656 | (define (visit-entry) | |
657 | (define (visit-clauses n live) | |
658 | (unless (eqv? live (add-live-slot 0 (empty-live-slots))) | |
659 | (error "Unexpected clause live set")) | |
660 | (set! nlocals 1) | |
7dbf40ea | 661 | (match (lookup-cont (idx->label n) dfg) |
90dce16d | 662 | (($ $kclause arity ($ $cont kbody ($ $kargs names)) alternate) |
7dbf40ea AW |
663 | (unless (eq? (idx->label (1+ n)) kbody) |
664 | (error "Unexpected label order")) | |
0c247a2f AW |
665 | (let* ((nargs (length names)) |
666 | (next (visit-clause (1+ n) | |
667 | nargs | |
668 | (fold allocate! live | |
669 | (vector-ref defv (1+ n)) | |
670 | (cdr (iota (1+ nargs))))))) | |
7dbf40ea AW |
671 | (hashq-set! nlocals-table (idx->label n) nlocals) |
672 | (when (< next label-count) | |
90dce16d AW |
673 | (match alternate |
674 | (($ $cont kalt) | |
7dbf40ea | 675 | (unless (eq? kalt (idx->label next)) |
90dce16d | 676 | (error "Unexpected clause order")))) |
0c247a2f | 677 | (visit-clauses next live)))))) |
7dbf40ea | 678 | (match (lookup-cont (idx->label 0) dfg) |
8320f504 | 679 | (($ $kfun src meta self) |
987c1f5f AW |
680 | (visit-clauses 1 (allocate-defs! 0 (empty-live-slots)))))) |
681 | ||
682 | (compute-constants!) | |
683 | (compute-uses-and-defs!) | |
fa48a2f7 | 684 | (compute-unused-results!) |
0c247a2f | 685 | (compute-needs-hint!) |
987c1f5f AW |
686 | (visit-entry) |
687 | ||
688 | (make-allocation dfa slots | |
689 | has-constv constant-values | |
690 | call-allocations | |
691 | nlocals-table))) |