1 ;;; Continuation-passing style (CPS) intermediate language (IL)
3 ;; Copyright (C) 2013 Free Software Foundation, Inc.
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 ;;; A module to assign stack slots to variables in a CPS term.
25 (define-module (language cps slot-allocation)
26 #:use-module (ice-9 match)
27 #:use-module (srfi srfi-1)
28 #:use-module (srfi srfi-9)
29 #:use-module (srfi srfi-26)
30 #:use-module (language cps)
31 #:use-module (language cps dfg)
32 #:export (allocate-slots
36 lookup-maybe-constant-value
39 lookup-parallel-moves))
41 (define-record-type $allocation
42 (make-allocation dfa slots
43 has-constv constant-values
48 ;; A DFA records all variables bound in a function, and assigns them
49 ;; indices. The slot in which a variable is stored at runtime can be
50 ;; had by indexing into the SLOTS vector with the variable's index.
53 (slots allocation-slots)
55 ;; Not all variables have slots allocated. Variables that are
56 ;; constant and that are only used by primcalls that can accept
57 ;; constants directly are not allocated to slots, and their SLOT value
58 ;; is false. Likewise constants that are only used by calls are not
59 ;; allocated into slots, to avoid needless copying. If a variable is
60 ;; constant, its constant value is set in the CONSTANT-VALUES vector
61 ;; and the corresponding bit in the HAS-CONSTV bitvector is set.
63 (has-constv allocation-has-constv)
64 (constant-values allocation-constant-values)
66 ;; Some continuations have additional associated information. This
67 ;; addition information is a /call allocation/. Call allocations
68 ;; record the way that functions are passed values, and how their
69 ;; return values are rebound to local variables.
71 ;; A call allocation contains two pieces of information: the call's
72 ;; /proc slot/, and a set of /parallel moves/. The proc slot
73 ;; indicates the slot of a procedure in a procedure call, or where the
74 ;; procedure would be in a multiple-value return. The parallel moves
75 ;; shuffle locals into position for a call, or shuffle returned values
76 ;; back into place. Though they use the same slot, moves for a call
77 ;; are called "call moves", and moves to handle a return are "return
80 ;; $ktrunc continuations record a proc slot and a set of return moves
81 ;; to adapt multiple values from the stack to local variables.
83 ;; Tail calls record arg moves, but no proc slot.
85 ;; Non-tail calls record arg moves and a call slot. Multiple-valued
86 ;; returns will have an associated $ktrunc continuation, which records
87 ;; the same proc slot, but has return moves.
89 ;; $prompt handlers are $ktrunc continuations like any other.
91 ;; $values expressions with more than 1 value record moves but have no
94 ;; A set of moves is expressed as an ordered list of (SRC . DST)
95 ;; moves, where SRC and DST are slots. This may involve a temporary
98 (call-allocations allocation-call-allocations)
100 ;; The number of locals for a $kclause.
102 (nlocals allocation-nlocals))
104 (define-record-type $call-allocation
105 (make-call-allocation proc-slot moves)
107 (proc-slot call-allocation-proc-slot)
108 (moves call-allocation-moves))
110 (define (find-first-zero n)
111 ;; Naive implementation.
117 (define (find-first-trailing-zero n)
118 (let lp ((slot (let lp ((count 2))
119 (if (< n (ash 1 (1- count)))
121 ;; Grow upper bound slower than factor 2 to avoid
122 ;; needless bignum allocation on 32-bit systems
123 ;; when there are more than 16 locals.
124 (lp (+ count (ash count -1)))))))
125 (if (or (zero? slot) (logbit? (1- slot) n))
129 (define (lookup-maybe-slot sym allocation)
131 (($ $allocation dfa slots)
132 (vector-ref slots (dfa-var-idx dfa sym)))))
134 (define (lookup-slot sym allocation)
135 (or (lookup-maybe-slot sym allocation)
136 (error "Variable not allocated to a slot" sym)))
138 (define (lookup-constant-value sym allocation)
140 (($ $allocation dfa slots has-constv constant-values)
141 (let ((idx (dfa-var-idx dfa sym)))
142 (if (bitvector-ref has-constv idx)
143 (vector-ref constant-values idx)
144 (error "Variable does not have constant value" sym))))))
146 (define (lookup-maybe-constant-value sym allocation)
148 (($ $allocation dfa slots has-constv constant-values)
149 (let ((idx (dfa-var-idx dfa sym)))
150 (values (bitvector-ref has-constv idx)
151 (vector-ref constant-values idx))))))
153 (define (lookup-call-allocation k allocation)
154 (or (hashq-ref (allocation-call-allocations allocation) k)
155 (error "Continuation not a call" k)))
157 (define (lookup-call-proc-slot k allocation)
158 (or (call-allocation-proc-slot (lookup-call-allocation k allocation))
159 (error "Call has no proc slot" k)))
161 (define (lookup-parallel-moves k allocation)
162 (or (call-allocation-moves (lookup-call-allocation k allocation))
163 (error "Call has no use parallel moves slot" k)))
165 (define (lookup-nlocals k allocation)
166 (or (hashq-ref (allocation-nlocals allocation) k)
167 (error "Not a clause continuation" k)))
169 (define (solve-parallel-move src dst tmp)
170 "Solve the parallel move problem between src and dst slot lists, which
171 are comparable with eqv?. A tmp slot may be used."
173 ;; This algorithm is taken from: "Tilting at windmills with Coq:
174 ;; formal verification of a compilation algorithm for parallel moves"
175 ;; by Laurence Rideau, Bernard Paul Serpette, and Xavier Leroy
176 ;; <http://gallium.inria.fr/~xleroy/publi/parallel-move.pdf>
178 (define (split-move moves reg)
179 (let loop ((revhead '()) (tail moves))
181 (((and s+d (s . d)) . rest)
183 (cons d (append-reverse revhead rest))
184 (loop (cons s+d revhead) rest)))
187 (define (replace-last-source reg moves)
190 (append moves (list (cons reg d))))))
192 (let loop ((to-move (map cons src dst))
196 ;; 'last-source' should always be equivalent to:
197 ;; (and (pair? being-moved) (car (last being-moved)))
201 (((and s+d (s . d)) . t1)
202 (if (or (eqv? s d) ; idempotent
203 (not s)) ; src is a constant and can be loaded directly
204 (loop t1 '() moved #f)
205 (loop t1 (list s+d) moved s)))))
206 (((and s+d (s . d)) . b)
207 (match (split-move to-move d)
208 ((r . t1) (loop t1 (acons d r being-moved) moved last-source))
210 (() (loop to-move '() (cons s+d moved) #f))
211 (_ (if (eqv? d last-source)
213 (replace-last-source tmp b)
214 (cons s+d (acons d tmp moved))
216 (loop to-move b (cons s+d moved) last-source))))))))))
218 (define (dead-after-def? def-k v-idx dfa)
219 (let ((l (dfa-k-idx dfa def-k)))
220 (not (bitvector-ref (dfa-k-in dfa l) v-idx))))
222 (define (dead-after-use? use-k v-idx dfa)
223 (let ((l (dfa-k-idx dfa use-k)))
224 (not (bitvector-ref (dfa-k-out dfa l) v-idx))))
226 (define (allocate-slots fun dfg)
227 (let* ((dfa (compute-live-variables fun dfg))
228 (cfa (analyze-control-flow fun dfg))
229 (usev (make-vector (cfa-k-count cfa) '()))
230 (defv (make-vector (cfa-k-count cfa) '()))
231 (cont-table (dfg-cont-table dfg))
232 (slots (make-vector (dfa-var-count dfa) #f))
233 (constant-values (make-vector (dfa-var-count dfa) #f))
234 (has-constv (make-bitvector (dfa-var-count dfa) #f))
235 (has-slotv (make-bitvector (dfa-var-count dfa) #t))
236 (needs-slotv (make-bitvector (dfa-var-count dfa) #t))
237 (call-allocations (make-hash-table))
238 (nlocals 0) ; Mutable. It pains me.
239 (nlocals-table (make-hash-table)))
241 (define (bump-nlocals! nlocals*)
242 (when (< nlocals nlocals*)
243 (set! nlocals nlocals*)))
245 (define (empty-live-slots)
248 (define (add-live-slot slot live-slots)
249 (logior live-slots (ash 1 slot)))
251 (define (kill-dead-slot slot live-slots)
252 (logand live-slots (lognot (ash 1 slot))))
254 (define (compute-slot live-slots hint)
255 (if (and hint (not (logbit? hint live-slots)))
257 (find-first-zero live-slots)))
259 (define (compute-call-proc-slot live-slots)
260 (+ 3 (find-first-trailing-zero live-slots)))
262 (define (compute-prompt-handler-proc-slot live-slots)
263 (1- (find-first-trailing-zero live-slots)))
265 (define (recompute-live-slots k nargs)
266 (let ((in (dfa-k-in dfa (dfa-k-idx dfa k))))
267 (let lp ((v 0) (live-slots (1- (ash 1 (1+ nargs)))))
268 (let ((v (bit-position #t in v)))
270 (let ((slot (vector-ref slots v)))
273 (add-live-slot slot live-slots)
277 (define* (allocate! var-idx hint live)
279 ((not (bitvector-ref needs-slotv var-idx)) live)
280 ((vector-ref slots var-idx) => (cut add-live-slot <> live))
282 (let ((slot (compute-slot live hint)))
283 (bump-nlocals! (1+ slot))
284 (vector-set! slots var-idx slot)
285 (add-live-slot slot live)))))
287 ;; Although some parallel moves may proceed without a temporary
288 ;; slot, in general one is needed. That temporary slot must not be
289 ;; part of the source or destination sets, and that slot should not
290 ;; correspond to a live variable. Usually the source and
291 ;; destination sets are a subset of the union of the live sets
292 ;; before and after the move. However for stack slots that don't
293 ;; have names -- those slots that correspond to function arguments
294 ;; or to function return values -- it could be that they are out of
295 ;; the computed live set. In that case they need to be adjoined to
296 ;; the live set, used when choosing a temporary slot.
297 (define (compute-tmp-slot live stack-slots)
298 (find-first-zero (fold add-live-slot live stack-slots)))
300 (define (parallel-move src-slots dst-slots tmp-slot)
301 (let ((moves (solve-parallel-move src-slots dst-slots tmp-slot)))
302 (when (assv tmp-slot moves)
303 (bump-nlocals! (1+ tmp-slot)))
306 ;; Find variables that are actually constant, and determine which
307 ;; of those can avoid slot allocation.
308 (define (compute-constants!)
310 (when (< n (vector-length constant-values))
311 (let ((sym (dfa-var-sym dfa n)))
312 (call-with-values (lambda () (find-constant-value sym dfg))
313 (lambda (has-const? const)
315 (bitvector-set! has-constv n has-const?)
316 (vector-set! constant-values n const)
317 (when (not (constant-needs-allocation? sym const dfg))
318 (bitvector-set! needs-slotv n #f)))
321 ;; Record uses and defs, as lists of variable indexes, indexed by
322 ;; CFA continuation index.
323 (define (compute-uses-and-defs!)
325 (when (< n (vector-length usev))
326 (match (lookup-cont (cfa-k-sym cfa n) cont-table)
328 (vector-set! defv n (list (dfa-var-idx dfa self))))
329 (($ $kargs names syms body)
330 (vector-set! defv n (map (cut dfa-var-idx dfa <>) syms))
332 (map (cut dfa-var-idx dfa <>)
333 (match (find-expression body)
336 (($ $primcall name args)
340 (($ $prompt escape? tag handler pop)
346 (define (allocate-call label k uses pre-live post-live)
347 (match (lookup-cont k cont-table)
349 (let* ((tail-nlocals (length uses))
350 (tail-slots (iota tail-nlocals))
351 (moves (parallel-move (map (cut vector-ref slots <>) uses)
353 (compute-tmp-slot pre-live tail-slots))))
354 (bump-nlocals! tail-nlocals)
355 (hashq-set! call-allocations label
356 (make-call-allocation #f moves))))
357 (($ $ktrunc arity kargs)
358 (let* ((proc-slot (compute-call-proc-slot post-live))
359 (call-slots (map (cut + proc-slot <>) (iota (length uses))))
360 (arg-moves (parallel-move (map (cut vector-ref slots <>) uses)
362 (compute-tmp-slot pre-live
364 (result-vars (vector-ref defv (cfa-k-idx cfa kargs)))
365 (value-slots (map (cut + proc-slot 1 <>)
366 (iota (length result-vars))))
367 (result-live (fold allocate!
368 post-live result-vars value-slots))
369 (result-slots (map (cut vector-ref slots <>) result-vars))
370 (result-moves (parallel-move value-slots
372 (compute-tmp-slot result-live
374 (bump-nlocals! (+ proc-slot (length uses)))
375 (hashq-set! call-allocations label
376 (make-call-allocation proc-slot arg-moves))
377 (hashq-set! call-allocations k
378 (make-call-allocation proc-slot result-moves))))
381 (let* ((proc-slot (compute-call-proc-slot post-live))
382 (call-slots (map (cut + proc-slot <>) (iota (length uses))))
383 (arg-moves (parallel-move (map (cut vector-ref slots <>) uses)
385 (compute-tmp-slot pre-live
387 (bump-nlocals! (+ proc-slot (length uses)))
388 (hashq-set! call-allocations label
389 (make-call-allocation proc-slot arg-moves))))))
391 (define (allocate-values label k uses pre-live post-live)
392 (let* ((src-slots (map (cut vector-ref slots <>) uses))
393 (dst-slots (match (lookup-cont k cont-table)
395 (let ((tail-nlocals (1+ (length uses))))
396 (bump-nlocals! tail-nlocals)
397 (cdr (iota tail-nlocals))))
399 (let ((dst-vars (vector-ref defv (cfa-k-idx cfa k))))
400 (fold allocate! post-live dst-vars src-slots)
401 (map (cut vector-ref slots <>) dst-vars)))))
402 (moves (parallel-move src-slots
404 (compute-tmp-slot pre-live dst-slots))))
405 (hashq-set! call-allocations label
406 (make-call-allocation #f moves))))
408 (define (allocate-prompt label k handler nargs)
409 (match (lookup-cont handler cont-table)
410 (($ $ktrunc arity kargs)
411 (let* ((handler-live (recompute-live-slots handler nargs))
412 (proc-slot (compute-prompt-handler-proc-slot handler-live))
413 (result-vars (vector-ref defv (cfa-k-idx cfa kargs)))
414 (value-slots (map (cut + proc-slot 1 <>)
415 (iota (length result-vars))))
416 (result-live (fold allocate!
417 handler-live result-vars value-slots))
418 (result-slots (map (cut vector-ref slots <>) result-vars))
419 (moves (parallel-move value-slots
421 (compute-tmp-slot result-live
423 (bump-nlocals! (+ proc-slot 1 (length result-vars)))
424 (hashq-set! call-allocations handler
425 (make-call-allocation proc-slot moves))))))
427 (define (allocate-defs! n live)
428 (fold (cut allocate! <> #f <>) live (vector-ref defv n)))
430 ;; This traversal will visit definitions before uses, as
431 ;; definitions dominate uses and a block's dominator will appear
432 ;; before it, in reverse post-order.
433 (define (visit-clause n nargs live)
434 (let lp ((n n) (live live))
435 (define (kill-dead live vars-by-cfa-idx pred)
436 (fold (lambda (v live)
437 (let ((slot (vector-ref slots v)))
440 (pred (cfa-k-sym cfa n) v dfa))
441 (kill-dead-slot slot live)
444 (vector-ref vars-by-cfa-idx n)))
445 (define (kill-dead-defs live)
446 (kill-dead live defv dead-after-def?))
447 (define (kill-dead-uses live)
448 (kill-dead live usev dead-after-use?))
449 (if (= n (cfa-k-count cfa))
451 (let* ((label (cfa-k-sym cfa n))
452 (live (if (control-point? label dfg)
453 (recompute-live-slots label nargs)
455 (live (kill-dead-defs (allocate-defs! n live)))
456 (post-live (kill-dead-uses live)))
457 ;; LIVE are the live slots coming into the term.
458 ;; POST-LIVE is the subset that is still live after the
459 ;; term uses its inputs.
460 (match (lookup-cont label cont-table)
462 (($ $kargs names syms body)
463 (let ((uses (vector-ref usev n)))
464 (match (find-call body)
465 (($ $continue k src ($ $call))
466 (allocate-call label k uses live post-live))
467 (($ $continue k src ($ $primcall)) #t)
468 ;; We only need to make a call allocation if there
469 ;; are two or more values.
470 (($ $continue k src ($ $values (_ _ . _)))
471 (allocate-values label k uses live post-live))
472 (($ $continue k src ($ $values)) #t)
473 (($ $continue k src ($ $prompt escape? tag handler pop))
474 (allocate-prompt label k handler nargs))
476 (lp (1+ n) post-live))
477 ((or ($ $ktrunc) ($ $kif) ($ $ktail))
478 (lp (1+ n) post-live)))))))
480 (define (visit-entry)
481 (define (visit-clauses n live)
482 (unless (eqv? live (add-live-slot 0 (empty-live-slots)))
483 (error "Unexpected clause live set"))
485 (let ((k (cfa-k-sym cfa n)))
486 (match (lookup-cont k cont-table)
487 (($ $kclause arity ($ $cont kbody ($ $kargs names)))
488 (unless (eq? (cfa-k-sym cfa (1+ n)) kbody)
489 (error "Unexpected CFA order"))
490 (let ((next (visit-clause (1+ n) (length names) live)))
491 (hashq-set! nlocals-table k nlocals)
492 (when (< next (cfa-k-count cfa))
493 (visit-clauses next live)))))))
494 (match (lookup-cont (cfa-k-sym cfa 0) cont-table)
496 (visit-clauses 1 (allocate-defs! 0 (empty-live-slots))))))
499 (compute-uses-and-defs!)
502 (make-allocation dfa slots
503 has-constv constant-values