Merge remote-tracking branch 'origin/stable-2.0'
[bpt/guile.git] / module / language / tree-il / cse.scm
1 ;;; Common Subexpression Elimination (CSE) on Tree-IL
2
3 ;; Copyright (C) 2011, 2012 Free Software Foundation, Inc.
4
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
9 ;;;;
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
14 ;;;;
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 (define-module (language tree-il cse)
20 #:use-module (language tree-il)
21 #:use-module (language tree-il primitives)
22 #:use-module (language tree-il effects)
23 #:use-module (ice-9 vlist)
24 #:use-module (ice-9 match)
25 #:use-module (srfi srfi-1)
26 #:use-module (srfi srfi-9)
27 #:use-module (srfi srfi-11)
28 #:use-module (srfi srfi-26)
29 #:export (cse))
30
31 ;;;
32 ;;; This pass eliminates common subexpressions in Tree-IL. It works
33 ;;; best locally -- within a function -- so it is meant to be run after
34 ;;; partial evaluation, which usually inlines functions and so opens up
35 ;;; a bigger space for CSE to work.
36 ;;;
37 ;;; The algorithm traverses the tree of expressions, returning two
38 ;;; values: the newly rebuilt tree, and a "database". The database is
39 ;;; the set of expressions that will have been evaluated as part of
40 ;;; evaluating an expression. For example, in:
41 ;;;
42 ;;; (1- (+ (if a b c) (* x y)))
43 ;;;
44 ;;; We can say that when it comes time to evaluate (1- <>), that the
45 ;;; subexpressions +, x, y, and (* x y) must have been evaluated in
46 ;;; values context. We know that a was evaluated in test context, but
47 ;;; we don't know if it was true or false.
48 ;;;
49 ;;; The expressions in the database /dominate/ any subsequent
50 ;;; expression: FOO dominates BAR if evaluation of BAR implies that any
51 ;;; effects associated with FOO have already occured.
52 ;;;
53 ;;; When adding expressions to the database, we record the context in
54 ;;; which they are evaluated. We treat expressions in test context
55 ;;; specially: the presence of such an expression indicates that the
56 ;;; expression is true. In this way we can elide duplicate predicates.
57 ;;;
58 ;;; Duplicate predicates are not common in code that users write, but
59 ;;; can occur quite frequently in macro-generated code.
60 ;;;
61 ;;; For example:
62 ;;;
63 ;;; (and (foo? x) (foo-bar x))
64 ;;; => (if (and (struct? x) (eq? (struct-vtable x) <foo>))
65 ;;; (if (and (struct? x) (eq? (struct-vtable x) <foo>))
66 ;;; (struct-ref x 1)
67 ;;; (throw 'not-a-foo))
68 ;;; #f))
69 ;;; => (if (and (struct? x) (eq? (struct-vtable x) <foo>))
70 ;;; (struct-ref x 1)
71 ;;; #f)
72 ;;;
73 ;;; A conditional bailout in effect context also has the effect of
74 ;;; adding predicates to the database:
75 ;;;
76 ;;; (begin (foo-bar x) (foo-baz x))
77 ;;; => (begin
78 ;;; (if (and (struct? x) (eq? (struct-vtable x) <foo>))
79 ;;; (struct-ref x 1)
80 ;;; (throw 'not-a-foo))
81 ;;; (if (and (struct? x) (eq? (struct-vtable x) <foo>))
82 ;;; (struct-ref x 2)
83 ;;; (throw 'not-a-foo)))
84 ;;; => (begin
85 ;;; (if (and (struct? x) (eq? (struct-vtable x) <foo>))
86 ;;; (struct-ref x 1)
87 ;;; (throw 'not-a-foo))
88 ;;; (struct-ref x 2))
89 ;;;
90 ;;; When removing code, we have to ensure that the semantics of the
91 ;;; source program and the residual program are the same. It's easy to
92 ;;; ensure that they have the same value, because those manipulations
93 ;;; are just algebraic, but the tricky thing is to ensure that the
94 ;;; expressions exhibit the same ordering of effects. For that, we use
95 ;;; the effects analysis of (language tree-il effects). We only
96 ;;; eliminate code if the duplicate code commutes with all of the
97 ;;; dominators on the path from the duplicate to the original.
98 ;;;
99 ;;; The implementation uses vhashes as the fundamental data structure.
100 ;;; This can be seen as a form of global value numbering. This
101 ;;; algorithm currently spends most of its time in vhash-assoc. I'm not
102 ;;; sure whether that is due to our bad hash function in Guile 2.0, an
103 ;;; inefficiency in vhashes, or what. Overall though the complexity
104 ;;; should be linear, or N log N -- whatever vhash-assoc's complexity
105 ;;; is. Walking the dominators is nonlinear, but that only happens when
106 ;;; we've actually found a common subexpression so that should be OK.
107 ;;;
108
109 ;; Logging helpers, as in peval.
110 ;;
111 (define-syntax *logging* (identifier-syntax #f))
112 ;; (define %logging #f)
113 ;; (define-syntax *logging* (identifier-syntax %logging))
114 (define-syntax log
115 (syntax-rules (quote)
116 ((log 'event arg ...)
117 (if (and *logging*
118 (or (eq? *logging* #t)
119 (memq 'event *logging*)))
120 (log* 'event arg ...)))))
121 (define (log* event . args)
122 (let ((pp (module-ref (resolve-interface '(ice-9 pretty-print))
123 'pretty-print)))
124 (pp `(log ,event . ,args))
125 (newline)
126 (values)))
127
128 ;; A pre-pass on the source program to determine the set of assigned
129 ;; lexicals.
130 ;;
131 (define* (build-assigned-var-table exp #:optional (table vlist-null))
132 (tree-il-fold
133 (lambda (exp res)
134 res)
135 (lambda (exp res)
136 (match exp
137 (($ <lexical-set> src name gensym exp)
138 (vhash-consq gensym #t res))
139 (_ res)))
140 (lambda (exp res) res)
141 table exp))
142
143 (define (boolean-valued-primitive? primitive)
144 (or (negate-primitive primitive)
145 (eq? primitive 'not)
146 (let ((chars (symbol->string primitive)))
147 (eqv? (string-ref chars (1- (string-length chars)))
148 #\?))))
149
150 (define (boolean-valued-expression? x ctx)
151 (match x
152 (($ <primcall> _ (? boolean-valued-primitive?)) #t)
153 (($ <const> _ (? boolean?)) #t)
154 (_ (eq? ctx 'test))))
155
156 (define (singly-valued-expression? x ctx)
157 (match x
158 (($ <const>) #t)
159 (($ <lexical-ref>) #t)
160 (($ <void>) #t)
161 (($ <lexical-ref>) #t)
162 (($ <primitive-ref>) #t)
163 (($ <module-ref>) #t)
164 (($ <toplevel-ref>) #t)
165 (($ <primcall> _ (? singly-valued-primitive?)) #t)
166 (($ <primcall> _ 'values (val)) #t)
167 (($ <lambda>) #t)
168 (_ (eq? ctx 'value))))
169
170 (define* (cse exp)
171 "Eliminate common subexpressions in EXP."
172
173 (define assigned-lexical?
174 (let ((table (build-assigned-var-table exp)))
175 (lambda (sym)
176 (vhash-assq sym table))))
177
178 (define %compute-effects
179 (make-effects-analyzer assigned-lexical?))
180
181 (define (negate exp ctx)
182 (match exp
183 (($ <const> src x)
184 (make-const src (not x)))
185 (($ <void> src)
186 (make-const src #f))
187 (($ <conditional> src test consequent alternate)
188 (make-conditional src test (negate consequent ctx) (negate alternate ctx)))
189 (($ <primcall> _ 'not
190 ((and x (? (cut boolean-valued-expression? <> ctx)))))
191 x)
192 (($ <primcall> src (and pred (? negate-primitive)) args)
193 (make-primcall src (negate-primitive pred) args))
194 (_
195 (make-primcall #f 'not (list exp)))))
196
197
198 (define (hasher n)
199 (lambda (x size) (modulo n size)))
200
201 (define (add-to-db exp effects ctx db)
202 (let ((v (vector exp effects ctx))
203 (h (tree-il-hash exp)))
204 (vhash-cons v h db (hasher h))))
205
206 (define (control-flow-boundary db)
207 (let ((h (hashq 'lambda most-positive-fixnum)))
208 (vhash-cons 'lambda h db (hasher h))))
209
210 (define (find-dominating-expression exp effects ctx db)
211 (define (entry-matches? v1 v2)
212 (match (if (vector? v1) v1 v2)
213 (#(exp* effects* ctx*)
214 (and (tree-il=? exp exp*)
215 (or (not ctx) (eq? ctx* ctx))))
216 (_ #f)))
217
218 (let ((len (vlist-length db))
219 (h (tree-il-hash exp)))
220 (and (vhash-assoc #t db entry-matches? (hasher h))
221 (let lp ((n 0))
222 (and (< n len)
223 (match (vlist-ref db n)
224 (('lambda . h*)
225 ;; We assume that lambdas can escape and thus be
226 ;; called from anywhere. Thus code inside a lambda
227 ;; only has a dominating expression if it does not
228 ;; depend on any effects.
229 (and (not (depends-on-effects? effects &all-effects))
230 (lp (1+ n))))
231 ((#(exp* effects* ctx*) . h*)
232 (log 'walk (unparse-tree-il exp) effects
233 (unparse-tree-il exp*) effects* ctx*)
234 (or (and (= h h*)
235 (or (not ctx) (eq? ctx ctx*))
236 (tree-il=? exp exp*))
237 (and (effects-commute? effects effects*)
238 (lp (1+ n)))))))))))
239
240 ;; Return #t if EXP is dominated by an instance of itself. In that
241 ;; case, we can exclude *type-check* effects, because the first
242 ;; expression already caused them if needed.
243 (define (has-dominating-effect? exp effects db)
244 (or (constant? effects)
245 (and
246 (effect-free?
247 (exclude-effects effects
248 (logior &zero-values
249 &allocation
250 &type-check)))
251 (find-dominating-expression exp effects #f db))))
252
253 (define (find-dominating-test exp effects db)
254 (and
255 (effect-free?
256 (exclude-effects effects (logior &allocation
257 &type-check)))
258 (match exp
259 (($ <const> src val)
260 (if (boolean? val)
261 exp
262 (make-const src (not (not val)))))
263 ;; For (not FOO), try to prove FOO, then negate the result.
264 (($ <primcall> src 'not (exp*))
265 (match (find-dominating-test exp* effects db)
266 (($ <const> _ val)
267 (log 'inferring exp (not val))
268 (make-const src (not val)))
269 (_
270 #f)))
271 (_
272 (cond
273 ((find-dominating-expression exp effects 'test db)
274 ;; We have an EXP fact, so we infer #t.
275 (log 'inferring exp #t)
276 (make-const (tree-il-src exp) #t))
277 ((find-dominating-expression (negate exp 'test) effects 'test db)
278 ;; We have a (not EXP) fact, so we infer #f.
279 (log 'inferring exp #f)
280 (make-const (tree-il-src exp) #f))
281 (else
282 ;; Otherwise we don't know.
283 #f))))))
284
285 (define (add-to-env exp name sym db env)
286 (let* ((v (vector exp name sym (vlist-length db)))
287 (h (tree-il-hash exp)))
288 (vhash-cons v h env (hasher h))))
289
290 (define (augment-env env names syms exps db)
291 (if (null? names)
292 env
293 (let ((name (car names)) (sym (car syms)) (exp (car exps)))
294 (augment-env (if (or (assigned-lexical? sym)
295 (lexical-ref? exp))
296 env
297 (add-to-env exp name sym db env))
298 (cdr names) (cdr syms) (cdr exps) db))))
299
300 (define (find-dominating-lexical exp effects env db)
301 (define (entry-matches? v1 v2)
302 (match (if (vector? v1) v1 v2)
303 (#(exp* name sym db)
304 (tree-il=? exp exp*))
305 (_ #f)))
306
307 (define (unroll db base n)
308 (or (zero? n)
309 (match (vlist-ref db base)
310 (('lambda . h*)
311 ;; See note in find-dominating-expression.
312 (and (not (depends-on-effects? effects &all-effects))
313 (unroll db (1+ base) (1- n))))
314 ((#(exp* effects* ctx*) . h*)
315 (and (effects-commute? effects effects*)
316 (unroll db (1+ base) (1- n)))))))
317
318 (let ((h (tree-il-hash exp)))
319 (and (effect-free? (exclude-effects effects &type-check))
320 (vhash-assoc exp env entry-matches? (hasher h))
321 (let ((env-len (vlist-length env))
322 (db-len (vlist-length db)))
323 (let lp ((n 0) (m 0))
324 (and (< n env-len)
325 (match (vlist-ref env n)
326 ((#(exp* name sym db-len*) . h*)
327 (let ((niter (- (- db-len db-len*) m)))
328 (and (unroll db m niter)
329 (if (and (= h h*) (tree-il=? exp* exp))
330 (make-lexical-ref (tree-il-src exp) name sym)
331 (lp (1+ n) (- db-len db-len*)))))))))))))
332
333 (define (lookup-lexical sym env)
334 (let ((env-len (vlist-length env)))
335 (let lp ((n 0))
336 (and (< n env-len)
337 (match (vlist-ref env n)
338 ((#(exp _ sym* _) . _)
339 (if (eq? sym sym*)
340 exp
341 (lp (1+ n)))))))))
342
343 (define (intersection db+ db-)
344 (vhash-fold-right
345 (lambda (k h out)
346 (if (vhash-assoc k db- equal? (hasher h))
347 (vhash-cons k h out (hasher h))
348 out))
349 vlist-null
350 db+))
351
352 (define (concat db1 db2)
353 (vhash-fold-right (lambda (k h tail)
354 (vhash-cons k h tail (hasher h)))
355 db2 db1))
356
357 (let visit ((exp exp)
358 (db vlist-null) ; dominating expressions: #(exp effects ctx) -> hash
359 (env vlist-null) ; named expressions: #(exp name sym db) -> hash
360 (ctx 'values)) ; test, effect, value, or values
361
362 (define (parallel-visit exps db env ctx)
363 (let lp ((in exps) (out '()) (db* vlist-null))
364 (if (pair? in)
365 (call-with-values (lambda () (visit (car in) db env ctx))
366 (lambda (x db**)
367 (lp (cdr in) (cons x out) (concat db** db*))))
368 (values (reverse out) db*))))
369
370 (define (compute-effects exp)
371 (%compute-effects exp (lambda (sym) (lookup-lexical sym env))))
372
373 (define (bailout? exp)
374 (causes-effects? (compute-effects exp) &definite-bailout))
375
376 (define (return exp db*)
377 (let ((effects (compute-effects exp)))
378 (cond
379 ((and (eq? ctx 'effect)
380 (not (lambda-case? exp))
381 (or (effect-free?
382 (exclude-effects effects
383 (logior &zero-values
384 &allocation)))
385 (has-dominating-effect? exp effects db)))
386 (cond
387 ((void? exp)
388 (values exp db*))
389 (else
390 (log 'elide ctx (unparse-tree-il exp))
391 (values (make-void #f) db*))))
392 ((and (boolean-valued-expression? exp ctx)
393 (find-dominating-test exp effects db))
394 => (lambda (exp)
395 (log 'propagate-test ctx (unparse-tree-il exp))
396 (values exp db*)))
397 ((and (singly-valued-expression? exp ctx)
398 (find-dominating-lexical exp effects env db))
399 => (lambda (exp)
400 (log 'propagate-value ctx (unparse-tree-il exp))
401 (values exp db*)))
402 ((and (constant? effects) (memq ctx '(value values)))
403 ;; Adds nothing to the db.
404 (values exp db*))
405 (else
406 (log 'return ctx effects (unparse-tree-il exp) db*)
407 (values exp
408 (add-to-db exp effects ctx db*))))))
409
410 (log 'visit ctx (unparse-tree-il exp) db env)
411
412 (match exp
413 (($ <const>)
414 (return exp vlist-null))
415 (($ <void>)
416 (return exp vlist-null))
417 (($ <lexical-ref> _ _ gensym)
418 (return exp vlist-null))
419 (($ <lexical-set> src name gensym exp)
420 (let*-values (((exp db*) (visit exp db env 'value)))
421 (return (make-lexical-set src name gensym exp)
422 db*)))
423 (($ <let> src names gensyms vals body)
424 (let*-values (((vals db*) (parallel-visit vals db env 'value))
425 ((body db**) (visit body (concat db* db)
426 (augment-env env names gensyms vals db)
427 ctx)))
428 (return (make-let src names gensyms vals body)
429 (concat db** db*))))
430 (($ <letrec> src in-order? names gensyms vals body)
431 (let*-values (((vals db*) (parallel-visit vals db env 'value))
432 ((body db**) (visit body (concat db* db)
433 (augment-env env names gensyms vals db)
434 ctx)))
435 (return (make-letrec src in-order? names gensyms vals body)
436 (concat db** db*))))
437 (($ <fix> src names gensyms vals body)
438 (let*-values (((vals db*) (parallel-visit vals db env 'value))
439 ((body db**) (visit body (concat db* db) env ctx)))
440 (return (make-fix src names gensyms vals body)
441 (concat db** db*))))
442 (($ <let-values> src producer consumer)
443 (let*-values (((producer db*) (visit producer db env 'values))
444 ((consumer db**) (visit consumer (concat db* db) env ctx)))
445 (return (make-let-values src producer consumer)
446 (concat db** db*))))
447 (($ <dynwind> src winder pre body post unwinder)
448 (let*-values (((winder db*) (visit winder db env 'value))
449 ((db**) db*)
450 ((unwinder db*) (visit unwinder db env 'value))
451 ((db**) (concat db* db**))
452 ((pre db*) (visit pre (concat db** db) env 'effect))
453 ((db**) (concat db* db**))
454 ((body db*) (visit body (concat db** db) env ctx))
455 ((db**) (concat db* db**))
456 ((post db*) (visit post (concat db** db) env 'effect))
457 ((db**) (concat db* db**)))
458 (return (make-dynwind src winder pre body post unwinder)
459 db**)))
460 (($ <dynlet> src fluids vals body)
461 (let*-values (((fluids db*) (parallel-visit fluids db env 'value))
462 ((vals db**) (parallel-visit vals db env 'value))
463 ((body db***) (visit body (concat db** (concat db* db))
464 env ctx)))
465 (return (make-dynlet src fluids vals body)
466 (concat db*** (concat db** db*)))))
467 (($ <dynref> src fluid)
468 (let*-values (((fluid db*) (visit fluid db env 'value)))
469 (return (make-dynref src fluid)
470 db*)))
471 (($ <dynset> src fluid exp)
472 (let*-values (((fluid db*) (visit fluid db env 'value))
473 ((exp db**) (visit exp db env 'value)))
474 (return (make-dynset src fluid exp)
475 (concat db** db*))))
476 (($ <toplevel-ref>)
477 (return exp vlist-null))
478 (($ <module-ref>)
479 (return exp vlist-null))
480 (($ <module-set> src mod name public? exp)
481 (let*-values (((exp db*) (visit exp db env 'value)))
482 (return (make-module-set src mod name public? exp)
483 db*)))
484 (($ <toplevel-define> src name exp)
485 (let*-values (((exp db*) (visit exp db env 'value)))
486 (return (make-toplevel-define src name exp)
487 db*)))
488 (($ <toplevel-set> src name exp)
489 (let*-values (((exp db*) (visit exp db env 'value)))
490 (return (make-toplevel-set src name exp)
491 db*)))
492 (($ <primitive-ref>)
493 (return exp vlist-null))
494 (($ <conditional> src test consequent alternate)
495 (let*-values
496 (((test db+) (visit test db env 'test))
497 ((converse db-) (visit (negate test 'test) db env 'test))
498 ((consequent db++) (visit consequent (concat db+ db) env ctx))
499 ((alternate db--) (visit alternate (concat db- db) env ctx)))
500 (match (make-conditional src test consequent alternate)
501 (($ <conditional> _ ($ <const> _ exp))
502 (if exp
503 (return consequent (concat db++ db+))
504 (return alternate (concat db-- db-))))
505 ;; (if FOO A A) => (begin FOO A)
506 (($ <conditional> src _
507 ($ <const> _ a) ($ <const> _ (? (cut equal? a <>))))
508 (visit (make-seq #f test (make-const #f a))
509 db env ctx))
510 ;; (if FOO #t #f) => FOO for boolean-valued FOO.
511 (($ <conditional> src
512 (? (cut boolean-valued-expression? <> ctx))
513 ($ <const> _ #t) ($ <const> _ #f))
514 (return test db+))
515 ;; (if FOO #f #t) => (not FOO)
516 (($ <conditional> src _ ($ <const> _ #f) ($ <const> _ #t))
517 (visit (negate test ctx) db env ctx))
518
519 ;; Allow "and"-like conditions to accumulate in test context.
520 ((and c ($ <conditional> _ _ _ ($ <const> _ #f)))
521 (return c (if (eq? ctx 'test) (concat db++ db+) vlist-null)))
522 ((and c ($ <conditional> _ _ ($ <const> _ #f) _))
523 (return c (if (eq? ctx 'test) (concat db-- db-) vlist-null)))
524
525 ;; Conditional bailouts turn expressions into predicates.
526 ((and c ($ <conditional> _ _ _ (? bailout?)))
527 (return c (concat db++ db+)))
528 ((and c ($ <conditional> _ _ (? bailout?) _))
529 (return c (concat db-- db-)))
530
531 (c
532 (return c (intersection (concat db++ db+) (concat db-- db-)))))))
533 (($ <primcall> src primitive args)
534 (let*-values (((args db*) (parallel-visit args db env 'value)))
535 (return (make-primcall src primitive args) db*)))
536 (($ <call> src proc args)
537 (let*-values (((proc db*) (visit proc db env 'value))
538 ((args db**) (parallel-visit args db env 'value)))
539 (return (make-call src proc args)
540 (concat db** db*))))
541 (($ <lambda> src meta body)
542 (let*-values (((body _) (visit body (control-flow-boundary db)
543 env 'values)))
544 (return (make-lambda src meta body)
545 vlist-null)))
546 (($ <lambda-case> src req opt rest kw inits gensyms body alt)
547 (let*-values (((inits _) (parallel-visit inits db env 'value))
548 ((body db*) (visit body db env ctx))
549 ((alt _) (if alt
550 (visit alt db env ctx)
551 (values #f #f))))
552 (return (make-lambda-case src req opt rest kw inits gensyms body alt)
553 (if alt vlist-null db*))))
554 (($ <seq> src head tail)
555 (let*-values (((head db*) (visit head db env 'effect)))
556 (cond
557 ((void? head)
558 (visit tail db env ctx))
559 (else
560 (let*-values (((tail db**) (visit tail (concat db* db) env ctx)))
561 (values (make-seq src head tail)
562 (concat db** db*)))))))
563 (($ <prompt> src tag body handler)
564 (let*-values (((tag db*) (visit tag db env 'value))
565 ((body _) (visit body (concat db* db) env ctx))
566 ((handler _) (visit handler (concat db* db) env ctx)))
567 (return (make-prompt src tag body handler)
568 db*)))
569 (($ <abort> src tag args tail)
570 (let*-values (((tag db*) (visit tag db env 'value))
571 ((args db**) (parallel-visit args db env 'value))
572 ((tail db***) (visit tail db env 'value)))
573 (return (make-abort src tag args tail)
574 (concat db* (concat db** db***))))))))