;;; Common Subexpression Elimination (CSE) on Tree-IL ;; Copyright (C) 2011, 2012 Free Software Foundation, Inc. ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public ;;;; License as published by the Free Software Foundation; either ;;;; version 3 of the License, or (at your option) any later version. ;;;; ;;;; This library is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;;; Lesser General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU Lesser General Public ;;;; License along with this library; if not, write to the Free Software ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA (define-module (language tree-il cse) #:use-module (language tree-il) #:use-module (language tree-il primitives) #:use-module (language tree-il effects) #:use-module (ice-9 vlist) #:use-module (ice-9 match) #:use-module (srfi srfi-1) #:use-module (srfi srfi-9) #:use-module (srfi srfi-11) #:use-module (srfi srfi-26) #:export (cse)) ;;; ;;; This pass eliminates common subexpressions in Tree-IL. It works ;;; best locally -- within a function -- so it is meant to be run after ;;; partial evaluation, which usually inlines functions and so opens up ;;; a bigger space for CSE to work. ;;; ;;; The algorithm traverses the tree of expressions, returning two ;;; values: the newly rebuilt tree, and a "database". The database is ;;; the set of expressions that will have been evaluated as part of ;;; evaluating an expression. For example, in: ;;; ;;; (1- (+ (if a b c) (* x y))) ;;; ;;; We can say that when it comes time to evaluate (1- <>), that the ;;; subexpressions +, x, y, and (* x y) must have been evaluated in ;;; values context. We know that a was evaluated in test context, but ;;; we don't know if it was true or false. ;;; ;;; The expressions in the database /dominate/ any subsequent ;;; expression: FOO dominates BAR if evaluation of BAR implies that any ;;; effects associated with FOO have already occured. ;;; ;;; When adding expressions to the database, we record the context in ;;; which they are evaluated. We treat expressions in test context ;;; specially: the presence of such an expression indicates that the ;;; expression is true. In this way we can elide duplicate predicates. ;;; ;;; Duplicate predicates are not common in code that users write, but ;;; can occur quite frequently in macro-generated code. ;;; ;;; For example: ;;; ;;; (and (foo? x) (foo-bar x)) ;;; => (if (and (struct? x) (eq? (struct-vtable x) )) ;;; (if (and (struct? x) (eq? (struct-vtable x) )) ;;; (struct-ref x 1) ;;; (throw 'not-a-foo)) ;;; #f)) ;;; => (if (and (struct? x) (eq? (struct-vtable x) )) ;;; (struct-ref x 1) ;;; #f) ;;; ;;; A conditional bailout in effect context also has the effect of ;;; adding predicates to the database: ;;; ;;; (begin (foo-bar x) (foo-baz x)) ;;; => (begin ;;; (if (and (struct? x) (eq? (struct-vtable x) )) ;;; (struct-ref x 1) ;;; (throw 'not-a-foo)) ;;; (if (and (struct? x) (eq? (struct-vtable x) )) ;;; (struct-ref x 2) ;;; (throw 'not-a-foo))) ;;; => (begin ;;; (if (and (struct? x) (eq? (struct-vtable x) )) ;;; (struct-ref x 1) ;;; (throw 'not-a-foo)) ;;; (struct-ref x 2)) ;;; ;;; When removing code, we have to ensure that the semantics of the ;;; source program and the residual program are the same. It's easy to ;;; ensure that they have the same value, because those manipulations ;;; are just algebraic, but the tricky thing is to ensure that the ;;; expressions exhibit the same ordering of effects. For that, we use ;;; the effects analysis of (language tree-il effects). We only ;;; eliminate code if the duplicate code commutes with all of the ;;; dominators on the path from the duplicate to the original. ;;; ;;; The implementation uses vhashes as the fundamental data structure. ;;; This can be seen as a form of global value numbering. This ;;; algorithm currently spends most of its time in vhash-assoc. I'm not ;;; sure whether that is due to our bad hash function in Guile 2.0, an ;;; inefficiency in vhashes, or what. Overall though the complexity ;;; should be linear, or N log N -- whatever vhash-assoc's complexity ;;; is. Walking the dominators is nonlinear, but that only happens when ;;; we've actually found a common subexpression so that should be OK. ;;; ;; Logging helpers, as in peval. ;; (define-syntax *logging* (identifier-syntax #f)) ;; (define %logging #f) ;; (define-syntax *logging* (identifier-syntax %logging)) (define-syntax log (syntax-rules (quote) ((log 'event arg ...) (if (and *logging* (or (eq? *logging* #t) (memq 'event *logging*))) (log* 'event arg ...))))) (define (log* event . args) (let ((pp (module-ref (resolve-interface '(ice-9 pretty-print)) 'pretty-print))) (pp `(log ,event . ,args)) (newline) (values))) ;; A pre-pass on the source program to determine the set of assigned ;; lexicals. ;; (define* (build-assigned-var-table exp #:optional (table vlist-null)) (tree-il-fold (lambda (exp res) res) (lambda (exp res) (match exp (($ src name gensym exp) (vhash-consq gensym #t res)) (_ res))) (lambda (exp res) res) table exp)) (define (boolean-valued-primitive? primitive) (or (negate-primitive primitive) (eq? primitive 'not) (let ((chars (symbol->string primitive))) (eqv? (string-ref chars (1- (string-length chars))) #\?)))) (define (boolean-valued-expression? x ctx) (match x (($ _ (? boolean-valued-primitive?)) #t) (($ _ (? boolean?)) #t) (_ (eq? ctx 'test)))) (define* (cse exp) "Eliminate common subexpressions in EXP." (define assigned-lexical? (let ((table (build-assigned-var-table exp))) (lambda (sym) (vhash-assq sym table)))) (define compute-effects (make-effects-analyzer assigned-lexical?)) (define (negate exp ctx) (match exp (($ src x) (make-const src (not x))) (($ src) (make-const src #f)) (($ src test consequent alternate) (make-conditional src test (negate consequent ctx) (negate alternate ctx))) (($ _ 'not ((and x (? (cut boolean-valued-expression? <> ctx))))) x) (($ src (and pred (? negate-primitive)) args) (make-primcall src (negate-primitive pred) args)) (_ (make-primcall #f 'not (list exp))))) (define (bailout? exp) (causes-effects? (compute-effects exp) &definite-bailout)) (define (struct-nfields x) (/ (string-length (symbol->string (struct-layout x))) 2)) (define hash-bits (logcount most-positive-fixnum)) (define hash-depth 4) (define hash-width 3) (define (hash-expression exp) (define (hash-exp exp depth) (define (rotate x bits) (logior (ash x (- bits)) (ash (logand x (1- (ash 1 bits))) (- hash-bits bits)))) (define (mix h1 h2) (logxor h1 (rotate h2 8))) (define (hash-struct s) (let ((len (struct-nfields s)) (h (hashq (struct-vtable s) most-positive-fixnum))) (if (zero? depth) h (let lp ((i (max (- len hash-width) 1)) (h h)) (if (< i len) (lp (1+ i) (mix (hash-exp (struct-ref s i) (1+ depth)) h)) h))))) (define (hash-list l) (let ((h (hashq 'list most-positive-fixnum))) (if (zero? depth) h (let lp ((l l) (width 0) (h h)) (if (< width hash-width) (lp (cdr l) (1+ width) (mix (hash-exp (car l) (1+ depth)) h)) h))))) (cond ((struct? exp) (hash-struct exp)) ((list? exp) (hash-list exp)) (else (hash exp most-positive-fixnum)))) (hash-exp exp 0)) (define (expressions-equal? a b) (cond ((struct? a) (and (struct? b) (eq? (struct-vtable a) (struct-vtable b)) ;; Assume that all structs are tree-il, so we skip over the ;; src slot. (let lp ((n (1- (struct-nfields a)))) (or (zero? n) (and (expressions-equal? (struct-ref a n) (struct-ref b n)) (lp (1- n))))))) ((pair? a) (and (pair? b) (expressions-equal? (car a) (car b)) (expressions-equal? (cdr a) (cdr b)))) (else (equal? a b)))) (define (hasher n) (lambda (x size) (modulo n size))) (define (add-to-db exp effects ctx db) (let ((v (vector exp effects ctx)) (h (hash-expression exp))) (vhash-cons v h db (hasher h)))) (define (control-flow-boundary db) (let ((h (hashq 'lambda most-positive-fixnum))) (vhash-cons 'lambda h db (hasher h)))) (define (find-dominating-expression exp effects ctx db) (define (entry-matches? v1 v2) (match (if (vector? v1) v1 v2) (#(exp* effects* ctx*) (and (expressions-equal? exp exp*) (or (not ctx) (eq? ctx* ctx)))) (_ #f))) (let ((len (vlist-length db)) (h (hash-expression exp))) (and (vhash-assoc #t db entry-matches? (hasher h)) (let lp ((n 0)) (and (< n len) (match (vlist-ref db n) (('lambda . h*) ;; We assume that lambdas can escape and thus be ;; called from anywhere. Thus code inside a lambda ;; only has a dominating expression if it does not ;; depend on any effects. (and (not (depends-on-effects? effects &all-effects)) (lp (1+ n)))) ((#(exp* effects* ctx*) . h*) (log 'walk (unparse-tree-il exp) effects (unparse-tree-il exp*) effects* ctx*) (or (and (= h h*) (or (not ctx) (eq? ctx ctx*)) (expressions-equal? exp exp*)) (and (effects-commute? effects effects*) (lp (1+ n))))))))))) ;; Return #t if EXP is dominated by an instance of itself. In that ;; case, we can exclude *type-check* effects, because the first ;; expression already caused them if needed. (define (has-dominating-effect? exp effects db) (or (constant? effects) (and (effect-free? (exclude-effects effects (logior &zero-values &allocation &type-check))) (find-dominating-expression exp effects #f db)))) (define (find-dominating-test exp effects db) (and (effect-free? (exclude-effects effects (logior &allocation &type-check))) (match exp (($ src val) (if (boolean? val) exp (make-const src (not (not val))))) ;; For (not FOO), try to prove FOO, then negate the result. (($ src 'not (exp*)) (match (find-dominating-test exp* effects db) (($ _ val) (log 'inferring exp (not val)) (make-const src (not val))) (_ #f))) (_ (cond ((find-dominating-expression exp effects #f db) ;; We have an EXP fact, so we infer #t. (log 'inferring exp #t) (make-const (tree-il-src exp) #t)) ((find-dominating-expression (negate exp 'test) effects #f db) ;; We have a (not EXP) fact, so we infer #f. (log 'inferring exp #f) (make-const (tree-il-src exp) #f)) (else ;; Otherwise we don't know. #f)))))) (define (add-to-env exp name sym db env) (let* ((v (vector exp name sym (vlist-length db))) (h (hash-expression exp))) (vhash-cons v h env (hasher h)))) (define (augment-env env names syms exps db) (if (null? names) env (let ((name (car names)) (sym (car syms)) (exp (car exps))) (augment-env (if (or (assigned-lexical? sym) (lexical-ref? exp)) env (add-to-env exp name sym db env)) (cdr names) (cdr syms) (cdr exps) db)))) (define (find-dominating-lexical exp effects env db) (define (entry-matches? v1 v2) (match (if (vector? v1) v1 v2) (#(exp* name sym db) (expressions-equal? exp exp*)) (_ #f))) (define (unroll db base n) (or (zero? n) (match (vlist-ref db base) (('lambda . h*) ;; See note in find-dominating-expression. (and (not (depends-on-effects? effects &all-effects)) (unroll db (1+ base) (1- n)))) ((#(exp* effects* ctx*) . h*) (and (effects-commute? effects effects*) (unroll db (1+ base) (1- n))))))) (let ((h (hash-expression exp))) (and (effect-free? (exclude-effects effects &type-check)) (vhash-assoc exp env entry-matches? (hasher h)) (let ((env-len (vlist-length env)) (db-len (vlist-length db))) (let lp ((n 0) (m 0)) (and (< n env-len) (match (vlist-ref env n) ((#(exp* name sym db-len*) . h*) (and (unroll db m (- db-len db-len*)) (if (and (= h h*) (expressions-equal? exp* exp)) (make-lexical-ref (tree-il-src exp) name sym) (lp (1+ n) (- db-len db-len*)))))))))))) (define (intersection db+ db-) (vhash-fold-right (lambda (k h out) (if (vhash-assoc k db- equal? (hasher h)) (vhash-cons k h out (hasher h)) out)) vlist-null db+)) (define (concat db1 db2) (vhash-fold-right (lambda (k h tail) (vhash-cons k h tail (hasher h))) db2 db1)) (let visit ((exp exp) (db vlist-null) ; dominating expressions: #(exp effects ctx) -> hash (env vlist-null) ; named expressions: #(exp name sym db) -> hash (ctx 'values)) ; test, effect, value, or values (define (parallel-visit exps db env ctx) (let lp ((in exps) (out '()) (db* vlist-null)) (if (pair? in) (call-with-values (lambda () (visit (car in) db env ctx)) (lambda (x db**) (lp (cdr in) (cons x out) (concat db** db*)))) (values (reverse out) db*)))) (define (return exp db*) (let ((effects (compute-effects exp))) (cond ((and (eq? ctx 'effect) (not (lambda-case? exp)) (or (effect-free? (exclude-effects effects (logior &zero-values &allocation))) (has-dominating-effect? exp effects db))) (cond ((void? exp) (values exp db*)) (else (log 'elide ctx (unparse-tree-il exp)) (values (make-void #f) db*)))) ((and (boolean-valued-expression? exp ctx) (find-dominating-test exp effects db)) => (lambda (exp) (log 'propagate-test ctx (unparse-tree-il exp)) (values exp db*))) ((and (eq? ctx 'value) (find-dominating-lexical exp effects env db)) => (lambda (exp) (log 'propagate-value ctx (unparse-tree-il exp)) (values exp db*))) ((and (constant? effects) (memq ctx '(value values))) ;; Adds nothing to the db. (values exp db*)) (else (log 'return ctx effects (unparse-tree-il exp) db*) (values exp (add-to-db exp effects ctx db*)))))) (log 'visit ctx (unparse-tree-il exp) db env) (match exp (($ ) (return exp vlist-null)) (($ ) (return exp vlist-null)) (($ _ _ gensym) (return exp vlist-null)) (($ src name gensym exp) (let*-values (((exp db*) (visit exp db env 'value))) (return (make-lexical-set src name gensym exp) db*))) (($ src names gensyms vals body) (let*-values (((vals db*) (parallel-visit vals db env 'value)) ((body db**) (visit body (concat db* db) (augment-env env names gensyms vals db) ctx))) (return (make-let src names gensyms vals body) (concat db** db*)))) (($ src in-order? names gensyms vals body) (let*-values (((vals db*) (parallel-visit vals db env 'value)) ((body db**) (visit body (concat db* db) (augment-env env names gensyms vals db) ctx))) (return (make-letrec src in-order? names gensyms vals body) (concat db** db*)))) (($ src names gensyms vals body) (let*-values (((vals db*) (parallel-visit vals db env 'value)) ((body db**) (visit body (concat db* db) env ctx))) (return (make-fix src names gensyms vals body) (concat db** db*)))) (($ src producer consumer) (let*-values (((producer db*) (visit producer db env 'values)) ((consumer db**) (visit consumer (concat db* db) env ctx))) (return (make-let-values src producer consumer) (concat db** db*)))) (($ src winder pre body post unwinder) (let*-values (((winder db*) (visit winder db env 'value)) ((db**) db*) ((unwinder db*) (visit unwinder db env 'value)) ((db**) (concat db* db**)) ((pre db*) (visit pre (concat db** db) env 'effect)) ((db**) (concat db* db**)) ((body db*) (visit body (concat db** db) env ctx)) ((db**) (concat db* db**)) ((post db*) (visit post (concat db** db) env 'effect)) ((db**) (concat db* db**))) (return (make-dynwind src winder pre body post unwinder) db**))) (($ src fluids vals body) (let*-values (((fluids db*) (parallel-visit fluids db env 'value)) ((vals db**) (parallel-visit vals db env 'value)) ((body db***) (visit body (concat db** (concat db* db)) env ctx))) (return (make-dynlet src fluids vals body) (concat db*** (concat db** db*))))) (($ src fluid) (let*-values (((fluid db*) (visit fluid db env 'value))) (return (make-dynref src fluid) db*))) (($ src fluid exp) (let*-values (((fluid db*) (visit fluid db env 'value)) ((exp db**) (visit exp db env 'value))) (return (make-dynset src fluid exp) (concat db** db*)))) (($ ) (return exp vlist-null)) (($ ) (return exp vlist-null)) (($ src mod name public? exp) (let*-values (((exp db*) (visit exp db env 'value))) (return (make-module-set src mod name public? exp) db*))) (($ src name exp) (let*-values (((exp db*) (visit exp db env 'value))) (return (make-toplevel-define src name exp) db*))) (($ src name exp) (let*-values (((exp db*) (visit exp db env 'value))) (return (make-toplevel-set src name exp) db*))) (($ ) (return exp vlist-null)) (($ src test consequent alternate) (let*-values (((test db+) (visit test db env 'test)) ((converse db-) (visit (negate test 'test) db env 'test)) ((consequent db++) (visit consequent (concat db+ db) env ctx)) ((alternate db--) (visit alternate (concat db- db) env ctx))) (match (make-conditional src test consequent alternate) (($ _ ($ _ exp)) (if exp (return consequent (concat db++ db+)) (return alternate (concat db-- db-)))) ;; (if FOO A A) => (begin FOO A) (($ src _ ($ _ a) ($ _ (? (cut equal? a <>)))) (visit (make-seq #f test (make-const #f a)) db env ctx)) ;; (if FOO #t #f) => FOO for boolean-valued FOO. (($ src (? (cut boolean-valued-expression? <> ctx)) ($ _ #t) ($ _ #f)) (return test db+)) ;; (if FOO #f #t) => (not FOO) (($ src _ ($ _ #f) ($ _ #t)) (visit (negate test ctx) db env ctx)) ;; Allow "and"-like conditions to accumulate in test context. ((and c ($ _ _ _ ($ _ #f))) (return c (if (eq? ctx 'test) (concat db++ db+) vlist-null))) ((and c ($ _ _ ($ _ #f) _)) (return c (if (eq? ctx 'test) (concat db-- db-) vlist-null))) ;; Conditional bailouts turn expressions into predicates. ((and c ($ _ _ _ (? bailout?))) (return c (concat db++ db+))) ((and c ($ _ _ (? bailout?) _)) (return c (concat db-- db-))) (c (return c (intersection (concat db++ db+) (concat db-- db-))))))) (($ src primitive args) (let*-values (((args db*) (parallel-visit args db env 'value))) (return (make-primcall src primitive args) db*))) (($ src proc args) (let*-values (((proc db*) (visit proc db env 'value)) ((args db**) (parallel-visit args db env 'value))) (return (make-call src proc args) (concat db** db*)))) (($ src meta body) (let*-values (((body _) (visit body (control-flow-boundary db) env 'values))) (return (make-lambda src meta body) vlist-null))) (($ src req opt rest kw inits gensyms body alt) (let*-values (((inits _) (parallel-visit inits db env 'value)) ((body db*) (visit body db env ctx)) ((alt _) (if alt (visit alt db env ctx) (values #f #f)))) (return (make-lambda-case src req opt rest kw inits gensyms body alt) (if alt vlist-null db*)))) (($ src head tail) (let*-values (((head db*) (visit head db env 'effect))) (cond ((void? head) (visit tail db env ctx)) (else (let*-values (((tail db**) (visit tail (concat db* db) env ctx))) (values (make-seq src head tail) (concat db** db*))))))) (($ src tag body handler) (let*-values (((tag db*) (visit tag db env 'value)) ((body _) (visit body (concat db* db) env ctx)) ((handler _) (visit handler (concat db* db) env ctx))) (return (make-prompt src tag body handler) db*))) (($ src tag args tail) (let*-values (((tag db*) (visit tag db env 'value)) ((args db**) (parallel-visit args db env 'value)) ((tail db***) (visit tail db env 'value))) (return (make-abort src tag args tail) (concat db* (concat db** db***))))))))