union: Don't traverse sub-directories only found in one element of the union.
authorLudovic Courtès <ludo@gnu.org>
Mon, 2 Sep 2013 21:03:03 +0000 (23:03 +0200)
committerLudovic Courtès <ludo@gnu.org>
Mon, 2 Sep 2013 21:04:25 +0000 (23:04 +0200)
This significantly reduces I/O when building profiles, especially with
lots of package-specific sub-directories (such as 'share/emacs/24.3',
'texmf', etc.)

* guix/build/union.scm (union-build)[file-tree](others-have-it?): New
  procedure.  Use it in the 'enter?' parameter of 'file-system-fold';
  change 'skip' parameter accordingly.
* tests/union.scm ("union-build"): Ensure that 'include' is a symlink
  and 'bin' is a directory.

guix/build/union.scm
tests/union.scm

index 275746d..077b7fe 100644 (file)
@@ -105,7 +105,22 @@ single leaf."
 the DIRECTORIES."
   (define (file-tree dir)
     ;; Return the contents of DIR as a tree.
-    (match (file-system-fold (const #t)
+
+    (define (others-have-it? subdir)
+      ;; Return #t if other elements of DIRECTORIES have SUBDIR.
+      (let ((subdir (substring subdir (string-length dir))))
+        (any (lambda (other)
+               (and (not (string=? other dir))
+                    (file-exists? (string-append other "/" subdir))))
+             directories)))
+
+    (match (file-system-fold (lambda (subdir stat result) ; enter?
+                               ;; No need to traverse DIR since there's
+                               ;; nothing to union it with.  Thus, we avoid
+                               ;; creating a gazillon symlinks (think
+                               ;; share/emacs/24.3, share/texmf, etc.)
+                               (or (string=? subdir dir)
+                                   (others-have-it? subdir)))
                              (lambda (file stat result) ; leaf
                                (match result
                                  (((siblings ...) rest ...)
@@ -117,7 +132,12 @@ the DIRECTORIES."
                                  (((leaves ...) (siblings ...) rest ...)
                                   `(((,(basename dir) ,@leaves) ,@siblings)
                                     ,@rest))))
-                             (const #f)                 ; skip
+                             (lambda (dir stat result)  ; skip
+                               ;; DIR is not available elsewhere, so treat it
+                               ;; as a leaf.
+                               (match result
+                                 (((siblings ...) rest ...)
+                                  `((,dir ,@siblings) ,@rest))))
                              (lambda (file stat errno result)
                                (format (current-error-port) "union-build: ~a: ~a~%"
                                        file (strerror errno)))
@@ -158,8 +178,9 @@ the DIRECTORIES."
   (mkdir output)
   (let loop ((tree (delete-duplicate-leaves
                     (cons "."
-                          (tree-union (append-map (compose tree-leaves file-tree)
-                                                  directories)))
+                          (tree-union
+                           (append-map (compose tree-leaves file-tree)
+                                       (delete-duplicates directories))))
                     leaf=?
                     resolve-collision))
              (dir  '()))
index 9816882..6287cff 100644 (file)
                 (file-exists? "bin/ld")
                 (file-exists? "lib/libc.so")
                 (directory-exists? "lib/gcc")
-                (file-exists? "include/unistd.h"))))))
+                (file-exists? "include/unistd.h")
+
+                ;; The 'include' sub-directory is only found in
+                ;; glibc-bootstrap, so it should be unified in a
+                ;; straightforward way, without traversing it.
+                (eq? 'symlink (stat:type (lstat "include")))
+
+                ;; Conversely, several inputs have a 'bin' sub-directory, so
+                ;; unifying it requires traversing them all, and creating a
+                ;; new 'bin' sub-directory in the profile.
+                (eq? 'directory (stat:type (lstat "bin"))))))))
 
 (test-end)