a777940f86f3cd413f9a3590b4275623944e6ebf
[jackhill/guix/guix.git] / guix / store / deduplication.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org>
3 ;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org>
4 ;;;
5 ;;; This file is part of GNU Guix.
6 ;;;
7 ;;; GNU Guix is free software; you can redistribute it and/or modify it
8 ;;; under the terms of the GNU General Public License as published by
9 ;;; the Free Software Foundation; either version 3 of the License, or (at
10 ;;; your option) any later version.
11 ;;;
12 ;;; GNU Guix is distributed in the hope that it will be useful, but
13 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;;; GNU General Public License for more details.
16 ;;;
17 ;;; You should have received a copy of the GNU General Public License
18 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
19
20 ;;; This houses stuff we do to files when they arrive at the store - resetting
21 ;;; timestamps, deduplicating, etc.
22
23 (define-module (guix store deduplication)
24 #:use-module (gcrypt hash)
25 #:use-module (guix build utils)
26 #:use-module (guix base16)
27 #:use-module (srfi srfi-11)
28 #:use-module (rnrs io ports)
29 #:use-module (ice-9 ftw)
30 #:use-module (guix serialization)
31 #:export (nar-sha256
32 deduplicate))
33
34 ;; XXX: This port is used as a workaround on Guile <= 2.2.4 where
35 ;; 'port-position' throws to 'out-of-range' when the offset is great than or
36 ;; equal to 2^32: <https://bugs.gnu.org/32161>.
37 (define (counting-wrapper-port output-port)
38 "Return two values: an output port that wraps OUTPUT-PORT, and a thunk to
39 retrieve the number of bytes written to OUTPUT-PORT."
40 (let ((byte-count 0))
41 (values (make-custom-binary-output-port "counting-wrapper"
42 (lambda (bytes offset count)
43 (put-bytevector output-port bytes
44 offset count)
45 (set! byte-count
46 (+ byte-count count))
47 count)
48 (lambda ()
49 byte-count)
50 #f
51 (lambda ()
52 (close-port output-port)))
53 (lambda ()
54 byte-count))))
55
56 (define (nar-sha256 file)
57 "Gives the sha256 hash of a file and the size of the file in nar form."
58 (let*-values (((port get-hash) (open-sha256-port))
59 ((wrapper get-size) (counting-wrapper-port port)))
60 (write-file file wrapper)
61 (force-output wrapper)
62 (force-output port)
63 (let ((hash (get-hash))
64 (size (get-size)))
65 (close-port wrapper)
66 (values hash size))))
67
68 (define (tempname-in directory)
69 "Gives an unused temporary name under DIRECTORY. Not guaranteed to still be
70 unused by the time you create anything with that name, but a good shot."
71 (let ((const-part (string-append directory "/.tmp-link-"
72 (number->string (getpid)))))
73 (let try ((guess-part
74 (number->string (random most-positive-fixnum) 16)))
75 (if (file-exists? (string-append const-part "-" guess-part))
76 (try (number->string (random most-positive-fixnum) 16))
77 (string-append const-part "-" guess-part)))))
78
79 (define* (get-temp-link target #:optional (link-prefix (dirname target)))
80 "Like mkstemp!, but instead of creating a new file and giving you the name,
81 it creates a new hardlink to TARGET and gives you the name. Since
82 cross-filesystem hardlinks don't work, the temp link must be created on the
83 same filesystem - where in that filesystem it is can be controlled by
84 LINK-PREFIX."
85 (let try ((tempname (tempname-in link-prefix)))
86 (catch 'system-error
87 (lambda ()
88 (link target tempname)
89 tempname)
90 (lambda args
91 (if (= (system-error-errno args) EEXIST)
92 (try (tempname-in link-prefix))
93 (apply throw args))))))
94
95 ;; There are 3 main kinds of errors we can get from hardlinking: "Too many
96 ;; things link to this" (EMLINK), "this link already exists" (EEXIST), and
97 ;; "can't fit more stuff in this directory" (ENOSPC).
98
99 (define* (replace-with-link target to-replace
100 #:key (swap-directory (dirname target)))
101 "Atomically replace the file TO-REPLACE with a link to TARGET. Use
102 SWAP-DIRECTORY as the directory to store temporary hard links. Upon ENOSPC
103 and EMLINK, TO-REPLACE is left unchanged.
104
105 Note: TARGET, TO-REPLACE, and SWAP-DIRECTORY must be on the same file system."
106 (define temp-link
107 (catch 'system-error
108 (lambda ()
109 (get-temp-link target swap-directory))
110 (lambda args
111 ;; We get ENOSPC when we can't fit an additional entry in
112 ;; SWAP-DIRECTORY.
113 (if (= ENOSPC (system-error-errno args))
114 #f
115 (apply throw args)))))
116
117 ;; If we couldn't create TEMP-LINK, that's OK: just don't do the
118 ;; replacement, which means TO-REPLACE won't be deduplicated.
119 (when temp-link
120 (let* ((parent (dirname to-replace))
121 (stat (stat parent)))
122 (make-file-writable parent)
123 (catch 'system-error
124 (lambda ()
125 (rename-file temp-link to-replace))
126 (lambda args
127 (delete-file temp-link)
128 (unless (= EMLINK (system-error-errno args))
129 (apply throw args))))
130
131 ;; Restore PARENT's mtime and permissions.
132 (set-file-time parent stat)
133 (chmod parent (stat:mode stat)))))
134
135 (define* (deduplicate path hash #:key (store %store-directory))
136 "Check if a store item with sha256 hash HASH already exists. If so,
137 replace PATH with a hardlink to the already-existing one. If not, register
138 PATH so that future duplicates can hardlink to it. PATH is assumed to be
139 under STORE."
140 (let* ((links-directory (string-append store "/.links"))
141 (link-file (string-append links-directory "/"
142 (bytevector->base16-string hash))))
143 (mkdir-p links-directory)
144 (if (eq? 'directory (stat:type (lstat path)))
145 ;; Can't hardlink directories, so hardlink their atoms.
146 (for-each (lambda (file)
147 (unless (or (member file '("." ".."))
148 (and (string=? path store)
149 (string=? file ".links")))
150 (let ((file (string-append path "/" file)))
151 (deduplicate file (nar-sha256 file)
152 #:store store))))
153 (scandir path))
154 (if (file-exists? link-file)
155 (replace-with-link link-file path
156 #:swap-directory links-directory)
157 (catch 'system-error
158 (lambda ()
159 (link path link-file))
160 (lambda args
161 (let ((errno (system-error-errno args)))
162 (cond ((= errno EEXIST)
163 ;; Someone else put an entry for PATH in
164 ;; LINKS-DIRECTORY before we could. Let's use it.
165 (replace-with-link path link-file
166 #:swap-directory links-directory))
167 ((= errno ENOSPC)
168 ;; There's not enough room in the directory index for
169 ;; more entries in .links, but that's fine: we can
170 ;; just stop.
171 #f)
172 (else (apply throw args))))))))))