deduplication: Ignore EMLINK.
[jackhill/guix/guix.git] / guix / store / deduplication.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org>
3 ;;; Copyright © 2018, 2019 Ludovic Courtès <ludo@gnu.org>
4 ;;;
5 ;;; This file is part of GNU Guix.
6 ;;;
7 ;;; GNU Guix is free software; you can redistribute it and/or modify it
8 ;;; under the terms of the GNU General Public License as published by
9 ;;; the Free Software Foundation; either version 3 of the License, or (at
10 ;;; your option) any later version.
11 ;;;
12 ;;; GNU Guix is distributed in the hope that it will be useful, but
13 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;;; GNU General Public License for more details.
16 ;;;
17 ;;; You should have received a copy of the GNU General Public License
18 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
19
20 ;;; This houses stuff we do to files when they arrive at the store - resetting
21 ;;; timestamps, deduplicating, etc.
22
23 (define-module (guix store deduplication)
24 #:use-module (gcrypt hash)
25 #:use-module (guix build utils)
26 #:use-module (guix base16)
27 #:use-module (srfi srfi-11)
28 #:use-module (rnrs io ports)
29 #:use-module (ice-9 ftw)
30 #:use-module (guix serialization)
31 #:export (nar-sha256
32 deduplicate))
33
34 ;; XXX: This port is used as a workaround on Guile <= 2.2.4 where
35 ;; 'port-position' throws to 'out-of-range' when the offset is great than or
36 ;; equal to 2^32: <https://bugs.gnu.org/32161>.
37 (define (counting-wrapper-port output-port)
38 "Return two values: an output port that wraps OUTPUT-PORT, and a thunk to
39 retrieve the number of bytes written to OUTPUT-PORT."
40 (let ((byte-count 0))
41 (values (make-custom-binary-output-port "counting-wrapper"
42 (lambda (bytes offset count)
43 (put-bytevector output-port bytes
44 offset count)
45 (set! byte-count
46 (+ byte-count count))
47 count)
48 (lambda ()
49 byte-count)
50 #f
51 (lambda ()
52 (close-port output-port)))
53 (lambda ()
54 byte-count))))
55
56 (define (nar-sha256 file)
57 "Gives the sha256 hash of a file and the size of the file in nar form."
58 (let*-values (((port get-hash) (open-sha256-port))
59 ((wrapper get-size) (counting-wrapper-port port)))
60 (write-file file wrapper)
61 (force-output wrapper)
62 (force-output port)
63 (let ((hash (get-hash))
64 (size (get-size)))
65 (close-port wrapper)
66 (values hash size))))
67
68 (define (tempname-in directory)
69 "Gives an unused temporary name under DIRECTORY. Not guaranteed to still be
70 unused by the time you create anything with that name, but a good shot."
71 (let ((const-part (string-append directory "/.tmp-link-"
72 (number->string (getpid)))))
73 (let try ((guess-part
74 (number->string (random most-positive-fixnum) 16)))
75 (if (file-exists? (string-append const-part "-" guess-part))
76 (try (number->string (random most-positive-fixnum) 16))
77 (string-append const-part "-" guess-part)))))
78
79 (define* (get-temp-link target #:optional (link-prefix (dirname target)))
80 "Like mkstemp!, but instead of creating a new file and giving you the name,
81 it creates a new hardlink to TARGET and gives you the name. Since
82 cross-filesystem hardlinks don't work, the temp link must be created on the
83 same filesystem - where in that filesystem it is can be controlled by
84 LINK-PREFIX."
85 (let try ((tempname (tempname-in link-prefix)))
86 (catch 'system-error
87 (lambda ()
88 (link target tempname)
89 tempname)
90 (lambda args
91 (if (= (system-error-errno args) EEXIST)
92 (try (tempname-in link-prefix))
93 (apply throw args))))))
94
95 ;; There are 3 main kinds of errors we can get from hardlinking: "Too many
96 ;; things link to this" (EMLINK), "this link already exists" (EEXIST), and
97 ;; "can't fit more stuff in this directory" (ENOSPC).
98
99 (define* (replace-with-link target to-replace
100 #:key (swap-directory (dirname target)))
101 "Atomically replace the file TO-REPLACE with a link to TARGET. Use
102 SWAP-DIRECTORY as the directory to store temporary hard links. Upon ENOSPC
103 and EMLINK, TO-REPLACE is left unchanged.
104
105 Note: TARGET, TO-REPLACE, and SWAP-DIRECTORY must be on the same file system."
106 (define temp-link
107 (catch 'system-error
108 (lambda ()
109 (get-temp-link target swap-directory))
110 (lambda args
111 ;; We get ENOSPC when we can't fit an additional entry in
112 ;; SWAP-DIRECTORY. If it's EMLINK, then TARGET has reached its
113 ;; maximum number of links.
114 (if (memv (system-error-errno args) `(,ENOSPC ,EMLINK))
115 #f
116 (apply throw args)))))
117
118 ;; If we couldn't create TEMP-LINK, that's OK: just don't do the
119 ;; replacement, which means TO-REPLACE won't be deduplicated.
120 (when temp-link
121 (let* ((parent (dirname to-replace))
122 (stat (stat parent)))
123 (make-file-writable parent)
124 (catch 'system-error
125 (lambda ()
126 (rename-file temp-link to-replace))
127 (lambda args
128 (delete-file temp-link)
129 (unless (= EMLINK (system-error-errno args))
130 (apply throw args))))
131
132 ;; Restore PARENT's mtime and permissions.
133 (set-file-time parent stat)
134 (chmod parent (stat:mode stat)))))
135
136 (define* (deduplicate path hash #:key (store %store-directory))
137 "Check if a store item with sha256 hash HASH already exists. If so,
138 replace PATH with a hardlink to the already-existing one. If not, register
139 PATH so that future duplicates can hardlink to it. PATH is assumed to be
140 under STORE."
141 (let* ((links-directory (string-append store "/.links"))
142 (link-file (string-append links-directory "/"
143 (bytevector->base16-string hash))))
144 (mkdir-p links-directory)
145 (if (eq? 'directory (stat:type (lstat path)))
146 ;; Can't hardlink directories, so hardlink their atoms.
147 (for-each (lambda (file)
148 (unless (or (member file '("." ".."))
149 (and (string=? path store)
150 (string=? file ".links")))
151 (let ((file (string-append path "/" file)))
152 (deduplicate file (nar-sha256 file)
153 #:store store))))
154 (scandir path))
155 (if (file-exists? link-file)
156 (replace-with-link link-file path
157 #:swap-directory links-directory)
158 (catch 'system-error
159 (lambda ()
160 (link path link-file))
161 (lambda args
162 (let ((errno (system-error-errno args)))
163 (cond ((= errno EEXIST)
164 ;; Someone else put an entry for PATH in
165 ;; LINKS-DIRECTORY before we could. Let's use it.
166 (replace-with-link path link-file
167 #:swap-directory links-directory))
168 ((= errno ENOSPC)
169 ;; There's not enough room in the directory index for
170 ;; more entries in .links, but that's fine: we can
171 ;; just stop.
172 #f)
173 ((= errno EMLINK)
174 ;; PATH has reached the maximum number of links, but
175 ;; that's OK: we just can't deduplicate it more.
176 #f)
177 (else (apply throw args))))))))))