1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org>
3 ;;; Copyright © 2018, 2019 Ludovic Courtès <ludo@gnu.org>
5 ;;; This file is part of GNU Guix.
7 ;;; GNU Guix is free software; you can redistribute it and/or modify it
8 ;;; under the terms of the GNU General Public License as published by
9 ;;; the Free Software Foundation; either version 3 of the License, or (at
10 ;;; your option) any later version.
12 ;;; GNU Guix is distributed in the hope that it will be useful, but
13 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;;; GNU General Public License for more details.
17 ;;; You should have received a copy of the GNU General Public License
18 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
20 ;;; This houses stuff we do to files when they arrive at the store - resetting
21 ;;; timestamps, deduplicating, etc.
23 (define-module (guix store deduplication)
24 #:use-module (gcrypt hash)
25 #:use-module (guix build utils)
26 #:use-module (guix base16)
27 #:use-module (srfi srfi-11)
28 #:use-module (rnrs io ports)
29 #:use-module (ice-9 ftw)
30 #:use-module (guix serialization)
34 ;; XXX: This port is used as a workaround on Guile <= 2.2.4 where
35 ;; 'port-position' throws to 'out-of-range' when the offset is great than or
36 ;; equal to 2^32: <https://bugs.gnu.org/32161>.
37 (define (counting-wrapper-port output-port)
38 "Return two values: an output port that wraps OUTPUT-PORT, and a thunk to
39 retrieve the number of bytes written to OUTPUT-PORT."
41 (values (make-custom-binary-output-port "counting-wrapper"
42 (lambda (bytes offset count)
43 (put-bytevector output-port bytes
52 (close-port output-port)))
56 (define (nar-sha256 file)
57 "Gives the sha256 hash of a file and the size of the file in nar form."
58 (let*-values (((port get-hash) (open-sha256-port))
59 ((wrapper get-size) (counting-wrapper-port port)))
60 (write-file file wrapper)
61 (force-output wrapper)
63 (let ((hash (get-hash))
68 (define (tempname-in directory)
69 "Gives an unused temporary name under DIRECTORY. Not guaranteed to still be
70 unused by the time you create anything with that name, but a good shot."
71 (let ((const-part (string-append directory "/.tmp-link-"
72 (number->string (getpid)))))
74 (number->string (random most-positive-fixnum) 16)))
75 (if (file-exists? (string-append const-part "-" guess-part))
76 (try (number->string (random most-positive-fixnum) 16))
77 (string-append const-part "-" guess-part)))))
79 (define* (get-temp-link target #:optional (link-prefix (dirname target)))
80 "Like mkstemp!, but instead of creating a new file and giving you the name,
81 it creates a new hardlink to TARGET and gives you the name. Since
82 cross-filesystem hardlinks don't work, the temp link must be created on the
83 same filesystem - where in that filesystem it is can be controlled by
85 (let try ((tempname (tempname-in link-prefix)))
88 (link target tempname)
91 (if (= (system-error-errno args) EEXIST)
92 (try (tempname-in link-prefix))
93 (apply throw args))))))
95 ;; There are 3 main kinds of errors we can get from hardlinking: "Too many
96 ;; things link to this" (EMLINK), "this link already exists" (EEXIST), and
97 ;; "can't fit more stuff in this directory" (ENOSPC).
99 (define* (replace-with-link target to-replace
100 #:key (swap-directory (dirname target)))
101 "Atomically replace the file TO-REPLACE with a link to TARGET. Use
102 SWAP-DIRECTORY as the directory to store temporary hard links. Upon ENOSPC
103 and EMLINK, TO-REPLACE is left unchanged.
105 Note: TARGET, TO-REPLACE, and SWAP-DIRECTORY must be on the same file system."
109 (get-temp-link target swap-directory))
111 ;; We get ENOSPC when we can't fit an additional entry in
112 ;; SWAP-DIRECTORY. If it's EMLINK, then TARGET has reached its
113 ;; maximum number of links.
114 (if (memv (system-error-errno args) `(,ENOSPC ,EMLINK))
116 (apply throw args)))))
118 ;; If we couldn't create TEMP-LINK, that's OK: just don't do the
119 ;; replacement, which means TO-REPLACE won't be deduplicated.
121 (let* ((parent (dirname to-replace))
122 (stat (stat parent)))
123 (make-file-writable parent)
126 (rename-file temp-link to-replace))
128 (delete-file temp-link)
129 (unless (= EMLINK (system-error-errno args))
130 (apply throw args))))
132 ;; Restore PARENT's mtime and permissions.
133 (set-file-time parent stat)
134 (chmod parent (stat:mode stat)))))
136 (define* (deduplicate path hash #:key (store %store-directory))
137 "Check if a store item with sha256 hash HASH already exists. If so,
138 replace PATH with a hardlink to the already-existing one. If not, register
139 PATH so that future duplicates can hardlink to it. PATH is assumed to be
141 (let* ((links-directory (string-append store "/.links"))
142 (link-file (string-append links-directory "/"
143 (bytevector->base16-string hash))))
144 (mkdir-p links-directory)
145 (if (eq? 'directory (stat:type (lstat path)))
146 ;; Can't hardlink directories, so hardlink their atoms.
147 (for-each (lambda (file)
148 (unless (or (member file '("." ".."))
149 (and (string=? path store)
150 (string=? file ".links")))
151 (let ((file (string-append path "/" file)))
152 (deduplicate file (nar-sha256 file)
155 (if (file-exists? link-file)
156 (replace-with-link link-file path
157 #:swap-directory links-directory)
160 (link path link-file))
162 (let ((errno (system-error-errno args)))
163 (cond ((= errno EEXIST)
164 ;; Someone else put an entry for PATH in
165 ;; LINKS-DIRECTORY before we could. Let's use it.
166 (replace-with-link path link-file
167 #:swap-directory links-directory))
169 ;; There's not enough room in the directory index for
170 ;; more entries in .links, but that's fine: we can
174 ;; PATH has reached the maximum number of links, but
175 ;; that's OK: we just can't deduplicate it more.
177 (else (apply throw args))))))))))