1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org>
3 ;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org>
5 ;;; This file is part of GNU Guix.
7 ;;; GNU Guix is free software; you can redistribute it and/or modify it
8 ;;; under the terms of the GNU General Public License as published by
9 ;;; the Free Software Foundation; either version 3 of the License, or (at
10 ;;; your option) any later version.
12 ;;; GNU Guix is distributed in the hope that it will be useful, but
13 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;;; GNU General Public License for more details.
17 ;;; You should have received a copy of the GNU General Public License
18 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
20 ;;; This houses stuff we do to files when they arrive at the store - resetting
21 ;;; timestamps, deduplicating, etc.
23 (define-module (guix store deduplication)
24 #:use-module (gcrypt hash)
25 #:use-module (guix build utils)
26 #:use-module (guix base16)
27 #:use-module (srfi srfi-11)
28 #:use-module (rnrs io ports)
29 #:use-module (ice-9 ftw)
30 #:use-module (guix serialization)
34 ;; XXX: This port is used as a workaround on Guile <= 2.2.4 where
35 ;; 'port-position' throws to 'out-of-range' when the offset is great than or
36 ;; equal to 2^32: <https://bugs.gnu.org/32161>.
37 (define (counting-wrapper-port output-port)
38 "Return two values: an output port that wraps OUTPUT-PORT, and a thunk to
39 retrieve the number of bytes written to OUTPUT-PORT."
41 (values (make-custom-binary-output-port "counting-wrapper"
42 (lambda (bytes offset count)
43 (put-bytevector output-port bytes
52 (close-port output-port)))
56 (define (nar-sha256 file)
57 "Gives the sha256 hash of a file and the size of the file in nar form."
58 (let*-values (((port get-hash) (open-sha256-port))
59 ((wrapper get-size) (counting-wrapper-port port)))
60 (write-file file wrapper)
61 (force-output wrapper)
63 (let ((hash (get-hash))
68 (define (tempname-in directory)
69 "Gives an unused temporary name under DIRECTORY. Not guaranteed to still be
70 unused by the time you create anything with that name, but a good shot."
71 (let ((const-part (string-append directory "/.tmp-link-"
72 (number->string (getpid)))))
74 (number->string (random most-positive-fixnum) 16)))
75 (if (file-exists? (string-append const-part "-" guess-part))
76 (try (number->string (random most-positive-fixnum) 16))
77 (string-append const-part "-" guess-part)))))
79 (define* (get-temp-link target #:optional (link-prefix (dirname target)))
80 "Like mkstemp!, but instead of creating a new file and giving you the name,
81 it creates a new hardlink to TARGET and gives you the name. Since
82 cross-filesystem hardlinks don't work, the temp link must be created on the
83 same filesystem - where in that filesystem it is can be controlled by
85 (let try ((tempname (tempname-in link-prefix)))
88 (link target tempname)
91 (if (= (system-error-errno args) EEXIST)
92 (try (tempname-in link-prefix))
93 (apply throw args))))))
95 ;; There are 3 main kinds of errors we can get from hardlinking: "Too many
96 ;; things link to this" (EMLINK), "this link already exists" (EEXIST), and
97 ;; "can't fit more stuff in this directory" (ENOSPC).
99 (define* (replace-with-link target to-replace
100 #:key (swap-directory (dirname target)))
101 "Atomically replace the file TO-REPLACE with a link to TARGET. Use
102 SWAP-DIRECTORY as the directory to store temporary hard links. Upon ENOSPC
103 and EMLINK, TO-REPLACE is left unchanged.
105 Note: TARGET, TO-REPLACE, and SWAP-DIRECTORY must be on the same file system."
109 (get-temp-link target swap-directory))
111 ;; We get ENOSPC when we can't fit an additional entry in
113 (if (= ENOSPC (system-error-errno args))
115 (apply throw args)))))
117 ;; If we couldn't create TEMP-LINK, that's OK: just don't do the
118 ;; replacement, which means TO-REPLACE won't be deduplicated.
120 (let* ((parent (dirname to-replace))
121 (stat (stat parent)))
122 (make-file-writable parent)
125 (rename-file temp-link to-replace))
127 (delete-file temp-link)
128 (unless (= EMLINK (system-error-errno args))
129 (apply throw args))))
131 ;; Restore PARENT's mtime and permissions.
132 (set-file-time parent stat)
133 (chmod parent (stat:mode stat)))))
135 (define* (deduplicate path hash #:key (store %store-directory))
136 "Check if a store item with sha256 hash HASH already exists. If so,
137 replace PATH with a hardlink to the already-existing one. If not, register
138 PATH so that future duplicates can hardlink to it. PATH is assumed to be
140 (let* ((links-directory (string-append store "/.links"))
141 (link-file (string-append links-directory "/"
142 (bytevector->base16-string hash))))
143 (mkdir-p links-directory)
144 (if (eq? 'directory (stat:type (lstat path)))
145 ;; Can't hardlink directories, so hardlink their atoms.
146 (for-each (lambda (file)
147 (unless (or (member file '("." ".."))
148 (and (string=? path store)
149 (string=? file ".links")))
150 (let ((file (string-append path "/" file)))
151 (deduplicate file (nar-sha256 file)
154 (if (file-exists? link-file)
155 (replace-with-link link-file path
156 #:swap-directory links-directory)
159 (link path link-file))
161 (let ((errno (system-error-errno args)))
162 (cond ((= errno EEXIST)
163 ;; Someone else put an entry for PATH in
164 ;; LINKS-DIRECTORY before we could. Let's use it.
165 (replace-with-link path link-file
166 #:swap-directory links-directory))
168 ;; There's not enough room in the directory index for
169 ;; more entries in .links, but that's fine: we can
172 (else (apply throw args))))))))))