gnu: r-qtl2: Move to (gnu packages cran).
[jackhill/guix/guix.git] / guix / build / graft.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2014, 2015, 2016, 2018 Ludovic Courtès <ludo@gnu.org>
3 ;;; Copyright © 2016 Mark H Weaver <mhw@netris.org>
4 ;;;
5 ;;; This file is part of GNU Guix.
6 ;;;
7 ;;; GNU Guix is free software; you can redistribute it and/or modify it
8 ;;; under the terms of the GNU General Public License as published by
9 ;;; the Free Software Foundation; either version 3 of the License, or (at
10 ;;; your option) any later version.
11 ;;;
12 ;;; GNU Guix is distributed in the hope that it will be useful, but
13 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;;; GNU General Public License for more details.
16 ;;;
17 ;;; You should have received a copy of the GNU General Public License
18 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
19
20 (define-module (guix build graft)
21 #:use-module (guix build utils)
22 #:use-module (guix build debug-link)
23 #:use-module (rnrs bytevectors)
24 #:use-module (ice-9 vlist)
25 #:use-module (ice-9 match)
26 #:use-module (ice-9 threads)
27 #:use-module (ice-9 binary-ports)
28 #:use-module (srfi srfi-1) ; list library
29 #:use-module (srfi srfi-26) ; cut and cute
30 #:export (replace-store-references
31 rewrite-directory
32 graft))
33
34 ;;; Commentary:
35 ;;;
36 ;;; This module supports "grafts". Grafting a directory means rewriting it,
37 ;;; with references to some specific items replaced by references to other
38 ;;; store items---the grafts.
39 ;;;
40 ;;; This method is used to provide fast security updates as only the leaves of
41 ;;; the dependency graph need to be grafted, even when the security updates
42 ;;; affect a core component such as Bash or libc. It is based on the idea of
43 ;;; 'replace-dependency' implemented by Shea Levy in Nixpkgs.
44 ;;;
45 ;;; Code:
46
47 (define-syntax-rule (define-inline name val)
48 (define-syntax name (identifier-syntax val)))
49
50 (define-inline hash-length 32)
51
52 (define nix-base32-char?
53 (cute char-set-contains?
54 ;; ASCII digits and lower case letters except e o t u
55 (string->char-set "0123456789abcdfghijklmnpqrsvwxyz")
56 <>))
57
58 (define* (replace-store-references input output replacement-table
59 #:optional (store (%store-directory)))
60 "Read data from INPUT, replacing store references according to
61 REPLACEMENT-TABLE, and writing the result to OUTPUT. REPLACEMENT-TABLE is a
62 vhash that maps strings (original hashes) to bytevectors (replacement strings
63 comprising the replacement hash, a dash, and a string).
64
65 Note: We use string keys to work around the fact that guile-2.0 hashes all
66 bytevectors to the same value."
67
68 (define (lookup-replacement s)
69 (match (vhash-assoc s replacement-table)
70 ((origin . replacement)
71 replacement)
72 (#f #f)))
73
74 (define (optimize-u8-predicate pred)
75 (cute vector-ref
76 (list->vector (map pred (iota 256)))
77 <>))
78
79 (define nix-base32-byte?
80 (optimize-u8-predicate
81 (compose nix-base32-char?
82 integer->char)))
83
84 (define (dash? byte) (= byte 45))
85
86 (define request-size (expt 2 20)) ; 1 MiB
87
88 ;; We scan the file for the following 33-byte pattern: 32 bytes of
89 ;; nix-base32 characters followed by a dash. To accommodate large files,
90 ;; we do not read the entire file, but instead work on buffers of up to
91 ;; 'request-size' bytes. To ensure that every 33-byte sequence appears
92 ;; entirely within exactly one buffer, adjacent buffers must overlap,
93 ;; i.e. they must share 32 byte positions. We accomplish this by
94 ;; "ungetting" the last 32 bytes of each buffer before reading the next
95 ;; buffer, unless we know that we've reached the end-of-file.
96 (let ((buffer (make-bytevector request-size)))
97 (let loop ()
98 ;; Note: We avoid 'get-bytevector-n' to work around
99 ;; <http://bugs.gnu.org/17466>.
100 (match (get-bytevector-n! input buffer 0 request-size)
101 ((? eof-object?) 'done)
102 (end
103 ;; We scan the buffer for dashes that might be preceded by a
104 ;; nix-base32 hash. The key optimization here is that whenever we
105 ;; find a NON-nix-base32 character at position 'i', we know that it
106 ;; cannot be part of a hash, so the earliest position where the next
107 ;; hash could start is i+1 with the following dash at position i+33.
108 ;;
109 ;; Since nix-base32 characters comprise only 1/8 of the 256 possible
110 ;; byte values, and exclude some of the most common letters in
111 ;; English text (e t o u), in practice we can advance by 33 positions
112 ;; most of the time.
113 (let scan-from ((i hash-length) (written 0))
114 ;; 'i' is the first position where we look for a dash. 'written'
115 ;; is the number of bytes in the buffer that have already been
116 ;; written.
117 (if (< i end)
118 (let ((byte (bytevector-u8-ref buffer i)))
119 (cond ((and (dash? byte)
120 ;; We've found a dash. Note that we do not know
121 ;; whether the preceeding 32 bytes are nix-base32
122 ;; characters, but we do not need to know. If
123 ;; they are not, the following lookup will fail.
124 (lookup-replacement
125 (string-tabulate (lambda (j)
126 (integer->char
127 (bytevector-u8-ref buffer
128 (+ j (- i hash-length)))))
129 hash-length)))
130 => (lambda (replacement)
131 ;; We've found a hash that needs to be replaced.
132 ;; First, write out all bytes preceding the hash
133 ;; that have not yet been written.
134 (put-bytevector output buffer written
135 (- i hash-length written))
136 ;; Now write the replacement string.
137 (put-bytevector output replacement)
138 ;; Since the byte at position 'i' is a dash,
139 ;; which is not a nix-base32 char, the earliest
140 ;; position where the next hash might start is
141 ;; i+1, and the earliest position where the
142 ;; following dash might start is (+ i 1
143 ;; hash-length). Also, increase the write
144 ;; position to account for REPLACEMENT.
145 (let ((len (bytevector-length replacement)))
146 (scan-from (+ i 1 len)
147 (+ i (- len hash-length))))))
148 ;; If the byte at position 'i' is a nix-base32 char,
149 ;; then the dash we're looking for might be as early as
150 ;; the following byte, so we can only advance by 1.
151 ((nix-base32-byte? byte)
152 (scan-from (+ i 1) written))
153 ;; If the byte at position 'i' is NOT a nix-base32
154 ;; char, then the earliest position where the next hash
155 ;; might start is i+1, with the following dash at
156 ;; position (+ i 1 hash-length).
157 (else
158 (scan-from (+ i 1 hash-length) written))))
159
160 ;; We have finished scanning the buffer. Now we determine how
161 ;; many bytes have not yet been written, and how many bytes to
162 ;; "unget". If 'end' is less than 'request-size' then we read
163 ;; less than we asked for, which indicates that we are at EOF,
164 ;; so we needn't unget anything. Otherwise, we unget up to
165 ;; 'hash-length' bytes (32 bytes). However, we must be careful
166 ;; not to unget bytes that have already been written, because
167 ;; that would cause them to be written again from the next
168 ;; buffer. In practice, this case occurs when a replacement is
169 ;; made near or beyond the end of the buffer. When REPLACEMENT
170 ;; went beyond END, we consume the extra bytes from INPUT.
171 (begin
172 (if (> written end)
173 (get-bytevector-n! input buffer 0 (- written end))
174 (let* ((unwritten (- end written))
175 (unget-size (if (= end request-size)
176 (min hash-length unwritten)
177 0))
178 (write-size (- unwritten unget-size)))
179 (put-bytevector output buffer written write-size)
180 (unget-bytevector input buffer (+ written write-size)
181 unget-size)))
182 (loop)))))))))
183
184 (define (rename-matching-files directory mapping)
185 "Apply MAPPING to the names of all the files in DIRECTORY, where MAPPING is
186 a list of store file name pairs."
187 (let* ((mapping (map (match-lambda
188 ((source . target)
189 (cons (basename source) (basename target))))
190 mapping))
191 (matches (find-files directory
192 (lambda (file stat)
193 (assoc-ref mapping (basename file)))
194 #:directories? #t)))
195
196 ;; XXX: This is not quite correct: if MAPPING contains "foo", and
197 ;; DIRECTORY contains "bar/foo/foo", we first rename "bar/foo" and then
198 ;; "bar/foo/foo" no longer exists so we fail. Oh well, surely that's good
199 ;; enough!
200 (for-each (lambda (file)
201 (let ((target (assoc-ref mapping (basename file))))
202 (rename-file file
203 (string-append (dirname file) "/" target))))
204 matches)))
205
206 (define (exit-on-exception proc)
207 "Return a procedure that wraps PROC so that 'primitive-exit' is called when
208 an exception is caught."
209 (lambda (arg)
210 (catch #t
211 (lambda ()
212 (proc arg))
213 (lambda (key . args)
214 ;; Since ports are not thread-safe as of Guile 2.0, reopen stderr.
215 (let ((port (fdopen 2 "w0")))
216 (print-exception port #f key args)
217 (primitive-exit 1))))))
218
219 ;; We need this as long as we support Guile < 2.0.13.
220 (define* (mkdir-p* dir #:optional (mode #o755))
221 "This is a variant of 'mkdir-p' that works around
222 <http://bugs.gnu.org/24659> by passing MODE explicitly in each 'mkdir' call."
223 (define absolute?
224 (string-prefix? "/" dir))
225
226 (define not-slash
227 (char-set-complement (char-set #\/)))
228
229 (let loop ((components (string-tokenize dir not-slash))
230 (root (if absolute?
231 ""
232 ".")))
233 (match components
234 ((head tail ...)
235 (let ((path (string-append root "/" head)))
236 (catch 'system-error
237 (lambda ()
238 (mkdir path mode)
239 (loop tail path))
240 (lambda args
241 (if (= EEXIST (system-error-errno args))
242 (loop tail path)
243 (apply throw args))))))
244 (() #t))))
245
246 (define* (rewrite-directory directory output mapping
247 #:optional (store (%store-directory)))
248 "Copy DIRECTORY to OUTPUT, replacing strings according to MAPPING, a list of
249 file name pairs."
250
251 (define hash-mapping
252 ;; List of hash/replacement pairs, where the hash is a nix-base32 string
253 ;; and the replacement is a string that includes the replacement's name,
254 ;; like "r837zajjc1q8z9hph4b6860a9c05blyy-openssl-1.0.2j".
255 (let* ((prefix (string-append store "/"))
256 (start (string-length prefix))
257 (end (+ start hash-length)))
258 (define (valid-hash? h)
259 (every nix-base32-char? (string->list h)))
260 (define (hash+rest s)
261 (and (< end (string-length s))
262 (let ((hash (substring s start end))
263 (all (substring s start)))
264 (and (string-prefix? prefix s)
265 (valid-hash? hash)
266 (eqv? #\- (string-ref s end))
267 (list hash all)))))
268
269 (map (match-lambda
270 (((= hash+rest (origin-hash origin-string))
271 .
272 (= hash+rest (replacement-hash replacement-string)))
273 (unless (= (string-length origin-string)
274 (string-length replacement-string))
275 (error "replacement length differs from the original length"
276 origin-string replacement-string))
277 (cons origin-hash (string->utf8 replacement-string)))
278 ((origin . replacement)
279 (error "invalid replacement" origin replacement)))
280 mapping)))
281
282 (define replacement-table
283 (alist->vhash hash-mapping))
284
285 (define prefix-len
286 (string-length directory))
287
288 (define (destination file)
289 (string-append output (string-drop file prefix-len)))
290
291 (define (rewrite-leaf file)
292 (let ((stat (lstat file))
293 (dest (destination file)))
294 (mkdir-p* (dirname dest))
295 (case (stat:type stat)
296 ((symlink)
297 (let ((target (readlink file)))
298 (symlink (call-with-output-string
299 (lambda (output)
300 (replace-store-references (open-input-string target)
301 output replacement-table
302 store)))
303 dest)))
304 ((regular)
305 (call-with-input-file file
306 (lambda (input)
307 (call-with-output-file dest
308 (lambda (output)
309 (replace-store-references input output replacement-table
310 store)
311 (chmod output (stat:perms stat)))))))
312 ((directory)
313 (mkdir-p* dest))
314 (else
315 (error "unsupported file type" stat)))))
316
317 ;; Use 'exit-on-exception' to force an exit upon I/O errors, given that
318 ;; 'n-par-for-each' silently swallows exceptions.
319 ;; See <http://bugs.gnu.org/23581>.
320 (n-par-for-each (parallel-job-count)
321 (exit-on-exception rewrite-leaf)
322 (find-files directory (const #t)
323 #:directories? #t))
324 (rename-matching-files output mapping))
325
326 (define %graft-hooks
327 ;; Default list of hooks run after grafting.
328 (list graft-debug-links))
329
330 (define* (graft old-outputs new-outputs mapping
331 #:key (log-port (current-output-port))
332 (hooks %graft-hooks))
333 "Apply the grafts described by MAPPING on OLD-OUTPUTS, leading to
334 NEW-OUTPUTS. MAPPING must be a list of file name pairs; OLD-OUTPUTS and
335 NEW-OUTPUTS are lists of output name/file name pairs."
336 (for-each (lambda (input output)
337 (format log-port "grafting '~a' -> '~a'...~%" input output)
338 (force-output)
339 (rewrite-directory input output mapping))
340 (match old-outputs
341 (((names . files) ...)
342 files))
343 (match new-outputs
344 (((names . files) ...)
345 files)))
346 (for-each (lambda (hook)
347 (hook old-outputs new-outputs mapping
348 #:log-port log-port))
349 hooks))
350
351 ;;; graft.scm ends here