Merge branch 'master' into staging
[jackhill/guix/guix.git] / guix / serialization.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
3 ;;;
4 ;;; This file is part of GNU Guix.
5 ;;;
6 ;;; GNU Guix is free software; you can redistribute it and/or modify it
7 ;;; under the terms of the GNU General Public License as published by
8 ;;; the Free Software Foundation; either version 3 of the License, or (at
9 ;;; your option) any later version.
10 ;;;
11 ;;; GNU Guix is distributed in the hope that it will be useful, but
12 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;;; GNU General Public License for more details.
15 ;;;
16 ;;; You should have received a copy of the GNU General Public License
17 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
18
19 (define-module (guix serialization)
20 #:use-module (guix combinators)
21 #:use-module (rnrs bytevectors)
22 #:use-module (srfi srfi-1)
23 #:use-module (srfi srfi-26)
24 #:use-module (srfi srfi-34)
25 #:use-module (srfi srfi-35)
26 #:use-module (ice-9 binary-ports)
27 #:use-module ((ice-9 rdelim) #:prefix rdelim:)
28 #:use-module (ice-9 match)
29 #:use-module (ice-9 ftw)
30 #:export (write-int read-int
31 write-long-long read-long-long
32 write-padding
33 write-bytevector write-string
34 read-string read-latin1-string read-maybe-utf8-string
35 write-string-list read-string-list
36 write-string-pairs
37 write-store-path read-store-path
38 write-store-path-list read-store-path-list
39 (dump . dump-port*)
40
41 &nar-error
42 nar-error?
43 nar-error-port
44 nar-error-file
45
46 &nar-read-error
47 nar-read-error?
48 nar-read-error-token
49
50 write-file
51 write-file-tree
52 fold-archive
53 restore-file))
54
55 ;;; Comment:
56 ;;;
57 ;;; Serialization procedures used by the RPCs and the Nar format. This module
58 ;;; is for internal consumption.
59 ;;;
60 ;;; Code:
61
62 ;; Similar to serialize.cc in Nix.
63
64 (define-condition-type &nar-error &error ; XXX: inherit from &store-error ?
65 nar-error?
66 (file nar-error-file) ; file we were restoring, or #f
67 (port nar-error-port)) ; port from which we read
68
69 (define currently-restored-file
70 ;; Name of the file being restored. Used internally for error reporting.
71 (make-parameter #f))
72
73
74 (define (get-bytevector-n* port count)
75 (let ((bv (get-bytevector-n port count)))
76 (when (or (eof-object? bv)
77 (< (bytevector-length bv) count))
78 (raise (condition (&nar-error
79 (file (currently-restored-file))
80 (port port)))))
81 bv))
82
83 (define (write-int n p)
84 (let ((b (make-bytevector 8 0)))
85 (bytevector-u32-set! b 0 n (endianness little))
86 (put-bytevector p b)))
87
88 (define (read-int p)
89 (let ((b (get-bytevector-n* p 8)))
90 (bytevector-u32-ref b 0 (endianness little))))
91
92 (define (write-long-long n p)
93 (let ((b (make-bytevector 8 0)))
94 (bytevector-u64-set! b 0 n (endianness little))
95 (put-bytevector p b)))
96
97 (define (read-long-long p)
98 (let ((b (get-bytevector-n* p 8)))
99 (bytevector-u64-ref b 0 (endianness little))))
100
101 (define write-padding
102 (let ((zero (make-bytevector 8 0)))
103 (lambda (n p)
104 (let ((m (modulo n 8)))
105 (or (zero? m)
106 (put-bytevector p zero 0 (- 8 m)))))))
107
108 (define* (write-bytevector s p
109 #:optional (l (bytevector-length s)))
110 (let* ((m (modulo l 8))
111 (b (make-bytevector (+ 8 l (if (zero? m) 0 (- 8 m))))))
112 (bytevector-u32-set! b 0 l (endianness little))
113 (bytevector-copy! s 0 b 8 l)
114 (put-bytevector p b)))
115
116 (define (write-string s p)
117 (write-bytevector (string->utf8 s) p))
118
119 (define (read-byte-string p)
120 (let* ((len (read-int p))
121 (m (modulo len 8))
122 (bv (get-bytevector-n* p len)))
123 (or (zero? m)
124 (get-bytevector-n* p (- 8 m)))
125 bv))
126
127 (define (read-string p)
128 (utf8->string (read-byte-string p)))
129
130 (define (read-latin1-string p)
131 "Read an ISO-8859-1 string from P."
132 ;; Note: do not use 'get-string-n' to work around Guile bug
133 ;; <http://bugs.gnu.org/19621>. See <http://bugs.gnu.org/19610> for
134 ;; a discussion.
135 (let ((bv (read-byte-string p)))
136 ;; XXX: Rewrite using (ice-9 iconv).
137 (list->string (map integer->char (bytevector->u8-list bv)))))
138
139 (define (read-maybe-utf8-string p)
140 "Read a serialized string from port P. Attempt to decode it as UTF-8 and
141 substitute invalid byte sequences with question marks. This is a
142 \"permissive\" UTF-8 decoder."
143 ;; XXX: We rely on the port's decoding mechanism to do permissive decoding
144 ;; and substitute invalid byte sequences with question marks, but this is
145 ;; not very efficient. Eventually Guile may provide a lightweight
146 ;; permissive UTF-8 decoder.
147 (let* ((bv (read-byte-string p))
148 (port (open-bytevector-input-port bv)))
149 (set-port-encoding! port "UTF-8")
150 (set-port-conversion-strategy! port 'substitute)
151 (rdelim:read-string port)))
152
153 (define (write-string-list l p)
154 (write-int (length l) p)
155 (for-each (cut write-string <> p) l))
156
157 (define (write-string-pairs l p)
158 (write-int (length l) p)
159 (for-each (match-lambda
160 ((first . second)
161 (write-string first p)
162 (write-string second p)))
163 l))
164
165 (define (read-string-list p)
166 (let ((len (read-int p)))
167 (unfold (cut >= <> len)
168 (lambda (i)
169 (read-string p))
170 1+
171 0)))
172
173 (define (write-store-path f p)
174 (write-string f p)) ; TODO: assert path
175
176 (define (read-store-path p)
177 (read-string p)) ; TODO: assert path
178
179 (define write-store-path-list write-string-list)
180 (define read-store-path-list read-string-list)
181
182 \f
183 (define-condition-type &nar-read-error &nar-error
184 nar-read-error?
185 (token nar-read-error-token)) ; faulty token, or #f
186
187
188 (define (dump in out size)
189 "Copy SIZE bytes from IN to OUT."
190 (define buf-size 65536)
191 (define buf (make-bytevector buf-size))
192
193 (let loop ((left size))
194 (if (<= left 0)
195 0
196 (let ((read (get-bytevector-n! in buf 0 (min left buf-size))))
197 (if (eof-object? read)
198 left
199 (begin
200 (put-bytevector out buf 0 read)
201 (loop (- left read))))))))
202
203 (define (write-contents-from-port input output size)
204 "Write SIZE bytes from port INPUT to port OUTPUT."
205 (write-string "contents" output)
206 (write-long-long size output)
207 ;; Use 'sendfile' when both OUTPUT and INPUT are file ports.
208 (if (and (file-port? output) (file-port? input))
209 (sendfile output input size 0)
210 (dump input output size))
211 (write-padding size output))
212
213 (define (read-file-type port)
214 "Read the file type tag from PORT, and return either 'regular or
215 'executable."
216 (match (read-string port)
217 ("contents"
218 'regular)
219 ("executable"
220 (match (list (read-string port) (read-string port))
221 (("" "contents") 'executable)
222 (x (raise
223 (condition (&message
224 (message "unexpected executable file marker"))
225 (&nar-read-error (port port)
226 (file #f)
227 (token x)))))))
228 (x
229 (raise
230 (condition (&message (message "unsupported nar file type"))
231 (&nar-read-error (port port) (file #f) (token x)))))))
232
233 (define %archive-version-1
234 ;; Magic cookie for Nix archives.
235 "nix-archive-1")
236
237 (define* (write-file file port
238 #:key (select? (const #t)))
239 "Write the contents of FILE to PORT in Nar format, recursing into
240 sub-directories of FILE as needed. For each directory entry, call (SELECT?
241 FILE STAT), where FILE is the entry's absolute file name and STAT is the
242 result of 'lstat'; exclude entries for which SELECT? does not return true."
243 (write-file-tree file port
244 #:file-type+size
245 (lambda (file)
246 (let* ((stat (lstat file))
247 (size (stat:size stat)))
248 (case (stat:type stat)
249 ((directory)
250 (values 'directory size))
251 ((regular)
252 (values (if (zero? (logand (stat:mode stat)
253 #o100))
254 'regular
255 'executable)
256 size))
257 (else
258 (values (stat:type stat) size))))) ;bah!
259 #:file-port (cut open-file <> "r0b")
260 #:symlink-target readlink
261
262 #:directory-entries
263 (lambda (directory)
264 ;; 'scandir' defaults to 'string-locale<?' to sort files,
265 ;; but this happens to be case-insensitive (at least in
266 ;; 'en_US' locale on libc 2.18.) Conversely, we want
267 ;; files to be sorted in a case-sensitive fashion.
268 (define basenames
269 (scandir directory (negate (cut member <> '("." "..")))
270 string<?))
271
272 (filter-map (lambda (base)
273 (let ((file (string-append directory
274 "/" base)))
275 (and (select? file (lstat file))
276 base)))
277 basenames))
278
279 ;; The 'scandir' call above gives us filtered and sorted
280 ;; entries, so no post-processing is needed.
281 #:postprocess-entries identity))
282
283 (define (filter/sort-directory-entries lst)
284 "Remove dot and dot-dot entries from LST, and sort it in lexicographical
285 order."
286 (delete-duplicates
287 (sort (remove (cute member <> '("." "..")) lst)
288 string<?)
289 string=?))
290
291 (define* (write-file-tree file port
292 #:key
293 file-type+size
294 file-port
295 symlink-target
296 directory-entries
297 (postprocess-entries filter/sort-directory-entries))
298 "Write the contents of FILE to PORT in Nar format, recursing into
299 sub-directories of FILE as needed.
300
301 This procedure does not make any file-system I/O calls. Instead, it calls the
302 user-provided FILE-TYPE+SIZE, FILE-PORT, SYMLINK-TARGET, and DIRECTORY-ENTRIES
303 procedures, which roughly correspond to 'lstat', 'readlink', and 'scandir'.
304 POSTPROCESS-ENTRIES ensures that directory entries are valid; leave it as-is
305 unless you know that DIRECTORY-ENTRIES provide filtered and sorted entries, in
306 which case you can use 'identity'."
307 (define p port)
308
309 (write-string %archive-version-1 p)
310
311 (let dump ((f file))
312 (define-values (type size)
313 (file-type+size f))
314
315 (write-string "(" p)
316 (case type
317 ((regular executable)
318 (write-string "type" p)
319 (write-string "regular" p)
320 (when (eq? 'executable type)
321 (write-string "executable" p)
322 (write-string "" p))
323 (let ((input (file-port f)))
324 (dynamic-wind
325 (const #t)
326 (lambda ()
327 (write-contents-from-port input p size))
328 (lambda ()
329 (close-port input)))))
330 ((directory)
331 (write-string "type" p)
332 (write-string "directory" p)
333 (let ((entries (postprocess-entries (directory-entries f))))
334 (for-each (lambda (e)
335 (let* ((f (string-append f "/" e)))
336 (write-string "entry" p)
337 (write-string "(" p)
338 (write-string "name" p)
339 (write-string e p)
340 (write-string "node" p)
341 (dump f)
342 (write-string ")" p)))
343 entries)))
344 ((symlink)
345 (write-string "type" p)
346 (write-string "symlink" p)
347 (write-string "target" p)
348 (write-string (symlink-target f) p))
349 (else
350 (raise (condition (&message (message "unsupported file type"))
351 (&nar-error (file f) (port port))))))
352 (write-string ")" p)))
353
354 (define port-conversion-strategy
355 (fluid->parameter %default-port-conversion-strategy))
356
357 (define (fold-archive proc seed port file)
358 "Read a file (possibly a directory structure) in Nar format from PORT. Call
359 PROC on each file or directory read from PORT using:
360
361 (PROC FILE TYPE CONTENTS RESULT)
362
363 using SEED as the first RESULT. TYPE is a symbol like 'regular, and CONTENTS
364 depends on TYPE."
365 (parameterize ((currently-restored-file file)
366
367 ;; Error out if we can convert file names to the current
368 ;; locale. (XXX: We'd prefer UTF-8 encoding for file names
369 ;; regardless of the locale, but that's what Guile gives us
370 ;; so far.)
371 (port-conversion-strategy 'error))
372 (let ((signature (read-string port)))
373 (unless (equal? signature %archive-version-1)
374 (raise
375 (condition (&message (message "invalid nar signature"))
376 (&nar-read-error (port port)
377 (token signature)
378 (file #f))))))
379
380 (let read ((file file)
381 (result seed))
382 (define (read-eof-marker)
383 (match (read-string port)
384 (")" #t)
385 (x (raise
386 (condition
387 (&message (message "invalid nar end-of-file marker"))
388 (&nar-read-error (port port) (file file) (token x)))))))
389
390 (currently-restored-file file)
391
392 (match (list (read-string port) (read-string port) (read-string port))
393 (("(" "type" "regular")
394 (let* ((type (read-file-type port))
395 (size (read-long-long port))
396
397 ;; The caller must read exactly SIZE bytes from PORT.
398 (result (proc file type `(,port . ,size) result)))
399 (let ((m (modulo size 8)))
400 (unless (zero? m)
401 (get-bytevector-n* port (- 8 m))))
402 (read-eof-marker)
403 result))
404 (("(" "type" "symlink")
405 (match (list (read-string port) (read-string port))
406 (("target" target)
407 (let ((result (proc file 'symlink target result)))
408 (read-eof-marker)
409 result))
410 (x (raise
411 (condition
412 (&message (message "invalid symlink tokens"))
413 (&nar-read-error (port port) (file file) (token x)))))))
414 (("(" "type" "directory")
415 (let ((dir file))
416 (let loop ((prefix (read-string port))
417 (result (proc file 'directory #f result)))
418 (match prefix
419 ("entry"
420 (match (list (read-string port)
421 (read-string port) (read-string port)
422 (read-string port))
423 (("(" "name" file "node")
424 (let ((result (read (string-append dir "/" file) result)))
425 (match (read-string port)
426 (")" #f)
427 (x
428 (raise
429 (condition
430 (&message
431 (message "unexpected directory entry termination"))
432 (&nar-read-error (port port)
433 (file file)
434 (token x))))))
435 (loop (read-string port) result)))))
436 (")" result) ;done with DIR
437 (x
438 (raise
439 (condition
440 (&message (message "unexpected directory inter-entry marker"))
441 (&nar-read-error (port port) (file file) (token x)))))))))
442 (x
443 (raise
444 (condition
445 (&message (message "unsupported nar entry type"))
446 (&nar-read-error (port port) (file file) (token x)))))))))
447
448 (define (restore-file port file)
449 "Read a file (possibly a directory structure) in Nar format from PORT.
450 Restore it as FILE."
451 (fold-archive (lambda (file type content result)
452 (match type
453 ('directory
454 (mkdir file))
455 ('symlink
456 (symlink content file))
457 ((or 'regular 'executable)
458 (match content
459 ((input . size)
460 (call-with-output-file file
461 (lambda (output)
462 (dump input output size)
463 (when (eq? type 'executable)
464 (chmod output #o755)))))))))
465 #t
466 port
467 file))
468
469 ;;; Local Variables:
470 ;;; eval: (put 'call-with-binary-input-file 'scheme-indent-function 1)
471 ;;; End:
472
473 ;;; serialization.scm ends here