licenses: Add Free Art License 1.3.
[jackhill/guix/guix.git] / guix / serialization.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
3 ;;;
4 ;;; This file is part of GNU Guix.
5 ;;;
6 ;;; GNU Guix is free software; you can redistribute it and/or modify it
7 ;;; under the terms of the GNU General Public License as published by
8 ;;; the Free Software Foundation; either version 3 of the License, or (at
9 ;;; your option) any later version.
10 ;;;
11 ;;; GNU Guix is distributed in the hope that it will be useful, but
12 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;;; GNU General Public License for more details.
15 ;;;
16 ;;; You should have received a copy of the GNU General Public License
17 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
18
19 (define-module (guix serialization)
20 #:use-module (guix combinators)
21 #:use-module (rnrs bytevectors)
22 #:use-module (srfi srfi-1)
23 #:use-module (srfi srfi-26)
24 #:use-module (srfi srfi-34)
25 #:use-module (srfi srfi-35)
26 #:use-module (ice-9 binary-ports)
27 #:use-module ((ice-9 rdelim) #:prefix rdelim:)
28 #:use-module (ice-9 match)
29 #:use-module (ice-9 ftw)
30 #:use-module (system foreign)
31 #:export (write-int read-int
32 write-long-long read-long-long
33 write-padding
34 write-bytevector write-string
35 read-string read-latin1-string read-maybe-utf8-string
36 write-string-list read-string-list
37 write-string-pairs read-string-pairs
38 write-store-path read-store-path
39 write-store-path-list read-store-path-list
40 (dump . dump-port*)
41
42 &nar-error
43 nar-error?
44 nar-error-port
45 nar-error-file
46
47 &nar-read-error
48 nar-read-error?
49 nar-read-error-token
50
51 write-file
52 write-file-tree
53 fold-archive
54 restore-file
55 dump-file))
56
57 ;;; Comment:
58 ;;;
59 ;;; Serialization procedures used by the RPCs and the Nar format. This module
60 ;;; is for internal consumption.
61 ;;;
62 ;;; Code:
63
64 ;; Similar to serialize.cc in Nix.
65
66 (define-condition-type &nar-error &error ; XXX: inherit from &store-error ?
67 nar-error?
68 (file nar-error-file) ; file we were restoring, or #f
69 (port nar-error-port)) ; port from which we read
70
71 (define currently-restored-file
72 ;; Name of the file being restored. Used internally for error reporting.
73 (make-parameter #f))
74
75
76 (define (get-bytevector-n* port count)
77 (let ((bv (get-bytevector-n port count)))
78 (when (or (eof-object? bv)
79 (< (bytevector-length bv) count))
80 (raise (condition (&nar-error
81 (file (currently-restored-file))
82 (port port)))))
83 bv))
84
85 (define (sub-bytevector bv len)
86 "Return a bytevector that aliases the first LEN bytes of BV."
87 (define max (bytevector-length bv))
88 (cond ((= len max) bv)
89 ((< len max)
90 ;; Yes, this is safe because the result of each conversion procedure
91 ;; has its life cycle synchronized with that of its argument.
92 (pointer->bytevector (bytevector->pointer bv) len))
93 (else
94 (error "sub-bytevector called to get a super bytevector"))))
95
96 (define (write-int n p)
97 (let ((b (make-bytevector 8 0)))
98 (bytevector-u32-set! b 0 n (endianness little))
99 (put-bytevector p b)))
100
101 (define (read-int p)
102 (let ((b (get-bytevector-n* p 8)))
103 (bytevector-u32-ref b 0 (endianness little))))
104
105 (define (write-long-long n p)
106 (let ((b (make-bytevector 8 0)))
107 (bytevector-u64-set! b 0 n (endianness little))
108 (put-bytevector p b)))
109
110 (define (read-long-long p)
111 (let ((b (get-bytevector-n* p 8)))
112 (bytevector-u64-ref b 0 (endianness little))))
113
114 (define write-padding
115 (let ((zero (make-bytevector 8 0)))
116 (lambda (n p)
117 (let ((m (modulo n 8)))
118 (or (zero? m)
119 (put-bytevector p zero 0 (- 8 m)))))))
120
121 (define* (write-bytevector s p
122 #:optional (l (bytevector-length s)))
123 (let* ((m (modulo l 8))
124 (b (make-bytevector (+ 8 l (if (zero? m) 0 (- 8 m))))))
125 (bytevector-u32-set! b 0 l (endianness little))
126 (bytevector-copy! s 0 b 8 l)
127 (put-bytevector p b)))
128
129 (define (write-string s p)
130 (write-bytevector (string->utf8 s) p))
131
132 (define (read-byte-string p)
133 (let* ((len (read-int p))
134 (m (modulo len 8))
135 (pad (if (zero? m) 0 (- 8 m)))
136 (bv (get-bytevector-n* p (+ len pad))))
137 (sub-bytevector bv len)))
138
139 (define (read-string p)
140 (utf8->string (read-byte-string p)))
141
142 (define (read-latin1-string p)
143 "Read an ISO-8859-1 string from P."
144 ;; Note: do not use 'get-string-n' to work around Guile bug
145 ;; <http://bugs.gnu.org/19621>. See <http://bugs.gnu.org/19610> for
146 ;; a discussion.
147 (let ((bv (read-byte-string p)))
148 ;; XXX: Rewrite using (ice-9 iconv).
149 (list->string (map integer->char (bytevector->u8-list bv)))))
150
151 (define (read-maybe-utf8-string p)
152 "Read a serialized string from port P. Attempt to decode it as UTF-8 and
153 substitute invalid byte sequences with question marks. This is a
154 \"permissive\" UTF-8 decoder."
155 ;; XXX: We rely on the port's decoding mechanism to do permissive decoding
156 ;; and substitute invalid byte sequences with question marks, but this is
157 ;; not very efficient. Eventually Guile may provide a lightweight
158 ;; permissive UTF-8 decoder.
159 (let* ((bv (read-byte-string p))
160 (port (open-bytevector-input-port bv)))
161 (set-port-encoding! port "UTF-8")
162 (set-port-conversion-strategy! port 'substitute)
163 (rdelim:read-string port)))
164
165 (define (write-string-list l p)
166 (write-int (length l) p)
167 (for-each (cut write-string <> p) l))
168
169 (define (read-string-list p)
170 (let ((len (read-int p)))
171 (unfold (cut >= <> len)
172 (lambda (i)
173 (read-string p))
174 1+
175 0)))
176
177 (define (write-string-pairs l p)
178 (write-int (length l) p)
179 (for-each (match-lambda
180 ((first . second)
181 (write-string first p)
182 (write-string second p)))
183 l))
184
185 (define (read-string-pairs p)
186 (let ((len (read-int p)))
187 (unfold (cut >= <> len)
188 (lambda (i)
189 (cons (read-string p) (read-string p)))
190 1+
191 0)))
192
193 (define (write-store-path f p)
194 (write-string f p)) ; TODO: assert path
195
196 (define (read-store-path p)
197 (read-string p)) ; TODO: assert path
198
199 (define write-store-path-list write-string-list)
200 (define read-store-path-list read-string-list)
201
202 (define-syntax write-literal-strings
203 (lambda (s)
204 "Write the given literal strings to PORT in an optimized fashion, without
205 any run-time allocations or computations."
206 (define (padding len)
207 (let ((m (modulo len 8)))
208 (if (zero? m)
209 0
210 (- 8 m))))
211
212 (syntax-case s ()
213 ((_ port strings ...)
214 (let* ((bytes (map string->utf8 (syntax->datum #'(strings ...))))
215 (len (fold (lambda (bv size)
216 (+ size 8 (bytevector-length bv)
217 (padding (bytevector-length bv))))
218 0
219 bytes))
220 (bv (make-bytevector len))
221 (zeros (make-bytevector 8 0)))
222 (fold (lambda (str offset)
223 (let ((len (bytevector-length str)))
224 (bytevector-u32-set! bv offset len (endianness little))
225 (bytevector-copy! str 0 bv (+ 8 offset) len)
226 (bytevector-copy! zeros 0 bv (+ 8 offset len)
227 (padding len))
228 (+ offset 8 len (padding len))))
229 0
230 bytes)
231 #`(put-bytevector port #,bv))))))
232
233 \f
234 (define-condition-type &nar-read-error &nar-error
235 nar-read-error?
236 (token nar-read-error-token)) ; faulty token, or #f
237
238
239 (define (dump in out size)
240 "Copy SIZE bytes from IN to OUT."
241 (define buf-size 65536)
242 (define buf (make-bytevector buf-size))
243
244 (let loop ((left size))
245 (if (<= left 0)
246 0
247 (let ((read (get-bytevector-n! in buf 0 (min left buf-size))))
248 (if (eof-object? read)
249 left
250 (begin
251 (put-bytevector out buf 0 read)
252 (loop (- left read))))))))
253
254 (define (write-contents-from-port input output size)
255 "Write SIZE bytes from port INPUT to port OUTPUT."
256 (write-string "contents" output)
257 (write-long-long size output)
258 ;; Use 'sendfile' when both OUTPUT and INPUT are file ports.
259 (if (and (file-port? output) (file-port? input))
260 (sendfile output input size 0)
261 (dump input output size))
262 (write-padding size output))
263
264 (define (read-file-type port)
265 "Read the file type tag from PORT, and return either 'regular or
266 'executable."
267 (match (read-string port)
268 ("contents"
269 'regular)
270 ("executable"
271 (match (list (read-string port) (read-string port))
272 (("" "contents") 'executable)
273 (x (raise
274 (condition (&message
275 (message "unexpected executable file marker"))
276 (&nar-read-error (port port)
277 (file #f)
278 (token x)))))))
279 (x
280 (raise
281 (condition (&message (message "unsupported nar file type"))
282 (&nar-read-error (port port) (file #f) (token x)))))))
283
284 (define %archive-version-1
285 ;; Magic cookie for Nix archives.
286 "nix-archive-1")
287
288 (define* (write-file file port
289 #:key (select? (const #t)))
290 "Write the contents of FILE to PORT in Nar format, recursing into
291 sub-directories of FILE as needed. For each directory entry, call (SELECT?
292 FILE STAT), where FILE is the entry's absolute file name and STAT is the
293 result of 'lstat'; exclude entries for which SELECT? does not return true."
294 (write-file-tree file port
295 #:file-type+size
296 (lambda (file)
297 (let* ((stat (lstat file))
298 (size (stat:size stat)))
299 (case (stat:type stat)
300 ((directory)
301 (values 'directory size))
302 ((regular)
303 (values (if (zero? (logand (stat:mode stat)
304 #o100))
305 'regular
306 'executable)
307 size))
308 (else
309 (values (stat:type stat) size))))) ;bah!
310 #:file-port (cut open-file <> "r0b")
311 #:symlink-target readlink
312
313 #:directory-entries
314 (lambda (directory)
315 ;; 'scandir' defaults to 'string-locale<?' to sort files,
316 ;; but this happens to be case-insensitive (at least in
317 ;; 'en_US' locale on libc 2.18.) Conversely, we want
318 ;; files to be sorted in a case-sensitive fashion.
319 (define basenames
320 (scandir directory (negate (cut member <> '("." "..")))
321 string<?))
322
323 (filter-map (lambda (base)
324 (let ((file (string-append directory
325 "/" base)))
326 (and (select? file (lstat file))
327 base)))
328 basenames))
329
330 ;; The 'scandir' call above gives us filtered and sorted
331 ;; entries, so no post-processing is needed.
332 #:postprocess-entries identity))
333
334 (define (filter/sort-directory-entries lst)
335 "Remove dot and dot-dot entries from LST, and sort it in lexicographical
336 order."
337 (delete-duplicates
338 (sort (remove (cute member <> '("." "..")) lst)
339 string<?)
340 string=?))
341
342 (define* (write-file-tree file port
343 #:key
344 file-type+size
345 file-port
346 symlink-target
347 directory-entries
348 (postprocess-entries filter/sort-directory-entries))
349 "Write the contents of FILE to PORT in Nar format, recursing into
350 sub-directories of FILE as needed.
351
352 This procedure does not make any file-system I/O calls. Instead, it calls the
353 user-provided FILE-TYPE+SIZE, FILE-PORT, SYMLINK-TARGET, and DIRECTORY-ENTRIES
354 procedures, which roughly correspond to 'lstat', 'readlink', and 'scandir'.
355 POSTPROCESS-ENTRIES ensures that directory entries are valid; leave it as-is
356 unless you know that DIRECTORY-ENTRIES provide filtered and sorted entries, in
357 which case you can use 'identity'."
358 (define p port)
359
360 (write-string %archive-version-1 p)
361
362 (let dump ((f file))
363 (define-values (type size)
364 (file-type+size f))
365
366 (write-literal-strings p "(")
367 (case type
368 ((regular executable)
369 (write-literal-strings p "type" "regular")
370 (when (eq? 'executable type)
371 (write-literal-strings p "executable" ""))
372 (let ((input (file-port f)))
373 (dynamic-wind
374 (const #t)
375 (lambda ()
376 (write-contents-from-port input p size))
377 (lambda ()
378 (close-port input)))))
379 ((directory)
380 (write-literal-strings p "type" "directory")
381 (let ((entries (postprocess-entries (directory-entries f))))
382 (for-each (lambda (e)
383 (let* ((f (string-append f "/" e)))
384 (write-literal-strings p "entry" "(" "name")
385 (write-string e p)
386 (write-literal-strings p "node")
387 (dump f)
388 (write-literal-strings p ")")))
389 entries)))
390 ((symlink)
391 (write-literal-strings p "type" "symlink" "target")
392 (write-string (symlink-target f) p))
393 (else
394 (raise (condition (&message (message "unsupported file type"))
395 (&nar-error (file f) (port port))))))
396 (write-literal-strings p ")")))
397
398 (define port-conversion-strategy
399 (fluid->parameter %default-port-conversion-strategy))
400
401 (define (fold-archive proc seed port file)
402 "Read a file (possibly a directory structure) in Nar format from PORT. Call
403 PROC on each file or directory read from PORT using:
404
405 (PROC FILE TYPE CONTENTS RESULT)
406
407 using SEED as the first RESULT. TYPE is a symbol like 'regular, and CONTENTS
408 depends on TYPE."
409 (parameterize ((currently-restored-file file)
410
411 ;; Error out if we can convert file names to the current
412 ;; locale. (XXX: We'd prefer UTF-8 encoding for file names
413 ;; regardless of the locale, but that's what Guile gives us
414 ;; so far.)
415 (port-conversion-strategy 'error))
416 (let ((signature (read-string port)))
417 (unless (equal? signature %archive-version-1)
418 (raise
419 (condition (&message (message "invalid nar signature"))
420 (&nar-read-error (port port)
421 (token signature)
422 (file #f))))))
423
424 (let read ((file file)
425 (result seed))
426 (define (read-eof-marker)
427 (match (read-string port)
428 (")" #t)
429 (x (raise
430 (condition
431 (&message (message "invalid nar end-of-file marker"))
432 (&nar-read-error (port port) (file file) (token x)))))))
433
434 (currently-restored-file file)
435
436 (match (list (read-string port) (read-string port) (read-string port))
437 (("(" "type" "regular")
438 (let* ((type (read-file-type port))
439 (size (read-long-long port))
440
441 ;; The caller must read exactly SIZE bytes from PORT.
442 (result (proc file type `(,port . ,size) result)))
443 (let ((m (modulo size 8)))
444 (unless (zero? m)
445 (get-bytevector-n* port (- 8 m))))
446 (read-eof-marker)
447 result))
448 (("(" "type" "symlink")
449 (match (list (read-string port) (read-string port))
450 (("target" target)
451 (let ((result (proc file 'symlink target result)))
452 (read-eof-marker)
453 result))
454 (x (raise
455 (condition
456 (&message (message "invalid symlink tokens"))
457 (&nar-read-error (port port) (file file) (token x)))))))
458 (("(" "type" "directory")
459 (let ((dir file))
460 (let loop ((prefix (read-string port))
461 (result (proc file 'directory #f result)))
462 (match prefix
463 ("entry"
464 (match (list (read-string port)
465 (read-string port) (read-string port)
466 (read-string port))
467 (("(" "name" file "node")
468 (let ((result (read (string-append dir "/" file) result)))
469 (match (read-string port)
470 (")" #f)
471 (x
472 (raise
473 (condition
474 (&message
475 (message "unexpected directory entry termination"))
476 (&nar-read-error (port port)
477 (file file)
478 (token x))))))
479 (loop (read-string port) result)))))
480 (")" ;done with DIR
481 (proc file 'directory-complete #f result))
482 (x
483 (raise
484 (condition
485 (&message (message "unexpected directory inter-entry marker"))
486 (&nar-read-error (port port) (file file) (token x)))))))))
487 (x
488 (raise
489 (condition
490 (&message (message "unsupported nar entry type"))
491 (&nar-read-error (port port) (file file) (token x)))))))))
492
493 (define (dump-file file input size type)
494 "Dump SIZE bytes from INPUT to FILE.
495
496 This procedure is suitable for use as the #:dump-file argument to
497 'restore-file'."
498 (call-with-output-file file
499 (lambda (output)
500 (dump input output size))))
501
502 (define* (restore-file port file
503 #:key (dump-file dump-file))
504 "Read a file (possibly a directory structure) in Nar format from PORT.
505 Restore it as FILE with canonical permissions and timestamps. To write a
506 regular or executable file, call:
507
508 (DUMP-FILE FILE INPUT SIZE TYPE)
509
510 The default is to dump SIZE bytes from INPUT to FILE, but callers can provide
511 a custom procedure, for instance to deduplicate FILE on the fly."
512 (fold-archive (lambda (file type content result)
513 (match type
514 ('directory
515 (mkdir file))
516 ('directory-complete
517 (chmod file #o555)
518 (utime file 1 1 0 0))
519 ('symlink
520 (symlink content file)
521 (utime file 1 1 0 0 AT_SYMLINK_NOFOLLOW))
522 ((or 'regular 'executable)
523 (match content
524 ((input . size)
525 (dump-file file input size type)
526 (chmod file (if (eq? type 'executable)
527 #o555
528 #o444))
529 (utime file 1 1 0 0))))))
530 #t
531 port
532 file))
533
534 ;;; Local Variables:
535 ;;; eval: (put 'call-with-binary-input-file 'scheme-indent-function 1)
536 ;;; End:
537
538 ;;; serialization.scm ends here