gnu: rust-gag-0.1: Fix typo.
[jackhill/guix/guix.git] / guix / serialization.scm
CommitLineData
0f41c26f 1;;; GNU Guix --- Functional package management for GNU
7df3ab0f 2;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
0f41c26f
LC
3;;;
4;;; This file is part of GNU Guix.
5;;;
6;;; GNU Guix is free software; you can redistribute it and/or modify it
7;;; under the terms of the GNU General Public License as published by
8;;; the Free Software Foundation; either version 3 of the License, or (at
9;;; your option) any later version.
10;;;
11;;; GNU Guix is distributed in the hope that it will be useful, but
12;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14;;; GNU General Public License for more details.
15;;;
16;;; You should have received a copy of the GNU General Public License
17;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
18
19(define-module (guix serialization)
958dd3ce 20 #:use-module (guix combinators)
0f41c26f 21 #:use-module (rnrs bytevectors)
0f41c26f
LC
22 #:use-module (srfi srfi-1)
23 #:use-module (srfi srfi-26)
0363991a
LC
24 #:use-module (srfi srfi-34)
25 #:use-module (srfi srfi-35)
2535635f
LC
26 #:use-module (ice-9 binary-ports)
27 #:use-module ((ice-9 rdelim) #:prefix rdelim:)
6c20d1d0 28 #:use-module (ice-9 match)
0363991a 29 #:use-module (ice-9 ftw)
01e5d63c 30 #:use-module (system foreign)
0f41c26f
LC
31 #:export (write-int read-int
32 write-long-long read-long-long
33 write-padding
0d268c5d 34 write-bytevector write-string
ce72c780 35 read-string read-latin1-string read-maybe-utf8-string
0f41c26f 36 write-string-list read-string-list
7df3ab0f 37 write-string-pairs read-string-pairs
0f41c26f 38 write-store-path read-store-path
0363991a 39 write-store-path-list read-store-path-list
0aa6b386 40 (dump . dump-port*)
0363991a
LC
41
42 &nar-error
43 nar-error?
44 nar-error-port
45 nar-error-file
46
47 &nar-read-error
48 nar-read-error?
49 nar-read-error-token
50
51 write-file
b94b698d 52 write-file-tree
12c1afcd 53 fold-archive
c7c7f068
LC
54 restore-file
55 dump-file))
0f41c26f
LC
56
57;;; Comment:
58;;;
59;;; Serialization procedures used by the RPCs and the Nar format. This module
60;;; is for internal consumption.
61;;;
62;;; Code:
63
64;; Similar to serialize.cc in Nix.
65
f9e8a123 66(define-condition-type &nar-error &error ; XXX: inherit from &store-error ?
46b8aadb
LC
67 nar-error?
68 (file nar-error-file) ; file we were restoring, or #f
69 (port nar-error-port)) ; port from which we read
70
71(define currently-restored-file
72 ;; Name of the file being restored. Used internally for error reporting.
73 (make-parameter #f))
74
75
76(define (get-bytevector-n* port count)
77 (let ((bv (get-bytevector-n port count)))
78 (when (or (eof-object? bv)
79 (< (bytevector-length bv) count))
80 (raise (condition (&nar-error
81 (file (currently-restored-file))
82 (port port)))))
83 bv))
84
01e5d63c
LC
85(define (sub-bytevector bv len)
86 "Return a bytevector that aliases the first LEN bytes of BV."
87 (define max (bytevector-length bv))
88 (cond ((= len max) bv)
89 ((< len max)
90 ;; Yes, this is safe because the result of each conversion procedure
91 ;; has its life cycle synchronized with that of its argument.
92 (pointer->bytevector (bytevector->pointer bv) len))
93 (else
94 (error "sub-bytevector called to get a super bytevector"))))
95
0f41c26f
LC
96(define (write-int n p)
97 (let ((b (make-bytevector 8 0)))
98 (bytevector-u32-set! b 0 n (endianness little))
99 (put-bytevector p b)))
100
101(define (read-int p)
46b8aadb 102 (let ((b (get-bytevector-n* p 8)))
0f41c26f
LC
103 (bytevector-u32-ref b 0 (endianness little))))
104
105(define (write-long-long n p)
106 (let ((b (make-bytevector 8 0)))
107 (bytevector-u64-set! b 0 n (endianness little))
108 (put-bytevector p b)))
109
110(define (read-long-long p)
46b8aadb 111 (let ((b (get-bytevector-n* p 8)))
0f41c26f
LC
112 (bytevector-u64-ref b 0 (endianness little))))
113
114(define write-padding
115 (let ((zero (make-bytevector 8 0)))
116 (lambda (n p)
117 (let ((m (modulo n 8)))
118 (or (zero? m)
119 (put-bytevector p zero 0 (- 8 m)))))))
120
39d1e965
LC
121(define* (write-bytevector s p
122 #:optional (l (bytevector-length s)))
123 (let* ((m (modulo l 8))
0f41c26f
LC
124 (b (make-bytevector (+ 8 l (if (zero? m) 0 (- 8 m))))))
125 (bytevector-u32-set! b 0 l (endianness little))
126 (bytevector-copy! s 0 b 8 l)
127 (put-bytevector p b)))
128
0d268c5d
LC
129(define (write-string s p)
130 (write-bytevector (string->utf8 s) p))
131
472e4c43 132(define (read-byte-string p)
0f41c26f
LC
133 (let* ((len (read-int p))
134 (m (modulo len 8))
01e5d63c
LC
135 (pad (if (zero? m) 0 (- 8 m)))
136 (bv (get-bytevector-n* p (+ len pad))))
137 (sub-bytevector bv len)))
0f41c26f 138
472e4c43
LC
139(define (read-string p)
140 (utf8->string (read-byte-string p)))
aa27b560 141
472e4c43
LC
142(define (read-latin1-string p)
143 "Read an ISO-8859-1 string from P."
144 ;; Note: do not use 'get-string-n' to work around Guile bug
145 ;; <http://bugs.gnu.org/19621>. See <http://bugs.gnu.org/19610> for
146 ;; a discussion.
147 (let ((bv (read-byte-string p)))
36626c55 148 ;; XXX: Rewrite using (ice-9 iconv).
472e4c43 149 (list->string (map integer->char (bytevector->u8-list bv)))))
0f41c26f 150
ce72c780
LC
151(define (read-maybe-utf8-string p)
152 "Read a serialized string from port P. Attempt to decode it as UTF-8 and
153substitute invalid byte sequences with question marks. This is a
154\"permissive\" UTF-8 decoder."
155 ;; XXX: We rely on the port's decoding mechanism to do permissive decoding
156 ;; and substitute invalid byte sequences with question marks, but this is
157 ;; not very efficient. Eventually Guile may provide a lightweight
158 ;; permissive UTF-8 decoder.
159 (let* ((bv (read-byte-string p))
bc7d089a
LC
160 (port (open-bytevector-input-port bv)))
161 (set-port-encoding! port "UTF-8")
162 (set-port-conversion-strategy! port 'substitute)
2535635f 163 (rdelim:read-string port)))
ce72c780 164
0f41c26f
LC
165(define (write-string-list l p)
166 (write-int (length l) p)
167 (for-each (cut write-string <> p) l))
168
7df3ab0f
LC
169(define (read-string-list p)
170 (let ((len (read-int p)))
171 (unfold (cut >= <> len)
172 (lambda (i)
173 (read-string p))
174 1+
175 0)))
176
6c20d1d0
LC
177(define (write-string-pairs l p)
178 (write-int (length l) p)
179 (for-each (match-lambda
180 ((first . second)
181 (write-string first p)
182 (write-string second p)))
183 l))
184
7df3ab0f 185(define (read-string-pairs p)
0f41c26f
LC
186 (let ((len (read-int p)))
187 (unfold (cut >= <> len)
188 (lambda (i)
7df3ab0f 189 (cons (read-string p) (read-string p)))
0f41c26f
LC
190 1+
191 0)))
192
193(define (write-store-path f p)
194 (write-string f p)) ; TODO: assert path
195
196(define (read-store-path p)
197 (read-string p)) ; TODO: assert path
198
199(define write-store-path-list write-string-list)
200(define read-store-path-list read-string-list)
201
f293705d
LC
202(define-syntax write-literal-strings
203 (lambda (s)
204 "Write the given literal strings to PORT in an optimized fashion, without
205any run-time allocations or computations."
206 (define (padding len)
207 (let ((m (modulo len 8)))
208 (if (zero? m)
209 0
210 (- 8 m))))
211
212 (syntax-case s ()
213 ((_ port strings ...)
214 (let* ((bytes (map string->utf8 (syntax->datum #'(strings ...))))
215 (len (fold (lambda (bv size)
216 (+ size 8 (bytevector-length bv)
217 (padding (bytevector-length bv))))
218 0
219 bytes))
220 (bv (make-bytevector len))
221 (zeros (make-bytevector 8 0)))
222 (fold (lambda (str offset)
223 (let ((len (bytevector-length str)))
224 (bytevector-u32-set! bv offset len (endianness little))
225 (bytevector-copy! str 0 bv (+ 8 offset) len)
226 (bytevector-copy! zeros 0 bv (+ 8 offset len)
227 (padding len))
228 (+ offset 8 len (padding len))))
229 0
230 bytes)
231 #`(put-bytevector port #,bv))))))
232
0363991a 233\f
0363991a
LC
234(define-condition-type &nar-read-error &nar-error
235 nar-read-error?
236 (token nar-read-error-token)) ; faulty token, or #f
237
238
239(define (dump in out size)
240 "Copy SIZE bytes from IN to OUT."
241 (define buf-size 65536)
242 (define buf (make-bytevector buf-size))
243
244 (let loop ((left size))
245 (if (<= left 0)
246 0
247 (let ((read (get-bytevector-n! in buf 0 (min left buf-size))))
248 (if (eof-object? read)
249 left
250 (begin
251 (put-bytevector out buf 0 read)
252 (loop (- left read))))))))
253
b94b698d
LC
254(define (write-contents-from-port input output size)
255 "Write SIZE bytes from port INPUT to port OUTPUT."
256 (write-string "contents" output)
257 (write-long-long size output)
258 ;; Use 'sendfile' when both OUTPUT and INPUT are file ports.
259 (if (and (file-port? output) (file-port? input))
260 (sendfile output input size 0)
261 (dump input output size))
262 (write-padding size output))
0363991a 263
12c1afcd
LC
264(define (read-file-type port)
265 "Read the file type tag from PORT, and return either 'regular or
266'executable."
267 (match (read-string port)
268 ("contents"
269 'regular)
270 ("executable"
271 (match (list (read-string port) (read-string port))
272 (("" "contents") 'executable)
273 (x (raise
274 (condition (&message
275 (message "unexpected executable file marker"))
276 (&nar-read-error (port port)
277 (file #f)
278 (token x)))))))
279 (x
280 (raise
281 (condition (&message (message "unsupported nar file type"))
282 (&nar-read-error (port port) (file #f) (token x)))))))
0363991a
LC
283
284(define %archive-version-1
285 ;; Magic cookie for Nix archives.
286 "nix-archive-1")
287
fe585be9
LC
288(define* (write-file file port
289 #:key (select? (const #t)))
0363991a 290 "Write the contents of FILE to PORT in Nar format, recursing into
fe585be9
LC
291sub-directories of FILE as needed. For each directory entry, call (SELECT?
292FILE STAT), where FILE is the entry's absolute file name and STAT is the
293result of 'lstat'; exclude entries for which SELECT? does not return true."
b94b698d
LC
294 (write-file-tree file port
295 #:file-type+size
296 (lambda (file)
297 (let* ((stat (lstat file))
298 (size (stat:size stat)))
299 (case (stat:type stat)
300 ((directory)
301 (values 'directory size))
302 ((regular)
303 (values (if (zero? (logand (stat:mode stat)
304 #o100))
305 'regular
306 'executable)
307 size))
308 (else
309 (values (stat:type stat) size))))) ;bah!
310 #:file-port (cut open-file <> "r0b")
311 #:symlink-target readlink
312
313 #:directory-entries
314 (lambda (directory)
315 ;; 'scandir' defaults to 'string-locale<?' to sort files,
316 ;; but this happens to be case-insensitive (at least in
317 ;; 'en_US' locale on libc 2.18.) Conversely, we want
318 ;; files to be sorted in a case-sensitive fashion.
319 (define basenames
320 (scandir directory (negate (cut member <> '("." "..")))
321 string<?))
322
323 (filter-map (lambda (base)
324 (let ((file (string-append directory
325 "/" base)))
c122a2e5 326 (and (select? file (lstat file))
b94b698d
LC
327 base)))
328 basenames))
329
330 ;; The 'scandir' call above gives us filtered and sorted
331 ;; entries, so no post-processing is needed.
332 #:postprocess-entries identity))
333
334(define (filter/sort-directory-entries lst)
335 "Remove dot and dot-dot entries from LST, and sort it in lexicographical
336order."
337 (delete-duplicates
338 (sort (remove (cute member <> '("." "..")) lst)
339 string<?)
340 string=?))
341
342(define* (write-file-tree file port
343 #:key
344 file-type+size
345 file-port
346 symlink-target
347 directory-entries
348 (postprocess-entries filter/sort-directory-entries))
349 "Write the contents of FILE to PORT in Nar format, recursing into
350sub-directories of FILE as needed.
351
352This procedure does not make any file-system I/O calls. Instead, it calls the
353user-provided FILE-TYPE+SIZE, FILE-PORT, SYMLINK-TARGET, and DIRECTORY-ENTRIES
354procedures, which roughly correspond to 'lstat', 'readlink', and 'scandir'.
355POSTPROCESS-ENTRIES ensures that directory entries are valid; leave it as-is
356unless you know that DIRECTORY-ENTRIES provide filtered and sorted entries, in
357which case you can use 'identity'."
0363991a
LC
358 (define p port)
359
360 (write-string %archive-version-1 p)
361
b94b698d
LC
362 (let dump ((f file))
363 (define-values (type size)
364 (file-type+size f))
365
f293705d 366 (write-literal-strings p "(")
b94b698d
LC
367 (case type
368 ((regular executable)
f293705d 369 (write-literal-strings p "type" "regular")
b94b698d 370 (when (eq? 'executable type)
f293705d 371 (write-literal-strings p "executable" ""))
b94b698d
LC
372 (let ((input (file-port f)))
373 (dynamic-wind
374 (const #t)
375 (lambda ()
376 (write-contents-from-port input p size))
377 (lambda ()
378 (close-port input)))))
fe585be9 379 ((directory)
f293705d 380 (write-literal-strings p "type" "directory")
b94b698d 381 (let ((entries (postprocess-entries (directory-entries f))))
fe585be9 382 (for-each (lambda (e)
b94b698d 383 (let* ((f (string-append f "/" e)))
f293705d 384 (write-literal-strings p "entry" "(" "name")
b94b698d 385 (write-string e p)
f293705d 386 (write-literal-strings p "node")
b94b698d 387 (dump f)
f293705d 388 (write-literal-strings p ")")))
fe585be9
LC
389 entries)))
390 ((symlink)
f293705d 391 (write-literal-strings p "type" "symlink" "target")
b94b698d 392 (write-string (symlink-target f) p))
fe585be9
LC
393 (else
394 (raise (condition (&message (message "unsupported file type"))
395 (&nar-error (file f) (port port))))))
f293705d 396 (write-literal-strings p ")")))
0363991a 397
9fe3f113
LC
398(define port-conversion-strategy
399 (fluid->parameter %default-port-conversion-strategy))
400
12c1afcd
LC
401(define (fold-archive proc seed port file)
402 "Read a file (possibly a directory structure) in Nar format from PORT. Call
403PROC on each file or directory read from PORT using:
404
405 (PROC FILE TYPE CONTENTS RESULT)
406
407using SEED as the first RESULT. TYPE is a symbol like 'regular, and CONTENTS
408depends on TYPE."
9fe3f113
LC
409 (parameterize ((currently-restored-file file)
410
411 ;; Error out if we can convert file names to the current
412 ;; locale. (XXX: We'd prefer UTF-8 encoding for file names
413 ;; regardless of the locale, but that's what Guile gives us
414 ;; so far.)
415 (port-conversion-strategy 'error))
46b8aadb
LC
416 (let ((signature (read-string port)))
417 (unless (equal? signature %archive-version-1)
418 (raise
419 (condition (&message (message "invalid nar signature"))
420 (&nar-read-error (port port)
421 (token signature)
422 (file #f))))))
423
12c1afcd
LC
424 (let read ((file file)
425 (result seed))
46b8aadb
LC
426 (define (read-eof-marker)
427 (match (read-string port)
428 (")" #t)
429 (x (raise
430 (condition
431 (&message (message "invalid nar end-of-file marker"))
432 (&nar-read-error (port port) (file file) (token x)))))))
433
434 (currently-restored-file file)
435
436 (match (list (read-string port) (read-string port) (read-string port))
437 (("(" "type" "regular")
12c1afcd
LC
438 (let* ((type (read-file-type port))
439 (size (read-long-long port))
440
441 ;; The caller must read exactly SIZE bytes from PORT.
442 (result (proc file type `(,port . ,size) result)))
443 (let ((m (modulo size 8)))
444 (unless (zero? m)
445 (get-bytevector-n* port (- 8 m))))
446 (read-eof-marker)
447 result))
46b8aadb
LC
448 (("(" "type" "symlink")
449 (match (list (read-string port) (read-string port))
450 (("target" target)
12c1afcd
LC
451 (let ((result (proc file 'symlink target result)))
452 (read-eof-marker)
453 result))
46b8aadb 454 (x (raise
0363991a 455 (condition
46b8aadb
LC
456 (&message (message "invalid symlink tokens"))
457 (&nar-read-error (port port) (file file) (token x)))))))
458 (("(" "type" "directory")
459 (let ((dir file))
12c1afcd
LC
460 (let loop ((prefix (read-string port))
461 (result (proc file 'directory #f result)))
46b8aadb
LC
462 (match prefix
463 ("entry"
464 (match (list (read-string port)
465 (read-string port) (read-string port)
466 (read-string port))
467 (("(" "name" file "node")
12c1afcd
LC
468 (let ((result (read (string-append dir "/" file) result)))
469 (match (read-string port)
470 (")" #f)
471 (x
472 (raise
473 (condition
474 (&message
475 (message "unexpected directory entry termination"))
476 (&nar-read-error (port port)
477 (file file)
478 (token x))))))
479 (loop (read-string port) result)))))
465d2cb2
LC
480 (")" ;done with DIR
481 (proc file 'directory-complete #f result))
46b8aadb
LC
482 (x
483 (raise
484 (condition
485 (&message (message "unexpected directory inter-entry marker"))
486 (&nar-read-error (port port) (file file) (token x)))))))))
487 (x
488 (raise
489 (condition
490 (&message (message "unsupported nar entry type"))
491 (&nar-read-error (port port) (file file) (token x)))))))))
0363991a 492
2718c29c 493(define (dump-file file input size type)
c7c7f068
LC
494 "Dump SIZE bytes from INPUT to FILE.
495
496This procedure is suitable for use as the #:dump-file argument to
497'restore-file'."
2718c29c
LC
498 (call-with-output-file file
499 (lambda (output)
500 (dump input output size))))
501
502(define* (restore-file port file
503 #:key (dump-file dump-file))
12c1afcd 504 "Read a file (possibly a directory structure) in Nar format from PORT.
2718c29c
LC
505Restore it as FILE with canonical permissions and timestamps. To write a
506regular or executable file, call:
507
508 (DUMP-FILE FILE INPUT SIZE TYPE)
509
510The default is to dump SIZE bytes from INPUT to FILE, but callers can provide
511a custom procedure, for instance to deduplicate FILE on the fly."
12c1afcd
LC
512 (fold-archive (lambda (file type content result)
513 (match type
514 ('directory
515 (mkdir file))
465d2cb2 516 ('directory-complete
ed7d02f7
LC
517 (chmod file #o555)
518 (utime file 1 1 0 0))
12c1afcd 519 ('symlink
ed7d02f7
LC
520 (symlink content file)
521 (utime file 1 1 0 0 AT_SYMLINK_NOFOLLOW))
12c1afcd
LC
522 ((or 'regular 'executable)
523 (match content
524 ((input . size)
2718c29c
LC
525 (dump-file file input size type)
526 (chmod file (if (eq? type 'executable)
527 #o555
528 #o444))
ed7d02f7 529 (utime file 1 1 0 0))))))
12c1afcd
LC
530 #t
531 port
532 file))
533
b94b698d
LC
534;;; Local Variables:
535;;; eval: (put 'call-with-binary-input-file 'scheme-indent-function 1)
536;;; End:
537
0f41c26f 538;;; serialization.scm ends here