gnu: r-igraph: Move to (gnu packages cran).
[jackhill/guix/guix.git] / guix / serialization.scm
CommitLineData
0f41c26f 1;;; GNU Guix --- Functional package management for GNU
0aa6b386 2;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
0f41c26f
LC
3;;;
4;;; This file is part of GNU Guix.
5;;;
6;;; GNU Guix is free software; you can redistribute it and/or modify it
7;;; under the terms of the GNU General Public License as published by
8;;; the Free Software Foundation; either version 3 of the License, or (at
9;;; your option) any later version.
10;;;
11;;; GNU Guix is distributed in the hope that it will be useful, but
12;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14;;; GNU General Public License for more details.
15;;;
16;;; You should have received a copy of the GNU General Public License
17;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
18
19(define-module (guix serialization)
958dd3ce 20 #:use-module (guix combinators)
0f41c26f 21 #:use-module (rnrs bytevectors)
0f41c26f
LC
22 #:use-module (srfi srfi-1)
23 #:use-module (srfi srfi-26)
0363991a
LC
24 #:use-module (srfi srfi-34)
25 #:use-module (srfi srfi-35)
2535635f
LC
26 #:use-module (ice-9 binary-ports)
27 #:use-module ((ice-9 rdelim) #:prefix rdelim:)
6c20d1d0 28 #:use-module (ice-9 match)
0363991a 29 #:use-module (ice-9 ftw)
01e5d63c 30 #:use-module (system foreign)
0f41c26f
LC
31 #:export (write-int read-int
32 write-long-long read-long-long
33 write-padding
0d268c5d 34 write-bytevector write-string
ce72c780 35 read-string read-latin1-string read-maybe-utf8-string
0f41c26f 36 write-string-list read-string-list
6c20d1d0 37 write-string-pairs
0f41c26f 38 write-store-path read-store-path
0363991a 39 write-store-path-list read-store-path-list
0aa6b386 40 (dump . dump-port*)
0363991a
LC
41
42 &nar-error
43 nar-error?
44 nar-error-port
45 nar-error-file
46
47 &nar-read-error
48 nar-read-error?
49 nar-read-error-token
50
51 write-file
b94b698d 52 write-file-tree
12c1afcd 53 fold-archive
0363991a 54 restore-file))
0f41c26f
LC
55
56;;; Comment:
57;;;
58;;; Serialization procedures used by the RPCs and the Nar format. This module
59;;; is for internal consumption.
60;;;
61;;; Code:
62
63;; Similar to serialize.cc in Nix.
64
f9e8a123 65(define-condition-type &nar-error &error ; XXX: inherit from &store-error ?
46b8aadb
LC
66 nar-error?
67 (file nar-error-file) ; file we were restoring, or #f
68 (port nar-error-port)) ; port from which we read
69
70(define currently-restored-file
71 ;; Name of the file being restored. Used internally for error reporting.
72 (make-parameter #f))
73
74
75(define (get-bytevector-n* port count)
76 (let ((bv (get-bytevector-n port count)))
77 (when (or (eof-object? bv)
78 (< (bytevector-length bv) count))
79 (raise (condition (&nar-error
80 (file (currently-restored-file))
81 (port port)))))
82 bv))
83
01e5d63c
LC
84(define (sub-bytevector bv len)
85 "Return a bytevector that aliases the first LEN bytes of BV."
86 (define max (bytevector-length bv))
87 (cond ((= len max) bv)
88 ((< len max)
89 ;; Yes, this is safe because the result of each conversion procedure
90 ;; has its life cycle synchronized with that of its argument.
91 (pointer->bytevector (bytevector->pointer bv) len))
92 (else
93 (error "sub-bytevector called to get a super bytevector"))))
94
0f41c26f
LC
95(define (write-int n p)
96 (let ((b (make-bytevector 8 0)))
97 (bytevector-u32-set! b 0 n (endianness little))
98 (put-bytevector p b)))
99
100(define (read-int p)
46b8aadb 101 (let ((b (get-bytevector-n* p 8)))
0f41c26f
LC
102 (bytevector-u32-ref b 0 (endianness little))))
103
104(define (write-long-long n p)
105 (let ((b (make-bytevector 8 0)))
106 (bytevector-u64-set! b 0 n (endianness little))
107 (put-bytevector p b)))
108
109(define (read-long-long p)
46b8aadb 110 (let ((b (get-bytevector-n* p 8)))
0f41c26f
LC
111 (bytevector-u64-ref b 0 (endianness little))))
112
113(define write-padding
114 (let ((zero (make-bytevector 8 0)))
115 (lambda (n p)
116 (let ((m (modulo n 8)))
117 (or (zero? m)
118 (put-bytevector p zero 0 (- 8 m)))))))
119
39d1e965
LC
120(define* (write-bytevector s p
121 #:optional (l (bytevector-length s)))
122 (let* ((m (modulo l 8))
0f41c26f
LC
123 (b (make-bytevector (+ 8 l (if (zero? m) 0 (- 8 m))))))
124 (bytevector-u32-set! b 0 l (endianness little))
125 (bytevector-copy! s 0 b 8 l)
126 (put-bytevector p b)))
127
0d268c5d
LC
128(define (write-string s p)
129 (write-bytevector (string->utf8 s) p))
130
472e4c43 131(define (read-byte-string p)
0f41c26f
LC
132 (let* ((len (read-int p))
133 (m (modulo len 8))
01e5d63c
LC
134 (pad (if (zero? m) 0 (- 8 m)))
135 (bv (get-bytevector-n* p (+ len pad))))
136 (sub-bytevector bv len)))
0f41c26f 137
472e4c43
LC
138(define (read-string p)
139 (utf8->string (read-byte-string p)))
aa27b560 140
472e4c43
LC
141(define (read-latin1-string p)
142 "Read an ISO-8859-1 string from P."
143 ;; Note: do not use 'get-string-n' to work around Guile bug
144 ;; <http://bugs.gnu.org/19621>. See <http://bugs.gnu.org/19610> for
145 ;; a discussion.
146 (let ((bv (read-byte-string p)))
36626c55 147 ;; XXX: Rewrite using (ice-9 iconv).
472e4c43 148 (list->string (map integer->char (bytevector->u8-list bv)))))
0f41c26f 149
ce72c780
LC
150(define (read-maybe-utf8-string p)
151 "Read a serialized string from port P. Attempt to decode it as UTF-8 and
152substitute invalid byte sequences with question marks. This is a
153\"permissive\" UTF-8 decoder."
154 ;; XXX: We rely on the port's decoding mechanism to do permissive decoding
155 ;; and substitute invalid byte sequences with question marks, but this is
156 ;; not very efficient. Eventually Guile may provide a lightweight
157 ;; permissive UTF-8 decoder.
158 (let* ((bv (read-byte-string p))
bc7d089a
LC
159 (port (open-bytevector-input-port bv)))
160 (set-port-encoding! port "UTF-8")
161 (set-port-conversion-strategy! port 'substitute)
2535635f 162 (rdelim:read-string port)))
ce72c780 163
0f41c26f
LC
164(define (write-string-list l p)
165 (write-int (length l) p)
166 (for-each (cut write-string <> p) l))
167
6c20d1d0
LC
168(define (write-string-pairs l p)
169 (write-int (length l) p)
170 (for-each (match-lambda
171 ((first . second)
172 (write-string first p)
173 (write-string second p)))
174 l))
175
0f41c26f
LC
176(define (read-string-list p)
177 (let ((len (read-int p)))
178 (unfold (cut >= <> len)
179 (lambda (i)
180 (read-string p))
181 1+
182 0)))
183
184(define (write-store-path f p)
185 (write-string f p)) ; TODO: assert path
186
187(define (read-store-path p)
188 (read-string p)) ; TODO: assert path
189
190(define write-store-path-list write-string-list)
191(define read-store-path-list read-string-list)
192
0363991a 193\f
0363991a
LC
194(define-condition-type &nar-read-error &nar-error
195 nar-read-error?
196 (token nar-read-error-token)) ; faulty token, or #f
197
198
199(define (dump in out size)
200 "Copy SIZE bytes from IN to OUT."
201 (define buf-size 65536)
202 (define buf (make-bytevector buf-size))
203
204 (let loop ((left size))
205 (if (<= left 0)
206 0
207 (let ((read (get-bytevector-n! in buf 0 (min left buf-size))))
208 (if (eof-object? read)
209 left
210 (begin
211 (put-bytevector out buf 0 read)
212 (loop (- left read))))))))
213
b94b698d
LC
214(define (write-contents-from-port input output size)
215 "Write SIZE bytes from port INPUT to port OUTPUT."
216 (write-string "contents" output)
217 (write-long-long size output)
218 ;; Use 'sendfile' when both OUTPUT and INPUT are file ports.
219 (if (and (file-port? output) (file-port? input))
220 (sendfile output input size 0)
221 (dump input output size))
222 (write-padding size output))
0363991a 223
12c1afcd
LC
224(define (read-file-type port)
225 "Read the file type tag from PORT, and return either 'regular or
226'executable."
227 (match (read-string port)
228 ("contents"
229 'regular)
230 ("executable"
231 (match (list (read-string port) (read-string port))
232 (("" "contents") 'executable)
233 (x (raise
234 (condition (&message
235 (message "unexpected executable file marker"))
236 (&nar-read-error (port port)
237 (file #f)
238 (token x)))))))
239 (x
240 (raise
241 (condition (&message (message "unsupported nar file type"))
242 (&nar-read-error (port port) (file #f) (token x)))))))
0363991a
LC
243
244(define %archive-version-1
245 ;; Magic cookie for Nix archives.
246 "nix-archive-1")
247
fe585be9
LC
248(define* (write-file file port
249 #:key (select? (const #t)))
0363991a 250 "Write the contents of FILE to PORT in Nar format, recursing into
fe585be9
LC
251sub-directories of FILE as needed. For each directory entry, call (SELECT?
252FILE STAT), where FILE is the entry's absolute file name and STAT is the
253result of 'lstat'; exclude entries for which SELECT? does not return true."
b94b698d
LC
254 (write-file-tree file port
255 #:file-type+size
256 (lambda (file)
257 (let* ((stat (lstat file))
258 (size (stat:size stat)))
259 (case (stat:type stat)
260 ((directory)
261 (values 'directory size))
262 ((regular)
263 (values (if (zero? (logand (stat:mode stat)
264 #o100))
265 'regular
266 'executable)
267 size))
268 (else
269 (values (stat:type stat) size))))) ;bah!
270 #:file-port (cut open-file <> "r0b")
271 #:symlink-target readlink
272
273 #:directory-entries
274 (lambda (directory)
275 ;; 'scandir' defaults to 'string-locale<?' to sort files,
276 ;; but this happens to be case-insensitive (at least in
277 ;; 'en_US' locale on libc 2.18.) Conversely, we want
278 ;; files to be sorted in a case-sensitive fashion.
279 (define basenames
280 (scandir directory (negate (cut member <> '("." "..")))
281 string<?))
282
283 (filter-map (lambda (base)
284 (let ((file (string-append directory
285 "/" base)))
c122a2e5 286 (and (select? file (lstat file))
b94b698d
LC
287 base)))
288 basenames))
289
290 ;; The 'scandir' call above gives us filtered and sorted
291 ;; entries, so no post-processing is needed.
292 #:postprocess-entries identity))
293
294(define (filter/sort-directory-entries lst)
295 "Remove dot and dot-dot entries from LST, and sort it in lexicographical
296order."
297 (delete-duplicates
298 (sort (remove (cute member <> '("." "..")) lst)
299 string<?)
300 string=?))
301
302(define* (write-file-tree file port
303 #:key
304 file-type+size
305 file-port
306 symlink-target
307 directory-entries
308 (postprocess-entries filter/sort-directory-entries))
309 "Write the contents of FILE to PORT in Nar format, recursing into
310sub-directories of FILE as needed.
311
312This procedure does not make any file-system I/O calls. Instead, it calls the
313user-provided FILE-TYPE+SIZE, FILE-PORT, SYMLINK-TARGET, and DIRECTORY-ENTRIES
314procedures, which roughly correspond to 'lstat', 'readlink', and 'scandir'.
315POSTPROCESS-ENTRIES ensures that directory entries are valid; leave it as-is
316unless you know that DIRECTORY-ENTRIES provide filtered and sorted entries, in
317which case you can use 'identity'."
0363991a
LC
318 (define p port)
319
320 (write-string %archive-version-1 p)
321
b94b698d
LC
322 (let dump ((f file))
323 (define-values (type size)
324 (file-type+size f))
325
fe585be9 326 (write-string "(" p)
b94b698d
LC
327 (case type
328 ((regular executable)
fe585be9
LC
329 (write-string "type" p)
330 (write-string "regular" p)
b94b698d
LC
331 (when (eq? 'executable type)
332 (write-string "executable" p)
333 (write-string "" p))
334 (let ((input (file-port f)))
335 (dynamic-wind
336 (const #t)
337 (lambda ()
338 (write-contents-from-port input p size))
339 (lambda ()
340 (close-port input)))))
fe585be9
LC
341 ((directory)
342 (write-string "type" p)
343 (write-string "directory" p)
b94b698d 344 (let ((entries (postprocess-entries (directory-entries f))))
fe585be9 345 (for-each (lambda (e)
b94b698d
LC
346 (let* ((f (string-append f "/" e)))
347 (write-string "entry" p)
348 (write-string "(" p)
349 (write-string "name" p)
350 (write-string e p)
351 (write-string "node" p)
352 (dump f)
353 (write-string ")" p)))
fe585be9
LC
354 entries)))
355 ((symlink)
356 (write-string "type" p)
357 (write-string "symlink" p)
358 (write-string "target" p)
b94b698d 359 (write-string (symlink-target f) p))
fe585be9
LC
360 (else
361 (raise (condition (&message (message "unsupported file type"))
362 (&nar-error (file f) (port port))))))
363 (write-string ")" p)))
0363991a 364
9fe3f113
LC
365(define port-conversion-strategy
366 (fluid->parameter %default-port-conversion-strategy))
367
12c1afcd
LC
368(define (fold-archive proc seed port file)
369 "Read a file (possibly a directory structure) in Nar format from PORT. Call
370PROC on each file or directory read from PORT using:
371
372 (PROC FILE TYPE CONTENTS RESULT)
373
374using SEED as the first RESULT. TYPE is a symbol like 'regular, and CONTENTS
375depends on TYPE."
9fe3f113
LC
376 (parameterize ((currently-restored-file file)
377
378 ;; Error out if we can convert file names to the current
379 ;; locale. (XXX: We'd prefer UTF-8 encoding for file names
380 ;; regardless of the locale, but that's what Guile gives us
381 ;; so far.)
382 (port-conversion-strategy 'error))
46b8aadb
LC
383 (let ((signature (read-string port)))
384 (unless (equal? signature %archive-version-1)
385 (raise
386 (condition (&message (message "invalid nar signature"))
387 (&nar-read-error (port port)
388 (token signature)
389 (file #f))))))
390
12c1afcd
LC
391 (let read ((file file)
392 (result seed))
46b8aadb
LC
393 (define (read-eof-marker)
394 (match (read-string port)
395 (")" #t)
396 (x (raise
397 (condition
398 (&message (message "invalid nar end-of-file marker"))
399 (&nar-read-error (port port) (file file) (token x)))))))
400
401 (currently-restored-file file)
402
403 (match (list (read-string port) (read-string port) (read-string port))
404 (("(" "type" "regular")
12c1afcd
LC
405 (let* ((type (read-file-type port))
406 (size (read-long-long port))
407
408 ;; The caller must read exactly SIZE bytes from PORT.
409 (result (proc file type `(,port . ,size) result)))
410 (let ((m (modulo size 8)))
411 (unless (zero? m)
412 (get-bytevector-n* port (- 8 m))))
413 (read-eof-marker)
414 result))
46b8aadb
LC
415 (("(" "type" "symlink")
416 (match (list (read-string port) (read-string port))
417 (("target" target)
12c1afcd
LC
418 (let ((result (proc file 'symlink target result)))
419 (read-eof-marker)
420 result))
46b8aadb 421 (x (raise
0363991a 422 (condition
46b8aadb
LC
423 (&message (message "invalid symlink tokens"))
424 (&nar-read-error (port port) (file file) (token x)))))))
425 (("(" "type" "directory")
426 (let ((dir file))
12c1afcd
LC
427 (let loop ((prefix (read-string port))
428 (result (proc file 'directory #f result)))
46b8aadb
LC
429 (match prefix
430 ("entry"
431 (match (list (read-string port)
432 (read-string port) (read-string port)
433 (read-string port))
434 (("(" "name" file "node")
12c1afcd
LC
435 (let ((result (read (string-append dir "/" file) result)))
436 (match (read-string port)
437 (")" #f)
438 (x
439 (raise
440 (condition
441 (&message
442 (message "unexpected directory entry termination"))
443 (&nar-read-error (port port)
444 (file file)
445 (token x))))))
446 (loop (read-string port) result)))))
447 (")" result) ;done with DIR
46b8aadb
LC
448 (x
449 (raise
450 (condition
451 (&message (message "unexpected directory inter-entry marker"))
452 (&nar-read-error (port port) (file file) (token x)))))))))
453 (x
454 (raise
455 (condition
456 (&message (message "unsupported nar entry type"))
457 (&nar-read-error (port port) (file file) (token x)))))))))
0363991a 458
12c1afcd
LC
459(define (restore-file port file)
460 "Read a file (possibly a directory structure) in Nar format from PORT.
461Restore it as FILE."
462 (fold-archive (lambda (file type content result)
463 (match type
464 ('directory
465 (mkdir file))
466 ('symlink
467 (symlink content file))
468 ((or 'regular 'executable)
469 (match content
470 ((input . size)
471 (call-with-output-file file
472 (lambda (output)
473 (dump input output size)
474 (when (eq? type 'executable)
475 (chmod output #o755)))))))))
476 #t
477 port
478 file))
479
b94b698d
LC
480;;; Local Variables:
481;;; eval: (put 'call-with-binary-input-file 'scheme-indent-function 1)
482;;; End:
483
0f41c26f 484;;; serialization.scm ends here