;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2012, 2013, 2014, 2015 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
(define-module (guix serialization)
- #:use-module (guix utils)
+ #:use-module (guix combinators)
#:use-module (rnrs bytevectors)
- #:use-module (rnrs io ports)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-26)
#:use-module (srfi srfi-34)
#:use-module (srfi srfi-35)
+ #:use-module (ice-9 binary-ports)
+ #:use-module ((ice-9 rdelim) #:prefix rdelim:)
#:use-module (ice-9 match)
#:use-module (ice-9 ftw)
+ #:use-module (system foreign)
#:export (write-int read-int
write-long-long read-long-long
write-padding
- write-string
+ write-bytevector write-string
read-string read-latin1-string read-maybe-utf8-string
write-string-list read-string-list
write-string-pairs
write-store-path read-store-path
write-store-path-list read-store-path-list
+ (dump . dump-port*)
&nar-error
nar-error?
nar-read-error-token
write-file
+ write-file-tree
+ fold-archive
restore-file))
;;; Comment:
;; Similar to serialize.cc in Nix.
-(define-condition-type &nar-error &error ; XXX: inherit from &nix-error ?
+(define-condition-type &nar-error &error ; XXX: inherit from &store-error ?
nar-error?
(file nar-error-file) ; file we were restoring, or #f
(port nar-error-port)) ; port from which we read
(port port)))))
bv))
+(define (sub-bytevector bv len)
+ "Return a bytevector that aliases the first LEN bytes of BV."
+ (define max (bytevector-length bv))
+ (cond ((= len max) bv)
+ ((< len max)
+ ;; Yes, this is safe because the result of each conversion procedure
+ ;; has its life cycle synchronized with that of its argument.
+ (pointer->bytevector (bytevector->pointer bv) len))
+ (else
+ (error "sub-bytevector called to get a super bytevector"))))
+
(define (write-int n p)
(let ((b (make-bytevector 8 0)))
(bytevector-u32-set! b 0 n (endianness little))
(or (zero? m)
(put-bytevector p zero 0 (- 8 m)))))))
-(define (write-string s p)
- (let* ((s (string->utf8 s))
- (l (bytevector-length s))
- (m (modulo l 8))
+(define* (write-bytevector s p
+ #:optional (l (bytevector-length s)))
+ (let* ((m (modulo l 8))
(b (make-bytevector (+ 8 l (if (zero? m) 0 (- 8 m))))))
(bytevector-u32-set! b 0 l (endianness little))
(bytevector-copy! s 0 b 8 l)
(put-bytevector p b)))
+(define (write-string s p)
+ (write-bytevector (string->utf8 s) p))
+
(define (read-byte-string p)
(let* ((len (read-int p))
(m (modulo len 8))
- (bv (get-bytevector-n* p len)))
- (or (zero? m)
- (get-bytevector-n* p (- 8 m)))
- bv))
+ (pad (if (zero? m) 0 (- 8 m)))
+ (bv (get-bytevector-n* p (+ len pad))))
+ (sub-bytevector bv len)))
(define (read-string p)
(utf8->string (read-byte-string p)))
;; <http://bugs.gnu.org/19621>. See <http://bugs.gnu.org/19610> for
;; a discussion.
(let ((bv (read-byte-string p)))
- ;; XXX: Rewrite using (ice-9 iconv) when the minimum requirement is
- ;; upgraded to Guile >= 2.0.9.
+ ;; XXX: Rewrite using (ice-9 iconv).
(list->string (map integer->char (bytevector->u8-list bv)))))
(define (read-maybe-utf8-string p)
(port (open-bytevector-input-port bv)))
(set-port-encoding! port "UTF-8")
(set-port-conversion-strategy! port 'substitute)
- (get-string-all port)))
+ (rdelim:read-string port)))
(define (write-string-list l p)
(write-int (length l) p)
(put-bytevector out buf 0 read)
(loop (- left read))))))))
-(define (write-contents file p size)
- "Write SIZE bytes from FILE to output port P."
- (define (call-with-binary-input-file file proc)
- ;; Open FILE as a binary file. This avoids scan-for-encoding, and thus
- ;; avoids any initial buffering. Disable file name canonicalization to
- ;; avoid stat'ing like crazy.
- (with-fluids ((%file-port-name-canonicalization #f))
- (let ((port (open-file file "rb")))
- (dynamic-wind
- (const #t)
- (cut proc port)
- (lambda ()
- (close-port port))))))
-
- (write-string "contents" p)
- (write-long-long size p)
- (call-with-binary-input-file file
- ;; Use `sendfile' when available (Guile 2.0.8+).
- (if (and (compile-time-value (defined? 'sendfile))
- (file-port? p))
- (cut sendfile p <> size 0)
- (cut dump <> p size)))
- (write-padding size p))
-
-(define (read-contents in out)
- "Read the contents of a file from the Nar at IN, write it to OUT, and return
-the size in bytes."
- (define executable?
- (match (read-string in)
- ("contents"
- #f)
- ("executable"
- (match (list (read-string in) (read-string in))
- (("" "contents") #t)
- (x (raise
- (condition (&message
- (message "unexpected executable file marker"))
- (&nar-read-error (port in)
- (file #f)
- (token x))))))
- #t)
- (x
- (raise
- (condition (&message (message "unsupported nar file type"))
- (&nar-read-error (port in) (file #f) (token x)))))))
-
- (let ((size (read-long-long in)))
- ;; Note: `sendfile' cannot be used here because of port buffering on IN.
- (dump in out size)
-
- (when executable?
- (chmod out #o755))
- (let ((m (modulo size 8)))
- (unless (zero? m)
- (get-bytevector-n* in (- 8 m))))
- size))
+(define (write-contents-from-port input output size)
+ "Write SIZE bytes from port INPUT to port OUTPUT."
+ (write-string "contents" output)
+ (write-long-long size output)
+ ;; Use 'sendfile' when both OUTPUT and INPUT are file ports.
+ (if (and (file-port? output) (file-port? input))
+ (sendfile output input size 0)
+ (dump input output size))
+ (write-padding size output))
+
+(define (read-file-type port)
+ "Read the file type tag from PORT, and return either 'regular or
+'executable."
+ (match (read-string port)
+ ("contents"
+ 'regular)
+ ("executable"
+ (match (list (read-string port) (read-string port))
+ (("" "contents") 'executable)
+ (x (raise
+ (condition (&message
+ (message "unexpected executable file marker"))
+ (&nar-read-error (port port)
+ (file #f)
+ (token x)))))))
+ (x
+ (raise
+ (condition (&message (message "unsupported nar file type"))
+ (&nar-read-error (port port) (file #f) (token x)))))))
(define %archive-version-1
;; Magic cookie for Nix archives.
"nix-archive-1")
-(define (write-file file port)
+(define* (write-file file port
+ #:key (select? (const #t)))
"Write the contents of FILE to PORT in Nar format, recursing into
-sub-directories of FILE as needed."
+sub-directories of FILE as needed. For each directory entry, call (SELECT?
+FILE STAT), where FILE is the entry's absolute file name and STAT is the
+result of 'lstat'; exclude entries for which SELECT? does not return true."
+ (write-file-tree file port
+ #:file-type+size
+ (lambda (file)
+ (let* ((stat (lstat file))
+ (size (stat:size stat)))
+ (case (stat:type stat)
+ ((directory)
+ (values 'directory size))
+ ((regular)
+ (values (if (zero? (logand (stat:mode stat)
+ #o100))
+ 'regular
+ 'executable)
+ size))
+ (else
+ (values (stat:type stat) size))))) ;bah!
+ #:file-port (cut open-file <> "r0b")
+ #:symlink-target readlink
+
+ #:directory-entries
+ (lambda (directory)
+ ;; 'scandir' defaults to 'string-locale<?' to sort files,
+ ;; but this happens to be case-insensitive (at least in
+ ;; 'en_US' locale on libc 2.18.) Conversely, we want
+ ;; files to be sorted in a case-sensitive fashion.
+ (define basenames
+ (scandir directory (negate (cut member <> '("." "..")))
+ string<?))
+
+ (filter-map (lambda (base)
+ (let ((file (string-append directory
+ "/" base)))
+ (and (select? file (lstat file))
+ base)))
+ basenames))
+
+ ;; The 'scandir' call above gives us filtered and sorted
+ ;; entries, so no post-processing is needed.
+ #:postprocess-entries identity))
+
+(define (filter/sort-directory-entries lst)
+ "Remove dot and dot-dot entries from LST, and sort it in lexicographical
+order."
+ (delete-duplicates
+ (sort (remove (cute member <> '("." "..")) lst)
+ string<?)
+ string=?))
+
+(define* (write-file-tree file port
+ #:key
+ file-type+size
+ file-port
+ symlink-target
+ directory-entries
+ (postprocess-entries filter/sort-directory-entries))
+ "Write the contents of FILE to PORT in Nar format, recursing into
+sub-directories of FILE as needed.
+
+This procedure does not make any file-system I/O calls. Instead, it calls the
+user-provided FILE-TYPE+SIZE, FILE-PORT, SYMLINK-TARGET, and DIRECTORY-ENTRIES
+procedures, which roughly correspond to 'lstat', 'readlink', and 'scandir'.
+POSTPROCESS-ENTRIES ensures that directory entries are valid; leave it as-is
+unless you know that DIRECTORY-ENTRIES provide filtered and sorted entries, in
+which case you can use 'identity'."
(define p port)
(write-string %archive-version-1 p)
(let dump ((f file))
- (let ((s (lstat f)))
- (write-string "(" p)
- (case (stat:type s)
- ((regular)
- (write-string "type" p)
- (write-string "regular" p)
- (if (not (zero? (logand (stat:mode s) #o100)))
- (begin
- (write-string "executable" p)
- (write-string "" p)))
- (write-contents f p (stat:size s)))
- ((directory)
- (write-string "type" p)
- (write-string "directory" p)
- (let ((entries
- ;; NOTE: Guile 2.0.5's 'scandir' returns all subdirectories
- ;; unconditionally, including "." and "..", regardless of the
- ;; 'select?' predicate passed to it, so we have to filter
- ;; those out externally.
- (filter (negate (cut member <> '("." "..")))
- ;; 'scandir' defaults to 'string-locale<?' to sort
- ;; files, but this happens to be case-insensitive (at
- ;; least in 'en_US' locale on libc 2.18.) Conversely,
- ;; we want files to be sorted in a case-sensitive
- ;; fashion.
- (scandir f (const #t) string<?))))
- (for-each (lambda (e)
- (let ((f (string-append f "/" e)))
- (write-string "entry" p)
- (write-string "(" p)
- (write-string "name" p)
- (write-string e p)
- (write-string "node" p)
- (dump f)
- (write-string ")" p)))
- entries)))
- ((symlink)
- (write-string "type" p)
- (write-string "symlink" p)
- (write-string "target" p)
- (write-string (readlink f) p))
- (else
- (raise (condition (&message (message "unsupported file type"))
- (&nar-error (file f) (port port))))))
- (write-string ")" p))))
-
-(define (restore-file port file)
- "Read a file (possibly a directory structure) in Nar format from PORT.
-Restore it as FILE."
- (parameterize ((currently-restored-file file))
+ (define-values (type size)
+ (file-type+size f))
+
+ (write-string "(" p)
+ (case type
+ ((regular executable)
+ (write-string "type" p)
+ (write-string "regular" p)
+ (when (eq? 'executable type)
+ (write-string "executable" p)
+ (write-string "" p))
+ (let ((input (file-port f)))
+ (dynamic-wind
+ (const #t)
+ (lambda ()
+ (write-contents-from-port input p size))
+ (lambda ()
+ (close-port input)))))
+ ((directory)
+ (write-string "type" p)
+ (write-string "directory" p)
+ (let ((entries (postprocess-entries (directory-entries f))))
+ (for-each (lambda (e)
+ (let* ((f (string-append f "/" e)))
+ (write-string "entry" p)
+ (write-string "(" p)
+ (write-string "name" p)
+ (write-string e p)
+ (write-string "node" p)
+ (dump f)
+ (write-string ")" p)))
+ entries)))
+ ((symlink)
+ (write-string "type" p)
+ (write-string "symlink" p)
+ (write-string "target" p)
+ (write-string (symlink-target f) p))
+ (else
+ (raise (condition (&message (message "unsupported file type"))
+ (&nar-error (file f) (port port))))))
+ (write-string ")" p)))
+
+(define port-conversion-strategy
+ (fluid->parameter %default-port-conversion-strategy))
+
+(define (fold-archive proc seed port file)
+ "Read a file (possibly a directory structure) in Nar format from PORT. Call
+PROC on each file or directory read from PORT using:
+
+ (PROC FILE TYPE CONTENTS RESULT)
+
+using SEED as the first RESULT. TYPE is a symbol like 'regular, and CONTENTS
+depends on TYPE."
+ (parameterize ((currently-restored-file file)
+
+ ;; Error out if we can convert file names to the current
+ ;; locale. (XXX: We'd prefer UTF-8 encoding for file names
+ ;; regardless of the locale, but that's what Guile gives us
+ ;; so far.)
+ (port-conversion-strategy 'error))
(let ((signature (read-string port)))
(unless (equal? signature %archive-version-1)
(raise
(token signature)
(file #f))))))
- (let restore ((file file))
+ (let read ((file file)
+ (result seed))
(define (read-eof-marker)
(match (read-string port)
(")" #t)
(match (list (read-string port) (read-string port) (read-string port))
(("(" "type" "regular")
- (call-with-output-file file (cut read-contents port <>))
- (read-eof-marker))
+ (let* ((type (read-file-type port))
+ (size (read-long-long port))
+
+ ;; The caller must read exactly SIZE bytes from PORT.
+ (result (proc file type `(,port . ,size) result)))
+ (let ((m (modulo size 8)))
+ (unless (zero? m)
+ (get-bytevector-n* port (- 8 m))))
+ (read-eof-marker)
+ result))
(("(" "type" "symlink")
(match (list (read-string port) (read-string port))
(("target" target)
- (symlink target file)
- (read-eof-marker))
+ (let ((result (proc file 'symlink target result)))
+ (read-eof-marker)
+ result))
(x (raise
(condition
(&message (message "invalid symlink tokens"))
(&nar-read-error (port port) (file file) (token x)))))))
(("(" "type" "directory")
(let ((dir file))
- (mkdir dir)
- (let loop ((prefix (read-string port)))
+ (let loop ((prefix (read-string port))
+ (result (proc file 'directory #f result)))
(match prefix
("entry"
(match (list (read-string port)
(read-string port) (read-string port)
(read-string port))
(("(" "name" file "node")
- (restore (string-append dir "/" file))
- (match (read-string port)
- (")" #t)
- (x
- (raise
- (condition
- (&message
- (message "unexpected directory entry termination"))
- (&nar-read-error (port port)
- (file file)
- (token x))))))
- (loop (read-string port)))))
- (")" #t) ; done with DIR
+ (let ((result (read (string-append dir "/" file) result)))
+ (match (read-string port)
+ (")" #f)
+ (x
+ (raise
+ (condition
+ (&message
+ (message "unexpected directory entry termination"))
+ (&nar-read-error (port port)
+ (file file)
+ (token x))))))
+ (loop (read-string port) result)))))
+ (")" result) ;done with DIR
(x
(raise
(condition
(&message (message "unsupported nar entry type"))
(&nar-read-error (port port) (file file) (token x)))))))))
+(define (restore-file port file)
+ "Read a file (possibly a directory structure) in Nar format from PORT.
+Restore it as FILE."
+ (fold-archive (lambda (file type content result)
+ (match type
+ ('directory
+ (mkdir file))
+ ('symlink
+ (symlink content file))
+ ((or 'regular 'executable)
+ (match content
+ ((input . size)
+ (call-with-output-file file
+ (lambda (output)
+ (dump input output size)
+ (when (eq? type 'executable)
+ (chmod output #o755)))))))))
+ #t
+ port
+ file))
+
+;;; Local Variables:
+;;; eval: (put 'call-with-binary-input-file 'scheme-indent-function 1)
+;;; End:
+
;;; serialization.scm ends here