1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2019 Pierre Neidhardt <mail@ambrevar.xyz>
3 ;;; Copyright © 2019 Ludovic Courtès <ludo@gnu.org>
5 ;;; This file is part of GNU Guix.
7 ;;; GNU Guix is free software; you can redistribute it and/or modify it
8 ;;; under the terms of the GNU General Public License as published by
9 ;;; the Free Software Foundation; either version 3 of the License, or (at
10 ;;; your option) any later version.
12 ;;; GNU Guix is distributed in the hope that it will be useful, but
13 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;;; GNU General Public License for more details.
17 ;;; You should have received a copy of the GNU General Public License
18 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
20 (define-module (guix lzlib)
21 #:use-module (rnrs bytevectors)
22 #:use-module (rnrs arithmetic bitwise)
23 #:use-module (ice-9 binary-ports)
24 #:use-module (ice-9 match)
25 #:use-module (system foreign)
26 #:use-module (guix config)
27 #:use-module (srfi srfi-11)
28 #:export (lzlib-available?
31 make-lzip-input-port/compressed
32 call-with-lzip-input-port
33 call-with-lzip-output-port
34 %default-member-length-limit
35 %default-compression-level))
39 ;;; Bindings to the lzlib / liblz API. Some convenience functions are also
40 ;;; provided (see the export).
42 ;;; While the bindings are complete, the convenience functions only support
43 ;;; single member archives. To decompress single member archives, we loop
44 ;;; until lz-decompress-read returns 0. This is simpler. To support multiple
45 ;;; members properly, we need (among others) to call lz-decompress-finish and
46 ;;; loop over lz-decompress-read until lz-decompress-finished? returns #t.
47 ;;; Otherwise a multi-member archive starting with an empty member would only
48 ;;; decompress the empty member and stop there, resulting in truncated output.
53 ;; File name of lzlib's shared library. When updating via 'guix pull',
54 ;; '%liblz' might be undefined so protect against it.
55 (delay (dynamic-link (if (defined? '%liblz)
59 (define (lzlib-available?)
60 "Return true if lzlib is available, #f otherwise."
61 (false-if-exception (force %lzlib)))
63 (define (lzlib-procedure ret name parameters)
64 "Return a procedure corresponding to C function NAME in liblz, or #f if
65 either lzlib or the function could not be found."
66 (match (false-if-exception (dynamic-func name (force %lzlib)))
68 (pointer->procedure ret ptr parameters))
72 (define-wrapped-pointer-type <lz-decoder>
73 ;; Scheme counterpart of the 'LZ_Decoder' opaque type.
78 (format port "#<lz-decoder ~a>"
79 (number->string (object-address obj) 16))))
81 (define-wrapped-pointer-type <lz-encoder>
82 ;; Scheme counterpart of the 'LZ_Encoder' opaque type.
87 (format port "#<lz-encoder ~a>"
88 (number->string (object-address obj) 16))))
91 (define %error-number-ok 0)
92 (define %error-number-bad-argument 1)
93 (define %error-number-mem-error 2)
94 (define %error-number-sequence-error 3)
95 (define %error-number-header-error 4)
96 (define %error-number-unexpected-eof 5)
97 (define %error-number-data-error 6)
98 (define %error-number-library-error 7)
101 ;; Compression bindings.
103 (define lz-compress-open
104 (let ((proc (lzlib-procedure '* "LZ_compress_open" (list int int uint64)))
105 ;; member-size is an "unsigned long long", and the C standard guarantees
106 ;; a minimum range of 0..2^64-1.
107 (unlimited-size (- (expt 2 64) 1)))
108 (lambda* (dictionary-size match-length-limit #:optional (member-size unlimited-size))
109 "Initialize the internal stream state for compression and returns a
110 pointer that can only be used as the encoder argument for the other
111 lz-compress functions, or a null pointer if the encoder could not be
114 See the manual: (lzlib) Compression functions."
115 (let ((encoder-ptr (proc dictionary-size match-length-limit member-size)))
116 (if (not (= (lz-compress-error encoder-ptr) -1))
117 (pointer->lz-encoder encoder-ptr)
118 (throw 'lzlib-error 'lz-compress-open))))))
120 (define lz-compress-close
121 (let ((proc (lzlib-procedure int "LZ_compress_close" '(*))))
123 "Close encoder. ENCODER can no longer be used as an argument to any
124 lz-compress function. "
125 (let ((ret (proc (lz-encoder->pointer encoder))))
127 (throw 'lzlib-error 'lz-compress-close ret)
130 (define lz-compress-finish
131 (let ((proc (lzlib-procedure int "LZ_compress_finish" '(*))))
133 "Tell that all the data for this member have already been written (with
134 the `lz-compress-write' function). It is safe to call `lz-compress-finish' as
135 many times as needed. After all the produced compressed data have been read
136 with `lz-compress-read' and `lz-compress-member-finished?' returns #t, a new
137 member can be started with 'lz-compress-restart-member'."
138 (let ((ret (proc (lz-encoder->pointer encoder))))
140 (throw 'lzlib-error 'lz-compress-finish (lz-compress-error encoder))
143 (define lz-compress-restart-member
144 (let ((proc (lzlib-procedure int "LZ_compress_restart_member" (list '* uint64))))
145 (lambda (encoder member-size)
146 "Start a new member in a multimember data stream.
147 Call this function only after `lz-compress-member-finished?' indicates that the
148 current member has been fully read (with the `lz-compress-read' function)."
149 (let ((ret (proc (lz-encoder->pointer encoder) member-size)))
151 (throw 'lzlib-error 'lz-compress-restart-member
152 (lz-compress-error encoder))
155 (define lz-compress-sync-flush
156 (let ((proc (lzlib-procedure int "LZ_compress_sync_flush" (list '*))))
158 "Make available to `lz-compress-read' all the data already written with
159 the `LZ-compress-write' function. First call `lz-compress-sync-flush'. Then
160 call 'lz-compress-read' until it returns 0.
162 Repeated use of `LZ-compress-sync-flush' may degrade compression ratio,
163 so use it only when needed. "
164 (let ((ret (proc (lz-encoder->pointer encoder))))
166 (throw 'lzlib-error 'lz-compress-sync-flush
167 (lz-compress-error encoder))
170 (define lz-compress-read
171 (let ((proc (lzlib-procedure int "LZ_compress_read" (list '* '* int))))
172 (lambda* (encoder lzfile-bv #:optional (start 0) (count (bytevector-length lzfile-bv)))
173 "Read up to COUNT bytes from the encoder stream, storing the results in LZFILE-BV.
174 Return the number of uncompressed bytes written, a positive integer."
175 (let ((ret (proc (lz-encoder->pointer encoder)
176 (bytevector->pointer lzfile-bv start)
179 (throw 'lzlib-error 'lz-compress-read (lz-compress-error encoder))
182 (define lz-compress-write
183 (let ((proc (lzlib-procedure int "LZ_compress_write" (list '* '* int))))
184 (lambda* (encoder bv #:optional (start 0) (count (bytevector-length bv)))
185 "Write up to COUNT bytes from BV to the encoder stream. Return the
186 number of uncompressed bytes written, a strictly positive integer."
187 (let ((ret (proc (lz-encoder->pointer encoder)
188 (bytevector->pointer bv start)
191 (throw 'lzlib-error 'lz-compress-write (lz-compress-error encoder))
194 (define lz-compress-write-size
195 (let ((proc (lzlib-procedure int "LZ_compress_write_size" '(*))))
197 "The maximum number of bytes that can be immediately written through the
198 `lz-compress-write' function.
200 It is guaranteed that an immediate call to `lz-compress-write' will accept a
201 SIZE up to the returned number of bytes. "
202 (let ((ret (proc (lz-encoder->pointer encoder))))
204 (throw 'lzlib-error 'lz-compress-write-size (lz-compress-error encoder))
207 (define lz-compress-error
208 (let ((proc (lzlib-procedure int "LZ_compress_errno" '(*))))
210 "ENCODER can be a Scheme object or a pointer."
211 (let* ((error-number (proc (if (lz-encoder? encoder)
212 (lz-encoder->pointer encoder)
216 (define lz-compress-finished?
217 (let ((proc (lzlib-procedure int "LZ_compress_finished" '(*))))
219 "Return #t if all the data have been read and `lz-compress-close' can
220 be safely called. Otherwise return #f."
221 (let ((ret (proc (lz-encoder->pointer encoder))))
225 (_ (throw 'lzlib-error 'lz-compress-finished? (lz-compress-error encoder))))))))
227 (define lz-compress-member-finished?
228 (let ((proc (lzlib-procedure int "LZ_compress_member_finished" '(*))))
230 "Return #t if the current member, in a multimember data stream, has
231 been fully read and 'lz-compress-restart-member' can be safely called.
232 Otherwise return #f."
233 (let ((ret (proc (lz-encoder->pointer encoder))))
237 (_ (throw 'lzlib-error 'lz-compress-member-finished? (lz-compress-error encoder))))))))
239 (define lz-compress-data-position
240 (let ((proc (lzlib-procedure uint64 "LZ_compress_data_position" '(*))))
242 "Return the number of input bytes already compressed in the current
244 (let ((ret (proc (lz-encoder->pointer encoder))))
246 (throw 'lzlib-error 'lz-compress-data-position
247 (lz-compress-error encoder))
250 (define lz-compress-member-position
251 (let ((proc (lzlib-procedure uint64 "LZ_compress_member_position" '(*))))
253 "Return the number of compressed bytes already produced, but perhaps
254 not yet read, in the current member."
255 (let ((ret (proc (lz-encoder->pointer encoder))))
257 (throw 'lzlib-error 'lz-compress-member-position
258 (lz-compress-error encoder))
261 (define lz-compress-total-in-size
262 (let ((proc (lzlib-procedure uint64 "LZ_compress_total_in_size" '(*))))
264 "Return the total number of input bytes already compressed."
265 (let ((ret (proc (lz-encoder->pointer encoder))))
267 (throw 'lzlib-error 'lz-compress-total-in-size
268 (lz-compress-error encoder))
271 (define lz-compress-total-out-size
272 (let ((proc (lzlib-procedure uint64 "LZ_compress_total_out_size" '(*))))
274 "Return the total number of compressed bytes already produced, but
275 perhaps not yet read."
276 (let ((ret (proc (lz-encoder->pointer encoder))))
278 (throw 'lzlib-error 'lz-compress-total-out-size
279 (lz-compress-error encoder))
283 ;; Decompression bindings.
285 (define lz-decompress-open
286 (let ((proc (lzlib-procedure '* "LZ_decompress_open" '())))
288 "Initializes the internal stream state for decompression and returns a
289 pointer that can only be used as the decoder argument for the other
290 lz-decompress functions, or a null pointer if the decoder could not be
293 See the manual: (lzlib) Decompression functions."
294 (let ((decoder-ptr (proc)))
295 (if (not (= (lz-decompress-error decoder-ptr) -1))
296 (pointer->lz-decoder decoder-ptr)
297 (throw 'lzlib-error 'lz-decompress-open))))))
299 (define lz-decompress-close
300 (let ((proc (lzlib-procedure int "LZ_decompress_close" '(*))))
302 "Close decoder. DECODER can no longer be used as an argument to any
303 lz-decompress function. "
304 (let ((ret (proc (lz-decoder->pointer decoder))))
306 (throw 'lzlib-error 'lz-decompress-close ret)
309 (define lz-decompress-finish
310 (let ((proc (lzlib-procedure int "LZ_decompress_finish" '(*))))
312 "Tell that all the data for this stream have already been written (with
313 the `lz-decompress-write' function). It is safe to call
314 `lz-decompress-finish' as many times as needed."
315 (let ((ret (proc (lz-decoder->pointer decoder))))
317 (throw 'lzlib-error 'lz-decompress-finish (lz-decompress-error decoder))
320 (define lz-decompress-reset
321 (let ((proc (lzlib-procedure int "LZ_decompress_reset" '(*))))
323 "Reset the internal state of DECODER as it was just after opening it
324 with the `lz-decompress-open' function. Data stored in the internal buffers
325 is discarded. Position counters are set to 0."
326 (let ((ret (proc (lz-decoder->pointer decoder))))
328 (throw 'lzlib-error 'lz-decompress-reset
329 (lz-decompress-error decoder))
332 (define lz-decompress-sync-to-member
333 (let ((proc (lzlib-procedure int "LZ_decompress_sync_to_member" '(*))))
335 "Reset the error state of DECODER and enters a search state that lasts
336 until a new member header (or the end of the stream) is found. After a
337 successful call to `lz-decompress-sync-to-member', data written with
338 `lz-decompress-write' will be consumed and 'lz-decompress-read' will return 0
339 until a header is found.
341 This function is useful to discard any data preceding the first member, or to
342 discard the rest of the current member, for example in case of a data
343 error. If the decoder is already at the beginning of a member, this function
345 (let ((ret (proc (lz-decoder->pointer decoder))))
347 (throw 'lzlib-error 'lz-decompress-sync-to-member
348 (lz-decompress-error decoder))
351 (define lz-decompress-read
352 (let ((proc (lzlib-procedure int "LZ_decompress_read" (list '* '* int))))
353 (lambda* (decoder file-bv #:optional (start 0) (count (bytevector-length file-bv)))
354 "Read up to COUNT bytes from the decoder stream, storing the results in FILE-BV.
355 Return the number of uncompressed bytes written, a non-negative positive integer."
356 (let ((ret (proc (lz-decoder->pointer decoder)
357 (bytevector->pointer file-bv start)
360 (throw 'lzlib-error 'lz-decompress-read (lz-decompress-error decoder))
363 (define lz-decompress-write
364 (let ((proc (lzlib-procedure int "LZ_decompress_write" (list '* '* int))))
365 (lambda* (decoder bv #:optional (start 0) (count (bytevector-length bv)))
366 "Write up to COUNT bytes from BV to the decoder stream. Return the
367 number of uncompressed bytes written, a non-negative integer."
368 (let ((ret (proc (lz-decoder->pointer decoder)
369 (bytevector->pointer bv start)
372 (throw 'lzlib-error 'lz-decompress-write (lz-decompress-error decoder))
375 (define lz-decompress-write-size
376 (let ((proc (lzlib-procedure int "LZ_decompress_write_size" '(*))))
378 "Return the maximum number of bytes that can be immediately written
379 through the `lz-decompress-write' function.
381 It is guaranteed that an immediate call to `lz-decompress-write' will accept a
382 SIZE up to the returned number of bytes. "
383 (let ((ret (proc (lz-decoder->pointer decoder))))
385 (throw 'lzlib-error 'lz-decompress-write-size (lz-decompress-error decoder))
388 (define lz-decompress-error
389 (let ((proc (lzlib-procedure int "LZ_decompress_errno" '(*))))
391 "DECODER can be a Scheme object or a pointer."
392 (let* ((error-number (proc (if (lz-decoder? decoder)
393 (lz-decoder->pointer decoder)
397 (define lz-decompress-finished?
398 (let ((proc (lzlib-procedure int "LZ_decompress_finished" '(*))))
400 "Return #t if all the data have been read and `lz-decompress-close' can
401 be safely called. Otherwise return #f."
402 (let ((ret (proc (lz-decoder->pointer decoder))))
406 (_ (throw 'lzlib-error 'lz-decompress-finished? (lz-decompress-error decoder))))))))
408 (define lz-decompress-member-finished?
409 (let ((proc (lzlib-procedure int "LZ_decompress_member_finished" '(*))))
411 "Return #t if the current member, in a multimember data stream, has
412 been fully read and `lz-decompress-restart-member' can be safely called.
413 Otherwise return #f."
414 (let ((ret (proc (lz-decoder->pointer decoder))))
418 (_ (throw 'lzlib-error 'lz-decompress-member-finished? (lz-decompress-error decoder))))))))
420 (define lz-decompress-member-version
421 (let ((proc (lzlib-procedure int "LZ_decompress_member_version" '(*))))
423 (let ((ret (proc (lz-decoder->pointer decoder))))
424 "Return the version of current member from member header."
426 (throw 'lzlib-error 'lz-decompress-data-position
427 (lz-decompress-error decoder))
430 (define lz-decompress-dictionary-size
431 (let ((proc (lzlib-procedure int "LZ_decompress_dictionary_size" '(*))))
433 (let ((ret (proc (lz-decoder->pointer decoder))))
434 "Return the dictionary size of current member from member header."
436 (throw 'lzlib-error 'lz-decompress-member-position
437 (lz-decompress-error decoder))
440 (define lz-decompress-data-crc
441 (let ((proc (lzlib-procedure unsigned-int "LZ_decompress_data_crc" '(*))))
443 (let ((ret (proc (lz-decoder->pointer decoder))))
444 "Return the 32 bit Cyclic Redundancy Check of the data decompressed
445 from the current member. The returned value is valid only when
446 `lz-decompress-member-finished' returns #t. "
448 (throw 'lzlib-error 'lz-decompress-member-position
449 (lz-decompress-error decoder))
452 (define lz-decompress-data-position
453 (let ((proc (lzlib-procedure uint64 "LZ_decompress_data_position" '(*))))
455 "Return the number of decompressed bytes already produced, but perhaps
456 not yet read, in the current member."
457 (let ((ret (proc (lz-decoder->pointer decoder))))
459 (throw 'lzlib-error 'lz-decompress-data-position
460 (lz-decompress-error decoder))
463 (define lz-decompress-member-position
464 (let ((proc (lzlib-procedure uint64 "LZ_decompress_member_position" '(*))))
466 "Return the number of input bytes already decompressed in the current
468 (let ((ret (proc (lz-decoder->pointer decoder))))
470 (throw 'lzlib-error 'lz-decompress-member-position
471 (lz-decompress-error decoder))
474 (define lz-decompress-total-in-size
475 (let ((proc (lzlib-procedure uint64 "LZ_decompress_total_in_size" '(*))))
477 (let ((ret (proc (lz-decoder->pointer decoder))))
478 "Return the total number of input bytes already compressed."
480 (throw 'lzlib-error 'lz-decompress-total-in-size
481 (lz-decompress-error decoder))
484 (define lz-decompress-total-out-size
485 (let ((proc (lzlib-procedure uint64 "LZ_decompress_total_out_size" '(*))))
487 (let ((ret (proc (lz-decoder->pointer decoder))))
488 "Return the total number of compressed bytes already produced, but
489 perhaps not yet read."
491 (throw 'lzlib-error 'lz-decompress-total-out-size
492 (lz-decompress-error decoder))
496 ;; High level functions.
498 (define* (lzread! decoder port bv
499 #:optional (start 0) (count (bytevector-length bv)))
500 "Read up to COUNT bytes from PORT into BV at offset START. Return the
501 number of uncompressed bytes actually read; it is zero if COUNT is zero or if
502 the end-of-stream has been reached."
503 (define (feed-decoder! decoder)
504 ;; Feed DECODER with data read from PORT.
505 (match (get-bytevector-n port (lz-decompress-write-size decoder))
506 ((? eof-object? eof) eof)
507 (bv (lz-decompress-write decoder bv))))
511 (cond ((< read count)
512 (match (lz-decompress-read decoder bv start (- count read))
513 (0 (cond ((lz-decompress-finished? decoder)
515 ((eof-object? (feed-decoder! decoder))
516 (lz-decompress-finish decoder)
520 (n (loop (+ read n) (+ start n)))))
524 (define (lzwrite! encoder source source-offset source-count
525 target target-offset target-count)
526 "Write up to SOURCE-COUNT bytes from SOURCE to ENCODER, and read up to
527 TARGET-COUNT bytes into TARGET at TARGET-OFFSET. Return two values: the
528 number of bytes read from SOURCE, and the number of bytes written to TARGET,
531 (if (> (lz-compress-write-size encoder) 0)
532 (match (lz-compress-write encoder source source-offset source-count)
533 (0 (lz-compress-finish encoder) 0)
538 (lz-compress-read encoder target target-offset target-count))
540 (values read written))
542 (define* (lzwrite encoder bv lz-port
543 #:optional (start 0) (count (bytevector-length bv)))
544 "Write up to COUNT bytes from BV at offset START into LZ-PORT. Return
545 the number of uncompressed bytes written, a non-negative integer."
548 (while (and (< 0 (lz-compress-write-size encoder))
550 (set! written (+ written
551 (lz-compress-write encoder bv (+ start written) (- count written)))))
553 (lz-compress-finish encoder))
554 (let ((lz-bv (make-bytevector written)))
556 (set! rd (lz-compress-read encoder lz-bv 0 (bytevector-length lz-bv)))
557 (put-bytevector lz-port lz-bv 0 rd)
558 (set! read (+ read rd))
561 ;; `written' is the total byte count of uncompressed data.
569 ;; Alist of (levels (dictionary-size match-length-limit)). 0 is the fastest.
570 ;; See bbexample.c in lzlib's source.
571 (define %compression-levels
573 (1 (,(bitwise-arithmetic-shift-left 1 20) 5))
574 (2 (,(bitwise-arithmetic-shift-left 3 19) 6))
575 (3 (,(bitwise-arithmetic-shift-left 1 21) 8))
576 (4 (,(bitwise-arithmetic-shift-left 3 20) 12))
577 (5 (,(bitwise-arithmetic-shift-left 1 22) 20))
578 (6 (,(bitwise-arithmetic-shift-left 1 23) 36))
579 (7 (,(bitwise-arithmetic-shift-left 1 24) 68))
580 (8 (,(bitwise-arithmetic-shift-left 3 23) 132))
581 (9 (,(bitwise-arithmetic-shift-left 1 25) 273))))
583 (define %default-compression-level
586 (define* (make-lzip-input-port port)
587 "Return an input port that decompresses data read from PORT, a file port.
588 PORT is automatically closed when the resulting port is closed."
589 (define decoder (lz-decompress-open))
591 (define (read! bv start count)
592 (lzread! decoder port bv start count))
594 (make-custom-binary-input-port "lzip-input" read! #f #f
596 (lz-decompress-close decoder)
599 (define* (make-lzip-output-port port
601 (level %default-compression-level))
602 "Return an output port that compresses data at the given LEVEL, using PORT,
603 a file port, as its sink. PORT is automatically closed when the resulting
605 (define encoder (apply lz-compress-open
606 (car (assoc-ref %compression-levels level))))
608 (define (write! bv start count)
609 (lzwrite encoder bv port start count))
611 (make-custom-binary-output-port "lzip-output" write! #f #f
613 (lz-compress-finish encoder)
614 ;; "lz-read" the trailing metadata added by `lz-compress-finish'.
615 (let ((lz-bv (make-bytevector (* 64 1024))))
617 (set! rd (lz-compress-read encoder lz-bv 0 (bytevector-length lz-bv)))
618 (put-bytevector port lz-bv 0 rd)
621 (lz-compress-close encoder)
624 (define* (make-lzip-input-port/compressed port
626 (level %default-compression-level))
627 "Return an input port that compresses data read from PORT, with the given LEVEL.
628 PORT is automatically closed when the resulting port is closed."
629 (define encoder (apply lz-compress-open
630 (car (assoc-ref %compression-levels level))))
632 (define input-buffer (make-bytevector 8192))
634 (define input-offset 0)
636 (define input-eof? #f)
638 (define (read! bv start count)
641 (match (lz-compress-read encoder bv start count)
642 (0 (if (lz-compress-finished? encoder)
644 (read! bv start count)))
646 ((= input-offset input-len)
647 (match (get-bytevector-n! port input-buffer 0
648 (bytevector-length input-buffer))
651 (lz-compress-finish encoder))
653 (set! input-offset 0)
654 (set! input-len count)))
655 (read! bv start count))
657 (let-values (((read written)
659 input-buffer input-offset
660 (- input-len input-offset)
662 (set! input-offset (+ input-offset read))
664 ;; Make sure we don't return zero except on EOF.
666 (read! bv start count)
669 (make-custom-binary-input-port "lzip-input/compressed"
674 (define* (call-with-lzip-input-port port proc)
675 "Call PROC with a port that wraps PORT and decompresses data read from it.
676 PORT is closed upon completion."
677 (let ((lzip (make-lzip-input-port port)))
683 (close-port lzip)))))
685 (define* (call-with-lzip-output-port port proc
687 (level %default-compression-level))
688 "Call PROC with an output port that wraps PORT and compresses data. PORT is
689 close upon completion."
690 (let ((lzip (make-lzip-output-port port
697 (close-port lzip)))))
699 ;;; lzlib.scm ends here