gnu: add convmv.
[jackhill/guix/guix.git] / guix / lzlib.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2019 Pierre Neidhardt <mail@ambrevar.xyz>
3 ;;; Copyright © 2019, 2020 Ludovic Courtès <ludo@gnu.org>
4 ;;;
5 ;;; This file is part of GNU Guix.
6 ;;;
7 ;;; GNU Guix is free software; you can redistribute it and/or modify it
8 ;;; under the terms of the GNU General Public License as published by
9 ;;; the Free Software Foundation; either version 3 of the License, or (at
10 ;;; your option) any later version.
11 ;;;
12 ;;; GNU Guix is distributed in the hope that it will be useful, but
13 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;;; GNU General Public License for more details.
16 ;;;
17 ;;; You should have received a copy of the GNU General Public License
18 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
19
20 (define-module (guix lzlib)
21 #:use-module (rnrs bytevectors)
22 #:use-module (rnrs arithmetic bitwise)
23 #:use-module (ice-9 binary-ports)
24 #:use-module (ice-9 match)
25 #:use-module (system foreign)
26 #:use-module (guix config)
27 #:use-module (srfi srfi-11)
28 #:export (lzlib-available?
29 make-lzip-input-port
30 make-lzip-output-port
31 make-lzip-input-port/compressed
32 call-with-lzip-input-port
33 call-with-lzip-output-port
34 %default-member-length-limit
35 %default-compression-level
36 dictionary-size+match-length-limit))
37
38 ;;; Commentary:
39 ;;;
40 ;;; Bindings to the lzlib / liblz API. Some convenience functions are also
41 ;;; provided (see the export).
42 ;;;
43 ;;; While the bindings are complete, the convenience functions only support
44 ;;; single member archives. To decompress single member archives, we loop
45 ;;; until lz-decompress-read returns 0. This is simpler. To support multiple
46 ;;; members properly, we need (among others) to call lz-decompress-finish and
47 ;;; loop over lz-decompress-read until lz-decompress-finished? returns #t.
48 ;;; Otherwise a multi-member archive starting with an empty member would only
49 ;;; decompress the empty member and stop there, resulting in truncated output.
50
51 ;;; Code:
52
53 (define %lzlib
54 ;; File name of lzlib's shared library. When updating via 'guix pull',
55 ;; '%liblz' might be undefined so protect against it.
56 (delay (dynamic-link (if (defined? '%liblz)
57 %liblz
58 "liblz"))))
59
60 (define (lzlib-available?)
61 "Return true if lzlib is available, #f otherwise."
62 (false-if-exception (force %lzlib)))
63
64 (define (lzlib-procedure ret name parameters)
65 "Return a procedure corresponding to C function NAME in liblz, or #f if
66 either lzlib or the function could not be found."
67 (match (false-if-exception (dynamic-func name (force %lzlib)))
68 ((? pointer? ptr)
69 (pointer->procedure ret ptr parameters))
70 (#f
71 #f)))
72
73 (define-wrapped-pointer-type <lz-decoder>
74 ;; Scheme counterpart of the 'LZ_Decoder' opaque type.
75 lz-decoder?
76 pointer->lz-decoder
77 lz-decoder->pointer
78 (lambda (obj port)
79 (format port "#<lz-decoder ~a>"
80 (number->string (object-address obj) 16))))
81
82 (define-wrapped-pointer-type <lz-encoder>
83 ;; Scheme counterpart of the 'LZ_Encoder' opaque type.
84 lz-encoder?
85 pointer->lz-encoder
86 lz-encoder->pointer
87 (lambda (obj port)
88 (format port "#<lz-encoder ~a>"
89 (number->string (object-address obj) 16))))
90
91 ;; From lzlib.h
92 (define %error-number-ok 0)
93 (define %error-number-bad-argument 1)
94 (define %error-number-mem-error 2)
95 (define %error-number-sequence-error 3)
96 (define %error-number-header-error 4)
97 (define %error-number-unexpected-eof 5)
98 (define %error-number-data-error 6)
99 (define %error-number-library-error 7)
100
101 \f
102 ;; Compression bindings.
103
104 (define lz-compress-open
105 (let ((proc (lzlib-procedure '* "LZ_compress_open" (list int int uint64)))
106 ;; member-size is an "unsigned long long", and the C standard guarantees
107 ;; a minimum range of 0..2^64-1.
108 (unlimited-size (- (expt 2 64) 1)))
109 (lambda* (dictionary-size match-length-limit #:optional (member-size unlimited-size))
110 "Initialize the internal stream state for compression and returns a
111 pointer that can only be used as the encoder argument for the other
112 lz-compress functions, or a null pointer if the encoder could not be
113 allocated.
114
115 See the manual: (lzlib) Compression functions."
116 (let ((encoder-ptr (proc dictionary-size match-length-limit member-size)))
117 (if (not (= (lz-compress-error encoder-ptr) -1))
118 (pointer->lz-encoder encoder-ptr)
119 (throw 'lzlib-error 'lz-compress-open))))))
120
121 (define lz-compress-close
122 (let ((proc (lzlib-procedure int "LZ_compress_close" '(*))))
123 (lambda (encoder)
124 "Close encoder. ENCODER can no longer be used as an argument to any
125 lz-compress function. "
126 (let ((ret (proc (lz-encoder->pointer encoder))))
127 (if (= ret -1)
128 (throw 'lzlib-error 'lz-compress-close ret)
129 ret)))))
130
131 (define lz-compress-finish
132 (let ((proc (lzlib-procedure int "LZ_compress_finish" '(*))))
133 (lambda (encoder)
134 "Tell that all the data for this member have already been written (with
135 the `lz-compress-write' function). It is safe to call `lz-compress-finish' as
136 many times as needed. After all the produced compressed data have been read
137 with `lz-compress-read' and `lz-compress-member-finished?' returns #t, a new
138 member can be started with 'lz-compress-restart-member'."
139 (let ((ret (proc (lz-encoder->pointer encoder))))
140 (if (= ret -1)
141 (throw 'lzlib-error 'lz-compress-finish (lz-compress-error encoder))
142 ret)))))
143
144 (define lz-compress-restart-member
145 (let ((proc (lzlib-procedure int "LZ_compress_restart_member" (list '* uint64))))
146 (lambda (encoder member-size)
147 "Start a new member in a multimember data stream.
148 Call this function only after `lz-compress-member-finished?' indicates that the
149 current member has been fully read (with the `lz-compress-read' function)."
150 (let ((ret (proc (lz-encoder->pointer encoder) member-size)))
151 (if (= ret -1)
152 (throw 'lzlib-error 'lz-compress-restart-member
153 (lz-compress-error encoder))
154 ret)))))
155
156 (define lz-compress-sync-flush
157 (let ((proc (lzlib-procedure int "LZ_compress_sync_flush" (list '*))))
158 (lambda (encoder)
159 "Make available to `lz-compress-read' all the data already written with
160 the `LZ-compress-write' function. First call `lz-compress-sync-flush'. Then
161 call 'lz-compress-read' until it returns 0.
162
163 Repeated use of `LZ-compress-sync-flush' may degrade compression ratio,
164 so use it only when needed. "
165 (let ((ret (proc (lz-encoder->pointer encoder))))
166 (if (= ret -1)
167 (throw 'lzlib-error 'lz-compress-sync-flush
168 (lz-compress-error encoder))
169 ret)))))
170
171 (define lz-compress-read
172 (let ((proc (lzlib-procedure int "LZ_compress_read" (list '* '* int))))
173 (lambda* (encoder lzfile-bv #:optional (start 0) (count (bytevector-length lzfile-bv)))
174 "Read up to COUNT bytes from the encoder stream, storing the results in LZFILE-BV.
175 Return the number of uncompressed bytes written, a positive integer."
176 (let ((ret (proc (lz-encoder->pointer encoder)
177 (bytevector->pointer lzfile-bv start)
178 count)))
179 (if (= ret -1)
180 (throw 'lzlib-error 'lz-compress-read (lz-compress-error encoder))
181 ret)))))
182
183 (define lz-compress-write
184 (let ((proc (lzlib-procedure int "LZ_compress_write" (list '* '* int))))
185 (lambda* (encoder bv #:optional (start 0) (count (bytevector-length bv)))
186 "Write up to COUNT bytes from BV to the encoder stream. Return the
187 number of uncompressed bytes written, a strictly positive integer."
188 (let ((ret (proc (lz-encoder->pointer encoder)
189 (bytevector->pointer bv start)
190 count)))
191 (if (< ret 0)
192 (throw 'lzlib-error 'lz-compress-write (lz-compress-error encoder))
193 ret)))))
194
195 (define lz-compress-write-size
196 (let ((proc (lzlib-procedure int "LZ_compress_write_size" '(*))))
197 (lambda (encoder)
198 "The maximum number of bytes that can be immediately written through the
199 `lz-compress-write' function.
200
201 It is guaranteed that an immediate call to `lz-compress-write' will accept a
202 SIZE up to the returned number of bytes. "
203 (let ((ret (proc (lz-encoder->pointer encoder))))
204 (if (= ret -1)
205 (throw 'lzlib-error 'lz-compress-write-size (lz-compress-error encoder))
206 ret)))))
207
208 (define lz-compress-error
209 (let ((proc (lzlib-procedure int "LZ_compress_errno" '(*))))
210 (lambda (encoder)
211 "ENCODER can be a Scheme object or a pointer."
212 (let* ((error-number (proc (if (lz-encoder? encoder)
213 (lz-encoder->pointer encoder)
214 encoder))))
215 error-number))))
216
217 (define lz-compress-finished?
218 (let ((proc (lzlib-procedure int "LZ_compress_finished" '(*))))
219 (lambda (encoder)
220 "Return #t if all the data have been read and `lz-compress-close' can
221 be safely called. Otherwise return #f."
222 (let ((ret (proc (lz-encoder->pointer encoder))))
223 (match ret
224 (1 #t)
225 (0 #f)
226 (_ (throw 'lzlib-error 'lz-compress-finished? (lz-compress-error encoder))))))))
227
228 (define lz-compress-member-finished?
229 (let ((proc (lzlib-procedure int "LZ_compress_member_finished" '(*))))
230 (lambda (encoder)
231 "Return #t if the current member, in a multimember data stream, has
232 been fully read and 'lz-compress-restart-member' can be safely called.
233 Otherwise return #f."
234 (let ((ret (proc (lz-encoder->pointer encoder))))
235 (match ret
236 (1 #t)
237 (0 #f)
238 (_ (throw 'lzlib-error 'lz-compress-member-finished? (lz-compress-error encoder))))))))
239
240 (define lz-compress-data-position
241 (let ((proc (lzlib-procedure uint64 "LZ_compress_data_position" '(*))))
242 (lambda (encoder)
243 "Return the number of input bytes already compressed in the current
244 member."
245 (let ((ret (proc (lz-encoder->pointer encoder))))
246 (if (= ret -1)
247 (throw 'lzlib-error 'lz-compress-data-position
248 (lz-compress-error encoder))
249 ret)))))
250
251 (define lz-compress-member-position
252 (let ((proc (lzlib-procedure uint64 "LZ_compress_member_position" '(*))))
253 (lambda (encoder)
254 "Return the number of compressed bytes already produced, but perhaps
255 not yet read, in the current member."
256 (let ((ret (proc (lz-encoder->pointer encoder))))
257 (if (= ret -1)
258 (throw 'lzlib-error 'lz-compress-member-position
259 (lz-compress-error encoder))
260 ret)))))
261
262 (define lz-compress-total-in-size
263 (let ((proc (lzlib-procedure uint64 "LZ_compress_total_in_size" '(*))))
264 (lambda (encoder)
265 "Return the total number of input bytes already compressed."
266 (let ((ret (proc (lz-encoder->pointer encoder))))
267 (if (= ret -1)
268 (throw 'lzlib-error 'lz-compress-total-in-size
269 (lz-compress-error encoder))
270 ret)))))
271
272 (define lz-compress-total-out-size
273 (let ((proc (lzlib-procedure uint64 "LZ_compress_total_out_size" '(*))))
274 (lambda (encoder)
275 "Return the total number of compressed bytes already produced, but
276 perhaps not yet read."
277 (let ((ret (proc (lz-encoder->pointer encoder))))
278 (if (= ret -1)
279 (throw 'lzlib-error 'lz-compress-total-out-size
280 (lz-compress-error encoder))
281 ret)))))
282
283 \f
284 ;; Decompression bindings.
285
286 (define lz-decompress-open
287 (let ((proc (lzlib-procedure '* "LZ_decompress_open" '())))
288 (lambda ()
289 "Initializes the internal stream state for decompression and returns a
290 pointer that can only be used as the decoder argument for the other
291 lz-decompress functions, or a null pointer if the decoder could not be
292 allocated.
293
294 See the manual: (lzlib) Decompression functions."
295 (let ((decoder-ptr (proc)))
296 (if (not (= (lz-decompress-error decoder-ptr) -1))
297 (pointer->lz-decoder decoder-ptr)
298 (throw 'lzlib-error 'lz-decompress-open))))))
299
300 (define lz-decompress-close
301 (let ((proc (lzlib-procedure int "LZ_decompress_close" '(*))))
302 (lambda (decoder)
303 "Close decoder. DECODER can no longer be used as an argument to any
304 lz-decompress function. "
305 (let ((ret (proc (lz-decoder->pointer decoder))))
306 (if (= ret -1)
307 (throw 'lzlib-error 'lz-decompress-close ret)
308 ret)))))
309
310 (define lz-decompress-finish
311 (let ((proc (lzlib-procedure int "LZ_decompress_finish" '(*))))
312 (lambda (decoder)
313 "Tell that all the data for this stream have already been written (with
314 the `lz-decompress-write' function). It is safe to call
315 `lz-decompress-finish' as many times as needed."
316 (let ((ret (proc (lz-decoder->pointer decoder))))
317 (if (= ret -1)
318 (throw 'lzlib-error 'lz-decompress-finish (lz-decompress-error decoder))
319 ret)))))
320
321 (define lz-decompress-reset
322 (let ((proc (lzlib-procedure int "LZ_decompress_reset" '(*))))
323 (lambda (decoder)
324 "Reset the internal state of DECODER as it was just after opening it
325 with the `lz-decompress-open' function. Data stored in the internal buffers
326 is discarded. Position counters are set to 0."
327 (let ((ret (proc (lz-decoder->pointer decoder))))
328 (if (= ret -1)
329 (throw 'lzlib-error 'lz-decompress-reset
330 (lz-decompress-error decoder))
331 ret)))))
332
333 (define lz-decompress-sync-to-member
334 (let ((proc (lzlib-procedure int "LZ_decompress_sync_to_member" '(*))))
335 (lambda (decoder)
336 "Reset the error state of DECODER and enters a search state that lasts
337 until a new member header (or the end of the stream) is found. After a
338 successful call to `lz-decompress-sync-to-member', data written with
339 `lz-decompress-write' will be consumed and 'lz-decompress-read' will return 0
340 until a header is found.
341
342 This function is useful to discard any data preceding the first member, or to
343 discard the rest of the current member, for example in case of a data
344 error. If the decoder is already at the beginning of a member, this function
345 does nothing."
346 (let ((ret (proc (lz-decoder->pointer decoder))))
347 (if (= ret -1)
348 (throw 'lzlib-error 'lz-decompress-sync-to-member
349 (lz-decompress-error decoder))
350 ret)))))
351
352 (define lz-decompress-read
353 (let ((proc (lzlib-procedure int "LZ_decompress_read" (list '* '* int))))
354 (lambda* (decoder file-bv #:optional (start 0) (count (bytevector-length file-bv)))
355 "Read up to COUNT bytes from the decoder stream, storing the results in FILE-BV.
356 Return the number of uncompressed bytes written, a non-negative positive integer."
357 (let ((ret (proc (lz-decoder->pointer decoder)
358 (bytevector->pointer file-bv start)
359 count)))
360 (if (< ret 0)
361 (throw 'lzlib-error 'lz-decompress-read (lz-decompress-error decoder))
362 ret)))))
363
364 (define lz-decompress-write
365 (let ((proc (lzlib-procedure int "LZ_decompress_write" (list '* '* int))))
366 (lambda* (decoder bv #:optional (start 0) (count (bytevector-length bv)))
367 "Write up to COUNT bytes from BV to the decoder stream. Return the
368 number of uncompressed bytes written, a non-negative integer."
369 (let ((ret (proc (lz-decoder->pointer decoder)
370 (bytevector->pointer bv start)
371 count)))
372 (if (< ret 0)
373 (throw 'lzlib-error 'lz-decompress-write (lz-decompress-error decoder))
374 ret)))))
375
376 (define lz-decompress-write-size
377 (let ((proc (lzlib-procedure int "LZ_decompress_write_size" '(*))))
378 (lambda (decoder)
379 "Return the maximum number of bytes that can be immediately written
380 through the `lz-decompress-write' function.
381
382 It is guaranteed that an immediate call to `lz-decompress-write' will accept a
383 SIZE up to the returned number of bytes. "
384 (let ((ret (proc (lz-decoder->pointer decoder))))
385 (if (= ret -1)
386 (throw 'lzlib-error 'lz-decompress-write-size (lz-decompress-error decoder))
387 ret)))))
388
389 (define lz-decompress-error
390 (let ((proc (lzlib-procedure int "LZ_decompress_errno" '(*))))
391 (lambda (decoder)
392 "DECODER can be a Scheme object or a pointer."
393 (let* ((error-number (proc (if (lz-decoder? decoder)
394 (lz-decoder->pointer decoder)
395 decoder))))
396 error-number))))
397
398 (define lz-decompress-finished?
399 (let ((proc (lzlib-procedure int "LZ_decompress_finished" '(*))))
400 (lambda (decoder)
401 "Return #t if all the data have been read and `lz-decompress-close' can
402 be safely called. Otherwise return #f."
403 (let ((ret (proc (lz-decoder->pointer decoder))))
404 (match ret
405 (1 #t)
406 (0 #f)
407 (_ (throw 'lzlib-error 'lz-decompress-finished? (lz-decompress-error decoder))))))))
408
409 (define lz-decompress-member-finished?
410 (let ((proc (lzlib-procedure int "LZ_decompress_member_finished" '(*))))
411 (lambda (decoder)
412 "Return #t if the current member, in a multimember data stream, has
413 been fully read and `lz-decompress-restart-member' can be safely called.
414 Otherwise return #f."
415 (let ((ret (proc (lz-decoder->pointer decoder))))
416 (match ret
417 (1 #t)
418 (0 #f)
419 (_ (throw 'lzlib-error 'lz-decompress-member-finished? (lz-decompress-error decoder))))))))
420
421 (define lz-decompress-member-version
422 (let ((proc (lzlib-procedure int "LZ_decompress_member_version" '(*))))
423 (lambda (decoder)
424 (let ((ret (proc (lz-decoder->pointer decoder))))
425 "Return the version of current member from member header."
426 (if (= ret -1)
427 (throw 'lzlib-error 'lz-decompress-data-position
428 (lz-decompress-error decoder))
429 ret)))))
430
431 (define lz-decompress-dictionary-size
432 (let ((proc (lzlib-procedure int "LZ_decompress_dictionary_size" '(*))))
433 (lambda (decoder)
434 (let ((ret (proc (lz-decoder->pointer decoder))))
435 "Return the dictionary size of current member from member header."
436 (if (= ret -1)
437 (throw 'lzlib-error 'lz-decompress-member-position
438 (lz-decompress-error decoder))
439 ret)))))
440
441 (define lz-decompress-data-crc
442 (let ((proc (lzlib-procedure unsigned-int "LZ_decompress_data_crc" '(*))))
443 (lambda (decoder)
444 (let ((ret (proc (lz-decoder->pointer decoder))))
445 "Return the 32 bit Cyclic Redundancy Check of the data decompressed
446 from the current member. The returned value is valid only when
447 `lz-decompress-member-finished' returns #t. "
448 (if (= ret -1)
449 (throw 'lzlib-error 'lz-decompress-member-position
450 (lz-decompress-error decoder))
451 ret)))))
452
453 (define lz-decompress-data-position
454 (let ((proc (lzlib-procedure uint64 "LZ_decompress_data_position" '(*))))
455 (lambda (decoder)
456 "Return the number of decompressed bytes already produced, but perhaps
457 not yet read, in the current member."
458 (let ((ret (proc (lz-decoder->pointer decoder))))
459 (if (= ret -1)
460 (throw 'lzlib-error 'lz-decompress-data-position
461 (lz-decompress-error decoder))
462 ret)))))
463
464 (define lz-decompress-member-position
465 (let ((proc (lzlib-procedure uint64 "LZ_decompress_member_position" '(*))))
466 (lambda (decoder)
467 "Return the number of input bytes already decompressed in the current
468 member."
469 (let ((ret (proc (lz-decoder->pointer decoder))))
470 (if (= ret -1)
471 (throw 'lzlib-error 'lz-decompress-member-position
472 (lz-decompress-error decoder))
473 ret)))))
474
475 (define lz-decompress-total-in-size
476 (let ((proc (lzlib-procedure uint64 "LZ_decompress_total_in_size" '(*))))
477 (lambda (decoder)
478 (let ((ret (proc (lz-decoder->pointer decoder))))
479 "Return the total number of input bytes already compressed."
480 (if (= ret -1)
481 (throw 'lzlib-error 'lz-decompress-total-in-size
482 (lz-decompress-error decoder))
483 ret)))))
484
485 (define lz-decompress-total-out-size
486 (let ((proc (lzlib-procedure uint64 "LZ_decompress_total_out_size" '(*))))
487 (lambda (decoder)
488 (let ((ret (proc (lz-decoder->pointer decoder))))
489 "Return the total number of compressed bytes already produced, but
490 perhaps not yet read."
491 (if (= ret -1)
492 (throw 'lzlib-error 'lz-decompress-total-out-size
493 (lz-decompress-error decoder))
494 ret)))))
495
496 \f
497 ;; High level functions.
498
499 (define* (lzread! decoder port bv
500 #:optional (start 0) (count (bytevector-length bv)))
501 "Read up to COUNT bytes from PORT into BV at offset START. Return the
502 number of uncompressed bytes actually read; it is zero if COUNT is zero or if
503 the end-of-stream has been reached."
504 (define (feed-decoder! decoder)
505 ;; Feed DECODER with data read from PORT.
506 (match (get-bytevector-n port (lz-decompress-write-size decoder))
507 ((? eof-object? eof) eof)
508 (bv (lz-decompress-write decoder bv))))
509
510 (let loop ((read 0)
511 (start start))
512 (cond ((< read count)
513 (match (lz-decompress-read decoder bv start (- count read))
514 (0 (cond ((lz-decompress-finished? decoder)
515 read)
516 ((eof-object? (feed-decoder! decoder))
517 (lz-decompress-finish decoder)
518 (loop read start))
519 (else ;read again
520 (loop read start))))
521 (n (loop (+ read n) (+ start n)))))
522 (else
523 read))))
524
525 (define (lzwrite! encoder source source-offset source-count
526 target target-offset target-count)
527 "Write up to SOURCE-COUNT bytes from SOURCE to ENCODER, and read up to
528 TARGET-COUNT bytes into TARGET at TARGET-OFFSET. Return two values: the
529 number of bytes read from SOURCE, and the number of bytes written to TARGET,
530 possibly zero."
531 (define read
532 (if (> (lz-compress-write-size encoder) 0)
533 (match (lz-compress-write encoder source source-offset source-count)
534 (0 (lz-compress-finish encoder) 0)
535 (n n))
536 0))
537
538 (define written
539 (lz-compress-read encoder target target-offset target-count))
540
541 (values read written))
542
543 (define* (lzwrite encoder bv lz-port
544 #:optional (start 0) (count (bytevector-length bv)))
545 "Write up to COUNT bytes from BV at offset START into LZ-PORT. Return
546 the number of uncompressed bytes written, a non-negative integer."
547 (let ((written 0)
548 (read 0))
549 (while (and (< 0 (lz-compress-write-size encoder))
550 (< written count))
551 (set! written (+ written
552 (lz-compress-write encoder bv (+ start written) (- count written)))))
553 (when (= written 0)
554 (lz-compress-finish encoder))
555 (let ((lz-bv (make-bytevector written)))
556 (let loop ((rd 0))
557 (set! rd (lz-compress-read encoder lz-bv 0 (bytevector-length lz-bv)))
558 (put-bytevector lz-port lz-bv 0 rd)
559 (set! read (+ read rd))
560 (unless (= rd 0)
561 (loop rd))))
562 ;; `written' is the total byte count of uncompressed data.
563 written))
564
565 \f
566 ;;;
567 ;;; Port interface.
568 ;;;
569
570 ;; Alist of (levels (dictionary-size match-length-limit)). 0 is the fastest.
571 ;; See bbexample.c in lzlib's source.
572 (define %compression-levels
573 `((0 65535 16)
574 (1 ,(bitwise-arithmetic-shift-left 1 20) 5)
575 (2 ,(bitwise-arithmetic-shift-left 3 19) 6)
576 (3 ,(bitwise-arithmetic-shift-left 1 21) 8)
577 (4 ,(bitwise-arithmetic-shift-left 3 20) 12)
578 (5 ,(bitwise-arithmetic-shift-left 1 22) 20)
579 (6 ,(bitwise-arithmetic-shift-left 1 23) 36)
580 (7 ,(bitwise-arithmetic-shift-left 1 24) 68)
581 (8 ,(bitwise-arithmetic-shift-left 3 23) 132)
582 (9 ,(bitwise-arithmetic-shift-left 1 25) 273)))
583
584 (define %default-compression-level
585 6)
586
587 (define (dictionary-size+match-length-limit level)
588 "Return two values: the dictionary size for LEVEL, and its match-length
589 limit. LEVEL must be a compression level, an integer between 0 and 9."
590 (match (assv-ref %compression-levels level)
591 ((dictionary-size match-length-limit)
592 (values dictionary-size match-length-limit))))
593
594 (define* (make-lzip-input-port port)
595 "Return an input port that decompresses data read from PORT, a file port.
596 PORT is automatically closed when the resulting port is closed."
597 (define decoder (lz-decompress-open))
598
599 (define (read! bv start count)
600 (lzread! decoder port bv start count))
601
602 (make-custom-binary-input-port "lzip-input" read! #f #f
603 (lambda ()
604 (lz-decompress-close decoder)
605 (close-port port))))
606
607 (define* (make-lzip-output-port port
608 #:key
609 (level %default-compression-level))
610 "Return an output port that compresses data at the given LEVEL, using PORT,
611 a file port, as its sink. PORT is automatically closed when the resulting
612 port is closed."
613 (define encoder
614 (call-with-values (lambda () (dictionary-size+match-length-limit level))
615 lz-compress-open))
616
617 (define (write! bv start count)
618 (lzwrite encoder bv port start count))
619
620 (make-custom-binary-output-port "lzip-output" write! #f #f
621 (lambda ()
622 (lz-compress-finish encoder)
623 ;; "lz-read" the trailing metadata added by `lz-compress-finish'.
624 (let ((lz-bv (make-bytevector (* 64 1024))))
625 (let loop ((rd 0))
626 (set! rd (lz-compress-read encoder lz-bv 0 (bytevector-length lz-bv)))
627 (put-bytevector port lz-bv 0 rd)
628 (unless (= rd 0)
629 (loop rd))))
630 (lz-compress-close encoder)
631 (close-port port))))
632
633 (define* (make-lzip-input-port/compressed port
634 #:key
635 (level %default-compression-level))
636 "Return an input port that compresses data read from PORT, with the given LEVEL.
637 PORT is automatically closed when the resulting port is closed."
638 (define encoder
639 (call-with-values (lambda () (dictionary-size+match-length-limit level))
640 lz-compress-open))
641
642 (define input-buffer (make-bytevector 8192))
643 (define input-len 0)
644 (define input-offset 0)
645
646 (define input-eof? #f)
647
648 (define (read! bv start count)
649 (cond
650 (input-eof?
651 (match (lz-compress-read encoder bv start count)
652 (0 (if (lz-compress-finished? encoder)
653 0
654 (read! bv start count)))
655 (n n)))
656 ((= input-offset input-len)
657 (match (get-bytevector-n! port input-buffer 0
658 (bytevector-length input-buffer))
659 ((? eof-object?)
660 (set! input-eof? #t)
661 (lz-compress-finish encoder))
662 (count
663 (set! input-offset 0)
664 (set! input-len count)))
665 (read! bv start count))
666 (else
667 (let-values (((read written)
668 (lzwrite! encoder
669 input-buffer input-offset
670 (- input-len input-offset)
671 bv start count)))
672 (set! input-offset (+ input-offset read))
673
674 ;; Make sure we don't return zero except on EOF.
675 (if (= 0 written)
676 (read! bv start count)
677 written)))))
678
679 (make-custom-binary-input-port "lzip-input/compressed"
680 read! #f #f
681 (lambda ()
682 (close-port port))))
683
684 (define* (call-with-lzip-input-port port proc)
685 "Call PROC with a port that wraps PORT and decompresses data read from it.
686 PORT is closed upon completion."
687 (let ((lzip (make-lzip-input-port port)))
688 (dynamic-wind
689 (const #t)
690 (lambda ()
691 (proc lzip))
692 (lambda ()
693 (close-port lzip)))))
694
695 (define* (call-with-lzip-output-port port proc
696 #:key
697 (level %default-compression-level))
698 "Call PROC with an output port that wraps PORT and compresses data. PORT is
699 close upon completion."
700 (let ((lzip (make-lzip-output-port port
701 #:level level)))
702 (dynamic-wind
703 (const #t)
704 (lambda ()
705 (proc lzip))
706 (lambda ()
707 (close-port lzip)))))
708
709 ;;; lzlib.scm ends here