summaryrefslogtreecommitdiff
path: root/guix/lzlib.scm
blob: 31d84971cb0e3cdf73fa3dceb10b4b425eb66aac (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2019 Pierre Neidhardt <mail@ambrevar.xyz>
;;; Copyright © 2019 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.

(define-module (guix lzlib)
  #:use-module (rnrs bytevectors)
  #:use-module (rnrs arithmetic bitwise)
  #:use-module (ice-9 binary-ports)
  #:use-module (ice-9 match)
  #:use-module (system foreign)
  #:use-module (guix config)
  #:use-module (srfi srfi-11)
  #:export (lzlib-available?
            make-lzip-input-port
            make-lzip-output-port
            make-lzip-input-port/compressed
            call-with-lzip-input-port
            call-with-lzip-output-port
            %default-member-length-limit
            %default-compression-level))

;;; Commentary:
;;;
;;; Bindings to the lzlib / liblz API.  Some convenience functions are also
;;; provided (see the export).
;;;
;;; While the bindings are complete, the convenience functions only support
;;; single member archives.  To decompress single member archives, we loop
;;; until lz-decompress-read returns 0.  This is simpler.  To support multiple
;;; members properly, we need (among others) to call lz-decompress-finish and
;;; loop over lz-decompress-read until lz-decompress-finished? returns #t.
;;; Otherwise a multi-member archive starting with an empty member would only
;;; decompress the empty member and stop there, resulting in truncated output.

;;; Code:

(define %lzlib
  ;; File name of lzlib's shared library.  When updating via 'guix pull',
  ;; '%liblz' might be undefined so protect against it.
  (delay (dynamic-link (if (defined? '%liblz)
                           %liblz
                           "liblz"))))

(define (lzlib-available?)
  "Return true if lzlib is available, #f otherwise."
  (false-if-exception (force %lzlib)))

(define (lzlib-procedure ret name parameters)
  "Return a procedure corresponding to C function NAME in liblz, or #f if
either lzlib or the function could not be found."
  (match (false-if-exception (dynamic-func name (force %lzlib)))
    ((? pointer? ptr)
     (pointer->procedure ret ptr parameters))
    (#f
     #f)))

(define-wrapped-pointer-type <lz-decoder>
  ;; Scheme counterpart of the 'LZ_Decoder' opaque type.
  lz-decoder?
  pointer->lz-decoder
  lz-decoder->pointer
  (lambda (obj port)
    (format port "#<lz-decoder ~a>"
            (number->string (object-address obj) 16))))

(define-wrapped-pointer-type <lz-encoder>
  ;; Scheme counterpart of the 'LZ_Encoder' opaque type.
  lz-encoder?
  pointer->lz-encoder
  lz-encoder->pointer
  (lambda (obj port)
    (format port "#<lz-encoder ~a>"
            (number->string (object-address obj) 16))))

;; From lzlib.h
(define %error-number-ok 0)
(define %error-number-bad-argument 1)
(define %error-number-mem-error 2)
(define %error-number-sequence-error 3)
(define %error-number-header-error 4)
(define %error-number-unexpected-eof 5)
(define %error-number-data-error 6)
(define %error-number-library-error 7)


;; Compression bindings.

(define lz-compress-open
  (let ((proc (lzlib-procedure '* "LZ_compress_open" (list int int uint64)))
        ;; member-size is an "unsigned long long", and the C standard guarantees
        ;; a minimum range of 0..2^64-1.
        (unlimited-size (- (expt 2 64) 1)))
    (lambda* (dictionary-size match-length-limit #:optional (member-size unlimited-size))
      "Initialize the internal stream state for compression and returns a
pointer that can only be used as the encoder argument for the other
lz-compress functions, or a null pointer if the encoder could not be
allocated.

See the manual: (lzlib) Compression functions."
      (let ((encoder-ptr (proc dictionary-size match-length-limit member-size)))
        (if (not (= (lz-compress-error encoder-ptr) -1))
            (pointer->lz-encoder encoder-ptr)
            (throw 'lzlib-error 'lz-compress-open))))))

(define lz-compress-close
  (let ((proc (lzlib-procedure int "LZ_compress_close" '(*))))
    (lambda (encoder)
      "Close encoder.  ENCODER can no longer be used as an argument to any
lz-compress function. "
      (let ((ret (proc (lz-encoder->pointer encoder))))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-compress-close ret)
            ret)))))

(define lz-compress-finish
  (let ((proc (lzlib-procedure int "LZ_compress_finish" '(*))))
    (lambda (encoder)
      "Tell that all the data for this member have already been written (with
the `lz-compress-write' function).  It is safe to call `lz-compress-finish' as
many times as needed.  After all the produced compressed data have been read
with `lz-compress-read' and `lz-compress-member-finished?' returns #t, a new
member can be started with 'lz-compress-restart-member'."
      (let ((ret (proc (lz-encoder->pointer encoder))))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-compress-finish (lz-compress-error encoder))
            ret)))))

(define lz-compress-restart-member
  (let ((proc (lzlib-procedure int "LZ_compress_restart_member" (list '* uint64))))
    (lambda (encoder member-size)
      "Start a new member in a multimember data stream.
Call this function only after `lz-compress-member-finished?' indicates that the
current member has been fully read (with the `lz-compress-read' function)."
      (let ((ret (proc (lz-encoder->pointer encoder) member-size)))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-compress-restart-member
                   (lz-compress-error encoder))
            ret)))))

(define lz-compress-sync-flush
  (let ((proc (lzlib-procedure int "LZ_compress_sync_flush" (list '*))))
    (lambda (encoder)
      "Make available to `lz-compress-read' all the data already written with
the `LZ-compress-write' function.  First call `lz-compress-sync-flush'.  Then
call 'lz-compress-read' until it returns 0.

Repeated use of `LZ-compress-sync-flush' may degrade compression ratio,
so use it only when needed. "
      (let ((ret (proc (lz-encoder->pointer encoder))))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-compress-sync-flush
                   (lz-compress-error encoder))
            ret)))))

(define lz-compress-read
  (let ((proc (lzlib-procedure int "LZ_compress_read" (list '* '* int))))
    (lambda* (encoder lzfile-bv #:optional (start 0) (count (bytevector-length lzfile-bv)))
      "Read up to COUNT bytes from the encoder stream, storing the results in LZFILE-BV.
Return the number of uncompressed bytes written, a positive integer."
      (let ((ret (proc (lz-encoder->pointer encoder)
                       (bytevector->pointer lzfile-bv start)
                       count)))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-compress-read (lz-compress-error encoder))
            ret)))))

(define lz-compress-write
  (let ((proc (lzlib-procedure int "LZ_compress_write" (list '* '* int))))
    (lambda* (encoder bv #:optional (start 0) (count (bytevector-length bv)))
      "Write up to COUNT bytes from BV to the encoder stream.  Return the
number of uncompressed bytes written, a strictly positive integer."
      (let ((ret (proc (lz-encoder->pointer encoder)
                       (bytevector->pointer bv start)
                       count)))
        (if (< ret 0)
            (throw 'lzlib-error 'lz-compress-write (lz-compress-error encoder))
            ret)))))

(define lz-compress-write-size
  (let ((proc (lzlib-procedure int "LZ_compress_write_size" '(*))))
    (lambda (encoder)
      "The maximum number of bytes that can be immediately written through the
`lz-compress-write' function.

It is guaranteed that an immediate call to `lz-compress-write' will accept a
SIZE up to the returned number of bytes. "
      (let ((ret (proc (lz-encoder->pointer encoder))))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-compress-write-size (lz-compress-error encoder))
            ret)))))

(define lz-compress-error
  (let ((proc (lzlib-procedure int "LZ_compress_errno" '(*))))
    (lambda (encoder)
      "ENCODER can be a Scheme object or a pointer."
      (let* ((error-number (proc (if (lz-encoder? encoder)
                                     (lz-encoder->pointer encoder)
                                     encoder))))
        error-number))))

(define lz-compress-finished?
  (let ((proc (lzlib-procedure int "LZ_compress_finished" '(*))))
    (lambda (encoder)
      "Return #t if all the data have been read and `lz-compress-close' can
be safely called. Otherwise return #f."
      (let ((ret (proc (lz-encoder->pointer encoder))))
        (match ret
          (1 #t)
          (0 #f)
          (_ (throw 'lzlib-error 'lz-compress-finished? (lz-compress-error encoder))))))))

(define lz-compress-member-finished?
  (let ((proc (lzlib-procedure int "LZ_compress_member_finished" '(*))))
    (lambda (encoder)
      "Return #t if the current member, in a multimember data stream, has
been fully read and 'lz-compress-restart-member' can be safely called.
Otherwise return #f."
      (let ((ret (proc (lz-encoder->pointer encoder))))
        (match ret
          (1 #t)
          (0 #f)
          (_ (throw 'lzlib-error 'lz-compress-member-finished? (lz-compress-error encoder))))))))

(define lz-compress-data-position
  (let ((proc (lzlib-procedure uint64 "LZ_compress_data_position" '(*))))
    (lambda (encoder)
      "Return the number of input bytes already compressed in the current
member."
      (let ((ret (proc (lz-encoder->pointer encoder))))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-compress-data-position
                   (lz-compress-error encoder))
            ret)))))

(define lz-compress-member-position
  (let ((proc (lzlib-procedure uint64 "LZ_compress_member_position" '(*))))
    (lambda (encoder)
      "Return the number of compressed bytes already produced, but perhaps
not yet read, in the current member."
      (let ((ret (proc (lz-encoder->pointer encoder))))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-compress-member-position
                   (lz-compress-error encoder))
            ret)))))

(define lz-compress-total-in-size
  (let ((proc (lzlib-procedure uint64 "LZ_compress_total_in_size" '(*))))
    (lambda (encoder)
      "Return the total number of input bytes already compressed."
      (let ((ret (proc (lz-encoder->pointer encoder))))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-compress-total-in-size
                   (lz-compress-error encoder))
            ret)))))

(define lz-compress-total-out-size
  (let ((proc (lzlib-procedure uint64 "LZ_compress_total_out_size" '(*))))
    (lambda (encoder)
      "Return the total number of compressed bytes already produced, but
perhaps not yet read."
      (let ((ret (proc (lz-encoder->pointer encoder))))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-compress-total-out-size
                   (lz-compress-error encoder))
            ret)))))


;; Decompression bindings.

(define lz-decompress-open
  (let ((proc (lzlib-procedure '* "LZ_decompress_open" '())))
    (lambda ()
      "Initializes the internal stream state for decompression and returns a
pointer that can only be used as the decoder argument for the other
lz-decompress functions, or a null pointer if the decoder could not be
allocated.

See the manual: (lzlib) Decompression functions."
      (let ((decoder-ptr (proc)))
        (if (not (= (lz-decompress-error decoder-ptr) -1))
            (pointer->lz-decoder decoder-ptr)
            (throw 'lzlib-error 'lz-decompress-open))))))

(define lz-decompress-close
  (let ((proc (lzlib-procedure int "LZ_decompress_close" '(*))))
    (lambda (decoder)
      "Close decoder.  DECODER can no longer be used as an argument to any
lz-decompress function. "
      (let ((ret (proc (lz-decoder->pointer decoder))))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-decompress-close ret)
            ret)))))

(define lz-decompress-finish
  (let ((proc (lzlib-procedure int "LZ_decompress_finish" '(*))))
    (lambda (decoder)
      "Tell that all the data for this stream have already been written (with
the `lz-decompress-write' function).  It is safe to call
`lz-decompress-finish' as many times as needed."
      (let ((ret (proc (lz-decoder->pointer decoder))))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-decompress-finish (lz-decompress-error decoder))
            ret)))))

(define lz-decompress-reset
  (let ((proc (lzlib-procedure int "LZ_decompress_reset" '(*))))
    (lambda (decoder)
      "Reset the internal state of DECODER as it was just after opening it
with the `lz-decompress-open' function.  Data stored in the internal buffers
is discarded.  Position counters are set to 0."
      (let ((ret (proc (lz-decoder->pointer decoder))))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-decompress-reset
                   (lz-decompress-error decoder))
            ret)))))

(define lz-decompress-sync-to-member
  (let ((proc (lzlib-procedure int "LZ_decompress_sync_to_member" '(*))))
    (lambda (decoder)
      "Reset the error state of DECODER and enters a search state that lasts
until a new member header (or the end of the stream) is found.  After a
successful call to `lz-decompress-sync-to-member', data written with
`lz-decompress-write' will be consumed and 'lz-decompress-read' will return 0
until a header is found.

This function is useful to discard any data preceding the first member, or to
discard the rest of the current member, for example in case of a data
error.  If the decoder is already at the beginning of a member, this function
does nothing."
      (let ((ret (proc (lz-decoder->pointer decoder))))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-decompress-sync-to-member
                   (lz-decompress-error decoder))
            ret)))))

(define lz-decompress-read
  (let ((proc (lzlib-procedure int "LZ_decompress_read" (list '* '* int))))
    (lambda* (decoder file-bv #:optional (start 0) (count (bytevector-length file-bv)))
      "Read up to COUNT bytes from the decoder stream, storing the results in FILE-BV.
Return the number of uncompressed bytes written, a non-negative positive integer."
      (let ((ret (proc (lz-decoder->pointer decoder)
                       (bytevector->pointer file-bv start)
                       count)))
        (if (< ret 0)
            (throw 'lzlib-error 'lz-decompress-read (lz-decompress-error decoder))
            ret)))))

(define lz-decompress-write
  (let ((proc (lzlib-procedure int "LZ_decompress_write" (list '* '* int))))
    (lambda* (decoder bv #:optional (start 0) (count (bytevector-length bv)))
      "Write up to COUNT bytes from BV to the decoder stream.  Return the
number of uncompressed bytes written, a non-negative integer."
      (let ((ret (proc (lz-decoder->pointer decoder)
                       (bytevector->pointer bv start)
                       count)))
        (if (< ret 0)
            (throw 'lzlib-error 'lz-decompress-write (lz-decompress-error decoder))
            ret)))))

(define lz-decompress-write-size
  (let ((proc (lzlib-procedure int "LZ_decompress_write_size" '(*))))
    (lambda (decoder)
      "Return the maximum number of bytes that can be immediately written
through the `lz-decompress-write' function.

It is guaranteed that an immediate call to `lz-decompress-write' will accept a
SIZE up to the returned number of bytes. "
      (let ((ret (proc (lz-decoder->pointer decoder))))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-decompress-write-size (lz-decompress-error decoder))
            ret)))))

(define lz-decompress-error
  (let ((proc (lzlib-procedure int "LZ_decompress_errno" '(*))))
    (lambda (decoder)
      "DECODER can be a Scheme object or a pointer."
      (let* ((error-number (proc (if (lz-decoder? decoder)
                                     (lz-decoder->pointer decoder)
                                     decoder))))
        error-number))))

(define lz-decompress-finished?
  (let ((proc (lzlib-procedure int "LZ_decompress_finished" '(*))))
    (lambda (decoder)
      "Return #t if all the data have been read and `lz-decompress-close' can
be safely called.  Otherwise return #f."
      (let ((ret (proc (lz-decoder->pointer decoder))))
        (match ret
          (1 #t)
          (0 #f)
          (_ (throw 'lzlib-error 'lz-decompress-finished? (lz-decompress-error decoder))))))))

(define lz-decompress-member-finished?
  (let ((proc (lzlib-procedure int "LZ_decompress_member_finished" '(*))))
    (lambda (decoder)
      "Return #t if the current member, in a multimember data stream, has
been fully read and `lz-decompress-restart-member' can be safely called.
Otherwise return #f."
      (let ((ret (proc (lz-decoder->pointer decoder))))
        (match ret
          (1 #t)
          (0 #f)
          (_ (throw 'lzlib-error 'lz-decompress-member-finished? (lz-decompress-error decoder))))))))

(define lz-decompress-member-version
  (let ((proc (lzlib-procedure int "LZ_decompress_member_version" '(*))))
    (lambda (decoder)
      (let ((ret (proc (lz-decoder->pointer decoder))))
        "Return the version of current member from member header."
        (if (= ret -1)
            (throw 'lzlib-error 'lz-decompress-data-position
                   (lz-decompress-error decoder))
            ret)))))

(define lz-decompress-dictionary-size
  (let ((proc (lzlib-procedure int "LZ_decompress_dictionary_size" '(*))))
    (lambda (decoder)
      (let ((ret (proc (lz-decoder->pointer decoder))))
        "Return the dictionary size of current member from member header."
        (if (= ret -1)
            (throw 'lzlib-error 'lz-decompress-member-position
                   (lz-decompress-error decoder))
            ret)))))

(define lz-decompress-data-crc
  (let ((proc (lzlib-procedure unsigned-int "LZ_decompress_data_crc" '(*))))
    (lambda (decoder)
      (let ((ret (proc (lz-decoder->pointer decoder))))
        "Return the 32 bit Cyclic Redundancy Check of the data decompressed
from the current member.  The returned value is valid only when
`lz-decompress-member-finished' returns #t. "
        (if (= ret -1)
            (throw 'lzlib-error 'lz-decompress-member-position
                   (lz-decompress-error decoder))
            ret)))))

(define lz-decompress-data-position
  (let ((proc (lzlib-procedure uint64 "LZ_decompress_data_position" '(*))))
    (lambda (decoder)
      "Return the number of decompressed bytes already produced, but perhaps
not yet read, in the current member."
      (let ((ret (proc (lz-decoder->pointer decoder))))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-decompress-data-position
                   (lz-decompress-error decoder))
            ret)))))

(define lz-decompress-member-position
  (let ((proc (lzlib-procedure uint64 "LZ_decompress_member_position" '(*))))
    (lambda (decoder)
      "Return the number of input bytes already decompressed in the current
member."
      (let ((ret (proc (lz-decoder->pointer decoder))))
        (if (= ret -1)
            (throw 'lzlib-error 'lz-decompress-member-position
                   (lz-decompress-error decoder))
            ret)))))

(define lz-decompress-total-in-size
  (let ((proc (lzlib-procedure uint64 "LZ_decompress_total_in_size" '(*))))
    (lambda (decoder)
      (let ((ret (proc (lz-decoder->pointer decoder))))
        "Return the total number of input bytes already compressed."
        (if (= ret -1)
            (throw 'lzlib-error 'lz-decompress-total-in-size
                   (lz-decompress-error decoder))
            ret)))))

(define lz-decompress-total-out-size
  (let ((proc (lzlib-procedure uint64 "LZ_decompress_total_out_size" '(*))))
    (lambda (decoder)
      (let ((ret (proc (lz-decoder->pointer decoder))))
        "Return the total number of compressed bytes already produced, but
perhaps not yet read."
        (if (= ret -1)
            (throw 'lzlib-error 'lz-decompress-total-out-size
                   (lz-decompress-error decoder))
            ret)))))


;; High level functions.

(define* (lzread! decoder port bv
                  #:optional (start 0) (count (bytevector-length bv)))
  "Read up to COUNT bytes from PORT into BV at offset START.  Return the
number of uncompressed bytes actually read; it is zero if COUNT is zero or if
the end-of-stream has been reached."
  (define (feed-decoder! decoder)
    ;; Feed DECODER with data read from PORT.
    (match (get-bytevector-n port (lz-decompress-write-size decoder))
      ((? eof-object? eof) eof)
      (bv (lz-decompress-write decoder bv))))

  (let loop ((read 0)
             (start start))
    (cond ((< read count)
           (match (lz-decompress-read decoder bv start (- count read))
             (0 (if (eof-object? (feed-decoder! decoder))
                    read
                    (loop read start)))
             (n (loop (+ read n) (+ start n)))))
          (else
           read))))

(define (lzwrite! encoder source source-offset source-count
                  target target-offset target-count)
  "Write up to SOURCE-COUNT bytes from SOURCE to ENCODER, and read up to
TARGET-COUNT bytes into TARGET at TARGET-OFFSET.  Return two values: the
number of bytes read from SOURCE, and the number of bytes written to TARGET,
possibly zero."
  (define read
    (if (> (lz-compress-write-size encoder) 0)
        (match (lz-compress-write encoder source source-offset source-count)
          (0 (lz-compress-finish encoder) 0)
          (n n))
        0))

  (define written
    (lz-compress-read encoder target target-offset target-count))

  (values read written))

(define* (lzwrite encoder bv lz-port
                  #:optional (start 0) (count (bytevector-length bv)))
  "Write up to COUNT bytes from BV at offset START into LZ-PORT.  Return
the number of uncompressed bytes written, a non-negative integer."
  (let ((written 0)
        (read 0))
    (while (and (< 0 (lz-compress-write-size encoder))
                (< written count))
      (set! written (+ written
                       (lz-compress-write encoder bv (+ start written) (- count written)))))
    (when (= written 0)
      (lz-compress-finish encoder))
    (let ((lz-bv (make-bytevector written)))
      (let loop ((rd 0))
        (set! rd (lz-compress-read encoder lz-bv 0 (bytevector-length lz-bv)))
        (put-bytevector lz-port lz-bv 0 rd)
        (set! read (+ read rd))
        (unless (= rd 0)
          (loop rd))))
    ;; `written' is the total byte count of uncompressed data.
    written))


;;;
;;; Port interface.
;;;

;; Alist of (levels (dictionary-size match-length-limit)).  0 is the fastest.
;; See bbexample.c in lzlib's source.
(define %compression-levels
  `((0 (65535 16))
    (1 (,(bitwise-arithmetic-shift-left 1 20) 5))
    (2 (,(bitwise-arithmetic-shift-left 3 19) 6))
    (3 (,(bitwise-arithmetic-shift-left 1 21) 8))
    (4 (,(bitwise-arithmetic-shift-left 3 20) 12))
    (5 (,(bitwise-arithmetic-shift-left 1 22) 20))
    (6 (,(bitwise-arithmetic-shift-left 1 23) 36))
    (7 (,(bitwise-arithmetic-shift-left 1 24) 68))
    (8 (,(bitwise-arithmetic-shift-left 3 23) 132))
    (9 (,(bitwise-arithmetic-shift-left 1 25) 273))))

(define %default-compression-level
  6)

(define* (make-lzip-input-port port)
  "Return an input port that decompresses data read from PORT, a file port.
PORT is automatically closed when the resulting port is closed."
  (define decoder (lz-decompress-open))

  (define (read! bv start count)
    (lzread! decoder port bv start count))

  (make-custom-binary-input-port "lzip-input" read! #f #f
                                 (lambda ()
                                   (lz-decompress-close decoder)
                                   (close-port port))))

(define* (make-lzip-output-port port
                                #:key
                                (level %default-compression-level))
  "Return an output port that compresses data at the given LEVEL, using PORT,
a file port, as its sink.  PORT is automatically closed when the resulting
port is closed."
  (define encoder (apply lz-compress-open
                         (car (assoc-ref %compression-levels level))))

  (define (write! bv start count)
    (lzwrite encoder bv port start count))

  (make-custom-binary-output-port "lzip-output" write! #f #f
                                  (lambda ()
                                    (lz-compress-finish encoder)
                                    ;; "lz-read" the trailing metadata added by `lz-compress-finish'.
                                    (let ((lz-bv (make-bytevector (* 64 1024))))
                                      (let loop ((rd 0))
                                        (set! rd (lz-compress-read encoder lz-bv 0 (bytevector-length lz-bv)))
                                        (put-bytevector port lz-bv 0 rd)
                                        (unless (= rd 0)
                                          (loop rd))))
                                    (lz-compress-close encoder)
                                    (close-port port))))

(define* (make-lzip-input-port/compressed port
                                          #:key
                                          (level %default-compression-level))
  "Return an input port that compresses data read from PORT, with the given LEVEL.
PORT is automatically closed when the resulting port is closed."
  (define encoder (apply lz-compress-open
                         (car (assoc-ref %compression-levels level))))

  (define input-buffer (make-bytevector 8192))
  (define input-len 0)
  (define input-offset 0)

  (define input-eof? #f)

  (define (read! bv start count)
    (cond
     (input-eof?
      (match (lz-compress-read encoder bv start count)
        (0 (if (lz-compress-finished? encoder)
               0
               (read! bv start count)))
        (n n)))
     ((= input-offset input-len)
      (match (get-bytevector-n! port input-buffer 0
                                (bytevector-length input-buffer))
        ((? eof-object?)
         (set! input-eof? #t)
         (lz-compress-finish encoder))
        (count
         (set! input-offset 0)
         (set! input-len count)))
      (read! bv start count))
     (else
      (let-values (((read written)
                    (lzwrite! encoder
                              input-buffer input-offset
                              (- input-len input-offset)
                              bv start count)))
        (set! input-offset (+ input-offset read))

        ;; Make sure we don't return zero except on EOF.
        (if (= 0 written)
            (read! bv start count)
            written)))))

  (make-custom-binary-input-port "lzip-input/compressed"
                                 read! #f #f
                                 (lambda ()
                                   (close-port port))))

(define* (call-with-lzip-input-port port proc)
  "Call PROC with a port that wraps PORT and decompresses data read from it.
PORT is closed upon completion."
  (let ((lzip (make-lzip-input-port port)))
    (dynamic-wind
      (const #t)
      (lambda ()
        (proc lzip))
      (lambda ()
        (close-port lzip)))))

(define* (call-with-lzip-output-port port proc
                                     #:key
                                     (level %default-compression-level))
  "Call PROC with an output port that wraps PORT and compresses data.  PORT is
close upon completion."
  (let ((lzip (make-lzip-output-port port
                                     #:level level)))
    (dynamic-wind
      (const #t)
      (lambda ()
        (proc lzip))
      (lambda ()
        (close-port lzip)))))

;;; lzlib.scm ends here