;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.

(define-module (guix serialization)
  #:use-module (guix combinators)
  #:use-module (rnrs bytevectors)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-26)
  #:use-module (srfi srfi-34)
  #:use-module (srfi srfi-35)
  #:use-module (ice-9 binary-ports)
  #:use-module ((ice-9 rdelim) #:prefix rdelim:)
  #:use-module (ice-9 match)
  #:use-module (ice-9 ftw)
  #:use-module (system foreign)
  #:export (write-int read-int
            write-long-long read-long-long
            write-padding
            write-bytevector write-string
            read-string read-latin1-string read-maybe-utf8-string
            write-string-list read-string-list
            write-string-pairs read-string-pairs
            write-store-path read-store-path
            write-store-path-list read-store-path-list
            (dump . dump-port*)

            &nar-error
            nar-error?
            nar-error-port
            nar-error-file

            &nar-read-error
            nar-read-error?
            nar-read-error-token

            write-file
            write-file-tree
            fold-archive
            restore-file
            dump-file))

;;; Comment:
;;;
;;; Serialization procedures used by the RPCs and the Nar format.  This module
;;; is for internal consumption.
;;;
;;; Code:

;; Similar to serialize.cc in Nix.

(define-condition-type &nar-error &error      ; XXX: inherit from &store-error ?
  nar-error?
  (file  nar-error-file)                       ; file we were restoring, or #f
  (port  nar-error-port))                      ; port from which we read

(define currently-restored-file
  ;; Name of the file being restored.  Used internally for error reporting.
  (make-parameter #f))


(define (get-bytevector-n* port count)
  (let ((bv (get-bytevector-n port count)))
    (when (or (eof-object? bv)
              (< (bytevector-length bv) count))
      (raise (condition (&nar-error
                         (file (currently-restored-file))
                         (port port)))))
    bv))

(define (sub-bytevector bv len)
  "Return a bytevector that aliases the first LEN bytes of BV."
  (define max (bytevector-length bv))
  (cond ((= len max) bv)
        ((< len max)
         ;; Yes, this is safe because the result of each conversion procedure
         ;; has its life cycle synchronized with that of its argument.
         (pointer->bytevector (bytevector->pointer bv) len))
        (else
         (error "sub-bytevector called to get a super bytevector"))))

(define (write-int n p)
  (let ((b (make-bytevector 8 0)))
    (bytevector-u32-set! b 0 n (endianness little))
    (put-bytevector p b)))

(define (read-int p)
  (let ((b (get-bytevector-n* p 8)))
    (bytevector-u32-ref b 0 (endianness little))))

(define (write-long-long n p)
  (let ((b (make-bytevector 8 0)))
    (bytevector-u64-set! b 0 n (endianness little))
    (put-bytevector p b)))

(define (read-long-long p)
  (let ((b (get-bytevector-n* p 8)))
    (bytevector-u64-ref b 0 (endianness little))))

(define write-padding
  (let ((zero (make-bytevector 8 0)))
    (lambda (n p)
      (let ((m (modulo n 8)))
        (or (zero? m)
            (put-bytevector p zero 0 (- 8 m)))))))

(define* (write-bytevector s p
                           #:optional (l (bytevector-length s)))
  (let* ((m (modulo l 8))
         (b (make-bytevector (+ 8 l (if (zero? m) 0 (- 8 m))))))
    (bytevector-u32-set! b 0 l (endianness little))
    (bytevector-copy! s 0 b 8 l)
    (put-bytevector p b)))

(define (write-string s p)
  (write-bytevector (string->utf8 s) p))

(define (read-byte-string p)
  (let* ((len (read-int p))
         (m   (modulo len 8))
         (pad (if (zero? m) 0 (- 8 m)))
         (bv  (get-bytevector-n* p (+ len pad))))
    (sub-bytevector bv len)))

(define (read-string p)
  (utf8->string (read-byte-string p)))

(define (read-latin1-string p)
  "Read an ISO-8859-1 string from P."
  ;; Note: do not use 'get-string-n' to work around Guile bug
  ;; <http://bugs.gnu.org/19621>.  See <http://bugs.gnu.org/19610> for
  ;; a discussion.
  (let ((bv (read-byte-string p)))
    ;; XXX: Rewrite using (ice-9 iconv).
    (list->string (map integer->char (bytevector->u8-list bv)))))

(define (read-maybe-utf8-string p)
  "Read a serialized string from port P.  Attempt to decode it as UTF-8 and
substitute invalid byte sequences with question marks.  This is a
\"permissive\" UTF-8 decoder."
  ;; XXX: We rely on the port's decoding mechanism to do permissive decoding
  ;; and substitute invalid byte sequences with question marks, but this is
  ;; not very efficient.  Eventually Guile may provide a lightweight
  ;; permissive UTF-8 decoder.
  (let* ((bv   (read-byte-string p))
         (port (open-bytevector-input-port bv)))
    (set-port-encoding! port "UTF-8")
    (set-port-conversion-strategy! port 'substitute)
    (rdelim:read-string port)))

(define (write-string-list l p)
  (write-int (length l) p)
  (for-each (cut write-string <> p) l))

(define (read-string-list p)
  (let ((len (read-int p)))
    (unfold (cut >= <> len)
            (lambda (i)
              (read-string p))
            1+
            0)))

(define (write-string-pairs l p)
  (write-int (length l) p)
  (for-each (match-lambda
             ((first . second)
              (write-string first p)
              (write-string second p)))
            l))

(define (read-string-pairs p)
  (let ((len (read-int p)))
    (unfold (cut >= <> len)
            (lambda (i)
              (cons (read-string p) (read-string p)))
            1+
            0)))

(define (write-store-path f p)
  (write-string f p))                             ; TODO: assert path

(define (read-store-path p)
  (read-string p))                                ; TODO: assert path

(define write-store-path-list write-string-list)
(define read-store-path-list read-string-list)

(define-syntax write-literal-strings
  (lambda (s)
    "Write the given literal strings to PORT in an optimized fashion, without
any run-time allocations or computations."
    (define (padding len)
      (let ((m (modulo len 8)))
        (if (zero? m)
            0
            (- 8 m))))

    (syntax-case s ()
      ((_ port strings ...)
       (let* ((bytes (map string->utf8 (syntax->datum #'(strings ...))))
              (len   (fold (lambda (bv size)
                             (+ size 8 (bytevector-length bv)
                                     (padding (bytevector-length bv))))
                           0
                           bytes))
              (bv    (make-bytevector len))
              (zeros (make-bytevector 8 0)))
         (fold (lambda (str offset)
                 (let ((len (bytevector-length str)))
                   (bytevector-u32-set! bv offset len (endianness little))
                   (bytevector-copy! str 0 bv (+ 8 offset) len)
                   (bytevector-copy! zeros 0 bv (+ 8 offset len)
                                     (padding len))
                   (+ offset 8 len (padding len))))
               0
               bytes)
         #`(put-bytevector port #,bv))))))


(define-condition-type &nar-read-error &nar-error
  nar-read-error?
  (token nar-read-error-token))                 ; faulty token, or #f


(define (dump in out size)
  "Copy SIZE bytes from IN to OUT."
  (define buf-size 65536)
  (define buf (make-bytevector buf-size))

  (let loop ((left size))
    (if (<= left 0)
        0
        (let ((read (get-bytevector-n! in buf 0 (min left buf-size))))
          (if (eof-object? read)
              left
              (begin
                (put-bytevector out buf 0 read)
                (loop (- left read))))))))

(define (write-contents-from-port input output size)
  "Write SIZE bytes from port INPUT to port OUTPUT."
  (write-string "contents" output)
  (write-long-long size output)
  ;; Use 'sendfile' when both OUTPUT and INPUT are file ports.
  (if (and (file-port? output) (file-port? input))
      (sendfile output input size 0)
      (dump input output size))
  (write-padding size output))

(define (read-file-type port)
  "Read the file type tag from PORT, and return either 'regular or
'executable."
  (match (read-string port)
    ("contents"
     'regular)
    ("executable"
     (match (list (read-string port) (read-string port))
       (("" "contents") 'executable)
       (x (raise
           (condition (&message
                       (message "unexpected executable file marker"))
                      (&nar-read-error (port port)
                                       (file #f)
                                       (token x)))))))
    (x
     (raise
      (condition (&message (message "unsupported nar file type"))
                 (&nar-read-error (port port) (file #f) (token x)))))))

(define %archive-version-1
  ;; Magic cookie for Nix archives.
  "nix-archive-1")

(define* (write-file file port
                     #:key (select? (const #t)))
  "Write the contents of FILE to PORT in Nar format, recursing into
sub-directories of FILE as needed.  For each directory entry, call (SELECT?
FILE STAT), where FILE is the entry's absolute file name and STAT is the
result of 'lstat'; exclude entries for which SELECT? does not return true."
  (write-file-tree file port
                   #:file-type+size
                   (lambda (file)
                     (let* ((stat (lstat file))
                            (size (stat:size stat)))
                       (case (stat:type stat)
                         ((directory)
                          (values 'directory size))
                         ((regular)
                          (values (if (zero? (logand (stat:mode stat)
                                                     #o100))
                                      'regular
                                      'executable)
                                  size))
                         (else
                          (values (stat:type stat) size))))) ;bah!
                   #:file-port (cut open-file <> "r0b")
                   #:symlink-target readlink

                   #:directory-entries
                   (lambda (directory)
                     ;; 'scandir' defaults to 'string-locale<?' to sort files,
                     ;; but this happens to be case-insensitive (at least in
                     ;; 'en_US' locale on libc 2.18.)  Conversely, we want
                     ;; files to be sorted in a case-sensitive fashion.
                     (define basenames
                       (scandir directory (negate (cut member <> '("." "..")))
                                string<?))

                     (filter-map (lambda (base)
                                   (let ((file (string-append directory
                                                              "/" base)))
                                     (and (select? file (lstat file))
                                          base)))
                                 basenames))

                   ;; The 'scandir' call above gives us filtered and sorted
                   ;; entries, so no post-processing is needed.
                   #:postprocess-entries identity))

(define (filter/sort-directory-entries lst)
  "Remove dot and dot-dot entries from LST, and sort it in lexicographical
order."
  (delete-duplicates
   (sort (remove (cute member <> '("." "..")) lst)
         string<?)
   string=?))

(define* (write-file-tree file port
                          #:key
                          file-type+size
                          file-port
                          symlink-target
                          directory-entries
                          (postprocess-entries filter/sort-directory-entries))
  "Write the contents of FILE to PORT in Nar format, recursing into
sub-directories of FILE as needed.

This procedure does not make any file-system I/O calls.  Instead, it calls the
user-provided FILE-TYPE+SIZE, FILE-PORT, SYMLINK-TARGET, and DIRECTORY-ENTRIES
procedures, which roughly correspond to 'lstat', 'readlink', and 'scandir'.
POSTPROCESS-ENTRIES ensures that directory entries are valid; leave it as-is
unless you know that DIRECTORY-ENTRIES provide filtered and sorted entries, in
which case you can use 'identity'."
  (define p port)

  (write-string %archive-version-1 p)

  (let dump ((f file))
    (define-values (type size)
      (file-type+size f))

    (write-literal-strings p "(")
    (case type
      ((regular executable)
       (write-literal-strings p "type" "regular")
       (when (eq? 'executable type)
         (write-literal-strings p "executable" ""))
       (let ((input (file-port f)))
         (dynamic-wind
           (const #t)
           (lambda ()
             (write-contents-from-port input p size))
           (lambda ()
             (close-port input)))))
      ((directory)
       (write-literal-strings p "type" "directory")
       (let ((entries (postprocess-entries (directory-entries f))))
         (for-each (lambda (e)
                     (let* ((f (string-append f "/" e)))
                       (write-literal-strings p "entry" "(" "name")
                       (write-string e p)
                       (write-literal-strings p "node")
                       (dump f)
                       (write-literal-strings p ")")))
                   entries)))
      ((symlink)
       (write-literal-strings p "type" "symlink" "target")
       (write-string (symlink-target f) p))
      (else
       (raise (condition (&message (message "unsupported file type"))
                         (&nar-error (file f) (port port))))))
    (write-literal-strings p ")")))

(define port-conversion-strategy
  (fluid->parameter %default-port-conversion-strategy))

(define (fold-archive proc seed port file)
  "Read a file (possibly a directory structure) in Nar format from PORT.  Call
PROC on each file or directory read from PORT using:

  (PROC FILE TYPE CONTENTS RESULT)

using SEED as the first RESULT.  TYPE is a symbol like 'regular, and CONTENTS
depends on TYPE."
  (parameterize ((currently-restored-file file)

                 ;; Error out if we can convert file names to the current
                 ;; locale.  (XXX: We'd prefer UTF-8 encoding for file names
                 ;; regardless of the locale, but that's what Guile gives us
                 ;; so far.)
                 (port-conversion-strategy 'error))
    (let ((signature (read-string port)))
      (unless (equal? signature %archive-version-1)
        (raise
         (condition (&message (message "invalid nar signature"))
                    (&nar-read-error (port port)
                                     (token signature)
                                     (file #f))))))

    (let read ((file file)
               (result seed))
      (define (read-eof-marker)
        (match (read-string port)
          (")" #t)
          (x (raise
              (condition
               (&message (message "invalid nar end-of-file marker"))
               (&nar-read-error (port port) (file file) (token x)))))))

      (currently-restored-file file)

      (match (list (read-string port) (read-string port) (read-string port))
        (("(" "type" "regular")
         (let* ((type   (read-file-type port))
                (size   (read-long-long port))

                ;; The caller must read exactly SIZE bytes from PORT.
                (result (proc file type `(,port . ,size) result)))
           (let ((m (modulo size 8)))
             (unless (zero? m)
               (get-bytevector-n* port (- 8 m))))
           (read-eof-marker)
           result))
        (("(" "type" "symlink")
         (match (list (read-string port) (read-string port))
           (("target" target)
            (let ((result (proc file 'symlink target result)))
              (read-eof-marker)
              result))
           (x (raise
               (condition
                (&message (message "invalid symlink tokens"))
                (&nar-read-error (port port) (file file) (token x)))))))
        (("(" "type" "directory")
         (let ((dir file))
           (let loop ((prefix (read-string port))
                      (result (proc file 'directory #f result)))
             (match prefix
               ("entry"
                (match (list (read-string port)
                             (read-string port) (read-string port)
                             (read-string port))
                  (("(" "name" file "node")
                   (let ((result (read (string-append dir "/" file) result)))
                     (match (read-string port)
                       (")" #f)
                       (x
                        (raise
                         (condition
                          (&message
                           (message "unexpected directory entry termination"))
                          (&nar-read-error (port port)
                                           (file file)
                                           (token x))))))
                     (loop (read-string port) result)))))
               (")"                               ;done with DIR
                (proc file 'directory-complete #f result))
               (x
                (raise
                 (condition
                  (&message (message "unexpected directory inter-entry marker"))
                  (&nar-read-error (port port) (file file) (token x)))))))))
        (x
         (raise
          (condition
           (&message (message "unsupported nar entry type"))
           (&nar-read-error (port port) (file file) (token x)))))))))

(define (dump-file file input size type)
  "Dump SIZE bytes from INPUT to FILE.

This procedure is suitable for use as the #:dump-file argument to
'restore-file'."
  (call-with-output-file file
    (lambda (output)
      (dump input output size))))

(define* (restore-file port file
                       #:key (dump-file dump-file))
  "Read a file (possibly a directory structure) in Nar format from PORT.
Restore it as FILE with canonical permissions and timestamps.  To write a
regular or executable file, call:

  (DUMP-FILE FILE INPUT SIZE TYPE)

The default is to dump SIZE bytes from INPUT to FILE, but callers can provide
a custom procedure, for instance to deduplicate FILE on the fly."
  (fold-archive (lambda (file type content result)
                  (match type
                    ('directory
                     (mkdir file))
                    ('directory-complete
                     (chmod file #o555)
                     (utime file 1 1 0 0))
                    ('symlink
                     (symlink content file)
                     (utime file 1 1 0 0 AT_SYMLINK_NOFOLLOW))
                    ((or 'regular 'executable)
                     (match content
                       ((input . size)
                        (dump-file file input size type)
                        (chmod file (if (eq? type 'executable)
                                        #o555
                                        #o444))
                        (utime file 1 1 0 0))))))
                #t
                port
                file))

;;; Local Variables:
;;; eval: (put 'call-with-binary-input-file 'scheme-indent-function 1)
;;; End:

;;; serialization.scm ends here