summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--config-daemon.ac11
-rw-r--r--guix/store/deduplication.scm69
-rw-r--r--nix/libstore/gc.cc4
-rw-r--r--nix/libstore/local-store.hh3
-rw-r--r--nix/libstore/optimise-store.cc15
-rw-r--r--tests/derivations.scm14
-rw-r--r--tests/nar.scm7
-rw-r--r--tests/store-deduplication.scm41
-rw-r--r--tests/store.scm4
9 files changed, 126 insertions, 42 deletions
diff --git a/config-daemon.ac b/config-daemon.ac
index 5ddc740600..86306effe1 100644
--- a/config-daemon.ac
+++ b/config-daemon.ac
@@ -94,17 +94,6 @@ if test "x$guix_build_daemon" = "xyes"; then
AC_CHECK_FUNCS([lutimes lchown posix_fallocate sched_setaffinity \
statvfs nanosleep strsignal statx])
- dnl Check whether the store optimiser can optimise symlinks.
- AC_MSG_CHECKING([whether it is possible to create a link to a symlink])
- ln -s bla tmp_link
- if ln tmp_link tmp_link2 2> /dev/null; then
- AC_MSG_RESULT(yes)
- AC_DEFINE(CAN_LINK_SYMLINK, 1, [Whether link() works on symlinks.])
- else
- AC_MSG_RESULT(no)
- fi
- rm -f tmp_link tmp_link2
-
dnl Check for <locale>.
AC_LANG_PUSH(C++)
AC_CHECK_HEADERS([locale])
diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm
index cd9660174c..370df4a74c 100644
--- a/guix/store/deduplication.scm
+++ b/guix/store/deduplication.scm
@@ -1,6 +1,6 @@
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2017 Caleb Ristvedt <caleb.ristvedt@cune.org>
-;;; Copyright © 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2018-2021 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -22,12 +22,13 @@
(define-module (guix store deduplication)
#:use-module (gcrypt hash)
- #:use-module (guix build utils)
+ #:use-module ((guix build utils) #:hide (dump-port))
#:use-module (guix build syscalls)
#:use-module (guix base32)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-34)
#:use-module (srfi srfi-35)
+ #:use-module (rnrs bytevectors)
#:use-module (rnrs io ports)
#:use-module (ice-9 ftw)
#:use-module (ice-9 match)
@@ -37,6 +38,31 @@
dump-file/deduplicate
copy-file/deduplicate))
+;; TODO: Remove once 'dump-port' in (guix build utils) has an optional 'len'
+;; parameter.
+(define* (dump-port in out
+ #:optional len
+ #:key (buffer-size 16384))
+ "Read LEN bytes from IN (or as much as possible if LEN is #f) and write it
+to OUT, using chunks of BUFFER-SIZE bytes."
+ (define buffer
+ (make-bytevector buffer-size))
+
+ (let loop ((total 0)
+ (bytes (get-bytevector-n! in buffer 0
+ (if len
+ (min len buffer-size)
+ buffer-size))))
+ (or (eof-object? bytes)
+ (and len (= total len))
+ (let ((total (+ total bytes)))
+ (put-bytevector out buffer 0 bytes)
+ (loop total
+ (get-bytevector-n! in buffer 0
+ (if len
+ (min (- len total) buffer-size)
+ buffer-size)))))))
+
(define (nar-sha256 file)
"Gives the sha256 hash of a file and the size of the file in nar form."
(let-values (((port get-hash) (open-sha256-port)))
@@ -127,11 +153,27 @@ Note: TARGET, TO-REPLACE, and SWAP-DIRECTORY must be on the same file system."
(unless (= EMLINK (system-error-errno args))
(apply throw args)))))))
+(define %deduplication-minimum-size
+ ;; Size below which files are not deduplicated. This avoids adding too many
+ ;; entries to '.links', which would slow down 'removeUnusedLinks' while
+ ;; saving little space. Keep in sync with optimize-store.cc.
+ 8192)
+
(define* (deduplicate path hash #:key (store (%store-directory)))
"Check if a store item with sha256 hash HASH already exists. If so,
replace PATH with a hardlink to the already-existing one. If not, register
PATH so that future duplicates can hardlink to it. PATH is assumed to be
under STORE."
+ ;; Lightweight promises.
+ (define-syntax-rule (delay exp)
+ (let ((value #f))
+ (lambda ()
+ (unless value
+ (set! value exp))
+ value)))
+ (define-syntax-rule (force promise)
+ (promise))
+
(define links-directory
(string-append store "/.links"))
@@ -144,13 +186,18 @@ under STORE."
((file . properties)
(unless (member file '("." ".."))
(let* ((file (string-append path "/" file))
+ (st (delay (lstat file)))
(type (match (assoc-ref properties 'type)
((or 'unknown #f)
- (stat:type (lstat file)))
+ (stat:type (force st)))
(type type))))
- (loop file type
- (and (not (eq? 'directory type))
- (nar-sha256 file)))))))
+ (when (or (eq? 'directory type)
+ (and (eq? 'regular type)
+ (>= (stat:size (force st))
+ %deduplication-minimum-size)))
+ (loop file type
+ (and (not (eq? 'directory type))
+ (nar-sha256 file))))))))
(scandir* path))
(let ((link-file (string-append links-directory "/"
(bytevector->nix-base32-string hash))))
@@ -222,9 +269,9 @@ OUTPUT as it goes."
This procedure is suitable as a #:dump-file argument to 'restore-file'. When
used that way, it deduplicates files on the fly as they are restored, thereby
-removing the need to a deduplication pass that would re-read all the files
+removing the need for a deduplication pass that would re-read all the files
down the road."
- (define hash
+ (define (dump-and-compute-hash)
(call-with-output-file file
(lambda (output)
(let-values (((hash-port get-hash)
@@ -236,7 +283,11 @@ down the road."
(close-port hash-port)
(get-hash)))))
- (deduplicate file hash #:store store))
+ (if (>= size %deduplication-minimum-size)
+ (deduplicate file (dump-and-compute-hash) #:store store)
+ (call-with-output-file file
+ (lambda (output)
+ (dump-port input output size)))))
(define* (copy-file/deduplicate source target
#:key (store (%store-directory)))
diff --git a/nix/libstore/gc.cc b/nix/libstore/gc.cc
index e1d0765154..16519116e4 100644
--- a/nix/libstore/gc.cc
+++ b/nix/libstore/gc.cc
@@ -606,7 +606,9 @@ void LocalStore::removeUnusedLinks(const GCState & state)
throw SysError(format("statting `%1%'") % path);
#endif
- if (st.st_nlink != 1) {
+ /* Drop links for files smaller than 'deduplicationMinSize', even if
+ they have more than one hard link. */
+ if (st.st_nlink != 1 && st.st_size >= deduplicationMinSize) {
actualSize += st.st_size;
unsharedSize += (st.st_nlink - 1) * st.st_size;
continue;
diff --git a/nix/libstore/local-store.hh b/nix/libstore/local-store.hh
index 9ba37219da..20d3c3c893 100644
--- a/nix/libstore/local-store.hh
+++ b/nix/libstore/local-store.hh
@@ -292,4 +292,7 @@ void canonicaliseTimestampAndPermissions(const Path & path);
MakeError(PathInUse, Error);
+/* Size below which a file is not considered for deduplication. */
+extern const size_t deduplicationMinSize;
+
}
diff --git a/nix/libstore/optimise-store.cc b/nix/libstore/optimise-store.cc
index eb303ab4c3..9fd6f3cb35 100644
--- a/nix/libstore/optimise-store.cc
+++ b/nix/libstore/optimise-store.cc
@@ -15,6 +15,9 @@
namespace nix {
+/* Any file smaller than this is not considered for deduplication.
+ Keep in sync with (guix store deduplication). */
+const size_t deduplicationMinSize = 8192;
static void makeWritable(const Path & path)
{
@@ -105,12 +108,12 @@ void LocalStore::optimisePath_(OptimiseStats & stats, const Path & path, InodeHa
return;
}
- /* We can hard link regular files and maybe symlinks. */
- if (!S_ISREG(st.st_mode)
-#if CAN_LINK_SYMLINK
- && !S_ISLNK(st.st_mode)
-#endif
- ) return;
+ /* We can hard link regular files (and maybe symlinks), but do that only
+ for files larger than some threshold. This avoids adding too many
+ entries to '.links', which would slow down 'removeUnusedLinks' while
+ saving little space. */
+ if (!S_ISREG(st.st_mode) || ((size_t) st.st_size) < deduplicationMinSize)
+ return;
/* Sometimes SNAFUs can cause files in the store to be
modified, in particular when running programs as root under
diff --git a/tests/derivations.scm b/tests/derivations.scm
index cd165d1be6..0775719ea3 100644
--- a/tests/derivations.scm
+++ b/tests/derivations.scm
@@ -1,5 +1,5 @@
;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2012-2021 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -170,11 +170,15 @@
#f))))
(test-assert "identical files are deduplicated"
- (let* ((build1 (add-text-to-store %store "one.sh"
- "echo hello, world > \"$out\"\n"
+ ;; Note: DATA must be longer than %DEDUPLICATION-MINIMUM-SIZE.
+ (let* ((data (make-string 9000 #\a))
+ (build1 (add-text-to-store %store "one.sh"
+ (string-append "echo -n " data
+ " > \"$out\"\n")
'()))
(build2 (add-text-to-store %store "two.sh"
- "# Hey!\necho hello, world > \"$out\"\n"
+ (string-append "# Hey!\necho -n "
+ data " > \"$out\"\n")
'()))
(drv1 (derivation %store "foo"
%bash `(,build1)
@@ -187,7 +191,7 @@
(file2 (derivation->output-path drv2)))
(and (valid-path? %store file1) (valid-path? %store file2)
(string=? (call-with-input-file file1 get-string-all)
- "hello, world\n")
+ data)
(= (stat:ino (lstat file1))
(stat:ino (lstat file2))))))))
diff --git a/tests/nar.scm b/tests/nar.scm
index ba4881caaa..98752f2088 100644
--- a/tests/nar.scm
+++ b/tests/nar.scm
@@ -1,5 +1,5 @@
;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2012-2021 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -486,8 +486,9 @@
;; their mtime and permissions were not reset. Ensure that this bug is
;; gone.
(with-store store
- (let* ((text1 (random-text))
- (text2 (random-text))
+ ;; Note: TEXT1 and TEXT2 must be longer than %DEDUPLICATION-MINIMUM-SIZE.
+ (let* ((text1 (string-concatenate (make-list 200 (random-text))))
+ (text2 (string-concatenate (make-list 200 (random-text))))
(tree `("tree" directory
("a" regular (data ,text1))
("b" directory
diff --git a/tests/store-deduplication.scm b/tests/store-deduplication.scm
index b1c2d93bbd..2950fbc1a3 100644
--- a/tests/store-deduplication.scm
+++ b/tests/store-deduplication.scm
@@ -1,5 +1,5 @@
;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2018, 2020 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2018, 2020-2021 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
@@ -30,13 +30,40 @@
(test-begin "store-deduplication")
+(test-equal "deduplicate, below %deduplication-minimum-size"
+ (list #t (make-list 5 1))
+
+ (call-with-temporary-directory
+ (lambda (store)
+ ;; Note: DATA must be longer than %DEDUPLICATION-MINIMUM-SIZE.
+ (let ((data "Hello, world!")
+ (identical (map (lambda (n)
+ (string-append store "/" (number->string n)
+ "/a/b/c"))
+ (iota 5))))
+ (for-each (lambda (file)
+ (mkdir-p (dirname file))
+ (call-with-output-file file
+ (lambda (port)
+ (put-bytevector port (string->utf8 data)))))
+ identical)
+
+ (deduplicate store (nar-sha256 store) #:store store)
+
+ ;; (system (string-append "ls -lRia " store))
+ (list (= (length (delete-duplicates
+ (map (compose stat:ino stat) identical)))
+ (length identical))
+ (map (compose stat:nlink stat) identical))))))
+
(test-equal "deduplicate"
(cons* #t #f ;inode comparisons
2 (make-list 5 6)) ;'nlink' values
(call-with-temporary-directory
(lambda (store)
- (let ((data (string->utf8 "Hello, world!"))
+ ;; Note: DATA must be longer than %DEDUPLICATION-MINIMUM-SIZE.
+ (let ((data (string-concatenate (make-list 1000 "Hello, world!")))
(identical (map (lambda (n)
(string-append store "/" (number->string n)
"/a/b/c"))
@@ -46,7 +73,7 @@
(mkdir-p (dirname file))
(call-with-output-file file
(lambda (port)
- (put-bytevector port data))))
+ (put-bytevector port (string->utf8 data)))))
identical)
;; Make the parent of IDENTICAL read-only. This should not prevent
;; deduplication from inserting its hard link.
@@ -54,7 +81,7 @@
(call-with-output-file unique
(lambda (port)
- (put-bytevector port (string->utf8 "This is unique."))))
+ (put-bytevector port (string->utf8 (string-reverse data)))))
(deduplicate store (nar-sha256 store) #:store store)
@@ -77,8 +104,10 @@
(lambda (store)
(let ((true-link link)
(links 0)
- (data1 (string->utf8 "Hello, world!"))
- (data2 (string->utf8 "Hi, world!"))
+ (data1 (string->utf8
+ (string-concatenate (make-list 1000 "Hello, world!"))))
+ (data2 (string->utf8
+ (string-concatenate (make-list 1000 "Hi, world!"))))
(identical (map (lambda (n)
(string-append store "/" (number->string n)
"/a/b/c"))
diff --git a/tests/store.scm b/tests/store.scm
index 2150a0048c..5c9f651d6c 100644
--- a/tests/store.scm
+++ b/tests/store.scm
@@ -759,7 +759,9 @@
(test-assert "substitute, deduplication"
(with-store s
- (let* ((c (random-text)) ; contents of the output
+ ;; Note: C must be longer than %DEDUPLICATION-MINIMUM-SIZE.
+ (let* ((c (string-concatenate
+ (make-list 200 (random-text)))) ; contents of the output
(g (package-derivation s %bootstrap-guile))
(d1 (build-expression->derivation s "substitute-me"
`(begin ,c (exit 1))