From 2718c29c3fb9f9de2ec897248ad49ae11ca39b7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Thu, 10 Dec 2020 11:21:14 +0100 Subject: [PATCH] nar: Deduplicate files right as they are restored. This avoids having to traverse and re-read the files that we have just restored, thereby reducing I/O. * guix/serialization.scm (dump-file): New procedure. (restore-file): Add #:dump-file parameter and honor it. * guix/store/deduplication.scm (tee, dump-file/deduplicate): New procedures. * guix/nar.scm (restore-one-item): Pass #:dump-file to 'restore-file'. (finalize-store-file): Pass #:deduplicate? #f to 'register-items'. * tests/nar.scm : Call 'setenv' to set "NIX_STORE". --- guix/nar.scm | 12 ++++---- guix/serialization.scm | 27 ++++++++++++----- guix/store/deduplication.scm | 57 +++++++++++++++++++++++++++++++++++- tests/nar.scm | 3 ++ 4 files changed, 85 insertions(+), 14 deletions(-) diff --git a/guix/nar.scm b/guix/nar.scm index edfcc9aab5..ba035ca6dc 100644 --- a/guix/nar.scm +++ b/guix/nar.scm @@ -27,6 +27,7 @@ ;; (guix store) since this is "daemon-side" code. #:use-module (guix store) #:use-module (guix store database) + #:use-module ((guix store deduplication) #:select (dump-file/deduplicate)) #:use-module ((guix build store-copy) #:select (store-info)) #:use-module (guix i18n) @@ -114,12 +115,12 @@ held." ;; Install the new TARGET. (rename-file source target) - ;; Register TARGET. As a side effect, run a deduplication pass. - ;; Timestamps and permissions are already correct thanks to - ;; 'restore-file'. + ;; Register TARGET. The 'restore-file' call took care of + ;; deduplication, timestamps, and permissions. (register-items db (list (store-info target deriver references)) - #:reset-timestamps? #f)) + #:reset-timestamps? #f + #:deduplicate? #f)) (when lock? (delete-file (string-append target ".lock")) @@ -212,7 +213,8 @@ s-expression")) (let-values (((port get-hash) (open-sha256-input-port port))) (with-temporary-store-file temp - (restore-file port temp) + (restore-file port temp + #:dump-file dump-file/deduplicate) (let ((magic (read-int port))) (unless (= magic %export-magic) diff --git a/guix/serialization.scm b/guix/serialization.scm index 677ca60b66..9e2dce8bb0 100644 --- a/guix/serialization.scm +++ b/guix/serialization.scm @@ -457,9 +457,22 @@ depends on TYPE." (&message (message "unsupported nar entry type")) (&nar-read-error (port port) (file file) (token x))))))))) -(define (restore-file port file) +(define (dump-file file input size type) + "Dump SIZE bytes from INPUT to FILE." + (call-with-output-file file + (lambda (output) + (dump input output size)))) + +(define* (restore-file port file + #:key (dump-file dump-file)) "Read a file (possibly a directory structure) in Nar format from PORT. -Restore it as FILE with canonical permissions and timestamps." +Restore it as FILE with canonical permissions and timestamps. To write a +regular or executable file, call: + + (DUMP-FILE FILE INPUT SIZE TYPE) + +The default is to dump SIZE bytes from INPUT to FILE, but callers can provide +a custom procedure, for instance to deduplicate FILE on the fly." (fold-archive (lambda (file type content result) (match type ('directory @@ -473,12 +486,10 @@ Restore it as FILE with canonical permissions and timestamps." ((or 'regular 'executable) (match content ((input . size) - (call-with-output-file file - (lambda (output) - (dump input output size) - (chmod output (if (eq? type 'executable) - #o555 - #o444)))) + (dump-file file input size type) + (chmod file (if (eq? type 'executable) + #o555 + #o444)) (utime file 1 1 0 0)))))) #t port diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm index 0655ceb890..b4d37d4525 100644 --- a/guix/store/deduplication.scm +++ b/guix/store/deduplication.scm @@ -26,12 +26,15 @@ #:use-module (guix build syscalls) #:use-module (guix base32) #:use-module (srfi srfi-11) + #:use-module (srfi srfi-34) + #:use-module (srfi srfi-35) #:use-module (rnrs io ports) #:use-module (ice-9 ftw) #:use-module (ice-9 match) #:use-module (guix serialization) #:export (nar-sha256 - deduplicate)) + deduplicate + dump-file/deduplicate)) ;; XXX: This port is used as a workaround on Guile <= 2.2.4 where ;; 'port-position' throws to 'out-of-range' when the offset is great than or @@ -201,3 +204,55 @@ under STORE." ;; that's OK: we just can't deduplicate it more. #f) (else (apply throw args))))))))))) + +(define (tee input len output) + "Return a port that reads up to LEN bytes from INPUT and writes them to +OUTPUT as it goes." + (define bytes-read 0) + + (define (fail) + ;; Reached EOF before we had read LEN bytes from INPUT. + (raise (condition + (&nar-error (port input) + (file (port-filename output)))))) + + (define (read! bv start count) + ;; Read at most LEN bytes in total. + (let ((count (min count (- len bytes-read)))) + (let loop ((ret (get-bytevector-n! input bv start count))) + (cond ((eof-object? ret) + (if (= bytes-read len) + 0 ; EOF + (fail))) + ((and (zero? ret) (> count 0)) + ;; Do not return zero since zero means EOF, so try again. + (loop (get-bytevector-n! input bv start count))) + (else + (put-bytevector output bv start ret) + (set! bytes-read (+ bytes-read ret)) + ret))))) + + (make-custom-binary-input-port "tee input port" read! #f #f #f)) + +(define* (dump-file/deduplicate file input size type + #:key (store (%store-directory))) + "Write SIZE bytes read from INPUT to FILE. TYPE is a symbol, either +'regular or 'executable. + +This procedure is suitable as a #:dump-file argument to 'restore-file'. When +used that way, it deduplicates files on the fly as they are restored, thereby +removing the need to a deduplication pass that would re-read all the files +down the road." + (define hash + (call-with-output-file file + (lambda (output) + (let-values (((hash-port get-hash) + (open-hash-port (hash-algorithm sha256)))) + (write-file-tree file hash-port + #:file-type+size (lambda (_) (values type size)) + #:file-port + (const (tee input size output))) + (close-port hash-port) + (get-hash))))) + + (deduplicate file hash #:store store)) diff --git a/tests/nar.scm b/tests/nar.scm index 59616659c8..ba4881caaa 100644 --- a/tests/nar.scm +++ b/tests/nar.scm @@ -452,6 +452,9 @@ (false-if-exception (rm-rf %test-dir)) (setlocale LC_ALL locale))))) +;; XXX: Tell the 'deduplicate' procedure what store we're actually using. +(setenv "NIX_STORE" (%store-prefix)) + (test-assert "restore-file-set (signed, valid)" (with-store store (let* ((texts (unfold (cut >= <> 10)