nar: Deduplicate files right as they are restored.
This avoids having to traverse and re-read the files that we have just restored, thereby reducing I/O. * guix/serialization.scm (dump-file): New procedure. (restore-file): Add #:dump-file parameter and honor it. * guix/store/deduplication.scm (tee, dump-file/deduplicate): New procedures. * guix/nar.scm (restore-one-item): Pass #:dump-file to 'restore-file'. (finalize-store-file): Pass #:deduplicate? #f to 'register-items'. * tests/nar.scm <top level>: Call 'setenv' to set "NIX_STORE".master
parent
ed7d02f7c1
commit
2718c29c3f
12
guix/nar.scm
12
guix/nar.scm
|
@ -27,6 +27,7 @@
|
||||||
;; (guix store) since this is "daemon-side" code.
|
;; (guix store) since this is "daemon-side" code.
|
||||||
#:use-module (guix store)
|
#:use-module (guix store)
|
||||||
#:use-module (guix store database)
|
#:use-module (guix store database)
|
||||||
|
#:use-module ((guix store deduplication) #:select (dump-file/deduplicate))
|
||||||
#:use-module ((guix build store-copy) #:select (store-info))
|
#:use-module ((guix build store-copy) #:select (store-info))
|
||||||
|
|
||||||
#:use-module (guix i18n)
|
#:use-module (guix i18n)
|
||||||
|
@ -114,12 +115,12 @@ held."
|
||||||
;; Install the new TARGET.
|
;; Install the new TARGET.
|
||||||
(rename-file source target)
|
(rename-file source target)
|
||||||
|
|
||||||
;; Register TARGET. As a side effect, run a deduplication pass.
|
;; Register TARGET. The 'restore-file' call took care of
|
||||||
;; Timestamps and permissions are already correct thanks to
|
;; deduplication, timestamps, and permissions.
|
||||||
;; 'restore-file'.
|
|
||||||
(register-items db
|
(register-items db
|
||||||
(list (store-info target deriver references))
|
(list (store-info target deriver references))
|
||||||
#:reset-timestamps? #f))
|
#:reset-timestamps? #f
|
||||||
|
#:deduplicate? #f))
|
||||||
|
|
||||||
(when lock?
|
(when lock?
|
||||||
(delete-file (string-append target ".lock"))
|
(delete-file (string-append target ".lock"))
|
||||||
|
@ -212,7 +213,8 @@ s-expression"))
|
||||||
(let-values (((port get-hash)
|
(let-values (((port get-hash)
|
||||||
(open-sha256-input-port port)))
|
(open-sha256-input-port port)))
|
||||||
(with-temporary-store-file temp
|
(with-temporary-store-file temp
|
||||||
(restore-file port temp)
|
(restore-file port temp
|
||||||
|
#:dump-file dump-file/deduplicate)
|
||||||
|
|
||||||
(let ((magic (read-int port)))
|
(let ((magic (read-int port)))
|
||||||
(unless (= magic %export-magic)
|
(unless (= magic %export-magic)
|
||||||
|
|
|
@ -457,9 +457,22 @@ depends on TYPE."
|
||||||
(&message (message "unsupported nar entry type"))
|
(&message (message "unsupported nar entry type"))
|
||||||
(&nar-read-error (port port) (file file) (token x)))))))))
|
(&nar-read-error (port port) (file file) (token x)))))))))
|
||||||
|
|
||||||
(define (restore-file port file)
|
(define (dump-file file input size type)
|
||||||
|
"Dump SIZE bytes from INPUT to FILE."
|
||||||
|
(call-with-output-file file
|
||||||
|
(lambda (output)
|
||||||
|
(dump input output size))))
|
||||||
|
|
||||||
|
(define* (restore-file port file
|
||||||
|
#:key (dump-file dump-file))
|
||||||
"Read a file (possibly a directory structure) in Nar format from PORT.
|
"Read a file (possibly a directory structure) in Nar format from PORT.
|
||||||
Restore it as FILE with canonical permissions and timestamps."
|
Restore it as FILE with canonical permissions and timestamps. To write a
|
||||||
|
regular or executable file, call:
|
||||||
|
|
||||||
|
(DUMP-FILE FILE INPUT SIZE TYPE)
|
||||||
|
|
||||||
|
The default is to dump SIZE bytes from INPUT to FILE, but callers can provide
|
||||||
|
a custom procedure, for instance to deduplicate FILE on the fly."
|
||||||
(fold-archive (lambda (file type content result)
|
(fold-archive (lambda (file type content result)
|
||||||
(match type
|
(match type
|
||||||
('directory
|
('directory
|
||||||
|
@ -473,12 +486,10 @@ Restore it as FILE with canonical permissions and timestamps."
|
||||||
((or 'regular 'executable)
|
((or 'regular 'executable)
|
||||||
(match content
|
(match content
|
||||||
((input . size)
|
((input . size)
|
||||||
(call-with-output-file file
|
(dump-file file input size type)
|
||||||
(lambda (output)
|
(chmod file (if (eq? type 'executable)
|
||||||
(dump input output size)
|
|
||||||
(chmod output (if (eq? type 'executable)
|
|
||||||
#o555
|
#o555
|
||||||
#o444))))
|
#o444))
|
||||||
(utime file 1 1 0 0))))))
|
(utime file 1 1 0 0))))))
|
||||||
#t
|
#t
|
||||||
port
|
port
|
||||||
|
|
|
@ -26,12 +26,15 @@
|
||||||
#:use-module (guix build syscalls)
|
#:use-module (guix build syscalls)
|
||||||
#:use-module (guix base32)
|
#:use-module (guix base32)
|
||||||
#:use-module (srfi srfi-11)
|
#:use-module (srfi srfi-11)
|
||||||
|
#:use-module (srfi srfi-34)
|
||||||
|
#:use-module (srfi srfi-35)
|
||||||
#:use-module (rnrs io ports)
|
#:use-module (rnrs io ports)
|
||||||
#:use-module (ice-9 ftw)
|
#:use-module (ice-9 ftw)
|
||||||
#:use-module (ice-9 match)
|
#:use-module (ice-9 match)
|
||||||
#:use-module (guix serialization)
|
#:use-module (guix serialization)
|
||||||
#:export (nar-sha256
|
#:export (nar-sha256
|
||||||
deduplicate))
|
deduplicate
|
||||||
|
dump-file/deduplicate))
|
||||||
|
|
||||||
;; XXX: This port is used as a workaround on Guile <= 2.2.4 where
|
;; XXX: This port is used as a workaround on Guile <= 2.2.4 where
|
||||||
;; 'port-position' throws to 'out-of-range' when the offset is great than or
|
;; 'port-position' throws to 'out-of-range' when the offset is great than or
|
||||||
|
@ -201,3 +204,55 @@ under STORE."
|
||||||
;; that's OK: we just can't deduplicate it more.
|
;; that's OK: we just can't deduplicate it more.
|
||||||
#f)
|
#f)
|
||||||
(else (apply throw args)))))))))))
|
(else (apply throw args)))))))))))
|
||||||
|
|
||||||
|
(define (tee input len output)
|
||||||
|
"Return a port that reads up to LEN bytes from INPUT and writes them to
|
||||||
|
OUTPUT as it goes."
|
||||||
|
(define bytes-read 0)
|
||||||
|
|
||||||
|
(define (fail)
|
||||||
|
;; Reached EOF before we had read LEN bytes from INPUT.
|
||||||
|
(raise (condition
|
||||||
|
(&nar-error (port input)
|
||||||
|
(file (port-filename output))))))
|
||||||
|
|
||||||
|
(define (read! bv start count)
|
||||||
|
;; Read at most LEN bytes in total.
|
||||||
|
(let ((count (min count (- len bytes-read))))
|
||||||
|
(let loop ((ret (get-bytevector-n! input bv start count)))
|
||||||
|
(cond ((eof-object? ret)
|
||||||
|
(if (= bytes-read len)
|
||||||
|
0 ; EOF
|
||||||
|
(fail)))
|
||||||
|
((and (zero? ret) (> count 0))
|
||||||
|
;; Do not return zero since zero means EOF, so try again.
|
||||||
|
(loop (get-bytevector-n! input bv start count)))
|
||||||
|
(else
|
||||||
|
(put-bytevector output bv start ret)
|
||||||
|
(set! bytes-read (+ bytes-read ret))
|
||||||
|
ret)))))
|
||||||
|
|
||||||
|
(make-custom-binary-input-port "tee input port" read! #f #f #f))
|
||||||
|
|
||||||
|
(define* (dump-file/deduplicate file input size type
|
||||||
|
#:key (store (%store-directory)))
|
||||||
|
"Write SIZE bytes read from INPUT to FILE. TYPE is a symbol, either
|
||||||
|
'regular or 'executable.
|
||||||
|
|
||||||
|
This procedure is suitable as a #:dump-file argument to 'restore-file'. When
|
||||||
|
used that way, it deduplicates files on the fly as they are restored, thereby
|
||||||
|
removing the need to a deduplication pass that would re-read all the files
|
||||||
|
down the road."
|
||||||
|
(define hash
|
||||||
|
(call-with-output-file file
|
||||||
|
(lambda (output)
|
||||||
|
(let-values (((hash-port get-hash)
|
||||||
|
(open-hash-port (hash-algorithm sha256))))
|
||||||
|
(write-file-tree file hash-port
|
||||||
|
#:file-type+size (lambda (_) (values type size))
|
||||||
|
#:file-port
|
||||||
|
(const (tee input size output)))
|
||||||
|
(close-port hash-port)
|
||||||
|
(get-hash)))))
|
||||||
|
|
||||||
|
(deduplicate file hash #:store store))
|
||||||
|
|
|
@ -452,6 +452,9 @@
|
||||||
(false-if-exception (rm-rf %test-dir))
|
(false-if-exception (rm-rf %test-dir))
|
||||||
(setlocale LC_ALL locale)))))
|
(setlocale LC_ALL locale)))))
|
||||||
|
|
||||||
|
;; XXX: Tell the 'deduplicate' procedure what store we're actually using.
|
||||||
|
(setenv "NIX_STORE" (%store-prefix))
|
||||||
|
|
||||||
(test-assert "restore-file-set (signed, valid)"
|
(test-assert "restore-file-set (signed, valid)"
|
||||||
(with-store store
|
(with-store store
|
||||||
(let* ((texts (unfold (cut >= <> 10)
|
(let* ((texts (unfold (cut >= <> 10)
|
||||||
|
|
Reference in New Issue