From 4ff12d1de7cd617b791996ee7ca1240660b4c20e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Tue, 31 May 2022 17:17:10 +0200 Subject: [PATCH] profiles: Do not repeat entries in 'manifest' file. Fixes . Reported by Ricardo Wurmus . With this change, the manifest file created for: guix install r r-seurat r-cistopic r-monocle3 r-cicero-monocle3 r-assertthat goes from 5.7M to 176K. Likewise, on this profile, wall-clock time of: GUIX_PROFILING=gc guix package -I goes from 0.7s to 0.1s, with heap usage going from 55M to 9M. * guix/profiles.scm (manifest->gexp)[optional]: New procedure. [entry->gexp]: Turn into a monadic procedure. Return a 'repeated' sexp if ENTRY was already visited before. Adjust caller accordingly. Bump manifest version. (sexp->manifest)[sexp->manifest-entry]: Turn into a monadic procedure. Add case for 'repeated' nodes. Add each entry to the current state vhash. Add clause for version 4 manifests. [sexp->manifest-entry/v3]: New procedure, with former 'sexp->manifest-entry' code. * tests/profiles.scm ("deduplication of repeated entries"): New test. * guix/build/profiles.scm (manifest-sexp->inputs+search-paths)[let-fields]: New macro. Use it. Expect version 4. Add clause for 'repeated' nodes. --- guix/build/profiles.scm | 32 +++++++-- guix/profiles.scm | 139 ++++++++++++++++++++++++++++++++-------- tests/profiles.scm | 42 ++++++++++++ 3 files changed, 181 insertions(+), 32 deletions(-) diff --git a/guix/build/profiles.scm b/guix/build/profiles.scm index f9875ca92e..2ab76bde74 100644 --- a/guix/build/profiles.scm +++ b/guix/build/profiles.scm @@ -149,19 +149,33 @@ instead make DIRECTORY a \"real\" directory containing symlinks." "Parse MANIFEST, an sexp as produced by 'manifest->gexp', and return two values: the list of store items of its manifest entries, and the list of search path specifications." + (define-syntax let-fields + (syntax-rules () + ;; Bind the fields NAME of LST to same-named variables in the lexical + ;; scope of BODY. + ((_ lst (name rest ...) body ...) + (let ((name (match (assq 'name lst) + ((_ value) value) + (#f '())))) + (let-fields lst (rest ...) body ...))) + ((_ lst () body ...) + (begin body ...)))) + (match manifest ;this must match 'manifest->gexp' - (('manifest ('version 3) + (('manifest ('version 4) ('packages (entries ...))) (let loop ((entries entries) (inputs '()) (search-paths '())) (match entries - (((name version output item - ('propagated-inputs deps) - ('search-paths paths) _ ...) . rest) - (loop (append rest deps) ;breadth-first traversal - (cons item inputs) - (append paths search-paths))) + (((name version output item fields ...) . rest) + (let ((paths search-paths)) + (let-fields fields (propagated-inputs search-paths properties) + (loop (append rest propagated-inputs) ;breadth-first traversal + (cons item inputs) + (append search-paths paths))))) + ((('repeated name version item) . rest) + (loop rest inputs search-paths)) (() (values (reverse inputs) (delete-duplicates @@ -212,4 +226,8 @@ search paths of MANIFEST's entries." ;; Write 'OUTPUT/etc/profile'. (build-etc/profile output search-paths))) +;;; Local Variables: +;;; eval: (put 'let-fields 'scheme-indent-function 2) +;;; End: + ;;; profile.scm ends here diff --git a/guix/profiles.scm b/guix/profiles.scm index bf50c00a1e..701852ae98 100644 --- a/guix/profiles.scm +++ b/guix/profiles.scm @@ -454,32 +454,58 @@ denoting a specific output of a package." (define (manifest->gexp manifest) "Return a representation of MANIFEST as a gexp." + (define (optional name value) + (if (null? value) + #~() + #~((#$name #$value)))) + (define (entry->gexp entry) - (match entry - (($ name version output (? string? path) - (deps ...) (search-paths ...) _ (properties ...)) - #~(#$name #$version #$output #$path - (propagated-inputs #$(map entry->gexp deps)) - (search-paths #$(map search-path-specification->sexp - search-paths)) - #$@(if (null? properties) - #~() - #~((properties . #$properties))))) - (($ name version output package - (deps ...) (search-paths ...) _ (properties ...)) - #~(#$name #$version #$output - (ungexp package (or output "out")) - (propagated-inputs #$(map entry->gexp deps)) - (search-paths #$(map search-path-specification->sexp - search-paths)) - #$@(if (null? properties) - #~() - #~((properties . #$properties))))))) + ;; Maintain in state monad a vhash of visited entries, indexed by their + ;; item, usually package objects (we cannot use the entry itself as an + ;; index since identical entries are usually not 'eq?'). Use that vhash + ;; to avoid repeating duplicate entries. This is particularly useful in + ;; the presence of propagated inputs, where we could otherwise end up + ;; repeating large trees. + (mlet %state-monad ((visited (current-state))) + (if (match (vhash-assq (manifest-entry-item entry) visited) + ((_ . previous-entry) + (manifest-entry=? previous-entry entry)) + (#f #f)) + (return #~(repeated #$(manifest-entry-name entry) + #$(manifest-entry-version entry) + (ungexp (manifest-entry-item entry) + (manifest-entry-output entry)))) + (mbegin %state-monad + (set-current-state (vhash-consq (manifest-entry-item entry) + entry visited)) + (mlet %state-monad ((deps (mapm %state-monad entry->gexp + (manifest-entry-dependencies entry)))) + (return + (match entry + (($ name version output (? string? path) + (_ ...) (search-paths ...) _ (properties ...)) + #~(#$name #$version #$output #$path + #$@(optional 'propagated-inputs deps) + #$@(optional 'search-paths + (map search-path-specification->sexp + search-paths)) + #$@(optional 'properties properties))) + (($ name version output package + (_deps ...) (search-paths ...) _ (properties ...)) + #~(#$name #$version #$output + (ungexp package (or output "out")) + #$@(optional 'propagated-inputs deps) + #$@(optional 'search-paths + (map search-path-specification->sexp + search-paths)) + #$@(optional 'properties properties)))))))))) (match manifest (($ (entries ...)) - #~(manifest (version 3) - (packages #$(map entry->gexp entries)))))) + #~(manifest (version 4) + (packages #$(run-with-state + (mapm %state-monad entry->gexp entries) + vlist-null)))))) (define (find-package name version) "Return a package from the distro matching NAME and possibly VERSION. This @@ -520,14 +546,15 @@ procedure is here for backward-compatibility and will eventually vanish." (item item) (parent parent)))) - (define* (sexp->manifest-entry sexp #:optional (parent (delay #f))) + (define* (sexp->manifest-entry/v3 sexp #:optional (parent (delay #f))) + ;; Read SEXP as a version 3 manifest entry. (match sexp ((name version output path ('propagated-inputs deps) ('search-paths search-paths) extra-stuff ...) ;; For each of DEPS, keep a promise pointing to ENTRY. - (letrec* ((deps* (map (cut sexp->manifest-entry <> (delay entry)) + (letrec* ((deps* (map (cut sexp->manifest-entry/v3 <> (delay entry)) deps)) (entry (manifest-entry (name name) @@ -542,6 +569,56 @@ procedure is here for backward-compatibility and will eventually vanish." '()))))) entry)))) + (define-syntax let-fields + (syntax-rules () + ;; Bind the fields NAME of LST to same-named variables in the lexical + ;; scope of BODY. + ((_ lst (name rest ...) body ...) + (let ((name (match (assq 'name lst) + ((_ value) value) + (#f '())))) + (let-fields lst (rest ...) body ...))) + ((_ lst () body ...) + (begin body ...)))) + + (define* (sexp->manifest-entry sexp #:optional (parent (delay #f))) + (match sexp + (('repeated name version path) + ;; This entry is the same as another one encountered earlier; look it + ;; up and return it. + (mlet %state-monad ((visited (current-state)) + (key -> (list name version path))) + (match (vhash-assoc key visited) + (#f + (raise (formatted-message + (G_ "invalid repeated entry in profile: ~s") + sexp))) + ((_ . entry) + (return entry))))) + ((name version output path fields ...) + (let-fields fields (propagated-inputs search-paths properties) + (mlet* %state-monad + ((entry -> #f) + (deps (mapm %state-monad + (cut sexp->manifest-entry <> (delay entry)) + propagated-inputs)) + (visited (current-state)) + (key -> (list name version path))) + (set! entry ;XXX: emulate 'letrec*' + (manifest-entry + (name name) + (version version) + (output output) + (item path) + (dependencies deps) + (search-paths (map sexp->search-path-specification + search-paths)) + (parent parent) + (properties properties))) + (mbegin %state-monad + (set-current-state (vhash-cons key entry visited)) + (return entry))))))) + (match sexp (('manifest ('version 0) ('packages ((name version output path) ...))) @@ -608,7 +685,15 @@ procedure is here for backward-compatibility and will eventually vanish." ;; Version 3 represents DEPS as full-blown manifest entries. (('manifest ('version 3 minor-version ...) ('packages (entries ...))) - (manifest (map sexp->manifest-entry entries))) + (manifest (map sexp->manifest-entry/v3 entries))) + + ;; Version 4 deduplicates repeated entries and makes manifest entry fields + ;; such as 'propagated-inputs' and 'search-paths' optional. + (('manifest ('version 4 minor-version ...) + ('packages (entries ...))) + (manifest (run-with-state + (mapm %state-monad sexp->manifest-entry entries) + vlist-null))) (_ (raise (condition (&message (message "unsupported manifest format"))))))) @@ -2317,4 +2402,8 @@ PROFILE refers to, directly or indirectly, or PROFILE." %known-shorthand-profiles) profile)) +;;; Local Variables: +;;; eval: (put 'let-fields 'scheme-indent-function 2) +;;; End: + ;;; profiles.scm ends here diff --git a/tests/profiles.scm b/tests/profiles.scm index a026f6e238..f002dfc5e4 100644 --- a/tests/profiles.scm +++ b/tests/profiles.scm @@ -586,6 +586,48 @@ #:locales? #f))) (return #f))))) +(test-assertm "deduplication of repeated entries" + ;; Make sure the 'manifest' file does not duplicate identical entries. + ;; See . + (mlet* %store-monad ((p0 -> (dummy-package "p0" + (build-system trivial-build-system) + (arguments + `(#:guile ,%bootstrap-guile + #:builder (mkdir (assoc-ref %outputs "out")))) + (propagated-inputs + `(("guile" ,%bootstrap-guile))))) + (p1 -> (package + (inherit p0) + (name "p1"))) + (drv (profile-derivation (packages->manifest + (list p0 p1)) + #:hooks '() + #:locales? #f))) + (mbegin %store-monad + (built-derivations (list drv)) + (let ((file (string-append (derivation->output-path drv) + "/manifest")) + (manifest (profile-manifest (derivation->output-path drv)))) + (define (contains-repeated? sexp) + (match sexp + (('repeated _ ...) #t) + ((lst ...) (any contains-repeated? sexp)) + (_ #f))) + + (return (and (contains-repeated? (call-with-input-file file read)) + + ;; MANIFEST has two entries for %BOOTSTRAP-GUILE since + ;; it's propagated both from P0 and from P1. When + ;; reading a 'repeated' node, 'read-manifest' should + ;; reuse the previously-read entry so the two + ;; %BOOTSTRAP-GUILE entries must be 'eq?'. + (match (manifest-entries manifest) + (((= manifest-entry-dependencies (dep0)) + (= manifest-entry-dependencies (dep1))) + (and (string=? (manifest-entry-name dep0) + (package-name %bootstrap-guile)) + (eq? dep0 dep1)))))))))) + (test-assertm "no collision" ;; Here we have an entry that is "lowered" (its 'item' field is a store file ;; name) and another entry (its 'item' field is a package) that is