me
/
guix
Archived
1
0
Fork 0

import: go: Improve synopsis and description parsing.

* guix/import/go.scm (%strict-tokenizer?): Set parameter to #t.
(go-path-escape): Redefine to prevent inlining.
(http-get*): Replace by ...
(http-fetch*): this ...
(json-fetch*): New procedure.
(go.pkg.dev-info): Use http-fetch*.
(go-package-licenses): Rewrite in terms of go.pkg.dev-info.
(go-package-description): Likewise.
(go-package-synopsis): Likewise.
(fetch-go.mod): Use the memoized http-fetch*.
(parse-go.mod): Adjust to receive content as a string.
(fetch-module-meta-data): Adjust to use http-fetch*.
(go-module->guix-package): Adjust to the modified fetch-go.mod return value.
[inputs]: Use propagated inputs, which is the most common situations for Go
libraries.
[description]: Beautify description.
[licenses]: Do no check for #f.  The result of the license parsing is always a
list.
* tests/go.scm: Adjust following above changes.
master
Maxim Cournoyer 2021-03-21 23:53:21 -04:00
parent 2446a112df
commit 6aee902eaf
No known key found for this signature in database
GPG Key ID: 1260E46482E63562
3 changed files with 169 additions and 121 deletions

View File

@ -33,7 +33,7 @@
#:use-module (guix http-client)
#:use-module ((guix licenses) #:prefix license:)
#:use-module (guix memoization)
#:autoload (htmlprag) (html->sxml) ;from Guile-Lib
#:use-module (htmlprag) ;from Guile-Lib
#:autoload (guix git) (update-cached-checkout)
#:autoload (gcrypt hash) (open-hash-port hash-algorithm sha256)
#:autoload (guix serialization) (write-file)
@ -43,20 +43,28 @@
#:use-module (ice-9 rdelim)
#:use-module (ice-9 receive)
#:use-module (ice-9 regex)
#:use-module (ice-9 textual-ports)
#:use-module ((rnrs io ports) #:select (call-with-port))
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-2)
#:use-module (srfi srfi-9)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-26)
#:use-module (sxml xpath)
#:use-module (sxml match)
#:use-module ((sxml xpath) #:renamer (lambda (s)
(if (eq? 'filter s)
'xfilter
s)))
#:use-module (web client)
#:use-module (web response)
#:use-module (web uri)
#:export (go-path-escape
go-module->guix-package
#:export (go-module->guix-package
go-module-recursive-import))
;;; Parameterize htmlprag to parse valid HTML more reliably.
(%strict-tokenizer? #t)
;;; Commentary:
;;;
;;; (guix import go) attempts to make it easier to create Guix package
@ -90,6 +98,14 @@
;;; Code:
(define http-fetch*
;; Like http-fetch, but memoized and returning the body as a string.
(memoize (lambda args
(call-with-port (apply http-fetch args) get-string-all))))
(define json-fetch*
(memoize json-fetch))
(define (go-path-escape path)
"Escape a module path by replacing every uppercase letter with an
exclamation mark followed with its lowercase equivalent, as per the module
@ -99,54 +115,73 @@ https://godoc.org/golang.org/x/mod/module#hdr-Escaped_Paths)."
(string-append "!" (string-downcase (match:substring occurrence))))
(regexp-substitute/global #f "[A-Z]" path 'pre escape 'post))
;; Prevent inlining of this procedure, which is accessed by unit tests.
(set! go-path-escape go-path-escape)
(define (go.pkg.dev-info name)
(http-fetch* (string-append "https://pkg.go.dev/" name)))
(define (go-module-latest-version goproxy-url module-path)
"Fetch the version number of the latest version for MODULE-PATH from the
given GOPROXY-URL server."
(assoc-ref (json-fetch (format #f "~a/~a/@latest" goproxy-url
(go-path-escape module-path)))
(assoc-ref (json-fetch* (format #f "~a/~a/@latest" goproxy-url
(go-path-escape module-path)))
"Version"))
(define (go-package-licenses name)
"Retrieve the list of licenses that apply to NAME, a Go package or module
name (e.g. \"github.com/golang/protobuf/proto\"). The data is scraped from
the https://pkg.go.dev/ web site."
(let*-values (((url) (string-append "https://pkg.go.dev/" name
"?tab=licenses"))
((response body) (http-get url))
;; Extract the text contained in a h2 child node of any
;; element marked with a "License" class attribute.
((select) (sxpath `(// (* (@ (equal? (class "License"))))
h2 // *text*))))
(and (eq? (response-code response) 200)
(match (select (html->sxml body))
(() #f) ;nothing selected
(licenses licenses)))))
name (e.g. \"github.com/golang/protobuf/proto\")."
(let* ((body (go.pkg.dev-info (string-append name "?tab=licenses")))
;; Extract the text contained in a h2 child node of any
;; element marked with a "License" class attribute.
(select (sxpath `(// (* (@ (equal? (class "License"))))
h2 // *text*))))
(select (html->sxml body))))
(define (go.pkg.dev-info name)
(http-get (string-append "https://pkg.go.dev/" name)))
(define go.pkg.dev-info*
(memoize go.pkg.dev-info))
(define (sxml->texi sxml-node)
"A very basic SXML to Texinfo converter which attempts to preserve HTML
formatting and links as text."
(sxml-match sxml-node
((strong ,text)
(format #f "@strong{~a}" text))
((a (@ (href ,url)) ,text)
(format #f "@url{~a,~a}" url text))
((code ,text)
(format #f "@code{~a}" text))
(,something-else something-else)))
(define (go-package-description name)
"Retrieve a short description for NAME, a Go package name,
e.g. \"google.golang.org/protobuf/proto\". The data is scraped from the
https://pkg.go.dev/ web site."
(let*-values (((response body) (go.pkg.dev-info* name))
;; Extract the text contained in a h2 child node of any
;; element marked with a "License" class attribute.
((select) (sxpath
`(// (section
(@ (equal? (class "Documentation-overview"))))
(p 1)))))
(and (eq? (response-code response) 200)
(match (select (html->sxml body))
(() #f) ;nothing selected
(((p . strings))
;; The paragraph text is returned as a list of strings embedding
;; newline characters. Join them and strip the newline
;; characters.
(string-delete #\newline (string-join strings)))))))
e.g. \"google.golang.org/protobuf/proto\"."
(let* ((body (go.pkg.dev-info name))
(sxml (html->sxml body))
(overview ((sxpath
`(//
(* (@ (equal? (class "Documentation-overview"))))
(p 1))) sxml))
;; Sometimes, the first paragraph just contains images/links that
;; has only "\n" for text. The following filter is designed to
;; omit it.
(contains-text? (lambda (node)
(remove string-null?
(map string-trim-both
(filter (node-typeof? '*text*)
(cdr node))))))
(select-content (sxpath
`(//
(* (@ (equal? (class "UnitReadme-content"))))
div // p ,(xfilter contains-text?))))
;; Fall-back to use content; this is less desirable as it is more
;; verbose, but not every page has an overview.
(description (if (not (null? overview))
overview
(select-content sxml)))
(description* (and (not (null? description))
(first description))))
(match description*
(() #f) ;nothing selected
((p elements ...)
(apply string-append (filter string? (map sxml->texi elements)))))))
(define (go-package-synopsis module-name)
"Retrieve a short synopsis for a Go module named MODULE-NAME,
@ -154,17 +189,17 @@ e.g. \"google.golang.org/protobuf\". The data is scraped from
the https://pkg.go.dev/ web site."
;; Note: Only the *module* (rather than package) page has the README title
;; used as a synopsis on the https://pkg.go.dev web site.
(let*-values (((response body) (go.pkg.dev-info* module-name))
;; Extract the text contained in a h2 child node of any
;; element marked with a "License" class attribute.
((select) (sxpath
`(// (div (@ (equal? (class "UnitReadme-content"))))
// h3 *text*))))
(and (eq? (response-code response) 200)
(match (select (html->sxml body))
(() #f) ;nothing selected
((title more ...) ;title is the first string of the list
(string-trim-both title))))))
(let* ((url (string-append "https://pkg.go.dev/" module-name))
(body (http-fetch* url))
;; Extract the text contained in a h2 child node of any
;; element marked with a "License" class attribute.
(select-title (sxpath
`(// (div (@ (equal? (class "UnitReadme-content"))))
// h3 *text*))))
(match (select-title (html->sxml body))
(() #f) ;nothing selected
((title more ...) ;title is the first string of the list
(string-trim-both title)))))
(define (list->licenses licenses)
"Given a list of LICENSES mostly following the SPDX conventions, return the
@ -189,13 +224,13 @@ corresponding Guix license or 'unknown-license!"
'unknown-license!)))
licenses))
(define (fetch-go.mod goproxy-url module-path version)
"Fetches go.mod from the given GOPROXY-URL server for the given MODULE-PATH
and VERSION."
(let ((url (format #f "~a/~a/@v/~a.mod" goproxy-url
(define (fetch-go.mod goproxy module-path version)
"Fetch go.mod from the given GOPROXY server for the given MODULE-PATH
and VERSION and return an input port."
(let ((url (format #f "~a/~a/@v/~a.mod" goproxy
(go-path-escape module-path)
(go-path-escape version))))
(http-fetch url)))
(http-fetch* url)))
(define %go.mod-require-directive-rx
;; A line in a require directive is composed of a module path and
@ -216,9 +251,8 @@ and VERSION."
"[[:blank:]]+" "=>" "[[:blank:]]+"
"([^[:blank:]]+)([[:blank:]]+([^[:blank:]]+))?")))
(define (parse-go.mod port)
"Parse the go.mod file accessible via the input PORT, returning a list of
requirements."
(define (parse-go.mod content)
"Parse the go.mod file CONTENT, returning a list of requirements."
(define-record-type <results>
(make-results requirements replacements)
results?
@ -229,7 +263,7 @@ requirements."
(define (toplevel results)
"Main parser, RESULTS is a pair of alist serving as accumulator for
all encountered requirements and replacements."
(let ((line (read-line port)))
(let ((line (read-line)))
(cond
((eof-object? line)
;; parsing ended, give back the result
@ -255,7 +289,7 @@ requirements."
(toplevel results)))))
(define (in-require results)
(let ((line (read-line port)))
(let ((line (read-line)))
(cond
((eof-object? line)
;; this should never happen here but we ignore silently
@ -267,7 +301,7 @@ requirements."
(in-require (require-directive results line))))))
(define (in-replace results)
(let ((line (read-line port)))
(let ((line (read-line)))
(cond
((eof-object? line)
;; this should never happen here but we ignore silently
@ -306,7 +340,9 @@ requirements."
(($ <results> requirements replaced)
(make-results (alist-cons module-path version requirements) replaced)))))
(let ((results (toplevel (make-results '() '()))))
(let ((results (with-input-from-string content
(lambda _
(toplevel (make-results '() '()))))))
(match results
(($ <results> requirements replaced)
;; At last we remove replaced modules from the requirements list
@ -325,8 +361,10 @@ requirements."
(url-prefix vcs-url-prefix)
(root-regex vcs-root-regex)
(type vcs-type))
(define (make-vcs prefix regexp type)
(%make-vcs prefix (make-regexp regexp) type))
(%make-vcs prefix (make-regexp regexp) type))
(define known-vcs
;; See the following URL for the official Go equivalent:
;; https://github.com/golang/go/blob/846dce9d05f19a1f53465e62a304dea21b99f910/src/cmd/go/internal/vcs/vcs.go#L1026-L1087
@ -387,6 +425,14 @@ hence the need to derive this information."
"/" "-")
"_" "-"))))
(define (strip-.git-suffix/maybe repo-url)
"Strip a repository URL '.git' suffix from REPO-URL if hosted at GitHub."
(match repo-url
((and (? (cut string-prefix? "https://github.com" <>))
(? (cut string-suffix? ".git" <>)))
(string-drop-right repo-url 4))
(_ repo-url)))
(define-record-type <module-meta>
(make-module-meta import-prefix vcs repo-root)
module-meta?
@ -399,21 +445,22 @@ hence the need to derive this information."
because goproxy servers don't currently provide all the information needed to
build a package."
;; <meta name="go-import" content="import-prefix vcs repo-root">
(let* ((port (http-fetch (format #f "https://~a?go-get=1" module-path)))
(let* ((meta-data (http-fetch* (format #f "https://~a?go-get=1" module-path)))
(select (sxpath `(// head (meta (@ (equal? (name "go-import"))))
// content))))
(match (select (call-with-port port html->sxml))
(() #f) ;nothing selected
(match (select (html->sxml meta-data))
(() #f) ;nothing selected
(((content content-text))
(match (string-split content-text #\space)
((root-path vcs repo-url)
(make-module-meta root-path (string->symbol vcs) repo-url)))))))
(make-module-meta root-path (string->symbol vcs)
(strip-.git-suffix/maybe repo-url))))))))
(define (module-meta-data-repo-url meta-data goproxy-url)
(define (module-meta-data-repo-url meta-data goproxy)
"Return the URL where the fetcher which will be used can download the
source."
(if (member (module-meta-vcs meta-data) '(fossil mod))
goproxy-url
goproxy
(module-meta-repo-root meta-data)))
;; XXX: Copied from (guix scripts hash).
@ -466,6 +513,9 @@ control system is being used."
(method git-fetch)
(uri (git-reference
(url ,vcs-repo-url)
;; This is done because the version field of the package,
;; which the generated quoted expression refers to, has been
;; stripped of any 'v' prefixed.
(commit ,(if (and plain-version? v-prefixed?)
'(string-append "v" version)
'(go-version->git-ref version)))))
@ -505,8 +555,8 @@ control system is being used."
(define* (go-module->guix-package module-path #:key
(goproxy-url "https://proxy.golang.org"))
(let* ((latest-version (go-module-latest-version goproxy-url module-path))
(port (fetch-go.mod goproxy-url module-path latest-version))
(dependencies (map car (call-with-port port parse-go.mod)))
(content (fetch-go.mod goproxy-url module-path latest-version))
(dependencies (map car (parse-go.mod content)))
(guix-name (go-module->guix-package-name module-path))
(root-module-path (module-path->repository-root module-path))
;; The VCS type and URL are not included in goproxy information. For
@ -527,14 +577,17 @@ control system is being used."
(build-system go-build-system)
(arguments
'(#:import-path ,root-module-path))
,@(maybe-inputs (map go-module->guix-package-name dependencies))
,@(maybe-propagated-inputs
(map go-module->guix-package-name dependencies))
(home-page ,(format #f "https://~a" root-module-path))
(synopsis ,synopsis)
(description ,description)
(license ,(match (and=> licenses list->licenses)
((license) license)
((licenses ...) `(list ,@licenses))
(x x))))
(description ,(and=> description beautify-description))
(license ,(match (list->licenses licenses)
(() #f) ;unknown license
((license) ;a single license
license)
((license ...) ;a list of licenses
`(list ,@license)))))
dependencies)))
(define go-module->guix-package* (memoize go-module->guix-package))

View File

@ -446,8 +446,8 @@ obtain a node's uniquely identifying \"key\"."
"Return a list of package expressions for PACKAGE-NAME and all its
dependencies, sorted in topological order. For each package,
call (REPO->GUIX-PACKAGE NAME :KEYS version repo), which should return a
package expression and a list of dependencies; call (GUIX-NAME NAME) to
obtain the Guix package name corresponding to the upstream name."
package expression and a list of dependencies; call (GUIX-NAME PACKAGE-NAME)
to obtain the Guix package name corresponding to the upstream name."
(define-record-type <node>
(make-node name version package dependencies)
node?

View File

@ -180,13 +180,9 @@ require github.com/kr/pretty v0.2.1
(define (testing-parse-mod name expected input)
(define (inf? p1 p2)
(string<? (car p1) (car p2)))
(let ((input-port (open-input-string input)))
(test-equal name
(sort expected inf?)
(sort
( (@@ (guix import go) parse-go.mod)
input-port)
inf?))))
(test-equal name
(sort expected inf?)
(sort ((@@ (guix import go) parse-go.mod) input) inf?)))
(testing-parse-mod "parse-go.mod-simple"
'(("good/thing" . "v1.4.5")
@ -249,44 +245,43 @@ require github.com/kr/pretty v0.2.1
(test-equal "go-module->guix-package"
'(package
(name "go-github-com-go-check-check")
(version "0.0.0-20201130134442-10cb98267c6c")
(source
(origin
(method git-fetch)
(uri (git-reference
(url "https://github.com/go-check/check.git")
(commit (go-version->git-ref version))))
(file-name (git-file-name name version))
(sha256
(base32
"0sjjj9z1dhilhpc8pq4154czrb79z9cm044jvn75kxcjv6v5l2m5"))))
(build-system go-build-system)
(arguments
(quote (#:import-path "github.com/go-check/check")))
(inputs
(quasiquote (("go-github-com-kr-pretty"
(unquote go-github-com-kr-pretty)))))
(home-page "https://github.com/go-check/check")
(synopsis "Instructions")
(description #f)
(license license:bsd-2))
(name "go-github-com-go-check-check")
(version "0.0.0-20201130134442-10cb98267c6c")
(source
(origin
(method git-fetch)
(uri (git-reference
(url "https://github.com/go-check/check")
(commit (go-version->git-ref version))))
(file-name (git-file-name name version))
(sha256
(base32
"0sjjj9z1dhilhpc8pq4154czrb79z9cm044jvn75kxcjv6v5l2m5"))))
(build-system go-build-system)
(arguments
'(#:import-path "github.com/go-check/check"))
(propagated-inputs
`(("go-github-com-kr-pretty" ,go-github-com-kr-pretty)))
(home-page "https://github.com/go-check/check")
(synopsis "Instructions")
(description "Package check is a rich testing extension for Go's testing \
package.")
(license license:bsd-2))
;; Replace network resources with sample data.
(call-with-temporary-directory
(lambda (checkout)
(mock ((web client) http-get
(mock-http-get fixtures-go-check-test))
(mock ((guix http-client) http-fetch
(mock-http-fetch fixtures-go-check-test))
(mock ((guix git) update-cached-checkout
(lambda* (url #:key ref)
;; Return an empty directory and its hash.
(values checkout
(nix-base32-string->bytevector
"0sjjj9z1dhilhpc8pq4154czrb79z9cm044jvn75kxcjv6v5l2m5")
#f)))
(go-module->guix-package "github.com/go-check/check")))))))
(mock ((guix http-client) http-fetch
(mock-http-fetch fixtures-go-check-test))
(mock ((guix git) update-cached-checkout
(lambda* (url #:key ref)
;; Return an empty directory and its hash.
(values checkout
(nix-base32-string->bytevector
"0sjjj9z1dhilhpc8pq4154czrb79z9cm044jvn75kxcjv6v5l2m5")
#f)))
(go-module->guix-package "github.com/go-check/check")))))))
(test-end "go")