diff options
Diffstat (limited to 'guix/import/go.scm')
-rw-r--r-- | guix/import/go.scm | 547 |
1 files changed, 547 insertions, 0 deletions
diff --git a/guix/import/go.scm b/guix/import/go.scm new file mode 100644 index 0000000000..7452b4c903 --- /dev/null +++ b/guix/import/go.scm @@ -0,0 +1,547 @@ +;;; GNU Guix --- Functional package management for GNU +;;; Copyright © 2020 Katherine Cox-Buday <cox.katherine.e@gmail.com> +;;; Copyright © 2020 Helio Machado <0x2b3bfa0+guix@googlemail.com> +;;; Copyright © 2021 François Joulaud <francois.joulaud@radiofrance.com> +;;; Copyright © 2021 Maxim Cournoyer <maxim.cournoyer@gmail.com> +;;; Copyright © 2021 Ludovic Courtès <ludo@gnu.org> +;;; +;;; This file is part of GNU Guix. +;;; +;;; GNU Guix is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; GNU Guix is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. + +(define-module (guix import go) + #:use-module (guix build-system go) + #:use-module (guix git) + #:use-module (guix i18n) + #:use-module (guix diagnostics) + #:use-module (guix import utils) + #:use-module (guix import json) + #:use-module (guix packages) + #:use-module ((guix utils) #:select (string-replace-substring)) + #:use-module (guix http-client) + #:use-module ((guix licenses) #:prefix license:) + #:use-module (guix memoization) + #:autoload (htmlprag) (html->sxml) ;from Guile-Lib + #:autoload (guix git) (update-cached-checkout) + #:autoload (gcrypt hash) (open-hash-port hash-algorithm sha256) + #:autoload (guix serialization) (write-file) + #:autoload (guix base32) (bytevector->nix-base32-string) + #:autoload (guix build utils) (mkdir-p) + #:use-module (ice-9 match) + #:use-module (ice-9 rdelim) + #:use-module (ice-9 receive) + #:use-module (ice-9 regex) + #:use-module ((rnrs io ports) #:select (call-with-port)) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-9) + #:use-module (srfi srfi-11) + #:use-module (srfi srfi-26) + #:use-module (sxml xpath) + #:use-module (web client) + #:use-module (web response) + #:use-module (web uri) + + #:export (go-path-escape + go-module->guix-package + go-module-recursive-import)) + +;;; Commentary: +;;; +;;; (guix import go) attempts to make it easier to create Guix package +;;; declarations for Go modules. +;;; +;;; Modules in Go are a "collection of related Go packages" which are "the +;;; unit of source code interchange and versioning". Modules are generally +;;; hosted in a repository. +;;; +;;; At this point it should handle correctly modules which have only Go +;;; dependencies and are accessible from proxy.golang.org (or configured via +;;; GOPROXY). +;;; +;;; We want it to work more or less this way: +;;; - get latest version for the module from GOPROXY +;;; - infer VCS root repo from which we will check-out source by +;;; + recognising known patterns (like github.com) +;;; + or recognizing .vcs suffix +;;; + or parsing meta tag in HTML served at the URL +;;; + or (TODO) if nothing else works by using zip file served by GOPROXY +;;; - get go.mod from GOPROXY (which is able to synthetize one if needed) +;;; - extract list of dependencies from this go.mod +;;; +;;; The Go module paths are translated to a Guix package name under the +;;; assumption that there will be no collision. + +;;; TODO list +;;; - get correct hash in vcs->origin +;;; - print partial result during recursive imports (need to catch +;;; exceptions) + +;;; Code: + +(define (go-path-escape path) + "Escape a module path by replacing every uppercase letter with an +exclamation mark followed with its lowercase equivalent, as per the module +Escaped Paths specification (see: +https://godoc.org/golang.org/x/mod/module#hdr-Escaped_Paths)." + (define (escape occurrence) + (string-append "!" (string-downcase (match:substring occurrence)))) + (regexp-substitute/global #f "[A-Z]" path 'pre escape 'post)) + +(define (go-module-latest-version goproxy-url module-path) + "Fetch the version number of the latest version for MODULE-PATH from the +given GOPROXY-URL server." + (assoc-ref (json-fetch (format #f "~a/~a/@latest" goproxy-url + (go-path-escape module-path))) + "Version")) + + +(define (go-package-licenses name) + "Retrieve the list of licenses that apply to NAME, a Go package or module +name (e.g. \"github.com/golang/protobuf/proto\"). The data is scraped from +the https://pkg.go.dev/ web site." + (let*-values (((url) (string-append "https://pkg.go.dev/" name + "?tab=licenses")) + ((response body) (http-get url)) + ;; Extract the text contained in a h2 child node of any + ;; element marked with a "License" class attribute. + ((select) (sxpath `(// (* (@ (equal? (class "License")))) + h2 // *text*)))) + (and (eq? (response-code response) 200) + (match (select (html->sxml body)) + (() #f) ;nothing selected + (licenses licenses))))) + +(define (go.pkg.dev-info name) + (http-get (string-append "https://pkg.go.dev/" name))) +(define go.pkg.dev-info* + (memoize go.pkg.dev-info)) + +(define (go-package-description name) + "Retrieve a short description for NAME, a Go package name, +e.g. \"google.golang.org/protobuf/proto\". The data is scraped from the +https://pkg.go.dev/ web site." + (let*-values (((response body) (go.pkg.dev-info* name)) + ;; Extract the text contained in a h2 child node of any + ;; element marked with a "License" class attribute. + ((select) (sxpath + `(// (section + (@ (equal? (class "Documentation-overview")))) + (p 1))))) + (and (eq? (response-code response) 200) + (match (select (html->sxml body)) + (() #f) ;nothing selected + (((p . strings)) + ;; The paragraph text is returned as a list of strings embedding + ;; newline characters. Join them and strip the newline + ;; characters. + (string-delete #\newline (string-join strings))))))) + +(define (go-package-synopsis module-name) + "Retrieve a short synopsis for a Go module named MODULE-NAME, +e.g. \"google.golang.org/protobuf\". The data is scraped from +the https://pkg.go.dev/ web site." + ;; Note: Only the *module* (rather than package) page has the README title + ;; used as a synopsis on the https://pkg.go.dev web site. + (let*-values (((response body) (go.pkg.dev-info* module-name)) + ;; Extract the text contained in a h2 child node of any + ;; element marked with a "License" class attribute. + ((select) (sxpath + `(// (div (@ (equal? (class "UnitReadme-content")))) + // h3 *text*)))) + (and (eq? (response-code response) 200) + (match (select (html->sxml body)) + (() #f) ;nothing selected + ((title more ...) ;title is the first string of the list + (string-trim-both title)))))) + +(define (list->licenses licenses) + "Given a list of LICENSES mostly following the SPDX conventions, return the +corresponding Guix license or 'unknown-license!" + (filter-map (lambda (license) + (and (not (string-null? license)) + (not (any (cut string=? <> license) + '("AND" "OR" "WITH"))) + ;; Adjust the license names scraped from + ;; https://pkg.go.dev to an equivalent SPDX identifier, + ;; if they differ (see: https://github.com/golang/pkgsite + ;; /internal/licenses/licenses.go#L174). + (or (spdx-string->license + (match license + ("BlueOak-1.0" "BlueOak-1.0.0") + ("BSD-0-Clause" "0BSD") + ("BSD-2-Clause" "BSD-2-Clause-FreeBSD") + ("GPL2" "GPL-2.0") + ("GPL3" "GPL-3.0") + ("NIST" "NIST-PD") + (_ license))) + 'unknown-license!))) + licenses)) + +(define (fetch-go.mod goproxy-url module-path version) + "Fetches go.mod from the given GOPROXY-URL server for the given MODULE-PATH +and VERSION." + (let ((url (format #f "~a/~a/@v/~a.mod" goproxy-url + (go-path-escape module-path) + (go-path-escape version)))) + (http-fetch url))) + +(define %go.mod-require-directive-rx + ;; A line in a require directive is composed of a module path and + ;; a version separated by whitespace and an optionnal '//' comment at + ;; the end. + (make-regexp + (string-append + "^[[:blank:]]*" + "([^[:blank:]]+)[[:blank:]]+([^[:blank:]]+)" + "([[:blank:]]+//.*)?"))) + +(define %go.mod-replace-directive-rx + ;; ReplaceSpec = ModulePath [ Version ] "=>" FilePath newline + ;; | ModulePath [ Version ] "=>" ModulePath Version newline . + (make-regexp + (string-append + "([^[:blank:]]+)([[:blank:]]+([^[:blank:]]+))?" + "[[:blank:]]+" "=>" "[[:blank:]]+" + "([^[:blank:]]+)([[:blank:]]+([^[:blank:]]+))?"))) + +(define (parse-go.mod port) + "Parse the go.mod file accessible via the input PORT, returning a list of +requirements." + (define-record-type <results> + (make-results requirements replacements) + results? + (requirements results-requirements) + (replacements results-replacements)) + ;; We parse only a subset of https://golang.org/ref/mod#go-mod-file-grammar + ;; which we think necessary for our use case. + (define (toplevel results) + "Main parser, RESULTS is a pair of alist serving as accumulator for + all encountered requirements and replacements." + (let ((line (read-line port))) + (cond + ((eof-object? line) + ;; parsing ended, give back the result + results) + ((string=? line "require (") + ;; a require block begins, delegate parsing to IN-REQUIRE + (in-require results)) + ((string=? line "replace (") + ;; a replace block begins, delegate parsing to IN-REPLACE + (in-replace results)) + ((string-prefix? "require " line) + ;; a standalone require directive + (let* ((stripped-line (string-drop line 8)) + (new-results (require-directive results stripped-line))) + (toplevel new-results))) + ((string-prefix? "replace " line) + ;; a standalone replace directive + (let* ((stripped-line (string-drop line 8)) + (new-results (replace-directive results stripped-line))) + (toplevel new-results))) + (#t + ;; unrecognised line, ignore silently + (toplevel results))))) + + (define (in-require results) + (let ((line (read-line port))) + (cond + ((eof-object? line) + ;; this should never happen here but we ignore silently + results) + ((string=? line ")") + ;; end of block, coming back to toplevel + (toplevel results)) + (#t + (in-require (require-directive results line)))))) + + (define (in-replace results) + (let ((line (read-line port))) + (cond + ((eof-object? line) + ;; this should never happen here but we ignore silently + results) + ((string=? line ")") + ;; end of block, coming back to toplevel + (toplevel results)) + (#t + (in-replace (replace-directive results line)))))) + + (define (replace-directive results line) + "Extract replaced modules and new requirements from replace directive + in LINE and add to RESULTS." + (match results + (($ <results> requirements replaced) + (let* ((rx-match (regexp-exec %go.mod-replace-directive-rx line)) + (module-path (match:substring rx-match 1)) + (version (match:substring rx-match 3)) + (new-module-path (match:substring rx-match 4)) + (new-version (match:substring rx-match 6)) + (new-replaced (alist-cons module-path version replaced)) + (new-requirements + (if (string-match "^\\.?\\./" new-module-path) + requirements + (alist-cons new-module-path new-version requirements)))) + (make-results new-requirements new-replaced))))) + (define (require-directive results line) + "Extract requirement from LINE and add it to RESULTS." + (let* ((rx-match (regexp-exec %go.mod-require-directive-rx line)) + (module-path (match:substring rx-match 1)) + ;; we saw double-quoted string in the wild without escape + ;; sequences so we just trim the quotes + (module-path (string-trim-both module-path #\")) + (version (match:substring rx-match 2))) + (match results + (($ <results> requirements replaced) + (make-results (alist-cons module-path version requirements) replaced))))) + + (let ((results (toplevel (make-results '() '())))) + (match results + (($ <results> requirements replaced) + ;; At last we remove replaced modules from the requirements list + (fold + (lambda (replacedelem requirements) + (alist-delete! (car replacedelem) requirements)) + requirements + replaced))))) + +;; Prevent inlining of this procedure, which is accessed by unit tests. +(set! parse-go.mod parse-go.mod) + +(define-record-type <vcs> + (%make-vcs url-prefix root-regex type) + vcs? + (url-prefix vcs-url-prefix) + (root-regex vcs-root-regex) + (type vcs-type)) +(define (make-vcs prefix regexp type) + (%make-vcs prefix (make-regexp regexp) type)) +(define known-vcs + ;; See the following URL for the official Go equivalent: + ;; https://github.com/golang/go/blob/846dce9d05f19a1f53465e62a304dea21b99f910/src/cmd/go/internal/vcs/vcs.go#L1026-L1087 + (list + (make-vcs + "github.com" + "^(github\\.com/[A-Za-z0-9_.\\-]+/[A-Za-z0-9_.\\-]+)(/[A-Za-z0-9_.\\-]+)*$" + 'git) + (make-vcs + "bitbucket.org" + "^(bitbucket\\.org/([A-Za-z0-9_.\\-]+/[A-Za-z0-9_.\\-]+))(/[A-Za-z0-9_.\\-]+)*$" + 'unknown) + (make-vcs + "hub.jazz.net/git/" + "^(hub\\.jazz\\.net/git/[a-z0-9]+/[A-Za-z0-9_.\\-]+)(/[A-Za-z0-9_.\\-]+)*$" + 'git) + (make-vcs + "git.apache.org" + "^(git\\.apache\\.org/[a-z0-9_.\\-]+\\.git)(/[A-Za-z0-9_.\\-]+)*$" + 'git) + (make-vcs + "git.openstack.org" + "^(git\\.openstack\\.org/[A-Za-z0-9_.\\-]+/[A-Za-z0-9_.\\-]+)(\\.git)?\ +(/[A-Za-z0-9_.\\-]+)*$" + 'git))) + +(define (module-path->repository-root module-path) + "Infer the repository root from a module path. Go modules can be +defined at any level of a repository tree, but querying for the meta tag +usually can only be done from the web page at the root of the repository, +hence the need to derive this information." + + ;; For reference, see: https://golang.org/ref/mod#vcs-find. + (define vcs-qualifiers '(".bzr" ".fossil" ".git" ".hg" ".svn")) + + (define (vcs-qualified-module-path->root-repo-url module-path) + (let* ((vcs-qualifiers-group (string-join vcs-qualifiers "|")) + (pattern (format #f "^(.*(~a))(/|$)" vcs-qualifiers-group)) + (m (string-match pattern module-path))) + (and=> m (cut match:substring <> 1)))) + + (or (and=> (find (lambda (vcs) + (string-prefix? (vcs-url-prefix vcs) module-path)) + known-vcs) + (lambda (vcs) + (match:substring (regexp-exec (vcs-root-regex vcs) + module-path) 1))) + (vcs-qualified-module-path->root-repo-url module-path) + module-path)) + +(define (go-module->guix-package-name module-path) + "Converts a module's path to the canonical Guix format for Go packages." + (string-downcase (string-append "go-" (string-replace-substring + (string-replace-substring + module-path + "." "-") + "/" "-")))) + +(define-record-type <module-meta> + (make-module-meta import-prefix vcs repo-root) + module-meta? + (import-prefix module-meta-import-prefix) + (vcs module-meta-vcs) ;a symbol + (repo-root module-meta-repo-root)) + +(define (fetch-module-meta-data module-path) + "Retrieve the module meta-data from its landing page. This is necessary +because goproxy servers don't currently provide all the information needed to +build a package." + ;; <meta name="go-import" content="import-prefix vcs repo-root"> + (let* ((port (http-fetch (format #f "https://~a?go-get=1" module-path))) + (select (sxpath `(// head (meta (@ (equal? (name "go-import")))) + // content)))) + (match (select (call-with-port port html->sxml)) + (() #f) ;nothing selected + (((content content-text)) + (match (string-split content-text #\space) + ((root-path vcs repo-url) + (make-module-meta root-path (string->symbol vcs) repo-url))))))) + +(define (module-meta-data-repo-url meta-data goproxy-url) + "Return the URL where the fetcher which will be used can download the +source." + (if (member (module-meta-vcs meta-data) '(fossil mod)) + goproxy-url + (module-meta-repo-root meta-data))) + +;; XXX: Copied from (guix scripts hash). +(define (vcs-file? file stat) + (case (stat:type stat) + ((directory) + (member (basename file) '(".bzr" ".git" ".hg" ".svn" "CVS"))) + ((regular) + ;; Git sub-modules have a '.git' file that is a regular text file. + (string=? (basename file) ".git")) + (else + #f))) + +;; XXX: Adapted from 'file-hash' in (guix scripts hash). +(define* (file-hash file #:optional (algorithm (hash-algorithm sha256))) + ;; Compute the hash of FILE. + (let-values (((port get-hash) (open-hash-port algorithm))) + (write-file file port #:select? (negate vcs-file?)) + (force-output port) + (get-hash))) + +(define* (git-checkout-hash url reference algorithm) + "Return the ALGORITHM hash of the checkout of URL at REFERENCE, a commit or +tag." + (define cache + (string-append (or (getenv "TMPDIR") "/tmp") + "/guix-import-go-" + (passwd:name (getpwuid (getuid))))) + + ;; Use a custom cache to avoid cluttering the default one under + ;; ~/.cache/guix, but choose one under /tmp so that it's persistent across + ;; subsequent "guix import" invocations. + (mkdir-p cache) + (chmod cache #o700) + (let-values (((checkout commit _) + (parameterize ((%repository-cache-directory cache)) + (update-cached-checkout url + #:ref + `(tag-or-commit . ,reference))))) + (file-hash checkout algorithm))) + +(define (vcs->origin vcs-type vcs-repo-url version) + "Generate the `origin' block of a package depending on what type of source +control system is being used." + (case vcs-type + ((git) + (let ((plain-version? (string=? version (go-version->git-ref version))) + (v-prefixed? (string-prefix? "v" version))) + `(origin + (method git-fetch) + (uri (git-reference + (url ,vcs-repo-url) + (commit ,(if (and plain-version? v-prefixed?) + '(string-append "v" version) + '(go-version->git-ref version))))) + (file-name (git-file-name name version)) + (sha256 + (base32 + ,(bytevector->nix-base32-string + (git-checkout-hash vcs-repo-url (go-version->git-ref version) + (hash-algorithm sha256)))))))) + ((hg) + `(origin + (method hg-fetch) + (uri (hg-reference + (url ,vcs-repo-url) + (changeset ,version))) + (file-name (string-append name "-" version "-checkout")) + (sha256 + (base32 + ;; FIXME: populate hash for hg repo checkout + "0000000000000000000000000000000000000000000000000000")))) + ((svn) + `(origin + (method svn-fetch) + (uri (svn-reference + (url ,vcs-repo-url) + (revision (string->number version)))) + (file-name (string-append name "-" version "-checkout")) + (sha256 + (base32 + ;; FIXME: populate hash for svn repo checkout + "0000000000000000000000000000000000000000000000000000")))) + (else + (raise + (formatted-message (G_ "unsupported vcs type '~a' for package '~a'") + vcs-type vcs-repo-url))))) + +(define* (go-module->guix-package module-path #:key + (goproxy-url "https://proxy.golang.org")) + (let* ((latest-version (go-module-latest-version goproxy-url module-path)) + (port (fetch-go.mod goproxy-url module-path latest-version)) + (dependencies (map car (call-with-port port parse-go.mod))) + (guix-name (go-module->guix-package-name module-path)) + (root-module-path (module-path->repository-root module-path)) + ;; The VCS type and URL are not included in goproxy information. For + ;; this we need to fetch it from the official module page. + (meta-data (fetch-module-meta-data root-module-path)) + (vcs-type (module-meta-vcs meta-data)) + (vcs-repo-url (module-meta-data-repo-url meta-data goproxy-url)) + (synopsis (go-package-synopsis root-module-path)) + (description (go-package-description module-path)) + (licenses (go-package-licenses module-path))) + (values + `(package + (name ,guix-name) + ;; Elide the "v" prefix Go uses + (version ,(string-trim latest-version #\v)) + (source + ,(vcs->origin vcs-type vcs-repo-url latest-version)) + (build-system go-build-system) + (arguments + '(#:import-path ,root-module-path)) + ,@(maybe-inputs (map go-module->guix-package-name dependencies)) + (home-page ,(format #f "https://~a" root-module-path)) + (synopsis ,synopsis) + (description ,description) + (license ,(match (and=> licenses list->licenses) + ((license) license) + ((licenses ...) `(list ,@licenses)) + (x x)))) + dependencies))) + +(define go-module->guix-package* (memoize go-module->guix-package)) + +(define* (go-module-recursive-import package-name + #:key (goproxy-url "https://proxy.golang.org")) + (recursive-import + package-name + #:repo->guix-package (lambda* (name . _) + (go-module->guix-package* + name + #:goproxy-url goproxy-url)) + #:guix-name go-module->guix-package-name)) |