summaryrefslogtreecommitdiff
path: root/guix/import
diff options
context:
space:
mode:
authorRicardo Wurmus <ricardo.wurmus@mdc-berlin.de>2015-12-03 16:12:09 +0100
committerRicardo Wurmus <ricardo.wurmus@mdc-berlin.de>2015-12-11 15:35:07 +0100
commit0f6b9e9828dfc269bfc4eade771efed1753e8c62 (patch)
tree90bd8ab26d32ff8c22bbf0a089690705f0486724 /guix/import
parentb6a222757bfebdbf3b907b39f1c3b42967aaa915 (diff)
import: cran: Parse DESCRIPTION instead of HTML.
* guix/import/cran.scm (description->alist, listify, beautify-description, description->package): New procedures. (table-datum, downloads->url, nodes->text, cran-sxml->sexp): Remove proceduces. (latest-release): Use parsed DESCRIPTION instead of SXML. * tests/cran.scm: Rewrite to match importer.
Diffstat (limited to 'guix/import')
-rw-r--r--guix/import/cran.scm265
1 files changed, 130 insertions, 135 deletions
diff --git a/guix/import/cran.scm b/guix/import/cran.scm
index 43dc2c80b6..845ecb5832 100644
--- a/guix/import/cran.scm
+++ b/guix/import/cran.scm
@@ -20,26 +20,26 @@
(define-module (guix import cran)
#:use-module (ice-9 match)
#:use-module (ice-9 regex)
+ #:use-module ((ice-9 rdelim) #:select (read-string))
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-26)
- #:use-module (sxml simple)
- #:use-module (sxml match)
- #:use-module (sxml xpath)
#:use-module (guix http-client)
#:use-module (guix hash)
#:use-module (guix store)
#:use-module (guix base32)
#:use-module ((guix download) #:select (download-to-store))
#:use-module (guix import utils)
+ #:use-module ((guix build-system r) #:select (cran-uri))
#:use-module (guix upstream)
#:use-module (guix packages)
+ #:use-module (gnu packages)
#:export (cran->guix-package
%cran-updater))
;;; Commentary:
;;;
;;; Generate a package declaration template for the latest version of an R
-;;; package on CRAN, using the HTML description downloaded from
+;;; package on CRAN, using the DESCRIPTION file downloaded from
;;; cran.r-project.org.
;;;
;;; Code:
@@ -67,6 +67,31 @@
((lst ...) `(list ,@(map string->license lst)))
(_ #f)))
+
+(define (description->alist description)
+ "Convert a DESCRIPTION string into an alist."
+ (let ((lines (string-split description #\newline))
+ (parse (lambda (line acc)
+ (if (string-null? line) acc
+ ;; Keys usually start with a capital letter and end with
+ ;; ":". There are some exceptions, unfortunately (such
+ ;; as "biocViews"). There are no blanks in a key.
+ (if (string-match "^[A-Za-z][^ :]+:( |\n|$)" line)
+ ;; New key/value pair
+ (let* ((pos (string-index line #\:))
+ (key (string-take line pos))
+ (value (string-drop line (+ 1 pos))))
+ (cons (cons key
+ (string-trim-both value))
+ acc))
+ ;; This is a continuation of the previous pair
+ (match-let ((((key . value) . rest) acc))
+ (cons (cons key (string-join
+ (list value
+ (string-trim-both line))))
+ rest)))))))
+ (fold parse '() lines)))
+
(define (format-inputs names)
"Generate a sorted list of package inputs from a list of package NAMES."
(map (lambda (name)
@@ -82,125 +107,94 @@ package definition."
((package-inputs ...)
`((,type (,'quasiquote ,(format-inputs package-inputs)))))))
-(define (table-datum tree label)
- "Extract the datum node following a LABEL in the sxml table TREE. Only the
-first cell of a table row is considered a label cell."
- ((node-pos 1)
- ((sxpath `(xhtml:tr
- (xhtml:td 1) ; only first cell can contain label
- (equal? ,label)
- ,(node-parent tree) ; go up to label cell
- ,(node-parent tree) ; go up to matching row
- (xhtml:td 2))) ; select second cell
- tree)))
-
(define %cran-url "http://cran.r-project.org/web/packages/")
(define (cran-fetch name)
- "Return an sxml representation of the CRAN page for the R package NAME,
-or #f on failure. NAME is case-sensitive."
+ "Return an alist of the contents of the DESCRIPTION file for the R package
+NAME, or #f on failure. NAME is case-sensitive."
;; This API always returns the latest release of the module.
- (let ((cran-url (string-append %cran-url name "/")))
- (false-if-exception
- (xml->sxml (http-fetch cran-url)
- #:trim-whitespace? #t
- #:namespaces '((xhtml . "http://www.w3.org/1999/xhtml"))
- #:default-entity-handler
- (lambda (port name)
- (case name
- ((nbsp) " ")
- ((ge) ">=")
- ((gt) ">")
- ((lt) "<")
- (else
- (format (current-warning-port)
- "~a:~a:~a: undefined entitity: ~a\n"
- cran-url (port-line port) (port-column port)
- name)
- (symbol->string name))))))))
-
-(define (downloads->url downloads)
- "Extract from DOWNLOADS, the downloads item of the CRAN sxml tree, the
-download URL."
- (string-append "mirror://cran/"
- ;; Remove double dots, because we want an
- ;; absolute path.
- (regexp-substitute/global
- #f "\\.\\./"
- (string-join ((sxpath '((xhtml:a 1) @ href *text*))
- (table-datum downloads " Package source: ")))
- 'pre 'post)))
-
-(define (nodes->text nodeset)
- "Return the concatenation of the text nodes among NODESET."
- (string-join ((sxpath '(// *text*)) nodeset) " "))
-
-(define (cran-sxml->sexp sxml)
- "Return the `package' s-expression for a CRAN package from the SXML
-representation of the package page."
+ (let ((url (string-append %cran-url name "/DESCRIPTION")))
+ (description->alist (read-string (http-fetch url)))))
+
+(define (listify meta field)
+ "Look up FIELD in the alist META. If FIELD contains a comma-separated
+string, turn it into a list and strip off parenthetic expressions. Return the
+empty list when the FIELD cannot be found."
+ (let ((value (assoc-ref meta field)))
+ (if (not value)
+ '()
+ ;; Strip off parentheses
+ (let ((items (string-split (regexp-substitute/global
+ #f "( *\\([^\\)]+\\)) *"
+ value 'pre 'post)
+ #\,)))
+ ;; When there is whitespace inside of items it is probably because
+ ;; this was not an actual list to begin with.
+ (remove (cut string-any char-set:whitespace <>)
+ (map string-trim-both items))))))
+
+(define (beautify-description description)
+ "Improve the package DESCRIPTION by turning a beginning sentence fragment
+into a proper sentence and by using two spaces between sentences."
+ (let ((cleaned (if (string-prefix? "A " description)
+ (string-append "This package provides a"
+ (substring description 1))
+ description)))
+ ;; Use double spacing between sentences
+ (regexp-substitute/global #f "\\. \\b"
+ cleaned 'pre ". " 'post)))
+
+(define (description->package meta)
+ "Return the `package' s-expression for a CRAN package from the alist META,
+which was derived from the R package's DESCRIPTION file."
(define (guix-name name)
(if (string-prefix? "r-" name)
(string-downcase name)
(string-append "r-" (string-downcase name))))
- (sxml-match-let*
- (((*TOP* (xhtml:html
- ,head
- (xhtml:body
- (xhtml:h2 ,name-and-synopsis)
- (xhtml:p ,description)
- ,summary
- (xhtml:h4 "Downloads:") ,downloads
- . ,rest)))
- sxml))
- (let* ((name (match:prefix (string-match ": " name-and-synopsis)))
- (synopsis (match:suffix (string-match ": " name-and-synopsis)))
- (version (nodes->text (table-datum summary "Version:")))
- (license ((compose string->license nodes->text)
- (table-datum summary "License:")))
- (home-page (nodes->text ((sxpath '((xhtml:a 1)))
- (table-datum summary "URL:"))))
- (source-url (downloads->url downloads))
- (tarball (with-store store (download-to-store store source-url)))
- (sysdepends (map match:substring
- (list-matches
- "[^ ]+"
- ;; Strip off comma and parenthetical
- ;; expressions.
- (regexp-substitute/global
- #f "(,|\\([^\\)]+\\))"
- (nodes->text (table-datum summary
- "SystemRequirements:"))
- 'pre 'post))))
- (imports (map guix-name
- ((sxpath '(// xhtml:a *text*))
- (table-datum summary "Imports:")))))
- `(package
- (name ,(guix-name name))
- (version ,version)
- (source (origin
- (method url-fetch)
- (uri (cran-uri ,name version))
- (sha256
- (base32
- ,(bytevector->nix-base32-string (file-sha256 tarball))))))
- (build-system r-build-system)
- ,@(maybe-inputs sysdepends)
- ,@(maybe-inputs imports 'propagated-inputs)
- (home-page ,(if (string-null? home-page)
- (string-append %cran-url name)
- home-page))
- (synopsis ,synopsis)
- ;; Use double spacing
- (description ,(regexp-substitute/global #f "\\. \\b" description
- 'pre ". " 'post))
- (license ,license)))))
+ (let* ((name (assoc-ref meta "Package"))
+ (synopsis (assoc-ref meta "Title"))
+ (version (assoc-ref meta "Version"))
+ (license (string->license (assoc-ref meta "License")))
+ ;; Some packages have multiple home pages. Some have none.
+ (home-page (match (listify meta "URL")
+ ((url rest ...) url)
+ (_ (string-append %cran-url name))))
+ (source-url (match (cran-uri name version)
+ ((url rest ...) url)
+ (_ #f)))
+ (tarball (with-store store (download-to-store store source-url)))
+ (sysdepends (map string-downcase (listify meta "SystemRequirements")))
+ (propagate (map guix-name (lset-union equal?
+ (listify meta "Imports")
+ (listify meta "LinkingTo")
+ (delete "R"
+ (listify meta "Depends"))))))
+ `(package
+ (name ,(guix-name name))
+ (version ,version)
+ (source (origin
+ (method url-fetch)
+ (uri (cran-uri ,name version))
+ (sha256
+ (base32
+ ,(bytevector->nix-base32-string (file-sha256 tarball))))))
+ (properties ,`(,'quasiquote ((,'upstream-name . ,name))))
+ (build-system r-build-system)
+ ,@(maybe-inputs sysdepends)
+ ,@(maybe-inputs propagate 'propagated-inputs)
+ (home-page ,(if (string-null? home-page)
+ (string-append %cran-url name)
+ home-page))
+ (synopsis ,synopsis)
+ (description ,(beautify-description (assoc-ref meta "Description")))
+ (license ,license))))
(define (cran->guix-package package-name)
"Fetch the metadata for PACKAGE-NAME from cran.r-project.org, and return the
`package' s-expression corresponding to that package, or #f on failure."
(let ((module-meta (cran-fetch package-name)))
- (and=> module-meta cran-sxml->sexp)))
+ (and=> module-meta description->package)))
;;;
@@ -209,32 +203,33 @@ representation of the package page."
(define (latest-release package)
"Return an <upstream-source> for the latest release of PACKAGE."
- (define name
- (if (string-prefix? "r-" package)
- (string-drop package 2)
- package))
-
- (define sxml
- (cran-fetch name))
-
- (and sxml
- (sxml-match-let*
- (((*TOP* (xhtml:html
- ,head
- (xhtml:body
- (xhtml:h2 ,name-and-synopsis)
- (xhtml:p ,description)
- ,summary
- (xhtml:h4 "Downloads:") ,downloads
- . ,rest)))
- sxml))
- (let ((version (nodes->text (table-datum summary "Version:")))
- (url (downloads->url downloads)))
- ;; CRAN does not provide signatures.
- (upstream-source
- (package package)
- (version version)
- (urls (list url)))))))
+
+ (define (package->cran-name package)
+ (match (package-source package)
+ ((? origin? origin)
+ (match (origin-uri origin)
+ ((url rest ...)
+ (let ((end (string-rindex url #\_))
+ (start (string-rindex url #\/)))
+ ;; The URL ends on
+ ;; (string-append "/" name "_" version ".tar.gz")
+ (substring url start end)))
+ (_ #f)))
+ (_ #f)))
+
+ (define cran-name
+ (package->cran-name (specification->package package)))
+
+ (define meta
+ (cran-fetch cran-name))
+
+ (and meta
+ (let ((version (assoc-ref meta "Version")))
+ ;; CRAN does not provide signatures.
+ (upstream-source
+ (package package)
+ (version version)
+ (urls (cran-uri cran-name version))))))
(define (cran-package? package)
"Return true if PACKAGE is an R package from CRAN."