diff options
author | Maxim Cournoyer <maxim.cournoyer@gmail.com> | 2019-03-28 00:26:01 -0400 |
---|---|---|
committer | Maxim Cournoyer <maxim.cournoyer@gmail.com> | 2019-07-02 10:07:59 +0900 |
commit | 803fb336d62ea65990e263ce58d8552e04c9c038 (patch) | |
tree | 302306159a166490b7837135ba766960d5be5490 /guix | |
parent | c4797121beea74ae93e3ce17677b9e72b8df920d (diff) |
import: pypi: Improve parsing of requirement specifications.
The previous solution was fragile and could leave unwanted characters in a
requirement name, such as '[' or ']'.
Partially fixes <https://bugs.gnu.org/33047>.
* guix/import/pypi.scm (use-modules): Export SPECIFICATION->REQUIREMENT-NAME
(%requirement-name-regexp): New variable.
(clean-requirement): Rename to...
(specification->requirement-name): this, which now uses
%requirement-name-regexp to select the requirement name from the requirement
specification.
(parse-requires.txt): Adapt.
Diffstat (limited to 'guix')
-rw-r--r-- | guix/import/pypi.scm | 54 |
1 files changed, 40 insertions, 14 deletions
diff --git a/guix/import/pypi.scm b/guix/import/pypi.scm index d9db876222..6a881bda12 100644 --- a/guix/import/pypi.scm +++ b/guix/import/pypi.scm @@ -48,6 +48,7 @@ #:use-module ((guix licenses) #:prefix license:) #:use-module (guix build-system python) #:export (parse-requires.txt + specification->requirement-name guix-package->pypi-name pypi-recursive-import pypi->guix-package @@ -118,22 +119,47 @@ package definition." ((package-inputs ...) `((propagated-inputs (,'quasiquote ,package-inputs)))))) -(define (clean-requirement s) - ;; Given a requirement LINE, as can be found in a setuptools requires.txt - ;; file, remove everything other than the actual name of the required - ;; package, and return it. - (cond - ((string-index s (char-set #\space #\> #\= #\<)) => (cut string-take s <>)) - (else s))) +(define %requirement-name-regexp + ;; Regexp to match the requirement name in a requirement specification. + + ;; Some grammar, taken from PEP-0508 (see: + ;; https://www.python.org/dev/peps/pep-0508/). + + ;; Using this grammar makes the PEP-0508 regexp easier to understand for + ;; humans. The use of a regexp is preferred to more primitive string + ;; manipulations because we can more directly match what upstream uses + ;; (again, per PEP-0508). The regexp approach is also easier to extend, + ;; should we want to implement more completely the grammar of PEP-0508. + + ;; The unified rule can be expressed as: + ;; specification = wsp* ( url_req | name_req ) wsp* + + ;; where url_req is: + ;; url_req = name wsp* extras? wsp* urlspec wsp+ quoted_marker? + + ;; and where name_req is: + ;; name_req = name wsp* extras? wsp* versionspec? wsp* quoted_marker? + + ;; Thus, we need only matching NAME, which is expressed as: + ;; identifer_end = letterOrDigit | (('-' | '_' | '.' )* letterOrDigit) + ;; identifier = letterOrDigit identifier_end* + ;; name = identifier + (let* ((letter-or-digit "[A-Za-z0-9]") + (identifier-end (string-append "(" letter-or-digit "|" + "[-_.]*" letter-or-digit ")")) + (identifier (string-append "^" letter-or-digit identifier-end "*")) + (name identifier)) + (make-regexp name))) + +(define (specification->requirement-name spec) + "Given a specification SPEC, return the requirement name." + (match:substring + (or (regexp-exec %requirement-name-regexp spec) + (error (G_ "Could not extract requirement name in spec:") spec)))) (define (parse-requires.txt requires.txt) "Given REQUIRES.TXT, a Setuptools requires.txt file, return a list of requirement names." - ;; This is a very incomplete parser, whose job is to select the non-optional - ;; dependencies and strip them out of any version information. - ;; Alternatively, we could implement a PEG parser with the (ice-9 peg) - ;; library and the requirements grammar defined by PEP-0508 - ;; (https://www.python.org/dev/peps/pep-0508/). (define (comment? line) ;; Return #t if the given LINE is a comment, #f otherwise. @@ -156,7 +182,7 @@ requirement names." ((or (string-null? line) (comment? line)) (loop result)) (else - (loop (cons (clean-requirement line) + (loop (cons (specification->requirement-name line) result)))))))))) (define (guess-requirements source-url wheel-url tarball) @@ -198,7 +224,7 @@ cannot determine package dependencies")) (hash-ref (list-ref run_requires 0) "requires") '()))) - (map clean-requirement requirements))))) + (map specification->requirement-name requirements))))) (lambda () (delete-file json-file) (rmdir dirname)))))) |