diff options
Diffstat (limited to 'gnu/packages/python-science.scm')
-rw-r--r-- | gnu/packages/python-science.scm | 210 |
1 files changed, 178 insertions, 32 deletions
diff --git a/gnu/packages/python-science.scm b/gnu/packages/python-science.scm index 6fce8d2319..738b504e3b 100644 --- a/gnu/packages/python-science.scm +++ b/gnu/packages/python-science.scm @@ -35,6 +35,7 @@ #:use-module (gnu packages) #:use-module (gnu packages base) #:use-module (gnu packages check) + #:use-module (gnu packages databases) #:use-module (gnu packages gcc) #:use-module (gnu packages image-processing) #:use-module (gnu packages machine-learning) @@ -44,6 +45,7 @@ #:use-module (gnu packages pkg-config) #:use-module (gnu packages python) #:use-module (gnu packages python-build) + #:use-module (gnu packages python-crypto) #:use-module (gnu packages python-check) #:use-module (gnu packages python-web) #:use-module (gnu packages python-xyz) @@ -327,49 +329,55 @@ of the SGP4 satellite tracking algorithm.") (define-public python-pandas (package (name "python-pandas") - (version "1.0.5") + (version "1.3.0") (source (origin (method url-fetch) (uri (pypi-uri "pandas" version)) (sha256 - (base32 "1a2gv3g6jr6vb5ca43fkwjl5xf86wpfz8y3zcy787adjl0hdkib9")))) + (base32 "1qi2cv450m05dwccx3p1s373k5b4ncvwi74plnms2pidrz4ycm65")))) (build-system python-build-system) (arguments `(#:modules ((guix build utils) (guix build python-build-system) (ice-9 ftw) + (srfi srfi-1) (srfi srfi-26)) - #:phases (modify-phases %standard-phases - (add-after 'unpack 'patch-which - (lambda* (#:key inputs #:allow-other-keys) - (let ((which (assoc-ref inputs "which"))) - (substitute* "pandas/io/clipboard/__init__.py" - (("^WHICH_CMD = .*") - (string-append "WHICH_CMD = \"" which "\"\n")))) - #t)) - (add-before 'check 'prepare-x - (lambda _ - (system "Xvfb &") - (setenv "DISPLAY" ":0") - ;; xsel needs to write a log file. - (setenv "HOME" "/tmp") - #t)) - (replace 'check - (lambda _ - (let ((build-directory - (string-append - (getcwd) "/build/" - (car (scandir "build" - (cut string-prefix? "lib." <>)))))) - ;; Disable the "strict data files" option which causes - ;; the build to error out if required data files are - ;; not available (as is the case with PyPI archives). - (substitute* "setup.cfg" - (("addopts = --strict-data-files") "addopts = ")) - (with-directory-excursion build-directory - (invoke "pytest" "-vv" "pandas" "--skip-slow" - "--skip-network")))))))) + #:phases + (modify-phases %standard-phases + (add-after 'unpack 'patch-which + (lambda* (#:key inputs #:allow-other-keys) + (let ((which (assoc-ref inputs "which"))) + (substitute* "pandas/io/clipboard/__init__.py" + (("^WHICH_CMD = .*") + (string-append "WHICH_CMD = \"" which "\"\n")))))) + (add-before 'check 'prepare-x + (lambda _ + (system "Xvfb &") + (setenv "DISPLAY" ":0") + ;; xsel needs to write a log file. + (setenv "HOME" "/tmp"))) + (replace 'check + (lambda _ + (let ((build-directory + (string-append + (getcwd) "/build/" + (first (scandir "build" + (cut string-prefix? "lib." <>)))))) + (with-directory-excursion build-directory + (invoke "pytest" "-vv" "pandas" "--skip-slow" + "--skip-network" + "-k" + ;; These tets access the internet: + ;; pandas/tests/io/xml/test_xml.py::test_wrong_url[lxml] + ;; pandas/tests/io/xml/test_xml.py::test_wrong_url[etree] + ;; TODO: the excel tests fail for unknown reasons + (string-append "not test_wrong_url" + " and not test_excelwriter_fspath" + " and not test_ExcelWriter_dispatch" + ;; TODO: Missing input + " and not TestS3" + " and not s3"))))))))) (propagated-inputs `(("python-jinja2" ,python-jinja2) ("python-numpy" ,python-numpy) @@ -835,3 +843,141 @@ and more @end itemize") (license license:gpl3))) +(define-public python-distributed + (package + (name "python-distributed") + (version "2021.07.1") + (source + (origin + ;; The test files are not included in the archive on pypi + (method git-fetch) + (uri (git-reference + (url "https://github.com/dask/distributed") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "0i55zf3k55sqjxnwlzsyj3h3v1588fn54ng4mj3dfiqzh3nlj0dg")))) + (build-system python-build-system) + (arguments + '(#:phases + (modify-phases %standard-phases + (add-after 'unpack 'fix-references + (lambda* (#:key outputs #:allow-other-keys) + (substitute* '("distributed/comm/tests/test_ucx_config.py" + "distributed/tests/test_client.py" + "distributed/tests/test_queues.py" + "distributed/tests/test_variable.py" + "distributed/cli/tests/test_tls_cli.py" + "distributed/cli/tests/test_dask_spec.py" + "distributed/cli/tests/test_dask_worker.py" + "distributed/cli/tests/test_dask_scheduler.py") + (("\"dask-scheduler\"") + (format #false "\"~a/bin/dask-scheduler\"" + (assoc-ref outputs "out"))) + (("\"dask-worker\"") + (format #false "\"~a/bin/dask-worker\"" + (assoc-ref outputs "out")))))) + (replace 'check + (lambda* (#:key tests? #:allow-other-keys) + (when tests? + (setenv "DISABLE_IPV6" "1") + (invoke "pytest" "-vv" "distributed" + "-m" "not slow and not gpu and not ipython and not avoid_ci" + "-k" + ;; TODO: These tests fail for unknown reasons: + ;; Assertion error. + (string-append + "not test_version_option" + ;; "The 'distributed' distribution was not found" + " and not test_register_backend_entrypoint" + ;; "AttributeError: module 'distributed.dashboard' has no attribute 'scheduler'" + " and not test_get_client_functions_spawn_clusters")))))))) + (propagated-inputs + `(("python-click" ,python-click) + ("python-cloudpickle" ,python-cloudpickle) + ("python-cryptography" ,python-cryptography) + ("python-dask" ,python-dask) + ("python-msgpack" ,python-msgpack) + ("python-psutil" ,python-psutil) + ("python-pyyaml" ,python-pyyaml) + ("python-setuptools" ,python-setuptools) + ("python-sortedcontainers" ,python-sortedcontainers) + ("python-tblib" ,python-tblib) + ("python-toolz" ,python-toolz) + ("python-tornado" ,python-tornado-6) + ("python-zict" ,python-zict))) + (native-inputs + `(("python-pytest" ,python-pytest))) + (home-page "https://distributed.dask.org") + (synopsis "Distributed scheduler for Dask") + (description "Dask.distributed is a lightweight library for distributed +computing in Python. It extends both the @code{concurrent.futures} and +@code{dask} APIs to moderate sized clusters.") + (license license:bsd-3))) + +(define-public python-modin + (package + (name "python-modin") + (version "0.10.1") + (source + (origin + ;; The archive on pypi does not include all required files. + (method git-fetch) + (uri (git-reference + (url "https://github.com/modin-project/modin") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "128ghfb9ncmnn8km409xjcdppvn9nr9jqw8rkvsfavh7wnwlk509")))) + (build-system python-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'unpack 'make-files-writable + (lambda _ + (for-each make-file-writable (find-files ".")))) + (replace 'check + (lambda* (#:key tests? #:allow-other-keys) + (when tests? + (setenv "MODIN_ENGINE" "dask") + (invoke "python" "-m" "pytest" + "modin/pandas/test/test_concat.py") + (setenv "MODIN_ENGINE" "python") + (invoke "python" "-m" "pytest" + "modin/pandas/test/test_concat.py"))))))) + (propagated-inputs + `(("python-cloudpickle" ,python-cloudpickle) + ("python-dask" ,python-dask) + ("python-distributed" ,python-distributed) + ("python-numpy" ,python-numpy) + ("python-packaging" ,python-packaging) + ("python-pandas" ,python-pandas))) + (native-inputs + `(("python-coverage" ,python-coverage) + ("python-jinja2" ,python-jinja2) + ("python-lxml" ,python-lxml) + ("python-matplotlib" ,python-matplotlib) + ("python-msgpack" ,python-msgpack) + ("python-openpyxl" ,python-openpyxl) + ("python-psutil" ,python-psutil) + ("python-pyarrow" ,python-pyarrow) + ("python-pytest" ,python-pytest) + ("python-pytest-benchmark" ,python-pytest-benchmark) + ("python-pytest-cov" ,python-pytest-cov) + ("python-pytest-xdist" ,python-pytest-xdist) + ("python-scipy" ,python-scipy) + ("python-sqlalchemy" ,python-sqlalchemy) + ("python-tables" ,python-tables) + ("python-tqdm" ,python-tqdm) + ("python-xarray" ,python-xarray) + ("python-xlrd" ,python-xlrd))) + (home-page "https://github.com/modin-project/modin") + (synopsis "Make your pandas code run faster") + (description + "Modin uses Ray or Dask to provide an effortless way to speed up your +pandas notebooks, scripts, and libraries. Unlike other distributed DataFrame +libraries, Modin provides seamless integration and compatibility with existing +pandas code.") + (license license:asl2.0))) |