summary refs log tree commit diff
path: root/gnu
diff options
context:
space:
mode:
author Troy Figiel <troy@troyfigiel.com> 2024-01-29 19:17:14 +0100
committer Sharlatan Hellseher <sharlatanus@gmail.com> 2024-01-29 21:40:59 +0000
commit cde0adaacdcfb401ff249a715c0fbfd20d64bbfd (patch)
tree ffe773bda37f3016b0a4f8d2db00ff795ddd28f9 /gnu
parent 343f4f1d70ea01bc3f6382546e1db5c907c6af01 (diff)
gnu: Add python-pyjanitor.
* gnu/packages/python-science.scm (python-pyjanitor): New variable.

Signed-off-by: Sharlatan Hellseher <sharlatanus@gmail.com>
Diffstat (limited to 'gnu')
-rw-r--r-- gnu/packages/python-science.scm 57
1 files changed, 57 insertions, 0 deletions
diff --git a/gnu/packages/python-science.scm b/gnu/packages/python-science.scm
index d29f583a32..2affbd3fa5 100644
--- a/gnu/packages/python-science.scm
+++ b/gnu/packages/python-science.scm
@@ -47,6 +47,7 @@
#:use-module (gnu packages boost)
#:use-module (gnu packages build-tools)
#:use-module (gnu packages check)
+ #:use-module (gnu packages chemistry)
#:use-module (gnu packages cpp)
#:use-module (gnu packages crypto)
#:use-module (gnu packages databases)
@@ -840,6 +841,62 @@ production-critical data pipelines or reproducible research settings. With
@end itemize")
(license license:expat)))
+(define-public python-pyjanitor
+  (package
+    (name "python-pyjanitor")
+    (version "0.26.0")
+    (source
+     (origin
+       ;; The build requires the mkdocs directory for the description in
+       ;; setup.py. This is not included in the PyPI tarball.
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/pyjanitor-devs/pyjanitor")
+             (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "1f8xbl1k9l2z56bapp7v6bd3016zrk48igcaz6hb553r6yfl7vfx"))))
+    (build-system pyproject-build-system)
+    ;; Pyjanitor has an extensive test suite. For quick debugging, the tests
+    ;; marked turtle can be skipped by adding "-m" "not turtle" to #:test-flags.
+    (arguments
+     (list
+      #:test-flags #~(list
+                      "-n" (number->string (parallel-job-count))
+                      ;; Tries to connect to the internet.
+                      "-k" "not test_is_connected"
+                      ;; PySpark has not been packaged yet.
+                      "--ignore=tests/spark/functions/test_clean_names_spark.py"
+                      "--ignore=tests/spark/functions/test_update_where_spark.py")
+      #:phases #~(modify-phases %standard-phases
+                   (add-before 'check 'set-env-ci
+                     (lambda _
+                       ;; Some tests are skipped if the JANITOR_CI_MACHINE
+                       ;; variable is not set.
+                       (setenv "JANITOR_CI_MACHINE" "1"))))))
+    (propagated-inputs (list python-multipledispatch
+                             python-natsort
+                             python-pandas-flavor
+                             python-scipy
+                             ;; Optional imports.
+                             python-biopython ;biology submodule
+                             python-unyt)) ;engineering submodule
+    (native-inputs (list python-pytest
+                         python-pytest-xdist
+                         ;; Optional imports. We do not propagate them due to
+                         ;; their size.
+                         python-numba ;speedup of joins
+                         rdkit)) ;chemistry submodule
+    (home-page "https://github.com/pyjanitor-devs/pyjanitor")
+    (synopsis "Tools for cleaning and transforming pandas DataFrames")
+    (description
+     "@code{pyjanitor} provides a set of data cleaning routines for
+@code{pandas} DataFrames. These routines extend the method chaining API
+defined by @code{pandas} for a subset of its methods. Originally, this
+package was a port of the R package by the same name and it is inspired by the
+ease-of-use and expressiveness of the @code{dplyr} package.")
+    (license license:expat)))
+
(define-public python-pythran
(package
(name "python-pythran")