summaryrefslogtreecommitdiff
path: root/gnu/packages/bioinformatics.scm
diff options
context:
space:
mode:
authorMarius Bakke <mbakke@fastmail.com>2019-03-14 15:06:06 +0100
committerMarius Bakke <mbakke@fastmail.com>2019-03-14 15:06:06 +0100
commitebfe259f6682b43d7f0d3b57b525e25f97410052 (patch)
treee3f09eb6159168e1736f8a629286eb3d148917a9 /gnu/packages/bioinformatics.scm
parent47b3eed821cf86c350dc4b0fdbe75647cdc275b0 (diff)
parent8cc3983a4d02a15ad4a863671c1a5a8b2b542625 (diff)
Merge branch 'master' into staging
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r--gnu/packages/bioinformatics.scm160
1 files changed, 121 insertions, 39 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 26fcc3a41a..9b0a4314b9 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -1752,6 +1752,72 @@ high-throughput sequencing data – with an emphasis on simplicity.")
(define-public python2-plastid
(package-with-python2 python-plastid))
+(define-public tetoolkit
+ (package
+ (name "tetoolkit")
+ (version "2.0.3")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/mhammell-laboratory/tetoolkit.git")
+ (commit version)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "1yzi0kfpzip8zpjb82x1ik6h22yzfyjiz2dv85v6as2awwqvk807"))))
+ (build-system python-build-system)
+ (arguments
+ `(#:python ,python-2 ; not guaranteed to work with Python 3
+ #:phases
+ (modify-phases %standard-phases
+ (add-after 'unpack 'make-writable
+ (lambda _
+ (for-each make-file-writable (find-files "."))
+ #t))
+ (add-after 'unpack 'patch-invocations
+ (lambda* (#:key inputs #:allow-other-keys)
+ (substitute* '("bin/TEtranscripts"
+ "bin/TEcount")
+ (("'sort ")
+ (string-append "'" (which "sort") " "))
+ (("'rm -f ")
+ (string-append "'" (which "rm") " -f "))
+ (("'Rscript'") (string-append "'" (which "Rscript") "'")))
+ (substitute* "TEToolkit/IO/ReadInputs.py"
+ (("BamToBED") (which "bamToBed")))
+ (substitute* "TEToolkit/Normalization.py"
+ (("\"Rscript\"")
+ (string-append "\"" (which "Rscript") "\"")))
+ #t))
+ (add-after 'install 'wrap-program
+ (lambda* (#:key outputs #:allow-other-keys)
+ ;; Make sure the executables find R packages.
+ (let ((out (assoc-ref outputs "out")))
+ (for-each
+ (lambda (script)
+ (wrap-program (string-append out "/bin/" script)
+ `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
+ '("TEtranscripts"
+ "TEcount")))
+ #t)))))
+ (inputs
+ `(("coreutils" ,coreutils)
+ ("bedtools" ,bedtools)
+ ("python-argparse" ,python2-argparse)
+ ("python-pysam" ,python2-pysam)
+ ("r-minimal" ,r-minimal)
+ ("r-deseq2" ,r-deseq2)))
+ (home-page "https://github.com/mhammell-laboratory/tetoolkit")
+ (synopsis "Transposable elements in differential enrichment analysis")
+ (description
+ "This is package for including transposable elements in differential
+enrichment analysis of sequencing datasets. TEtranscripts and TEcount take
+RNA-seq (and similar data) and annotates reads to both genes and transposable
+elements. TEtranscripts then performs differential analysis using DESeq2.
+Note that TEtranscripts and TEcount rely on specially curated GTF files, which
+are not included due to their size.")
+ (license license:gpl3+)))
+
(define-public cd-hit
(package
(name "cd-hit")
@@ -10293,35 +10359,6 @@ quality controls, normalization, visualization, and further analysis are also
provided.")
(license license:artistic2.0)))
-(define-public r-qvalue
- (package
- (name "r-qvalue")
- (version "2.14.1")
- (source
- (origin
- (method url-fetch)
- (uri (bioconductor-uri "qvalue" version))
- (sha256
- (base32
- "0kxavzm1j2mk26qicmjm90nxx4w5h3dxighzks7wzihay3k8cysc"))))
- (build-system r-build-system)
- (propagated-inputs
- `(("r-ggplot2" ,r-ggplot2)
- ("r-reshape2" ,r-reshape2)))
- (home-page "http://github.com/jdstorey/qvalue")
- (synopsis "Q-value estimation for false discovery rate control")
- (description
- "This package takes a list of p-values resulting from the simultaneous
-testing of many hypotheses and estimates their q-values and local @dfn{false
-discovery rate} (FDR) values. The q-value of a test measures the proportion
-of false positives incurred when that particular test is called significant.
-The local FDR measures the posterior probability the null hypothesis is true
-given the test's p-value. Various plots are automatically generated, allowing
-one to make sensible significance cut-offs. The software can be applied to
-problems in genomics, brain imaging, astrophysics, and data mining.")
- ;; Any version of the LGPL.
- (license license:lgpl3+)))
-
(define-public r-hdf5array
(package
(name "r-hdf5array")
@@ -11908,21 +11945,35 @@ variational inference.")
(define-public python-loompy
(package
(name "python-loompy")
- (version "2.0.2")
- (source
- (origin
- (method url-fetch)
- (uri (pypi-uri "loompy" version))
- (sha256
- (base32
- "1drgv8j1hxqzzpnfg272x9djb6j8qr798w1pc2x8ikmfgyd9gh51"))))
+ (version "2.0.17")
+ ;; The tarball on Pypi does not include the tests.
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/linnarsson-lab/loompy.git")
+ (commit version)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32
+ "12a5kjgiikapv93wahfw0frszx1lblnppyz3vs5gy8fgmgngra07"))))
(build-system python-build-system)
- ;; There are no tests
- (arguments '(#:tests? #f))
+ (arguments
+ `(#:phases
+ (modify-phases %standard-phases
+ (replace 'check
+ (lambda _
+ (setenv "PYTHONPATH"
+ (string-append (getcwd) ":"
+ (getenv "PYTHONPATH")))
+ (invoke "pytest" "tests")
+ #t)))))
(propagated-inputs
`(("python-h5py" ,python-h5py)
("python-numpy" ,python-numpy)
+ ("python-pandas" ,python-pandas)
("python-scipy" ,python-scipy)))
+ (native-inputs
+ `(("python-pytest" ,python-pytest)))
(home-page "https://github.com/linnarsson-lab/loompy")
(synopsis "Work with .loom files for single-cell RNA-seq data")
(description "The loom file format is an efficient format for very large
@@ -14158,3 +14209,34 @@ short read sequences, removes errors then produces high quality unique
contigs. It then uses paired read information, if available, to retrieve the
repeated areas between contigs.")
(license license:gpl2+)))
+
+(define-public python-velocyto
+ (package
+ (name "python-velocyto")
+ (version "0.17.17")
+ (source
+ (origin
+ (method url-fetch)
+ (uri (pypi-uri "velocyto" version))
+ (sha256
+ (base32
+ "0fgygyzqgrq32dv6a00biq1p1cwi6kbl5iqblxq1kklj6b2mzmhs"))))
+ (build-system python-build-system)
+ (propagated-inputs
+ `(("python-click" ,python-click)
+ ("python-cython" ,python-cython)
+ ("python-h5py" ,python-h5py)
+ ("python-loompy" ,python-loompy)
+ ("python-matplotlib" ,python-matplotlib)
+ ("python-numba" ,python-numba)
+ ("python-numpy" ,python-numpy)
+ ("python-pandas" ,python-pandas)
+ ("python-pysam" ,python-pysam)
+ ("python-scikit-learn" ,python-scikit-learn)
+ ("python-scipy" ,python-scipy)))
+ (home-page "https://github.com/velocyto-team/velocyto.py")
+ (synopsis "RNA velocity analysis for single cell RNA-seq data")
+ (description
+ "Velocyto is a library for the analysis of RNA velocity. Velocyto
+includes a command line tool and an analysis pipeline.")
+ (license license:bsd-2)))