diff options
author | Maxim Cournoyer <maxim.cournoyer@gmail.com> | 2021-04-08 15:47:00 -0400 |
---|---|---|
committer | Maxim Cournoyer <maxim.cournoyer@gmail.com> | 2021-04-08 15:47:00 -0400 |
commit | 533a893cc6b03f100566760d6e0c8e0500ed7082 (patch) | |
tree | 0ecbf89895a400c43d16bb1f6539f1a88aeaa4cf /gnu/packages/bioinformatics.scm | |
parent | 6a2546f92d07df04e5d700924edd027ed1e2ef11 (diff) | |
parent | c762df54786fd6f005f3b5307323f1d2df3cbf0b (diff) |
Merge branch 'master' into staging
Conflicts:
.guix-authorizations
gnu/ci.scm
gnu/packages/base.scm
gnu/packages/databases.scm
gnu/packages/gnome.scm
gnu/packages/gtk.scm
gnu/packages/imagemagick.scm
gnu/packages/password-utils.scm
gnu/packages/python-xyz.scm
gnu/packages/web-browsers.scm
Diffstat (limited to 'gnu/packages/bioinformatics.scm')
-rw-r--r-- | gnu/packages/bioinformatics.scm | 1546 |
1 files changed, 523 insertions, 1023 deletions
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm index 6f835b3875..908916ebf3 100644 --- a/gnu/packages/bioinformatics.scm +++ b/gnu/packages/bioinformatics.scm @@ -19,6 +19,8 @@ ;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net> ;;; Copyright © 2020 Pierre Langlois <pierre.langlois@gmx.com> ;;; Copyright © 2020 Bonface Munyoki Kilyungi <bonfacemunyoki@gmail.com> +;;; Copyright © 2021 Tim Howes <timhowes@lavabit.com> +;;; Copyright © 2021 Hong Li <hli@mdc-berlin.de> ;;; ;;; This file is part of GNU Guix. ;;; @@ -51,6 +53,7 @@ #:use-module (guix build-system ocaml) #:use-module (guix build-system perl) #:use-module (guix build-system python) + #:use-module (guix build-system qt) #:use-module (guix build-system r) #:use-module (guix build-system ruby) #:use-module (guix build-system scons) @@ -124,6 +127,7 @@ #:use-module (gnu packages python-science) #:use-module (gnu packages python-web) #:use-module (gnu packages python-xyz) + #:use-module (gnu packages qt) #:use-module (gnu packages rdf) #:use-module (gnu packages readline) #:use-module (gnu packages ruby) @@ -2741,8 +2745,7 @@ trees (phylogenies) and characters.") (define-public python2-dendropy (let ((base (package-with-python2 python-dendropy))) - (package - (inherit base) + (package/inherit base (arguments `(#:phases (modify-phases %standard-phases @@ -3816,15 +3819,17 @@ particular, reads spanning multiple exons.") (define-public hisat2 (package (name "hisat2") - (version "2.0.5") + (version "2.2.1") (source (origin - (method url-fetch) - (uri (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2" - "/downloads/hisat2-" version "-source.zip")) + (method git-fetch) + (uri (git-reference + (url "https://github.com/DaehwanKimLab/hisat2/") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) (sha256 (base32 - "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g")))) + "0lmzdhzjkvxw7n5w40pbv5fgzd4cz0f9pxczswn3d4cr0k10k754")))) (build-system gnu-build-system) (arguments `(#:tests? #f ; no check target @@ -3837,9 +3842,12 @@ particular, reads spanning multiple exons.") (add-after 'unpack 'make-deterministic (lambda _ (substitute* "Makefile" - (("`date`") "0")) - #t)) + (("`date`") "0")))) (delete 'configure) + (add-before 'build 'build-manual + (lambda _ + (mkdir-p "doc") + (invoke "make" "doc"))) (replace 'install (lambda* (#:key outputs #:allow-other-keys) (let* ((out (assoc-ref outputs "out")) @@ -3850,13 +3858,13 @@ particular, reads spanning multiple exons.") (find-files "." "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")) (mkdir-p doc) - (install-file "doc/manual.inc.html" doc)) - #t))))) + (install-file "doc/manual.inc.html" doc))))))) (native-inputs - `(("unzip" ,unzip) ; needed for archive from ftp - ("perl" ,perl) + `(("perl" ,perl) ("pandoc" ,pandoc))) ; for documentation - (home-page "https://ccb.jhu.edu/software/hisat2/index.shtml") + (inputs + `(("python" ,python-wrapper))) + (home-page "https://daehwankimlab.github.io/hisat2/") (synopsis "Graph-based alignment of genomic sequencing reads") (description "HISAT2 is a fast and sensitive alignment program for mapping next-generation sequencing reads (both DNA and RNA) to a population of human @@ -3874,7 +3882,7 @@ indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).") (define-public hmmer (package (name "hmmer") - (version "3.2.1") + (version "3.3.2") (source (origin (method url-fetch) @@ -3882,9 +3890,10 @@ indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).") "http://eddylab.org/software/hmmer/hmmer-" version ".tar.gz")) (sha256 (base32 - "171bivy6xhgjsz5nv53n81pc3frnwz29ylblawk2bv46szwjjqd5")))) + "0s9wf6n0qanbx8qs6igfl3vyjikwbrvh4d9d6mv54yp3xysykzlj")))) (build-system gnu-build-system) - (native-inputs `(("perl" ,perl))) + (native-inputs `(("perl" ,perl) + ("python" ,python))) ; for tests (home-page "http://hmmer.org/") (synopsis "Biosequence analysis using profile hidden Markov models") (description @@ -6281,7 +6290,7 @@ subsequent visualization, annotation and storage of results.") (define-public plink-ng (package (inherit plink) (name "plink-ng") - (version "1.90b4") + (version "2.00a2.3") (source (origin (method git-fetch) @@ -6290,29 +6299,26 @@ subsequent visualization, annotation and storage of results.") (commit (string-append "v" version)))) (file-name (git-file-name name version)) (sha256 - (base32 "02npdwgkpfkdnhw819rhj5kw02a5k5m90b14zq9zzya4hyg929c0")))) + (base32 "1p88lz9agzjlspjhciz61qjc36cfniv4nkxszyy0njqyc5rzc0cd")))) (build-system gnu-build-system) (arguments - '(#:tests? #f ;no "check" target + `(#:tests? #f ;no "check" target #:make-flags (list "BLASFLAGS=-llapack -lopenblas" "CFLAGS=-Wall -O2 -DDYNAMIC_ZLIB=1" "ZLIB=-lz" - "-f" "Makefile.std") + "BIN=plink prettify" + (string-append "CC=" ,(cc-for-target)) + (string-append "PREFIX=" (assoc-ref %outputs "out")) + "DESTDIR=") #:phases (modify-phases %standard-phases (add-after 'unpack 'chdir (lambda _ (chdir "1.9") #t)) - (delete 'configure) ; no "configure" script - (replace 'install - (lambda* (#:key outputs #:allow-other-keys) - (let ((bin (string-append (assoc-ref outputs "out") - "/bin/"))) - (install-file "plink" bin) - #t)))))) + (delete 'configure)))) ; no "configure" script (inputs - `(("zlib" ,zlib) - ("lapack" ,lapack) - ("openblas" ,openblas))) + `(("lapack" ,lapack) + ("openblas" ,openblas) + ("zlib" ,zlib))) (home-page "https://www.cog-genomics.org/plink/") (license license:gpl3+))) @@ -6606,7 +6612,7 @@ writing files into the .sra format.") `(("source" ,source) ("tar" ,tar) ("xz" ,xz))) - (home-page "http://www.seqan.de") + (home-page "https://www.seqan.de") (synopsis "Library for nucleotide sequence analysis") (description "SeqAn is a C++ library of efficient algorithms and data structures for @@ -6622,7 +6628,7 @@ bioinformatics file formats, sequence alignment, and more.") (version "1.4.2") (source (origin (method url-fetch) - (uri (string-append "http://packages.seqan.de/seqan-library/" + (uri (string-append "https://packages.seqan.de/seqan-library/" "seqan-library-" version ".tar.bz2")) (sha256 (base32 @@ -6804,7 +6810,7 @@ application of SortMeRNA is filtering rRNA from metatranscriptomic data.") (define-public star (package (name "star") - (version "2.7.3a") + (version "2.7.8a") (source (origin (method git-fetch) (uri (git-reference @@ -6813,7 +6819,7 @@ application of SortMeRNA is filtering rRNA from metatranscriptomic data.") (file-name (git-file-name name version)) (sha256 (base32 - "1hgiqw5qhs0pc1xazzihcfd92na02xyq2kb469z04y1v51kpvvjq")) + "0zc5biymja9zml9yizcj1h68fq9c6sxfcav8a0lbgvgsm44rvans")) (modules '((guix build utils))) (snippet '(begin @@ -6889,6 +6895,32 @@ sequences.") ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed. (license license:gpl3+))) +(define-public star-for-pigx + (package + (inherit star) + (name "star") + (version "2.7.3a") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/alexdobin/STAR") + (commit version))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1hgiqw5qhs0pc1xazzihcfd92na02xyq2kb469z04y1v51kpvvjq")) + (modules '((guix build utils))) + (snippet + '(begin + (substitute* "source/Makefile" + (("/bin/rm") "rm")) + ;; Remove pre-built binaries and bundled htslib sources. + (delete-file-recursively "bin/MacOSX_x86_64") + (delete-file-recursively "bin/Linux_x86_64") + (delete-file-recursively "bin/Linux_x86_64_static") + (delete-file-recursively "source/htslib") + #t)))))) + (define-public starlong (package (inherit star) (name "starlong") @@ -7128,14 +7160,14 @@ data in the form of VCF files.") (define-public infernal (package (name "infernal") - (version "1.1.3") + (version "1.1.4") (source (origin (method url-fetch) (uri (string-append "http://eddylab.org/software/infernal/" "infernal-" version ".tar.gz")) (sha256 (base32 - "0pm8bm3s6nfa0av4x6m6h27lsg12b3lz3jm0fyh1mc77l2isd61v")))) + "1z4mgwqg1j4n5ika08ai8mg9yjyjhf4821jp83v2bgwzxrykqjgr")))) (build-system gnu-build-system) (native-inputs `(("perl" ,perl) @@ -7203,6 +7235,55 @@ clustering analysis, differential analysis, motif inference and exploration of single cell ATAC-seq sequencing data.") (license license:gpl3))) +(define-public r-archr + (let ((commit "46b519ffb6f73edf132497ac31650d19ef055dc1") + (revision "1")) + (package + (name "r-archr") + (version (git-version "1.0.0" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/GreenleafLab/ArchR") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1zj3sdfhgn2q2256fmz61a92vw1wylyck632d7842d6knd0v92v8")))) + (properties `((upstream-name . "ArchR"))) + (build-system r-build-system) + (propagated-inputs + `(("r-biocgenerics" ,r-biocgenerics) + ("r-biostrings" ,r-biostrings) + ("r-chromvar" ,r-chromvar) + ("r-complexheatmap" ,r-complexheatmap) + ("r-data-table" ,r-data-table) + ("r-genomicranges" ,r-genomicranges) + ("r-ggplot2" ,r-ggplot2) + ("r-ggrepel" ,r-ggrepel) + ("r-gridextra" ,r-gridextra) + ("r-gtable" ,r-gtable) + ("r-gtools" ,r-gtools) + ("r-magrittr" ,r-magrittr) + ("r-matrix" ,r-matrix) + ("r-matrixstats" ,r-matrixstats) + ("r-motifmatchr" ,r-motifmatchr) + ("r-nabor" ,r-nabor) + ("r-plyr" ,r-plyr) + ("r-rcpp" ,r-rcpp) + ("r-rhdf5" ,r-rhdf5) + ("r-rsamtools" ,r-rsamtools) + ("r-s4vectors" ,r-s4vectors) + ("r-stringr" ,r-stringr) + ("r-summarizedexperiment" ,r-summarizedexperiment) + ("r-uwot" ,r-uwot))) + (home-page "https://github.com/GreenleafLab/ArchR") + (synopsis "Analyze single-cell regulatory chromatin in R") + (description + "This package is designed to streamline scATAC analyses in R.") + (license license:gpl2+)))) + (define-public r-scde (package (name "r-scde") @@ -7308,387 +7389,6 @@ includes software to ") (license license:cc0)))) -(define-public r-genefilter - (package - (name "r-genefilter") - (version "1.72.1") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "genefilter" version)) - (sha256 - (base32 - "1c6h3qnjvphs977qhv5vafvsb108r0q7xhaayly6qv6adqfn94rn")))) - (build-system r-build-system) - (native-inputs - `(("gfortran" ,gfortran) - ("r-knitr" ,r-knitr))) - (propagated-inputs - `(("r-annotate" ,r-annotate) - ("r-annotationdbi" ,r-annotationdbi) - ("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-survival" ,r-survival))) - (home-page "https://bioconductor.org/packages/genefilter") - (synopsis "Filter genes from high-throughput experiments") - (description - "This package provides basic functions for filtering genes from -high-throughput sequencing experiments.") - (license license:artistic2.0))) - -(define-public r-deseq2 - (package - (name "r-deseq2") - (version "1.30.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "DESeq2" version)) - (sha256 - (base32 - "0q2f9cywrcmp1p7ii8f45g4dk4hsnjflq3yqhsxgnpv9fw338qpp")))) - (properties `((upstream-name . "DESeq2"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-biocparallel" ,r-biocparallel) - ("r-genefilter" ,r-genefilter) - ("r-geneplotter" ,r-geneplotter) - ("r-genomicranges" ,r-genomicranges) - ("r-ggplot2" ,r-ggplot2) - ("r-iranges" ,r-iranges) - ("r-locfit" ,r-locfit) - ("r-rcpp" ,r-rcpp) - ("r-rcpparmadillo" ,r-rcpparmadillo) - ("r-s4vectors" ,r-s4vectors) - ("r-summarizedexperiment" ,r-summarizedexperiment))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/DESeq2") - (synopsis "Differential gene expression analysis") - (description - "This package provides functions to estimate variance-mean dependence in -count data from high-throughput nucleotide sequencing assays and test for -differential expression based on a model using the negative binomial -distribution.") - (license license:lgpl3+))) - -(define-public r-dexseq - (package - (name "r-dexseq") - (version "1.36.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "DEXSeq" version)) - (sha256 - (base32 - "0wfjb42xcr4wjy8a654b74411dky8hp6sp8xdwf0sxqgsxy106qi")))) - (properties `((upstream-name . "DEXSeq"))) - (build-system r-build-system) - (propagated-inputs - `(("r-annotationdbi" ,r-annotationdbi) - ("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-biocparallel" ,r-biocparallel) - ("r-biomart" ,r-biomart) - ("r-deseq2" ,r-deseq2) - ("r-genefilter" ,r-genefilter) - ("r-geneplotter" ,r-geneplotter) - ("r-genomicranges" ,r-genomicranges) - ("r-hwriter" ,r-hwriter) - ("r-iranges" ,r-iranges) - ("r-rcolorbrewer" ,r-rcolorbrewer) - ("r-rsamtools" ,r-rsamtools) - ("r-s4vectors" ,r-s4vectors) - ("r-statmod" ,r-statmod) - ("r-stringr" ,r-stringr) - ("r-summarizedexperiment" ,r-summarizedexperiment))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/DEXSeq") - (synopsis "Inference of differential exon usage in RNA-Seq") - (description - "This package is focused on finding differential exon usage using RNA-seq -exon counts between samples with different experimental designs. It provides -functions that allows the user to make the necessary statistical tests based -on a model that uses the negative binomial distribution to estimate the -variance between biological replicates and generalized linear models for -testing. The package also provides functions for the visualization and -exploration of the results.") - (license license:gpl3+))) - -(define-public r-annotationforge - (package - (name "r-annotationforge") - (version "1.32.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "AnnotationForge" version)) - (sha256 - (base32 - "0y3820dkvwz09wlmz9drx6gqpsr9cwppaiz40zafwfxbz65y8px7")))) - (properties - `((upstream-name . "AnnotationForge"))) - (build-system r-build-system) - (propagated-inputs - `(("r-annotationdbi" ,r-annotationdbi) - ("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-dbi" ,r-dbi) - ("r-rcurl" ,r-rcurl) - ("r-rsqlite" ,r-rsqlite) - ("r-s4vectors" ,r-s4vectors) - ("r-xml" ,r-xml))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/AnnotationForge") - (synopsis "Code for building annotation database packages") - (description - "This package provides code for generating Annotation packages and their -databases. Packages produced are intended to be used with AnnotationDbi.") - (license license:artistic2.0))) - -(define-public r-rbgl - (package - (name "r-rbgl") - (version "1.66.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "RBGL" version)) - (sha256 - (base32 - "016vyzgixb3gjpzi21rbs6ngnnqcxr77krwjjf1ldnzzj8vqrqsz")))) - (properties `((upstream-name . "RBGL"))) - (build-system r-build-system) - (propagated-inputs - `(("r-bh" ,r-bh) - ("r-graph" ,r-graph))) - (home-page "https://www.bioconductor.org/packages/RBGL") - (synopsis "Interface to the Boost graph library") - (description - "This package provides a fairly extensive and comprehensive interface to -the graph algorithms contained in the Boost library.") - (license license:artistic2.0))) - -(define-public r-gseabase - (package - (name "r-gseabase") - (version "1.52.1") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "GSEABase" version)) - (sha256 - (base32 - "0dawh1kjmf6921jm77j2s2phrq5237pjc4sdh8fkln89gf48zx6i")))) - (properties `((upstream-name . "GSEABase"))) - (build-system r-build-system) - (propagated-inputs - `(("r-annotate" ,r-annotate) - ("r-annotationdbi" ,r-annotationdbi) - ("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-graph" ,r-graph) - ("r-xml" ,r-xml))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/GSEABase") - (synopsis "Gene set enrichment data structures and methods") - (description - "This package provides classes and methods to support @dfn{Gene Set -Enrichment Analysis} (GSEA).") - (license license:artistic2.0))) - -(define-public r-category - (package - (name "r-category") - (version "2.56.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "Category" version)) - (sha256 - (base32 - "0m77wpnica0h2ia9ajdaiga4plgz1s9wls6pdnxzk7kwl8a68wkr")))) - (properties `((upstream-name . "Category"))) - (build-system r-build-system) - (propagated-inputs - `(("r-annotate" ,r-annotate) - ("r-annotationdbi" ,r-annotationdbi) - ("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-genefilter" ,r-genefilter) - ("r-graph" ,r-graph) - ("r-gseabase" ,r-gseabase) - ("r-matrix" ,r-matrix) - ("r-rbgl" ,r-rbgl) - ("r-dbi" ,r-dbi))) - (home-page "https://bioconductor.org/packages/Category") - (synopsis "Category analysis") - (description - "This package provides a collection of tools for performing category -analysis.") - (license license:artistic2.0))) - -(define-public r-gostats - (package - (name "r-gostats") - (version "2.56.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "GOstats" version)) - (sha256 - (base32 - "18q8p0fv9fl2r6zjxknfjwqxr69dlyxy6c8amzn6c6dwjq1cxk6j")))) - (properties `((upstream-name . "GOstats"))) - (build-system r-build-system) - (propagated-inputs - `(("r-annotate" ,r-annotate) - ("r-annotationdbi" ,r-annotationdbi) - ("r-annotationforge" ,r-annotationforge) - ("r-biobase" ,r-biobase) - ("r-category" ,r-category) - ("r-go-db" ,r-go-db) - ("r-graph" ,r-graph) - ("r-rgraphviz" ,r-rgraphviz) - ("r-rbgl" ,r-rbgl))) - (home-page "https://bioconductor.org/packages/GOstats") - (synopsis "Tools for manipulating GO and microarrays") - (description - "This package provides a set of tools for interacting with GO and -microarray data. A variety of basic manipulation tools for graphs, hypothesis -testing and other simple calculations.") - (license license:artistic2.0))) - -(define-public r-shortread - (package - (name "r-shortread") - (version "1.48.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "ShortRead" version)) - (sha256 - (base32 - "0w4m8d3h660mmr2ymp206r1n4aqssxmkv8yxkbr5y1swrahxzfk9")))) - (properties `((upstream-name . "ShortRead"))) - (build-system r-build-system) - (inputs - `(("zlib" ,zlib))) - (propagated-inputs - `(("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-biocparallel" ,r-biocparallel) - ("r-biostrings" ,r-biostrings) - ("r-genomeinfodb" ,r-genomeinfodb) - ("r-genomicalignments" ,r-genomicalignments) - ("r-genomicranges" ,r-genomicranges) - ("r-rhtslib" ,r-rhtslib) - ("r-hwriter" ,r-hwriter) - ("r-iranges" ,r-iranges) - ("r-lattice" ,r-lattice) - ("r-latticeextra" ,r-latticeextra) - ("r-rsamtools" ,r-rsamtools) - ("r-s4vectors" ,r-s4vectors) - ("r-xvector" ,r-xvector) - ("r-zlibbioc" ,r-zlibbioc))) - (home-page "https://bioconductor.org/packages/ShortRead") - (synopsis "FASTQ input and manipulation tools") - (description - "This package implements sampling, iteration, and input of FASTQ files. -It includes functions for filtering and trimming reads, and for generating a -quality assessment report. Data are represented as -@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of -purposes. The package also contains legacy support for early single-end, -ungapped alignment formats.") - (license license:artistic2.0))) - -(define-public r-systempiper - (package - (name "r-systempiper") - (version "1.24.3") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "systemPipeR" version)) - (sha256 - (base32 - "0ffazyl2q9plbhwlxi04s3fvnli6qj95n7bkjc21535bbi08xfki")))) - (properties `((upstream-name . "systemPipeR"))) - (build-system r-build-system) - (propagated-inputs - `(("r-annotate" ,r-annotate) - ("r-assertthat" ,r-assertthat) - ("r-batchtools" ,r-batchtools) - ("r-biostrings" ,r-biostrings) - ("r-deseq2" ,r-deseq2) - ("r-dot" ,r-dot) - ("r-edger" ,r-edger) - ("r-genomicfeatures" ,r-genomicfeatures) - ("r-genomicranges" ,r-genomicranges) - ("r-ggplot2" ,r-ggplot2) - ("r-go-db" ,r-go-db) - ("r-gostats" ,r-gostats) - ("r-iranges" ,r-iranges) - ("r-limma" ,r-limma) - ("r-magrittr" ,r-magrittr) - ("r-pheatmap" ,r-pheatmap) - ("r-rjson" ,r-rjson) - ("r-rsamtools" ,r-rsamtools) - ("r-rsvg" ,r-rsvg) - ("r-shortread" ,r-shortread) - ("r-stringr" ,r-stringr) - ("r-summarizedexperiment" ,r-summarizedexperiment) - ("r-yaml" ,r-yaml) - ("r-variantannotation" ,r-variantannotation))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://github.com/tgirke/systemPipeR") - (synopsis "Next generation sequencing workflow and reporting environment") - (description - "This R package provides tools for building and running automated -end-to-end analysis workflows for a wide range of @dfn{next generation -sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq. -Important features include a uniform workflow interface across different NGS -applications, automated report generation, and support for running both R and -command-line software, such as NGS aligners or peak/variant callers, on local -computers or compute clusters. Efficient handling of complex sample sets and -experimental designs is facilitated by a consistently implemented sample -annotation infrastructure.") - (license license:artistic2.0))) - -(define-public r-grohmm - (package - (name "r-grohmm") - (version "1.24.0") - (source - (origin - (method url-fetch) - (uri (bioconductor-uri "groHMM" version)) - (sha256 - (base32 - "08pap9wsaxl4jjlc1py0rc019gmi6daa0f9cr3ih1d97wybncanx")))) - (properties `((upstream-name . "groHMM"))) - (build-system r-build-system) - (propagated-inputs - `(("r-genomeinfodb" ,r-genomeinfodb) - ("r-genomicalignments" ,r-genomicalignments) - ("r-genomicranges" ,r-genomicranges) - ("r-iranges" ,r-iranges) - ("r-mass" ,r-mass) - ("r-rtracklayer" ,r-rtracklayer) - ("r-s4vectors" ,r-s4vectors))) - (home-page "https://github.com/Kraus-Lab/groHMM") - (synopsis "GRO-seq analysis pipeline") - (description - "This package provides a pipeline for the analysis of GRO-seq data.") - (license license:gpl3+))) - (define-public vsearch (package (name "vsearch") @@ -7776,6 +7476,79 @@ clusters.") (home-page "https://sourceforge.net/projects/pardre/") (license license:gpl3+))) +(define-public ngshmmalign + (package + (name "ngshmmalign") + (version "0.1.1") + (source + (origin + (method url-fetch) + (uri (string-append "https://github.com/cbg-ethz/ngshmmalign/" + "releases/download/" version + "/ngshmmalign-" version ".tar.bz2")) + (sha256 + (base32 + "0jryvlssi2r2ii1dxnx39yk6bh4yqgq010fnxrgfgbaj3ykijlzv")))) + (build-system cmake-build-system) + (arguments '(#:tests? #false)) ; there are none + (inputs + `(("boost" ,boost))) + (home-page "https://github.com/cbg-ethz/ngshmmalign/") + (synopsis "Profile HMM aligner for NGS reads") + (description + "ngshmmalign is a profile HMM aligner for NGS reads designed particularly +for small genomes (such as those of RNA viruses like HIV-1 and HCV) that +experience substantial biological insertions and deletions.") + (license license:gpl2+))) + +(define-public prinseq + (package + (name "prinseq") + (version "0.20.4") + (source + (origin + (method url-fetch) + (uri (string-append "mirror://sourceforge/prinseq/standalone/" + "prinseq-lite-" version ".tar.gz")) + (sha256 + (base32 + "0vxmzvmm67whxrqdaaamwgjk7cf0fzfs5s673jgg00kz7g70splv")))) + (build-system gnu-build-system) + (arguments + `(#:tests? #false ; no check target + #:phases + (modify-phases %standard-phases + (delete 'configure) + (delete 'build) + (replace 'install + (lambda* (#:key outputs #:allow-other-keys) + (let* ((out (assoc-ref outputs "out")) + (bin (string-append out "/bin"))) + (for-each (lambda (file) + (chmod file #o555) + (install-file file bin) + (wrap-script (string-append bin "/" (basename file)) + `("PERL5LIB" ":" prefix + (,(getenv "PERL5LIB"))))) + (find-files "." "prinseq.*.pl")))))))) + (inputs + `(("guile" ,guile-3.0) ; for wrapper scripts + ("perl" ,perl) + ("perl-cairo" ,perl-cairo) + ("perl-data-dumper" ,perl-data-dumper) + ("perl-digest-md5" ,perl-digest-md5) + ("perl-getopt-long" ,perl-getopt-long) + ("perl-json" ,perl-json) + ("perl-statistics-pca" ,perl-statistics-pca))) + (home-page "http://prinseq.sourceforge.net/") + (synopsis "Preprocess sequence data in FASTA or FASTQ formats") + (description + "PRINSEQ is a bioinformatics tool to help you preprocess your genomic or +metagenomic sequence data in FASTA or FASTQ formats. The tool is written in +Perl and can be helpful if you want to filter, reformat, or trim your sequence +data. It also generates basic statistics for your sequences.") + (license license:gpl3+))) + (define-public ruby-bio-kseq (package (name "ruby-bio-kseq") @@ -7920,591 +7693,6 @@ including VCF header and contents in RDF and JSON.") (home-page "https://github.com/vcflib/bio-vcf") (license license:expat))) -(define-public r-biocviews - (package - (name "r-biocviews") - (version "1.58.1") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "biocViews" version)) - (sha256 - (base32 - "1by2639z7n62z84dr8rj9jz12gsd1k8q42zsnxacxbwfwp6h0cl4")))) - (properties - `((upstream-name . "biocViews"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biobase" ,r-biobase) - ("r-biocmanager" ,r-biocmanager) - ("r-graph" ,r-graph) - ("r-rbgl" ,r-rbgl) - ("r-rcurl" ,r-rcurl) - ("r-xml" ,r-xml) - ("r-runit" ,r-runit))) - (home-page "https://bioconductor.org/packages/biocViews") - (synopsis "Bioconductor package categorization helper") - (description "The purpose of biocViews is to create HTML pages that -categorize packages in a Bioconductor package repository according to keywords, -also known as views, in a controlled vocabulary.") - (license license:artistic2.0))) - -(define-public r-biocstyle - (package - (name "r-biocstyle") - (version "2.18.1") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "BiocStyle" version)) - (sha256 - (base32 - "0rsxyna4dd99x42vc82mlkxx774vb9375llpakg53max1hhwkrqp")))) - (properties - `((upstream-name . "BiocStyle"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biocmanager" ,r-biocmanager) - ("r-bookdown" ,r-bookdown) - ("r-knitr" ,r-knitr) - ("r-rmarkdown" ,r-rmarkdown) - ("r-yaml" ,r-yaml))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/BiocStyle") - (synopsis "Bioconductor formatting styles") - (description "This package provides standard formatting styles for -Bioconductor PDF and HTML documents. Package vignettes illustrate use and -functionality.") - (license license:artistic2.0))) - -(define-public r-bioccheck - (package - (name "r-bioccheck") - (version "1.26.0") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "BiocCheck" version)) - (sha256 - (base32 - "1hyncn9zqj432da95k86rm5b28nbwrvzm52jbhisifkxj1j43cib")))) - (properties - `((upstream-name . "BiocCheck"))) - (build-system r-build-system) - (arguments - '(#:phases - (modify-phases %standard-phases - ;; This package can be used by calling BiocCheck(<package>) from - ;; within R, or by running R CMD BiocCheck <package>. This phase - ;; makes sure the latter works. For this to work, the BiocCheck - ;; script must be somewhere on the PATH (not the R bin directory). - (add-after 'install 'install-bioccheck-subcommand - (lambda* (#:key outputs #:allow-other-keys) - (let* ((out (assoc-ref outputs "out")) - (dest-dir (string-append out "/bin")) - (script-dir - (string-append out "/site-library/BiocCheck/script/"))) - (mkdir-p dest-dir) - (symlink (string-append script-dir "/checkBadDeps.R") - (string-append dest-dir "/checkBadDeps.R")) - (symlink (string-append script-dir "/BiocCheck") - (string-append dest-dir "/BiocCheck"))) - #t))))) - (propagated-inputs - `(("r-codetools" ,r-codetools) - ("r-graph" ,r-graph) - ("r-httr" ,r-httr) - ("r-knitr" ,r-knitr) - ("r-optparse" ,r-optparse) - ("r-biocmanager" ,r-biocmanager) - ("r-biocviews" ,r-biocviews) - ("r-stringdist" ,r-stringdist))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/BiocCheck") - (synopsis "Executes Bioconductor-specific package checks") - (description "This package contains tools to perform additional quality -checks on R packages that are to be submitted to the Bioconductor repository.") - (license license:artistic2.0))) - -(define-public r-s4vectors - (package - (name "r-s4vectors") - (version "0.28.1") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "S4Vectors" version)) - (sha256 - (base32 - "0fhf4lsfxrim7glazh6ng46ykzaly5ggwpg170vcz4cc24prv0rh")))) - (properties - `((upstream-name . "S4Vectors"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biocgenerics" ,r-biocgenerics))) - (home-page "https://bioconductor.org/packages/S4Vectors") - (synopsis "S4 implementation of vectors and lists") - (description - "The S4Vectors package defines the @code{Vector} and @code{List} virtual -classes and a set of generic functions that extend the semantic of ordinary -vectors and lists in R. Package developers can easily implement vector-like -or list-like objects as concrete subclasses of @code{Vector} or @code{List}. -In addition, a few low-level concrete subclasses of general interest (e.g. -@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the -S4Vectors package itself.") - (license license:artistic2.0))) - -(define-public r-iranges - (package - (name "r-iranges") - (version "2.24.1") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "IRanges" version)) - (sha256 - (base32 - "01mx46a82vd3gz705pj0kk4wpxg683s8jqxchzjia3gz00b4qw52")))) - (properties - `((upstream-name . "IRanges"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biocgenerics" ,r-biocgenerics) - ("r-s4vectors" ,r-s4vectors))) - (home-page "https://bioconductor.org/packages/IRanges") - (synopsis "Infrastructure for manipulating intervals on sequences") - (description - "This package provides efficient low-level and highly reusable S4 classes -for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more -generally, data that can be organized sequentially (formally defined as -@code{Vector} objects), as well as views on these @code{Vector} objects. -Efficient list-like classes are also provided for storing big collections of -instances of the basic classes. All classes in the package use consistent -naming and share the same rich and consistent \"Vector API\" as much as -possible.") - (license license:artistic2.0))) - -(define-public r-genomeinfodbdata - (package - (name "r-genomeinfodbdata") - (version "1.2.0") - (source (origin - (method url-fetch) - ;; We cannot use bioconductor-uri here because this tarball is - ;; located under "data/annotation/" instead of "bioc/". - (uri (string-append "https://bioconductor.org/packages/release/" - "data/annotation/src/contrib/GenomeInfoDbData_" - version ".tar.gz")) - (sha256 - (base32 - "0di6nlqpsyqf693k2na65ayqldih563x3zfrczpqc5q2hl5kg35c")))) - (properties - `((upstream-name . "GenomeInfoDbData"))) - (build-system r-build-system) - (home-page "https://bioconductor.org/packages/GenomeInfoDbData") - (synopsis "Species and taxonomy ID look up tables for GenomeInfoDb") - (description "This package contains data for mapping between NCBI taxonomy -ID and species. It is used by functions in the GenomeInfoDb package.") - (license license:artistic2.0))) - -(define-public r-genomeinfodb - (package - (name "r-genomeinfodb") - (version "1.26.2") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "GenomeInfoDb" version)) - (sha256 - (base32 - "092izc49maxjhf6m4b0qx21ad16dz7bmxy5pysp3vkyhdrfa2f7v")))) - (properties - `((upstream-name . "GenomeInfoDb"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biocgenerics" ,r-biocgenerics) - ("r-genomeinfodbdata" ,r-genomeinfodbdata) - ("r-iranges" ,r-iranges) - ("r-rcurl" ,r-rcurl) - ("r-s4vectors" ,r-s4vectors))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/GenomeInfoDb") - (synopsis "Utilities for manipulating chromosome identifiers") - (description - "This package contains data and functions that define and allow -translation between different chromosome sequence naming conventions (e.g., -\"chr1\" versus \"1\"), including a function that attempts to place sequence -names in their natural, rather than lexicographic, order.") - (license license:artistic2.0))) - -(define-public r-edger - (package - (name "r-edger") - (version "3.32.1") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "edgeR" version)) - (sha256 - (base32 - "1gaic8qf6a6sy0bmydh1xzf52w0wnq31aanpvw3a30pfsi218bcp")))) - (properties `((upstream-name . "edgeR"))) - (build-system r-build-system) - (propagated-inputs - `(("r-limma" ,r-limma) - ("r-locfit" ,r-locfit) - ("r-rcpp" ,r-rcpp) - ("r-statmod" ,r-statmod))) ;for estimateDisp - (home-page "http://bioinf.wehi.edu.au/edgeR") - (synopsis "EdgeR does empirical analysis of digital gene expression data") - (description "This package can do differential expression analysis of -RNA-seq expression profiles with biological replication. It implements a range -of statistical methodology based on the negative binomial distributions, -including empirical Bayes estimation, exact tests, generalized linear models -and quasi-likelihood tests. It be applied to differential signal analysis of -other types of genomic data that produce counts, including ChIP-seq, SAGE and -CAGE.") - (license license:gpl2+))) - -(define-public r-variantannotation - (package - (name "r-variantannotation") - (version "1.36.0") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "VariantAnnotation" version)) - (sha256 - (base32 - "1sl0l6v05lfglj281nszma0h5k234md7rn2pdah8vs2d4iq3kimw")))) - (properties - `((upstream-name . "VariantAnnotation"))) - (propagated-inputs - `(("r-annotationdbi" ,r-annotationdbi) - ("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-biostrings" ,r-biostrings) - ("r-bsgenome" ,r-bsgenome) - ("r-dbi" ,r-dbi) - ("r-genomeinfodb" ,r-genomeinfodb) - ("r-genomicfeatures" ,r-genomicfeatures) - ("r-genomicranges" ,r-genomicranges) - ("r-iranges" ,r-iranges) - ("r-matrixgenerics" ,r-matrixgenerics) - ("r-summarizedexperiment" ,r-summarizedexperiment) - ("r-rhtslib" ,r-rhtslib) - ("r-rsamtools" ,r-rsamtools) - ("r-rtracklayer" ,r-rtracklayer) - ("r-s4vectors" ,r-s4vectors) - ("r-xvector" ,r-xvector) - ("r-zlibbioc" ,r-zlibbioc))) - (build-system r-build-system) - (home-page "https://bioconductor.org/packages/VariantAnnotation") - (synopsis "Package for annotation of genetic variants") - (description "This R package can annotate variants, compute amino acid -coding changes and predict coding outcomes.") - (license license:artistic2.0))) - -(define-public r-limma - (package - (name "r-limma") - (version "3.46.0") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "limma" version)) - (sha256 - (base32 - "1xxv493q1kip9bjfv7v7k5dnq7hz7gvl80i983v4mvwavhgnbxfz")))) - (build-system r-build-system) - (home-page "http://bioinf.wehi.edu.au/limma") - (synopsis "Package for linear models for microarray and RNA-seq data") - (description "This package can be used for the analysis of gene expression -studies, especially the use of linear models for analysing designed experiments -and the assessment of differential expression. The analysis methods apply to -different technologies, including microarrays, RNA-seq, and quantitative PCR.") - (license license:gpl2+))) - -(define-public r-xvector - (package - (name "r-xvector") - (version "0.30.0") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "XVector" version)) - (sha256 - (base32 - "1pqljikg4f6jb7wgm5537zwgq5b013nyz1agjrwfq2cljb0ym6lq")))) - (properties - `((upstream-name . "XVector"))) - (build-system r-build-system) - (arguments - `(#:phases - (modify-phases %standard-phases - (add-after 'unpack 'use-system-zlib - (lambda _ - (substitute* "DESCRIPTION" - (("zlibbioc, ") "")) - (substitute* "NAMESPACE" - (("import\\(zlibbioc\\)") "")) - #t))))) - (inputs - `(("zlib" ,zlib))) - (propagated-inputs - `(("r-biocgenerics" ,r-biocgenerics) - ("r-iranges" ,r-iranges) - ("r-s4vectors" ,r-s4vectors))) - (home-page "https://bioconductor.org/packages/XVector") - (synopsis "Representation and manpulation of external sequences") - (description - "This package provides memory efficient S4 classes for storing sequences -\"externally\" (behind an R external pointer, or on disk).") - (license license:artistic2.0))) - -(define-public r-genomicranges - (package - (name "r-genomicranges") - (version "1.42.0") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "GenomicRanges" version)) - (sha256 - (base32 - "0j4py5g6pdj35xhlaqhxxhg55j9l4mcdk3yck4dgyavv5f2dh24i")))) - (properties - `((upstream-name . "GenomicRanges"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biocgenerics" ,r-biocgenerics) - ("r-genomeinfodb" ,r-genomeinfodb) - ("r-iranges" ,r-iranges) - ("r-s4vectors" ,r-s4vectors) - ("r-xvector" ,r-xvector))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/GenomicRanges") - (synopsis "Representation and manipulation of genomic intervals") - (description - "This package provides tools to efficiently represent and manipulate -genomic annotations and alignments is playing a central role when it comes to -analyzing high-throughput sequencing data (a.k.a. NGS data). The -GenomicRanges package defines general purpose containers for storing and -manipulating genomic intervals and variables defined along a genome.") - (license license:artistic2.0))) - -(define-public r-biobase - (package - (name "r-biobase") - (version "2.50.0") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "Biobase" version)) - (sha256 - (base32 - "11kgc4flywlm3i18603558l8ksv91c24vkc5fnnbcd375i2dhhd4")))) - (properties - `((upstream-name . "Biobase"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biocgenerics" ,r-biocgenerics))) - (home-page "https://bioconductor.org/packages/Biobase") - (synopsis "Base functions for Bioconductor") - (description - "This package provides functions that are needed by many other packages -on Bioconductor or which replace R functions.") - (license license:artistic2.0))) - -(define-public r-annotationdbi - (package - (name "r-annotationdbi") - (version "1.52.0") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "AnnotationDbi" version)) - (sha256 - (base32 - "0zqxgh3nx6y8ry12s2vss2f4axz5vpqxha1y4ifhhcx4zhpzsglr")))) - (properties - `((upstream-name . "AnnotationDbi"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biobase" ,r-biobase) - ("r-biocgenerics" ,r-biocgenerics) - ("r-dbi" ,r-dbi) - ("r-iranges" ,r-iranges) - ("r-rsqlite" ,r-rsqlite) - ("r-s4vectors" ,r-s4vectors))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/AnnotationDbi") - (synopsis "Annotation database interface") - (description - "This package provides user interface and database connection code for -annotation data packages using SQLite data storage.") - (license license:artistic2.0))) - -(define-public r-biomart - (package - (name "r-biomart") - (version "2.46.3") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "biomaRt" version)) - (sha256 - (base32 - "0gwmd0ykpv0gyh34c56g5m12lil20fvig49f3ih1jxrxf3q4wmq7")))) - (properties - `((upstream-name . "biomaRt"))) - (build-system r-build-system) - (propagated-inputs - `(("r-annotationdbi" ,r-annotationdbi) - ("r-biocfilecache" ,r-biocfilecache) - ("r-httr" ,r-httr) - ("r-openssl" ,r-openssl) - ("r-progress" ,r-progress) - ("r-rappdirs" ,r-rappdirs) - ("r-stringr" ,r-stringr) - ("r-xml" ,r-xml) - ("r-xml2" ,r-xml2))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/biomaRt") - (synopsis "Interface to BioMart databases") - (description - "biomaRt provides an interface to a growing collection of databases -implementing the @url{BioMart software suite, http://www.biomart.org}. The -package enables retrieval of large amounts of data in a uniform way without -the need to know the underlying database schemas or write complex SQL queries. -Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene, -Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt -users direct access to a diverse set of data and enable a wide range of -powerful online queries from gene annotation to database mining.") - (license license:artistic2.0))) - -(define-public r-biocparallel - (package - (name "r-biocparallel") - (version "1.24.1") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "BiocParallel" version)) - (sha256 - (base32 - "1iryicvmcagcrj29kp49mqhiq2kn72j4idj380hi9illmdrg9ism")))) - (properties - `((upstream-name . "BiocParallel"))) - (build-system r-build-system) - (propagated-inputs - `(("r-futile-logger" ,r-futile-logger) - ("r-snow" ,r-snow) - ("r-bh" ,r-bh))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/BiocParallel") - (synopsis "Bioconductor facilities for parallel evaluation") - (description - "This package provides modified versions and novel implementation of -functions for parallel evaluation, tailored to use with Bioconductor -objects.") - (license (list license:gpl2+ license:gpl3+)))) - -(define-public r-biostrings - (package - (name "r-biostrings") - (version "2.58.0") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "Biostrings" version)) - (sha256 - (base32 - "1rbqhs73mhfr1gi0rx28jiyan7i3hb45ai3jpl1656fnrhgjfxq5")))) - (properties - `((upstream-name . "Biostrings"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biocgenerics" ,r-biocgenerics) - ("r-crayon" ,r-crayon) - ("r-iranges" ,r-iranges) - ("r-s4vectors" ,r-s4vectors) - ("r-xvector" ,r-xvector))) - (home-page "https://bioconductor.org/packages/Biostrings") - (synopsis "String objects and algorithms for biological sequences") - (description - "This package provides memory efficient string containers, string -matching algorithms, and other utilities, for fast manipulation of large -biological sequences or sets of sequences.") - (license license:artistic2.0))) - -(define-public r-rsamtools - (package - (name "r-rsamtools") - (version "2.6.0") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "Rsamtools" version)) - (sha256 - (base32 - "040pggkwglc6wy90qnc7xcdnaj0v3iqlykvvsl74241409qly554")))) - (properties - `((upstream-name . "Rsamtools"))) - (build-system r-build-system) - (arguments - `(#:phases - (modify-phases %standard-phases - (add-after 'unpack 'use-system-zlib - (lambda _ - (substitute* "DESCRIPTION" - (("zlibbioc, ") "")) - (substitute* "NAMESPACE" - (("import\\(zlibbioc\\)") "")) - #t))))) - (propagated-inputs - `(("r-biocgenerics" ,r-biocgenerics) - ("r-biocparallel" ,r-biocparallel) - ("r-biostrings" ,r-biostrings) - ("r-bitops" ,r-bitops) - ("r-genomeinfodb" ,r-genomeinfodb) - ("r-genomicranges" ,r-genomicranges) - ("r-iranges" ,r-iranges) - ("r-rhtslib" ,r-rhtslib) - ("r-s4vectors" ,r-s4vectors) - ("r-xvector" ,r-xvector))) - (home-page "https://bioconductor.org/packages/release/bioc/html/Rsamtools.html") - (synopsis "Interface to samtools, bcftools, and tabix") - (description - "This package provides an interface to the @code{samtools}, -@code{bcftools}, and @code{tabix} utilities for manipulating SAM (Sequence -Alignment / Map), FASTA, binary variant call (BCF) and compressed indexed -tab-delimited (tabix) files.") - (license license:expat))) - -(define-public r-delayedarray - (package - (name "r-delayedarray") - (version "0.16.1") - (source (origin - (method url-fetch) - (uri (bioconductor-uri "DelayedArray" version)) - (sha256 - (base32 - "1d75zrhha1v7dhbvjp6a4iap441l5k268w0jjxklpqywbqns7l3d")))) - (properties - `((upstream-name . "DelayedArray"))) - (build-system r-build-system) - (propagated-inputs - `(("r-biocgenerics" ,r-biocgenerics) - ("r-s4vectors" ,r-s4vectors) - ("r-iranges" ,r-iranges) - ("r-matrix" ,r-matrix) - ("r-matrixgenerics" ,r-matrixgenerics))) - (native-inputs - `(("r-knitr" ,r-knitr))) - (home-page "https://bioconductor.org/packages/DelayedArray") - (synopsis "Delayed operations on array-like objects") - (description - "Wrapping an array-like object (typically an on-disk object) in a -@code{DelayedArray} object allows one to perform common array operations on it -without loading the object in memory. In order to reduce memory usage and -optimize performance, operations on the object are either delayed or executed -using a block processing mechanism. Note that this also works on in-memory -array-like objects like @code{DataFrame} objects (typically with Rle columns), -@code{Matrix} objects, and ordinary arrays and data frames.") - (license license:artistic2.0))) - (define-public r-summarizedexperiment (package (name "r-summarizedexperiment") @@ -8623,13 +7811,13 @@ as well as query and modify the browser state, such as the current viewport.") (define-public r-genomicfeatures (package (name "r-genomicfeatures") - (version "1.42.1") + (version "1.42.3") (source (origin (method url-fetch) (uri (bioconductor-uri "GenomicFeatures" version)) (sha256 (base32 - "17dyd9hcw6pw16y353dh55wfhxmkxka99lbsxsp9xyrhffwrxi0s")))) + "168cf261vmcqffbzassavkjyz9a2af0l6zbv9cagkx6b1qrk3siz")))) (properties `((upstream-name . "GenomicFeatures"))) (build-system r-build-system) @@ -10709,6 +9897,72 @@ explore and perform basic analysis of single cell sequencing data coming from droplet sequencing. It has been particularly tailored for Drop-seq.") (license license:gpl3)))) +(define-public r-cellchat + (let ((commit + "21edd226ca408e4c413408f98562d71ee0b54e5d") + (revision "1")) + (package + (name "r-cellchat") + (version (git-version "1.0.0" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/sqjin/CellChat") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "0cvzl9mi8jjznpql2gv67swnk1dndn3a2h22z5l84h7lwpwjmh53")) + (snippet + '(for-each delete-file '("src/CellChat.so" + "src/CellChat_Rcpp.o" + "src/RcppExports.o"))))) + (properties `((upstream-name . "CellChat"))) + (build-system r-build-system) + (propagated-inputs + `(("r-biocgenerics" ,r-biocgenerics) + ("r-circlize" ,r-circlize) + ("r-colorspace" ,r-colorspace) + ("r-complexheatmap" ,r-complexheatmap) + ("r-cowplot" ,r-cowplot) + ("r-dplyr" ,r-dplyr) + ("r-expm" ,r-expm) + ("r-fnn" ,r-fnn) + ("r-forcats" ,r-forcats) + ("r-future" ,r-future) + ("r-future-apply" ,r-future-apply) + ("r-gg-gap" ,r-gg-gap) + ("r-ggalluvial" ,r-ggalluvial) + ("r-ggplot2" ,r-ggplot2) + ("r-ggrepel" ,r-ggrepel) + ("r-igraph" ,r-igraph) + ("r-irlba" ,r-irlba) + ("r-magrittr" ,r-magrittr) + ("r-matrix" ,r-matrix) + ("r-nmf" ,r-nmf) + ("r-patchwork" ,r-patchwork) + ("r-pbapply" ,r-pbapply) + ("r-rcolorbrewer" ,r-rcolorbrewer) + ("r-rcpp" ,r-rcpp) + ("r-rcppeigen" ,r-rcppeigen) + ("r-reshape2" ,r-reshape2) + ("r-reticulate" ,r-reticulate) + ("r-rspectra" ,r-rspectra) + ("r-rtsne" ,r-rtsne) + ("r-scales" ,r-scales) + ("r-shape" ,r-shape) + ("r-sna" ,r-sna) + ("r-stringr" ,r-stringr) + ("r-svglite" ,r-svglite))) + (native-inputs `(("r-knitr" ,r-knitr))) + (home-page "https://github.com/sqjin/CellChat") + (synopsis "Analysis of cell-cell communication from single-cell transcriptomics data") + (description + "This package infers, visualizes and analyzes the cell-cell +communication networks from scRNA-seq data.") + (license license:gpl3)))) + (define-public sambamba (package (name "sambamba") @@ -10812,7 +10066,7 @@ with narrow binding events such as transcription factor ChIP-seq.") (define-public trim-galore (package (name "trim-galore") - (version "0.6.1") + (version "0.6.6") (source (origin (method git-fetch) @@ -10822,7 +10076,7 @@ with narrow binding events such as transcription factor ChIP-seq.") (file-name (git-file-name name version)) (sha256 (base32 - "1y31wbxwkm9xqzr5zv1pk5q418whnmlmgmfyxxpnl12h83m2i9iv")))) + "0yrwg6325j4sb9vnplvl3jplzab0qdhp92wl480qjinpfq88j4rs")))) (build-system gnu-build-system) (arguments `(#:tests? #f ; no tests @@ -11275,7 +10529,7 @@ browser.") (inputs `(("perl" ,perl) ("java-commons-cli" ,java-commons-cli))) - (home-page "http://fureylab.web.unc.edu/software/fseq/") + (home-page "https://fureylab.web.unc.edu/software/fseq/") (synopsis "Feature density estimator for high-throughput sequence tags") (description "F-Seq is a software package that generates a continuous tag sequence @@ -11444,7 +10698,7 @@ using nucleotide or amino-acid sequence data.") (define-public kallisto (package (name "kallisto") - (version "0.44.0") + (version "0.46.2") (source (origin (method git-fetch) (uri (git-reference @@ -11453,7 +10707,10 @@ using nucleotide or amino-acid sequence data.") (file-name (git-file-name name version)) (sha256 (base32 - "0nj382jiywqnpgvyhichajpkkh5r0bapn43f4dx40zdaq5v4m40m")))) + "0ij5n7v3m90jdfi7sn8nvglfyf58abp1f5xq42r4k73l0lfds6xi")) + (modules '((guix build utils))) + (snippet + '(delete-file-recursively "ext/htslib/")))) (build-system cmake-build-system) (arguments `(#:tests? #f ; no "check" target @@ -11476,7 +10733,7 @@ using nucleotide or amino-acid sequence data.") #t))))) (inputs `(("hdf5" ,hdf5) - ("htslib" ,htslib) + ("htslib" ,htslib-1.9) ("zlib" ,zlib))) (home-page "https://pachterlab.github.io/kallisto/") (synopsis "Near-optimal RNA-Seq quantification") @@ -12465,7 +11722,7 @@ once. This package provides tools to perform Drop-seq analyses.") ("snakemake" ,snakemake) ("fastqc" ,fastqc) ("multiqc" ,multiqc) - ("star" ,star) + ("star" ,star-for-pigx) ("trim-galore" ,trim-galore) ("htseq" ,htseq) ("samtools" ,samtools) @@ -12676,7 +11933,7 @@ methylation and segmentation.") ("pandoc-citeproc" ,pandoc-citeproc) ("samtools" ,samtools) ("snakemake" ,snakemake) - ("star" ,star) + ("star" ,star-for-pigx) ("r-minimal" ,r-minimal) ("r-argparser" ,r-argparser) ("r-cowplot" ,r-cowplot) @@ -12968,6 +12225,64 @@ concatenates pieces of read sequences to generate the final unitig sequences. Thus the per-base error rate is similar to the raw input reads.") (license license:expat))) +(define-public bandage + (package + (name "bandage") + (version "0.8.1") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/rrwick/Bandage") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 "1bbsn5f5x8wlspg4pbibqz6m5vin8c19nl224f3z3km0pkc97rwv")))) + (build-system qt-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (replace 'configure + (lambda _ + (invoke "qmake" "Bandage.pro"))) + (replace 'check + (lambda* (#:key tests? #:allow-other-keys) + (when tests? + (substitute* "tests/bandage_command_line_tests.sh" + (("^bandagepath=.*") + (string-append "bandagepath=" (getcwd) "/Bandage\n"))) + (with-directory-excursion "tests" + (setenv "XDG_RUNTIME_DIR" (getcwd)) + (invoke "./bandage_command_line_tests.sh"))) + #t)) + (replace 'install + (lambda* (#:key outputs #:allow-other-keys) + (let ((out (assoc-ref outputs "out"))) + (install-file "Bandage" (string-append out "/bin")) + #t)))))) + (inputs + `(("qtbase" ,qtbase) + ("qtsvg" ,qtsvg))) + (native-inputs + `(("imagemagick" ,imagemagick))) + (home-page "https://rrwick.github.io/Bandage/") + (synopsis + "Bioinformatics Application for Navigating De novo Assembly Graphs Easily") + (description "Bandage is a program for visualising de novo assembly graphs. +It allows users to interact with the assembly graphs made by de novo assemblers +such as Velvet, SPAdes, MEGAHIT and others. De novo assembly graphs contain not +only assembled contigs but also the connections between those contigs, which +were previously not easily accessible. Bandage visualises assembly graphs, with +connections, using graph layout algorithms. Nodes in the drawn graph, which +represent contigs, can be automatically labelled with their ID, length or depth. +Users can interact with the graph by moving, labelling and colouring nodes. +Sequence information can also be extracted directly from the graph viewer. By +displaying connections between contigs, Bandage opens up new possibilities for +analysing and improving de novo assemblies that are not possible by looking at +contigs alone.") + (license (list license:gpl2+ ; bundled ogdf + license:gpl3+)))) + (define-public r-circus (package (name "r-circus") @@ -13222,6 +12537,89 @@ create connections between analogous cells in different batches without altering the counts or PCA space.") (license license:expat))) +(define-public python-drep + (package + (name "python-drep") + (version "3.2.0") + (source + (origin + (method url-fetch) + (uri (pypi-uri "drep" version)) + (sha256 + (base32 + "08vk0x6v5c5n7afgd5pcjhsvb424absypxy22hw1cm1n9kirbi77")))) + (build-system python-build-system) + (propagated-inputs + `(("python-biopython" ,python-biopython) + ("python-matplotlib" ,python-matplotlib) + ("python-numpy" ,python-numpy) + ("python-pandas" ,python-pandas) + ("python-pytest" ,python-pytest) + ("python-scikit-learn" ,python-scikit-learn) + ("python-seaborn" ,python-seaborn) + ("python-tqdm" ,python-tqdm))) + (home-page "https://github.com/MrOlm/drep") + (synopsis "De-replication of microbial genomes assembled from multiple samples") + (description + "dRep is a Python program for rapidly comparing large numbers of genomes. +dRep can also \"de-replicate\" a genome set by identifying groups of highly +similar genomes and choosing the best representative genome for each genome +set.") + (license license:expat))) + +(define-public instrain + (package + (name "instrain") + (version "1.5.2") + (source + (origin + (method url-fetch) + (uri (pypi-uri "inStrain" version)) + (sha256 + (base32 + "0ykqlpf6yz4caihsaz3ys00cyvlr7wdj4s9a8rh56q5r8xf80ic0")))) + (build-system python-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after 'unpack 'patch-relative-imports + (lambda _ + (substitute* "docker/run_instrain.py" + (("from s3_utils") + "from .s3_utils") + (("from job_utils") + "from .job_utils"))))))) + (inputs + `(("python-biopython" ,python-biopython) + ("python-boto3" ,python-boto3) + ("python-h5py" ,python-h5py) + ("python-lmfit" ,python-lmfit) + ("python-matplotlib" ,python-matplotlib) + ("python-networkx" ,python-networkx) + ("python-numba" ,python-numba) + ("python-numpy" ,python-numpy) + ("python-pandas" ,python-pandas) + ("python-psutil" ,python-psutil) + ("python-pysam" ,python-pysam) + ("python-scikit-learn" ,python-scikit-learn) + ("python-seaborn" ,python-seaborn) + ("python-tqdm" ,python-tqdm) + ;; drep is needed for deprecated plot utilities + ("python-drep" ,python-drep))) + (native-inputs + `(("python-pytest" ,python-pytest))) + (home-page "https://github.com/MrOlm/inStrain") + (synopsis "Calculation of strain-level metrics") + (description + "inStrain is a Python program for analysis of co-occurring genome +populations from metagenomes that allows highly accurate genome comparisons, +analysis of coverage, microdiversity, and linkage, and sensitive SNP detection +with gene localization and synonymous non-synonymous identification.") + ;; The tool itself says that the license is "MIT", but the repository + ;; contains a LICENSE file with the GPLv3. + ;; See https://github.com/MrOlm/inStrain/issues/51 + (license license:expat))) + (define-public gffcompare (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41") (revision "1")) @@ -14127,18 +13525,24 @@ choosing which reads pass the filter.") #t))) (add-after 'install 'wrap-programs (lambda* (#:key outputs #:allow-other-keys) - (for-each (lambda (file) - (wrap-program file `("PYTHONPATH" ":" prefix (,path)))) - (find-files "/share/nanopolish/scripts" "\\.py")) - (for-each (lambda (file) - (wrap-program file `("PERL5LIB" ":" prefix (,path)))) - (find-files "/share/nanopolish/scripts" "\\.pl")) - #t))))) + (let ((pythonpath (getenv "PYTHONPATH")) + (perl5lib (getenv "PERL5LIB")) + (scripts (string-append (assoc-ref outputs "out") + "/share/nanopolish/scripts"))) + (for-each (lambda (file) + (wrap-program file `("PYTHONPATH" ":" prefix (,pythonpath)))) + (find-files scripts "\\.py")) + (for-each (lambda (file) + (wrap-script file `("PERL5LIB" ":" prefix (,perl5lib)))) + (find-files scripts "\\.pl")))))))) (inputs - `(("eigen" ,eigen) + `(("guile" ,guile-3.0) ; for wrappers + ("eigen" ,eigen) ("hdf5" ,hdf5) ("htslib" ,htslib) ("perl" ,perl) + ("bioperl" ,bioperl-minimal) + ("perl-getopt-long" ,perl-getopt-long) ("python" ,python-wrapper) ("python-biopython" ,python-biopython) ("python-numpy" ,python-numpy) @@ -14224,14 +13628,14 @@ is a Cython wrapper for FIt-SNE.") (define-public bbmap (package (name "bbmap") - (version "35.82") + (version "38.90") (source (origin (method url-fetch) (uri (string-append "mirror://sourceforge/bbmap/BBMap_" version ".tar.gz")) (sha256 (base32 - "1q4rfhxcb6z3gm8zg2davjz98w22lkf4hm9ikxz9kdl93pil3wkd")))) + "1wb94bcc006qq86x77z2rz0lc8m9f1kpnw6gdhjfg9bdaqf56rm3")))) (build-system ant-build-system) (arguments `(#:build-target "dist" @@ -15570,6 +14974,41 @@ manipulation of hierarchical features (e.g., genes, transcripts, and exons) than is possible with plain-text methods alone.") (license license:expat)))) +(define-public indelfixer + (package + (name "indelfixer") + (version "1.1") + (source (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/cbg-ethz/InDelFixer/") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "10ak05x8i1bx2p7rriv2rglqg1wr7c8wrhjrqlq1wm7ka99w8i79")))) + (build-system ant-build-system) + (arguments + `(#:jar-name "InDelFixer.jar" + #:source-dir "src/main/java" + #:test-dir "src/test")) + (inputs + `(("java-commons-lang2" ,java-commons-lang) + ("java-args4j" ,java-args4j))) + (native-inputs + `(("java-junit" ,java-junit))) + (home-page "https://github.com/cbg-ethz/InDelFixer/") + (synopsis "Iterative and sensitive NGS sequence aligner") + (description "InDelFixer is a sensitive aligner for 454, Illumina and +PacBio data, employing a full Smith-Waterman alignment against a reference. +This Java command line application aligns Next-Generation Sequencing (NGS) and +third-generation reads to a set of reference sequences, by a prior fast k-mer +matching and removes indels, causing frame shifts. In addition, only a +specific region can be considered. An iterative refinement of the alignment +can be performed, by alignment against the consensus sequence with wobbles. +The output is in SAM format.") + (license license:gpl3+))) + (define-public libsbml (package (name "libsbml") @@ -15607,3 +15046,64 @@ biological processes. SBML is useful for models of metabolism, cell signaling, and more. It continues to be evolved and expanded by an international community.") (license license:lgpl2.1+))) + +(define-public r-signac + (let ((commit "e0512d348adeda4a3f23a2e8f56d1fe09840e03c") + (revision "1")) + (package + (name "r-signac") + (version (git-version "1.1.1" revision commit)) + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/timoast/signac/") + (commit commit))) + (file-name (git-file-name name version)) + (sha256 + (base32 + "1yihhrv7zs87ax61la1nb4y12lg3knraw4b20k5digbcwm8488lb")))) + (properties `((upstream-name . "Signac"))) + (build-system r-build-system) + (inputs `(("zlib" ,zlib))) + (propagated-inputs + `(("r-annotationfilter" ,r-annotationfilter) + ("r-biocgenerics" ,r-biocgenerics) + ("r-biostrings" ,r-biostrings) + ("r-biovizbase" ,r-biovizbase) + ("r-data-table" ,r-data-table) + ("r-dplyr" ,r-dplyr) + ("r-fastmatch" ,r-fastmatch) + ("r-future" ,r-future) + ("r-future-apply" ,r-future-apply) + ("r-genomeinfodb" ,r-genomeinfodb) + ("r-genomicranges" ,r-genomicranges) + ("r-ggbio" ,r-ggbio) + ("r-ggforce" ,r-ggforce) + ("r-ggplot2" ,r-ggplot2) + ("r-ggrepel" ,r-ggrepel) + ("r-ggseqlogo" ,r-ggseqlogo) + ("r-iranges" ,r-iranges) + ("r-irlba" ,r-irlba) + ("r-lsa" ,r-lsa) + ("r-matrix" ,r-matrix) + ("r-patchwork" ,r-patchwork) + ("r-pbapply" ,r-pbapply) + ("r-rcpp" ,r-rcpp) + ("r-rcpproll" ,r-rcpproll) + ("r-rsamtools" ,r-rsamtools) + ("r-s4vectors" ,r-s4vectors) + ("r-scales" ,r-scales) + ("r-seurat" ,r-seurat) + ("r-seuratobject" ,r-seuratobject) + ("r-stringi" ,r-stringi) + ("r-tidyr" ,r-tidyr))) + (home-page "https://github.com/timoast/signac/") + (synopsis "Analysis of single-cell chromatin data") + (description + "This package provides a framework for the analysis and exploration of +single-cell chromatin data. The Signac package contains functions for +quantifying single-cell chromatin data, computing per-cell quality control +metrics, dimension reduction and normalization, visualization, and DNA +sequence motif analysis.") + (license license:expat)))) |