summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Efraim Flashner <efraim@flashner.co.il> 2020-04-21 13:56:33 +0300
committer Efraim Flashner <efraim@flashner.co.il> 2020-05-05 10:06:28 +0300
commit 31def9a9df583dd27f8604302700ff48368c43f3 (patch)
tree a78b64a008c19db875b54bae7d35251d654a3155
parent 0e680920b9e54d7c8a901b1c9cf02ce4468f44ed (diff)
gnu: Add grocsvs.
* gnu/packages/bioinformatics.scm (grocsvs): New variable.
* gnu/packages/patches/grocsvs-dont-use-admiral.patch: New file.
* gnu/local.mk (dist_patch_DATA): Register it.
-rw-r--r-- gnu/local.mk 1
-rw-r--r-- gnu/packages/bioinformatics.scm 42
-rw-r--r-- gnu/packages/patches/grocsvs-dont-use-admiral.patch 69
3 files changed, 112 insertions, 0 deletions
diff --git a/gnu/local.mk b/gnu/local.mk
index daacb1992a..0797efb93e 100644
--- a/gnu/local.mk
+++ b/gnu/local.mk
@@ -1007,6 +1007,7 @@ dist_patch_DATA = \
%D%/packages/patches/gpsbabel-qstring.patch \
%D%/packages/patches/grantlee-merge-theme-dirs.patch \
%D%/packages/patches/grep-timing-sensitive-test.patch \
+ %D%/packages/patches/grocsvs-dont-use-admiral.patch \
%D%/packages/patches/gromacs-tinyxml2.patch \
%D%/packages/patches/groovy-add-exceptionutilsgenerator.patch \
%D%/packages/patches/grub-efi-fat-serial-number.patch \
diff --git a/gnu/packages/bioinformatics.scm b/gnu/packages/bioinformatics.scm
index 65b44568e0..40f75e9e0c 100644
--- a/gnu/packages/bioinformatics.scm
+++ b/gnu/packages/bioinformatics.scm
@@ -79,6 +79,7 @@
#:use-module (gnu packages golang)
#:use-module (gnu packages glib)
#:use-module (gnu packages graph)
+ #:use-module (gnu packages graphviz)
#:use-module (gnu packages groff)
#:use-module (gnu packages gtk)
#:use-module (gnu packages guile)
@@ -15853,3 +15854,44 @@ biological processes. SBML is useful for models of metabolism, cell
signaling, and more. It continues to be evolved and expanded by an
international community.")
(license license:lgpl2.1+)))
+
+(define-public grocsvs
+ ;; The last release is out of date and new features have been added, so pin a newer commit.
+ (let ((commit "ecd956a65093a0b2c41849050e4512d46fecea5d")
+ (revision "1"))
+ (package
+ (name "grocsvs")
+ (version (git-version "0.2.6.1" revision commit)) ; Version string built from 0.2.6.1, REVISION and COMMIT.
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://github.com/grocsvs/grocsvs")
+ (commit commit)))
+ (file-name (git-file-name name version))
+ (sha256
+ (base32 "14505725gr7qxc17cxxf0k6lzcwmgi64pija4mwf29aw70qn35cc"))
+ (patches (search-patches "grocsvs-dont-use-admiral.patch")))) ; Removes the unlicensed admiral dependency.
+ (build-system python-build-system)
+ (arguments
+ `(#:tests? #f ; No test suite.
+ #:python ,python-2)) ; Only python-2 supported.
+ (inputs ; Mirrors install_requires in the patched setup.py.
+ `(("python2-h5py" ,python2-h5py)
+ ("python2-ipython-cluster-helper" ,python2-ipython-cluster-helper)
+ ("python2-networkx" ,python2-networkx)
+ ("python2-psutil" ,python2-psutil)
+ ("python2-pandas" ,python2-pandas)
+ ("python2-pybedtools" ,python2-pybedtools)
+ ("python2-pyfaidx" ,python2-pyfaidx)
+ ("python2-pygraphviz" ,python2-pygraphviz)
+ ("python2-pysam" ,python2-pysam)
+ ("python2-scipy" ,python2-scipy)))
+ (home-page "https://github.com/grocsvs/grocsvs")
+ (synopsis "Genome-wide reconstruction of complex structural variants")
+ (description
+ "@dfn{Genome-wide Reconstruction of Complex Structural Variants}
+(GROC-SVs) is a software pipeline for identifying large-scale structural
+variants, performing sequence assembly at the breakpoints, and reconstructing
+the complex structural variants using the long-fragment information from the
+10x Genomics platform.")
+ (license license:expat))))
diff --git a/gnu/packages/patches/grocsvs-dont-use-admiral.patch b/gnu/packages/patches/grocsvs-dont-use-admiral.patch
new file mode 100644
index 0000000000..cb976e19b0
--- /dev/null
+++ b/gnu/packages/patches/grocsvs-dont-use-admiral.patch
@@ -0,0 +1,69 @@
+python-admiral doesn't have a license
+https://github.com/nspies/admiral/issues/3
+
+diff --git a/setup.py b/setup.py
+index 692b6a0..568f381 100755
+--- a/setup.py
++++ b/setup.py
+@@ -20,7 +20,7 @@ setup(
+ 'console_scripts' : ["grocsvs = grocsvs.main:main"]
+ },
+
+- install_requires = ["admiral", "h5py", "networkx>=2.0", "pandas", "pybedtools",
++ install_requires = ["h5py", "networkx>=2.0", "pandas", "pybedtools",
+ "pyfaidx", "pysam>=0.10.0", "scipy", "ipython-cluster-helper",
+ "pygraphviz", "psutil"],
+
+diff --git a/src/grocsvs/jobmanagers.py b/src/grocsvs/jobmanagers.py
+index 6da0b58..112d7ff 100755
+--- a/src/grocsvs/jobmanagers.py
++++ b/src/grocsvs/jobmanagers.py
+@@ -41,34 +41,3 @@ class MultiprocessingCluster(Cluster):
+ pool = multiprocessing.Pool(processes=self.processes)
+ return pool.map_async(fn, args).get(999999)
+
+-
+-class AdmiralCluster(Cluster):
+- def map(self, fn, args):
+- from admiral import jobmanagers, remote
+-
+- cluster_options = self.cluster_settings.cluster_options.copy()
+-
+- scheduler = cluster_options.pop("scheduler")
+-
+- jobmanager_class = jobmanagers.get_jobmanager(scheduler)
+- jobmanager = jobmanager_class(
+- batch_dir=self.batch_dir, log_dir=self.batch_dir)
+-
+-
+- if not "mem" in cluster_options:
+- cluster_options["mem"] = "16g"
+- if not "time" in cluster_options:
+- cluster_options["time"] = "12h"
+-
+- jobs = []
+- #for i, arg in enumerate(args):
+-
+- job_name = args[0].__class__.__name__
+- args = [[arg] for arg in args]
+- job = remote.run_remote(fn, jobmanager, job_name, args=args,
+- array=True, overwrite=True, **cluster_options)
+-
+- result = jobmanagers.wait_for_jobs([job], wait=5, progress=True)
+-
+- if not result:
+- raise Exception("Some chunks failed to complete")
+diff --git a/src/grocsvs/pipeline.py b/src/grocsvs/pipeline.py
+index ab1bb2d..350976f 100755
+--- a/src/grocsvs/pipeline.py
++++ b/src/grocsvs/pipeline.py
+@@ -8,8 +8,7 @@ from grocsvs import utilities
+ def make_jobmanager(jobmanager_settings, processes, batch_dir):
+ jobmanager_classes = {"IPCluster":jobmanagers.IPCluster,
+ "local": jobmanagers.LocalCluster,
+- "multiprocessing": jobmanagers.MultiprocessingCluster,
+- "admiral": jobmanagers.AdmiralCluster}
++ "multiprocessing": jobmanagers.MultiprocessingCluster}
+
+ cls = jobmanager_classes[jobmanager_settings.cluster_type]
+ return cls(processes, jobmanager_settings, batch_dir)