1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
4 ;;; Copyright © 2015, 2016, 2018, 2019, 2020 Pjotr Prins <pjotr.guix@thebird.nl>
5 ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
6 ;;; Copyright © 2016, 2020, 2021 Roel Janssen <roel@gnu.org>
7 ;;; Copyright © 2016, 2017, 2018, 2019, 2020, 2021, 2022 Efraim Flashner <efraim@flashner.co.il>
8 ;;; Copyright © 2016, 2020, 2022 Marius Bakke <marius@gnu.org>
9 ;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
10 ;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
11 ;;; Copyright © 2017, 2021, 2022 Arun Isaac <arunisaac@systemreboot.net>
12 ;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
13 ;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
14 ;;; Copyright © 2018, 2019, 2020, 2021, 2022 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
15 ;;; Copyright © 2019, 2020, 2021 Maxim Cournoyer <maxim.cournoyer@gmail.com>
16 ;;; Copyright © 2019 Brian Leung <bkleung89@gmail.com>
17 ;;; Copyright © 2019 Brett Gilio <brettg@gnu.org>
18 ;;; Copyright © 2020 Björn Höfling <bjoern.hoefling@bjoernhoefling.de>
19 ;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
20 ;;; Copyright © 2020 Pierre Langlois <pierre.langlois@gmx.com>
21 ;;; Copyright © 2020 Bonface Munyoki Kilyungi <bonfacemunyoki@gmail.com>
22 ;;; Copyright © 2021 Tim Howes <timhowes@lavabit.com>
23 ;;; Copyright © 2021 Hong Li <hli@mdc-berlin.de>
24 ;;; Copyright © 2021, 2022 Simon Tournier <zimon.toutoune@gmail.com>
25 ;;; Copyright © 2021 Felix Gruber <felgru@posteo.net>
27 ;;; This file is part of GNU Guix.
29 ;;; GNU Guix is free software; you can redistribute it and/or modify it
30 ;;; under the terms of the GNU General Public License as published by
31 ;;; the Free Software Foundation; either version 3 of the License, or (at
32 ;;; your option) any later version.
34 ;;; GNU Guix is distributed in the hope that it will be useful, but
35 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
36 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37 ;;; GNU General Public License for more details.
39 ;;; You should have received a copy of the GNU General Public License
40 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
42 (define-module (gnu packages bioinformatics)
43 #:use-module ((guix licenses) #:prefix license:)
44 #:use-module (guix packages)
45 #:use-module (guix gexp)
46 #:use-module (guix utils)
47 #:use-module (guix download)
48 #:use-module (guix git-download)
49 #:use-module (guix hg-download)
50 #:use-module (guix build-system ant)
51 #:use-module (guix build-system gnu)
52 #:use-module (guix build-system cargo)
53 #:use-module (guix build-system cmake)
54 #:use-module (guix build-system go)
55 #:use-module (guix build-system haskell)
56 #:use-module (guix build-system meson)
57 #:use-module (guix build-system ocaml)
58 #:use-module (guix build-system perl)
59 #:use-module (guix build-system python)
60 #:use-module (guix build-system qt)
61 #:use-module (guix build-system r)
62 #:use-module (guix build-system ruby)
63 #:use-module (guix build-system trivial)
64 #:use-module (guix deprecation)
65 #:use-module (gnu packages)
66 #:use-module (gnu packages admin)
67 #:use-module (gnu packages assembly)
68 #:use-module (gnu packages autotools)
69 #:use-module (gnu packages algebra)
70 #:use-module (gnu packages base)
71 #:use-module (gnu packages bash)
72 #:use-module (gnu packages bison)
73 #:use-module (gnu packages bioconductor)
74 #:use-module (gnu packages boost)
75 #:use-module (gnu packages check)
76 #:use-module (gnu packages code)
77 #:use-module (gnu packages cmake)
78 #:use-module (gnu packages compression)
79 #:use-module (gnu packages cpp)
80 #:use-module (gnu packages cpio)
81 #:use-module (gnu packages cran)
82 #:use-module (gnu packages crates-io)
83 #:use-module (gnu packages curl)
84 #:use-module (gnu packages documentation)
85 #:use-module (gnu packages databases)
86 #:use-module (gnu packages datastructures)
87 #:use-module (gnu packages dlang)
88 #:use-module (gnu packages file)
89 #:use-module (gnu packages flex)
90 #:use-module (gnu packages gawk)
91 #:use-module (gnu packages gcc)
92 #:use-module (gnu packages gd)
93 #:use-module (gnu packages golang)
94 #:use-module (gnu packages glib)
95 #:use-module (gnu packages graph)
96 #:use-module (gnu packages graphics)
97 #:use-module (gnu packages graphviz)
98 #:use-module (gnu packages groff)
99 #:use-module (gnu packages gtk)
100 #:use-module (gnu packages guile)
101 #:use-module (gnu packages guile-xyz)
102 #:use-module (gnu packages haskell-check)
103 #:use-module (gnu packages haskell-web)
104 #:use-module (gnu packages haskell-xyz)
105 #:use-module (gnu packages image)
106 #:use-module (gnu packages image-processing)
107 #:use-module (gnu packages imagemagick)
108 #:use-module (gnu packages java)
109 #:use-module (gnu packages java-compression)
110 #:use-module (gnu packages jemalloc)
111 #:use-module (gnu packages jupyter)
112 #:use-module (gnu packages libffi)
113 #:use-module (gnu packages linux)
114 #:use-module (gnu packages llvm)
115 #:use-module (gnu packages logging)
116 #:use-module (gnu packages lsof)
117 #:use-module (gnu packages machine-learning)
118 #:use-module (gnu packages man)
119 #:use-module (gnu packages maths)
120 #:use-module (gnu packages mpi)
121 #:use-module (gnu packages ncurses)
122 #:use-module (gnu packages node)
123 #:use-module (gnu packages ocaml)
124 #:use-module (gnu packages pcre)
125 #:use-module (gnu packages parallel)
126 #:use-module (gnu packages pdf)
127 #:use-module (gnu packages perl)
128 #:use-module (gnu packages perl-check)
129 #:use-module (gnu packages pkg-config)
130 #:use-module (gnu packages popt)
131 #:use-module (gnu packages protobuf)
132 #:use-module (gnu packages python)
133 #:use-module (gnu packages python-build)
134 #:use-module (gnu packages python-check)
135 #:use-module (gnu packages python-compression)
136 #:use-module (gnu packages python-crypto)
137 #:use-module (gnu packages python-science)
138 #:use-module (gnu packages python-web)
139 #:use-module (gnu packages python-xyz)
140 #:use-module (gnu packages qt)
141 #:use-module (gnu packages rdf)
142 #:use-module (gnu packages readline)
143 #:use-module (gnu packages rpc)
144 #:use-module (gnu packages rsync)
145 #:use-module (gnu packages ruby)
146 #:use-module (gnu packages serialization)
147 #:use-module (gnu packages shells)
148 #:use-module (gnu packages skribilo)
149 #:use-module (gnu packages sphinx)
150 #:use-module (gnu packages sqlite)
151 #:use-module (gnu packages statistics)
152 #:use-module (gnu packages swig)
153 #:use-module (gnu packages tbb)
154 #:use-module (gnu packages tex)
155 #:use-module (gnu packages texinfo)
156 #:use-module (gnu packages textutils)
157 #:use-module (gnu packages time)
158 #:use-module (gnu packages tls)
159 #:use-module (gnu packages uglifyjs)
160 #:use-module (gnu packages vim)
161 #:use-module (gnu packages web)
162 #:use-module (gnu packages wget)
163 #:use-module (gnu packages xml)
164 #:use-module (gnu packages xorg)
165 #:use-module (guix platform)
166 #:use-module (srfi srfi-1)
167 #:use-module (srfi srfi-26)
168 #:use-module (ice-9 match))
;; ARAGORN package.  It is built with gnu-build-system and has no test
;; suite.  The visible install step copies the aragorn executable to
;; <out>/bin and its manual page aragorn.1 to <out>/share/man/man1.
170 (define-public aragorn
177 "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
181 "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
182 (build-system gnu-build-system)
184 `(#:tests? #f ; there are no tests
186 (modify-phases %standard-phases
;; The C source file is named after the package version.
196 (string-append "aragorn" ,version ".c"))
;; Install the executable and the manual page by hand.
199 (lambda* (#:key outputs #:allow-other-keys)
200 (let* ((out (assoc-ref outputs "out"))
201 (bin (string-append out "/bin"))
202 (man (string-append out "/share/man/man1")))
203 (install-file "aragorn" bin)
204 (install-file "aragorn.1" man))
206 (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
207 (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
209 "Aragorn identifies transfer RNA, mitochondrial RNA and
210 transfer-messenger RNA from nucleotide sequences, based on homology to known
211 tRNA consensus sequences and RNA structure. It also outputs the secondary
212 structure of the predicted RNA.")
213 (license license:gpl2)))
;; BamTools package.  The source is the upstream Git repository checked
;; out at the tag v<version>; the build uses cmake-build-system with tests
;; disabled.  A set-ldflags phase is registered relative to the configure
;; phase and refers to <out>/lib/bamtools.
215 (define-public bamtools
222 (url "https://github.com/pezmaster31/bamtools")
223 (commit (string-append "v" version))))
224 (file-name (git-file-name name version))
227 "0nfb2ypcx9959xnbz6wxh6py3xfizgmg8nrknxl95c507m9hmq8b"))))
228 (build-system cmake-build-system)
230 `(#:tests? #f ;no "check" target
232 (modify-phases %standard-phases
234 'configure 'set-ldflags
235 (lambda* (#:key outputs #:allow-other-keys)
239 (assoc-ref outputs "out") "/lib/bamtools"))
242 (home-page "https://github.com/pezmaster31/bamtools")
243 (synopsis "C++ API and command-line toolkit for working with BAM data")
245 "BamTools provides both a C++ API and a command-line toolkit for handling
247 (license license:expat)))
;; bamUtil package.  The source is the upstream Git repository at the tag
;; v<version>; it is built with gnu-build-system and tests are disabled.
;; A second Git checkout, libStatGen, is referenced as the libstatgen
;; input and copied next to the source tree before building.
249 (define-public bamutils
256 (url "https://github.com/statgen/bamUtil")
257 (commit (string-append "v" version))))
258 (file-name (git-file-name name version))
261 "0i2r332k1kz0jysyg89d858wqq59n16lw6dv5qmilcwshb77r9v7"))))
262 (build-system gnu-build-system)
264 `(#:tests? #f ; Unclear how to run tests
;; Make flags: select the gnu++98 standard through USER_WARNINGS and
;; install into <output>/bin.
266 ,#~(list "USER_WARNINGS=-std=gnu++98" ;
267 (string-append "INSTALLDIR=" #$output "/bin"))
269 (modify-phases %standard-phases
271 (lambda* (#:key inputs #:allow-other-keys)
;; Replace the DATE= line of src/Makefile with a fixed date.
272 (substitute* "src/Makefile" ;
273 (("^DATE=.*") "DATE=\"1970-01-01\"\n"))
;; Copy the libstatgen input to ../libStatGen.
274 (copy-recursively (assoc-ref inputs "libstatgen")
275 "../libStatGen"))))))
283 (url "https://github.com/statgen/libStatGen/")
284 (commit (string-append "v" version))))
285 (file-name (git-file-name "libstatgen" version))
288 "0q9iyk046r4m7qnav8c3f28zsar25lj9nydiklwaswmzdijhi4p1"))))))
289 (home-page "https://genome.sph.umich.edu/wiki/BamUtil")
290 (synopsis "Programs for working on SAM/BAM files")
291 (description "This package provides several programs that perform
292 operations on SAM/BAM files. All of these programs are built into a
293 single executable called @code{bam}.")
294 (license license:gpl3+)))
;; BCFtools package.  The release tarball is fetched from GitHub, the
;; bundled htslib-1.14 directory is removed from the source, and the
;; package is built with gnu-build-system and --enable-libgsl.
296 (define-public bcftools
302 (uri (string-append "https://github.com/samtools/bcftools/"
304 version "/bcftools-" version ".tar.bz2"))
307 "1jqrma16fx8kpvb3c0462dg0asvmiv5yi8myqmc5ddgwi6p8ivxp"))
308 (modules '((guix build utils)))
310 ;; Delete bundled htslib.
311 (delete-file-recursively "htslib-1.14")))))
312 (build-system gnu-build-system)
315 (list "--enable-libgsl")
318 (modify-phases %standard-phases
319 (add-before 'check 'patch-tests
;; test/test.pl refers to /bin/bash; use the bash found in PATH
;; instead.
321 (substitute* "test/test.pl"
322 (("/bin/bash") (which "bash"))))))))
327 (home-page "https://samtools.github.io/bcftools/")
328 (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
330 "BCFtools is a set of utilities that manipulate variant calls in the
331 Variant Call Format (VCF) and its binary counterpart BCF. All commands work
332 transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
333 ;; The sources are dual MIT/GPL, but become GPL-only when USE_GPL=1.
334 (license (list license:gpl3+ license:expat))))
;; Variant of bcftools that inherits from the main package, uses its own
;; release tarball, removes the bundled htslib-1.12 directory and takes
;; htslib-1.12 and perl as native inputs.
336 (define-public bcftools-1.12
337 (package/inherit bcftools
341 (uri (string-append "https://github.com/samtools/bcftools/"
343 version "/bcftools-" version ".tar.bz2"))
346 "1x94l1hy2pi3lbz0sxlbw0g6q5z5apcrhrlcwda94ns9n4r6a3ks"))
347 (modules '((guix build utils)))
349 ;; Delete bundled htslib.
350 (delete-file-recursively "htslib-1.12")))))
351 (native-inputs (list htslib-1.12 perl))))
;; Variant of bcftools that inherits from the main package, uses its own
;; release tarball, removes the bundled htslib-1.10 directory and takes
;; htslib-1.10 and perl as native inputs.
353 (define-public bcftools-1.10
354 (package/inherit bcftools
358 (uri (string-append "https://github.com/samtools/bcftools/"
360 version "/bcftools-" version ".tar.bz2"))
363 "10xgwfdgqb6dsmr3ndnpb77mc3a38dy8kh2c6czn6wj7jhdp4dra"))
364 (modules '((guix build utils)))
366 ;; Delete bundled htslib.
367 (delete-file-recursively "htslib-1.10")))))
368 (native-inputs (list htslib-1.10 perl))))
;; BEDOPS package.  The source is the upstream Git repository at the tag
;; v<version>.  There is no configure phase; binaries are installed into
;; <output>/bin through the BINDIR make flag, and the bundled third-party
;; tarballs are unpacked ahead of the build (see unpack-tarballs below).
370 (define-public bedops
377 (url "https://github.com/bedops/bedops")
378 (commit (string-append "v" version))))
379 (file-name (git-file-name name version))
382 "0mmgsgwz5r9w76hzgxkxc9s9lkdhhaf7vr6i02b09vbswvs1fyqx"))))
383 (build-system gnu-build-system)
386 #:make-flags (list (string-append "BINDIR=" %output "/bin"))
388 (modify-phases %standard-phases
389 (add-after 'unpack 'unpack-tarballs
391 ;; FIXME: Bedops includes tarballs of minimally patched upstream
392 ;; libraries jansson, zlib, and bzip2. We cannot just use stock
393 ;; libraries because at least one of the libraries (zlib) is
394 ;; patched to add a C++ function definition (deflateInit2cpp).
395 ;; Until the Bedops developers offer a way to link against system
396 ;; libraries we have to build the in-tree copies of these three
399 ;; See upstream discussion:
400 ;; https://github.com/bedops/bedops/issues/124
402 ;; Unpack the tarballs to benefit from shebang patching.
403 (with-directory-excursion "third-party"
404 (invoke "tar" "xvf" "jansson-2.6.tar.bz2")
405 (invoke "tar" "xvf" "zlib-1.2.7.tar.bz2")
406 (invoke "tar" "xvf" "bzip2-1.0.6.tar.bz2"))
407 ;; Disable unpacking of tarballs in Makefile.
;; The same substitution also makes the bundled configure scripts
;; run with CONFIG_SHELL=bash.
408 (substitute* "system.mk/Makefile.linux"
409 (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
410 (("\\./configure") "CONFIG_SHELL=bash ./configure"))
;; Set SHELL to bash in the Makefile template of the bundled zlib.
411 (substitute* "third-party/zlib-1.2.7/Makefile.in"
412 (("^SHELL=.*$") "SHELL=bash\n"))
414 (delete 'configure))))
415 (home-page "https://github.com/bedops/bedops")
416 (synopsis "Tools for high-performance genomic feature operations")
418 "BEDOPS is a suite of tools to address common questions raised in genomic
419 studies---mostly with regard to overlap and proximity relationships between
420 data sets. It aims to be scalable and flexible, facilitating the efficient
421 and accurate analysis and management of large-scale genomic data.
423 BEDOPS provides tools that perform highly efficient and scalable Boolean and
424 other set operations, statistical calculations, archiving, conversion and
425 other management of genomic data of arbitrary scale. Tasks can be easily
426 split by chromosome for distributing whole-genome analyses across a
427 computational cluster.")
428 (license license:gpl2+)))
;; bedtools package.  The release tarball is fetched from the bedtools2
;; GitHub releases.  There is no configure phase; the installation prefix
;; is passed to make as prefix=<output> and the test suite is run through
;; the test make target.
430 (define-public bedtools
436 (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
437 "download/v" version "/"
438 "bedtools-" version ".tar.gz"))
441 "1f2hh79l7dn147c2xyfgf5wfjvlqfw32kjfnnh2n1qy6rpzx2fik"))))
442 (build-system gnu-build-system)
444 `(#:test-target "test"
446 ,#~(list (string-append "prefix=" #$output))
448 (modify-phases %standard-phases
449 (delete 'configure))))
451 `(("python" ,python-wrapper)))
453 (list samtools zlib))
454 (home-page "https://github.com/arq5x/bedtools2")
455 (synopsis "Tools for genome analysis and arithmetic")
457 "Collectively, the bedtools utilities are a swiss-army knife of tools for
458 a wide-range of genomics analysis tasks. The most widely-used tools enable
459 genome arithmetic: that is, set theory on the genome. For example, bedtools
460 allows one to intersect, merge, count, complement, and shuffle genomic
461 intervals from multiple files in widely-used genomic file formats such as BAM,
463 (license license:expat)))
465 ;; Later releases of bedtools produce files with more columns than
466 ;; what Ribotaper expects.
;; This variant inherits from bedtools, fetches its own release tarball,
;; patches two headers in a compatibility phase, and installs everything
;; found under bin/ by hand.
467 (define-public bedtools-2.18
468 (package (inherit bedtools)
473 (uri (string-append "https://github.com/arq5x/bedtools2/"
474 "releases/download/v" version
475 "/bedtools-" version ".tar.gz"))
478 "11rvca19ncg03kxd0wzlfx5ws7r3nisd0z8s9j9n182d8ksp2pxz"))))
480 '(#:test-target "test"
482 (modify-phases %standard-phases
483 (add-after 'unpack 'compatibility
;; Turn a static const float member into a constexpr one.
485 (substitute* "src/utils/fileType/FileRecordTypeChecker.h"
486 (("static const float PERCENTAGE")
487 "static constexpr float PERCENTAGE"))
;; Rename the inner template parameter from T to U so that it no
;; longer reuses the name of the outer parameter.
488 (substitute* "src/utils/general/DualQueue.h"
489 (("template <class T, template<class T> class CompareFunc>")
490 "template <class T, template<class U> class CompareFunc>"))))
;; Copy every file found under bin/ into <out>/bin/.
493 (lambda* (#:key outputs #:allow-other-keys)
494 (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
495 (for-each (lambda (file)
496 (install-file file bin))
497 (find-files "bin" ".*"))))))))
499 `(("python" ,python-wrapper)))
501 (list samtools zlib))))
;; pbcopper package, pinned to a specific upstream Git commit.  The
;; version string is derived from 1.9.0, a revision and that commit via
;; git-version.  It is built with meson-build-system.
503 (define-public pbcopper
504 ;; This is the latest commit at the time of this writing.
505 (let ((commit "ad4143afd25a0bd6adc977c544865c992a515841")
509 (version (git-version "1.9.0" revision commit))
513 (url "https://github.com/PacificBiosciences/pbcopper")
515 (file-name (git-file-name name version))
518 "1qxkbpdkamfisnk36lpi1vdvf3p1lg2hdqna3xgd94pz52bwbmp7"))))
519 (build-system meson-build-system)
522 (modify-phases %standard-phases
523 (add-after 'unpack 'patch-meson-files
524 (lambda* (#:key inputs #:allow-other-keys)
;; Remove the -msse4.1 compiler flag from meson.build.
525 (substitute* "meson.build"
526 (("-msse4.1") "")))))))
530 (list googletest pkg-config))
531 (home-page "https://github.com/PacificBiosciences/pbcopper")
532 (synopsis "Data structures, algorithms, and utilities for PacBio C++ applications")
534 "The pbcopper library provides a suite of data structures, algorithms,
535 and utilities for PacBio C++ applications.")
536 (license license:bsd-3))))
;; Upstream pbbam Git repository; the checkout is the tag v<version>.
545 (url "https://github.com/PacificBiosciences/pbbam")
546 (commit (string-append "v" version))))
547 (file-name (git-file-name name version))
550 "1avdm5hwhr5ls79017blyalx1npzbf1aa6dgb6j6lg8sq4nk9yyg"))))
551 (build-system meson-build-system)
554 (modify-phases %standard-phases
555 (add-after 'unpack 'patch-tests
556 (lambda* (#:key inputs #:allow-other-keys)
557 ;; Disable this test. I tried fixing it by including
558 ;; optional_io.hpp, but there's a type error.
559 (substitute* "tests/src/meson.build"
560 (("'test_ReadGroupInfo.cpp',") ""))
;; Add an include of boost/optional/optional_io.hpp right after the
;; existing include of boost/optional.hpp.
562 (substitute* "include/pbbam/ReadGroupInfo.h"
563 (("#include <boost/optional.hpp>" m)
564 (string-append m "\n#include <boost/optional/optional_io.hpp>")))
;; In the two cram test scripts, insert the single-quoted file name
;; of the sh found in PATH.
565 (substitute* '("tests/scripts/cram/_test.py"
566 "tests/scripts/cram/_main.py")
568 (string-append "'" (which "sh") "'"))))))))
569 ;; These libraries are listed as "Required" in the pkg-config file.
571 (list htslib pbcopper zlib))
573 (list boost samtools))
575 `(("googletest" ,googletest)
576 ("pkg-config" ,pkg-config)
577 ("python" ,python-wrapper))) ; for tests
578 (home-page "https://github.com/PacificBiosciences/pbbam")
579 (synopsis "Work with PacBio BAM files")
581 "The pbbam software package provides components to create, query, and
582 edit PacBio BAM files and associated indices. These components include a core
583 C++ library, bindings for additional languages, and command-line utilities.
584 This library is not intended to be used as a general-purpose BAM utility - all
585 input and output BAMs must adhere to the PacBio BAM format specification.
586 Non-PacBio BAMs will cause exceptions to be thrown.")
587 (license license:bsd-3)))
;; pbgzip package, pinned to a specific upstream Git commit.  The version
;; string is derived from 0.0.0, revision 0 and that commit via
;; git-version.  It is built with gnu-build-system; autoconf and automake
;; are listed as inputs.
589 (define-public pbgzip
590 (let ((commit "2b09f97b5f20b6d83c63a5c6b408d152e3982974"))
593 (version (git-version "0.0.0" "0" commit))
597 (url "https://github.com/nh13/pbgzip")
599 (file-name (git-file-name name version))
602 "1mlmq0v96irbz71bgw5zcc43g1x32zwnxx21a5p1f1ch4cikw1yd"))))
603 (build-system gnu-build-system)
605 (list autoconf automake))
608 (home-page "https://github.com/nh13/pbgzip")
609 (synopsis "Parallel Block GZIP")
610 (description "This package implements parallel block gzip. For many
611 formats, in particular genomics data formats, data are compressed in
612 fixed-length blocks such that they can be easily indexed based on a (genomic)
613 coordinate order, since typically each block is sorted according to this order.
614 This allows for each block to be individually compressed (deflated), or more
615 importantly, decompressed (inflated), with the latter enabling random retrieval
616 of data in large files (gigabytes to terabytes). @code{pbgzip} is not limited
617 to any particular format, but certain features are tailored to genomics data
618 formats when enabled. Parallel decompression is somewhat faster, but the true
619 speedup comes during compression.")
620 (license license:expat))))
;; blasr-libcpp package, built with meson-build-system.  Two phases run
;; after unpack: link-with-hdf5 edits the libblasr_deps list in
;; meson.build, and find-googletest rewrites how gtest_main is located.
;; The unit tests are switched off with -Dtests=false.
622 (define-public blasr-libcpp
624 (name "blasr-libcpp")
629 (url "https://github.com/PacificBiosciences/blasr_libcpp")
631 (file-name (git-file-name name version))
634 "07cdfnfl29zf2j7fpaaqaxghq3p0wnc109razs0icwm2q6l3gycb"))))
635 (build-system meson-build-system)
638 (modify-phases %standard-phases
639 (add-after 'unpack 'link-with-hdf5
640 (lambda* (#:key inputs #:allow-other-keys)
;; Look up the hdf5 input and add find_library calls for hdf5 and
;; hdf5_cpp at the libblasr_deps list.
641 (let ((hdf5 (assoc-ref inputs "hdf5")))
642 (substitute* "meson.build"
643 (("libblasr_deps = \\[" m)
646 (format #f "cpp.find_library('hdf5', dirs : '~a'), \
647 cpp.find_library('hdf5_cpp', dirs : '~a'), "
650 (add-after 'unpack 'find-googletest
651 (lambda* (#:key inputs #:allow-other-keys)
652 ;; It doesn't find gtest_main because there's no pkg-config file
653 ;; for it. Find it another way.
654 (substitute* "unittest/meson.build"
655 (("libblasr_gtest_dep = dependency\\('gtest_main'.*")
656 (format #f "cpp = meson.get_compiler('cpp')
657 libblasr_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
658 (assoc-ref inputs "googletest"))))
660 ;; TODO: unittest/libblasr_unittest cannot be linked
661 ;; ld: ;; unittest/df08227@@libblasr_unittest@exe/alignment_utils_FileUtils_gtest.cpp.o:
662 ;; undefined reference to symbol
663 ;; '_ZN7testing8internal9DeathTest6CreateEPKcPKNS0_2REES3_iPPS1_'
664 ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
665 ;; error adding symbols: DSO missing from command line
667 #:configure-flags '("-Dtests=false")))
669 (list boost hdf5 htslib pbbam zlib))
671 (list googletest pkg-config))
672 (home-page "https://github.com/PacificBiosciences/blasr_libcpp")
673 (synopsis "Library for analyzing PacBio genomic sequences")
675 "This package provides three libraries used by applications for analyzing
676 PacBio genomic sequences. This library contains three sub-libraries: pbdata,
678 (license license:bsd-3)))
;; Upstream blasr Git repository.
687 (url "https://github.com/PacificBiosciences/blasr")
689 (file-name (git-file-name name version))
692 "0axyd06gn2xa0p0k76fihsbxpfxvhlb18jn6bf97c0ii58r1wc0k"))))
693 (build-system meson-build-system)
696 (modify-phases %standard-phases
697 (add-after 'unpack 'link-with-hdf5
698 (lambda* (#:key inputs #:allow-other-keys)
;; Look up the hdf5 input and add find_library calls for hdf5 and
;; hdf5_cpp at the blasr_deps list.
699 (let ((hdf5 (assoc-ref inputs "hdf5")))
700 (substitute* "meson.build"
701 (("blasr_deps = \\[" m)
704 (format #f "cpp.find_library('hdf5', dirs : '~a'), \
705 cpp.find_library('hdf5_cpp', dirs : '~a'), "
707 ;; Tests require "cram" executable, which is not packaged.
709 #:configure-flags '("-Dtests=false")))
711 (list boost blasr-libcpp hdf5 pbbam zlib))
714 (home-page "https://github.com/PacificBiosciences/blasr")
715 (synopsis "PacBio long read aligner")
717 "Blasr is a genomic sequence aligner for processing PacBio long reads.")
718 (license license:bsd-3)))
;; RiboTaper package.  The tarball is fetched from the Ohler lab site and
;; built with gnu-build-system.  After installation the listed scripts in
;; <out>/bin are wrapped so that R_LIBS_SITE is set for them.
720 (define-public ribotaper
726 (uri (string-append "https://ohlerlab.mdc-berlin.de/"
727 "files/RiboTaper/RiboTaper_Version_"
731 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
732 (build-system gnu-build-system)
735 (modify-phases %standard-phases
736 (add-after 'install 'wrap-executables
737 (lambda* (#:key inputs outputs #:allow-other-keys)
738 (let* ((out (assoc-ref outputs "out")))
;; The wrapper sets R_LIBS_SITE to exactly the value the variable
;; has in the build environment.
741 (wrap-program (string-append out "/bin/" script)
742 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
743 '("create_annotations_files.bash"
744 "create_metaplots.bash"
745 "Ribotaper_ORF_find.sh"
757 (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
758 (synopsis "Define translated ORFs using ribosome profiling data")
760 "Ribotaper is a method for defining translated @dfn{open reading
761 frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
762 provides the Ribotaper pipeline.")
763 (license license:gpl3+)))
;; bioawk package.  The source is the upstream Git repository at the tag
;; v<version>.  It is built with gnu-build-system without a configure
;; phase and without tests; the visible install step places the bioawk
;; binary in <out>/bin and the manual page in <out>/share/man/man1.
765 (define-public bioawk
772 (url "https://github.com/lh3/bioawk")
773 (commit (string-append "v" version))))
774 (file-name (git-file-name name version))
777 "1pxc3zdnirxbf9a0az698hd8xdik7qkhypm7v6hn922x8y9qmspm"))))
778 (build-system gnu-build-system)
784 `(#:tests? #f ; There are no tests to run.
785 ;; Bison must generate files, before other targets can build.
788 (modify-phases %standard-phases
789 (delete 'configure) ; There is no configure phase.
791 (lambda* (#:key outputs #:allow-other-keys)
792 (let* ((out (assoc-ref outputs "out"))
793 (bin (string-append out "/bin"))
794 (man (string-append out "/share/man/man1")))
;; The upstream manual page is awk.1; install it as bioawk.1.
796 (copy-file "awk.1" (string-append man "/bioawk.1"))
797 (install-file "bioawk" bin))
799 (home-page "https://github.com/lh3/bioawk")
800 (synopsis "AWK with bioinformatics extensions")
801 (description "Bioawk is an extension to Brian Kernighan's awk, adding the
802 support of several common biological data formats, including optionally gzip'ed
803 BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
804 also adds a few built-in functions and a command line option to use TAB as the
805 input/output delimiter. When the new functionality is not used, bioawk is
806 intended to behave exactly the same as the original BWK awk.")
807 (license license:x11)))
;; CellBender package.  The source is the upstream Git repository at the
;; tag v<version>; it is built with python-build-system and tests are
;; disabled because upstream ships none.  Several Sphinx extensions appear
;; among its dependencies.
809 (define-public python-cellbender
811 (name "python-cellbender")
817 (url "https://github.com/broadinstitute/CellBender")
818 (commit (string-append "v" version))))
819 (file-name (git-file-name name version))
822 "1zav2q8nnss80i25y06fccagkvrqsy7lpylsl4dxv4qkj8p4fnv3"))))
823 (build-system python-build-system)
825 (list #:tests? #false)) ;there are none
835 python-sphinx-argparse
836 python-sphinx-autodoc-typehints
837 python-sphinx-rtd-theme
838 python-sphinxcontrib-programoutput
840 (home-page "https://cellbender.rtfd.io/")
841 (synopsis "Eliminate technical artifacts from single-cell RNA-seq data")
843 "CellBender is a software package for eliminating technical artifacts
844 from high-throughput single-cell RNA sequencing (scRNA-seq) data.")
845 (license license:bsd-3)))
;; htsget client package.  The source comes from PyPI under the name
;; htsget and is built with python-build-system.  The dependency lists
;; name python-setuptools-scm, python-humanize, python-requests and
;; python-six.
847 (define-public python-htsget
849 (name "python-htsget")
853 (uri (pypi-uri "htsget" version))
856 "0ic07q85vhw9djf23k57b21my7i5xp400m8gfqgr5gcryqvdr0yk"))))
857 (build-system python-build-system)
859 (list python-setuptools-scm))
861 (list python-humanize python-requests python-six))
862 (home-page "https://pypi.org/project/htsget/")
863 (synopsis "Python API and command line interface for the GA4GH htsget API")
864 (description "This package is a client implementation of the GA4GH htsget
865 protocol. It provides a simple and reliable way to retrieve genomic data from
866 servers supporting the protocol.")
867 (license license:asl2.0)))
;; pybedtools package.  The source comes from PyPI and is built with
;; python-build-system.  Extra phases rename tests that cannot pass here,
;; delete the C/C++ files that were generated from .pyx sources,
;; regenerate them with setup.py cythonize, and run pytest from a copy of
;; the test directory under /tmp/test.
869 (define-public python-pybedtools
871 (name "python-pybedtools")
875 (uri (pypi-uri "pybedtools" version))
878 "18rhzk08d3rpxhi5xh6pqg64x6v5q3daw6y3v54k85v4swncjrwj"))))
879 (build-system python-build-system)
881 `(#:modules ((srfi srfi-26)
883 (guix build python-build-system))
884 ;; See https://github.com/daler/pybedtools/issues/192
886 (modify-phases %standard-phases
887 (add-after 'unpack 'disable-broken-tests
;; Tests are disabled by renaming them so that they no longer start
;; with the test_ prefix.
889 (substitute* "pybedtools/test/test_helpers.py"
890 ;; Requires internet access.
891 (("def test_chromsizes")
892 "def _do_not_test_chromsizes")
893 ;; Broken as a result of the workaround used in the check phase
894 ;; (see: https://github.com/daler/pybedtools/issues/192).
895 (("def test_getting_example_beds")
896 "def _do_not_test_getting_example_beds"))
897 ;; This issue still occurs on python2
898 (substitute* "pybedtools/test/test_issues.py"
899 (("def test_issue_303")
900 "def _test_issue_303"))))
901 ;; Force the Cythonization of C++ files to guard against compilation
903 (add-after 'unpack 'remove-cython-generated-files
;; cython-sources holds the name of every .pyx file with its last four
;; characters removed.  A C/C++ file is deleted when its name, cut at
;; the last dot, is one of those names.
905 (let ((cython-sources (map (cut string-drop-right <> 4)
906 (find-files "." "\\.pyx$")))
907 (c/c++-files (find-files "." "\\.(c|cpp|cxx)$")))
908 (define (strip-extension filename)
909 (string-take filename (string-index-right filename #\.)))
910 (define (cythonized? c/c++-file)
911 (member (strip-extension c/c++-file) cython-sources))
912 (for-each delete-file (filter cythonized? c/c++-files)))))
913 (add-after 'remove-cython-generated-files 'generate-cython-extensions
915 (invoke "python" "setup.py" "cythonize")))
918 ;; The tests need to be run from elsewhere...
919 (mkdir-p "/tmp/test")
920 (copy-recursively "pybedtools/test" "/tmp/test")
921 (with-directory-excursion "/tmp/test"
922 (invoke "pytest" "-v" "--doctest-modules")))))))
924 (list bedtools samtools python-matplotlib python-pysam
930 kentutils ; for bedGraphToBigWig
932 ;; For the test suite.
935 (home-page "https://pythonhosted.org/pybedtools/")
936 (synopsis "Python wrapper for BEDtools programs")
938 "This package is a Python wrapper for Aaron Quinlan's BEDtools programs,
939 which are widely used for genomic interval manipulation or \"genome algebra\".
940 pybedtools extends BEDTools by offering feature-level manipulations from with
942 ;; pypi lists GPLv2 in the PKG-INFO and website, but was relicensed in
943 ;; version 0.9.0 and the LICENSE.txt is consistant with the source code.
945 ;; pybedtools/include/gzstream.cpp and pybedtools/include/gzstream.h are
947 (license (list license:expat license:lgpl2.1+))))
;; biom-format package.  The source is taken from the upstream GitHub
;; repository and a source snippet deletes the generated C files.  It is
;; built with python-build-system; extra phases enable Cython through
;; USE_CYTHON, relax the pytest version requirement in setup.py, and mark
;; two HDF5-related tests as skipped.
949 (define-public python-biom-format
951 (name "python-biom-format")
956 ;; Use GitHub as source because PyPI distribution does not contain
957 ;; test data: https://github.com/biocore/biom-format/issues/693
959 (url "https://github.com/biocore/biom-format")
961 (file-name (git-file-name name version))
964 "0i62j6ksmp78ap2dnl969gq6vprc3q87zc8ksj9if8g2603iq6i8"))
965 (modules '((guix build utils)))
966 ;; Delete generated C files.
;; NOTE(review): the pattern has no end anchor, so it presumably also
;; matches names such as foo.cpp or foo.csv -- confirm that this is
;; intended.
968 '(for-each delete-file (find-files "." "\\.c")))))
969 (build-system python-build-system)
972 (modify-phases %standard-phases
973 (add-after 'unpack 'use-cython
974 (lambda _ (setenv "USE_CYTHON" "1")))
975 (add-after 'unpack 'relax
;; Drop the upper bound on the pytest version.
977 (substitute* "setup.py"
978 (("pytest < 5.3.4") "pytest"))))
979 (add-after 'unpack 'disable-broken-tests
;; Put a skipif decorator, at the same indentation, in front of each
;; of the two test definitions below.
981 (substitute* "biom/tests/test_cli/test_validate_table.py"
982 (("^(.+)def test_invalid_hdf5" m indent)
983 (string-append indent
984 "@npt.dec.skipif(True, msg='Guix')\n"
986 (substitute* "biom/tests/test_table.py"
987 (("^(.+)def test_from_hdf5_issue_731" m indent)
988 (string-append indent
989 "@npt.dec.skipif(True, msg='Guix')\n"
1001 (list python-cython python-pytest python-pytest-cov python-nose))
1002 (home-page "http://www.biom-format.org")
1003 (synopsis "Biological Observation Matrix (BIOM) format utilities")
1005 "The BIOM file format is designed to be a general-use format for
1006 representing counts of observations e.g. operational taxonomic units, KEGG
1007 orthology groups or lipid types, in one or more biological samples
1008 e.g. microbiome samples, genomes, metagenomes.")
1009 (license license:bsd-3)))
;; pairtools package.  The source is the upstream Git repository at the
;; tag v<version>; it is built with python-build-system.  References to
;; /bin/bash in two modules are replaced, and the tests are run with
;; pytest from /tmp once the installed modules are on the Python path.
1011 (define-public python-pairtools
1013 (name "python-pairtools")
1018 (url "https://github.com/mirnylab/pairtools")
1019 (commit (string-append "v" version))))
1020 (file-name (git-file-name name version))
1023 "0gr8y13q7sd6yai6df4aavl2470n1f9s3cib6r473z4hr8hcbwmc"))))
1024 (build-system python-build-system)
1027 (modify-phases %standard-phases
1028 (add-after 'unpack 'fix-references
;; Replace /bin/bash in the merge and sort modules with the bash
;; found in PATH.
1030 (substitute* '("pairtools/pairtools_merge.py"
1031 "pairtools/pairtools_sort.py")
1032 (("/bin/bash") (which "bash")))
1035 (lambda* (#:key inputs outputs #:allow-other-keys)
;; Make the installed modules importable, then run pytest from /tmp.
1036 (add-installed-pythonpath inputs outputs)
1037 (with-directory-excursion "/tmp"
1038 (invoke "pytest" "-v")))))))
1040 (list python-cython python-nose python-pytest))
1042 `(("python" ,python-wrapper)))
1044 (list htslib ; for bgzip, looked up in PATH
1045 samtools ; looked up in PATH
1049 (home-page "https://github.com/mirnylab/pairtools")
1050 (synopsis "Process mapped Hi-C data")
1051 (description "Pairtools is a simple and fast command-line framework to
1052 process sequencing data from a Hi-C experiment. Process pair-end sequence
1053 alignments and perform the following operations:
1056 @item detect ligation junctions (a.k.a. Hi-C pairs) in aligned paired-end
1057 sequences of Hi-C DNA molecules
1058 @item sort @code{.pairs} files for downstream analyses
1059 @item detect, tag and remove PCR/optical duplicates
1060 @item generate extensive statistics of Hi-C datasets
1061 @item select Hi-C pairs given flexibly defined criteria
1062 @item restore @code{.sam} alignments from Hi-C pairs.
1065 (license license:expat)))
;; BioPerl built from the bioperl-live git repository with the Perl build
;; system; installed *.pl scripts are wrapped so they find their Perl modules.
1067 (define-public bioperl-minimal
1069 (name "bioperl-minimal")
1075 (url "https://github.com/bioperl/bioperl-live")
;; The git tag is "release-" followed by the version string passed through
;; string-map, which maps some characters of the version to #\-.
1076 (commit (string-append "release-"
1077 (string-map (lambda (c)
1079 #\- c)) version)))))
1080 (file-name (git-file-name name version))
1083 "0wl8yvzcls59pwwk6m8ahy87pwg6nnibzy5cldbvmcwg2x2w7783"))))
1084 (build-system perl-build-system)
;; transitive-inputs holds package names derived from the transitive target
;; inputs of each of this package's inputs; it is spliced into the
;; wrap-programs phase below as a quoted list.
1086 (let ((transitive-inputs
1087 (map (compose package-name cadr)
1090 (map (compose package-transitive-target-inputs cadr)
1091 (package-inputs this-package)))))))
1093 (modify-phases %standard-phases
1095 'install 'wrap-programs
1096 (lambda* (#:key outputs #:allow-other-keys)
1097 ;; Make sure all executables in "bin" find the required Perl
1098 ;; modules at runtime. As the PERL5LIB variable contains also
1099 ;; the paths of native inputs, we pick the transitive target
1100 ;; inputs from %build-inputs.
1101 (let* ((out (assoc-ref outputs "out"))
1102 (bin (string-append out "/bin/"))
;; The search path starts with this package's own site_perl directory,
;; followed by entries looked up by name in %build-inputs.
1104 (cons (string-append out "/lib/perl5/site_perl")
1106 (assoc-ref %build-inputs name))
1107 ',transitive-inputs))
;; Apply the PERL5LIB prefix to every file under bin whose name ends in ".pl".
1109 (for-each (lambda (file)
1111 `("PERL5LIB" ":" prefix (,path))))
1112 (find-files bin "\\.pl$"))
1115 (list perl-module-build perl-data-stag perl-libwww perl-uri))
1117 (list perl-test-most))
1118 (home-page "https://metacpan.org/release/BioPerl")
1119 (synopsis "Bioinformatics toolkit")
1121 "BioPerl is the product of a community effort to produce Perl code which
1122 is useful in biology. Examples include Sequence objects, Alignment objects
1123 and database searching objects. These objects not only do what they are
1124 advertised to do in the documentation, but they also interact - Alignment
1125 objects are made from the Sequence objects, Sequence objects have access to
1126 Annotation and SeqFeature objects and databases, Blast objects can be
1127 converted to Alignment objects, and so on. This means that the objects
1128 provide a coordinated and extensible framework to do computational biology.")
1129 (license license:perl-license)))
;; Perl bindings to the HTS library, fetched from the CPAN mirror and built
;; with the Perl build system.
1131 (define-public perl-bio-db-hts
1133 (name "perl-bio-db-hts")
1138 (uri (string-append "mirror://cpan/authors/id/A/AV/AVULLO/Bio-DB-HTS-"
1142 "0hjg0igfkpvh27zdkdr6pa7cqm9n6r7cwz0np74cl4wmawgvr9hj"))))
1143 (build-system perl-build-system)
1145 (list perl-module-build pkg-config))
;; Depends on bioperl-minimal and on the htslib-1.9 variant of htslib.
1147 (list bioperl-minimal htslib-1.9))
1148 (home-page "https://metacpan.org/release/Bio-DB-HTS")
1149 (synopsis "Perl interface to HTS library for DNA sequencing")
1150 (description "This is a Perl interface to the HTS library for DNA
1152 (license license:asl2.0)))
;; Biopython, fetched from PyPI and built with the Python build system;
;; python-numpy is listed as a dependency.
1154 (define-public python-biopython
1156 (name "python-biopython")
1160 ;; use PyPi rather than biopython.org to ease updating
1161 (uri (pypi-uri "biopython" version))
1164 "0wlch9xpa0fpgjzyxi6jsfca6iakaq9a05927xg8vqnmvaccnwrq"))))
1165 (build-system python-build-system)
1168 (modify-phases %standard-phases
;; Runs before the check phase so that HOME points at /tmp during the tests.
1169 (add-before 'check 'set-home
1170 ;; Some tests require a home directory to be set.
1171 (lambda _ (setenv "HOME" "/tmp") #t)))))
1173 (list python-numpy))
1174 (home-page "https://biopython.org/")
1175 (synopsis "Tools for biological computation in Python")
1177 "Biopython is a set of tools for biological computation including parsers
1178 for bioinformatics files into Python data structures; interfaces to common
1179 bioinformatics programs; a standard sequence class and tools for performing
1180 common operations on them; code to perform data classification; code for
1181 dealing with alignments; code making it easy to split up parallelizable tasks
1182 into separate processes; and more.")
1183 (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
;; Variant of python-biopython: inherits every field from it and overrides
;; the source with a PyPI tarball that has a different hash.
1185 (define-public python-biopython-1.73
1187 (inherit python-biopython)
1191 ;; use PyPi rather than biopython.org to ease updating
1192 (uri (pypi-uri "biopython" version))
1195 "1q55jhf76z3k6is3psis0ckbki7df26x7dikpcc3vhk1vhkwribh"))))))
;; fastalite: a small FASTA/FASTQ parser, fetched from PyPI and built with
;; the Python build system.  Its test suite is disabled (see below).
1197 (define-public python-fastalite
1199 (name "python-fastalite")
1204 (uri (pypi-uri "fastalite" version))
1207 "1qli6pxp77i9xn2wfciq2zaxhl82bdxb33cpzqzj1z25yd036wqj"))))
1208 (build-system python-build-system)
1210 `(#:tests? #f)) ; Test data is not distributed.
1211 (home-page "https://github.com/nhoffman/fastalite")
1212 (synopsis "Simplest possible FASTA parser")
1213 (description "This library implements a FASTA and a FASTQ parser without
1214 relying on a complex dependency tree.")
1215 (license license:expat)))
;; Biosoup: header-only C++ support library, built with CMake from a pinned
;; upstream commit.
1217 (define-public biosoup
1225 (url "https://github.com/rvaser/biosoup")
1226 ;; Corresponds to version 0.10.0
1227 (commit "38181f09854ff42cbd9632200a2ec9fb37a4b7b6")))
1228 (file-name (git-file-name name version))
1231 "02hvyka703zagx0nvv2yx3dkc748zc8g6qbrpya7r8kfkcl7y8hw"))))
1232 (build-system cmake-build-system)
1235 (modify-phases %standard-phases
;; Phase body that receives the tests? flag and runs the test binary
;; ./bin/biosoup_test directly.
1237 (lambda* (#:key tests? #:allow-other-keys)
1239 (invoke "./bin/biosoup_test")))))))
1242 (home-page "https://github.com/rvaser/biosoup")
1243 (synopsis "C++ support library for bioinformatics tools")
1244 (description "Biosoup is a C++ collection of header-only data structures
1245 used for storage and logging in bioinformatics tools.")
1246 (license license:expat)))
;; Bioparser: header-only C++ parsing library for bioinformatics formats,
;; built with CMake from a pinned upstream commit.
1248 (define-public bioparser
1256 (url "https://github.com/rvaser/bioparser")
1257 ;; Corresponds to tag 3.0.13
1258 (commit "13341e6e0855c6b358ffcea6dad216e1009e1287")))
1259 (file-name (git-file-name name version))
1262 "0c5p2dl8jb12ci9f427jzrmmm9cgvc1k4fxsn2ggkfsin6r1r82i"))))
1263 (build-system cmake-build-system)
1266 (modify-phases %standard-phases
;; Phase body that receives the tests? flag and runs the test binary
;; ./bin/bioparser_test directly.
1268 (lambda* (#:key tests? #:allow-other-keys)
1270 (invoke "./bin/bioparser_test")))))))
1277 (home-page "https://github.com/rvaser/bioparser")
1278 (synopsis "C++ library for parsing several formats in bioinformatics")
1279 (description "Bioparser is a C++ header only parsing library for several
1280 bioinformatics formats (FASTA/Q, MHAP/PAF/SAM), with support for zlib
1282 (license license:expat)))
;; circtools: accelerated helpers for the CIRI toolkit, built with the Cargo
;; build system from a pinned upstream commit.  The vendored spoa sources are
;; patched to look up bioparser and biosoup via CMake.
1284 (define-public circtools
1292 (url "https://github.com/Kevinzjy/circtools")
1293 ;; Corresponds to tag v1.0.0
1294 (commit "79380de59013601021ca3b1352d6f64d2fb89646")
1296 (file-name (git-file-name name version))
1299 "0wg1s927g32k25j967kfr8l30nmr4c0p4zvy5igvy7cs6chd60lh"))))
1300 (build-system cargo-build-system)
1303 (modify-phases %standard-phases
;; Make every file in the unpacked source tree writable.
1304 (add-after 'unpack 'make-writable
1306 (for-each make-file-writable (find-files "."))))
;; In the vendored spoa CMakeLists.txt, switch the bioparser and biosoup
;; find_package calls from QUIET to CONFIG and replace GTest_FOUND with TRUE.
1307 (add-after 'unpack 'prepare-spoa-dependencies
1308 (lambda* (#:key inputs #:allow-other-keys)
1309 (substitute* "vendor/spoa/CMakeLists.txt"
1310 (("find_package\\(bioparser 3.0.13 QUIET\\)")
1311 "find_package(bioparser 3.0.13 CONFIG)")
1312 (("find_package\\(biosoup 0.10.0 QUIET\\)")
1313 "find_package(biosoup 0.10.0 CONFIG)")
1314 (("GTest_FOUND") "TRUE")))))
;; Rust crates the build depends on, as label/package pairs.
1316 (("rust-anyhow" ,rust-anyhow-1)
1317 ("rust-bio" ,rust-bio-0.33)
1318 ("rust-chrono" ,rust-chrono-0.4)
1319 ("rust-docopt" ,rust-docopt-1)
1320 ("rust-flate2" ,rust-flate2-1)
1321 ("rust-indicatif" ,rust-indicatif-0.15)
1322 ("rust-libc" ,rust-libc-0.2)
1323 ("rust-serde" ,rust-serde-1)
1324 ("rust-seq-io" ,rust-seq-io-0.3))))
1326 (list bioparser biosoup))
1328 (list cmake pkg-config googletest))
1329 (home-page "https://github.com/Kevinzjy/circtools")
1330 (synopsis "Accelerating functions in CIRI toolkit")
1331 (description "This package provides accelerated functions for the CIRI
1332 toolkit. It also provides the @code{ccs} executable to scan for circular
1333 consensus sequences.")
1334 (license license:expat)))
;; CIRI-long: circular RNA identification from long reads, built with the
;; Python build system from the upstream git tag "v" + version.  The bundled
;; ccs binary is deleted from the source and replaced by the one from the
;; circtools input.
1336 (define-public ciri-long
1344 (url "https://github.com/bioinfo-biols/CIRI-long")
1345 (commit (string-append "v" version))))
1346 (file-name (git-file-name name version))
1349 "10k88i1fcqchrrjv82rmylwvbwqfba0n51palhig9hsg71xs0dbi"))
1350 ;; Delete bundled binary
1351 (snippet '(delete-file "libs/ccs"))))
1352 (build-system python-build-system)
1355 (modify-phases %standard-phases
;; Edit the requirement list in setup.py.
1356 (add-after 'unpack 'relax-requirements
1358 (substitute* "setup.py"
1359 (("'argparse[^']*',") "") ; only for python2
;; Build libssw.so with make inside the bundled striped_smith_waterman
;; directory before the regular build phase.
1361 (add-before 'build 'build-libssw
1363 (with-directory-excursion "libs/striped_smith_waterman"
1364 (invoke "make" "libssw.so"))))
;; Point pipeline.py at bin/ccs of the circtools input, and drop the
;; os.chmod(lib_path...) line from main.py.
1365 (add-before 'build 'fix-reference-to-ccs
1366 (lambda* (#:key inputs #:allow-other-keys)
1367 (substitute* "CIRI_long/pipeline.py"
1370 (assoc-ref inputs "circtools") "/bin/ccs"
1373 (substitute* "CIRI_long/main.py"
1374 (("os.chmod\\(lib_path.*") "")))))))
1388 (list python-cython python-nose python-setuptools))
1389 (home-page "https://ciri-cookbook.readthedocs.io/")
1390 (synopsis "Circular RNA identification for Nanopore sequencing")
1391 (description "CIRI-long is a package for circular RNA identification using
1392 long-read sequencing data.")
1393 (license license:expat)))
;; QTLtools: molecular QTL discovery and analysis, built with the GNU build
;; system from the upstream tarball (fetched with url-fetch/tarbomb).
1395 (define-public qtltools
1400 (method url-fetch/tarbomb)
1401 (uri (string-append "https://qtltools.github.io/qtltools/"
1402 "binaries/QTLtools_" version
1406 "13gdry5l43abn3464fmk8qzrxgxnxah2612r66p9dzhhl92j30cd"))))
1407 (build-system gnu-build-system)
1409 `(#:tests? #f ; no tests included
;; Variables pointing the Makefile at the include and lib directories of the
;; boost, htslib and rmath-standalone inputs.
1411 ,#~(list (string-append "BOOST_INC="
1412 #$(this-package-input "boost") "/include")
1413 (string-append "BOOST_LIB="
1414 #$(this-package-input "boost") "/lib")
1415 (string-append "HTSLD_INC="
1416 #$(this-package-input "htslib") "/include")
1417 (string-append "HTSLD_LIB="
1418 #$(this-package-input "htslib") "/lib")
1419 (string-append "RMATH_INC="
1420 #$(this-package-input "rmath-standalone")
1422 (string-append "RMATH_LIB="
1423 #$(this-package-input "rmath-standalone")
1426 (modify-phases %standard-phases
;; Use the shared boost libraries instead of the static archives, and link
;; with -lopenblas instead of -lblas.
1427 (add-after 'unpack 'fix-linkage
1429 (substitute* "qtltools/Makefile"
1430 (("libboost_iostreams.a")
1431 "libboost_iostreams.so")
1432 (("libboost_program_options.a")
1433 "libboost_program_options.so")
1434 (("-lblas") "-lopenblas"))))
;; Enter the qtltools subdirectory before building.
1435 (add-before 'build 'chdir
1436 (lambda _ (chdir "qtltools")))
;; Rewrite LIB_FLAGS and swap the static libraries named in LIB_FILES for
;; their shared (.so) counterparts.
1439 (substitute* "qtltools/Makefile"
1441 "LIB_FLAGS=-lz -lcrypto -lssl")
1442 (("LIB_FILES=\\$\\(RMATH_LIB\\)/libRmath.a \
1443 \\$\\(HTSLD_LIB\\)/libhts.a \
1444 \\$\\(BOOST_LIB\\)/libboost_iostreams.a \
1445 \\$\\(BOOST_LIB\\)/libboost_program_options.a")
1446 "LIB_FILES=$(RMATH_LIB)/libRmath.so \
1447 $(HTSLD_LIB)/libhts.so \
1448 $(BOOST_LIB)/libboost_iostreams.so \
1449 $(BOOST_LIB)/libboost_program_options.so"))))
;; Copy bin/QTLtools into the bin directory of the "out" output.
1451 (lambda* (#:key outputs #:allow-other-keys)
1452 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
1454 (install-file "bin/QTLtools" bin)))))))
1464 (home-page "https://qtltools.github.io/qtltools/")
1465 (synopsis "Tool set for molecular QTL discovery and analysis")
1466 (description "QTLtools is a tool set for molecular QTL discovery
1467 and analysis. It allows going from the raw genetic sequence data to
1468 collection of molecular @dfn{Quantitative Trait Loci} (QTLs) in few
1469 easy-to-perform steps.")
1470 (license license:gpl3+)))
1472 (define-public bpp-core
1473 ;; Upstream's most recent release dates from 2014, and cloning the git
1474 ;; repository is the recommended way to build from source, so a commit is
1475 ;; pinned here.  See http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
1476 (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582"))
;; Yields "2.2.0-1." followed by the first seven characters of the commit.
1479 (version (git-version "2.2.0" "1" commit))
1483 (url "http://biopp.univ-montp2.fr/git/bpp-core")
;; Yields NAME-VERSION-checkout.
1485 (file-name (git-file-name name version))
1488 "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
1489 (build-system cmake-build-system)
;; Parallel builds are switched off for this package.
1491 `(#:parallel-build? #f))
1492 (home-page "http://biopp.univ-montp2.fr")
1493 (synopsis "C++ libraries for Bioinformatics")
1495 "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
1496 analysis, phylogenetics, molecular evolution and population genetics. It is
1497 Object Oriented and is designed to be both easy to use and computer efficient.
1498 Bio++ intends to help programmers to write computer expensive programs, by
1499 providing them a set of re-usable tools.")
1500 (license license:cecill-c))))
1502 (define-public bpp-phyl
1503 ;; Upstream's most recent release dates from 2014, and cloning the git
1504 ;; repository is the recommended way to build from source, so a commit is
1505 ;; pinned here.  See http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
1506 (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2"))
;; Yields "2.2.0-1." followed by the first seven characters of the commit.
1509 (version (git-version "2.2.0" "1" commit))
1513 (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
;; Yields NAME-VERSION-checkout.
1515 (file-name (git-file-name name version))
1518 "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
1519 (build-system cmake-build-system)
1521 `(#:parallel-build? #f
1522 ;; The tests need their data next to the build; an out-of-source build
1523 ;; does not copy it into the build directory, so build in the source tree.
1524 #:out-of-source? #f))
;; The other Bio++ libraries this one is built against.
1526 (list bpp-core bpp-seq))
1527 (home-page "http://biopp.univ-montp2.fr")
1528 (synopsis "Bio++ phylogenetic Library")
1530 "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
1531 analysis, phylogenetics, molecular evolution and population genetics. This
1532 library provides phylogenetics-related modules.")
1533 (license license:cecill-c))))
1535 (define-public bpp-popgen
1536 ;; Upstream's most recent release dates from 2014, and cloning the git
1537 ;; repository is the recommended way to build from source, so a commit is
1538 ;; pinned here.  See http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
1539 (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f"))
;; Yields "2.2.0-1." followed by the first seven characters of the commit.
1542 (version (git-version "2.2.0" "1" commit))
1546 (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
;; Yields NAME-VERSION-checkout.
1548 (file-name (git-file-name name version))
1551 "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
1552 (build-system cmake-build-system)
1554 `(#:parallel-build? #f
1555 #:tests? #f)) ; There are no tests.
;; The other Bio++ libraries this one is built against.
1557 (list bpp-core bpp-seq))
1558 (home-page "http://biopp.univ-montp2.fr")
1559 (synopsis "Bio++ population genetics library")
1561 "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
1562 analysis, phylogenetics, molecular evolution and population genetics. This
1563 library provides population genetics-related modules.")
1564 (license license:cecill-c))))
1566 (define-public bpp-seq
1567 ;; Upstream's most recent release dates from 2014, and cloning the git
1568 ;; repository is the recommended way to build from source, so a commit is
1569 ;; pinned here.  See http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
1570 (let ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33"))
;; Yields "2.2.0-1." followed by the first seven characters of the commit.
1573 (version (git-version "2.2.0" "1" commit))
1577 (url "http://biopp.univ-montp2.fr/git/bpp-seq")
;; Yields NAME-VERSION-checkout.
1579 (file-name (git-file-name name version))
1582 "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
1583 (build-system cmake-build-system)
1585 `(#:parallel-build? #f
1586 ;; The tests need their data next to the build; an out-of-source build
1587 ;; does not copy it into the build directory, so build in the source tree.
1588 #:out-of-source? #f))
1591 (home-page "http://biopp.univ-montp2.fr")
1592 (synopsis "Bio++ sequence library")
1594 "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
1595 analysis, phylogenetics, molecular evolution and population genetics. This
1596 library provides sequence-related modules.")
1597 (license license:cecill-c))))
1599 (define-public bppsuite
1600 ;; The last release was in 2014 and the recommended way to install from source
1601 ;; is to clone the git repository, so we do this.
1602 ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
1603 (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
;; The version is "2.2.0-1." followed by the first seven characters of the
;; pinned commit.
1606 (version (string-append "2.2.0-1." (string-take commit 7)))
1610 (url "http://biopp.univ-montp2.fr/git/bppsuite")
1612 (file-name (string-append name "-" version "-checkout"))
1615 "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
1616 (build-system cmake-build-system)
1618 `(#:parallel-build? #f
1619 #:tests? #f)) ; There are no tests.
1621 (list groff man-db texinfo))
;; The four Bio++ libraries the command-line tools are built against.  Every
;; label must be unique: a duplicated label makes label-based lookups return
;; only the first match, so bpp-popgen gets its own label.
1623 `(("bpp-core" ,bpp-core)
1624 ("bpp-seq" ,bpp-seq)
1625 ("bpp-phyl" ,bpp-phyl)
1626 ("bpp-popgen" ,bpp-popgen)))
1627 (home-page "http://biopp.univ-montp2.fr")
1628 (synopsis "Bioinformatics tools written with the Bio++ libraries")
1630 "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
1631 analysis, phylogenetics, molecular evolution and population genetics. This
1632 package provides command line tools using the Bio++ library.")
1633 (license license:cecill-c))))
;; NCBI BLAST+, built with the GNU build system from the upstream source
;; tarball.  The source snippet removes bundled libraries and patches out
;; sources of non-determinism; the phases rewrite hard-coded tool paths and
;; call the upstream configure script with explicit directories.
1635 (define-public blast+
1642 "https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
1643 version "/ncbi-blast-" version "+-src.tar.gz"))
1646 "0m0r9vkw631ky1za1wilsfk9k9spwqh22nkrb9a57rbwmrc1i3nq"))
1647 (modules '((guix build utils)))
1650 ;; Remove bundled bzip2, zlib and pcre.
1651 (delete-file-recursively "c++/src/util/compress/bzip2")
1652 (delete-file-recursively "c++/src/util/compress/zlib")
1653 (delete-file-recursively "c++/src/util/regexp")
1654 (substitute* "c++/src/util/compress/Makefile.in"
1655 (("bzip2 zlib api") "api"))
1656 ;; Remove useless msbuild directory
1657 (delete-file-recursively
1658 "c++/src/build-system/project_tree_builder/msbuild")
1660 ;; Build reproducibly.
1661 ;; Do not record the kernel version
1662 (substitute* "c++/src/build-system/configure"
1663 (("kver=.*") "kver=\"\""))
1664 ;; Do not generate random numbers.
1665 (substitute* "c++/scripts/common/impl/define_random_macros.sh"
1666 (("#define NCBI_RANDOM_VALUE_MAX 0xffffffffu" m)
1668 #define NCBI_RANDOM_VALUE_0 2845495105u
1669 #define NCBI_RANDOM_VALUE_1 2158634051u
1670 #define NCBI_RANDOM_VALUE_2 4072202242u
1671 #define NCBI_RANDOM_VALUE_3 902228395u
1672 #define NCBI_RANDOM_VALUE_4 1353323915u
1673 #define NCBI_RANDOM_VALUE_5 574823513u
1674 #define NCBI_RANDOM_VALUE_6 4119501261u
1675 #define NCBI_RANDOM_VALUE_7 2477640938u
1676 #define NCBI_RANDOM_VALUE_8 2776595395u
1677 #define NCBI_RANDOM_VALUE_9 270550684u
1679 (("cksum") "cksum >/dev/null"))))))
1680 (build-system gnu-build-system)
1682 `(;; There are two(!) tests for this massive library, and both fail with
1683 ;; "unparsable timing stats".
1684 ;; ERR [127] -- [serial/datatool] datatool.sh (unparsable timing stats)
1685 ;; ERR [127] -- [serial/datatool] datatool_xml.sh (unparsable timing stats)
1688 #:parallel-build? #f ; not supported
1690 (modify-phases %standard-phases
1691 (add-before 'configure 'set-HOME
1692 ;; $HOME needs to be set at some point during the configure phase
1693 (lambda _ (setenv "HOME" "/tmp") #t))
;; All later phases run from inside the c++ subdirectory of the tarball.
1694 (add-after 'unpack 'enter-dir
1695 (lambda _ (chdir "c++") #t))
1696 (add-after 'enter-dir 'fix-build-system
;; Lookup helper for the path substitution below; "date" is special-cased,
;; and a warning is printed when no absolute path can be found.
1698 (define (which* cmd)
1699 (cond ((string=? cmd "date")
1700 ;; make call to "date" deterministic
1705 (format (current-error-port)
1706 "WARNING: Unable to find absolute path for ~s~%"
1710 ;; Rewrite hardcoded paths to various tools
1711 (substitute* (append '("src/build-system/configure.ac"
1712 "src/build-system/configure"
1713 "src/build-system/helpers/run_with_lock.c"
1714 "scripts/common/impl/if_diff.sh"
1715 "scripts/common/impl/run_with_lock.sh"
1716 "src/build-system/Makefile.configurables.real"
1717 "src/build-system/Makefile.in.top"
1718 "src/build-system/Makefile.meta.gmake=no"
1719 "src/build-system/Makefile.meta.in"
1720 "src/build-system/Makefile.meta_l"
1721 "src/build-system/Makefile.meta_p"
1722 "src/build-system/Makefile.meta_r"
1723 "src/build-system/Makefile.mk.in"
1724 "src/build-system/Makefile.requirements"
1725 "src/build-system/Makefile.rules_with_autodep.in")
1726 (find-files "scripts/common/check" "\\.sh$"))
;; Each /usr/bin/CMD or /bin/CMD is replaced by the result of which*; when
;; that is #f the original text is kept unchanged.
1727 (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
1728 (or (which* cmd) all)))
;; In the config* files, LN_S is set to the ln found by `which'.
1730 (substitute* (find-files "src/build-system" "^config.*")
1731 (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
1734 ;; rewrite "/var/tmp" in check script
1735 (substitute* "scripts/common/check/check_make_unix.sh"
1736 (("/var/tmp") "/tmp"))
1738 ;; do not reset PATH
1739 (substitute* (find-files "scripts/common/impl/" "\\.sh$")
1741 (("action=/bin/") "action=")
1742 (("export PATH") ":"))
;; Call configure.orig directly with an explicit build root, prefix, libdir
;; (under the "lib" output) and includedir (under the "include" output), and
;; point it at the bzip2, zlib and pcre inputs.
1745 (lambda* (#:key inputs outputs #:allow-other-keys)
1746 (let ((out (assoc-ref outputs "out"))
1747 (lib (string-append (assoc-ref outputs "lib") "/lib"))
1748 (include (string-append (assoc-ref outputs "include")
1749 "/include/ncbi-tools++")))
1750 ;; The 'configure' script doesn't recognize things like
1751 ;; '--enable-fast-install'.
1752 (invoke "./configure.orig"
1753 (string-append "--with-build-root=" (getcwd) "/build")
1754 (string-append "--prefix=" out)
1755 (string-append "--libdir=" lib)
1756 (string-append "--includedir=" include)
1757 (string-append "--with-bz2="
1758 (assoc-ref inputs "bzip2"))
1759 (string-append "--with-z="
1760 (assoc-ref inputs "zlib"))
1761 (string-append "--with-pcre="
1762 (assoc-ref inputs "pcre"))
1763 ;; Each library is built twice by default, once
1764 ;; with "-static" in its name, and again
1769 (outputs '("out" ; 21 MB
1778 ("python" ,python-wrapper)))
1781 (home-page "https://blast.ncbi.nlm.nih.gov")
1782 (synopsis "Basic local alignment search tool")
1784 "BLAST is a popular method of performing a DNA or protein sequence
1785 similarity search, using heuristics to produce results quickly. It also
1786 calculates an “expect value” that estimates how many matches would have
1787 occurred at a given score by chance, which can aid a user in judging how much
1788 confidence to have in an alignment.")
1789 ;; Most of the sources are in the public domain, with the following
1792 ;; * ./c++/include/util/bitset/
1793 ;; * ./c++/src/html/ncbi_menu*.js
1795 ;; * ./c++/include/util/impl/floating_point_comparison.hpp
1797 ;; * ./c++/include/dbapi/driver/odbc/unix_odbc/
1799 ;; * ./c++/src/corelib/teamcity_*
1800 (license (list license:public-domain
;; BLESS: Bloom-filter-based error correction for NGS reads, built with the
;; GNU build system from the SourceForge tarball after removing bundled
;; third-party sources.
1806 (define-public bless
1812 (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
1816 "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
1817 (modules '((guix build utils)))
1820 ;; Remove bundled boost, pigz, google-sparsehash, zlib, and .git directory
1821 ;; FIXME: also remove bundled sources for murmurhash3 and
1822 ;; kmc once packaged.
1823 (delete-file-recursively "boost")
1824 (delete-file-recursively "pigz")
1825 (delete-file-recursively "google-sparsehash")
1826 (delete-file-recursively "zlib")
1827 (delete-file-recursively ".git")))))
1828 (build-system gnu-build-system)
1830 `(#:tests? #f ;no "check" target
;; Makefile variables: ZLIB is derived from the zlib input and LDFLAGS is a
;; joined list of linker flags.
1832 ,#~(list (string-append "ZLIB="
1833 #$(this-package-input "zlib")
1835 (string-append "LDFLAGS="
1836 (string-join '("-lboost_filesystem"
1842 (modify-phases %standard-phases
;; The bundled pigz directory was deleted in the source snippet, so remove
;; the Makefile command that would build it.
1843 (add-after 'unpack 'do-not-build-bundled-pigz
1844 (lambda* (#:key inputs outputs #:allow-other-keys)
1845 (substitute* "Makefile"
1846 (("cd pigz/pigz-2.3.3; make") ""))))
;; Hard-code the helper binaries in parse_args.cpp: kmc is looked up under
;; this package's own output (the install phase below copies it there) and
;; pigz under the pigz input.
1847 (add-after 'unpack 'patch-paths-to-executables
1848 (lambda* (#:key inputs outputs #:allow-other-keys)
1849 (substitute* "parse_args.cpp"
1850 (("kmc_binary = .*")
1851 (string-append "kmc_binary = \""
1852 (assoc-ref outputs "out")
1854 (("pigz_binary = .*")
1855 (string-append "pigz_binary = \""
1856 (assoc-ref inputs "pigz")
;; Install the bless binary and the bundled kmc binary into bin.
1859 (lambda* (#:key outputs #:allow-other-keys)
1860 (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
1861 (for-each (lambda (file)
1862 (install-file file bin))
1863 '("bless" "kmc/bin/kmc")))))
1864 (delete 'configure))))
1868 (list openmpi boost sparsehash pigz zlib))
1869 (supported-systems '("x86_64-linux"))
1870 (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
1871 (synopsis "Bloom-filter-based error correction tool for NGS reads")
1873 "@dfn{Bloom-filter-based error correction solution for high-throughput
1874 sequencing reads} (BLESS) is a correction tool for genomic reads produced by
1875 @dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized Bloom
1876 filter. BLESS produces accurate correction results with much less
1877 memory compared with previous solutions and is also able to tolerate a higher
1878 false-positive rate. BLESS can extend reads like DNA assemblers to correct
1879 errors at the end of reads.")
1880 (license license:gpl3+)))
;; Bowtie 2: the variable is named bowtie, but the source is the bowtie2 git
;; repository at tag "v" + version.  Built with the GNU build system.
1882 (define-public bowtie
1889 (url "https://github.com/BenLangmead/bowtie2")
1890 (commit (string-append "v" version))))
1891 (file-name (git-file-name name version))
1894 "1zl3cf327y2p7p03cavymbh7b00djc7lncfaqih33n96iy9q8ibp"))
1895 (modules '((guix build utils)))
1898 (substitute* "Makefile"
1899 ;; replace BUILD_HOST and BUILD_TIME for deterministic build
1900 (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
1901 (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))))))
1902 (build-system gnu-build-system)
;; Install under this package's output directory.
1907 (string-append "prefix=" #$output))
1909 (modify-phases %standard-phases
1912 ;; This "extended character" is not considered valid.
1913 (substitute* "processor_support.h"
;; Arguments naming the upstream test script and pointing it at ./bowtie2
;; and ./bowtie2-build in the current directory.
1919 "scripts/test/simple_tests.pl"
1920 "--bowtie2=./bowtie2"
1921 "--bowtie2-build=./bowtie2-build"))))))
1925 ("python" ,python-wrapper)))
1927 (list perl perl-clone perl-test-deep perl-test-simple))
1928 (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
1929 (synopsis "Fast and sensitive nucleotide sequence read aligner")
1931 "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
1932 reads to long reference sequences. It is particularly good at aligning reads
1933 of about 50 up to 100s or 1,000s of characters, and particularly good at
1934 aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
1935 genome with an FM Index to keep its memory footprint small: for the human
1936 genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
1937 gapped, local, and paired-end alignment modes.")
1938 (supported-systems '("x86_64-linux"))
1939 (license license:gpl3+)))
;; Bowtie (version 1), built with the GNU build system from the SourceForge
;; "-src.zip" release.  There is no configure script and no check target.
1941 (define-public bowtie1
1947 (uri (string-append "mirror://sourceforge/bowtie-bio/bowtie/"
1948 version "/bowtie-" version "-src.zip"))
1951 "11dbihdnrizc6qhx9xsw77w3q5ssx642alaqzvhxx32ak9glvq04"))
1952 (modules '((guix build utils)))
1954 '(substitute* "Makefile"
1955 ;; replace BUILD_HOST and BUILD_TIME for deterministic build
1956 (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
1957 (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))))
1958 (build-system gnu-build-system)
1960 `(#:tests? #f ; no "check" target
;; Compile with gcc, build the "all" target, and install under this
;; package's output directory.
1962 ,#~(list "CC=gcc" "all"
1963 (string-append "prefix=" #$output))
1965 (modify-phases %standard-phases
1966 (delete 'configure))))
1968 (list python-wrapper tbb-2020 zlib))
1969 (supported-systems '("x86_64-linux"))
1970 (home-page "http://bowtie-bio.sourceforge.net/index.shtml")
1971 (synopsis "Fast aligner for short nucleotide sequence reads")
1973 "Bowtie is a fast, memory-efficient short read aligner. It aligns short
1974 DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp
1975 reads per hour. Bowtie indexes the genome with a Burrows-Wheeler index to
1976 keep its memory footprint small: typically about 2.2 GB for the human
1977 genome (2.9 GB for paired-end).")
1978 (license license:artistic2.0)))
1980 (define-public tophat
1987 "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
1991 "19add02kv2xhd6ihd779dr7x35ggym3jqr0m5c4315i1yfb0p11p"))
1992 (modules '((guix build utils)))
1995 ;; Remove bundled SeqAn and samtools
1996 (delete-file-recursively "src/SeqAn-1.4.2")
1997 (delete-file-recursively "src/samtools-0.1.18")
1999 (build-system gnu-build-system)
2001 '(#:parallel-build? #f ; not supported
2003 (modify-phases %standard-phases
2004 (add-after 'set-paths 'hide-default-gcc
2005 (lambda* (#:key inputs #:allow-other-keys)
2006 (let ((gcc (assoc-ref inputs "gcc")))
2007 ;; Remove the default GCC from CPLUS_INCLUDE_PATH to prevent
2008 ;; conflicts with the GCC 5 input.
2009 (setenv "CPLUS_INCLUDE_PATH"
2011 (delete (string-append gcc "/include/c++")
2012 (string-split (getenv "CPLUS_INCLUDE_PATH") #\:))
2015 (add-after 'unpack 'use-system-samtools
2016 (lambda* (#:key inputs #:allow-other-keys)
2017 (substitute* "src/Makefile.in"
2018 (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
2019 (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
2020 (("SAMPROG = samtools_0\\.1\\.18") "")
2021 (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
2022 (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
2023 (substitute* '("src/common.cpp"
2025 (("samtools_0.1.18") (which "samtools")))
2026 (substitute* '("src/common.h"
2027 "src/bam2fastx.cpp")
2028 (("#include \"bam.h\"") "#include <samtools/bam.h>")
2029 (("#include \"sam.h\"") "#include <samtools/sam.h>"))
2030 (substitute* '("src/bwt_map.h"
2032 "src/align_status.h")
2033 (("#include <bam.h>") "#include <samtools/bam.h>")
2034 (("#include <sam.h>") "#include <samtools/sam.h>"))
2037 `(("gcc@5" ,gcc-5))) ;; doesn't build with later versions
2041 ("ncurses" ,ncurses)
2043 ("python" ,python-2)
2044 ("samtools" ,samtools-0.1)
2047 (home-page "https://ccb.jhu.edu/software/tophat/index.shtml")
2048 (synopsis "Spliced read mapper for RNA-Seq data")
2050 "TopHat is a fast splice junction mapper for nucleotide sequence
2051 reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
2052 mammalian-sized genomes using the ultra high-throughput short read
2053 aligner Bowtie, and then analyzes the mapping results to identify
2054 splice junctions between exons.")
2055 ;; TopHat is released under the Boost Software License, Version 1.0
2056 ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
2057 (license license:boost1.0)))
2066 "https://github.com/lh3/bwa/releases/download/v"
2067 version "/bwa-" version ".tar.bz2"))
2070 "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
2071 (build-system gnu-build-system)
2073 '(#:tests? #f ;no "check" target
;; NOTE(review): presumably -fcommon is needed because newer GCC versions
;; default to -fno-common -- confirm.
2074 #:make-flags '("CFLAGS=-fcommon")
2076 (modify-phases %standard-phases
;; Copy the binary, the static library, the README and the man page into
;; their directories under the "out" output.
2078 (lambda* (#:key outputs #:allow-other-keys)
2079 (let* ((out (assoc-ref outputs "out"))
2080 (bin (string-append out "/bin"))
2081 (lib (string-append out "/lib"))
2082 (doc (string-append out "/share/doc/bwa"))
2083 (man (string-append out "/share/man/man1")))
2084 (install-file "bwa" bin)
2085 (install-file "libbwa.a" lib)
2086 (install-file "README.md" doc)
2087 (install-file "bwa.1" man))))
2088 ;; no "configure" script
2089 (delete 'configure))))
2090 (inputs (list zlib))
2091 ;; Non-portable SSE instructions are used so building fails on platforms
2092 ;; other than x86_64.
2093 (supported-systems '("x86_64-linux"))
2094 (home-page "http://bio-bwa.sourceforge.net/")
2095 (synopsis "Burrows-Wheeler sequence aligner")
2097 "BWA is a software package for mapping low-divergent sequences against a
2098 large reference genome, such as the human genome. It consists of three
2099 algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
2100 designed for Illumina sequence reads up to 100bp, while the rest two for
2101 longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
2102 features such as long-read support and split alignment, but BWA-MEM, which is
2103 the latest, is generally recommended for high-quality queries as it is faster
2104 and more accurate. BWA-MEM also has better performance than BWA-backtrack for
2105 70-100bp Illumina reads.")
2106 (license license:gpl3+)))
;; BWA-PSSM: a variant of bwa that inherits from the bwa package, swaps in
;; the bwa-pssm git source, and extends bwa's build phases with one patch.
2108 (define-public bwa-pssm
2109 (package (inherit bwa)
2115 (url "https://github.com/pkerpedjiev/bwa-pssm")
2117 (file-name (git-file-name name version))
2120 "076c4q0cdqz8jgylb067y9zmvxglppnzi3qiscn0xiypgc6lgb5r"))))
2121 (build-system gnu-build-system)
;; Reuse bwa's arguments and add a phase to its #:phases (falling back to
;; %standard-phases when bwa defines none).
2123 (substitute-keyword-arguments (package-arguments bwa)
2124 ((#:phases phases '%standard-phases)
2125 `(modify-phases ,phases
;; Drop the `inline' keyword from the definition of map in pssm.c.
2126 (add-after 'unpack 'patch-C-error
2128 (substitute* "pssm.c"
2129 (("inline int map") "int map"))))))))
2131 (list gdsl zlib perl))
2132 (home-page "http://bwa-pssm.binf.ku.dk/")
2133 (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
2135 "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
2136 the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
2137 existing aligners it is fast and sensitive. Unlike most other aligners,
2138 however, it is also adaptable in the sense that one can direct the alignment
2139 based on known biases within the data set. It is coded as a modification of
2140 the original BWA alignment program and shares the genome index structure as
2141 well as many of the command line options.")
2142 (license license:gpl3+)))
;; bwa-meth: BS-Seq read aligner, fetched from the brentp/bwa-meth Git
;; repository at tag "v<version>" and built with python-build-system.
;; The 'keep-references-to-bwa phase rewrites "bwa mem" / "bwa index"
;; invocations in bwameth.py to use the absolute path returned by
;; (which "bwa"), and blanks out lines starting with "checkX".
2144 (define-public bwa-meth
2151 (url "https://github.com/brentp/bwa-meth")
2152 (commit (string-append "v" version))))
2153 (file-name (git-file-name name version))
2156 "0c695lkrr0996zwkibl7324wg2vxmn6522sz30xv4a9gaf0lnbh3"))))
2157 (build-system python-build-system)
2160 (modify-phases %standard-phases
2161 (add-after 'unpack 'keep-references-to-bwa
2162 (lambda* (#:key inputs #:allow-other-keys)
2163 (substitute* "bwameth.py"
2164 (("bwa (mem|index)" _ command)
2165 (string-append (which "bwa") " " command))
2166 ;; There's an ill-advised check for "samtools" on PATH.
2167 (("^checkX.*") "")))))))
2171 (list python-toolshed))
2172 (home-page "https://github.com/brentp/bwa-meth")
2173 (synopsis "Fast and accurate alignment of BS-Seq reads")
2175 "BWA-Meth works for single-end reads and for paired-end reads from the
2176 directional protocol (most common). It uses the method employed by
2177 methylcoder and Bismark of in silico conversion of all C's to T's in both
2178 reference and reads. It recovers the original read (needed to tabulate
2179 methylation) by attaching it as a comment which BWA appends as a tag to the
2180 read. It performs favorably to existing aligners gauged by number of on and
2181 off-target reads for a capture method that targets CpG-rich region.")
2182 (license license:expat)))
;; bx-python: tools for manipulating biological data, built with
;; python-build-system.  The source is the bxlab/bx-python Git repository,
;; pinned to an explicit commit hash rather than a version tag.
;; NOTE(review): the field names of the two dependency lists below are not
;; visible in this excerpt -- confirm against the full file.
2184 (define-public python-bx-python
2186 (name "python-bx-python")
2191 (url "https://github.com/bxlab/bx-python")
2192 (commit "f4e6a5c93e719db69b5798b6fdd9b167da358316")))
2193 (file-name (git-file-name name version))
2196 "0mclahslz34vq9x424jmzsxk0nmpm1j716fa8h3zwr9ssvch7skc"))))
2197 (build-system python-build-system)
2199 (list python-numpy))
2203 (list python-lzo python-nose python-cython))
2204 (home-page "https://github.com/bxlab/bx-python")
2205 (synopsis "Tools for manipulating biological data")
2207 "bx-python provides tools for manipulating biological data, particularly
2208 multiple sequence alignments.")
2209 (license license:expat)))
;; pyega3: Python client for EGA, fetched from PyPI under the name "pyega3"
;; and built with python-build-system.  The test suite is disabled because it
;; needs network access.  The description is a Texinfo list of features.
2211 (define-public python-pyega3
2213 (name "python-pyega3")
2217 (uri (pypi-uri "pyega3" version))
2220 "1k736in8g27rarx65ym9xk50x53zjg75h37bb8ljynxv04rypx2q"))))
2221 (build-system python-build-system)
2223 `(#:tests? #f)) ; The tests require network access.
2225 (list python-psutil python-htsget))
2227 (list python-requests python-tqdm python-urllib3 python-responses))
2228 (home-page "https://github.com/EGA-archive/ega-download-client")
2229 (synopsis "Python client for EGA")
2230 (description "This package is a python-based tool for viewing and
2231 downloading files from authorized EGA datasets. It uses the EGA data API and
2232 has several key features:
2234 @item Files are transferred over secure https connections and received
2235 unencrypted, so no need for decryption after download.
2236 @item Downloads resume from where they left off in the event that the
2237 connection is interrupted.
2238 @item Supports file segmenting and parallelized download of segments,
2239 improving overall performance.
2240 @item After download completes, file integrity is verified using checksums.
2241 @item Implements the GA4GH-compliant htsget protocol for download of genomic
2242 ranges for data files with accompanying index files.
2244 (license license:asl2.0)))
;; pysam: Python bindings to the SAMtools C API, fetched from the
;; pysam-developers/pysam Git repository at tag "v<version>".
;; The origin snippet deletes the bundled "htslib" directory; the 'set-flags
;; phase then points the build at the htslib input through the HTSLIB_MODE,
;; HTSLIB_LIBRARY_DIR and HTSLIB_INCLUDE_DIR environment variables.
;; The test phase sets HOME to /tmp, generates test data with "make" in
;; tests/pysam_data and tests/cbcf_data, and runs pytest while deselecting
;; the FileHTTP tests.
2246 (define-public python-pysam
2248 (name "python-pysam")
2252 ;; Test data is missing on PyPi.
2254 (url "https://github.com/pysam-developers/pysam")
2255 (commit (string-append "v" version))))
2256 (file-name (git-file-name name version))
2259 "042ca27r6634xg2ixgvq1079cp714wmm6ml7bwc1snn0wxxzywfg"))
2260 (modules '((guix build utils)))
2262 ;; FIXME: Unbundle samtools and bcftools.
2263 (delete-file-recursively "htslib")))))
2264 (build-system python-build-system)
2267 (modify-phases %standard-phases
2268 (add-before 'build 'set-flags
2269 (lambda* (#:key inputs #:allow-other-keys)
2270 (setenv "HTSLIB_MODE" "external")
2271 (setenv "HTSLIB_LIBRARY_DIR"
2272 (string-append (assoc-ref inputs "htslib") "/lib"))
2273 (setenv "HTSLIB_INCLUDE_DIR"
2274 (string-append (assoc-ref inputs "htslib") "/include"))
2275 (setenv "LDFLAGS" "-lncurses")
2276 (setenv "CFLAGS" "-D_CURSES_LIB=1")))
2278 (lambda* (#:key tests? #:allow-other-keys)
2280 ;; Step out of source dir so python does not import from CWD.
2281 (with-directory-excursion "tests"
2282 (setenv "HOME" "/tmp")
2283 (invoke "make" "-C" "pysam_data")
2284 (invoke "make" "-C" "cbcf_data")
2285 ;; The FileHTTP test requires network access.
2286 (invoke "pytest" "-k" "not FileHTTP"))))))))
2288 (list htslib)) ; Included from installed header files.
2290 (list ncurses curl zlib))
2294 ;; Dependencies below are for tests only.
2297 (home-page "https://github.com/pysam-developers/pysam")
2298 (synopsis "Python bindings to the SAMtools C API")
2300 "Pysam is a Python module for reading and manipulating files in the
2301 SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
2302 also includes an interface for tabix.")
2303 (license license:expat)))
;; twobitreader: Python library for reading .2bit files, fetched from the
;; benjschiller/twobitreader Git repository and built with
;; python-build-system.  Tests are disabled because none are included.
2305 (define-public python-twobitreader
2307 (name "python-twobitreader")
2312 (url "https://github.com/benjschiller/twobitreader")
2314 (file-name (git-file-name name version))
2317 "1qbxvv1h58cismbk1anpjrkpghsaiy64a11ir3lhy6qch6xf8n62"))))
2318 (build-system python-build-system)
2319 ;; Tests are not included
2320 (arguments '(#:tests? #f))
2322 (list python-sphinx))
2323 (home-page "https://github.com/benjschiller/twobitreader")
2324 (synopsis "Python library for reading .2bit files")
2326 "twobitreader is a Python library for reading .2bit files as used by the
2327 UCSC genome browser.")
2328 (license license:artistic2.0)))
;; plastid: Python library for genomic analysis, fetched from PyPI under the
;; name "plastid" and built with python-build-system.
;; NOTE(review): the build arguments that follow the "Some test files are not
;; included" comment are not visible in this excerpt.
2330 (define-public python-plastid
2332 (name "python-plastid")
2336 (uri (pypi-uri "plastid" version))
2339 "1a7mdky2xw02y88l51f58pqk8039ahdp6sblj3zx58zarmy2pqyl"))))
2340 (build-system python-build-system)
2342 ;; Some test files are not included.
2354 (list python-cython python-nose))
2355 (home-page "https://github.com/joshuagryphon/plastid")
2356 (synopsis "Python library for genomic analysis")
2358 "plastid is a Python library for genomic analysis – in particular,
2359 high-throughput sequencing data – with an emphasis on simplicity.")
2360 (license license:bsd-3)))
;; TEtoolkit (TEtranscripts / TEcount), fetched from the
;; mhammell-laboratory/TEtranscripts Git repository and built with
;; python-build-system.  Three custom phases are added:
;;  - 'adjust-requirements removes the 'argparse' line from setup.py;
;;  - 'patch-invocations substitutes absolute paths, looked up with
;;    search-input-file, for the sort, rm, Rscript and bamToBed commands;
;;  - 'wrap-program wraps bin/TEtranscripts and bin/TEcount so that
;;    R_LIBS_SITE is set to its build-time value.
2362 (define-public tetoolkit
2369 (url "https://github.com/mhammell-laboratory/TEtranscripts")
2371 (file-name (git-file-name name version))
2374 "1m3xsydakhdan9gp9mfdz7llka5g6ak91d0mbl1cmmxq9qs6an4y"))))
2375 (build-system python-build-system)
2378 (modify-phases %standard-phases
2379 (add-after 'unpack 'adjust-requirements
2381 (substitute* "setup.py"
2382 ;; This defunct dependency isn't required for Python 3 (see:
2383 ;; https://github.com/mhammell-laboratory/TEtranscripts/issues/111).
2384 ((".*'argparse'.*") ""))))
2385 (add-after 'unpack 'patch-invocations
2386 (lambda* (#:key inputs #:allow-other-keys)
2387 (substitute* '("bin/TEtranscripts"
2390 (string-append "'" (search-input-file inputs "bin/sort") " "))
2392 (string-append "'" (search-input-file inputs "bin/rm") " -f "))
2394 (string-append "'" (search-input-file inputs "bin/Rscript")
2396 (substitute* "TEToolkit/IO/ReadInputs.py"
2398 (search-input-file inputs "bin/bamToBed")))
2399 (substitute* "TEToolkit/Normalization.py"
2401 (string-append "\"" (search-input-file inputs "bin/Rscript")
2403 (add-after 'install 'wrap-program
2404 (lambda* (#:key outputs #:allow-other-keys)
2405 ;; Make sure the executables find R packages.
2406 (for-each (lambda (script)
2407 (wrap-program script
2408 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
2409 (list (search-input-file outputs "bin/TEtranscripts")
2410 (search-input-file outputs "bin/TEcount"))))))))
2418 (home-page "https://github.com/mhammell-laboratory/TEtranscripts")
2419 (synopsis "Transposable elements in differential enrichment analysis")
2421 "This is a package for including transposable elements in differential
2422 enrichment analysis of sequencing datasets. TEtranscripts and TEcount take
2423 RNA-seq (and similar data) and annotates reads to both genes and transposable
2424 elements. TEtranscripts then performs differential analysis using DESeq2.
2425 Note that TEtranscripts and TEcount rely on specially curated GTF files, which
2426 are not included due to their size.")
2427 (license license:gpl3+)))
;; CD-HIT: clusters and compares protein or nucleotide sequences.  Built with
;; gnu-build-system from a release tarball under the weizhongli/cdhit GitHub
;; releases.  There are no tests.  PREFIX is passed as "<output>/bin" because
;; the Makefile copies executables straight into PREFIX; that directory is
;; created by hand before 'install.  The 'be-timeless phase strips the
;; __DATE__/__TIME__ build stamps from the C++ sources.
2429 (define-public cd-hit
2435 (uri (string-append "https://github.com/weizhongli/cdhit"
2436 "/releases/download/V" version
2438 "-2019-0228.tar.gz"))
2441 "1phmfhgcpyfd6kj7jwzw976613lcpv1wc2pzfdfaxla062x2s5r6"))))
2442 (build-system gnu-build-system)
2445 #:tests? #f ; there are no tests
2447 ;; Executables are copied directly to the PREFIX.
2448 #~(list (string-append "PREFIX=" #$output "/bin")
2449 ;; Support longer sequences (e.g. Pacbio sequences)
2452 '(modify-phases %standard-phases
2453 ;; No "configure" script
2455 ;; Remove sources of non-determinism
2456 (add-after 'unpack 'be-timeless
2458 (substitute* "cdhit-utility.c++"
2459 ((" \\(built on \" __DATE__ \"\\)") ""))
2460 (substitute* "cdhit-common.c++"
2461 (("__DATE__") "\"0\"")
2462 (("\", %s, \" __TIME__ \"\\\\n\", date") ""))))
2463 ;; The "install" target does not create the target directory.
2464 (add-before 'install 'create-target-dir
2465 (lambda* (#:key outputs #:allow-other-keys)
2466 (mkdir-p (string-append (assoc-ref outputs "out") "/bin")))))))
2469 (home-page "http://weizhongli-lab.org/cd-hit/")
2470 (synopsis "Cluster and compare protein or nucleotide sequences")
2472 "CD-HIT is a program for clustering and comparing protein or nucleotide
2473 sequences. CD-HIT is designed to be fast and handle extremely large
2475 ;; The manual says: "It can be copied under the GNU General Public License
2476 ;; version 2 (GPLv2)."
2477 (license license:gpl2)))
;; cd-hit-auxtools: auxiliary tools built from the "cd-hit-auxtools"
;; subdirectory, which the 'chdir phase enters after 'unpack.  There are no
;; tests and no install target, so the cd-hit-dup, cd-hit-lap and read-linker
;; executables are copied into "<output>/bin" by hand.
;; NOTE(review): presumably derived from the cd-hit package -- the inheriting
;; line is not visible in this excerpt.
2479 (define-public cd-hit-auxtools
2482 (name "cd-hit-auxtools")
2485 #:tests? #f ; there are no tests
2487 #~(modify-phases %standard-phases
2488 (add-after 'unpack 'chdir (lambda _ (chdir "cd-hit-auxtools")))
2489 ;; No "configure" script
2491 ;; There is no install target.
2494 (for-each (lambda (file)
2495 (install-file file (string-append #$output "/bin")))
2496 '("cd-hit-dup" "cd-hit-lap" "read-linker")))))))
;; CLIPper: peak caller for CLIP-seq datasets, fetched from the
;; YeoLab/clipper Git repository and built with python-build-system.
;; The origin snippet deletes the pre-compiled clipper/src/peaks.so.
;; Build phases patch setup.py so the "peaks" and "readsToWiggle" extensions
;; get Cython language_level 3, rename test_get_FDR_cutoff_mean so it is not
;; collected, run "python -m unittest" from clipper/test, and delete the
;; 'sanity-check phase.
2499 (define-public clipper
2506 (url "https://github.com/YeoLab/clipper")
2508 (file-name (git-file-name name version))
2511 "0508rgnfjk5ar5d1mjbjyrnarv4kw9ksq0m3jw2bmgabmb5v6ikk"))
2512 (modules '((guix build utils)))
2515 ;; Delete pre-compiled files.
2516 (delete-file "clipper/src/peaks.so")))))
2517 (build-system python-build-system)
2521 (modify-phases %standard-phases
2522 (add-after 'unpack 'use-python3-for-cython
2524 (substitute* "setup.py"
2527 peaks.cython_directives = {'language_level': '3'}
2528 readsToWiggle.cython_directives = {'language_level': '3'}
2530 (add-after 'unpack 'disable-nondeterministic-test
2532 ;; This test fails/succeeds non-deterministically.
2533 (substitute* "clipper/test/test_call_peak.py"
2534 (("test_get_FDR_cutoff_mean") "_test_get_FDR_cutoff_mean"))))
2535 ;; This doesn't work because "usage" is executed, and that calls
2538 (lambda* (#:key tests? inputs outputs #:allow-other-keys)
2540 (add-installed-pythonpath inputs outputs)
2541 (with-directory-excursion "clipper/test"
2542 (invoke "python" "-m" "unittest")))))
2543 ;; This is not a library
2544 (delete 'sanity-check))))
2556 (list python-setuptools-git
2557 python-mock ; for tests
2558 python-nose ; for tests
2559 python-pytz)) ; for tests
2560 (home-page "https://github.com/YeoLab/clipper")
2561 (synopsis "CLIP peak enrichment recognition")
2563 "CLIPper is a tool to define peaks in CLIP-seq datasets.")
2564 (license license:gpl2)))
;; CodingQuarry: fungal gene predictor, fetched from SourceForge and built
;; with gnu-build-system.  There is no "check" target.  The custom phase
;; below installs INSTRUCTIONS.pdf under share/doc/codingquarry, copies the
;; QuarryFiles directory into the output, and installs the CodingQuarry
;; binary and the CufflinksGTF_to_CodingQuarryGFF3.py script into bin.
;; QUARRY_PATH is exported as a search path pointing at "QuarryFiles".
2566 (define-public codingquarry
2568 (name "codingquarry")
2573 "mirror://sourceforge/codingquarry/CodingQuarry_v"
2577 "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
2578 (build-system gnu-build-system)
2580 '(#:tests? #f ; no "check" target
2582 (modify-phases %standard-phases
2585 (lambda* (#:key outputs #:allow-other-keys)
2586 (let* ((out (assoc-ref outputs "out"))
2587 (bin (string-append out "/bin"))
2588 (doc (string-append out "/share/doc/codingquarry")))
2589 (install-file "INSTRUCTIONS.pdf" doc)
2590 (copy-recursively "QuarryFiles"
2591 (string-append out "/QuarryFiles"))
2592 (install-file "CodingQuarry" bin)
2593 (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bin))
2595 (inputs (list openmpi))
2596 (native-search-paths
2597 (list (search-path-specification
2598 (variable "QUARRY_PATH")
2599 (files '("QuarryFiles")))))
2600 (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
2601 (synopsis "Fungal gene predictor")
2602 (description "CodingQuarry is a highly accurate, self-training GHMM fungal
2603 gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
2604 (home-page "https://sourceforge.net/projects/codingquarry/")
2605 (license license:gpl3+)))
;; Clustal-Omega: multiple sequence aligner for protein and DNA/RNA, fetched
;; as a tarball from www.clustal.org/omega and built with gnu-build-system.
2607 (define-public clustal-omega
2609 (name "clustal-omega")
2613 (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
2617 "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
2618 (build-system gnu-build-system)
2621 (home-page "http://www.clustal.org/omega/")
2622 (synopsis "Multiple sequence aligner for protein and DNA/RNA")
2624 "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
2625 program for protein and DNA/RNA. It produces high quality MSAs and is capable
2626 of handling data-sets of hundreds of thousands of sequences in reasonable
2628 (license license:gpl2+)))
;; CrossMap: converts genome coordinates between assemblies.  Fetched from
;; PyPI under the name "CrossMap" and built with python-build-system.  The
;; origin snippet removes every *.pyc file and the ".eggs" directory from
;; the source.
2630 (define-public crossmap
2636 (uri (pypi-uri "CrossMap" version))
2639 "0hqminh5wn1p3x481jbyc7gmncp5xc196hpvki7k25vzbryhwcix"))
2640 (modules '((guix build utils)))
2643 ;; Delete compiled Python files.
2644 (for-each delete-file (find-files "." "\\.pyc$"))
2645 (delete-file-recursively ".eggs")))))
2646 (build-system python-build-system)
2648 (list python-bx-python python-numpy python-pybigwig python-pysam
2651 (list python-cython python-nose))
2652 (home-page "http://crossmap.sourceforge.net/")
2653 (synopsis "Convert genome coordinates between assemblies")
2655 "CrossMap is a program for conversion of genome coordinates or annotation
2656 files between different genome assemblies. It supports most commonly used
2657 file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
2658 (license license:gpl2+)))
;; dnaio: Python library for parsing FASTQ and FASTA files, fetched from PyPI
;; under the name "dnaio" and built with python-build-system.
2660 (define-public python-dnaio
2662 (name "python-dnaio")
2667 (uri (pypi-uri "dnaio" version))
2670 "14v5yyasq2bz34j38wi3xfcp06jj7l35ppibjcn95l2n73hz3zwi"))))
2671 (build-system python-build-system)
2673 (list python-cython python-pytest python-xopen))
2674 (home-page "https://github.com/marcelm/dnaio/")
2675 (synopsis "Read FASTA and FASTQ files efficiently")
2677 "dnaio is a Python library for fast parsing of FASTQ and also FASTA
2678 files. The code was previously part of the cutadapt tool.")
2679 (license license:expat)))
;; deeptoolsintervals: Python module for GTF-based interval trees with
;; associated meta-data, fetched from PyPI under the name
;; "deeptoolsintervals" and built with python-build-system.
2681 (define-public python-deeptoolsintervals
2683 (name "python-deeptoolsintervals")
2687 (uri (pypi-uri "deeptoolsintervals" version))
2690 "1xnl80nblysj6dylj4683wgrfa425rkx4dp5k65hvwdns9pw753x"))))
2691 (build-system python-build-system)
2694 (home-page "https://github.com/deeptools/deeptools_intervals")
2695 (synopsis "Create GTF-based interval trees with associated meta-data")
2697 "This package provides a Python module creating/accessing GTF-based
2698 interval trees with associated meta-data. It is primarily used by the
2699 @code{deeptools} package.")
2700 (license license:expat)))
;; deepTools: tools for exploring deep sequencing data, fetched from the
;; deeptools/deepTools Git repository and built with python-build-system.
;; The package carries two licenses; the comment above the license field
;; explains which file falls under which.
2702 (define-public python-deeptools
2704 (name "python-deeptools")
2709 (url "https://github.com/deeptools/deepTools")
2711 (file-name (git-file-name name version))
2714 "0l09vyynz6s6w7fnyd94rpys4a6aja6kp4gli64pngdxdz3md1nl"))))
2715 (build-system python-build-system)
2717 (list python-mock python-nose))
2719 (list python-matplotlib
2726 python-deeptoolsintervals
2727 python-plotly-2.4.1))
2728 (home-page "https://pypi.org/project/deepTools/")
2729 (synopsis "Useful tools for exploring deep sequencing data")
2730 (description "This package addresses the challenge of handling large amounts
2731 of data that are now routinely generated from DNA sequencing centers.
2732 @code{deepTools} contains useful modules to process the mapped reads data for
2733 multiple quality checks, creating normalized coverage files in standard bedGraph
2734 and bigWig file formats, that allow comparison between different files. Finally,
2735 using such normalized and standardized files, deepTools can create many
2736 publication-ready visualizations to identify enrichments and for functional
2737 annotations of the genome.")
2738 ;; The file deeptools/cm.py is licensed under the BSD license. The
2739 ;; remainder of the code is licensed under the MIT license.
2740 (license (list license:bsd-3 license:expat))))
;; Keep the old variable name `deeptools' as a deprecated alias for
;; `python-deeptools'.
2742 (define-deprecated deeptools python-deeptools)
;; Cutadapt: removes adapter sequences from sequencing reads.  Fetched from
;; PyPI under the name "cutadapt" and built with python-build-system.
;; The 'always-cythonize phase deletes the shipped src/cutadapt/_align.c and
;; rewrites setup.py's PKG-INFO existence check to test a non-existent path,
;; so that Cython is always run.
2744 (define-public cutadapt
2750 (uri (pypi-uri "cutadapt" version))
2753 "1vqmsfkm6llxzmsz9wcfcvzx9a9f8iabvwik2rbyn7nc4wm25z89"))))
2754 (build-system python-build-system)
2757 (modify-phases %standard-phases
2758 (add-after 'unpack 'always-cythonize
2760 (delete-file "src/cutadapt/_align.c")
2761 ;; If PKG-INFO exists, setup.py decides not to run Cython.
2762 (substitute* "setup.py"
2763 (("os.path.exists\\('PKG-INFO'\\):")
2764 "os.path.exists('totally-does-not-exist'):")))))))
2766 (list python-dnaio python-xopen))
2768 (list python-cython python-pytest python-setuptools-scm))
2769 (home-page "https://cutadapt.readthedocs.io/en/stable/")
2770 (synopsis "Remove adapter sequences from nucleotide sequencing reads")
2772 "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
2773 other types of unwanted sequence from high-throughput sequencing reads.")
2774 (license license:expat)))
;; libBigWig: C library for handling bigWig files, fetched from the
;; dpryan79/libBigWig Git repository and built with gnu-build-system.
;; The test target is named "test", but tests are disabled because they need
;; web access.  The 'configure phase is deleted and the install prefix is
;; passed as "prefix=<output>".
2776 (define-public libbigwig
2783 (url "https://github.com/dpryan79/libBigWig")
2785 (file-name (git-file-name name version))
2788 "09693dmf1scdac5pyq6qyn8b4mcipvnmc370k9a5z41z81m3dcsj"))))
2789 (build-system gnu-build-system)
2791 `(#:test-target "test"
2792 #:tests? #f ; tests require access to the web
2795 (string-append "prefix=" #$output))
2797 (modify-phases %standard-phases
2798 (delete 'configure))))
2802 `(("doxygen" ,doxygen)
2804 ("python" ,python-2)))
2805 (home-page "https://github.com/dpryan79/libBigWig")
2806 (synopsis "C library for handling bigWig files")
2808 "This package provides a C library for parsing local and remote BigWig
2810 (license license:expat)))
;; pyBigWig: Python bindings to libBigWig, fetched from PyPI under the name
;; "pyBigWig" and built with python-build-system.  The origin snippet deletes
;; the bundled "libBigWig" sources; the 'link-with-libBigWig phase then
;; prepends "BigWig" to the libs list in setup.py.
2812 (define-public python-pybigwig
2814 (name "python-pybigwig")
2818 (uri (pypi-uri "pyBigWig" version))
2821 "157x6v48y299zm382krf1dw08fdxg95im8lnabhp5vc94s04zxj1"))
2822 (modules '((guix build utils)))
2825 ;; Delete bundled libBigWig sources
2826 (delete-file-recursively "libBigWig")
2828 (build-system python-build-system)
2831 (modify-phases %standard-phases
2832 (add-after 'unpack 'link-with-libBigWig
2833 (lambda* (#:key inputs #:allow-other-keys)
2834 (substitute* "setup.py"
2835 (("libs=\\[") "libs=[\"BigWig\", "))
2838 (list python-numpy))
2840 (list libbigwig zlib curl))
2841 (home-page "https://github.com/dpryan79/pyBigWig")
2842 (synopsis "Access bigWig files in Python using libBigWig")
2844 "This package provides Python bindings to the libBigWig library for
2845 accessing bigWig files.")
2846 (license license:expat)))
;; schema-salad: Schema Annotations for Linked Avro Data, fetched from PyPI
;; under the name "schema-salad" and built with python-build-system.
;; The 'skip-failing-tests phase, run before 'check, prefixes the
;; test_secondaryFiles and test_outputBinding definitions in
;; schema_salad/tests/test_cwl11.py with a pytest skip marker.
2848 (define-public python-schema-salad
2850 (name "python-schema-salad")
2851 (version "8.2.20211116214159")
2855 (uri (pypi-uri "schema-salad" version))
2858 "005dh2y45x92zl8sf2sqjmfvcqr4hrz8dfckgkckv87003v7lwqc"))))
2859 (build-system python-build-system)
2862 (modify-phases %standard-phases
2863 (add-before 'check 'skip-failing-tests
2865 ;; Skip tests that require network access.
2866 (substitute* "schema_salad/tests/test_cwl11.py"
2867 (("^def test_(secondaryFiles|outputBinding)" all)
2868 (string-append "@pytest.mark.skip(reason="
2869 "\"test requires network access\")\n"
2872 (list python-cachecontrol
2876 python-rdflib-jsonld
2879 python-typing-extensions))
2881 (list python-black python-pytest python-pytest-runner))
2882 (home-page "https://github.com/common-workflow-language/schema_salad")
2883 (synopsis "Schema Annotations for Linked Avro Data (SALAD)")
2885 "Salad is a schema language for describing JSON or YAML structured linked
2886 data documents. Salad schema describes rules for preprocessing, structural
2887 validation, and hyperlink checking for documents described by a Salad schema.
2888 Salad supports rich data modeling with inheritance, template specialization,
2889 object identifiers, object references, documentation generation, code
2890 generation, and transformation to RDF. Salad provides a bridge between document
2891 and record oriented data modeling and the Semantic Web.")
2892 (license license:asl2.0)))
;; cwltool: Common Workflow Language reference implementation, fetched from
;; the common-workflow-language/cwltool Git repository and built with
;; python-build-system.  Three phases run after 'unpack:
;;  - 'loosen-version-restrictions relaxes "== 1.5.1" to ">=1.5.1" in
;;    setup.py;
;;  - 'dont-use-git replaces the self.git_timestamp_tag() call in
;;    gittaggers.py with a time.strftime expression built from the package
;;    version with its first four characters dropped;
;;  - 'modify-tests deletes or skips tests that need the network or
;;    cwl-runners, substitutes the path returned by (which "sh") for
;;    "/bin/sh" in the listed .cwl files, and removes "-n auto" from tox.ini.
2894 (define-public cwltool
2897 (version "3.1.20220119140128")
2901 (url "https://github.com/common-workflow-language/cwltool")
2903 (file-name (git-file-name name version))
2906 "1jmrm0qrqgka79avc1kq63fgh20gx6g07fc8p3iih4k85vhdyl3f"))))
2907 (build-system python-build-system)
2910 (modify-phases %standard-phases
2911 (add-after 'unpack 'loosen-version-restrictions
2913 (substitute* "setup.py"
2914 (("== 1.5.1") ">=1.5.1")))) ; prov
2915 (add-after 'unpack 'dont-use-git
2917 (substitute* "gittaggers.py"
2918 (("self.git_timestamp_tag\\(\\)")
2919 (string-append "time.strftime('.%Y%m%d%H%M%S', time.gmtime(int("
2920 (string-drop ,version 4) ")))")))))
2921 (add-after 'unpack 'modify-tests
2923 ;; Tries to connect to the internet.
2924 (delete-file "tests/test_content_type.py")
2925 (delete-file "tests/test_udocker.py")
2926 (delete-file "tests/test_http_input.py")
2927 (substitute* "tests/test_load_tool.py"
2928 (("def test_load_graph_fragment_from_packed")
2929 (string-append "@pytest.mark.skip(reason=\"Disabled by Guix\")\n"
2930 "def test_load_graph_fragment_from_packed")))
2931 (substitute* "tests/test_examples.py"
2932 (("def test_env_filtering")
2933 (string-append "@pytest.mark.skip(reason=\"Disabled by Guix\")\n"
2934 "def test_env_filtering")))
2935 ;; Tries to use cwl-runners.
2936 (substitute* "tests/test_examples.py"
2937 (("def test_v1_0_arg_empty_prefix_separate_false")
2938 (string-append "@pytest.mark.skip(reason=\"Disabled by Guix\")\n"
2939 "def test_v1_0_arg_empty_prefix_separate_false")))
2941 (substitute* '("cwltool/schemas/v1.1/tests/env-tool1.cwl"
2942 "cwltool/schemas/v1.1/tests/env-tool2.cwl"
2943 "cwltool/schemas/v1.1/tests/imported-hint.cwl"
2944 "tests/subgraph/env-tool2.cwl"
2945 "tests/subgraph/env-tool2_req.cwl"
2946 "tests/subgraph/env-wf2_subwf-packed.cwl"
2947 "tests/subgraph/env-tool2_no_env.cwl")
2948 (("\"/bin/sh\"") (string-append "\"" (which "sh") "\"")))
2949 ;; Pytest doesn't know what to do with "-n auto"
2950 (substitute* "tox.ini"
2951 (("-n auto") "")))))))
2953 (list python-argcomplete
2956 python-mypy-extensions
2965 python-typing-extensions
2966 ;; Not listed as needed but still necessary:
2970 python-humanfriendly
2975 python-pytest-runner))
2977 "https://github.com/common-workflow-language/common-workflow-language")
2978 (synopsis "Common Workflow Language reference implementation")
2980 "This is the reference implementation of the @acronym{CWL, Common Workflow
2981 Language} standards. The CWL open standards are for describing analysis
2982 workflows and tools in a way that makes them portable and scalable across a
2983 variety of software and hardware environments, from workstations to cluster,
2984 cloud, and high performance computing (HPC) environments. CWL is designed to
2985 meet the needs of data-intensive science, such as Bioinformatics, Medical
2986 Imaging, Astronomy, Physics, and Chemistry. The @acronym{cwltool, CWL reference
2987 implementation} is intended to be feature complete and to provide comprehensive
2988 validation of CWL files as well as provide other tools related to working with
2990 (license license:asl2.0)))
;; DendroPy: library for phylogenetics and phylogenetic computing, fetched
;; from the jeetsukumaran/DendroPy Git repository at tag "v<version>" and
;; built with python-build-system.  The 'skip-broken-tests phase prefixes the
;; matched test names with "_skip_" and deletes tests/test_paup.py and
;; tests/test_dataio_nexml_reader_tree_list.py.
2992 (define-public python-dendropy
2994 (name "python-dendropy")
2999 ;; Source from GitHub so that tests are included.
3001 (url "https://github.com/jeetsukumaran/DendroPy")
3002 (commit (string-append "v" version))))
3003 (file-name (git-file-name name version))
3006 "0lrfzjqzbpk1rrra9vd7z2j7q09jy9w1ss7wn2rd85i4k5y3xz8l"))))
3007 (build-system python-build-system)
3010 (modify-phases %standard-phases
3011 (add-after 'unpack 'skip-broken-tests
3013 ;; These tests fail because we have no "paup" executable.
3014 (substitute* "tests/test_datamodel_split_bitmasks.py"
3015 (((format #false "(~{~a~^|~})"
3017 "test_basic_split_counting_under_different_rootings"
3018 "test_basic_split_count_with_incorrect_weight_treatment_raises_error"
3019 "test_basic_split_count_with_incorrect_rootings_raises_error")) m)
3020 (string-append "_skip_" m)))
3021 (delete-file "tests/test_paup.py")
3022 (delete-file "tests/test_dataio_nexml_reader_tree_list.py")
3023 ;; Assert error for unknown reasons
3024 (substitute* "tests/test_protractedspeciation.py"
3025 (("test_by_num_lineages" m)
3026 (string-append "_skip_" m))))))))
3027 (home-page "https://dendropy.org/")
3028 (synopsis "Library for phylogenetics and phylogenetic computing")
3030 "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
3031 writing, simulation, processing and manipulation of phylogenetic
3032 trees (phylogenies) and characters.")
3033 (license license:bsd-3)))
;; py2bit: Python bindings for lib2bit to access 2bit files, fetched from
;; PyPI under the name "py2bit" and built with python-build-system.
3035 (define-public python-py2bit
3037 (name "python-py2bit")
3042 (uri (pypi-uri "py2bit" version))
3045 "1vw2nvw1yrl7ikkqsqs1pg239yr5nspvd969r1x9arms1k25a1a5"))))
3046 (build-system python-build-system)
3047 (home-page "https://github.com/dpryan79/py2bit")
3048 (synopsis "Access 2bit files using lib2bit")
3050 "This package provides Python bindings for lib2bit to access 2bit files
3052 (license license:expat)))
;; Delly: structural variant prediction, fetched from the dellytools/delly
;; Git repository at tag "v<version>" and built with gnu-build-system.
;; The origin snippet deletes the bundled src/htslib.  There are no tests
;; and no configure phase.  Make is invoked with PARALLEL=1 and
;; "prefix=<output>".  The 'install-templates phase copies the
;; "excludeTemplates" directory to share/delly/templates in the output.
3054 (define-public delly
3061 (url "https://github.com/dellytools/delly")
3062 (commit (string-append "v" version))))
3063 (file-name (git-file-name name version))
3065 (base32 "1ibnplgfzj96w8glkx17v7sld3pm402fr5ybmf3h0rlcryabxrqy"))
3066 (modules '((guix build utils)))
3069 (delete-file-recursively "src/htslib")
3071 (build-system gnu-build-system)
3073 `(#:tests? #f ; There are no tests to run.
3075 ,#~(list "PARALLEL=1" ; Allow parallel execution at run-time.
3076 (string-append "prefix=" #$output))
3078 (modify-phases %standard-phases
3079 (delete 'configure) ; There is no configure phase.
3080 (add-after 'install 'install-templates
3081 (lambda* (#:key outputs #:allow-other-keys)
3082 (let ((templates (string-append (assoc-ref outputs "out")
3083 "/share/delly/templates")))
3085 (copy-recursively "excludeTemplates" templates)
3088 (list boost bzip2 htslib zlib))
3089 (home-page "https://github.com/dellytools/delly")
3090 (synopsis "Integrated structural variant prediction method")
3091 (description "Delly is an integrated structural variant prediction method
3092 that can discover and genotype deletions, tandem duplications, inversions and
3093 translocations at single-nucleotide resolution in short-read massively parallel
3094 sequencing data. It uses paired-ends and split-reads to sensitively and
3095 accurately delineate genomic rearrangements throughout the genome.")
3096 (license license:gpl3+)))
;; Tandem Repeats Finder, fetched from the Benson-Genomics-Lab/TRF Git
;; repository at tag "v<version>" and built with gnu-build-system.
;; NOTE(review): the define-public line and the name/version fields of this
;; package are not visible in this excerpt; presumably it is the `trf'
;; package -- confirm against the full file.
3105 (url "https://github.com/Benson-Genomics-Lab/TRF")
3106 (commit (string-append "v" version))))
3107 (file-name (git-file-name name version))
3109 (base32 "0fhwr4s1mf8nw8fr5imwjvjr42b59p97zr961ifm8xl1bajz4wpg"))))
3110 (build-system gnu-build-system)
3111 (home-page "https://github.com/Benson-Genomics-Lab/TRF")
3112 (synopsis "Tandem Repeats Finder: a program to analyze DNA sequences")
3113 (description "A tandem repeat in DNA is two or more adjacent, approximate
3114 copies of a pattern of nucleotides. Tandem Repeats Finder is a program to
3115 locate and display tandem repeats in DNA sequences. In order to use the
3116 program, the user submits a sequence in FASTA format. The output consists of
3117 two files: a repeat table file and an alignment file. Submitted sequences may
3118 be of arbitrary length. Repeats with pattern size in the range from 1 to 2000
3119 bases are detected.")
3120 (license license:agpl3+)))
;; RepeatMasker, fetched as a tarball from www.repeatmasker.org and built
;; with gnu-build-system.  There are no tests.
;; The first custom phase copies the whole source tree to
;; share/RepeatMasker in the output and runs "perl configure" there, passing
;; the path returned by (which "trf") and a location under the hmmer input.
;; The second installs share/RepeatMasker/RepeatMasker into bin and wraps it
;; so that PERL5LIB is prefixed with the build-time PERL5LIB and the share
;; directory.
;; NOTE(review): the names of both phases are not visible in this excerpt.
3122 (define-public repeat-masker
3124 (name "repeat-masker")
3125 (version "4.1.2-p1")
3128 (uri (string-append "http://www.repeatmasker.org/"
3129 "RepeatMasker/RepeatMasker-"
3132 (base32 "15hfdfpzmdjcx7ng7rjfid69bmvgn3z9g9r43qhjnhjhq3v4prab"))))
3133 (build-system gnu-build-system)
3135 `(#:tests? #false ; there are none
3137 (modify-phases %standard-phases
3140 (lambda* (#:key inputs outputs #:allow-other-keys)
3141 (let ((share (string-append (assoc-ref outputs "out")
3142 "/share/RepeatMasker")))
3144 (copy-recursively "." share)
3145 (with-directory-excursion share
3146 (invoke "perl" "configure"
3147 "--trf_prgm" (which "trf")
3149 (string-append (assoc-ref inputs "hmmer")
3152 (lambda* (#:key outputs #:allow-other-keys)
3153 (let* ((out (assoc-ref outputs "out"))
3154 (share (string-append out "/share/RepeatMasker"))
3155 (bin (string-append out "/bin"))
3156 (path (getenv "PERL5LIB")))
3157 (install-file (string-append share "/RepeatMasker") bin)
3158 (wrap-program (string-append bin "/RepeatMasker")
3159 `("PERL5LIB" ":" prefix (,path ,share)))))))))
3167 (home-page "https://www.repeatmasker.org/")
3168 (synopsis "Screen DNA sequences for interspersed repeats")
3169 (description "RepeatMasker is a program that screens DNA sequences for
3170 interspersed repeats and low complexity DNA sequences. The output of the
3171 program is a detailed annotation of the repeats that are present in the query
3172 sequence as well as a modified version of the query sequence in which all the
3173 annotated repeats have been masked.")
3176 (license license:osl2.1)))
;; DIAMOND: BLAST-compatible local sequence aligner, fetched from the
;; bbuchfink/diamond Git repository at tag "v<version>" and built with
;; cmake-build-system.  There is no "check" target.  The
;; 'remove-native-compilation phase strips "-march=native" from
;; CMakeLists.txt.
3178 (define-public diamond
3185 (url "https://github.com/bbuchfink/diamond")
3186 (commit (string-append "v" version))))
3187 (file-name (git-file-name name version))
3190 "0k6f3kb6cniw11xw6763kkbs1sl0yack7xsy7q5fl5v170ssphq4"))))
3191 (build-system cmake-build-system)
3193 '(#:tests? #f ; no "check" target
3195 (modify-phases %standard-phases
3196 (add-after 'unpack 'remove-native-compilation
3198 (substitute* "CMakeLists.txt" (("-march=native") ""))
3202 (home-page "https://github.com/bbuchfink/diamond")
3203 (synopsis "Accelerated BLAST compatible local sequence aligner")
3205 "DIAMOND is a BLAST-compatible local aligner for mapping protein and
3206 translated DNA query sequences against a protein reference database (BLASTP
3207 and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
3208 reads at a typical sensitivity of 90-99% relative to BLAST depending on the
3209 data and settings.")
3210 (license license:agpl3+)))
;; Discrover: motif discovery for nucleic acid binding sites, fetched from
;; the maaskola/discrover Git repository and built with cmake-build-system.
;; There are no tests.  Custom phases set FORCE_SOURCE_DATE=1 before 'build,
;; patch LaTeX problems in the documentation sources, and add a missing
;; "#include <random>" to two headers.
3212 (define-public discrover
3220 (url "https://github.com/maaskola/discrover")
3222 (file-name (git-file-name name version))
3225 "173fwi2vb6a5kp406hm3jj6j7v4whww796f2qcygp4rpvamh307y"))))
3226 (build-system cmake-build-system)
3228 `(#:tests? #f ; there are no tests
3230 (modify-phases %standard-phases
3231 (add-before 'build 'set-force-source-date
3232 ;; for reproducible dates, texlive needs this to respect
3233 ;; SOURCE_DATE_EPOCH
3235 (setenv "FORCE_SOURCE_DATE" "1")))
3236 (add-after 'unpack 'fix-latex-errors
3238 (with-fluids ((%default-port-encoding #f))
3239 (substitute* "doc/references.bib"
3240 (("\\{S\\}illanp[^,]+,")
3241 "{S}illanp{\\\"a}{\\\"a},")))
3242 ;; XXX: I just can't get pdflatex to not complain about these
3243 ;; characters. They end up in the manual via the generated
3244 ;; discrover-cli-help.txt.
3245 (substitute* "src/hmm/cli.cpp"
3249 ;; This seems to be a syntax error.
3250 (substitute* "doc/discrover-manual.tex"
3251 (("theverbbox\\[t\\]") "theverbbox"))))
3252 (add-after 'unpack 'add-missing-includes
3254 (substitute* "src/executioninformation.hpp"
3255 (("#define EXECUTIONINFORMATION_HPP" line)
3256 (string-append line "\n#include <random>")))
3257 (substitute* "src/plasma/fasta.hpp"
3258 (("#define FASTA_HPP" line)
3259 (string-append line "\n#include <random>"))))))))
3261 (list boost cairo rmath-standalone))
3263 `(("texlive" ,(texlive-updmap.cfg (list texlive-cm
3267 texlive-latex-examplep
3270 texlive-latex-natbib
3271 texlive-bibtex ; style files used by natbib
3273 texlive-latex-verbatimbox)))
3274 ("imagemagick" ,imagemagick)))
3275 (home-page "https://dorina.mdc-berlin.de/public/rajewsky/discrover/")
3276 (synopsis "Discover discriminative nucleotide sequence motifs")
3277 (description "Discrover is a motif discovery method to find binding sites
3278 of nucleic acid binding proteins.")
3279 (license license:gpl3+)))
;; EIGENSOFT population genetics tools, fetched from the DReichLab/EIG
;; repository at the tag v + version.  The source snippet deletes the bin
;; directory of pre-built binaries.  The GNU build system is used with
;; CC=gcc, the build runs from the src subdirectory, and an extra phase after
;; install copies every file found in ../bin into the bin directory of the
;; output.
3281 (define-public eigensoft
3289 (url "https://github.com/DReichLab/EIG")
3290 (commit (string-append "v" version))))
3291 (file-name (git-file-name name version))
3294 "1c141fqvhnzibmnf22sv23vbmzm20kjjyrib44cfh75wyndp2d9k"))
3295 (modules '((guix build utils)))
3296 ;; Remove pre-built binaries.
3298 (delete-file-recursively "bin")
3301 (build-system gnu-build-system)
3303 `(#:tests? #f ; There are no tests.
3304 #:make-flags '("CC=gcc")
3306 (modify-phases %standard-phases
3307 ;; There is no configure phase, but the Makefile is in a
3310 (lambda _ (chdir "src") #t))
3311 ;; The provided install target only copies executables to
3312 ;; the "bin" directory in the build root.
3313 (add-after 'install 'actually-install
3314 (lambda* (#:key outputs #:allow-other-keys)
3315 (let* ((out (assoc-ref outputs "out"))
3316 (bin (string-append out "/bin")))
3317 (for-each (lambda (file)
3318 (install-file file bin))
3319 (find-files "../bin" ".*"))
3322 (list gsl lapack openblas perl
3323 `(,gfortran "lib")))
3324 (home-page "https://github.com/DReichLab/EIG")
3325 (synopsis "Tools for population genetics")
3326 (description "The EIGENSOFT package provides tools for population
3327 genetics and stratification correction. EIGENSOFT implements methods commonly
3328 used in population genetics analyses such as PCA, computation of Tracy-Widom
3329 statistics, and finding related individuals in structured populations. It
3330 comes with a built-in plotting script and supports multiple file formats and
3331 quantitative phenotypes.")
3332 ;; The license of the eigensoft tools is Expat, but since it's
3333 ;; linking with the GNU Scientific Library (GSL) the effective
3334 ;; license is the GPL.
3335 (license license:gpl3+)))
;; Entrez Direct (EDirect) 13.3.20200128, downloaded as a tarball from the
;; NCBI FTP server and built with the Perl build system.  The source snippet
;; deletes the bundled Mozilla-CA.tar.gz and rewrites the unidecode import in
;; rchive.go.  The listed scripts are installed into bin, xtract and rchive
;; are symlinks to the binaries from the edirect-go-programs input, and the
;; standard check phase is deleted in favour of a check phase that runs after
;; the wrap-program phase.
3337 (define-public edirect
3340 (version "13.3.20200128")
3343 (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect"
3344 "/versions/" version
3345 "/edirect-" version ".tar.gz"))
3348 "093zp7klv81ph0y8mm8d78a9hnpfxbv2kdym70gzdf3vz176rw33"))
3349 (modules '((guix build utils)))
3351 '(begin (delete-file "Mozilla-CA.tar.gz")
3352 (substitute* "rchive.go"
3353 ;; This go library does not have any license.
3354 (("github.com/fiam/gounidecode/unidecode")
3355 "golang.org/rainycape/unidecode"))
3357 (build-system perl-build-system)
3360 (modify-phases %standard-phases
3363 (delete 'check) ; simple check after install
3364 (add-after 'unpack 'patch-programs
3365 (lambda* (#:key inputs #:allow-other-keys)
3366 ;; Ignore errors about missing xtract.Linux and rchive.Linux.
3367 (substitute* "pm-refresh"
3368 (("cat \\\"\\$target")
3369 "grep ^[[:digit:]] \"$target"))
3372 (lambda* (#:key inputs outputs #:allow-other-keys)
3373 (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
3374 (edirect-go (assoc-ref inputs "edirect-go-programs")))
3377 (install-file file bin))
3378 '("archive-pubmed" "asp-cp" "asp-ls" "download-ncbi-data"
3379 "download-pubmed" "edirect.pl" "efetch" "epost" "esearch"
3380 "fetch-pubmed" "ftp-cp" "ftp-ls" "has-asp" "index-pubmed"
3381 "pm-prepare" "pm-refresh" "pm-stash" "pm-collect"
3382 "pm-index" "pm-invert" "pm-merge" "pm-promote"))
;; Expose the Go binaries under the names xtract and rchive.
3383 (symlink (string-append edirect-go "/bin/xtract.Linux")
3384 (string-append bin "/xtract"))
3385 (symlink (string-append edirect-go "/bin/rchive.Linux")
3386 (string-append bin "/rchive")))
3388 (add-after 'install 'wrap-program
3389 (lambda* (#:key outputs #:allow-other-keys)
3390 ;; Make sure everything can run in a pure environment.
3391 (let ((out (assoc-ref outputs "out"))
3392 (path (getenv "PERL5LIB")))
3396 `("PERL5LIB" ":" prefix (,path)))
3398 `("PATH" ":" prefix (,(string-append out "/bin")
3399 ,(dirname (which "sed"))
3400 ,(dirname (which "gzip"))
3401 ,(dirname (which "grep"))
3402 ,(dirname (which "perl"))
3403 ,(dirname (which "uname"))))))
3404 (find-files out ".")))
3406 (add-after 'wrap-program 'check
3407 (lambda* (#:key outputs #:allow-other-keys)
3408 (invoke (string-append (assoc-ref outputs "out")
3413 (list edirect-go-programs
3424 perl-lwp-protocol-https
3430 (home-page "https://www.ncbi.nlm.nih.gov/books/NBK179288/")
3431 (synopsis "Tools for accessing the NCBI's set of databases")
3433 "Entrez Direct (EDirect) is a method for accessing the National Center
3434 for Biotechnology Information's (NCBI) set of interconnected
3435 databases (publication, sequence, structure, gene, variation, expression,
3436 etc.) from a terminal. Functions take search terms from command-line
3437 arguments. Individual operations are combined to build multi-step queries.
3438 Record retrieval and formatting normally complete the process.
3440 EDirect also provides an argument-driven function that simplifies the
3441 extraction of data from document summaries or other results that are returned
3442 in structured XML format. This can eliminate the need for writing custom
3443 software to answer ad hoc questions.")
3444 (native-search-paths
3445 ;; Ideally this should be set for LWP somewhere.
3446 (list (search-path-specification
3447 (variable "PERL_LWP_SSL_CA_FILE")
3448 (file-type 'regular)
3450 (files '("/etc/ssl/certs/ca-certificates.crt")))))
3451 (license license:public-domain)))
;; The Go helper programs used by edirect, built with the Go build system
;; under the import path ncbi.nlm.nih.gov/entrez/edirect.  The source tree is
;; not installed and there are no tests.  One phase runs go build for each
;; program inside the import path directory; another copies the five
;; resulting binaries into the bin directory of the output.
3453 (define-public edirect-go-programs
3456 (name "edirect-go-programs")
3457 (build-system go-build-system)
3459 `(#:install-source? #f
3460 #:tests? #f ; No tests.
3461 #:import-path "ncbi.nlm.nih.gov/entrez/edirect"
3463 (modify-phases %standard-phases
3465 (lambda* (#:key import-path #:allow-other-keys)
3466 (with-directory-excursion (string-append "src/" import-path)
;; j2x and t2x keep their default names; the others get a .Linux suffix.
3467 (invoke "go" "build" "-v" "-x" "j2x.go")
3468 (invoke "go" "build" "-v" "-x" "t2x.go")
3469 (invoke "go" "build" "-v" "-x" "-o"
3470 "xtract.Linux" "xtract.go" "common.go")
3471 (invoke "go" "build" "-v" "-x" "-o"
3472 "rchive.Linux" "rchive.go" "common.go")
3473 (invoke "go" "build" "-v" "-x" "-o" "symbols.Linux" "s2p.go"))))
3475 (lambda* (#:key outputs import-path #:allow-other-keys)
3476 (let ((dest (string-append (assoc-ref outputs "out") "/bin"))
3477 (source (string-append "src/" import-path "/")))
3478 (for-each (lambda (file)
3479 (format #t "installing ~a~%" file)
3480 (install-file (string-append source file) dest))
3481 '("j2x" "t2x" "symbols.Linux" "xtract.Linux" "rchive.Linux"))
3484 (propagated-inputs '())
3486 (list go-github-com-fatih-color
3487 go-github-com-fogleman-gg
3488 go-github-com-gedex-inflector
3489 go-github-com-golang-freetype
3490 go-github-com-klauspost-cpuid
3491 go-github-com-pbnjay-memory
3492 go-github-com-surgebase-porter2
3493 go-golang-org-rainycape-unidecode
3494 go-golang-org-x-image
3495 go-golang-org-x-text))))
;; Exonerate, a pairwise sequence alignment tool, downloaded as a release
;; tarball from the EBI FTP mirror and built with the GNU build system.
;; Parallel builds are disabled because they fail on some machines.
3497 (define-public exonerate
3506 "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
3507 "exonerate-" version ".tar.gz"))
3510 "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
3511 (build-system gnu-build-system)
3513 `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
3519 "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
3520 (synopsis "Generic tool for biological sequence alignment")
3522 "Exonerate is a generic tool for pairwise sequence comparison. It allows
3523 the alignment of sequences using many alignment models, either exhaustive
3524 dynamic programming or a variety of heuristics.")
3525 (license license:gpl3)))
;; eXpress, a streaming quantification tool, fetched from its GitHub
;; repository and built with the CMake build system; there is no check
;; target.  After unpacking, the CMake files are patched to use shared Boost
;; and protobuf libraries and the bamtools input instead of a bundled copy,
;; and the update check is removed from src/main.cpp.
3527 (define-public express
3534 (url "https://github.com/adarob/eXpress")
3536 (file-name (git-file-name name version))
3539 "18nb22n7x820fzjngf4qgyb3mspqkw7xyk7v7s5ps6wfrd8qwscb"))))
3540 (build-system cmake-build-system)
3542 `(#:tests? #f ;no "check" target
3544 (modify-phases %standard-phases
3545 (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
3546 (lambda* (#:key inputs #:allow-other-keys)
;; Top-level CMakeLists.txt: turn off static Boost and point the
;; bamtools include directory at the bamtools input.
3547 (substitute* "CMakeLists.txt"
3548 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
3549 "set(Boost_USE_STATIC_LIBS OFF)")
3550 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
3551 (string-append (assoc-ref inputs "bamtools") "/include/bamtools")))
;; src/CMakeLists.txt: use the lib directory of the bamtools input and
;; link the shared protobuf library instead of the static archive.
3552 (substitute* "src/CMakeLists.txt"
3553 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
3554 (string-append (assoc-ref inputs "bamtools") "/lib"))
3555 (("libprotobuf.a") "libprotobuf.so"))
3557 (add-after 'unpack 'remove-update-check
;; Remove both the header include and the call to check_version.
3559 (substitute* "src/main.cpp"
3560 (("#include \"update_check.h\"") "")
3561 (("check_version\\(PACKAGE_VERSION\\);") ""))
3564 (list boost bamtools protobuf zlib))
3565 (home-page "http://bio.math.berkeley.edu/eXpress")
3566 (synopsis "Streaming quantification for high-throughput genomic sequencing")
3568 "eXpress is a streaming tool for quantifying the abundances of a set of
3569 target sequences from sampled subsequences. Example applications include
3570 transcript-level RNA-Seq quantification, allele-specific/haplotype expression
3571 analysis (from RNA-Seq), transcription factor binding quantification in
3572 ChIP-Seq, and analysis of metagenomic data.")
3573 (license license:artistic2.0)))
;; Express Beta Diversity (EBD), fetched from its GitHub repository at the
;; tag v + version and built with the GNU build system.  The build runs from
;; the source subdirectory.  One phase invokes the built binary with the -u
;; option (presumably the check phase; confirm against the full file), and
;; the install phase copies convertToEBD.py and the ExpressBetaDiversity
;; binary into the bin directory of the output.  The python input is python-2.
3575 (define-public express-beta-diversity
3577 (name "express-beta-diversity")
3582 (url "https://github.com/dparks1134/ExpressBetaDiversity")
3583 (commit (string-append "v" version))))
3584 (file-name (git-file-name name version))
3587 "0s0yzg5c21349rh7x4w9266jsvnp7j1hp9cf8sk32hz8nvrj745x"))))
3588 (build-system gnu-build-system)
3591 (modify-phases %standard-phases
3593 (add-before 'build 'enter-source (lambda _ (chdir "source") #t))
3595 (lambda _ (invoke "../bin/ExpressBetaDiversity" "-u") #t))
3597 (lambda* (#:key outputs #:allow-other-keys)
3598 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
3599 (install-file "../scripts/convertToEBD.py" bin)
3600 (install-file "../bin/ExpressBetaDiversity" bin)
3603 `(("python" ,python-2)))
3604 (home-page "https://github.com/dparks1134/ExpressBetaDiversity")
3605 (synopsis "Taxon- and phylogenetic-based beta diversity measures")
3607 "Express Beta Diversity (EBD) calculates ecological beta diversity
3608 (dissimilarity) measures between biological communities. EBD implements a
3609 variety of diversity measures including those that make use of phylogenetic
3610 similarity of community members.")
3611 (license license:gpl3+)))
;; FastTree, a phylogenetic tree inference tool, downloaded from
;; microbesonline.org and built with the GNU build system; there is no check
;; target.  The install phase copies the FastTree and FastTreeMP executables
;; into the bin directory of the output.
3613 (define-public fasttree
3620 "http://www.microbesonline.org/fasttree/FastTree-"
3624 "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
3625 (build-system gnu-build-system)
3627 `(#:tests? #f ; no "check" target
3629 (modify-phases %standard-phases
3633 (lambda* (#:key source #:allow-other-keys)
3636 "-finline-functions"
3647 "-finline-functions"
3656 (lambda* (#:key outputs #:allow-other-keys)
3657 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
3658 (install-file "FastTree" bin)
3659 (install-file "FastTreeMP" bin)
3661 (home-page "http://www.microbesonline.org/fasttree")
3662 (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
3664 "FastTree can handle alignments with up to a million sequences in a
3665 reasonable amount of time and memory. For large alignments, FastTree is
3666 100-1,000 times faster than PhyML 3.0 or RAxML 7.")
3667 (license license:gpl2+)))
;; FASTX-Toolkit, downloaded as a release tarball from the GitHub releases
;; page of agordon/fastx_toolkit and built with the GNU build system.  It
;; uses libgtextutils and is compiled with gcc-6 because it does not build
;; with later compiler versions.
3669 (define-public fastx-toolkit
3671 (name "fastx-toolkit")
3677 "https://github.com/agordon/fastx_toolkit/releases/download/"
3678 version "/fastx_toolkit-" version ".tar.bz2"))
3681 "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
3682 (build-system gnu-build-system)
3684 (list libgtextutils))
3686 (list gcc-6 ;; doesn't build with later versions
3688 (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
3689 (synopsis "Tools for FASTA/FASTQ file preprocessing")
3691 "The FASTX-Toolkit is a collection of command line tools for Short-Reads
3692 FASTA/FASTQ files preprocessing.
3694 Next-Generation sequencing machines usually produce FASTA or FASTQ files,
3695 containing multiple short-reads sequences. The main processing of such
3696 FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
3697 is sometimes more productive to preprocess the files before mapping the
3698 sequences to the genome---manipulating the sequences to produce better mapping
3699 results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
3700 (license license:agpl3+)))
;; Flexbar, a barcode and adapter removal tool, fetched from the seqan/flexbar
;; repository at the tag v + version and built with the CMake build system.
;; The phases strip the -march=native flag, run the upstream shell test
;; script, and install the flexbar executable into the bin directory of the
;; output.
3702 (define-public flexbar
3709 (url "https://github.com/seqan/flexbar")
3710 (commit (string-append "v" version))))
3711 (file-name (git-file-name name version))
3714 "1pq9sxvdnldl14libk234m72dqhwgzs3acgl943wchwdqlcsi5r2"))))
3715 (build-system cmake-build-system)
3718 (modify-phases %standard-phases
3719 (add-after 'unpack 'do-not-tune-to-CPU
3721 (substitute* "src/CMakeLists.txt"
3722 ((" -march=native") ""))))
3724 (lambda* (#:key outputs #:allow-other-keys)
;; Put the current directory first on PATH before running the test
;; script from the test directory of the source tree.
3725 (setenv "PATH" (string-append (getcwd) ":" (getenv "PATH")))
3726 (with-directory-excursion "../source/test"
3727 (invoke "bash" "flexbar_test.sh"))))
3729 (lambda* (#:key outputs #:allow-other-keys)
3730 (let* ((out (string-append (assoc-ref outputs "out")))
3731 (bin (string-append out "/bin/")))
3732 (install-file "flexbar" bin)))))))
3734 (list tbb-2020 zlib))
3736 (list pkg-config seqan-2))
3737 (home-page "https://github.com/seqan/flexbar")
3738 (synopsis "Barcode and adapter removal tool for sequencing platforms")
3740 "Flexbar preprocesses high-throughput nucleotide sequencing data
3741 efficiently. It demultiplexes barcoded runs and removes adapter sequences.
3742 Moreover, trimming and filtering features are provided. Flexbar increases
3743 read mapping rates and improves genome and transcriptome assemblies. It
3744 supports next-generation sequencing data in fasta/q and csfasta/q format from
3745 Illumina, Roche 454, and the SOLiD platform.")
3746 (license license:bsd-3)))
;; Fxtract, a FASTA/FASTQ sequence extraction tool, fetched from its GitHub
;; repository and built with the GNU build system.  The util-commit binding
;; pins the revision of the separate ctSkennerton/util repository, which is
;; supplied as the ctskennerton-util input and copied into the util directory
;; before the build.  The test target is fxtract_test, and the executable is
;; installed by hand instead of through make install.
3748 (define-public fxtract
3749 (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
3757 (url "https://github.com/ctSkennerton/fxtract")
3759 (file-name (git-file-name name version))
3762 "0hab3gpwf4w9s87qlbswq6ws1qqybh4dcqk79q1ahyldzai5fgp5"))))
3763 (build-system gnu-build-system)
3765 `(#:make-flags ,#~(list
3766 (string-append "PREFIX=" #$output)
3768 #:test-target "fxtract_test"
3770 (modify-phases %standard-phases
3772 (add-before 'build 'copy-util
3773 (lambda* (#:key inputs #:allow-other-keys)
3775 (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
3777 ;; Do not use make install as this requires additional dependencies.
3779 (lambda* (#:key outputs #:allow-other-keys)
3780 (let* ((out (assoc-ref outputs "out"))
3781 (bin (string-append out"/bin")))
3782 (install-file "fxtract" bin)
3787 ;; ctskennerton-util is licensed under GPL2.
3788 `(("ctskennerton-util"
3792 (url "https://github.com/ctSkennerton/util")
3793 (commit util-commit)))
;; NOTE(review): the file name below is spelled ctstennerton, while the
;; input label and the repository are spelled ctskennerton; confirm whether
;; this spelling is intended.
3794 (file-name (string-append
3795 "ctstennerton-util-" util-commit "-checkout"))
3798 "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
3799 (home-page "https://github.com/ctSkennerton/fxtract")
3800 (synopsis "Extract sequences from FASTA and FASTQ files")
3802 "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
3803 or FASTQ) file given a subsequence. It uses a simple substring search for
3804 basic tasks but can change to using POSIX regular expressions, PCRE, hash
3805 lookups or multi-pattern searching as required. By default fxtract looks in
3806 the sequence of each record but can also be told to look in the header,
3807 comment or quality sections.")
3808 ;; 'util' requires SSE instructions.
3809 (supported-systems '("x86_64-linux"))
3810 (license license:expat))))
;; GEMMA, a mixed model association tool, fetched from the
;; genetics-statistics/GEMMA repository and built with the GNU build system.
;; The source snippet deletes the contrib directory.  The phases point the
;; Makefile at the openblas input, run test_suite.sh from the test directory,
;; and install bin/gemma into the bin directory of the output.
3812 (define-public gemma
3819 (url "https://github.com/genetics-statistics/GEMMA")
3821 (file-name (git-file-name name version))
3824 "1p8a7kkfn1mmrg017aziy544aha8i9h6wd1x2dk3w2794wl33qb7"))
3825 (modules '((guix build utils)))
3828 (delete-file-recursively "contrib")
3830 (build-system gnu-build-system)
3832 (list gsl openblas zlib))
3834 `(("catch" ,catch2-1)
3836 ("shunit2" ,shunit2)
3840 (modify-phases %standard-phases
3842 (add-after 'unpack 'prepare-build
3843 (lambda* (#:key inputs #:allow-other-keys)
;; Replace the hard-coded openblas location with the openblas input.
3845 (substitute* "Makefile"
3846 (("/usr/local/opt/openblas")
3847 (assoc-ref inputs "openblas")))
3850 (lambda* (#:key tests? #:allow-other-keys)
3852 ;; 'make slow-check' expects shunit2-2.0.3.
3853 (with-directory-excursion "test"
3854 (invoke "./test_suite.sh"))
3857 (lambda* (#:key outputs #:allow-other-keys)
3858 (install-file "bin/gemma"
3859 (string-append (assoc-ref outputs "out") "/bin"))
3861 (home-page "https://github.com/genetics-statistics/GEMMA")
3862 (synopsis "Tool for genome-wide efficient mixed model association")
3864 "@acronym{GEMMA, Genome-wide Efficient Mixed Model Association} provides a
3865 standard linear mixed model resolver with application in @acronym{GWAS,
3866 genome-wide association studies}.")
3867 (license license:gpl3)))
;; HISAT, a spliced alignment program, downloaded as a beta source zip archive
;; from ccb.jhu.edu and built with the GNU build system.  There is no check
;; target and the configure phase is deleted.  The make target is allall.
;; After unpacking, the Makefile and helper scripts are patched; the install
;; phase copies the files matching the hisat executable name pattern into the
;; bin directory of the output.
3869 (define-public hisat
3876 "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
3877 version "-beta-source.zip"))
3880 "177z85vqp9b30vgxl5py5hz4mm37ila37nzhfam23ci9iyfxgyv9"))))
3881 (build-system gnu-build-system)
3883 `(#:tests? #f ;no check target
3884 #:make-flags '("allall"
3885 ;; Disable unsupported `popcnt' instructions on
3886 ;; architectures other than x86_64
3887 ,@(if (string-prefix? "x86_64"
3888 (or (%current-target-system)
3891 '("POPCNT_CAPABILITY=0")))
3893 (modify-phases %standard-phases
3894 (add-after 'unpack 'patch-sources
3896 ;; XXX Cannot use snippet because zip files are not supported
3897 (substitute* "Makefile"
3898 (("^CC = .*$") "CC = gcc")
3899 (("^CPP = .*$") "CPP = g++")
3900 ;; replace BUILD_HOST and BUILD_TIME for deterministic build
3901 (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
3902 (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
;; Use the absolute location of env found on PATH in the wrapper scripts.
3903 (substitute* '("hisat-build" "hisat-inspect")
3904 (("/usr/bin/env") (which "env")))
3905 ;; This "extended character" is not considered valid.
3906 (substitute* "processor_support.h"
3910 (lambda* (#:key outputs #:allow-other-keys)
3911 (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
3912 (for-each (lambda (file)
3913 (install-file file bin))
3916 "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))))
3917 (delete 'configure))))
3921 (list perl python zlib))
3922 ;; Non-portable SSE instructions are used so building fails on platforms
3923 ;; other than x86_64.
3924 (supported-systems '("x86_64-linux"))
3925 (home-page "https://ccb.jhu.edu/software/hisat/index.shtml")
3926 (synopsis "Hierarchical indexing for spliced alignment of transcripts")
3928 "HISAT is a fast and sensitive spliced alignment program for mapping
3929 RNA-seq reads. In addition to one global FM index that represents a whole
3930 genome, HISAT uses a large set of small FM indexes that collectively cover the
3931 whole genome. These small indexes (called local indexes) combined with
3932 several alignment strategies enable effective alignment of RNA-seq reads, in
3933 particular, reads spanning multiple exons.")
3934 (license license:gpl3+)))
;; HISAT2, a graph-based read aligner, fetched from the DaehwanKimLab/hisat2
;; repository at the tag v + version and built with the GNU build system.
;; There is no check target; make is run with CC=gcc, CXX=g++ and the allall
;; target.  The manual is built with make doc before the main build, and the
;; install phase copies the matching executables into bin and
;; doc/manual.inc.html into share/doc/hisat2 of the output.
3936 (define-public hisat2
3944 (url "https://github.com/DaehwanKimLab/hisat2/")
3945 (commit (string-append "v" version))))
3946 (file-name (git-file-name name version))
3949 "0lmzdhzjkvxw7n5w40pbv5fgzd4cz0f9pxczswn3d4cr0k10k754"))))
3950 (build-system gnu-build-system)
3952 `(#:tests? #f ; no check target
3953 #:make-flags (list "CC=gcc" "CXX=g++" "allall")
3954 #:modules ((guix build gnu-build-system)
3958 (modify-phases %standard-phases
3959 (add-after 'unpack 'make-deterministic
3961 (substitute* "Makefile"
3964 (add-before 'build 'build-manual
3967 (invoke "make" "doc")))
3969 (lambda* (#:key outputs #:allow-other-keys)
3970 (let* ((out (assoc-ref outputs "out"))
3971 (bin (string-append out "/bin/"))
3972 (doc (string-append out "/share/doc/hisat2/")))
3974 (cut install-file <> bin)
3976 "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$"))
3978 (install-file "doc/manual.inc.html" doc)))))))
3980 (list perl pandoc)) ; for documentation
3982 `(("python" ,python-wrapper)))
3983 (home-page "https://daehwankimlab.github.io/hisat2/")
3984 (synopsis "Graph-based alignment of genomic sequencing reads")
3985 (description "HISAT2 is a fast and sensitive alignment program for mapping
3986 next-generation sequencing reads (both DNA and RNA) to a population of human
3987 genomes (as well as to a single reference genome). In addition to using one
3988 global @dfn{graph FM} (GFM) index that represents a population of human
3989 genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
3990 the whole genome. These small indexes, combined with several alignment
3991 strategies, enable rapid and accurate alignment of sequencing reads. This new
3992 indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
3993 ;; HISAT2 contains files from Bowtie2, which is released under
3994 ;; GPLv2 or later. The HISAT2 source files are released under
3996 (license license:gpl3+)))
;; HMMER, a profile hidden Markov model toolkit, downloaded as a release
;; tarball from eddylab.org and built with the GNU build system using the
;; default phases.  Perl and Python are native inputs needed by the tests.
;; Only x86_64-linux and i686-linux are listed as supported systems.
3998 (define-public hmmer
4006 "http://eddylab.org/software/hmmer/hmmer-" version ".tar.gz"))
4009 "0s9wf6n0qanbx8qs6igfl3vyjikwbrvh4d9d6mv54yp3xysykzlj"))))
4010 (build-system gnu-build-system)
4011 (native-inputs (list perl python)) ; for tests
4012 (home-page "http://hmmer.org/")
4013 (synopsis "Biosequence analysis using profile hidden Markov models")
4015 "HMMER is used for searching sequence databases for homologs of protein
4016 sequences, and for making protein sequence alignments. It implements methods
4017 using probabilistic models called profile hidden Markov models (profile
4019 ;; hmmer uses non-portable SSE intrinsics so building fails on other
4021 (supported-systems '("x86_64-linux" "i686-linux"))
4022 (license license:bsd-3)))
;; HTSeq, a Python framework for high-throughput sequencing data, fetched
;; from the htseq/htseq repository at the tag release_ + version and built
;; with the Python build system.  A custom phase runs the test suite with
;; pytest -v.
4024 (define-public htseq
4028 ;; Sources on pypi do not include everything needed to run the tests.
4032 (url "https://github.com/htseq/htseq")
4033 (commit (string-append "release_" version))))
4034 (file-name (git-file-name name version))
4037 "1kbr4ydjjhizz6r5m3xd4f0wj7qnn8zs0vnzghhgaa0yhbya5r19"))))
4038 (build-system python-build-system)
4042 '(modify-phases %standard-phases
4043 ;; Avoid rebuilding the extension. Everything is built during the
4044 ;; 'install phase anyway.
4047 (lambda* (#:key tests? #:allow-other-keys)
4049 (invoke "pytest" "-v")))))))
4051 (list python-matplotlib
4060 (home-page "https://github.com/htseq")
4061 (synopsis "Framework for analyzing high-throughput sequencing data")
4063 "This package provides a framework to process and analyze data from
4064 high-throughput sequencing (HTS) assays.")
4065 (license license:gpl3+)))
;; HTSJDK 2.3.0, a Java library for high-throughput sequencing file formats,
;; fetched from the samtools/htsjdk repository and built with the Ant build
;; system using the all target.  The source snippet deletes the lib directory
;; of pre-built binaries.  The dist property points Ant at share/java/htsjdk
;; in the output, so the build itself places the jars there and the install
;; phase is deleted.  Tests are disabled because they need Internet access.
4067 (define-public java-htsjdk
4069 (name "java-htsjdk")
4070 (version "2.3.0") ; last version without build dependency on gradle
4074 (url "https://github.com/samtools/htsjdk")
4076 (file-name (git-file-name name version))
4079 "1b178ixcabanm834ydjl3jiakpyxdmki32hqfv2abrzn3rcwa28i"))
4080 (modules '((guix build utils)))
4082 ;; Delete pre-built binaries
4084 (delete-file-recursively "lib")
4087 (build-system ant-build-system)
4089 `(#:tests? #f ; tests require Internet access
4092 ,#~(list (string-append "-Ddist=" #$output "/share/java/htsjdk/"))
4093 #:build-target "all"
4095 (modify-phases %standard-phases
4096 ;; The build phase also installs the jars
4097 (delete 'install))))
4099 `(("java-ngs" ,java-ngs)
4100 ("java-snappy-1" ,java-snappy-1)
4101 ("java-commons-compress" ,java-commons-compress)
4102 ("java-commons-logging-minimal" ,java-commons-logging-minimal)
4103 ("java-commons-jexl-2" ,java-commons-jexl-2)
4104 ("java-xz" ,java-xz)))
4107 (home-page "http://samtools.github.io/htsjdk/")
4108 (synopsis "Java API for high-throughput sequencing data (HTS) formats")
4110 "HTSJDK is an implementation of a unified Java library for accessing
4111 common file formats, such as SAM and VCF, used for high-throughput
4112 sequencing (HTS) data. There are also a number of useful utilities for
4113 manipulating HTS data.")
4114 (license license:expat)))
;; A second HTSJDK package, also named java-htsjdk, fetched from the
;; samtools/htsjdk repository and built with the Ant build system into
;; htsjdk.jar.  After unpacking, the upstream build.xml and the src/test
;; directory are deleted; tests are disabled because they need Scala.
4116 (define-public java-htsjdk-latest
4118 (name "java-htsjdk")
4123 (url "https://github.com/samtools/htsjdk")
4125 (file-name (string-append name "-" version "-checkout"))
4128 "1lmya1fdjy03mz6zmdmd86j9v9vfhqb3952mqq075navx1i6g4bc"))))
4129 (build-system ant-build-system)
4131 `(#:tests? #f ; tests require Scala
4133 #:jar-name "htsjdk.jar"
4135 (modify-phases %standard-phases
4136 (add-after 'unpack 'remove-useless-build.xml
4137 (lambda _ (delete-file "build.xml") #t))
4138 ;; The tests require the scalatest package.
4139 (add-after 'unpack 'remove-tests
4140 (lambda _ (delete-file-recursively "src/test") #t)))))
4142 `(("java-ngs" ,java-ngs)
4143 ("java-snappy-1" ,java-snappy-1)
4144 ("java-commons-compress" ,java-commons-compress)
4145 ("java-commons-logging-minimal" ,java-commons-logging-minimal)
4146 ("java-commons-jexl-2" ,java-commons-jexl-2)
4147 ("java-xz" ,java-xz)))
4150 (home-page "http://samtools.github.io/htsjdk/")
4151 (synopsis "Java API for high-throughput sequencing data (HTS) formats")
4153 "HTSJDK is an implementation of a unified Java library for accessing
4154 common file formats, such as SAM and VCF, used for high-throughput
4155 sequencing (HTS) data. There are also a number of useful utilities for
4156 manipulating HTS data.")
4157 (license license:expat)))
4159 ;; This is needed for picard 2.10.3
;; The package inherits from java-htsjdk-latest and overrides the source,
;; build system and arguments.  As in the parent, the upstream build.xml and
;; the src/test directory are deleted after unpacking and the jar is named
;; htsjdk.jar.
4160 (define-public java-htsjdk-2.10.1
4161 (package (inherit java-htsjdk-latest)
4162 (name "java-htsjdk")
4167 (url "https://github.com/samtools/htsjdk")
4169 (file-name (string-append name "-" version "-checkout"))
4172 "1kxh7slm2pm3x9p6jxa1wqsq9a31dhiiflhxnxqcisan4k3rwia2"))))
4173 (build-system ant-build-system)
4175 `(#:tests? #f ; tests require Scala
4177 #:jar-name "htsjdk.jar"
4179 (modify-phases %standard-phases
4180 (add-after 'unpack 'remove-useless-build.xml
4181 (lambda _ (delete-file "build.xml") #t))
4182 ;; The tests require the scalatest package.
4183 (add-after 'unpack 'remove-tests
4184 (lambda _ (delete-file-recursively "src/test") #t)))))))
4186 ;; This version matches java-htsjdk 2.3.0. Later versions also require a more
4187 ;; recent version of java-htsjdk, which depends on gradle.
;; Picard is fetched from the broadinstitute/picard repository and built with
;; the Ant build system using the picard-jar target.  The source snippet
;; deletes the bundled lib directory and edits build.xml so that the build
;; tolerates failing external commands, no longer depends on the htsjdk
;; compile targets, and builds picard-lib-jar before picard-jar.  The Ant
;; properties point at the java-htsjdk input, and the jars in dist are
;; installed with install-jars.
4188 (define-public java-picard
4190 (name "java-picard")
4195 (url "https://github.com/broadinstitute/picard")
4197 (file-name (string-append "java-picard-" version "-checkout"))
4200 "1ll7mf4r3by92w2nhlmpa591xd1f46xlkwh59mq6fvbb5pdwzvx6"))
4201 (modules '((guix build utils)))
4204 ;; Delete pre-built binaries.
4205 (delete-file-recursively "lib")
4207 (substitute* "build.xml"
4208 ;; Remove build-time dependency on git.
4209 (("failifexecutionfails=\"true\"")
4210 "failifexecutionfails=\"false\"")
4212 (("depends=\"compile-htsjdk, ")
4214 (("depends=\"compile-htsjdk-tests, ")
4216 ;; Build picard-lib.jar before building picard.jar
4217 (("name=\"picard-jar\" depends=\"" line)
4218 (string-append line "picard-lib-jar, ")))
4220 (build-system ant-build-system)
4222 `(#:build-target "picard-jar"
4223 #:test-target "test"
4224 ;; Tests require jacoco:coverage.
4227 ,#~(list (string-append "-Dhtsjdk_lib_dir="
4228 #$(this-package-input "java-htsjdk")
4229 "/share/java/htsjdk/")
4230 "-Dhtsjdk-classes=dist/tmp"
4231 (string-append "-Dhtsjdk-version="
4232 #$(package-version java-htsjdk)))
4235 (modify-phases %standard-phases
4236 ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
4237 (delete 'generate-jar-indices)
4238 (add-after 'unpack 'use-our-htsjdk
4239 (lambda* (#:key inputs #:allow-other-keys)
;; Point the htsjdk lib reference in build.xml at the share/java/htsjdk
;; directory found among the inputs.
4240 (substitute* "build.xml"
4241 (("\\$\\{htsjdk\\}/lib")
4242 (search-input-directory inputs
4243 "share/java/htsjdk")))))
4244 (add-after 'unpack 'make-test-target-independent
4245 (lambda* (#:key inputs #:allow-other-keys)
;; Drop the compile dependency from the test target.
4246 (substitute* "build.xml"
4247 (("name=\"test\" depends=\"compile, ")
4248 "name=\"test\" depends=\""))
4250 (replace 'install (install-jars "dist")))))
4252 (list java-htsjdk java-guava))
4255 (home-page "http://broadinstitute.github.io/picard/")
4256 (synopsis "Tools for manipulating high-throughput sequencing data and formats")
4257 (description "Picard is a set of Java command line tools for manipulating
4258 high-throughput sequencing (HTS) data and formats. Picard is implemented
4259 using the HTSJDK Java library to support accessing file formats that are
4260 commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
4262 (license license:expat)))
4264 ;; This is needed for dropseq-tools
;; This Picard variant is fetched from the broadinstitute/picard repository
;; and built with the Ant build system into picard.jar, with
;; picard.cmdline.PicardCommandLine as the main class.  The upstream
;; build.xml is deleted after unpacking.  Before the build, a build.xml is
;; read as SXML, its target elements are transformed, and the result is
;; written to build.xml.new, which then replaces build.xml; the goal stated
;; below is that htsjdk is found when picard.jar is run as an executable.
;; The htsjdk dependency is java-htsjdk-2.10.1.
4265 (define-public java-picard-2.10.3
4267 (name "java-picard")
4272 (url "https://github.com/broadinstitute/picard")
4274 (file-name (string-append "java-picard-" version "-checkout"))
4277 "1ajlx31l6i1k3y2rhnmgq07sz99g2czqfqgkr9mihmdjp3gwjhvi"))))
4278 (build-system ant-build-system)
4280 `(#:jar-name "picard.jar"
4281 ;; Tests require jacoco:coverage.
4284 #:main-class "picard.cmdline.PicardCommandLine"
4285 #:modules ((guix build ant-build-system)
4287 (guix build java-utils)
4292 (modify-phases %standard-phases
4293 ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
4294 (delete 'generate-jar-indices)
4295 (add-after 'unpack 'remove-useless-build.xml
4296 (lambda _ (delete-file "build.xml") #t))
4297 ;; This is necessary to ensure that htsjdk is found when using
4298 ;; picard.jar as an executable.
4299 (add-before 'build 'edit-classpath-in-manifest
4300 (lambda* (#:key inputs #:allow-other-keys)
4301 (chmod "build.xml" #o664)
4302 (call-with-output-file "build.xml.new"
4306 (with-input-from-file "build.xml"
4307 (lambda _ (xml->sxml #:trim-whitespace? #t)))
4308 `((target . ,(lambda (tag . kids)
4309 (let ((name ((sxpath '(name *text*))
4311 ;; FIXME: We're breaking the line
4312 ;; early with a dummy path to
4313 ;; ensure that the store reference
4314 ;; isn't broken apart and can still
4315 ;; be found by the reference
4320 ~a/share/java/htsjdk.jar${line.separator}${line.separator}"
4321 ;; maximum line length is 70
4322 (string-tabulate (const #\b) 57)
4323 (assoc-ref inputs "java-htsjdk"))))
4324 (if (member "manifest" name)
4327 (@ (file "${manifest.file}")
4328 (match "\\r\\n\\r\\n")
4329 (replace "${line.separator}")))
4332 (file "${manifest.file}")
4335 (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
4336 (*text* . ,(lambda (_ txt) txt))))
4338 (rename-file "build.xml.new" "build.xml")
4341 (list java-htsjdk-2.10.1))
4343 (list java-testng java-guava))
4344 (home-page "http://broadinstitute.github.io/picard/")
4345 (synopsis "Tools for manipulating high-throughput sequencing data and formats")
4346 (description "Picard is a set of Java command line tools for manipulating
4347 high-throughput sequencing (HTS) data and formats. Picard is implemented
4348 using the HTSJDK Java library to support accessing file formats that are
4349 commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
4351 (license license:expat)))
4353 ;; This is the last version of Picard to provide net.sf.samtools
;; It inherits from java-picard, is built with ant from a Git checkout, and
;; deletes the pre-built "lib" directory from the source.  The IntelDeflater
;; JNI library is therefore compiled again in the 'build-jni phase and
;; installed by 'install-jni-lib.
4354 (define-public java-picard-1.113
4355 (package (inherit java-picard)
4356 (name "java-picard")
4361 (url "https://github.com/broadinstitute/picard")
4363 (file-name (string-append "java-picard-" version "-checkout"))
4366 "0lkpvin2fz3hhly4l02kk56fqy8lmlgyzr9kmvljk6ry6l1hw973"))
4367 (modules '((guix build utils)))
4370 ;; Delete pre-built binaries.
4371 (delete-file-recursively "lib")
4374 (build-system ant-build-system)
4376 `(#:build-target "picard-jar"
4377 #:test-target "test"
4378 ;; FIXME: the class path at test time is wrong.
4379 ;; [testng] Error: A JNI error has occurred, please check your installation and try again
4380 ;; [testng] Exception in thread "main" java.lang.NoClassDefFoundError: com/beust/jcommander/ParameterException
4384 ;; This is only used for tests.
4386 (list "-Dsamjdk.intel_deflater_so_path=lib/jni/libIntelDeflater.so")
4388 (modify-phases %standard-phases
4389 ;; FIXME: This phase fails.
4390 (delete 'generate-jar-indices)
4391 ;; Do not use bundled ant bzip2.
4392 (add-after 'unpack 'use-ant-bzip
4393 (lambda* (#:key inputs #:allow-other-keys)
4394 (substitute* "build.xml"
4395 (("\\$\\{lib\\}/apache-ant-1.8.2-bzip2.jar")
4396 (search-input-file inputs "/lib/ant.jar")))))
;; Decouple compilation from the tests in build.xml: "test" now depends on
;; compile-tests, and "compile" depends on compile-src only.
4397 (add-after 'unpack 'make-test-target-independent
4398 (lambda* (#:key inputs #:allow-other-keys)
4399 (substitute* "build.xml"
4400 (("name=\"test\" depends=\"compile, ")
4401 "name=\"test\" depends=\"compile-tests, ")
4402 (("name=\"compile\" depends=\"compile-src, compile-tests\"")
4403 "name=\"compile\" depends=\"compile-src\""))
;; Replace the null default of "intel_deflater_so_path" in Defaults.java with
;; the location of libIntelDeflater.so inside this package's output.
4405 (add-after 'unpack 'fix-deflater-path
4406 (lambda* (#:key outputs #:allow-other-keys)
4407 (substitute* "src/java/net/sf/samtools/Defaults.java"
4408 (("getStringProperty\\(\"intel_deflater_so_path\", null\\)")
4409 (string-append "getStringProperty(\"intel_deflater_so_path\", \""
4410 (assoc-ref outputs "out")
4411 "/lib/jni/libIntelDeflater.so"
4414 ;; Build the deflater library, because we've previously deleted the
4415 ;; pre-built one. This can only be built with access to the JDK
4417 (add-after 'build 'build-jni
4418 (lambda* (#:key inputs #:allow-other-keys)
4421 (invoke "tar" "--strip-components=1" "-C" "jdk-src"
4422 "-xf" (assoc-ref inputs "jdk-src"))
4423 (invoke "javah" "-jni"
4424 "-classpath" "classes"
4426 "net.sf.samtools.util.zip.IntelDeflater")
4427 (with-directory-excursion "src/c/inteldeflater"
4428 (invoke "gcc" "-I../../../lib" "-I."
4429 (string-append "-I" (assoc-ref inputs "jdk")
4431 "-I../../../jdk-src/src/share/native/common/"
4432 "-I../../../jdk-src/src/solaris/native/common/"
4433 "-c" "-O3" "-fPIC" "IntelDeflater.c")
4434 (invoke "gcc" "-shared"
4435 "-o" "../../../lib/jni/libIntelDeflater.so"
4436 "IntelDeflater.o" "-lz" "-lstdc++"))
4438 ;; We can only build everything else after building the JNI library.
4439 (add-after 'build-jni 'build-rest
4440 (lambda* (#:key make-flags #:allow-other-keys)
4441 (apply invoke `("ant" "all" ,@make-flags))
;; JAVA6_HOME is set to the value of JAVA_HOME.
;; NOTE(review): presumably build.xml reads JAVA6_HOME -- confirm.
4443 (add-before 'build 'set-JAVA6_HOME
4445 (setenv "JAVA6_HOME" (getenv "JAVA_HOME"))
4447 (replace 'install (install-jars "dist"))
4448 (add-after 'install 'install-jni-lib
4449 (lambda* (#:key outputs #:allow-other-keys)
4450 (let ((jni (string-append (assoc-ref outputs "out")
4453 (install-file "lib/jni/libIntelDeflater.so" jni)
4456 `(("java-snappy-1" ,java-snappy-1)
4457 ("java-commons-jexl-2" ,java-commons-jexl-2)
4458 ("java-cofoja" ,java-cofoja)
4459 ("ant" ,ant/java8) ; for bzip2 support at runtime
4462 `(("ant-apache-bcel" ,ant-apache-bcel)
4463 ("ant-junit" ,ant-junit)
4464 ("java-testng" ,java-testng)
4465 ("java-commons-bcel" ,java-commons-bcel)
4466 ("java-jcommander" ,java-jcommander)
4467 ("jdk" ,icedtea-8 "jdk")
;; The JDK sources unpacked by 'build-jni are the "jdk-drop" native input of
;; icedtea-8.
4468 ("jdk-src" ,(car (assoc-ref (package-native-inputs icedtea-8) "jdk-drop")))))))
;; FastQC: quality control tool for high-throughput sequence data.  It is
;; built with ant.  Upstream has no installation target, so the custom install
;; code copies "bin" to OUT/share/fastqc and symlinks the launcher script into
;; OUT/bin.
4470 (define-public fastqc
4477 (uri (string-append "https://www.bioinformatics.babraham.ac.uk/"
4478 "projects/fastqc/fastqc_v"
4479 version "_source.zip"))
4482 "18rrlkhcrxvvvlapch4dpj6xc6mpayzys8qfppybi8jrpgx5cc5f"))))
4483 (build-system ant-build-system)
4485 `(#:tests? #f ; there are no tests
4486 #:build-target "build"
4488 (modify-phases %standard-phases
;; Point build.xml at the jbzip2, sam-1.112 and sis-jhdf5 jars found among
;; the inputs.
4489 (add-after 'unpack 'fix-dependencies
4490 (lambda* (#:key inputs #:allow-other-keys)
4491 (substitute* "build.xml"
4493 (search-input-file inputs "/share/java/jbzip2.jar"))
4495 (search-input-file inputs
4496 "/share/java/sam-1.112.jar"))
4498 (search-input-file inputs
4499 "/share/java/sis-jhdf5.jar")))))
4500 ;; There is no installation target
4502 (lambda* (#:key inputs outputs #:allow-other-keys)
4503 (let* ((out (assoc-ref outputs "out"))
4504 (bin (string-append out "/bin"))
;; No trailing slash on SHARE: EXE appends "/fastqc" itself, and a trailing
;; slash here would produce ".../share/fastqc//fastqc".
4505 (share (string-append out "/share/fastqc"))
4506 (exe (string-append share "/fastqc")))
4507 (for-each mkdir-p (list bin share))
4508 (copy-recursively "bin" share)
;; Hard-code the location of the "java" input in the launcher script instead
;; of relying on a "java" found at run time.
4510 (("my \\$java_bin = 'java';")
4511 (string-append "my $java_bin = '"
4512 (assoc-ref inputs "java")
4515 (symlink exe (string-append bin "/fastqc"))
4519 ("perl" ,perl) ; needed for the wrapper script
4520 ("java-cisd-jhdf5" ,java-cisd-jhdf5)
4521 ("java-picard-1.113" ,java-picard-1.113)
4522 ("java-jbzip2" ,java-jbzip2)))
4525 (home-page "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/")
4526 (synopsis "Quality control tool for high throughput sequence data")
4528 "FastQC aims to provide a simple way to do some quality control
4529 checks on raw sequence data coming from high throughput sequencing
4530 pipelines. It provides a modular set of analyses which you can use to
4531 give a quick impression of whether your data has any problems of which
4532 you should be aware before doing any further analysis.
4534 The main functions of FastQC are:
4537 @item Import of data from BAM, SAM or FastQ files (any variant);
4538 @item Providing a quick overview to tell you in which areas there may
4540 @item Summary graphs and tables to quickly assess your data;
4541 @item Export of results to an HTML based permanent report;
4542 @item Offline operation to allow automated generation of reports
4543 without running the interactive application.
4545 (license license:gpl3+)))
;; fastp: all-in-one FastQ preprocessor.  Fetched from the OpenGene/fastp Git
;; repository at the tag "v" + VERSION and built with the GNU build system;
;; PREFIX is passed to make so that files land in this package's output.
4547 (define-public fastp
4555 (url "https://github.com/OpenGene/fastp")
4556 (commit (string-append "v" version))))
4557 (file-name (git-file-name name version))
4560 "0ly8mxdvrcy23jwxyppysx3dhb1lwsqhfbgpyvargxhfk6k700x4"))))
4561 (build-system gnu-build-system)
4563 `(#:tests? #f ; there are none
4565 ,#~(list (string-append "PREFIX=" #$output))
4567 (modify-phases %standard-phases
;; Create OUT/bin before 'install runs.
;; NOTE(review): presumably the Makefile's install rule does not create the
;; directory itself -- confirm.
4569 (add-before 'install 'create-target-dir
4570 (lambda* (#:key outputs #:allow-other-keys)
4571 (mkdir-p (string-append (assoc-ref outputs "out") "/bin")))))))
4574 (home-page "https://github.com/OpenGene/fastp/")
4575 (synopsis "All-in-one FastQ preprocessor")
4577 "Fastp is a tool designed to provide fast all-in-one preprocessing for
4578 FastQ files. This tool has multi-threading support to afford high
4580 (license license:expat)))
;; HTSlib: C library for reading/writing high-throughput sequencing data,
;; built from the upstream release tarball with the GNU build system.  The
;; other htslib variants in this file inherit from this package.
4582 (define-public htslib
4589 "https://github.com/samtools/htslib/releases/download/"
4590 version "/htslib-" version ".tar.bz2"))
4593 "0pwk8yhhvb85mi1d2qhwsb4samc3rmbcrq7b1s0jz0glaa7in8pd"))))
4594 (build-system gnu-build-system)
4595 ;; Let htslib translate "gs://" and "s3://" to regular https links with
4596 ;; "--enable-gcs" and "--enable-s3". For these options to work, we also
4597 ;; need to set "--enable-libcurl".
4599 `(#:configure-flags '("--enable-gcs"
4603 (list bzip2 curl openssl xz))
4604 ;; This is referred to in the pkg-config file as a required library.
4609 (home-page "https://www.htslib.org")
4610 (synopsis "C library for reading/writing high-throughput sequencing data")
4612 "HTSlib is a C library for reading/writing high-throughput sequencing
4613 data. It also provides the @command{bgzip}, @command{htsfile}, and
4614 @command{tabix} utilities.")
4615 ;; Files under cram/ are released under the modified BSD license;
4616 ;; the rest is released under the Expat license
4617 (license (list license:expat license:bsd-3))))
;; Variant of htslib pinned to a different release: it inherits from htslib
;; through package/inherit and supplies its own release tarball URL and hash.
4619 (define-public htslib-1.12
4620 (package/inherit htslib
4625 "https://github.com/samtools/htslib/releases/download/"
4626 version "/htslib-" version ".tar.bz2"))
4629 "1jplnvizgr0fyyvvmkfmnsywrrpqhid3760vw15bllz98qdi9012"))))))
;; Variant of htslib pinned to a different release: it inherits from htslib
;; through package/inherit and supplies its own release tarball URL and hash.
4631 (define-public htslib-1.10
4632 (package/inherit htslib
4637 "https://github.com/samtools/htslib/releases/download/"
4638 version "/htslib-" version ".tar.bz2"))
4641 "0wm9ay7qgypj3mwx9zl1mrpnr36298b1aj5vx69l4k7bzbclvr3s"))))))
;; Variant of htslib pinned to a different release: it inherits from htslib
;; through package/inherit and supplies its own release tarball URL and hash.
4643 (define-public htslib-1.9
4644 (package/inherit htslib
4649 "https://github.com/samtools/htslib/releases/download/"
4650 version "/htslib-" version ".tar.bz2"))
4653 "16ljv43sc3fxmv63w7b2ff8m1s7h89xhazwmbm1bicz8axq8fjz0"))))))
4655 ;; This package should be removed once no packages rely upon it.
;; Like the other htslib variants, it inherits from htslib through
;; package/inherit and supplies its own release tarball URL and hash.
4657 (package/inherit htslib
4662 "https://github.com/samtools/htslib/releases/download/"
4663 version "/htslib-" version ".tar.bz2"))
4666 "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))))
;; Private (defined with `define', not `define-public') htslib variant.
;; Besides using its own release tarball, it adds a 'patch-tests phase that
;; replaces "/bin/bash" in test/test.pl with the bash found in PATH, and it
;; lists perl among its inputs.
4668 (define htslib-for-samtools-1.2
4669 (package/inherit htslib
4674 "https://github.com/samtools/htslib/releases/download/"
4675 version "/htslib-" version ".tar.bz2"))
4678 "1c32ssscbnjwfw3dra140fq7riarp2x990qxybh34nr1p5r17nxx"))))
4681 (modify-phases %standard-phases
4682 (add-after 'unpack 'patch-tests
4684 (substitute* "test/test.pl"
4685 (("/bin/bash") (which "bash"))))))))
4689 `(("perl" ,perl)))))
;; Private htslib variant based on htslib-1.12: it reuses that package's
;; source with "htslib-for-stringtie.patch" applied, is configured with
;; "--with-libdeflate", and lists bzip2, libdeflate and openssl as inputs.
4691 (define htslib-for-stringtie
4693 (inherit htslib-1.12)
4695 (inherit (package-source htslib-1.12))
4697 (search-patches "htslib-for-stringtie.patch"))))
4699 `(#:configure-flags '("--with-libdeflate")))
4701 (list bzip2 libdeflate openssl))))
;; idr: tool to measure the irreproducible discovery rate, fetched from the
;; nboley/idr Git repository and built with the Python build system.
4710 (url "https://github.com/nboley/idr")
4712 (file-name (git-file-name name version))
4715 "04j876h6z444v2q79drxx283d3k5snd72kj895wbalnl42206x9g"))
4716 ;; Delete generated C code.
4718 '(begin (delete-file "idr/inv_cdf.c") #t))))
4719 (build-system python-build-system)
4720 ;; There is only one test ("test_inv_cdf.py") and it tests features that
4721 ;; are no longer part of this package. It also asserts False, which
4722 ;; causes the tests to always fail.
4723 (arguments `(#:tests? #f))
4725 (list python-scipy python-sympy python-numpy python-matplotlib))
;; NOTE(review): python-cython is presumably what regenerates the
;; idr/inv_cdf.c file deleted from the source above -- confirm.
4727 (list python-cython))
4728 (home-page "https://github.com/nboley/idr")
4729 (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
4731 "The IDR (Irreproducible Discovery Rate) framework is a unified approach
4732 to measure the reproducibility of findings identified from replicate
4733 experiments and provide highly stable thresholds based on reproducibility.")
4734 (license license:gpl2+)))
;; Jellyfish: k-mer counter for DNA, built from the upstream release tarball
;; with the GNU build system.
4736 (define-public jellyfish
4742 (uri (string-append "https://github.com/gmarcais/Jellyfish/"
4743 "releases/download/v" version
4744 "/jellyfish-" version ".tar.gz"))
4747 "0npa62wzasdibas5zp3n8j3armsci4kyvh0jw7jr0am4gg7vg5g1"))))
4748 (build-system gnu-build-system)
;; The Python bindings go to the separate "python" output; its location is
;; handed to configure through --enable-python-binding below.
4749 (outputs '("out" ;for library
4750 "python")) ;for Python bindings
4753 ,#~(list "--without-sse" ; configure script probes for CPU features when SSE is enabled.
4754 (string-append "--enable-python-binding=" #$output:python))
4756 (modify-phases %standard-phases
4757 (add-before 'check 'set-SHELL-variable
4759 ;; generator_manager.hpp either uses /bin/sh or $SHELL
4761 (setenv "SHELL" (which "bash")))))))
4765 ("python" ,python-wrapper)
4766 ("pkg-config" ,pkg-config)))
4769 (synopsis "Tool for fast counting of k-mers in DNA")
4771 "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
4772 DNA. A k-mer is a substring of length k, and counting the occurrences of all
4773 such substrings is a central step in many analyses of DNA sequence. Jellyfish
4774 is a command-line program that reads FASTA and multi-FASTA files containing
4775 DNA sequences. It outputs its k-mer counts in a binary format, which can be
4776 translated into a human-readable text format using the @code{jellyfish dump}
4777 command, or queried for specific k-mers with @code{jellyfish query}.")
4778 (home-page "http://www.genome.umd.edu/jellyfish.html")
4779 ;; JELLYFISH seems to be 64-bit only.
4780 (supported-systems '("x86_64-linux" "aarch64-linux" "mips64el-linux"))
4781 ;; One of these licenses may be picked
4782 (license (list license:gpl3+ license:bsd-3))))
;; khmer: k-mer counting, filtering and graph traversal library with
;; command-line tools.  The source snippet removes the bundled zlib, bzip2 and
;; seqan copies and the generated Cython .cpp files, so the build phases below
;; switch setup.py to Cython and build the extensions from the .pyx sources.
4784 (define-public khmer
4792 (url "https://github.com/dib-lab/khmer")
4793 (commit (string-append "v" version))))
4794 (file-name (git-file-name name version))
4797 "01l4jczglkl7yfhgvzx8j0df7k54bk1r8sli9ll16i1mis0d8f37"))
4798 (modules '((guix build utils)))
4801 ;; Delete bundled libraries. We do not replace the bundled seqan
4802 ;; as it is a modified subset of the old version 1.4.1.
4804 ;; We do not replace the bundled MurmurHash as the canonical
4805 ;; repository for this code 'SMHasher' is unsuitable for providing
4807 ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
4808 (delete-file-recursively "third-party/zlib")
4809 (delete-file-recursively "third-party/bzip2")
4810 (delete-file-recursively "third-party/seqan")
;; Enable the commented-out "libraries = z,bz2" setting in setup.cfg so that
;; the build links against z and bz2 rather than the deleted bundled copies.
4811 (substitute* "setup.cfg"
4812 (("# libraries = z,bz2")
4813 "libraries = z,bz2")
4814 (("include:third-party/zlib:third-party/bzip2")
4816 ;; Delete generated Cython CPP files.
4817 (for-each delete-file (find-files "khmer/_oxli/" "\\.cpp$"))))))
4818 (build-system python-build-system)
4821 (modify-phases %standard-phases
4822 (add-after 'unpack 'set-cc
4823 (lambda _ (setenv "CC" "gcc")))
4824 (add-after 'unpack 'python-3.8-compatibility
4826 ;; Python 3.8 removed time.clock().
4827 (substitute* "sandbox/sweep-reads.py"
4829 "time.process_time"))))
;; Make setup.py import Extension and build_ext from Cython.Distutils, set
;; HAS_CYTHON to True and use the "pyx" extension instead of "cpp".
4830 (add-after 'unpack 'do-use-cython
4832 (substitute* "setup.py"
4833 (("from setuptools import Extension as CyExtension")
4834 "from Cython.Distutils import Extension as CyExtension")
4835 (("from setuptools.command.build_ext import build_ext as _build_ext")
4836 "from Cython.Distutils import build_ext as _build_ext")
4837 (("HAS_CYTHON = False") "HAS_CYTHON = True")
4838 (("cy_ext = 'cpp'") "cy_ext = 'pyx'"))))
4839 (add-before 'build 'build-extensions
4841 ;; Cython extensions have to be built before running the tests.
4842 (invoke "python" "setup.py" "build_ext" "--inplace")))
4844 (lambda* (#:key tests? inputs outputs #:allow-other-keys)
4846 (add-installed-pythonpath inputs outputs)
4847 (invoke "pytest" "-v")))))))
4849 (list python-cython python-pytest python-pytest-runner))
4851 (list zlib bzip2 seqan-1 python-screed python-bz2file))
4852 (home-page "https://khmer.readthedocs.org/")
4853 (synopsis "K-mer counting, filtering and graph traversal library")
4854 (description "The khmer software is a set of command-line tools for
4855 working with DNA shotgun sequencing data from genomes, transcriptomes,
4856 metagenomes and single cells. Khmer can make de novo assemblies faster, and
4857 sometimes better. Khmer can also identify and fix problems with shotgun
4859 ;; When building on i686, armhf and mips64el, we get the following error:
4860 ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
4861 (supported-systems '("x86_64-linux" "aarch64-linux"))
4862 (license license:bsd-3)))
;; Kaiju: taxonomic classification of metagenomic sequencing reads.  Built
;; with the GNU build system from the "src" subdirectory of the Git checkout;
;; the contents of "../bin" are then copied to OUT/bin and selected scripts
;; are wrapped with wrap-script.
4864 (define-public kaiju
4871 (url "https://github.com/bioinformatics-centre/kaiju")
4872 (commit (string-append "v" version))))
4873 (file-name (git-file-name name version))
4876 "1hfmadkfs6jjd7l3byly5xxb0ifm3dm1wis11sjbqfcv6l89snmg"))))
4877 (build-system gnu-build-system)
4880 #:tests? #f ; There are no tests.
4882 #~(modify-phases %standard-phases
4884 (add-before 'build 'move-to-src-dir
4885 (lambda _ (chdir "src")))
4888 (let ((bin (string-append #$output "/bin")))
4890 (copy-recursively "../bin" bin)
;; PATH prefix for the wrapped scripts: the "bin" directories of all of this
;; package's inputs.
4891 (let ((path (search-path-as-list '("bin")
4892 '#$(match (package-inputs this-package)
4893 (((_ pkg) ...) pkg)))))
4894 (for-each (lambda (script)
4895 (let ((exe (string-append bin "/" script)))
4898 #:guile #$(file-append guile-3.0 "/bin/guile")
4899 `("PATH" ":" prefix ,path))))
4900 (list "kaiju-convertMAR.py"
4902 "kaiju-makedb")))))))))
4908 guile-3.0 ;for wrap-script
4915 (home-page "http://kaiju.binf.ku.dk/")
4916 (synopsis "Fast and sensitive taxonomic classification for metagenomics")
4917 (description "Kaiju is a program for sensitive taxonomic classification
4918 of high-throughput sequencing reads from metagenomic whole genome sequencing
4920 (license license:gpl3+)))
4927 ;; The PyPi tarball does not contain tests.
;; MACS (model-based analysis of ChIP-Seq data) is therefore fetched from the
;; macs3-project/MACS Git repository at the tag "v" + VERSION.  The C files
;; generated by Cython are removed from the source and the tests are run with
;; pytest.
4930 (url "https://github.com/macs3-project/MACS")
4931 (commit (string-append "v" version))))
4932 (file-name (git-file-name name version))
4935 "08zsgh65xbpv1md2s3wqmrk9g2mz6izmn59ryw5lbac54120p291"))
4936 (modules '((guix build utils)))
4937 ;; Remove files generated by Cython
;; For every "foo.pyx" found, drop the trailing "pyx", append "c" and delete
;; the resulting "foo.c" if it exists.
4940 (for-each (lambda (file)
4941 (let ((generated-file
4942 (string-append (string-drop-right file 3) "c")))
4943 (when (file-exists? generated-file)
4944 (delete-file generated-file))))
4945 (find-files "." "\\.pyx$"))
4946 (delete-file "MACS2/IO/CallPeakUnitPrecompiled.c")))))
4947 (build-system python-build-system)
4950 (modify-phases %standard-phases
4951 (add-before 'build 'set-HOME
4952 (lambda _ (setenv "HOME" "/tmp")))
4954 (lambda* (#:key tests? inputs outputs #:allow-other-keys)
4956 (add-installed-pythonpath inputs outputs)
4957 (invoke "pytest" "-v")))))))
4959 (list python-numpy))
4961 (list python-cython python-pytest))
4962 (home-page "https://github.com/macs3-project/MACS")
4963 (synopsis "Model based analysis for ChIP-Seq data")
4965 "MACS is an implementation of a ChIP-Seq analysis algorithm for
4966 identifying transcription factor binding sites named Model-based Analysis of
4967 ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
4968 the significance of enriched ChIP regions and it improves the spatial
4969 resolution of binding sites through combining the information of both
4970 sequencing tag position and orientation.")
4971 (license license:bsd-3)))
;; MAFFT: multiple sequence alignment program.  It is built from the "core"
;; subdirectory of the "without-extensions" source tarball; PREFIX and BINDIR
;; point make at this package's output.  After installation every file in
;; OUT/bin is wrapped so that the coreutils input is first in PATH.
4973 (define-public mafft
4980 "https://mafft.cbrc.jp/alignment/software/mafft-" version
4981 "-without-extensions-src.tgz"))
4982 (file-name (string-append name "-" version ".tgz"))
4985 "0i2i2m3blh2xkbkdk48hxfssks30ny0v381gdl7zwhcvp0axs26r"))))
4986 (build-system gnu-build-system)
4988 `(#:tests? #f ; no automated tests, though there are tests in the read me
4989 #:make-flags ,#~(list (string-append "PREFIX=" #$output)
4990 (string-append "BINDIR="
4991 (string-append #$output "/bin")))
4993 (modify-phases %standard-phases
4994 (add-after 'unpack 'enter-dir
4995 (lambda _ (chdir "core") #t))
4996 (add-after 'enter-dir 'patch-makefile
4998 ;; on advice from the MAFFT authors, there is no need to
4999 ;; distribute mafft-profile, mafft-distance, or
5000 ;; mafft-homologs.rb as they are too "specialised".
5001 (substitute* "Makefile"
5002 ;; remove mafft-homologs.rb from SCRIPTS
5003 (("^SCRIPTS = mafft mafft-homologs.rb")
5005 ;; remove mafft-homologs from MANPAGES
5006 (("^MANPAGES = mafft.1 mafft-homologs.1")
5007 "MANPAGES = mafft.1")
5008 ;; remove mafft-distance from PROGS
5009 (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
5010 "PROGS = dvtditr dndfast7 dndblast sextet5")
5011 ;; remove mafft-profile from PROGS
;; NOTE(review): the pattern reads "mafft-profile 2cl" while the replacement
;; has "f2cl"; the pattern may be missing an "f" and never match -- verify
;; against the upstream Makefile before changing it.
5012 (("splittbfast disttbfast tbfast mafft-profile 2cl mccaskillwrap")
5013 "splittbfast disttbfast tbfast f2cl mccaskillwrap")
;; Both "rm" lines are turned into a bare Makefile comment.
5014 (("^rm -f mafft-profile mafft-profile.exe") "#")
5015 (("^rm -f mafft-distance mafft-distance.exe") "#")
5016 ;; do not install MAN pages in libexec folder
5017 (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
5018 \\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
;; Replace bare "perl", "awk" and "grep" references in the listed files with
;; the programs found in PATH at build time.
5020 (add-after 'enter-dir 'patch-paths
5021 (lambda* (#:key inputs #:allow-other-keys)
5022 (substitute* '("pairash.c"
5024 (("perl") (which "perl"))
5025 (("([\"`| ])awk" _ prefix)
5026 (string-append prefix (which "awk")))
5027 (("grep") (which "grep")))
5030 (add-after 'install 'wrap-programs
5031 (lambda* (#:key inputs outputs #:allow-other-keys)
5032 (let* ((out (assoc-ref outputs "out"))
5033 (bin (string-append out "/bin"))
;; No trailing ":" here: the wrapper already joins entries with ":", and an
;; extra one would leave an empty PATH entry, which means the current
;; directory.
5034 (path (string-append
5035 (assoc-ref inputs "coreutils") "/bin")))
5036 (for-each (lambda (file)
5038 `("PATH" ":" prefix (,path))))
5039 (find-files bin))))))))
5041 (list perl ruby gawk grep coreutils))
5042 (home-page "https://mafft.cbrc.jp/alignment/software/")
5043 (synopsis "Multiple sequence alignment program")
5045 "MAFFT offers a range of multiple alignment methods for nucleotide and
5046 protein sequences. For instance, it offers L-INS-i (accurate; for alignment
5047 of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
5049 (license (license:non-copyleft
5050 "https://mafft.cbrc.jp/alignment/software/license.txt"
5051 "BSD-3 with different formatting"))))
;; Mash: genome and metagenome distance estimation using MinHash, fetched
;; from the marbl/mash Git repository at the tag "v" + VERSION and built with
;; the GNU build system.
5060 (url "https://github.com/marbl/mash")
5061 (commit (string-append "v" version))))
5062 (file-name (git-file-name name version))
5065 "049hwcc059p2fd9vwndn63laifvvsi0wmv84i6y1fr79k15dxwy6"))
5066 (modules '((guix build utils)))
5068 ;; Delete bundled kseq.
5069 ;; TODO: Also delete bundled murmurhash and open bloom filter.
5070 '(delete-file "src/mash/kseq.h"))))
5071 (build-system gnu-build-system)
5073 `(#:tests? #f ; No tests.
5076 (string-append "--with-capnp=" #$(this-package-input "capnproto"))
5077 (string-append "--with-gsl=" #$(this-package-input "gsl")))
5078 #:make-flags (list "CC=gcc")
5080 (modify-phases %standard-phases
;; The bundled kseq.h is deleted from the source, so the three files that
;; include it are changed to include "htslib/kseq.h" instead.
5081 (add-after 'unpack 'fix-includes
5083 (substitute* '("src/mash/Sketch.cpp"
5084 "src/mash/CommandFind.cpp"
5085 "src/mash/CommandScreen.cpp")
5086 (("^#include \"kseq\\.h\"")
5087 "#include \"htslib/kseq.h\""))
5089 (add-after 'fix-includes 'use-c++14
5091 ;; capnproto 0.7 requires c++14 to build
5092 (substitute* "configure.ac"
5093 (("c\\+\\+11") "c++14"))
5094 (substitute* "Makefile.in"
5095 (("c\\+\\+11") "c++14"))
5100 (list ;; Capnproto and htslib are statically embedded in the final
5101 ;; application. Therefore we also list their licenses, below.
5106 (supported-systems '("x86_64-linux"))
5107 (home-page "https://mash.readthedocs.io")
5108 (synopsis "Fast genome and metagenome distance estimation using MinHash")
5109 (description "Mash is a fast sequence distance estimator that uses the
5110 MinHash algorithm and is designed to work with genomes and metagenomes in the
5111 form of assemblies or reads.")
5112 (license (list license:bsd-3 ; Mash
5113 license:expat ; HTSlib and capnproto
5114 license:public-domain ; MurmurHash 3
5115 license:cpl1.0)))) ; Open Bloom Filter
;; MetaBAT: automated metagenome binning software, built with CMake from the
;; berkeleylab/metabat Git repository.  CMake is pointed at the zlib, htslib
;; and boost inputs instead of bundled or downloaded copies.
5117 (define-public metabat
5125 (url "https://bitbucket.org/berkeleylab/metabat.git")
5126 (commit (string-append "v" version))))
5127 (file-name (git-file-name name version))
5130 "0v3gsps0ypani14102z2y1a2wignhpf7s1h45mxmj5f783rkhqd9"))))
5131 (build-system cmake-build-system)
5134 ,#~(list (string-append "-Dzlib_LIB=" #$(this-package-input "zlib")
5136 (string-append "-Dhtslib_LIB=" #$(this-package-input "htslib")
5138 (string-append "-DBOOST_ROOT=" #$(this-package-input "boost")))
5140 (modify-phases %standard-phases
;; Create metabat_version.h from its template by hand, filling in fixed
;; values: time stamp 19700101, "0" for the Git dirty/retrieved-state fields
;; and "v" + VERSION as the commit identifier.
5141 (add-after 'unpack 'configure-version-file
5143 (copy-file "metabat_version.h.in" "metabat_version.h")
5144 (substitute* "metabat_version.h"
5145 (("@_time_stamp@") "19700101")
5146 (("@GIT_IS_DIRTY@") "0")
5147 (("@GIT_RETRIEVED_STATE@") "0")
5148 (("@GIT_HEAD_SHA1@") (string-append "v" ,version)))))
;; Strip the include(cmake...) lines from the top-level CMakeLists.txt and
;; the set(Boost...) and add_dependencies... lines from src/CMakeLists.txt.
5149 (add-after 'unpack 'do-not-use-bundled-libraries
5151 (substitute* "CMakeLists.txt"
5152 (("include\\(cmake.*") ""))
5153 (substitute* "src/CMakeLists.txt"
5154 (("set\\(Boost.*") "")
5155 (("add_dependencies.*") "")))))))
5157 (list zlib perl samtools htslib boost))
5158 (home-page "https://bitbucket.org/berkeleylab/metabat")
5160 "Reconstruction of single genomes from complex microbial communities")
5162 "Grouping large genomic fragments assembled from shotgun metagenomic
5163 sequences to deconvolute complex microbial communities, or metagenome binning,
5164 enables the study of individual organisms and their interactions. MetaBAT is
5165 an automated metagenome binning software, which integrates empirical
5166 probabilistic distances of genome abundance and tetranucleotide frequency.")
5167 ;; The source code contains inline assembly.
5168 (supported-systems '("x86_64-linux" "i686-linux"))
5169 (license (license:non-copyleft "file://license.txt"
5170 "See license.txt in the distribution."))))
;; MinCED: finds CRISPRs in DNA sequences.  The jar is built with the GNU
;; build system; since there is no install target, the custom 'install phase
;; copies minced.jar to OUT/bin and writes a small shell wrapper next to it.
5172 (define-public minced
5179 (url "https://github.com/ctSkennerton/minced")
5181 (file-name (git-file-name name version))
5184 "1f5h9him0gd355cnx7p6pnxpknhckd4g0v62mg8zyhfbx9as25fv"))))
5185 (build-system gnu-build-system)
5187 `(#:test-target "test"
5189 (modify-phases %standard-phases
5191 (add-before 'check 'fix-test
5193 ;; Fix test for latest version.
5194 (substitute* "t/Aquifex_aeolicus_VF5.expected"
5195 (("minced:0.1.6") "minced:0.2.0"))
5197 (replace 'install ; No install target.
5198 (lambda* (#:key inputs outputs #:allow-other-keys)
5199 (let* ((out (assoc-ref outputs "out"))
5200 (bin (string-append out "/bin"))
5201 (wrapper (string-append bin "/minced")))
5202 ;; Minced comes with a wrapper script that tries to figure out where
5203 ;; it is located before running the JAR. Since these paths are known
5204 ;; to us, we build our own wrapper to avoid coreutils dependency.
5205 (install-file "minced.jar" bin)
;; The generated wrapper uses the sh of the "bash" input as interpreter and
;; runs the java of the "jre" input on OUT/bin/minced.jar, passing on all
;; arguments.
5206 (with-output-to-file wrapper
5210 "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
5211 (assoc-ref inputs "jre") "/bin/java -jar "
5212 bin "/minced.jar \"$@\"\n"))))
5213 (chmod wrapper #o555))
5216 `(("jdk" ,icedtea "jdk")))
5219 ("jre" ,icedtea "out")))
5220 (home-page "https://github.com/ctSkennerton/minced")
5221 (synopsis "Mining CRISPRs in Environmental Datasets")
5223 "MinCED is a program to find Clustered Regularly Interspaced Short
5224 Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
5225 unassembled metagenomic reads, but is mainly designed for full genomes and
5226 assembled metagenomic sequence.")
5227 (license license:gpl3+)))
;; MUSCLE: multiple sequence alignment program for nucleotide and protein
;; sequences, built with the GNU build system.  The resulting "muscle" binary
;; is smoke-tested with "-version" and then copied to OUT/bin.
5229 (define-public muscle
5232 (version "3.8.1551")
;; NOTE(review): url-fetch/tarbomb presumably means that the archive has no
;; top-level directory -- confirm.
5234 (method url-fetch/tarbomb)
5236 "http://www.drive5.com/muscle/muscle_src_"
5240 "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
5241 (build-system gnu-build-system)
5243 `(#:make-flags (list "LDLIBS = -lm")
5245 (modify-phases %standard-phases
5248 ;; There are no tests, so just test if it runs.
5249 (lambda _ (invoke "./muscle" "-version") #t))
5251 (lambda* (#:key outputs #:allow-other-keys)
5252 (let* ((out (assoc-ref outputs "out"))
5253 (bin (string-append out "/bin")))
5254 (install-file "muscle" bin)
5256 (home-page "http://www.drive5.com/muscle")
5257 (synopsis "Multiple sequence alignment program")
5259 "MUSCLE aims to be a fast and accurate multiple sequence alignment
5260 program for nucleotide and protein sequences.")
5261 ;; License information found in 'muscle -h' and usage.cpp.
5262 (license license:public-domain)))
;; Newick-utils: utilities for processing phylogenetic trees in Newick
;; format.  It is packaged from a fixed Git commit; the version string is
;; "1.6-1." followed by the first eight characters of that commit.
5264 (define-public newick-utils
5265 ;; There are no recent releases so we package from git.
5266 (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87"))
5268 (name "newick-utils")
5269 (version (string-append "1.6-1." (string-take commit 8)))
5273 (url "https://github.com/tjunier/newick_utils")
5275 (file-name (string-append name "-" version "-checkout"))
5278 "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
5279 (build-system gnu-build-system)
;; NOTE(review): -fcommon is presumably needed because newer GCC releases
;; default to -fno-common -- confirm.
5281 '(#:make-flags (list "CFLAGS=-O2 -g -fcommon")))
5283 ;; XXX: TODO: Enable Lua and Guile bindings.
5284 ;; https://github.com/tjunier/newick_utils/issues/13
5285 (list libxml2 flex bison))
5287 (list autoconf automake libtool))
5288 (synopsis "Programs for working with newick format phylogenetic trees")
5290 "Newick-utils is a suite of utilities for processing phylogenetic trees
5291 in Newick format. Functions include re-rooting, extracting subtrees,
5292 trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
5293 (home-page "https://github.com/tjunier/newick_utils")
5294 (license license:bsd-3))))
;; OrfM: open reading frame (ORF) caller, built with the GNU build system
;; from the upstream release tarball.  zlib is an input; the second list adds
;; ruby, ruby-rspec and ruby-bio-commandeer.
;; NOTE(review): the Ruby packages are presumably only needed for the test
;; suite -- confirm.
5303 "https://github.com/wwood/OrfM/releases/download/v"
5304 version "/orfm-" version ".tar.gz"))
5307 "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
5308 (build-system gnu-build-system)
5309 (inputs (list zlib))
5311 (list ruby-bio-commandeer ruby-rspec ruby))
5312 (synopsis "Simple and not slow open reading frame (ORF) caller")
5314 "An ORF caller finds stretches of DNA that, when translated, are not
5315 interrupted by stop codons. OrfM finds and prints these ORFs.")
5316 (home-page "https://github.com/wwood/OrfM")
5317 (license license:lgpl3+)))
;; PRANK: probabilistic multiple sequence alignment program.  It is built
;; from the "src" subdirectory; the install code copies the "prank" binary
;; and its man page into the output and wraps the binary so that mafft,
;; exonerate and bppsuite are found in PATH.
5319 (define-public prank
5326 "http://wasabiapp.org/download/prank/prank.source."
5330 "0nc8g9c5rkdxcir46s0in9ci1sxwzbjibxrvkksf22ybnplvagk2"))))
5331 (build-system gnu-build-system)
5334 (modify-phases %standard-phases
5335 (add-after 'unpack 'enter-src-dir
5336 (lambda _ (chdir "src")))
;; Both phases are added directly after 'unpack.  Since this one is added
;; last, it is inserted between 'unpack and 'enter-src-dir and so runs from
;; the top-level directory; that is why it refers to "src/Makefile".
5337 (add-after 'unpack 'remove-m64-flag
5338 ;; Prank will build with the correct 'bit-ness' without this flag
5339 ;; and this allows building on 32-bit machines.
5341 (substitute* "src/Makefile"
5345 (lambda* (#:key inputs outputs #:allow-other-keys)
5346 (let* ((out (assoc-ref outputs "out"))
5347 (bin (string-append out "/bin"))
5348 (man (string-append out "/share/man/man1"))
5349 (path (string-append
5350 (assoc-ref inputs "mafft") "/bin:"
5351 (assoc-ref inputs "exonerate") "/bin:"
5352 (assoc-ref inputs "bppsuite") "/bin")))
5353 (install-file "prank" bin)
5354 (wrap-program (string-append bin "/prank")
5355 `("PATH" ":" prefix (,path)))
5356 (install-file "prank.1" man)))))))
5358 (list mafft exonerate bppsuite))
5359 (home-page "http://wasabiapp.org/software/prank/")
5360 (synopsis "Probabilistic multiple sequence alignment program")
5362 "PRANK is a probabilistic multiple sequence alignment program for DNA,
5363 codon and amino-acid sequences. It is based on a novel algorithm that treats
5364 insertions correctly and avoids over-estimation of the number of deletion
5365 events. In addition, PRANK borrows ideas from maximum likelihood methods used
5366 in phylogenetics and correctly takes into account the evolutionary distances
5367 between sequences. Lastly, PRANK allows for defining a potential structure
5368 for sequences to be aligned and then, simultaneously with the alignment,
5369 predicts the locations of structural units in the sequences.")
5370 (license license:gpl2+)))
;; Proteinortho: detects orthologous genes across species.  The pre-built
;; scripts under src/BUILD/ are removed from the source.  There is no
;; configure script, so the Makefile's INSTALLDIR is rewritten to OUT/bin,
;; that directory is created before 'install, and the installed scripts are
;; wrapped afterwards.
5372 (define-public proteinortho
5374 (name "proteinortho")
5379 (url "https://gitlab.com/paulklemm_PHD/proteinortho.git")
5380 (commit (string-append "v" version))))
5381 (file-name (git-file-name name version))
5384 "0pmy617zy2z2w6hjqxjhf3rzikf5n3mpia80ysq8233vfr7wrzff"))
5385 (modules '((guix build utils)))
5388 ;; remove pre-built scripts
5389 (delete-file-recursively "src/BUILD/")
5391 (build-system gnu-build-system)
5393 `(#:test-target "test"
5394 #:make-flags '("CC=gcc")
5396 (modify-phases %standard-phases
5398 ;; There is no configure script, so we modify the Makefile directly.
5399 (lambda* (#:key outputs #:allow-other-keys)
5400 (substitute* "Makefile"
5403 "INSTALLDIR=" (assoc-ref outputs "out") "/bin\n")))
5405 (add-before 'install 'make-install-directory
5406 ;; The install directory is not created during 'make install'.
5407 (lambda* (#:key outputs #:allow-other-keys)
5408 (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
;; Wrap OUT/bin/proteinortho and every .pl/.py file found in the output with
;; wrap-script, prefixing PATH with the PATH value of the build environment.
5410 (add-after 'install 'wrap-programs
5411 (lambda* (#:key inputs outputs #:allow-other-keys)
5412 (let ((path (getenv "PATH"))
5413 (out (assoc-ref outputs "out"))
5414 (guile (search-input-file inputs "bin/guile")))
5415 (for-each (lambda (script)
5416 (wrap-script script #:guile guile
5417 `("PATH" ":" prefix (,path))))
5418 (cons (string-append out "/bin/proteinortho")
5419 (find-files out "\\.(pl|py)$"))))
5422 `(("guile" ,guile-3.0) ; for wrap-script
5423 ("diamond" ,diamond)
5425 ("python" ,python-wrapper)
5428 ("openblas" ,openblas)))
5431 (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
5432 (synopsis "Detect orthologous genes across species")
5434 "Proteinortho is a tool to detect orthologous genes across different
5435 species. For doing so, it compares similarities of given gene sequences and
5436 clusters them to find significant groups. The algorithm was designed to handle
5437 large-scale data and can be applied to hundreds of species at once.")
5438 (license license:gpl3+)))
;; Prodigal: protein-coding gene prediction.  Git checkout of "v" + version
;; from GitHub, built with gnu-build-system.  There is no check target and
;; the configure phase is deleted.
5440 (define-public prodigal
5443 ;; Check for a new home page when updating this package:
5444 ;; https://github.com/hyattpd/Prodigal/issues/36#issuecomment-536617588
5449 (url "https://github.com/hyattpd/Prodigal")
5450 (commit (string-append "v" version))))
5451 (file-name (git-file-name name version))
5454 "1fs1hqk83qjbjhrvhw6ni75zakx5ki1ayy3v6wwkn3xvahc9hi5s"))))
5455 (build-system gnu-build-system)
5457 `(#:tests? #f ; no check target
;; G-expression building the flag "INSTALLDIR=<output>/bin".
5459 ,#~(list (string-append "INSTALLDIR=" #$output "/bin"))
5461 (modify-phases %standard-phases
5462 (delete 'configure))))
5463 (home-page "https://github.com/hyattpd/Prodigal")
5464 (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
5466 "Prodigal runs smoothly on finished genomes, draft genomes, and
5467 metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
5468 format. It runs quickly, in an unsupervised fashion, handles gaps, handles
5469 partial genes, and identifies translation initiation sites.")
5470 (license license:gpl3+)))
;; Roary: pan genome pipeline, distributed on CPAN as Bio-Roary and built with
;; perl-build-system.  The phases below run the test files by hand, copy
;; "bin" and "lib" into the output, and wrap the installed programs.
5472 (define-public roary
5480 "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
5484 "0qxrds9wx7cfhlkihrp6697kx0flhhxymap9fwan0b3rbdhcnmff"))))
5485 (build-system perl-build-system)
5488 (modify-phases %standard-phases
5493 ;; The tests are not run by default, so we run each test file
5495 (setenv "PATH" (string-append (getcwd) "/bin" ":"
5497 (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
5498 (getenv "PERL5LIB")))
;; Print each test file name, then run it with perl; `invoke' raises an
;; error on a non-zero exit status, so the first failing test aborts the phase.
5499 (for-each (lambda (file)
5500 (display file)(display "\n")
5501 (invoke "perl" file))
5502 (find-files "t" ".*\\.t$"))
5505 ;; There is no 'install' target in the Makefile.
5506 (lambda* (#:key outputs #:allow-other-keys)
5507 (let* ((out (assoc-ref outputs "out"))
5508 (bin (string-append out "/bin"))
5509 (perl (string-append out "/lib/perl5/site_perl"))
5510 (roary-plots "contrib/roary_plots"))
5513 (copy-recursively "bin" bin)
5514 (copy-recursively "lib" perl)
;; Wrapping is done in two parts.  First, every file under the source "bin"
;; directory whose name does not end in "R" is wrapped at <out>/<relative
;; path> with PERL5LIB (build-time value plus <out>/lib/perl5/site_perl) and
;; with a second variable built from the build-time PATH plus <out>/bin
;; (presumably PATH; the variable name is not visible in this view).
;; Second, the R plotting script is wrapped with R_LIBS_SITE and with a
;; search path containing the directory that provides "chmod".
5516 (add-after 'install 'wrap-programs
5517 (lambda* (#:key inputs outputs #:allow-other-keys)
5518 (let* ((out (assoc-ref outputs "out"))
5519 (perl5lib (getenv "PERL5LIB"))
5520 (path (getenv "PATH")))
5521 (for-each (lambda (prog)
5522 (let ((binary (string-append out "/" prog)))
5523 (wrap-program binary
5524 `("PERL5LIB" ":" prefix
5525 (,(string-append perl5lib ":" out
5526 "/lib/perl5/site_perl"))))
5527 (wrap-program binary
5529 (,(string-append path ":" out "/bin"))))))
5530 (find-files "bin" ".*[^R]$"))
5532 (string-append out "/bin/roary-create_pan_genome_plots.R"))
5533 (r-site-lib (getenv "R_LIBS_SITE"))
5535 (dirname (search-input-file inputs "bin/chmod"))))
5537 `("R_LIBS_SITE" ":" prefix
5538 (,(string-append r-site-lib ":" out "/site-library/"))))
5541 (,(string-append coreutils-path ":" out "/bin"))))))
5544 (list perl-env-path perl-test-files perl-test-most perl-test-output))
5546 `(("perl-array-utils" ,perl-array-utils)
5547 ("bioperl" ,bioperl-minimal)
5548 ("perl-digest-md5-file" ,perl-digest-md5-file)
5549 ("perl-exception-class" ,perl-exception-class)
5550 ("perl-file-find-rule" ,perl-file-find-rule)
5551 ("perl-file-grep" ,perl-file-grep)
5552 ("perl-file-slurper" ,perl-file-slurper)
5553 ("perl-file-which" ,perl-file-which)
5554 ("perl-graph" ,perl-graph)
5555 ("perl-graph-readwrite" ,perl-graph-readwrite)
5556 ("perl-log-log4perl" ,perl-log-log4perl)
5557 ("perl-moose" ,perl-moose)
5558 ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
5559 ("perl-text-csv" ,perl-text-csv)
5560 ("bedtools" ,bedtools)
5564 ("parallel" ,parallel)
5567 ("fasttree" ,fasttree)
5571 ("r-minimal" ,r-minimal)
5572 ("r-ggplot2" ,r-ggplot2)
5573 ("coreutils" ,coreutils)))
5574 (home-page "https://sanger-pathogens.github.io/Roary/")
5575 (synopsis "High speed stand-alone pan genome pipeline")
5577 "Roary is a high speed stand alone pan genome pipeline, which takes
5578 annotated assemblies in GFF3 format (produced by the Prokka program) and
5579 calculates the pan genome. Using a standard desktop PC, it can analyse
5580 datasets with thousands of samples, without compromising the quality of the
5581 results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
5582 single processor. Roary is not intended for metagenomics or for comparing
5583 extremely diverse sets of genomes.")
5584 (license license:gpl3)))
;; RAxML: maximum likelihood phylogenetic trees.  Git checkout of
;; "v" + version, built with gnu-build-system using Makefile.HYBRID.gcc;
;; restricted to x86_64-linux and i686-linux.
5586 (define-public raxml
5594 (url "https://github.com/stamatak/standard-RAxML")
5595 (commit (string-append "v" version))))
5596 (file-name (git-file-name name version))
5599 "1jqjzhch0rips0vp04prvb8vmc20c5pdmsqn8knadcf91yy859fh"))))
5600 (build-system gnu-build-system)
5602 `(#:tests? #f ; There are no tests.
5603 ;; Use 'standard' Makefile rather than SSE or AVX ones.
5604 #:make-flags (list "-f" "Makefile.HYBRID.gcc")
5606 (modify-phases %standard-phases
5609 (lambda* (#:key outputs #:allow-other-keys)
5610 (let* ((out (assoc-ref outputs "out"))
5611 (bin (string-append out "/bin"))
5612 (executable "raxmlHPC-HYBRID"))
5613 (install-file executable bin)
;; NOTE(review): the second argument of `symlink' is the path of the link
;; being created.  "raxml" is relative, so the link is created in the current
;; working directory rather than in <out>/bin -- confirm whether
;; (string-append bin "/raxml") was intended.
5614 (symlink (string-append bin "/" executable) "raxml"))
5618 (home-page "https://cme.h-its.org/exelixis/web/software/raxml/index.html")
5619 (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
5621 "RAxML is a tool for phylogenetic analysis and post-analysis of large
5623 ;; The source includes x86 specific code
5624 (supported-systems '("x86_64-linux" "i686-linux"))
5625 (license license:gpl2+)))
;; RSEM source: git checkout of "v" + version from deweylab/RSEM.  The origin
;; snippet removes the bundled "boost" and "samtools-1.3" directories.
5635 (url "https://github.com/deweylab/RSEM")
5636 (commit (string-append "v" version))))
5638 (base32 "1yl4i7z20n2p84j1lmk15aiak3yqc6fiw0q5a4pndw7pxfiq3rzp"))
5639 (file-name (git-file-name name version))
5640 (modules '((guix build utils)))
5643 ;; remove bundled copy of boost and samtools
5644 (delete-file-recursively "boost")
5645 (delete-file-recursively "samtools-1.3")))))
5646 (build-system gnu-build-system)
5648 `(#:tests? #f ;no "check" target
;; G-expression that builds BOOST=, SAMHEADERS= and SAMLIBS= from the "boost"
;; and "htslib" inputs of this package, so that the Makefile uses them
;; instead of the removed bundled copies.
5650 ,#~(list (string-append "BOOST="
5651 #$(this-package-input "boost")
5653 (string-append "SAMHEADERS="
5654 #$(this-package-input "htslib")
5655 "/include/htslib/sam.h")
5656 (string-append "SAMLIBS="
5657 #$(this-package-input "htslib")
5660 (modify-phases %standard-phases
5661 ;; No "configure" script.
5664 (substitute* "Makefile"
5665 (("^all : \\$\\(PROGRAMS\\).*") "all: $(PROGRAMS)\n")
5666 ;; Do not build bundled samtools library.
5667 (("^\\$\\(SAMLIBS\\).*") "")
5669 (("gnu\\+\\+98") "gnu++11"))
5670 ;; C++11 compatibility
5671 (substitute* "buildReadIndex.cpp"
5672 (("success = \\(getline")
5673 "success = (bool)(getline"))
5674 (substitute* '("PairedEndHit.h"
5676 (("return \\(in>>sid>>pos")
5677 "return (bool)(in>>sid>>pos"))))
5679 (lambda* (#:key outputs #:allow-other-keys)
;; NOTE(review): `string-append' with a single argument only returns a copy
;; of that string; the wrapper around assoc-ref is redundant.
5680 (let* ((out (string-append (assoc-ref outputs "out")))
5681 (bin (string-append out "/bin/"))
5682 (perl (string-append out "/lib/perl5/site_perl")))
;; Install every file found under "." whose name matches "rsem-.*" into bin,
;; and the Perl module into the site_perl directory.
5685 (for-each (lambda (file)
5686 (install-file file bin))
5687 (find-files "." "rsem-.*"))
5688 (install-file "rsem_perl_utils.pm" perl))))
;; Wrap the listed programs so that PERL5LIB is prefixed with the output's
;; site_perl directory, where rsem_perl_utils.pm is installed.
5689 (add-after 'install 'wrap-program
5690 (lambda* (#:key outputs #:allow-other-keys)
5691 (let ((out (assoc-ref outputs "out")))
5692 (for-each (lambda (prog)
5693 (wrap-program (string-append out "/bin/" prog)
5694 `("PERL5LIB" ":" prefix
5695 (,(string-append out "/lib/perl5/site_perl")))))
5696 '("rsem-calculate-expression"
5698 "rsem-generate-data-matrix"
5699 "rsem-generate-ngvector"
5700 "rsem-plot-transcript-wiggles"
5701 "rsem-prepare-reference"
5703 "rsem-run-prsem-testing-procedure"))))))))
5705 (list boost r-minimal perl htslib-1.3 zlib))
5706 (home-page "http://deweylab.biostat.wisc.edu/rsem/")
5707 (synopsis "Estimate gene expression levels from RNA-Seq data")
5709 "RSEM is a software package for estimating gene and isoform expression
5710 levels from RNA-Seq data. The RSEM package provides a user-friendly
5711 interface, supports threads for parallel computation of the EM algorithm,
5712 single-end and paired-end read data, quality scores, variable-length reads and
5713 RSPD estimation. In addition, it provides posterior mean and 95% credibility
5714 interval estimates for expression levels. For visualization, it can generate
5715 BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
5716 (license license:gpl3+)))
;; RSeQC: RNA-seq quality control.  Release tarball "RSeQC-<version>.tar.gz"
;; from the SourceForge mirror, built with python-build-system.
5718 (define-public rseqc
5726 (string-append "mirror://sourceforge/rseqc/"
5727 "RSeQC-" version ".tar.gz"))
5730 "0gbb9iyb7swiv5455fm5rg98r7l6qn27v564yllqjd574hncpx6m"))))
5731 (build-system python-build-system)
5741 (home-page "http://rseqc.sourceforge.net/")
5742 (synopsis "RNA-seq quality control package")
5744 "RSeQC provides a number of modules that can comprehensively evaluate
5745 high throughput sequence data, especially RNA-seq data. Some basic modules
5746 inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
5747 while RNA-seq specific modules evaluate sequencing saturation, mapped reads
5748 distribution, coverage uniformity, strand specificity, etc.")
5749 (license license:gpl3+)))
;; SEEK gene co-expression search engine, built with cmake-build-system from a
;; pinned commit of the FunctionLab/sleipnir git repository (fetched
;; recursively).  The version string is derived with `git-version' from "1",
;; the revision and the commit.
5752 ;; There are no release tarballs. And the installation instructions at
5753 ;; http://seek.princeton.edu/installation.jsp only mention a mercurial
5754 ;; changeset ID. This is a git repository, though. So we just take the
5755 ;; most recent commit.
5756 (let ((commit "196ed4c7633246e9c628e4330d77577ccfd7f1e5")
5760 (version (git-version "1" revision commit))
5764 (url "https://github.com/FunctionLab/sleipnir.git")
5766 (recursive? #true)))
5767 (file-name (git-file-name name version))
5770 "0c658n8nz563a96dsi4gl2685vxph0yfmmqq5yjc6i4xin1jy1ab"))))
5771 (build-system cmake-build-system)
;; G-expression building -DSVM_LIBRARY= and -DSVM_INCLUDE= from the "libsvm"
;; input of this package.
5774 ,#~(list (string-append "-DSVM_LIBRARY="
5775 #$(this-package-input "libsvm")
5777 (string-append "-DSVM_INCLUDE="
5778 #$(this-package-input "libsvm")
5780 #:tests? #false ; tests only fail in the build container
5782 (modify-phases %standard-phases
5783 ;; The check phase expects to find the unit_tests executable in the
5784 ;; "build/bin" directory, but it is actually in "build/tests".
5786 (lambda* (#:key tests? #:allow-other-keys)
5788 (invoke "tests/unit_tests")))))))
5790 `(("apache-thrift:include" ,apache-thrift "include")
5791 ("apache-thrift:lib" ,apache-thrift "lib")
5794 ("gengetopt" ,gengetopt)
5796 ("log4cpp" ,log4cpp)
5798 ("readline" ,readline)))
5801 (home-page "http://seek.princeton.edu")
5802 (synopsis "Gene co-expression search engine")
5804 "SEEK is a computational gene co-expression search engine. SEEK provides
5805 biologists with a way to navigate the massive human expression compendium that
5806 now contains thousands of expression datasets. SEEK returns a robust ranking
5807 of co-expressed genes in the biological area of interest defined by the user's
5808 query genes. It also prioritizes thousands of expression datasets according
5809 to the user's query of interest.")
5810 (license license:cc-by3.0))))
;; Samtools (current version): release tarball from the SourceForge mirror.
;; The bundled "htslib-1.14" directory is deleted by the origin snippet and
;; the `htslib' package is listed among the inputs instead.  Several variants
;; in this file inherit from this definition.
5812 (define-public samtools
5820 (string-append "mirror://sourceforge/samtools/samtools/"
5821 version "/samtools-" version ".tar.bz2"))
5824 "0x3xdda78ac5vx66b3jdsv9sfhyz4npl4znl1zbaf3lbm6xdlhck"))
5825 (modules '((guix build utils)))
5827 ;; Delete bundled htslib.
5828 (delete-file-recursively "htslib-1.14")))))
5829 (build-system gnu-build-system)
5831 `(#:configure-flags (list "--with-ncurses")
5833 (modify-phases %standard-phases
;; Replace the hard-coded "/bin/bash" in test/test.pl with the bash returned
;; by `which' in the build environment.
5834 (add-after 'unpack 'patch-tests
5836 (substitute* "test/test.pl"
5837 ;; The test script calls out to /bin/bash
5838 (("/bin/bash") (which "bash"))))))))
5839 (native-inputs (list pkg-config))
5841 (list htslib ncurses perl python zlib))
5842 (home-page "http://samtools.sourceforge.net")
5843 (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
5845 "Samtools implements various utilities for post-processing nucleotide
5846 sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
5847 variant calling (in conjunction with bcftools), and a simple alignment
5849 (license license:expat)))
;; Samtools 1.12 variant: inherits from `samtools' via `package/inherit',
;; deletes the bundled "htslib-1.12" and uses `htslib-1.12' as input.  On top
;; of the inherited arguments it adds two phases after 'install: one installs
;; libbam.a into <out>/lib, the other installs the top-level *.h files into
;; <out>/include/samtools/.
5851 (define-public samtools-1.12
5852 (package/inherit samtools
5858 (string-append "mirror://sourceforge/samtools/samtools/"
5859 version "/samtools-" version ".tar.bz2"))
5862 "1jrdj2idpma5ja9cg0rr73b565vdbr9wyy6zig54bidicc2pg8vd"))
5863 (modules '((guix build utils)))
5865 ;; Delete bundled htslib.
5866 (delete-file-recursively "htslib-1.12")))))
5868 (substitute-keyword-arguments (package-arguments samtools)
5872 (guix build gnu-build-system)
5873 (guix build utils)))
5875 `(modify-phases ,phases
5876 (add-after 'install 'install-library
5877 (lambda* (#:key outputs #:allow-other-keys)
5878 (let ((lib (string-append (assoc-ref outputs "out") "/lib")))
5879 (install-file "libbam.a" lib))))
5880 (add-after 'install 'install-headers
5881 (lambda* (#:key outputs #:allow-other-keys)
5882 (let ((include (string-append (assoc-ref outputs "out")
5883 "/include/samtools/")))
;; `scandir' lists only the entries of "." itself (no recursion); the
;; predicate keeps names ending in ".h".
5884 (for-each (lambda (file)
5885 (install-file file include))
5886 (scandir "." (lambda (name)
5887 (string-match "\\.h$" name)))))))))))
5888 (native-inputs (list pkg-config))
5890 (list htslib-1.12 ncurses perl python zlib))))
;; Samtools 1.10 variant: inherits from `samtools', deletes the bundled
;; "htslib-1.10" directory and uses `htslib-1.10' as input.
;; NOTE(review): this variant uses (package (inherit samtools) ...) while
;; samtools-1.12 uses `package/inherit' -- confirm whether they should agree.
5892 (define-public samtools-1.10
5893 (package (inherit samtools)
5900 (string-append "mirror://sourceforge/samtools/samtools/"
5901 version "/samtools-" version ".tar.bz2"))
5904 "119ms0dpydw8dkh3zc4yyw9zhdzgv12px4l2kayigv31bpqcb7kv"))
5905 (modules '((guix build utils)))
5907 ;; Delete bundled htslib.
5908 (delete-file-recursively "htslib-1.10")
5911 (list htslib-1.10 ncurses perl python zlib))))
;; Samtools 1.2 variant: inherits from `samtools'.  The origin snippet deletes
;; the "win32" and "htslib-1.2.1" directories.  This version has no configure
;; step (the phase is deleted), so prefix and the htslib locations are passed
;; as make variables, and the Makefile and tests are patched below.
5913 (define-public samtools-1.2
5914 (package (inherit samtools)
5921 (string-append "mirror://sourceforge/samtools/samtools/"
5922 version "/samtools-" version ".tar.bz2"))
5925 "1akdqb685pk9xk1nb6sa9aq8xssjjhvvc06kp4cpdqvz2157l3j2"))
5926 (modules '((guix build utils)))
5928 ;; Delete bundled htslib and Windows binaries
5929 '(for-each delete-file-recursively (list "win32" "htslib-1.2.1")))))
;; G-expression building prefix=, BGZIP=, HTSLIB= and HTSDIR= from the output
;; and the "htslib" input of this package.
5932 ,#~(list (string-append "prefix=" #$output)
5933 (string-append "BGZIP="
5934 #$(this-package-input "htslib")
5936 (string-append "HTSLIB="
5937 #$(this-package-input "htslib")
5939 (string-append "HTSDIR="
5940 #$(this-package-input "htslib")
5943 (modify-phases %standard-phases
;; Link against ncurses instead of curses and drop the Makefile line that
;; includes a file from $(HTSDIR).
5944 (add-after 'unpack 'patch-makefile-curses
5946 (substitute* "Makefile"
5947 (("-lcurses") "-lncurses")
5948 (("include \\$\\(HTSDIR.*") ""))))
5949 (add-after 'unpack 'patch-tests
5951 (substitute* "test/test.pl"
5952 ;; The test script calls out to /bin/bash
5953 (("/bin/bash") (which "bash"))
5954 ;; There are two failing tests upstream relating to the "stats"
5955 ;; subcommand in test_usage_subcommand ("did not have Usage"
5956 ;; and "usage did not mention samtools stats"), so we disable
5958 (("(test_usage_subcommand\\(.*\\);)" cmd)
5959 (string-append "unless ($subcommand eq 'stats') {" cmd "};")))
5960 ;; This test fails because the grep output doesn't look as
5961 ;; expected; it is correct, though.
5962 (substitute* "test/mpileup/mpileup.reg"
5963 (("P 52.out.*") ""))))
5964 (delete 'configure))))
5966 (list grep gawk pkg-config))
5968 (list htslib-for-samtools-1.2 ncurses perl python zlib))))
;; Samtools 0.1 variant: inherits from `samtools' and reuses its arguments
;; through `substitute-keyword-arguments'.  There is no check target; the
;; inherited 'patch-tests and 'configure phases are deleted, and phases are
;; added that install the "samtools" binary, libbam.a and the top-level
;; header files.
5970 (define-public samtools-0.1
5971 ;; This is the most recent version of the 0.1 line of samtools. The input
5972 ;; and output formats differ greatly from that used and produced by samtools
5973 ;; 1.x and is still used in many bioinformatics pipelines.
5974 (package (inherit samtools)
5980 (string-append "mirror://sourceforge/samtools/samtools/"
5981 version "/samtools-" version ".tar.bz2"))
5983 (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
5985 `(#:tests? #f ;no "check" target
5987 (list "LIBCURSES=-lncurses")
5988 ,@(substitute-keyword-arguments (package-arguments samtools)
5992 (guix build gnu-build-system)
5993 (guix build utils)))
5995 `(modify-phases ,phases
5997 (lambda* (#:key outputs #:allow-other-keys)
5998 (let ((bin (string-append
5999 (assoc-ref outputs "out") "/bin")))
6001 (install-file "samtools" bin)
6003 (add-after 'install 'install-library
6004 (lambda* (#:key outputs #:allow-other-keys)
6005 (let ((lib (string-append (assoc-ref outputs "out") "/lib")))
6006 (install-file "libbam.a" lib))))
;; Headers go to <out>/include/samtools/; smithlab-cpp in this file rewrites
;; its includes to "samtools/sam.h" and takes samtools-0.1 as input.
6007 (add-after 'install 'install-headers
6008 (lambda* (#:key outputs #:allow-other-keys)
6009 (let ((include (string-append (assoc-ref outputs "out")
6010 "/include/samtools/")))
6011 (for-each (lambda (file)
6012 (install-file file include))
6013 (scandir "." (lambda (name)
6014 (string-match "\\.h$" name)))))))
6015 (delete 'patch-tests)
6016 (delete 'configure))))))))
;; MOSAIK read mapper, pinned to a specific commit of wanpinglee/MOSAIK.
;; Built with gnu-build-system (CC=gcc, no tests) from the "src"
;; subdirectory; the install step copies "../bin" into the output.
;; Restricted to x86_64-linux.
6018 (define-public mosaik
6019 (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
6024 ;; There are no release tarballs nor tags.
6027 (url "https://github.com/wanpinglee/MOSAIK")
6029 (file-name (string-append name "-" version))
6032 "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
6033 (build-system gnu-build-system)
6035 `(#:tests? #f ; no tests
6036 #:make-flags (list "CC=gcc")
6038 (modify-phases %standard-phases
6040 (lambda _ (chdir "src") #t))
6042 (lambda* (#:key outputs #:allow-other-keys)
6043 (let ((bin (string-append (assoc-ref outputs "out")
;; The build runs inside "src" (see the chdir above), hence the "../bin"
;; source path.
6046 (copy-recursively "../bin" bin)
6050 ("zlib:static" ,zlib "static")
6052 (supported-systems '("x86_64-linux"))
6053 (home-page "https://github.com/wanpinglee/MOSAIK")
6054 (synopsis "Map nucleotide sequence reads to reference genomes")
6056 "MOSAIK is a program for mapping second and third-generation sequencing
6057 reads to a reference genome. MOSAIK can align reads generated by all the
6058 major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
6059 Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
6060 ;; MOSAIK is released under the GPLv2+ with the exception of third-party
6061 ;; code released into the public domain:
6062 ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
6063 ;; 2. MD5 implementation - RSA Data Security, RFC 1321
6064 (license (list license:gpl2+ license:public-domain)))))
;; Mosaicatcher: Strand-seq read counting and classification.  Git checkout
;; of the commit reference <version> + "-dev", patched with
;; "mosaicatcher-unbundle-htslib.patch" and built with cmake-build-system
;; from the "src" subdirectory.
6066 (define-public mosaicatcher
6068 (name "mosaicatcher")
6071 ;; There are no release tarballs nor tags.
6074 (url "https://github.com/friendsofstrandseq/mosaicatcher")
6075 (commit (string-append version "-dev"))))
6076 (file-name (git-file-name name version))
6079 "1n2s5wvvj2y0vfgjkg1q11xahpbagxz7h2vf5q7qyy25s12kbzbd"))
6080 (patches (search-patches "mosaicatcher-unbundle-htslib.patch"))))
6081 (build-system cmake-build-system)
6083 `(#:tests? #false ; there are no tests
6085 (modify-phases %standard-phases
6086 (add-after 'unpack 'chdir
6087 (lambda _ (chdir "src")))
6089 (lambda* (#:key inputs outputs #:allow-other-keys)
6090 (let* ((target (assoc-ref outputs "out"))
6091 (bin (string-append target "/bin"))
6092 (share (string-append target "/share/mosaicatcher")))
;; Install the "mosaic" executable into bin and copy the "../R" directory
;; into share/mosaicatcher.
6093 (install-file "mosaic" bin)
6095 (copy-recursively "../R" share)))))))
6097 (list boost htslib))
6098 (home-page "https://github.com/friendsofstrandseq/mosaicatcher")
6099 (synopsis "Count and classify Strand-seq reads")
6101 "Mosaicatcher counts Strand-seq reads and classifies strand states of
6102 each chromosome in each cell using a Hidden Markov Model.")
6103 (license license:expat)))
;; NGS SDK from the ncbi/ngs repository, built with gnu-build-system from
;; the "ngs-sdk" subdirectory.  Parallel builds are disabled and there is no
;; check target.  `java-ngs' inherits from this package and replaces the
;; 'enter-dir phase.
6105 (define-public ngs-sdk
6112 (url "https://github.com/ncbi/ngs")
6114 (file-name (git-file-name name version))
6117 "1ix51c25hjn57w93qmwzw80xh2i34wx8j2hn7szh8p6w8i3az5qa"))))
6118 (build-system gnu-build-system)
6120 `(#:parallel-build? #f ; not supported
6121 #:tests? #f ; no "check" target
6123 (modify-phases %standard-phases
6125 (lambda* (#:key outputs #:allow-other-keys)
6126 (let ((out (assoc-ref outputs "out")))
6127 ;; Allow 'konfigure.perl' to find 'package.prl'.
6129 (string-append ".:" (getenv "PERL5LIB")))
6131 ;; The 'configure' script doesn't recognize things like
6132 ;; '--enable-fast-install'.
6133 (invoke "./configure"
6134 (string-append "--build-prefix=" (getcwd) "/build")
6135 (string-append "--prefix=" out))
;; Enter the "ngs-sdk" subdirectory of the checkout right after unpacking.
6137 (add-after 'unpack 'enter-dir
6138 (lambda _ (chdir "ngs-sdk") #t)))))
6139 (native-inputs (list perl))
6140 ;; According to the test
6141 ;; unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
6142 ;; in ngs-sdk/setup/konfigure.perl
6143 (supported-systems '("i686-linux" "x86_64-linux"))
6144 (home-page "https://github.com/ncbi/ngs")
6145 (synopsis "API for accessing Next Generation Sequencing data")
6147 "NGS is a domain-specific API for accessing reads, alignments and pileups
6148 produced from Next Generation Sequencing. The API itself is independent from
6149 any particular back-end implementation, and supports use of multiple back-ends
6151 (license license:public-domain)))
;; Java bindings for the NGS SDK.  Inherits from `ngs-sdk', reuses its
;; arguments and replaces the 'enter-dir phase so that the build happens in
;; the "ngs-java" subdirectory.  The "jdk" output of icedtea and `ngs-sdk'
;; itself are listed as inputs.
6153 (define-public java-ngs
6154 (package (inherit ngs-sdk)
6157 `(,@(substitute-keyword-arguments
6158 `(#:modules ((guix build gnu-build-system)
6162 ,@(package-arguments ngs-sdk))
6164 `(modify-phases ,phases
6165 (replace 'enter-dir (lambda _ (chdir "ngs-java") #t)))))))
6167 `(("jdk" ,icedtea "jdk")
6168 ("ngs-sdk" ,ngs-sdk)))
6169 (synopsis "Java bindings for NGS SDK")))
;; NCBI-VDB database engine, built with gnu-build-system (no parallel build,
;; no check target, HAVE_HDF5=1).  The phases below make the sources
;; writable, patch the build files, run the project's own ./configure with
;; explicit prefixes for its dependencies, and install extra libraries,
;; headers and configuration files that sra-tools needs.
6171 (define-public ncbi-vdb
6178 (url "https://github.com/ncbi/ncbi-vdb")
6180 (file-name (git-file-name name version))
6183 "0m8hlxscidsfqm9x9fyi62q6lpf1dv5115kgjjgnrkl49q9c27m6"))))
6184 (build-system gnu-build-system)
6186 `(#:parallel-build? #f ; not supported
6187 #:tests? #f ; no "check" target
6188 #:make-flags '("HAVE_HDF5=1")
6190 (modify-phases %standard-phases
6191 (add-after 'unpack 'make-files-writable
6192 (lambda _ (for-each make-file-writable (find-files "." ".*")) #t))
6193 (add-before 'configure 'set-perl-search-path
6195 ;; Work around "dotless @INC" build failure.
6197 (string-append (getcwd) "/setup:"
6198 (getenv "PERL5LIB")))
6200 ;; See https://github.com/ncbi/ncbi-vdb/issues/14
;; Remove the assembler -march flags from libs/krypto/Makefile.
6201 (add-after 'unpack 'patch-krypto-flags
6203 (substitute* "libs/krypto/Makefile"
6204 (("-Wa,-march=generic64\\+aes") "")
6205 (("-Wa,-march=generic64\\+sse4") ""))
6208 (lambda* (#:key inputs outputs #:allow-other-keys)
6209 (let ((out (assoc-ref outputs "out")))
6210 ;; Override include path for libmagic
6211 (substitute* "setup/package.prl"
6212 (("name => 'magic', Include => '/usr/include'")
6213 (string-append "name=> 'magic', Include => '"
6214 (assoc-ref inputs "libmagic")
6217 ;; Install kdf5 library (needed by sra-tools)
6218 (substitute* "build/Makefile.install"
6219 (("LIBRARIES_TO_INSTALL =")
6220 "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))
;; Insert "-msse2 " right after "CFLAGS =" in build/Makefile.env.
6222 (substitute* "build/Makefile.env"
6223 (("CFLAGS =" prefix)
6224 (string-append prefix "-msse2 ")))
6226 ;; Override search path for ngs-java
6227 (substitute* "setup/package.prl"
6228 (("/usr/local/ngs/ngs-java")
6229 (assoc-ref inputs "java-ngs")))
6231 ;; The 'configure' script doesn't recognize things like
6232 ;; '--enable-fast-install'.
6233 (invoke "./configure"
6234 (string-append "--build-prefix=" (getcwd) "/build")
6235 (string-append "--prefix=" (assoc-ref outputs "out"))
;; NOTE(review): `string-append' with a single literal is redundant here; the
;; plain string "--debug" would be equivalent.
6236 (string-append "--debug")
6237 (string-append "--with-xml2-prefix="
6238 (assoc-ref inputs "libxml2"))
6239 (string-append "--with-ngs-sdk-prefix="
6240 (assoc-ref inputs "ngs-sdk"))
6241 (string-append "--with-hdf5-prefix="
6242 (assoc-ref inputs "hdf5")))
6244 (add-after 'install 'install-interfaces
6245 (lambda* (#:key outputs #:allow-other-keys)
6246 ;; Install interface libraries. On i686 the interface libraries
6247 ;; are installed to "linux/gcc/i386", so we need to use the Linux
6248 ;; architecture name ("i386") instead of the target system prefix
6250 (mkdir (string-append (assoc-ref outputs "out") "/ilib"))
6251 (copy-recursively (string-append
6252 "build/ncbi-vdb/linux/gcc/"
;; The architecture name is computed on the host side (unquoted) from the
;; target system if set, otherwise from the current system.
6253 ,(platform-linux-architecture
6254 (lookup-platform-by-target-or-system
6255 (or (%current-target-system)
6256 (%current-system))))
6258 (string-append (assoc-ref outputs "out")
6260 ;; Install interface headers
6261 (copy-recursively "interfaces"
6262 (string-append (assoc-ref outputs "out")
6265 ;; These files are needed by sra-tools.
6266 (add-after 'install 'install-configuration-files
6267 (lambda* (#:key outputs #:allow-other-keys)
6268 (let ((target (string-append (assoc-ref outputs "out") "/kfg")))
6270 (install-file "libs/kfg/default.kfg" target)
6271 (install-file "libs/kfg/certs.kfg" target))
6274 `(("libxml2" ,libxml2)
6275 ("ngs-sdk" ,ngs-sdk)
6276 ("java-ngs" ,java-ngs)
6279 (native-inputs (list perl))
6280 ;; NCBI-VDB requires SSE capability.
6281 (supported-systems '("i686-linux" "x86_64-linux"))
6282 (home-page "https://github.com/ncbi/ncbi-vdb")
6283 (synopsis "Database engine for genetic information")
6285 "The NCBI-VDB library implements a highly compressed columnar data
6286 warehousing engine that is most often used to store genetic information.
6287 Databases are stored in a portable image within the file system, and can be
6288 accessed/downloaded on demand across HTTP.")
6289 (license license:public-domain)))
;; PLINK (1.x): whole genome association analysis toolset.  Source zip from
;; pngu.mgh.harvard.edu with two local patches, built with gnu-build-system.
;; There is no check target; the install step copies the "plink" binary into
;; the output.  `plink-ng' inherits from this package.
6291 (define-public plink
6299 "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
6300 version "-src.zip"))
6302 (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
6303 (patches (search-patches "plink-1.07-unclobber-i.patch"
6304 "plink-endian-detection.patch"))))
6305 (build-system gnu-build-system)
6307 `(#:tests? #f ;no "check" target
;; LIB_LAPACK points at liblapack.so inside the "lapack" input of this package.
6308 #:make-flags ,#~(list (string-append "LIB_LAPACK="
6309 #$(this-package-input "lapack")
6310 "/lib/liblapack.so")
6313 ;; disable phoning home
6316 (modify-phases %standard-phases
6317 ;; no "configure" script
6320 (lambda* (#:key outputs #:allow-other-keys)
6321 (let ((bin (string-append (assoc-ref outputs "out")
6323 (install-file "plink" bin)))))))
6328 (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
6329 (synopsis "Whole genome association analysis toolset")
6331 "PLINK is a whole genome association analysis toolset, designed to
6332 perform a range of basic, large-scale analyses in a computationally efficient
6333 manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
6334 so there is no support for steps prior to this (e.g. study design and
6335 planning, generating genotype or CNV calls from raw data). Through
6336 integration with gPLINK and Haploview, there is some support for the
6337 subsequent visualization, annotation and storage of results.")
6338 ;; Code is released under GPLv2, except for fisher.h, which is under
6340 (license (list license:gpl2 license:lgpl2.1+))))
;; PLINK 2 ("plink-ng"): inherits from `plink' but overrides version, source,
;; build system, arguments, inputs, home page and license.  The build runs in
;; "2.0/build_dynamic" and installs the "plink2" binary.  Tests are disabled
;; (see the #:tests? comment), although a check phase is still defined.
6342 (define-public plink-ng
6343 (package (inherit plink)
6345 (version "2.00a3.3")
6350 (url "https://github.com/chrchang/plink-ng")
6351 (commit (string-append "v" version))))
6352 (file-name (git-file-name name version))
6354 (base32 "0m8wkyvbgvcr5kzc284w8fbhpxwglh2c1xq0yc3yv00a53gs7rv0"))))
6355 (build-system gnu-build-system)
6358 #:tests? #false ;TEST_EXTRACT_CHR doesn't produce expected files
6360 #~(list "BLASFLAGS=-llapack -lopenblas"
6364 (string-append "CC=" #$(cc-for-target))
6365 (string-append "PREFIX=" #$output)
6368 '(modify-phases %standard-phases
6369 (add-after 'unpack 'chdir
6370 (lambda _ (chdir "2.0/build_dynamic")))
6371 (delete 'configure) ; no "configure" script
6373 (lambda* (#:key tests? inputs #:allow-other-keys)
;; Put the build directory on PATH, then run ../Tests/run_tests.sh with bash
;; after patching the script via substitute*.
6375 (setenv "PATH" (string-append (getcwd) ":" (getenv "PATH")))
6376 (with-directory-excursion "../Tests"
6377 (substitute* "run_tests.sh"
6379 (string-append (which "bash") " " m)))
6380 (invoke "bash" "run_tests.sh")))))
6382 (lambda* (#:key outputs #:allow-other-keys)
6383 (install-file "plink2"
6385 (assoc-ref outputs "out") "/bin")))))))
6387 (list lapack openblas zlib `(,zstd "lib")))
6389 (list diffutils plink python simde)) ; for tests
6390 (home-page "https://www.cog-genomics.org/plink/")
6391 (license license:gpl3+)))
;; Smithlab CPP helper library, pinned to a commit of
;; smithlabcode/smithlab_cpp.  The version is "0." + revision + "." + the
;; first 7 characters of the commit.  There is no check target and no
;; configure step.  The install step copies the object files and .hpp headers
;; into the output; `preseq' in this file takes this package as an input.
6393 (define-public smithlab-cpp
6394 (let ((revision "1")
6395 (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
6397 (name "smithlab-cpp")
6398 (version (string-append "0." revision "." (string-take commit 7)))
6402 (url "https://github.com/smithlabcode/smithlab_cpp")
6404 (file-name (string-append name "-" version "-checkout"))
6407 "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
6408 (build-system gnu-build-system)
6410 `(#:modules ((guix build gnu-build-system)
6413 #:tests? #f ;no "check" target
6415 (modify-phases %standard-phases
;; Rewrite references to sam.h so they point at "samtools/sam.h", matching the
;; include/samtools/ directory that samtools-0.1 installs its headers into.
;; NOTE(review): the pattern "sam.h" is a regular expression, so the
;; unescaped "." matches any character -- confirm this cannot hit other text.
6416 (add-after 'unpack 'use-samtools-headers
6418 (substitute* '("SAM.cpp"
6420 (("sam.h") "samtools/sam.h"))
6423 (lambda* (#:key outputs #:allow-other-keys)
6424 (let* ((out (assoc-ref outputs "out"))
6425 (lib (string-append out "/lib"))
6426 (include (string-append out "/include/smithlab-cpp")))
;; Object files (*.o) go to <out>/lib, headers (*.hpp) to
;; <out>/include/smithlab-cpp.
6429 (for-each (cut install-file <> lib)
6430 (find-files "." "\\.o$"))
6431 (for-each (cut install-file <> include)
6432 (find-files "." "\\.hpp$")))
6434 (delete 'configure))))
6436 (list samtools-0.1 zlib))
6437 (home-page "https://github.com/smithlabcode/smithlab_cpp")
6438 (synopsis "C++ helper library for functions used in Smith lab projects")
6440 "Smithlab CPP is a C++ library that includes functions used in many of
6441 the Smith lab bioinformatics projects, such as a wrapper around Samtools data
6442 structures, classes for genomic regions, mapped sequencing reads, etc.")
6443 (license license:gpl3+))))
;; preseq: library complexity estimation.  Release tarball from GitHub with
;; the bundled "samtools" directory removed by the origin snippet.  There is
;; no check target and no configure step; the locations of the samtools and
;; smithlab-cpp inputs are passed as make variables.
6445 (define-public preseq
6451 (uri (string-append "https://github.com/smithlabcode/preseq/"
6452 "releases/download/v" version
6453 "/preseq_v" version ".tar.bz2"))
6455 (base32 "149x9xmk1wy1gff85325yfzqc0qk4sgp1w6gbyj9cnji4x1dszbl"))
6456 (modules '((guix build utils)))
6457 ;; Remove bundled samtools.
6458 (snippet '(delete-file-recursively "samtools"))))
6459 (build-system gnu-build-system)
6461 `(#:tests? #f ;no "check" target
6463 (modify-phases %standard-phases
6464 (delete 'configure))
;; G-expression building PREFIX=, LIBBAM= and SMITHLAB_CPP= from the output
;; and the "samtools" / "smithlab-cpp" inputs of this package.  The
;; INCLUDEDIRS value is a literal string containing make variable references.
6466 ,#~(list (string-append "PREFIX=" #$output)
6467 (string-append "LIBBAM="
6468 #$(this-package-input "samtools")
6470 (string-append "SMITHLAB_CPP="
6471 #$(this-package-input "smithlab-cpp")
6474 "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)")))
6476 (list gsl samtools-0.1 smithlab-cpp zlib))
6477 (home-page "http://smithlabresearch.org/software/preseq/")
6478 (synopsis "Program for analyzing library complexity")
6480 "The preseq package is aimed at predicting and estimating the complexity
6481 of a genomic sequencing library, equivalent to predicting and estimating the
6482 number of redundant reads from a given sequencing depth and how many will be
6483 expected from additional sequencing using an initial sequencing experiment.
6484 The estimates can then be used to examine the utility of further sequencing,
6485 optimize the sequencing depth, or to screen multiple libraries to avoid low
6486 complexity samples.")
6487 (license license:gpl3+)))
;; python-screed: FASTA/FASTQ parsing and database utilities.  Source from
;; PyPI ("screed"), built with python-build-system.
6489 (define-public python-screed
6491 (name "python-screed")
6496 (uri (pypi-uri "screed" version))
6499 "148vcb7w2wr6a4w6vs2bsxanbqibxfk490zbcbg4m61s8669zdjx"))))
6500 (build-system python-build-system)
6502 (list python-pytest python-pytest-cov python-pytest-runner))
6504 (list python-bz2file))
6505 (home-page "https://github.com/dib-lab/screed/")
6506 (synopsis "Short read sequence database utilities")
6507 (description "Screed parses FASTA and FASTQ files and generates databases.
6508 Values such as sequence name, sequence description, sequence quality and the
6509 sequence itself can be retrieved from these databases.")
6510 (license license:bsd-3)))
;; python-taggd: TagGD barcode demultiplexing.  Git checkout from GitHub,
;; built with python-build-system.  The origin snippet deletes every *.c file
;; under "taggd" (presumably so they are regenerated with the python-cython
;; listed below -- confirm).
6512 (define-public python-taggd
6514 (name "python-taggd")
6519 (url "https://github.com/SpatialTranscriptomicsResearch/taggd")
6521 (file-name (git-file-name name version))
6524 "0j19ah81z7aqrdljah9hyarp91gvgbk63pz6fz3pdpksy1yqyi6k"))
6525 (modules '((guix build utils)))
6527 '(for-each delete-file
6528 (find-files "taggd" "\\.c$")))))
6529 (build-system python-build-system)
6533 '(modify-phases %standard-phases
;; Rename test_normal_bam_run so that its name no longer starts with "test",
;; which keeps the test runner from collecting it.
6534 (add-after 'unpack 'disable-broken-tests
6536 (substitute* "tests/taggd_demultiplex_test.py"
6537 (("def test_normal_bam_run")
6538 "def _disabled_test_normal_bam_run")))))))
6540 (list python-numpy python-pysam python-setuptools))
6542 (list python-cython))
6543 (home-page "https://github.com/SpatialTranscriptomicsResearch/taggd")
6544 (synopsis "Genetic barcode demultiplexing")
6545 (description "This package provides TagGD barcode demultiplexing utilities
6546 for Spatial Transcriptomics data.")
6547 (license license:bsd-3)))
;; ST Pipeline: automated spatial mapping of unique transcripts.
;; Source comes from PyPI and is built with python-build-system.  The only
;; visible phase strips the "argparse" entry from requirements.txt.
6549 (define-public stpipeline
6555 (uri (pypi-uri "stpipeline" version))
6558 "0har2g42fvaqpiz66lincy86aj1hvwzds26kxhxfamvyvv4721wk"))))
6559 (build-system python-build-system)
6563 '(modify-phases %standard-phases
6564 (add-after 'unpack 'relax-requirements
;; Replace any text starting at "argparse" up to end of line with nothing.
6566 (substitute* "requirements.txt"
6567 (("argparse.*") "")))))))
6585 (home-page "https://github.com/SpatialTranscriptomicsResearch/st_pipeline")
6586 (synopsis "Pipeline for spatial mapping of unique transcripts")
6588 "This package provides an automated pipeline for spatial mapping of
6589 unique transcripts.")
6590 (license license:expat)))
;; NCBI SRA Toolkit, fetched from GitHub and built with gnu-build-system.
;; Parallel builds and tests are turned off.  The make flags point
;; DEFAULT_CRT, DEFAULT_KFG and VDB_LIBDIR at the "ncbi-vdb" input, and the
;; configure step is replaced by a direct call to ./configure with explicit
;; prefixes for fuse, libmagic, libxml2, ncbi-vdb, ngs-sdk and hdf5, after
;; patching setup/konfigure.perl and two Makefiles.
6592 (define-public sra-tools
6600 (url "https://github.com/ncbi/sra-tools")
6602 (file-name (git-file-name name version))
6605 "1cr2mijkfs5sm35ffjs6861qsd1qkgnhnbavdv65zg5d655abbjf"))))
6606 (build-system gnu-build-system)
6608 `(#:parallel-build? #f ; not supported
6609 #:tests? #f ; no "check" target
6611 ,#~(list (string-append "DEFAULT_CRT="
6612 #$(this-package-input "ncbi-vdb")
6614 (string-append "DEFAULT_KFG="
6615 #$(this-package-input "ncbi-vdb")
6617 (string-append "VDB_LIBDIR="
6618 #$(this-package-input "ncbi-vdb")
6619 #$(if (string-prefix? "x86_64"
6620 (or (%current-target-system)
6625 (modify-phases %standard-phases
6626 (add-before 'configure 'set-perl-search-path
6628 ;; Work around "dotless @INC" build failure.
6630 (string-append (getcwd) "/setup:"
6631 (getenv "PERL5LIB")))
6634 (lambda* (#:key inputs outputs #:allow-other-keys)
6635 ;; The build system expects a directory containing the sources and
6636 ;; raw build output of ncbi-vdb, including files that are not
6637 ;; installed. Since we are building against an installed version of
6638 ;; ncbi-vdb, the following modifications are needed.
6639 (substitute* "setup/konfigure.perl"
6640 ;; Make the configure script look for the "ilib" directory of
6641 ;; "ncbi-vdb" without first checking for the existence of a
6642 ;; matching library in its "lib" directory.
6643 (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
6644 "my $f = File::Spec->catdir($ilibdir, $ilib);")
6645 ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
6646 (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
6647 "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))
6650 (substitute* "tools/copycat/Makefile"
6651 (("smagic-static") "lmagic"))
6652 (substitute* "tools/driver-tool/utf8proc/Makefile"
6653 (("CC\\?=gcc") "myCC=gcc")
6654 (("\\(CC\\)") "(myCC)"))
6656 ;; The 'configure' script doesn't recognize things like
6657 ;; '--enable-fast-install'.
6658 (invoke "./configure"
6659 (string-append "--build-prefix=" (getcwd) "/build")
6660 (string-append "--prefix=" (assoc-ref outputs "out"))
6661 (string-append "--debug")
6662 (string-append "--with-fuse-prefix="
6663 (assoc-ref inputs "fuse"))
6664 (string-append "--with-magic-prefix="
6665 (assoc-ref inputs "libmagic"))
6666 ;; TODO: building with libxml2 fails with linker errors
6668 (string-append "--with-xml2-prefix="
6669 (assoc-ref inputs "libxml2"))
;; The installed ncbi-vdb serves as both its "sources" and its "build" tree.
6670 (string-append "--with-ncbi-vdb-sources="
6671 (assoc-ref inputs "ncbi-vdb"))
6672 (string-append "--with-ncbi-vdb-build="
6673 (assoc-ref inputs "ncbi-vdb"))
6674 (string-append "--with-ngs-sdk-prefix="
6675 (assoc-ref inputs "ngs-sdk"))
6676 (string-append "--with-hdf5-prefix="
6677 (assoc-ref inputs "hdf5")))
6679 (native-inputs (list perl))
6681 `(("ngs-sdk" ,ngs-sdk)
6682 ("ncbi-vdb" ,ncbi-vdb)
6687 ("python" ,python-wrapper)))
6689 "https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
6690 (synopsis "Tools and libraries for reading and writing sequencing data")
6692 "The SRA Toolkit from NCBI is a collection of tools and libraries for
6693 reading of sequencing files from the Sequence Read Archive (SRA) database and
6694 writing files into the .sra format.")
6695 (license license:public-domain)))
;; SeqAn (version 3 series, per the "seqan3" release URL): C++ library for
;; biological sequence analysis.  Built with cmake-build-system; the visible
;; phase code runs the test suite through "ctest" with failure output
;; enabled.  Visible inputs are bzip2, cereal and zlib.
6697 (define-public seqan
6703 (uri (string-append "https://github.com/seqan/seqan3/releases/"
6704 "download/" version "/seqan3-"
6705 version "-Source.tar.xz"))
6708 "1h2z0cvgidhkmh5xsbw75waqbrqbbv6kkrvb0b92xfh3gqpaiz22"))))
6709 (build-system cmake-build-system)
6712 (modify-phases %standard-phases
6715 (invoke "ctest" "test" "--output-on-failure"))))))
6717 (list bzip2 cereal zlib))
6718 (home-page "https://www.seqan.de")
6719 (synopsis "Library for nucleotide sequence analysis")
6721 "SeqAn is a C++ library of efficient algorithms and data structures for
6722 the analysis of sequences with the focus on biological data. It contains
6723 algorithms and data structures for string representation and their
6724 manipulation, online and indexed string search, efficient I/O of
6725 bioinformatics file formats, sequence alignment, and more.")
6726 (license license:bsd-3)))
;; SeqAn library tarball from the seqan/seqan GitHub releases.
;; Nothing is compiled (trivial-build-system): the builder unpacks the
;; tarball with tar and xz, then copies "include" into the "out" output and
;; "share" into the "doc" output.
6728 (define-public seqan-2
6734 (uri (string-append "https://github.com/seqan/seqan/releases/"
6735 "download/seqan-v" version
6736 "/seqan-library-" version ".tar.xz"))
6739 "19a1rlxx03qy1i1iriicly68w64yjxbv24g9gdywnfmq998v35yx"))))
6740 ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
6741 ;; makes sense to split the outputs.
6742 (outputs '("out" "doc"))
6743 (build-system trivial-build-system)
6745 `(#:modules ((guix build utils))
6748 (use-modules (guix build utils))
6749 (let ((tar #$(this-package-native-input "tar"))
6750 (xz #$(this-package-native-input "xz"))
;; PATH is set by hand to just the tar and xz bin directories.
6753 (setenv "PATH" (string-append tar "/bin:" xz "/bin"))
6754 (invoke "tar" "xvf" #$(this-package-native-input "source"))
6755 (chdir (string-append "seqan-library-" #$version))
6756 (copy-recursively "include" (string-append out "/include"))
6757 (copy-recursively "share" (string-append doc "/share"))))))
6759 `(("source" ,source)
;; SeqAn library tarball from packages.seqan.de, defined as a variant that
;; inherits from `seqan'.  Same unpack-and-copy builder as seqan-2, but the
;; tarball is bzip2-compressed, so bzip2 rather than xz is put on PATH.
6763 (define-public seqan-1
6764 (package (inherit seqan)
6769 (uri (string-append "https://packages.seqan.de/seqan-library/"
6770 "seqan-library-" version ".tar.bz2"))
6773 "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
6774 ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
6775 ;; makes sense to split the outputs.
6776 (outputs '("out" "doc"))
6777 (build-system trivial-build-system)
6779 `(#:modules ((guix build utils))
6782 (use-modules (guix build utils))
6783 (let ((tar #$(this-package-native-input "tar"))
6784 (bzip #$(this-package-native-input "bzip2"))
6787 (setenv "PATH" (string-append tar "/bin:" bzip "/bin"))
6788 (invoke "tar" "xvf" #$(this-package-native-input "source"))
6789 (chdir (string-append "seqan-library-" #$version))
6790 (copy-recursively "include" (string-append out "/include"))
6791 (copy-recursively "share" (string-append doc "/share"))))))
6793 `(("source" ,source)
6795 ("bzip2" ,bzip2)))))
;; Seqmagick: command-line front end to BioPython's sequence file format
;; conversion.  Source comes from PyPI and is built with
;; python-build-system; python-biopython is the visible dependency.
6797 (define-public seqmagick
6804 (uri (pypi-uri "seqmagick" version))
6807 "0pf98da7i59q47gwrbx0wjk6xlvbybiwphw80w7h4ydjj0579a2b"))))
6808 (build-system python-build-system)
6810 (list python-biopython))
6813 (home-page "https://github.com/fhcrc/seqmagick")
6814 (synopsis "Tools for converting and modifying sequence files")
6816 "Bioinformaticians often have to convert sequence files between formats
6817 and do little manipulations on them, and it's not worth writing scripts for
6818 that. Seqmagick is a utility to expose the file format conversion in
6819 BioPython in a convenient way. Instead of having a big mess of scripts, there
6820 is one that takes arguments.")
6821 (license license:gpl3)))
;; Seqtk: toolkit for FASTA/FASTQ processing, fetched from GitHub at the
;; "v<version>" tag and built with gnu-build-system.  Upstream ships no test
;; suite, so the check step only runs "./seqtk seq"; the install step copies
;; the "seqtk" binary into <out>/bin/.
6823 (define-public seqtk
6830 (url "https://github.com/lh3/seqtk")
6831 (commit (string-append "v" version))))
6832 (file-name (git-file-name name version))
6835 "1bfzlqa84b5s1qi22blmmw2s8xdyp9h9ydcq22pfjhh5gab3yz6l"))))
6836 (build-system gnu-build-system)
6839 (modify-phases %standard-phases
6842 ;; There are no tests, so we just run a sanity check.
6843 (lambda _ (invoke "./seqtk" "seq") #t))
6845 (lambda* (#:key outputs #:allow-other-keys)
6846 (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
6847 (install-file "seqtk" bin)
6851 (home-page "https://github.com/lh3/seqtk")
6852 (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
6854 "Seqtk is a fast and lightweight tool for processing sequences in the
6855 FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
6856 optionally compressed by gzip.")
6857 (license license:expat)))
;; SNAP: hash-index based aligner for short DNA reads, fetched from GitHub
;; at the "v<version>" tag and built with gnu-build-system.  The check step
;; runs "./unit_tests" when tests are enabled; the install step copies the
;; "snap-aligner" and "SNAPCommand" binaries into <out>/bin.  Restricted to
;; x86_64-linux.
6859 (define-public snap-aligner
6861 (name "snap-aligner")
6866 (url "https://github.com/amplab/snap")
6867 (commit (string-append "v" version))))
6868 (file-name (git-file-name name version))
6871 "0yag3jcazdqfxgmw0vbi91i98kj9sr0aqx83qqj6m5b45wxs7jms"))))
6872 (build-system gnu-build-system)
6875 (modify-phases %standard-phases
6878 (lambda* (#:key tests? #:allow-other-keys)
6879 (when tests? (invoke "./unit_tests"))))
6881 (lambda* (#:key outputs #:allow-other-keys)
6882 (let* ((out (assoc-ref outputs "out"))
6883 (bin (string-append out "/bin")))
6884 (install-file "snap-aligner" bin)
6885 (install-file "SNAPCommand" bin)))))))
6888 (home-page "http://snap.cs.berkeley.edu/")
6889 (synopsis "Short read DNA sequence aligner")
6891 "SNAP is a fast and accurate aligner for short DNA reads. It is
6892 optimized for modern read lengths of 100 bases or higher, and takes advantage
6893 of these reads to align data quickly through a hash-based indexing scheme.")
6894 ;; 32-bit systems are not supported by the unpatched code.
6895 ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
6896 ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
6897 ;; systems without a lot of memory cannot make good use of this program.
6898 (supported-systems '("x86_64-linux"))
6899 (license license:asl2.0)))
;; SortMeRNA: filtering, mapping and OTU picking of NGS reads, fetched from
;; GitHub at the "v<version>" tag and built with cmake-build-system.
;; Two outputs: "out" for the binary and "db" for the rRNA FASTA databases.
;; Configure flags point CMake at the rocksdb and rapidjson inputs; a phase
;; after 'unpack adds the concurrentqueue include directory to
;; CPLUS_INCLUDE_PATH.  Tests are disabled even though WITH_TESTS=ON is
;; passed to CMake.
6901 (define-public sortmerna
6909 (url "https://github.com/biocore/sortmerna")
6910 (commit (string-append "v" version))))
6911 (file-name (git-file-name name version))
6914 "0f8jfc8vsq6llhbb92p9yv7nbp566yqwfcmq3g2hw0n7d8hyl3a8"))))
6915 (build-system cmake-build-system)
6916 (outputs '("out" ;for binaries
6917 "db")) ;for sequence databases
6920 #:tests? #false ;unclear how to run them
6922 #~(list "-DWITH_TESTS=ON"
6923 "-DCMAKE_CXX_FLAGS=-pthread"
6925 "-DROCKSDB_STATIC=OFF"
6926 "-DPORTABLE=OFF" ;do not use static linking
6927 (string-append "-DROCKSDB_HOME="
6928 #$(this-package-input "rocksdb"))
6929 (string-append "-DRAPIDJSON_HOME="
6930 #$(this-package-input "rapidjson"))
6931 (string-append "-DRapidJson_DIR="
6932 #$(this-package-input "rapidjson")
6933 "/lib/cmake/RapidJSON")
6934 (string-append "-DRapidJSON_INCLUDE_DIR="
6935 #$(this-package-input "rapidjson")
6938 '(modify-phases %standard-phases
6939 (add-after 'unpack 'find-concurrentqueue-headers
6940 (lambda* (#:key inputs #:allow-other-keys)
6941 ;; Ensure that headers can be found
6942 (setenv "CPLUS_INCLUDE_PATH"
6943 (string-append (search-input-directory
6944 inputs "/include/concurrentqueue")
6946 (or (getenv "CPLUS_INCLUDE_PATH") "")))))
6948 (lambda* (#:key outputs #:allow-other-keys)
6949 (let* ((out (assoc-ref outputs "out"))
6950 (bin (string-append out "/bin"))
6951 (db (assoc-ref outputs "db"))
6953 (string-append db "/share/sortmerna/rRNA_databases")))
6954 (install-file "src/sortmerna" bin)
;; Every file whose name matches ".*fasta" goes to the "db" output.
6955 (for-each (lambda (file)
6956 (install-file file share))
6957 (find-files "../source/data/rRNA_databases" ".*fasta"))))))))
6959 (list concurrentqueue
6960 gflags ; because of rocksdb
6961 rapidjson rocksdb zlib))
6964 (home-page "https://bioinfo.lifl.fr/RNA/sortmerna/")
6965 (synopsis "Biological sequence analysis tool for NGS reads")
6967 "SortMeRNA is a biological sequence analysis tool for filtering, mapping
6968 and @acronym{OTU, operational taxonomic unit} picking of @acronym{NGS, next
6969 generation sequencing} reads. The core algorithm is based on approximate seeds
6970 and allows for fast and sensitive analyses of nucleotide sequences. The main
6971 application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
6972 ;; The source includes x86 specific code
6973 (supported-systems '("x86_64-linux" "i686-linux"))
6974 (license license:lgpl3)))
;; STAR RNA-seq aligner (the define-public line for this package is elided
;; from this excerpt).  The source snippet removes the pre-built binaries and
;; the bundled htslib; the build runs in "source/" with make target "STAR",
;; has no configure step, and installs the "STAR" binary into <out>/bin/.
6983 (url "https://github.com/alexdobin/STAR")
6985 (file-name (git-file-name name version))
6988 "0zc5biymja9zml9yizcj1h68fq9c6sxfcav8a0lbgvgsm44rvans"))
6989 (modules '((guix build utils)))
6992 (substitute* "source/Makefile"
6994 ;; Remove pre-built binaries and bundled htslib sources.
6995 (delete-file-recursively "bin/MacOSX_x86_64")
6996 (delete-file-recursively "bin/Linux_x86_64")
6997 (delete-file-recursively "bin/Linux_x86_64_static")
6998 (delete-file-recursively "source/htslib")
7000 (build-system gnu-build-system)
7002 '(#:tests? #f ;no check target
7003 #:make-flags '("STAR")
7005 (modify-phases %standard-phases
7006 (add-after 'unpack 'enter-source-dir
7007 (lambda _ (chdir "source") #t))
7008 (add-after 'enter-source-dir 'make-reproducible
;; Replace the recorded build time/place with a fixed string.
7010 (substitute* "Makefile"
7011 (("(COMPILATION_TIME_PLACE=\")(.*)(\")" _ pre mid post)
7012 (string-append pre "Built with Guix" post)))
7014 ;; See https://github.com/alexdobin/STAR/pull/562
7015 (add-after 'enter-source-dir 'add-missing-header
7017 (substitute* "SoloReadFeature_inputRecords.cpp"
7018 (("#include \"binarySearch2.h\"" h)
7019 (string-append h "\n#include <math.h>")))
7021 (add-after 'enter-source-dir 'do-not-use-bundled-htslib
7023 (substitute* "Makefile"
7024 (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
7026 (substitute* '("BAMfunctions.cpp"
7031 "bamRemoveDuplicates.cpp")
7032 (("#include \"htslib/([^\"]+\\.h)\"" _ header)
7033 (string-append "#include <" header ">")))
;; The dot before "h" is escaped so that only a literal ".h" suffix
;; matches, as in the substitution just above.
7034 (substitute* "IncludeDefine.h"
7035 (("\"htslib/(htslib/[^\"]+\\.h)\"" _ header)
7036 (string-append "<" header ">")))
7039 (lambda* (#:key outputs #:allow-other-keys)
7040 (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
7041 (install-file "STAR" bin))
7043 (delete 'configure))))
7048 (home-page "https://github.com/alexdobin/STAR")
7049 (synopsis "Universal RNA-seq aligner")
7051 "The Spliced Transcripts Alignment to a Reference (STAR) software is
7052 based on a previously undescribed RNA-seq alignment algorithm that uses
7053 sequential maximum mappable seed search in uncompressed suffix arrays followed
7054 by seed clustering and stitching procedure. In addition to unbiased de novo
7055 detection of canonical junctions, STAR can discover non-canonical splices and
7056 chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
7058 ;; Only 64-bit systems are supported according to the README.
7059 (supported-systems '("x86_64-linux" "mips64el-linux"))
7060 ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
7061 (license license:gpl3+)))
;; Variant of STAR for PiGx, fetched from the same GitHub repository with
;; its own source hash.  The visible snippet mirrors the one in `star': it
;; patches "source/Makefile" and deletes the pre-built binaries and the
;; bundled htslib.  The rest of the definition is elided from this excerpt.
7063 (define-public star-for-pigx
7071 (url "https://github.com/alexdobin/STAR")
7073 (file-name (git-file-name name version))
7076 "1hgiqw5qhs0pc1xazzihcfd92na02xyq2kb469z04y1v51kpvvjq"))
7077 (modules '((guix build utils)))
7080 (substitute* "source/Makefile"
7082 ;; Remove pre-built binaries and bundled htslib sources.
7083 (delete-file-recursively "bin/MacOSX_x86_64")
7084 (delete-file-recursively "bin/Linux_x86_64")
7085 (delete-file-recursively "bin/Linux_x86_64_static")
7086 (delete-file-recursively "source/htslib")
;; STARlong: variant of `star' that inherits its fields and overrides the
;; arguments.  A phase after 'unpack rewrites DEF_readNameLengthMax in
;; "source/IncludeDefine.h" to 900000, and the install step copies the
;; "STARlong" binary into <out>/bin/.
7089 (define-public starlong
7090 (package (inherit star)
7093 (substitute-keyword-arguments (package-arguments star)
7094 ((#:make-flags flags)
7097 `(modify-phases ,phases
7098 ;; Allow extra long sequence reads.
7099 (add-after 'unpack 'make-extra-long
7101 (substitute* "source/IncludeDefine.h"
7102 (("(#define DEF_readNameLengthMax ).*" _ match)
7103 (string-append match "900000\n")))
7106 (lambda* (#:key outputs #:allow-other-keys)
7107 (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
7108 (install-file "STARlong" bin))
;; Subread: read aligners plus featureCounts and exactSNP, fetched from
;; SourceForge and built with gnu-build-system from the "src" directory
;; using "Makefile.Linux".  CCFLAGS is built here so that the MMX/SSE flags
;; are appended only when the target system string starts with "x86_64".
;; There is no configure script and no test target; the install step copies
;; "../bin" into <out>/bin/.
7111 (define-public subread
7117 (uri (string-append "mirror://sourceforge/subread/subread-"
7118 version "/subread-" version "-source.tar.gz"))
7121 "0szmllia7jl0annk5568xjhw6cc8yj1c5mb961qk5m0lz6ig7kjn"))))
7122 (build-system gnu-build-system)
7124 `(#:tests? #f ;no "check" target
7125 ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
7126 ;; optimizations by default, so we override these flags such that x86_64
7127 ;; flags are only added when the build target is an x86_64 system.
7129 (list (let ((system ,(or (%current-target-system)
7131 (flags '("-ggdb" "-fomit-frame-pointer"
7132 "-ffast-math" "-funroll-loops"
7133 "-fmessage-length=0" "-fcommon"
7134 "-O9" "-Wall" "-DMAKE_FOR_EXON"
7136 "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
7137 (flags64 '("-mmmx" "-msse" "-msse2" "-msse3")))
7138 (if (string-prefix? "x86_64" system)
7139 (string-append "CCFLAGS=" (string-join (append flags flags64)))
7140 (string-append "CCFLAGS=" (string-join flags))))
7141 "-f" "Makefile.Linux"
7142 "CC=gcc ${CCFLAGS}")
7144 (modify-phases %standard-phases
7145 (add-after 'unpack 'enter-dir
7146 (lambda _ (chdir "src")))
7148 (lambda* (#:key outputs #:allow-other-keys)
7149 (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
7151 (copy-recursively "../bin" bin))))
7152 ;; no "configure" script
7153 (delete 'configure))))
7154 (inputs (list zlib))
7155 (home-page "http://subread.sourceforge.net/")
7156 (synopsis "Tool kit for processing next-gen sequencing data")
7158 "The subread package contains the following tools: subread aligner, a
7159 general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
7160 and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
7161 features; exactSNP: a SNP caller that discovers SNPs by testing signals
7162 against local background noises.")
7163 (license license:gpl3+)))
;; StringTie: transcript assembler for RNA-Seq alignments, fetched as a
;; tarball from ccb.jhu.edu and built with gnu-build-system.  The bundled
;; "htslib" directory is deleted from the source, and the Makefile is
;; patched to link with "-lhts" instead of the bundled libhts.a.  There is
;; no test suite; the install step copies "stringtie" into <out>/bin/.
7165 (define-public stringtie
7171 (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
7172 "stringtie-" version ".tar.gz"))
7175 "08w3ish4y9kf9acp7k38iwi8ixa6j51m6qyf0vvfj7yz78a3ai3x"))
7176 ;; This package bundles an annoying amount of third party source
7178 (modules '((guix build utils)))
7180 '(delete-file-recursively "htslib"))))
7181 (build-system gnu-build-system)
7183 `(#:tests? #f ;no test suite
7184 #:make-flags '("LIBDEFLATE=-ldeflate"
7188 (modify-phases %standard-phases
7189 ;; no configure script
7191 (add-before 'build 'use-system-samtools
7193 (substitute* "Makefile"
7194 ((" -lm") " -lm -lhts")
7195 ((" \\$\\{HTSLIB\\}/libhts\\.a") " "))))
7197 (lambda* (#:key outputs #:allow-other-keys)
7198 (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
7199 (install-file "stringtie" bin)))))))
7201 (list bzip2 htslib-for-stringtie libdeflate zlib))
7202 (home-page "http://ccb.jhu.edu/software/stringtie/")
7203 (synopsis "Transcript assembly and quantification for RNA-Seq data")
7205 "StringTie is a fast and efficient assembler of RNA-Seq sequence
7206 alignments into potential transcripts. It uses a novel network flow algorithm
7207 as well as an optional de novo assembly step to assemble and quantitate
7208 full-length transcripts representing multiple splice variants for each gene
7209 locus. Its input can include not only the alignments of raw reads used by
7210 other transcript assemblers, but also alignments of longer sequences that have
7211 been assembled from those reads. To identify differentially expressed genes
7212 between experiments, StringTie's output can be processed either by the
7213 Cuffdiff or Ballgown programs.")
7214 (license license:expat)))
;; Taxtastic: builds and maintains reference packages (trees, alignments,
;; profiles, taxonomy).  Fetched from GitHub at the "v<version>" tag, since
;; the PyPI release ships no tests, and built with python-build-system.
;; A preparation phase patches one test and writes the version string to
;; "taxtastic/data/ver"; tests run via "python -m unittest"; the
;; 'sanity-check phase is removed.
7216 (define-public taxtastic
7221 ;; The Pypi version does not include tests.
7224 (url "https://github.com/fhcrc/taxtastic")
7225 (commit (string-append "v" version))))
7226 (file-name (git-file-name name version))
7229 "1k6wg9ych1j3srnhdny1y4470qlhfg730rb3rm3pq7l7gw62vmgb"))))
7230 (build-system python-build-system)
7233 (modify-phases %standard-phases
7234 (add-after 'unpack 'prepare-directory
7236 ;; This test fails, but the error is not caught by the test
7237 ;; framework, so the tests fail...
7238 (substitute* "tests/test_taxit.py"
7239 (("self.cmd_fails\\(''\\)")
7240 "self.cmd_fails('nothing')"))
7241 ;; This version file is expected to be created with git describe.
7242 (mkdir-p "taxtastic/data")
7243 (with-output-to-file "taxtastic/data/ver"
7244 (lambda () (display ,version)))))
7246 ;; Note, this fails to run with "-v" as it tries to write to a
7247 ;; closed output stream.
7248 (lambda* (#:key tests? #:allow-other-keys)
7250 (invoke "python" "-m" "unittest"))))
7251 ;; This fails because it cannot find psycopg2 even though it is
7253 (delete 'sanity-check))))
7255 (list python-sqlalchemy
7265 (home-page "https://github.com/fhcrc/taxtastic")
7266 (synopsis "Tools for taxonomic naming and annotation")
7268 "Taxtastic is software written in python used to build and maintain
7269 reference packages i.e. collections of reference trees, reference alignments,
7270 profiles, and associated taxonomic information.")
7271 (license license:gpl3+)))
;; VCFtools: utilities for VCF files, fetched as a release tarball from
;; GitHub and built with gnu-build-system.  Tests are disabled.  The flags
;; set CFLAGS to "-O2" (replacing upstream's "-m64"), PREFIX to the package
;; output, and MANDIR to <output>/share/man/man1.
7273 (define-public vcftools
7280 "https://github.com/vcftools/vcftools/releases/download/v"
7281 version "/vcftools-" version ".tar.gz"))
7284 "1qqlx7flfv7axrjwkaz6njkscsl1d0jw98ns8d8bh1n1hd1pgz6v"))))
7285 (build-system gnu-build-system)
7287 `(#:tests? #f ; no "check" target
7290 "CFLAGS=-O2" ; override "-m64" flag
7291 (string-append "PREFIX=" #$output)
7292 (string-append "MANDIR=" #$output "/share/man/man1"))))
7297 (home-page "https://vcftools.github.io/")
7298 (synopsis "Tools for working with VCF files")
7300 "VCFtools is a program package designed for working with VCF files, such
7301 as those generated by the 1000 Genomes Project. The aim of VCFtools is to
7302 provide easily accessible methods for working with complex genetic variation
7303 data in the form of VCF files.")
7304 ;; The license is declared as LGPLv3 in the README and
7305 ;; at https://vcftools.github.io/license.html
7306 (license license:lgpl3)))
;; Infernal: covariance-model search of sequence databases for RNA structure
;; and sequence similarity.  Fetched as a tarball from eddylab.org and built
;; with gnu-build-system; perl and python are needed for the tests.
;; Restricted to i686-linux and x86_64-linux.
7308 (define-public infernal
7314 (uri (string-append "http://eddylab.org/software/infernal/"
7315 "infernal-" version ".tar.gz"))
7318 "1z4mgwqg1j4n5ika08ai8mg9yjyjhf4821jp83v2bgwzxrykqjgr"))))
7319 (build-system gnu-build-system)
7321 (list perl python)) ; for tests
7322 (home-page "http://eddylab.org/infernal/")
7323 (synopsis "Inference of RNA alignments")
7324 (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
7325 searching DNA sequence databases for RNA structure and sequence similarities.
7326 It is an implementation of a special case of profile stochastic context-free
7327 grammars called @dfn{covariance models} (CMs). A CM is like a sequence
7328 profile, but it scores a combination of sequence consensus and RNA secondary
7329 structure consensus, so in many cases, it is more capable of identifying RNA
7330 homologs that conserve their secondary structure more than their primary
7332 ;; Infernal 1.1.3 requires VMX or SSE capability for parallel instructions.
7333 (supported-systems '("i686-linux" "x86_64-linux"))
7334 (license license:bsd-3)))
;; gUtils (mskilab): extra capabilities for GenomicRanges operations.
;; Pinned to a specific GitHub commit; the version string is derived with
;; git-version from base "0.2.0", the revision and the commit.  Built with
;; r-build-system; upstream-name records the upstream spelling "gUtils".
7336 (define-public r-gutils
7337 (let ((commit "10e36c7b580aacb2d952140a3fdd82418aaddea6")
7341 (version (git-version "0.2.0" revision commit))
7345 (url "https://github.com/mskilab/gUtils")
7347 (file-name (git-file-name name version))
7350 "1wq9kd1afzy7ii510r20c4n9fkykj6p15q5c85ws27h1q5w4ghxy"))))
7351 (properties `((upstream-name . "gUtils")))
7352 (build-system r-build-system)
7354 (list r-biocgenerics
7362 (home-page "https://github.com/mskilab/gUtils")
7363 (synopsis "Additional capabilities and speed for GenomicRanges operations")
7365 "This is an R package providing additional capabilities and speed for
7366 @code{GenomicRanges} operations.")
7367 (license license:gpl2))))
;; bamUtils (mskilab): utility functions for manipulating BAM files.
;; Pinned to a specific GitHub commit; the version string is derived with
;; git-version from base "0.0.0.9000".  Built with r-build-system;
;; upstream-name records the upstream spelling "bamUtils".
7369 (define-public r-bamutils
7370 (let ((commit "639dba901f16944fa1b7a8d7048701ba86a2cdb8")
7374 (version (git-version "0.0.0.9000" revision commit))
7378 (url "https://github.com/mskilab/bamutils/")
7380 (file-name (git-file-name name version))
7383 "0qwby2v5rydnipvf1iv1wz9nf02yq98k0xbc4inf9mqc54jwacs0"))))
7384 (properties `((upstream-name . "bamUtils")))
7385 (build-system r-build-system)
7394 r-variantannotation))
7395 (home-page "https://github.com/mskilab/bamutils/")
7396 (synopsis "Utility functions for manipulating BAMs")
7397 (description "This package provides utility functions for manipulating
7399 (license license:gpl2))))
;; gTrack (mskilab): multi-track plotting of genomic data.
;; Pinned to a specific GitHub commit; the version string is derived with
;; git-version from base "0.1.0".  Built with r-build-system;
;; upstream-name records the upstream spelling "gTrack".
7401 (define-public r-gtrack
7402 (let ((commit "a694fa36cedafca2658da79fc8e5b673535b15e5")
7406 (version (git-version "0.1.0" revision commit))
7410 (url "https://github.com/mskilab/gTrack/")
7412 (file-name (git-file-name name version))
7415 "070qlrbqsbj9max2vx740zigqh0ymvnw2pm1ia5la3wb4dbfwh2b"))))
7416 (properties `((upstream-name . "gTrack")))
7417 (build-system r-build-system)
7419 (list r-biocgenerics
7431 (home-page "https://github.com/mskilab/gTrack/")
7432 (synopsis "Plot tracks of complex genomic data across multiple genomic windows")
7434 "This package provides an object for plotting GRanges, RleList, UCSC
7435 file formats, and ffTrack objects in multi-track panels.")
7436 (license license:gpl2))))
;; gChain (mskilab).  Pinned to a specific GitHub commit; the version string
;; is derived with git-version from base "0.2.0".  Built with
;; r-build-system; upstream-name records the upstream spelling "gChain".
;; NOTE(review): synopsis and description are the same text as r-gutils --
;; confirm they describe gChain rather than being copied over.
7438 (define-public r-gchain
7439 (let ((commit "dc393e8dd0d8efaf36270c04d7112db8553db36a")
7443 (version (git-version "0.2.0" revision commit))
7447 (url "https://github.com/mskilab/gChain/")
7449 (file-name (git-file-name name version))
7452 "105wgi5w2fhwq1grsvj6zjigwg0sny3z7zr577q8ki3qffjwdkj0"))))
7453 (properties `((upstream-name . "gChain")))
7454 (build-system r-build-system)
7465 (home-page "https://github.com/mskilab/gChain/")
7466 (synopsis "Additional capabilities and speed for GenomicRanges operations")
7468 "This R package provides additional capabilities and speed for
7469 GenomicRanges operations.")
7470 (license license:gpl2))))
;; skitools (mskilab): miscellaneous R utilities.
;; Pinned to a specific GitHub commit; the version string is derived with
;; git-version from base "0.0.0.9000".  Built with r-build-system.
7472 (define-public r-skitools
7473 (let ((commit "22d107d32f063eb891eb5e7fb36996d1c0b0d2bc")
7477 (version (git-version "0.0.0.9000" revision commit))
7481 (url "https://github.com/mskilab/skitools/")
7483 (file-name (git-file-name name version))
7486 "1977d9bkdk9l2n6niahfj9vksh9l1ga4g7c3b3x27lj1gc0qgr4z"))))
7487 (properties `((upstream-name . "skitools")))
7488 (build-system r-build-system)
7510 r-variantannotation))
7511 (home-page "https://github.com/mskilab/skitools/")
7512 (synopsis "Various mskilab R utilities")
7514 "This package provides R miscellaneous utilities for basic data
7515 manipulation, debugging, visualization, lsf management, and common mskilab
7517 (license license:expat))))
;; chromunity (mskilab): discovery of communities in Pore-C concatemers.
;; Pinned to a specific GitHub commit; the version string is derived with
;; git-version from base "0.0.1".  Built with r-build-system.
7519 (define-public r-chromunity
7520 (let ((commit "09fce8bc12cb84b45a6ea25bf8db6e5b75113d4f")
7523 (name "r-chromunity")
7524 (version (git-version "0.0.1" revision commit))
7528 (url "https://github.com/mskilab/chromunity")
7530 (file-name (git-file-name name version))
7533 "0lp0h614k8fq6h9gpbylk4chh7q6w4qda8lx03ajrpppxmg7al2d"))))
7534 (properties `((upstream-name . "chromunity")))
7535 (build-system r-build-system)
7552 (home-page "https://github.com/mskilab/chromunity")
7553 (synopsis "Discovery of communities in Pore-C concatemers")
7554 (description "This is a package for the discovery of communities in
7555 Pore-C concatemers.")
7556 (license license:gpl3))))
;; presto (immunogenomics): fast Wilcoxon rank sum test.
;; Pinned to a specific GitHub commit; the version string is derived with
;; git-version from base "1.0.0".  Built with r-build-system.
7558 (define-public r-presto
7559 (let ((commit "052085db9c88aa70a28d11cc58ebc807999bf0ad")
7563 (version (git-version "1.0.0" revision commit))
7568 (url "https://github.com/immunogenomics/presto")
7570 (file-name (git-file-name name version))
7572 (base32 "1c3fmag4r4p2lvbvxlxyck9dvfw1prbwcl9665mmlx4a35750hk8"))))
;; The property key is `upstream-name', as in the other R packages in this
;; file; a bare `upstream' key is not the one they use.
7573 (properties `((upstream-name . "presto")))
7574 (build-system r-build-system)
7585 (home-page "https://github.com/immunogenomics/presto")
7586 (synopsis "Fast Functions for Differential Expression using Wilcox and AUC")
7587 (description "This package performs a fast Wilcoxon rank sum test and
7589 (license license:gpl3))))
;; scCustomize: helpers for visualizing and analysing single-cell data in R.
;; Pinned to a specific GitHub commit; the version string is derived with
;; git-version from base "0.7.0".  Built with r-build-system, with r-knitr
;; as a native input; upstream-name records the spelling "scCustomize".
7591 (define-public r-sccustomize
7592 (let ((commit "8414d1f5fb32277855b0619191a568932b7baeb0")
7595 (name "r-sccustomize")
7596 (version (git-version "0.7.0" revision commit))
7600 (url "https://github.com/samuel-marsh/scCustomize")
7602 (file-name (git-file-name name version))
7605 "1wcgfq7lx83a2kf8pjbw524gdvxf351n08cwd5wzmmy57kf4knbj"))))
7606 (properties `((upstream-name . "scCustomize")))
7607 (build-system r-build-system)
7641 (native-inputs (list r-knitr))
7642 (home-page "https://github.com/samuel-marsh/scCustomize")
7643 (synopsis "Custom visualization and analyses of single-cell sequencing")
7645 "This is a collection of functions created and/or curated to aid in the
7646 visualization and analysis of single-cell data using R.")
7647 (license license:gpl3+))))
;; SnapATAC: analysis toolkit for single-cell ATAC-seq data.
;; Unlike the commit-pinned R packages nearby, this one is fetched from
;; GitHub at the "v<version>" tag.  Built with r-build-system;
;; upstream-name records the upstream spelling "SnapATAC".
7649 (define-public r-snapatac
7657 (url "https://github.com/r3fang/SnapATAC")
7658 (commit (string-append "v" version))))
7659 (file-name (git-file-name name version))
7661 (base32 "037jzlbl436fi7lkpq7d83i2vd1crnrik3vac2x6xj75dbikb2av"))))
7662 (properties `((upstream-name . "SnapATAC")))
7663 (build-system r-build-system)
7684 (home-page "https://github.com/r3fang/SnapATAC")
7685 (synopsis "Single nucleus analysis package for ATAC-Seq")
7687 "This package provides a fast and accurate analysis toolkit for single
7688 cell ATAC-seq (Assay for transposase-accessible chromatin using sequencing).
7689 Single cell ATAC-seq can resolve the heterogeneity of a complex tissue and
7690 reveal cell-type specific regulatory landscapes. However, the exceeding data
7691 sparsity has posed unique challenges for the data analysis. This package
7692 @code{r-snapatac} is an end-to-end bioinformatics pipeline for analyzing large-
7693 scale single cell ATAC-seq data which includes quality control, normalization,
7694 clustering analysis, differential analysis, motif inference and exploration of
7695 single cell ATAC-seq sequencing data.")
7696 (license license:gpl3)))
;; umi4cPackage (tanaylab): processing and plotting of UMI-4C data.
;; Pinned to a specific GitHub commit; the version string is derived with
;; git-version from base "0.0.1".  Built with r-build-system.  The
;; 'fix-references phase writes the path of the "bowtie2" executable found
;; via `which' into inst/conf/paths.conf and patches the path to "perl" in
;; import3C.pl.
7698 (define-public r-umi4cpackage
7699 (let ((commit "88b07d896a137418ba6c31c2474b9dbe1d86fc20")
7702 (name "r-umi4cpackage")
7703 (version (git-version "0.0.1" revision commit))
7708 (url "https://github.com/tanaylab/umi4cpackage")
7710 (file-name (git-file-name name version))
7712 (base32 "0bjzamdw2lcfhlbzc0vdva87c3wwnij8jsvnrpx4wyyxvpcz13m5"))))
7713 (properties `((upstream-name . "umi4cPackage")))
7714 (build-system r-build-system)
7717 (modify-phases %standard-phases
7718 (add-after 'unpack 'fix-references
7720 (substitute* "inst/conf/paths.conf"
7721 (("TG3C\\.bowtie2_bin=.*")
7722 (string-append "TG3C.bowtie2_bin="
7723 (which "bowtie2") "\n")))
7724 (substitute* "inst/perl/map3c/TG3C/import3C.pl"
7726 (string-append "\"" (which "perl")))))))))
7730 (list r-misha r-zoo))
7731 (native-inputs (list r-knitr))
7732 (home-page "https://github.com/tanaylab/umi4cpackage")
7733 (synopsis "Processing and analysis of UMI-4C contact profiles")
7734 (description "This is a package that lets you process UMI-4C data from
7735 scratch to produce nice plots.")
7736 (license license:expat))))
;; ShinyCell: Shiny web apps for interactive exploration of single-cell
;; data.  Pinned to a specific GitHub commit; the version string is derived
;; with git-version from base "2.0.0".  Built with r-build-system;
;; upstream-name records the upstream spelling "ShinyCell".
7738 (define-public r-shinycell
7740 "aecbd56e66802f28e397f5ae1f19403aadd12163")
7743 (name "r-shinycell")
7744 (version (git-version "2.0.0" revision commit))
7749 (url "https://github.com/SGDDNB/ShinyCell")
7751 (file-name (git-file-name name version))
7754 "13jn2ikmvljnzayk485g1mmq5abcp9m1b8n1djdb1agmn83zaki5"))))
7755 (properties `((upstream-name . "ShinyCell")))
7756 (build-system r-build-system)
7768 (home-page "https://github.com/SGDDNB/ShinyCell")
7769 (synopsis "Shiny interactive web apps for single-cell data")
7771 "This package provides Shiny apps for interactive exploration of
7773 (license license:gpl3))))
;; ArchR: single-cell ATAC-seq analysis in R.
;; Pinned to a specific GitHub commit; the version string is derived with
;; git-version from base "1.0.1".  Built with r-build-system;
;; upstream-name records the upstream spelling "ArchR".
7775 (define-public r-archr
7776 (let ((commit "92ab814f86be0cea75c661f9827a9549c2cf47f5")
7780 (version (git-version "1.0.1" revision commit))
7785 (url "https://github.com/GreenleafLab/ArchR")
7787 (file-name (git-file-name name version))
7789 (base32 "1m1vp3kkpvd0fcviv5vb3gcbm3w91ih6gm9ivg48swnbqny44kqb"))))
7790 (properties `((upstream-name . "ArchR")))
7791 (build-system r-build-system)
7793 (list r-biocgenerics
7815 r-summarizedexperiment
7817 (home-page "https://github.com/GreenleafLab/ArchR")
7818 (synopsis "Analyze single-cell regulatory chromatin in R")
7820 "This package is designed to streamline scATAC analyses in R.")
7821 (license license:gpl2+))))
;; r-icellnet: the ICELLNET R package, taken from the soumelis-lab/ICELLNET
;; GitHub repository at a pinned commit and versioned as a git snapshot of
;; 1.0.  The 'enter-dir phase changes into the "icellnet" subdirectory after
;; unpacking, so the build runs there rather than at the repository root.
;; NOTE(review): the commit id below has 39 hexadecimal digits, whereas a full
;; SHA-1 object name has 40; confirm it against the upstream repository.
7823 (define-public r-icellnet
7824 ;; v1.0 tagged in 2020, last commit contains many fixes.
7825 ;; DESCRIPTION says Version: 0.0.0.9000.
7826 (let ((commit "b9c05488fb8b5ea69bd560018966eaf4e25f82a")
7830 (version (git-version "1.0" revision commit))
7834 (url "https://github.com/soumelis-lab/ICELLNET")
7836 (file-name (git-file-name name version))
7839 "0cld7d6xqnvd0zpcpg3sx73an6vdc9divzywgnn6zxnqcd987cnw"))))
7840 (build-system r-build-system)
7843 (modify-phases %standard-phases
7844 (add-after 'unpack 'enter-dir
7845 (lambda _ (chdir "icellnet"))))))
7847 (list r-annotationdbi
7856 (home-page "https://github.com/soumelis-lab/ICELLNET")
7857 (synopsis "Transcriptomic-based framework to dissect cell communication")
7858 (description "This package provides a transcriptomic-based framework
7859 to dissect cell communication in a global manner. It integrates an original
7860 expert-curated database of ligand-receptor interactions taking into account
7861 multiple subunits expression. Based on transcriptomic profiles (gene
7862 expression), this package allows computing communication scores between cells
7863 and provides several visualization modes that can be helpful to dig into
7864 cell-cell interaction mechanism and extend biological knowledge.")
7865 (license license:gpl3))))
;; r-scde: the SCDE R package, taken from the hms-dbmi/scde GitHub
;; repository.  Uses r-build-system.  The license field is license:gpl2; the
;; comment next to it points at the upstream issue for the license question.
7867 (define-public r-scde
7874 (url "https://github.com/hms-dbmi/scde")
7876 (file-name (git-file-name name version))
7879 "10na2gyka24mszdxf92wz9h2c13hdf1ww30c68gfsw53lvvhhhxb"))))
7880 (build-system r-build-system)
7897 (home-page "https://hms-dbmi.github.io/scde/")
7898 (synopsis "R package for analyzing single-cell RNA-seq data")
7899 (description "The SCDE package implements a set of statistical methods for
7900 analyzing single-cell RNA-seq data. SCDE fits individual error models for
7901 single-cell RNA-seq measurements. These models can then be used for
7902 assessment of differential expression between groups of cells, as well as
7903 other types of analysis. The SCDE package also contains the pagoda framework
7904 which applies pathway and gene set overdispersion analysis to identify aspects
7905 of transcriptional heterogeneity among single cells.")
7906 ;; See https://github.com/hms-dbmi/scde/issues/38
7907 (license license:gpl2)))
;; r-millefy: the millefy R package (upstream-name "millefy"), taken from the
;; yuifu/millefy GitHub repository at the tag formed by prefixing the version
;; with "v".  Uses r-build-system; the license field is license:expat.
7909 (define-public r-millefy
7912 (version "0.1.9-beta")
7917 (url "https://github.com/yuifu/millefy")
7918 (commit (string-append "v" version))))
7919 (file-name (git-file-name name version))
7922 "0z2y0x99f761pxvg6n37cmnyrnj699jhjk43pvk05sa86iykgizl"))))
7923 (properties `((upstream-name . "millefy")))
7924 (build-system r-build-system)
7935 (home-page "https://github.com/yuifu/millefy")
7936 (synopsis "Make millefy plot with single-cell RNA-seq data")
7937 (description "@code{Millefy} is a tool for visualizing read coverage of
7938 @dfn{scRNA-seq} (single-cell RNA sequencing) datasets in genomic contexts. By
7939 dynamically and automatically reordering single cells based on locus-specific
7940 pseudo time, @code{Millefy} highlights cell-to-cell heterogeneity in read coverage
7941 of scRNA-seq data.")
7942 (license license:expat)))
;; r-misha: the misha R package, taken from the tanaylab/misha GitHub
;; repository.  The source snippet deletes the bundled "exec/bigWigToWig"
;; executable, and the 'do-not-use-bundled-bigWigToWig phase rewrites
;; "R/misha.R" so that the reference to that executable points at
;; "/bin/bigWigToWig" under the "kentutils" input instead.  Uses
;; r-build-system; the license field is license:gpl2.
7944 (define-public r-misha
7952 (url "https://github.com/tanaylab/misha")
7954 (file-name (git-file-name name version))
7957 "0bgivx3lzjh3173jsfrhb5kvhjsn53br0n4hmyx7i3dwy2cnnp2p"))
7958 ;; Delete bundled executable.
7960 '(delete-file "exec/bigWigToWig"))))
7961 (build-system r-build-system)
7964 (modify-phases %standard-phases
7965 (add-after 'unpack 'do-not-use-bundled-bigWigToWig
7966 (lambda* (#:key inputs #:allow-other-keys)
7967 (substitute* "R/misha.R"
7968 (("get\\(\".GLIBDIR\"\\), \"/exec/bigWigToWig")
7970 (assoc-ref inputs "kentutils")
7971 "/bin/bigWigToWig"))))))))
7974 (home-page "https://github.com/tanaylab/misha")
7975 (synopsis "Toolkit for analysis of genomic data")
7976 (description "This package is intended to help users to efficiently
7977 analyze genomic data resulting from various experiments.")
7978 (license license:gpl2)))
;; r-scseqcomm: the scSeqComm R package, taken from the sysbiobig/scseqcomm
;; GitLab repository at a pinned commit and versioned as a git snapshot of
;; "0".  The source snippet recursively deletes the bundled "other_deps"
;; directory.  Uses r-build-system; the license field is license:gpl3.
7980 (define-public r-scseqcomm
7981 (let ((commit "01076e703999f1a5aa76419d821b50aebe2b777a")
7984 (name "r-scseqcomm")
7985 (version (git-version "0" revision commit))
7990 (url "https://gitlab.com/sysbiobig/scseqcomm")
7992 (file-name (git-file-name name version))
7995 "1fw5si47d6agnz5fibmp2b1sv08pbpwv1j71w57xbav9044i032q"))
7996 ;; Delete bundled dependency.
7997 (modules '((guix build utils)))
7999 '(delete-file-recursively "other_deps"))))
8000 (build-system r-build-system)
8007 ;;r-grid ;; listed in DESCRIPTION
8014 ;;r-methods ;; listed in DESCRIPTION
8024 (home-page "https://gitlab.com/sysbiobig/scseqcomm")
8025 (synopsis "Inter- and intra- cellular signaling from single cell RNA-seq")
8026 (description "This package provides tools for analysing intercellular and
8027 intracellular signaling from single cell RNA-seq (scRNA-seq) data.")
8028 (license license:gpl3))))
;; r-shaman: the Shaman R package, taken from the tanaylab/shaman GitHub
;; repository at a pinned commit; the version is a git snapshot built from
;; the `release' and `revision' variables.  The source snippet deletes
;; "inst/doc/shaman-package.R", which per the comment is generated again.
;; Uses r-build-system; the license field is license:gpl3+.
8030 (define-public r-shaman
8031 (let ((commit "d6944e8ac7bd1dbd5c6cec646eafc1d19d0ca96f")
8036 (version (git-version release revision commit))
8040 (url "https://github.com/tanaylab/shaman")
8042 (file-name (git-file-name name version))
8045 "03sx138dzpfiq23j49z0m0s4j79855mrg64hpj9c83408wzphxi6"))
8047 ;; This file will be generated.
8048 '(delete-file "inst/doc/shaman-package.R"))))
8049 (build-system r-build-system)
8063 (home-page "https://github.com/tanaylab/shaman")
8064 (synopsis "Sampling HiC contact matrices for a-parametric normalization")
8065 (description "The Shaman package implements functions for
8066 resampling Hi-C matrices in order to generate expected contact
8067 distributions given constraints on marginal coverage and
8068 contact-distance probability distributions. The package also provides
8069 support for visualizing normalized matrices and statistical analysis
8070 of contact distributions around selected landmarks.")
8071 ;; Any version of the GPL
8072 (license license:gpl3+))))
;; r-centipede: the CENTIPEDE R package, downloaded as a versioned
;; "CENTIPEDE_<version>.tar.gz" tarball from download.r-forge.r-project.org.
;; Uses r-build-system; the license field lists both license:gpl2+ and
;; license:gpl3+.
8074 (define-public r-centipede
8076 (name "r-centipede")
8080 (uri (string-append "http://download.r-forge.r-project.org/"
8081 "src/contrib/CENTIPEDE_" version ".tar.gz"))
8084 "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
8085 (build-system r-build-system)
8086 (home-page "http://centipede.uchicago.edu/")
8087 (synopsis "Predict transcription factor binding sites")
8089 "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
8090 of the genome that are bound by particular transcription factors. It starts
8091 by identifying a set of candidate binding sites, and then aims to classify the
8092 sites according to whether each site is bound or not bound by a transcription
8093 factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
8094 between two different types of motif instances using as much relevant
8095 information as possible.")
8096 (license (list license:gpl2+ license:gpl3+))))
;; r-demultiplex: the deMULTIplex R package (upstream-name "deMULTIplex"),
;; taken from the chris-mcginnis-ucsf/MULTI-seq GitHub repository at a pinned
;; commit and versioned as a git snapshot of 1.0.2.  Uses r-build-system and
;; lists r-kernsmooth, r-reshape2, r-rtsne, r-shortread and r-stringdist as
;; packages it depends on; the license field is license:cc0.
8098 (define-public r-demultiplex
8099 (let ((commit "6e2a1422c8e6f418cfb271997eebc91f9195f299")
8102 (name "r-demultiplex")
8103 (version (git-version "1.0.2" revision commit))
8108 (url "https://github.com/chris-mcginnis-ucsf/MULTI-seq")
8110 (file-name (git-file-name name version))
8113 "01kv88wp8vdaq07sjk0d3d1cb553mq1xqg0war81pgmg63bgi38w"))))
8114 (properties `((upstream-name . "deMULTIplex")))
8115 (build-system r-build-system)
8117 (list r-kernsmooth r-reshape2 r-rtsne r-shortread r-stringdist))
8118 (home-page "https://github.com/chris-mcginnis-ucsf/MULTI-seq")
8119 (synopsis "MULTI-seq pre-processing and classification tools")
8121 "deMULTIplex is an R package for analyzing single-cell RNA sequencing
8122 data generated with the MULTI-seq sample multiplexing method. The package
8123 includes software to
8126 @item Convert raw MULTI-seq sample barcode library FASTQs into a sample
8127 barcode UMI count matrix, and
8128 @item Classify cell barcodes into sample barcode groups.
8131 (license license:cc0))))
;; gdc-client: the GDC data transfer tool, taken from the NCI-GDC/gdc-client
;; GitHub repository.  Uses python-build-system.  The 'relax-requirements
;; phase, run after 'unpack, edits "requirements.txt" with substitute*.
;; The license field is license:asl2.0.
8133 (define-public gdc-client
8141 (url "https://github.com/NCI-GDC/gdc-client.git")
8143 (file-name (git-file-name name version))
8146 "0cagawlzjwj3wam10lv64xgbfx4zcnzxi5sjpsdhq7rn4z24mzc2"))))
8147 (build-system python-build-system)
8150 (modify-phases %standard-phases
8151 (add-after 'unpack 'relax-requirements
8153 (substitute* "requirements.txt"
8156 (list python-cryptography
8160 python-ndg-httpsclient
8167 (home-page "https://gdc.nci.nih.gov/access-data/gdc-data-transfer-tool")
8168 (synopsis "GDC data transfer tool")
8169 (description "The gdc-client provides several convenience functions over
8170 the GDC API which provides general download/upload via HTTPS.")
8171 (license license:asl2.0)))
;; vsearch: taken from the torognes/vsearch GitHub repository at the tag
;; formed by prefixing the version with "v".  The source is patched with
;; "vsearch-unbundle-cityhash.patch" and the bundled cityhash files
;; (src/city.h, src/citycrc.h, src/city.cc) are deleted, so the separate
;; cityhash package listed below is used instead.  Uses gnu-build-system and
;; is restricted to x86_64-linux via supported-systems.  The license field
;; lists license:gpl3 and license:bsd-2.
8173 (define-public vsearch
8181 (url "https://github.com/torognes/vsearch")
8182 (commit (string-append "v" version))))
8183 (file-name (git-file-name name version))
8186 "0vhrpjfdf75ba04b24xknp41790cvcgwl0vgpy7qbzj5xh2521ss"))
8187 (patches (search-patches "vsearch-unbundle-cityhash.patch"))
8190 ;; Remove bundled cityhash sources. The vsearch source is adjusted
8191 ;; for this in the patch.
8192 (delete-file "src/city.h")
8193 (delete-file "src/citycrc.h")
8194 (delete-file "src/city.cc")
8196 (build-system gnu-build-system)
8198 (list zlib bzip2 cityhash))
8200 (list autoconf automake))
8201 (synopsis "Sequence search tools for metagenomics")
8203 "VSEARCH supports DNA sequence searching, clustering, chimera detection,
8204 dereplication, pairwise alignment, shuffling, subsampling, sorting and
8205 masking. The tool takes advantage of parallelism in the form of SIMD
8206 vectorization as well as multiple threads to perform accurate alignments at
8207 high speed. VSEARCH uses an optimal global aligner (full dynamic programming
8208 Needleman-Wunsch).")
8209 (home-page "https://github.com/torognes/vsearch")
8210 ;; vsearch uses non-portable SSE intrinsics so building fails on other
8212 (supported-systems '("x86_64-linux"))
8213 ;; Dual licensed; also includes public domain source.
8214 (license (list license:gpl3 license:bsd-2))))
;; pardre: ParDRe, downloaded from the SourceForge mirror.  Uses
;; gnu-build-system with tests disabled (they require "prove").  A custom
;; phase copies the "ParDRe" executable into the output's "bin" directory.
;; openmpi-c++ and zlib are listed as packages it depends on; the license
;; field is license:gpl3+.
8216 (define-public pardre
8223 (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
8227 "105s4f8zs8hh0sc32r9p725n7idza9cj5jvp5z1m5pljjhgk3if5"))))
8228 (build-system gnu-build-system)
8230 `(#:tests? #f ; tests require "prove"
8232 (modify-phases %standard-phases
8235 (lambda* (#:key outputs #:allow-other-keys)
8236 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
8237 (install-file "ParDRe" bin)))))))
8239 (list openmpi-c++ zlib))
8240 (synopsis "Parallel tool to remove duplicate DNA reads")
8242 "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
8243 Duplicate reads can be seen as identical or nearly identical sequences with
8244 some mismatches. This tool lets users avoid the analysis of unnecessary
8245 reads, reducing the time of subsequent procedures with the
8246 dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
8247 in order to exploit the parallel capabilities of multicore clusters. It is
8248 faster than multithreaded counterparts (end of 2015) for the same number of
8249 cores and, thanks to the message-passing technology, it can be executed on
8251 (home-page "https://sourceforge.net/projects/pardre/")
8252 (license license:gpl3+)))
;; ngshmmalign: downloaded as a versioned ".tar.bz2" release archive from the
;; cbg-ethz/ngshmmalign GitHub releases page.  Uses cmake-build-system with
;; tests disabled because there are none.  The license field is
;; license:gpl2+.
8254 (define-public ngshmmalign
8256 (name "ngshmmalign")
8261 (uri (string-append "https://github.com/cbg-ethz/ngshmmalign/"
8262 "releases/download/" version
8263 "/ngshmmalign-" version ".tar.bz2"))
8266 "0jryvlssi2r2ii1dxnx39yk6bh4yqgq010fnxrgfgbaj3ykijlzv"))))
8267 (build-system cmake-build-system)
8268 (arguments '(#:tests? #false)) ; there are none
8271 (home-page "https://github.com/cbg-ethz/ngshmmalign/")
8272 (synopsis "Profile HMM aligner for NGS reads")
8274 "ngshmmalign is a profile HMM aligner for NGS reads designed particularly
8275 for small genomes (such as those of RNA viruses like HIV-1 and HCV) that
8276 experience substantial biological insertions and deletions.")
8277 (license license:gpl2+)))
;; prinseq: PRINSEQ lite, downloaded as "prinseq-lite-<version>.tar.gz" from
;; the SourceForge mirror.  Uses gnu-build-system with tests disabled (no
;; check target).  The custom phase collects the files matching
;; "prinseq.*.pl", substitutes the absolute path of perl (found with `which')
;; into them, installs each one into the output's "bin" directory and wraps
;; it with wrap-script so that PERL5LIB is prefixed with the build-time
;; PERL5LIB value.  guile-3.0 is listed for the wrapper scripts.  The
;; license field is license:gpl3+.
8279 (define-public prinseq
8286 (uri (string-append "mirror://sourceforge/prinseq/standalone/"
8287 "prinseq-lite-" version ".tar.gz"))
8290 "0vxmzvmm67whxrqdaaamwgjk7cf0fzfs5s673jgg00kz7g70splv"))))
8291 (build-system gnu-build-system)
8293 `(#:tests? #false ; no check target
8295 (modify-phases %standard-phases
8299 (lambda* (#:key inputs outputs #:allow-other-keys)
8300 (let* ((out (assoc-ref outputs "out"))
8301 (bin (string-append out "/bin"))
8302 (scripts (find-files "." "prinseq.*.pl"))
8303 (guile (search-input-file inputs "bin/guile")))
8304 (substitute* scripts
8306 (string-append "\"" (which "perl") " -pe")))
8307 (for-each (lambda (file)
8309 (install-file file bin)
8310 (wrap-script (string-append bin "/" (basename file))
8312 `("PERL5LIB" ":" prefix
8313 (,(getenv "PERL5LIB")))))
8316 (list guile-3.0 ; for wrapper scripts
8323 perl-statistics-pca))
8324 (home-page "http://prinseq.sourceforge.net/")
8325 (synopsis "Preprocess sequence data in FASTA or FASTQ formats")
8327 "PRINSEQ is a bioinformatics tool to help you preprocess your genomic or
8328 metagenomic sequence data in FASTA or FASTQ formats. The tool is written in
8329 Perl and can be helpful if you want to filter, reformat, or trim your sequence
8330 data. It also generates basic statistics for your sequences.")
8331 (license license:gpl3+)))
;; shorah: ShoRAH, downloaded as a versioned ".tar.xz" release archive from
;; the cbg-ethz/shorah GitHub releases page.  Uses gnu-build-system with
;; three custom phases:
;;   - 'fix-test-wrapper rewrites "examples/run_end2end_test" so the test
;;     script is invoked from the output's "bin" directory;
;;   - 'wrap-programs (after 'install) makes "bin/shorah" mode 555 and wraps
;;     it so GUIX_PYTHONPATH is prefixed with the package's site directory
;;     and the build-time GUIX_PYTHONPATH;
;;   - 'check is added after 'wrap-programs and runs "make check".
;; The license field is license:gpl3+.
8333 (define-public shorah
8340 (uri (string-append "https://github.com/cbg-ethz/shorah"
8341 "/releases/download/v" version
8342 "/shorah-" version ".tar.xz"))
8345 "158dir9qcqspknlnyfr9zwk41x48nrh5wcg10k2grh9cidp9daiq"))))
8346 (build-system gnu-build-system)
8349 (modify-phases %standard-phases
8350 (add-after 'unpack 'fix-test-wrapper
8351 (lambda* (#:key outputs #:allow-other-keys)
8352 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
8353 (substitute* "examples/run_end2end_test"
8354 (("\\$\\{interpreter\\} ../\\$\\{testscript\\}")
8355 (string-append bin "/${testscript}"))))))
8357 (add-after 'install 'wrap-programs
8358 (lambda* (#:key outputs #:allow-other-keys)
8359 (let* ((out (assoc-ref outputs "out"))
8360 (site (string-append
8362 ,(version-major+minor
8363 (package-version python))
8365 (pythonpath (getenv "GUIX_PYTHONPATH"))
8366 (script (string-append out "/bin/shorah")))
8367 (chmod script #o555)
8368 (wrap-program script `("GUIX_PYTHONPATH" ":" prefix (,site ,pythonpath))))))
8369 (add-after 'wrap-programs 'check
8370 (lambda* (#:key tests? #:allow-other-keys)
8372 (invoke "make" "check")))))))
8383 (synopsis "Short reads assembly into haplotypes")
8385 "ShoRAH is a project for the analysis of next generation sequencing data.
8386 It is designed to analyse genetically heterogeneous samples. Its tools
8387 provide error correction, haplotype reconstruction and estimation of the
8388 frequency of the different genetic variants present in a mixed sample.")
8389 (license license:gpl3+)))
;; ruby-bio-kseq: the "bio-kseq" gem, fetched from RubyGems.  Uses
;; ruby-build-system with the test target set to "spec"; bundler, ruby-rspec
;; and ruby-rake-compiler are listed as packages it needs.  The license field
;; is license:expat.
8391 (define-public ruby-bio-kseq
8393 (name "ruby-bio-kseq")
8398 (uri (rubygems-uri "bio-kseq" version))
8401 "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
8402 (build-system ruby-build-system)
8404 `(#:test-target "spec"))
8406 (list bundler ruby-rspec ruby-rake-compiler))
8409 (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
8411 "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
8412 FASTQ parsing code. It provides a fast iterator over sequences and their
8414 (home-page "https://github.com/gusevfe/bio-kseq")
8415 (license license:expat)))
;; bio-locus: the "bio-locus" gem, fetched from RubyGems.  Uses
;; ruby-build-system; the license field is license:expat.
8417 (define-public bio-locus
8424 (uri (rubygems-uri "bio-locus" version))
8427 "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
8428 (build-system ruby-build-system)
8431 (synopsis "Tool for fast querying of genome locations")
8433 "Bio-locus is a tabix-like tool for fast querying of genome
8434 locations. Many file formats in bioinformatics contain records that
8435 start with a chromosome name and a position for a SNP, or a start-end
8436 position for indels. Bio-locus allows users to store this chr+pos or
8437 chr+pos+alt information in a database.")
8438 (home-page "https://github.com/pjotrp/bio-locus")
8439 (license license:expat)))
;; bio-blastxmlparser: the "bio-blastxmlparser" gem, fetched from RubyGems.
;; Uses ruby-build-system; ruby-bio-logger and ruby-nokogiri are listed as
;; packages it depends on.  The license field is license:expat.
8441 (define-public bio-blastxmlparser
8443 (name "bio-blastxmlparser")
8447 (uri (rubygems-uri "bio-blastxmlparser" version))
8450 "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
8451 (build-system ruby-build-system)
8453 (list ruby-bio-logger ruby-nokogiri))
8456 (synopsis "Fast big data BLAST XML parser and library")
8458 "Very fast parallel big-data BLAST XML file parser which can be used as
8459 command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
8460 generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
8461 (home-page "https://github.com/pjotrp/blastxmlparser")
8462 (license license:expat)))
;; bioruby: the "bio" gem, fetched from RubyGems.  Uses ruby-build-system.
;; The 'patch-test-command phase, run before 'build, replaces the hard-coded
;; "/bin/sh", "/bin/ls", "which" and "/bin/echo" references in the test files
;; with the absolute paths that `which' reports in the build environment.
;; The two files patched for "/bin/echo" are given as a plain quoted list of
;; strings: a comma between them would be read by Scheme as `unquote' rather
;; than as a separator.
8464 (define-public bioruby
8471 (uri (rubygems-uri "bio" version))
8474 "1d56amdsjv1mag7m6gv2w0xij8hqx1v5xbdjsix8sp3yp36m7938"))))
8475 (build-system ruby-build-system)
8479 (list which)) ; required for test phase
8482 (modify-phases %standard-phases
8483 (add-before 'build 'patch-test-command
8485 (substitute* '("test/functional/bio/test_command.rb")
8486 (("/bin/sh") (which "sh")))
8487 (substitute* '("test/functional/bio/test_command.rb")
8488 (("/bin/ls") (which "ls")))
8489 (substitute* '("test/functional/bio/test_command.rb")
8490 (("which") (which "which")))
8491 (substitute* '("test/functional/bio/test_command.rb"
8492 "test/data/command/echoarg2.sh")
8493 (("/bin/echo") (which "echo")))
8495 (synopsis "Ruby library, shell and utilities for bioinformatics")
8496 (description "BioRuby comes with a comprehensive set of Ruby development
8497 tools and libraries for bioinformatics and molecular biology. BioRuby has
8498 components for sequence analysis, pathway analysis, protein modelling and
8499 phylogenetic analysis; it supports many widely used data formats and provides
8500 easy access to databases, external programs and public web services, including
8501 BLAST, KEGG, GenBank, MEDLINE and GO.")
8502 (home-page "http://bioruby.org/")
8503 ;; Code is released under Ruby license, except for setup
8504 ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
8505 (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
;; centrifuge: taken from the DaehwanKimLab/centrifuge GitHub repository at
;; the tag formed by prefixing the version with "v".  Uses gnu-build-system
;; with tests disabled (no check target) and the 'configure phase deleted;
;; a gexp passes "prefix=<output>" to the build.  python-wrapper is an
;; input; pandoc and perl are listed for building the documentation.  The
;; license field is license:gpl3+.
8507 (define-public centrifuge
8515 (url "https://github.com/DaehwanKimLab/centrifuge.git")
8516 (commit (string-append "v" version))))
8517 (file-name (git-file-name name version))
8520 "167610gbz1rrh6ir3j7jcmhzg3x5msn7x7a3dpv7wmwdndnnqvg0"))))
8521 (build-system gnu-build-system)
8524 #:tests? #false ; no check target
8526 #~(list (string-append "prefix=" #$output))
8528 '(modify-phases %standard-phases
8529 (delete 'configure))))
8530 (inputs (list python-wrapper))
8532 (list pandoc perl ;for documentation
8534 (home-page "https://github.com/DaehwanKimLab/centrifuge/")
8535 (synopsis "Classifier for metagenomic sequences")
8536 (description "Centrifuge is a microbial classification engine that enables
8537 rapid, accurate and sensitive labeling of reads and quantification of species
8538 on desktop computers. The system uses an indexing scheme based on the
8539 @dfn{Burrows-Wheeler transform} (BWT) and the @dfn{Ferragina-Manzini} (FM)
8540 index, optimized specifically for the metagenomic classification problem.
8541 Centrifuge requires a relatively small index (4.7 GB for all complete
8542 bacterial and viral genomes plus the human genome) and classifies sequences at
8543 very high speed, allowing it to process the millions of reads from a typical
8544 high-throughput DNA sequencing run within a few minutes.")
8545 (license license:gpl3+)))
;; bio-vcf: the "bio-vcf" gem, fetched from RubyGems.  Uses
;; ruby-build-system; ruby-cucumber is listed as a package it needs.  The
;; license field is license:expat.
8547 (define-public bio-vcf
8554 (uri (rubygems-uri "bio-vcf" version))
8557 "1glw5pn9s8z13spxk6yyfqaz80n9lga67f33w35nkpq9dwi2vg6g"))))
8558 (build-system ruby-build-system)
8560 (list ruby-cucumber))
8561 (synopsis "Smart VCF parser DSL")
8563 "Bio-vcf provides a @acronym{DSL, domain specific language} for processing
8564 the VCF format. Record named fields can be queried with regular expressions.
8565 Bio-vcf is a new generation VCF parser, filter and converter. Bio-vcf is not
8566 only very fast for genome-wide (WGS) data, it also comes with a filtering,
8567 evaluation and rewrite language and can output any type of textual data,
8568 including VCF header and contents in RDF and JSON.")
8569 (home-page "https://github.com/vcflib/bio-vcf")
8570 (license license:expat)))
;; r-phantompeakqualtools: taken from the kundajelab/phantompeakqualtools
;; GitHub repository at a pinned commit and versioned as a git snapshot of
;; 1.2.2.  Although it is an R script, it uses gnu-build-system (not
;; r-build-system) with tests disabled; a custom phase installs "run_spp.R"
;; into a directory under the output.  r-minimal is an input under the label
;; "r".  The license field is license:bsd-3.
8572 (define-public r-phantompeakqualtools
8573 (let ((commit "8d2b2d18c686d894ef5908b37da7adf72a07ef42")
8576 (name "r-phantompeakqualtools")
8577 (version (git-version "1.2.2" revision commit))
8582 (url "https://github.com/kundajelab/phantompeakqualtools")
8584 (file-name (git-file-name name version))
8587 "00anrvcwsp02d98qhj1xpj85644h2pp4kfzq6dgbmwmdr6jvy7p4"))))
8588 (build-system gnu-build-system)
8590 `(#:tests? #f ; There are no tests.
8592 (modify-phases %standard-phases
8596 (lambda* (#:key inputs outputs #:allow-other-keys)
8597 (let ((script (string-append (assoc-ref outputs "out")
8599 (install-file "run_spp.R" script)))))))
8601 `(("r" ,r-minimal)))
8613 (home-page "https://github.com/kundajelab/phantompeakqualtools")
8614 (synopsis "Informative enrichment for ChIP-seq data")
8615 (description "This package computes informative enrichment and quality
8616 measures for ChIP-seq/DNase-seq/FAIRE-seq/MNase-seq data. It can also be
8617 used to obtain robust estimates of the predominant fragment length or
8618 characteristic tag shift values in these assays.")
8619 (license license:bsd-3))))
;; r-r4rna: the R4RNA package, downloaded as a tarball from
;; www.e-rna.org/r-chie/files.  Uses r-build-system; r-optparse and
;; r-rcolorbrewer are listed as packages it depends on.  The license field
;; is license:gpl3+.
8621 (define-public r-r4rna
8628 (uri (string-append "http://www.e-rna.org/r-chie/files/R4RNA_"
8632 "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
8633 (build-system r-build-system)
8635 (list r-optparse r-rcolorbrewer))
8636 (home-page "https://www.e-rna.org/r-chie/index.cgi")
8637 (synopsis "Analysis framework for RNA secondary structure")
8639 "The R4RNA package aims to be a general framework for the analysis of RNA
8640 secondary structure and comparative analysis in R.")
8641 (license license:gpl3+)))
;; rcas-web: downloaded as a versioned ".tar.gz" release archive from the
;; BIMSBbioinfo/rcas-web GitHub releases page.  Uses gnu-build-system with
;; two custom phases:
;;   - 'find-RCAS (before 'configure) replaces "1.3.4" with "0.0.0" in the
;;     configure script, for the reason given in the comment on that phase;
;;   - 'wrap-executable (after 'install) wraps "bin/rcas-web" so that
;;     GUILE_LOAD_PATH, GUILE_LOAD_COMPILED_PATH and R_LIBS_SITE are set
;;     (the "=" form assigns the value rather than extending it).
;; NOTE(review): GUILE_LOAD_COMPILED_PATH is given the same
;; "share/guile/site/2.2" directories as GUILE_LOAD_PATH; compiled Guile
;; modules are usually installed elsewhere -- confirm this is intended.
;; The license field is license:agpl3+.
8643 (define-public rcas-web
8650 (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
8651 "releases/download/v" version
8652 "/rcas-web-" version ".tar.gz"))
8655 "0wq951aj45gqki1bickg876i993lmawkp8x24agg264br5x716db"))))
8656 (build-system gnu-build-system)
8659 (modify-phases %standard-phases
8660 (add-before 'configure 'find-RCAS
8661 ;; The configure script can't find non-1.3.x versions of RCAS because
8662 ;; its R expression ‘1.10.1 >= 1.3.4’ evaluates to false.
8664 (substitute* "configure"
8665 (("1\\.3\\.4") "0.0.0"))
8667 (add-after 'install 'wrap-executable
8668 (lambda* (#:key inputs outputs #:allow-other-keys)
8669 (let* ((out (assoc-ref outputs "out"))
8670 (json (assoc-ref inputs "guile-json"))
8671 (redis (assoc-ref inputs "guile-redis"))
8672 (path (string-append
8673 json "/share/guile/site/2.2:"
8674 redis "/share/guile/site/2.2")))
8675 (wrap-program (string-append out "/bin/rcas-web")
8676 `("GUILE_LOAD_PATH" ":" = (,path))
8677 `("GUILE_LOAD_COMPILED_PATH" ":" = (,path))
8678 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
8681 `(("r-minimal" ,r-minimal)
8683 ("guile" ,guile-2.2)
8684 ("guile-json" ,guile-json-1)
8685 ("guile-redis" ,guile2.2-redis)))
8688 (home-page "https://github.com/BIMSBbioinfo/rcas-web")
8689 (synopsis "Web interface for RNA-centric annotation system (RCAS)")
8690 (description "This package provides a simple web interface for the
8691 @dfn{RNA-centric annotation system} (RCAS).")
8692 (license license:agpl3+)))
;; r-chipkernels: the ChIPKernels R package, taken from the
;; ManuSetty/ChIPKernels GitHub repository at a pinned commit and versioned
;; as a git snapshot of 1.1.  Uses r-build-system; the license field is
;; license:gpl2+.
8694 (define-public r-chipkernels
8695 (let ((commit "c9cfcacb626b1221094fb3490ea7bac0fd625372")
8698 (name "r-chipkernels")
8699 (version (git-version "1.1" revision commit))
8704 (url "https://github.com/ManuSetty/ChIPKernels")
8706 (file-name (git-file-name name version))
8709 "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
8710 (build-system r-build-system)
8722 (home-page "https://github.com/ManuSetty/ChIPKernels")
8723 (synopsis "Build string kernels for DNA Sequence analysis")
8724 (description "ChIPKernels is an R package for building different string
8725 kernels used for DNA Sequence analysis. A dictionary of the desired kernel
8726 must be built and this dictionary can be used for determining kernels for DNA
8728 (license license:gpl2+))))
;; r-seqgl: the SeqGL R package, taken from the ManuSetty/SeqGL GitHub
;; repository.  Uses r-build-system; the license field is license:gpl2+.
8730 (define-public r-seqgl
8738 (url "https://github.com/ManuSetty/SeqGL")
8740 (file-name (git-file-name name version))
8743 "1r6ywvhxl3ffv48lgj7sbd582mcc6dha3ksgc2qjlvjrnkbj3799"))))
8744 (build-system r-build-system)
8752 (home-page "https://github.com/ManuSetty/SeqGL")
8753 (synopsis "Group lasso for Dnase/ChIP-seq data")
8754 (description "SeqGL is a group lasso based algorithm to extract
8755 transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
8756 This package presents a method which uses group lasso to discriminate between
8757 bound and non bound genomic regions to accurately identify transcription
8758 factors bound at the specific regions.")
8759 (license license:gpl2+)))
;; emboss: EMBOSS, downloaded as "EMBOSS-<version>.tar.gz" from the
;; emboss.open-bio.org FTP archive.  Uses gnu-build-system and passes
;; "--with-hpdf=<libharu input>" via a gexp.  Three custom phases:
;;   - 'fix-checks replaces the CHECK_PNGDRIVER macro in "configure.in" with
;;     explicit linker flags and definitions that enable PNG support;
;;   - 'disable-update-check removes the "$(bindir)/embossupdate" call from
;;     "Makefile.am", since there is no network access at build time;
;;   - 'autogen runs "autoreconf -vif" to regenerate the build system after
;;     those edits.
;; autoconf, automake, libtool and pkg-config are listed for that
;; regeneration.  The license field is license:gpl2+.
8761 (define-public emboss
8767 (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
8768 (version-major+minor version) ".0/"
8769 "EMBOSS-" version ".tar.gz"))
8772 "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
8773 (build-system gnu-build-system)
8776 ,#~(list (string-append "--with-hpdf="
8777 #$(this-package-input "libharu")))
8779 (modify-phases %standard-phases
8780 (add-after 'unpack 'fix-checks
8782 ;; The PNGDRIVER tests check for the presence of libgd, libpng
8783 ;; and zlib, but assume that they are all found at the same
8785 (substitute* "configure.in"
8786 (("CHECK_PNGDRIVER")
8787 "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
8788 AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
8789 AM_CONDITIONAL(AMPNG, true)"))
8791 (add-after 'fix-checks 'disable-update-check
8793 ;; At build time there is no connection to the Internet, so
8794 ;; looking for updates will not work.
8795 (substitute* "Makefile.am"
8796 (("\\$\\(bindir\\)/embossupdate") ""))
8798 (add-after 'disable-update-check 'autogen
8799 (lambda _ (invoke "autoreconf" "-vif") #t)))))
8808 (list autoconf automake libtool pkg-config))
8809 (home-page "http://emboss.sourceforge.net")
8810 (synopsis "Molecular biology analysis suite")
8811 (description "EMBOSS is the \"European Molecular Biology Open Software
8812 Suite\". EMBOSS is an analysis package specially developed for the needs of
8813 the molecular biology (e.g. EMBnet) user community. The software
8814 automatically copes with data in a variety of formats and even allows
8815 transparent retrieval of sequence data from the web. It also provides a
8816 number of libraries for the development of software in the field of molecular
8817 biology. EMBOSS also integrates a range of currently available packages and
8818 tools for sequence analysis into a seamless whole.")
8819 (license license:gpl2+)))
;; bits: the BITS (Binary Interval Search) implementation, taken from the
;; arq5x/bits GitHub repository at a pinned commit and versioned as a git
;; snapshot of 2.13.0.  Uses gnu-build-system with tests disabled (none are
;; included).  The 'remove-cuda phase edits the "Makefile" after unpacking,
;; and a later phase uses the output's "bin" directory as the destination
;; for the contents of the local "bin" directory.  The license field is
;; license:gpl2.
8822 (let ((revision "1")
8823 (commit "3cc4567896d9d6442923da944beb704750a08d2d"))
8826 ;; The version is 2.13.0 even though no release archives have been
8827 ;; published as yet.
8828 (version (git-version "2.13.0" revision commit))
8832 (url "https://github.com/arq5x/bits")
8834 (file-name (git-file-name name version))
8837 "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
8838 (build-system gnu-build-system)
8840 `(#:tests? #f ;no tests included
8842 (modify-phases %standard-phases
8844 (add-after 'unpack 'remove-cuda
8846 (substitute* "Makefile"
8848 (("(bits_test_intersections) \\\\" _ match) match))
8851 (lambda* (#:key outputs #:allow-other-keys)
8853 "bin" (string-append (assoc-ref outputs "out") "/bin"))
8857 (home-page "https://github.com/arq5x/bits")
8858 (synopsis "Implementation of binary interval search algorithm")
8859 (description "This package provides an implementation of the
8860 BITS (Binary Interval Search) algorithm, an approach to interval set
8861 intersection. It is especially suited for the comparison of diverse genomic
8862 datasets and the exploration of large datasets of genome
8863 intervals (e.g. genes, sequence alignments).")
8864 (license license:gpl2))))
;; piranha: taken from the smithlabcode/piranha GitHub repository at a pinned
;; commit and versioned as a git snapshot of 1.2.1.  Uses gnu-build-system
;; with the test target "test".  Two custom phases:
;;   - 'copy-smithlab-cpp (after 'unpack) copies every file found in the
;;     "smithlab-cpp" input into "./src/smithlab_cpp/";
;;   - 'install-to-store (after 'install) copies everything found under the
;;     local "bin" directory into the output's "bin" directory.
;; The configure flags point at the headers and library directory of the
;; "bamtools" input.  The smithlab_cpp sources are themselves fetched from
;; GitHub at a pinned commit.  samtools-0.1 and python-2 are also listed.
;; The license field is license:gpl3+.
8866 (define-public piranha
8867 ;; There is no release tarball for the latest version. The latest commit is
8868 ;; older than one year at the time of this writing.
8869 (let ((revision "1")
8870 (commit "0466d364b71117d01e4471b74c514436cc281233"))
8873 (version (git-version "1.2.1" revision commit))
8877 (url "https://github.com/smithlabcode/piranha")
8879 (file-name (git-file-name name version))
8882 "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
8883 (build-system gnu-build-system)
8885 `(#:test-target "test"
8887 (modify-phases %standard-phases
8888 (add-after 'unpack 'copy-smithlab-cpp
8889 (lambda* (#:key inputs #:allow-other-keys)
8890 (for-each (lambda (file)
8891 (install-file file "./src/smithlab_cpp/"))
8892 (find-files (assoc-ref inputs "smithlab-cpp")))))
8893 (add-after 'install 'install-to-store
8894 (lambda* (#:key outputs #:allow-other-keys)
8895 (let* ((out (assoc-ref outputs "out"))
8896 (bin (string-append out "/bin")))
8897 (for-each (lambda (file)
8898 (install-file file bin))
8899 (find-files "bin" ".*"))))))
8901 ,#~(list (string-append "--with-bam_tools_headers="
8902 #$(this-package-input "bamtools") "/include/bamtools")
8903 (string-append "--with-bam_tools_library="
8904 #$(this-package-input "bamtools") "/lib/bamtools"))))
8906 `(("bamtools" ,bamtools)
8907 ("samtools" ,samtools-0.1)
8910 ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
8914 (url "https://github.com/smithlabcode/smithlab_cpp")
8916 (file-name (string-append "smithlab_cpp-" commit "-checkout"))
8919 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
8921 `(("python" ,python-2)))
8922 (home-page "https://github.com/smithlabcode/piranha")
8923 (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
8925 "Piranha is a peak-caller for genomic data produced by CLIP-seq and
8926 RIP-seq experiments. It takes input in BED or BAM format and identifies
8927 regions of statistically significant read enrichment. Additional covariates
8928 may optionally be provided to further inform the peak-calling process.")
8929 (license license:gpl3+))))
;; filevercmp: taken from the ekg/filevercmp GitHub repository at a pinned
;; commit and versioned as a git snapshot of "0".  Uses gnu-build-system with
;; tests disabled and the 'configure phase deleted (there is no configure
;; script).  A custom phase installs the "filevercmp" executable into
;; "bin" and "filevercmp.h" into "include" under the output.  The checkout's
;; file name is derived from the package name and version.  The license
;; field is license:gpl3+.
8931 (define-public filevercmp
8932 (let ((commit "1a9b779b93d0b244040274794d402106907b71b7")
8936 (version (git-version "0" revision commit))
8940 (url "https://github.com/ekg/filevercmp")
8942 (file-name (git-file-name name version))
8945 "1j9vxsy0y050v59h0q1d6501fcw1kjvj0d18l1xk2zyg0jzj247c"))))
8946 (build-system gnu-build-system)
8948 `(#:tests? #f ; There are no tests to run.
8950 (modify-phases %standard-phases
8951 (delete 'configure) ; There is no configure phase.
8953 (lambda* (#:key outputs #:allow-other-keys)
8954 (let ((out (assoc-ref outputs "out")))
8955 (install-file "filevercmp" (string-append out "/bin"))
8956 (install-file "filevercmp.h" (string-append out "/include"))
8958 (home-page "https://github.com/ekg/filevercmp")
8959 (synopsis "This program compares version strings")
8960 (description "This program compares version strings. It intends to be a
8961 replacement for strverscmp.")
8962 (license license:gpl3+))))
;; multiqc: MultiQC, fetched from PyPI.  Uses python-build-system.  The
;; custom test phase sets HOME to "/tmp", copies the "tests" input to
;; "/tmp/tests", deletes "unit_tests/test_ccs.py" there (it needs a module
;; that is not available, per the comment), prepends the build directory to
;; GUIX_PYTHONPATH and runs "python -munittest discover" from "/tmp/tests".
;; The 'sanity-check phase is deleted.  The test data comes from the
;; ewels/MultiQC_TestData GitHub repository at a pinned commit.  The license
;; field is license:gpl3+.
8964 (define-public multiqc
8971 (uri (pypi-uri "multiqc" version))
8974 "0y9sgjca3bp0kk3ngry4zf4q2diyzp5bvzsx5l23nsysfbfkigm4"))))
8975 (build-system python-build-system)
8978 (modify-phases %standard-phases
8980 (lambda* (#:key inputs outputs tests? #:allow-other-keys)
8982 (setenv "HOME" "/tmp")
8983 (let ((here (getcwd)))
8984 (copy-recursively (assoc-ref inputs "tests") "/tmp/tests")
8985 ;; ModuleNotFoundError: No module named 'multiqc.modules.ccs'
8986 (delete-file "/tmp/tests/unit_tests/test_ccs.py")
8987 (with-directory-excursion "/tmp/tests"
8988 (setenv "GUIX_PYTHONPATH"
8989 (string-append here ":" (getenv "GUIX_PYTHONPATH")))
8990 (invoke "python" "-munittest" "discover"))))))
8991 ;; TODO: importing the picard and gatk modules fails for unknown
8993 (delete 'sanity-check))))
9010 `(("python-pytest" ,python-pytest)
9012 ,(let ((commit "02272d48a382beb27489fcf9e6308a0407dc3c2e"))
9016 (url "https://github.com/ewels/MultiQC_TestData")
9018 (file-name (git-file-name "multiqc-test-data" commit))
9021 "1bha64wanrigczw4yn81din56396n61j5gqdrkslhslmskcafi91")))))))
9022 (home-page "https://multiqc.info")
9023 (synopsis "Aggregate bioinformatics analysis reports")
9025 "MultiQC is a tool to aggregate bioinformatics results across many
9026 samples into a single report. It contains modules for a large number of
9027 common bioinformatics tools.")
9028 (license license:gpl3+)))
9030 (define-public variant-tools
9032 (name "variant-tools")
9038 (url "https://github.com/vatlab/varianttools")
9039 ;; There is no tag corresponding to version 3.1.2
9040 (commit "813ae4a90d25b69abc8a40f4f70441fe09015249")))
9041 (file-name (git-file-name name version))
9044 "12ibdmksj7icyqhks4xyvd61bygk4pjmxn618kp6vgk1af01y34g"))))
9045 (build-system python-build-system)
9055 (list python-numpy python-pycurl python-pyzmq python-scipy
9057 (home-page "https://vatlab.github.io/vat-docs/")
9058 (synopsis "Analyze genetic variants from Next-Gen sequencing studies")
9060 "Variant tools is a tool for the manipulation, annotation,
9061 selection, simulation, and analysis of variants in the context of next-gen
9062 sequencing analysis. Unlike some other tools used for next-gen sequencing
9063 analysis, variant tools is project based and provides a whole set of tools to
9064 manipulate and analyze genetic variants.")
9065 (license license:gpl3+)))
9067 (define-public r-chromvarmotifs
9068 (let ((commit "38bed559c1f4770b6c91c80bf3f8ea965da26076")
9071 (name "r-chromvarmotifs")
9072 (version (git-version "0.2.0" revision commit))
9077 (url "https://github.com/GreenleafLab/chromVARmotifs")
9079 (file-name (git-file-name name version))
9081 (base32 "0i9v1m1hrg1lkd2pnkj5nnrpks6vhhhpbdhsfl2lmjak4npxxr5q"))))
9082 (properties `((upstream-name . "chromVARmotifs")))
9083 (build-system r-build-system)
9085 `(("r-tfbstools" ,r-tfbstools)))
9086 (home-page "https://github.com/GreenleafLab/chromVARmotifs")
9087 (synopsis "Stores motif collections for use with motifmatchr or chromVAR")
9089 "This package stores motif collections as lists of @dfn{position
9090 frequency matrix} (PWMatrixList) objects provided by the @code{TFBSTools}
9091 package for use in R with packages like @code{motifmatchr} or
9093 (license license:expat))))
9095 (define-public r-raremetals2
9097 (name "r-raremetals2")
9102 (uri (string-append "http://genome.sph.umich.edu/w/images/"
9103 "b/b7/RareMETALS2_" version ".tar.gz"))
9106 "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
9107 (properties `((upstream-name . "RareMETALS2")))
9108 (build-system r-build-system)
9110 (list r-seqminer r-mvtnorm r-mass r-compquadform r-getopt))
9111 (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
9112 (synopsis "Analyze gene-level association tests for binary trait")
9114 "The R package rareMETALS2 is an extension of the R package rareMETALS.
9115 It was designed to meta-analyze gene-level association tests for binary trait.
9116 While rareMETALS offers a near-complete solution for meta-analysis of
9117 gene-level tests for quantitative trait, it does not offer the optimal
9118 solution for binary trait. The package rareMETALS2 offers improved features
9119 for analyzing gene-level association tests in meta-analyses for binary
9121 (license license:gpl3)))
9123 (define-public r-rnaseqdtu
9124 (let ((commit "5bee1e769d2e1dc6a3f1cecb78078050eeb5b9ac")
9127 (name "r-rnaseqdtu")
9128 (version (git-version "2.0" revision commit))
9133 (url "https://github.com/mikelove/rnaseqDTU/")
9135 (file-name (git-file-name name version))
9137 (base32 "0jfi1ydsk8m5nadwnih48v87nnxdc7s3f0pny4axmnj40dd42as0"))))
9138 (properties `((upstream-name . "rnaseqDTU")))
9139 (build-system r-build-system)
9148 (native-inputs (list r-knitr))
9149 (home-page "https://github.com/mikelove/rnaseqDTU/")
9150 (synopsis "RNA-seq workflow for differential transcript usage")
9152 "This package provides an RNA-seq workflow for differential transcript
9153 usage (DTU) following Salmon quantification. This workflow performs a DTU
9154 analysis on simulated data. It also shows how to use stageR to perform
9155 two-stage testing of DTU, a statistical framework to screen at the gene level
9156 and then confirm which transcripts within the significant genes show evidence
9158 (license license:artistic2.0))))
9160 (define-public r-dropbead
9161 (let ((commit "d746c6f3b32110428ea56d6a0001ce52a251c247")
9165 (version (string-append "0-" revision "." (string-take commit 7)))
9170 (url "https://github.com/rajewsky-lab/dropbead")
9172 (file-name (git-file-name name version))
9175 "0sbzma49aiiyw8b0jpr7fnhzys9nsqmp4hy4hdz1gzyg1lhnca26"))))
9176 (build-system r-build-system)
9178 (list r-ggplot2 r-rcolorbrewer r-gridextra r-gplots r-plyr))
9179 (home-page "https://github.com/rajewsky-lab/dropbead")
9180 (synopsis "Basic exploration and analysis of Drop-seq data")
9181 (description "This package offers a quick and straightforward way to
9182 explore and perform basic analysis of single cell sequencing data coming from
9183 droplet sequencing. It has been particularly tailored for Drop-seq.")
9184 (license license:gpl3))))
9186 (define-public r-cellchat
9188 "21edd226ca408e4c413408f98562d71ee0b54e5d")
9192 (version (git-version "1.0.0" revision commit))
9197 (url "https://github.com/sqjin/CellChat")
9199 (file-name (git-file-name name version))
9202 "0cvzl9mi8jjznpql2gv67swnk1dndn3a2h22z5l84h7lwpwjmh53"))
9204 '(for-each delete-file '("src/CellChat.so"
9205 "src/CellChat_Rcpp.o"
9206 "src/RcppExports.o")))))
9207 (properties `((upstream-name . "CellChat")))
9208 (build-system r-build-system)
9210 (list r-biocgenerics
9244 (native-inputs (list r-knitr))
9245 (home-page "https://github.com/sqjin/CellChat")
9246 (synopsis "Analysis of cell-cell communication from single-cell transcriptomics data")
9248 "This package infers, visualizes and analyzes the cell-cell
9249 communication networks from scRNA-seq data.")
9250 (license license:gpl3))))
9252 (define-public r-copykat
9253 (let ((commit ;no tag
9254 "256de33dfc1b80a1a0ac9e098c5557f95a4e0d53")
9258 (version (git-version "1.0.8" revision commit))
9263 (url "https://github.com/navinlabcode/copykat")
9265 (file-name (git-file-name name version))
9268 "0ckyqnial3imcqlgd6xfgwk5w977l1i87sx4kdbwdvg40w0vh1j8"))))
9269 (properties `((upstream-name . "copykat")))
9270 (build-system r-build-system)
9279 (native-inputs (list r-knitr))
9280 (home-page "https://github.com/navinlabcode/copykat")
9281 (synopsis "Inference of genomic copy number from single cell RNAseq data")
9283 "This package, Copynumber KAryotyping of Tumors, infers genomic copy
9284 number and subclonal structure of human tumors using integrative Bayesian
9285 approaches to identify genome-wide aneuploidy at 5MB resolution in single-cell
9286 data. It separates tumor cells and tumor subclones from normal cells
9287 using high-throughput sc-RNAseq data.")
9288 (license license:gpl2))))
9290 (define-public sambamba
9298 (url "https://github.com/biod/sambamba")
9299 (commit (string-append "v" version))))
9300 (file-name (git-file-name name version))
9303 "1zdkd1md5wk4la71p82pbclqqcm55abk23fk087da6186i1bsihl"))))
9304 (build-system gnu-build-system)
9306 `(#:tests? #f ; there is no test target
9307 #:parallel-build? #f ; not supported
9309 (modify-phases %standard-phases
9311 (add-after 'unpack 'prepare-build-tools
9312 (lambda* (#:key inputs #:allow-other-keys)
9313 (substitute* "Makefile"
9314 (("\\$\\(shell which ldmd2\\)") (which "ldmd2")))
9316 (setenv "D_LD" (which "ld.gold"))))
9317 (add-after 'unpack 'unbundle-prerequisites
9319 (substitute* "Makefile"
9320 (("= lz4/lib/liblz4.a") "= -L-llz4")
9321 (("ldc_version_info lz4-static") "ldc_version_info"))))
9323 (lambda* (#:key outputs #:allow-other-keys)
9324 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
9326 (copy-file (string-append "bin/sambamba-" ,version)
9327 (string-append bin "/sambamba"))))))))
9329 `(("ld-gold-wrapper"
9330 ;; Importing (gnu packages commencement) would introduce a cycle.
9331 ,(module-ref (resolve-interface
9332 '(gnu packages commencement))
9334 ("binutils-gold" ,binutils-gold)
9335 ("python" ,python)))
9337 (list ldc lz4 zlib))
9338 (home-page "https://github.com/biod/sambamba")
9339 (synopsis "Tools for working with SAM/BAM data")
9340 (description "Sambamba is a high performance, modern, robust and
9341 fast tool (and library), written in the D programming language, for
9342 working with SAM and BAM files. Current parallelised functionality is
9343 an important subset of samtools functionality, including view, index,
9344 sort, markdup, and depth.")
9345 (license license:gpl2+)))
9347 (define-public ritornello
9354 (url "https://github.com/KlugerLab/Ritornello")
9355 (commit (string-append "v" version))))
9356 (file-name (git-file-name name version))
9359 "1xahvq215qld7x1w8vpa5zbrsj6p9crb9shqa2x89sb0aaxa02jk"))))
9360 (build-system gnu-build-system)
9362 `(#:tests? #f ; there are no tests
9364 (modify-phases %standard-phases
9365 (add-after 'unpack 'patch-samtools-references
9366 (lambda* (#:key inputs #:allow-other-keys)
9367 (substitute* '("src/SamStream.h"
9369 (("<sam.h>") "<samtools/sam.h>"))
9373 (lambda* (#:key inputs outputs #:allow-other-keys)
9374 (let* ((out (assoc-ref outputs "out"))
9375 (bin (string-append out "/bin/")))
9377 (install-file "bin/Ritornello" bin)
9380 (list samtools-0.1 fftw boost zlib))
9381 (home-page "https://github.com/KlugerLab/Ritornello")
9382 (synopsis "Control-free peak caller for ChIP-seq data")
9383 (description "Ritornello is a ChIP-seq peak calling algorithm based on
9384 signal processing that can accurately call binding events without the need for
9385 a paired total DNA input or IgG control sample. It has been tested for use
9386 with narrow binding events such as transcription factor ChIP-seq.")
9387 (license license:gpl3+)))
9389 (define-public trim-galore
9391 (name "trim-galore")
9397 (url "https://github.com/FelixKrueger/TrimGalore")
9399 (file-name (git-file-name name version))
9402 "0yrwg6325j4sb9vnplvl3jplzab0qdhp92wl480qjinpfq88j4rs"))))
9403 (build-system gnu-build-system)
9405 `(#:tests? #f ; no tests
9407 (modify-phases %standard-phases
9410 ;; Trim Galore tries to figure out what version of Python
9411 ;; cutadapt is using by looking at the shebang. Of course that
9412 ;; doesn't work, because cutadapt is wrapped in a shell script.
9413 (substitute* "trim_galore"
9414 (("my \\$python_return.*")
9415 "my $python_return = \"Python 3.999\";\n"))
9418 (add-after 'unpack 'hardcode-tool-references
9419 (lambda* (#:key inputs #:allow-other-keys)
9420 (substitute* "trim_galore"
9421 (("\\$path_to_cutadapt = 'cutadapt'")
9422 (string-append "$path_to_cutadapt = '"
9423 (assoc-ref inputs "cutadapt")
9425 (("\\$compression_path = \"gzip\"")
9426 (string-append "$compression_path = \""
9427 (assoc-ref inputs "gzip")
9431 (assoc-ref inputs "gzip")
9435 (assoc-ref inputs "pigz")
9439 (lambda* (#:key outputs #:allow-other-keys)
9440 (let ((bin (string-append (assoc-ref outputs "out")
9443 (install-file "trim_galore" bin)
9446 (list gzip perl pigz cutadapt))
9449 (home-page "https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
9450 (synopsis "Wrapper around Cutadapt and FastQC")
9451 (description "Trim Galore! is a wrapper script to automate quality and
9452 adapter trimming as well as quality control, with some added functionality to
9453 remove biased methylation positions for RRBS sequence files.")
9454 (license license:gpl3+)))
9456 (define-public phylip
9463 (uri (string-append "http://evolution.gs.washington.edu/phylip/"
9464 "download/phylip-" version ".tar.gz"))
9467 "1h8h0nafnlbqryswxgplx80k2044yhfz97jh13vsgzlaifqdh9ls"))))
9468 (build-system gnu-build-system)
9470 `(#:tests? #f ; no check target
9471 #:make-flags (list "-f" "Makefile.unx" "CFLAGS=-fcommon" "install")
9472 #:parallel-build? #f ; not supported
9474 (modify-phases %standard-phases
9475 (add-after 'unpack 'enter-dir
9476 (lambda _ (chdir "src")))
9479 (lambda* (#:key inputs outputs #:allow-other-keys)
9480 (let ((target (string-append (assoc-ref outputs "out")
9483 (for-each (lambda (file)
9484 (install-file file target))
9485 (find-files "../exe" ".*"))))))))
9486 (home-page "http://evolution.genetics.washington.edu/phylip/")
9487 (synopsis "Tools for inferring phylogenies")
9488 (description "PHYLIP (the PHYLogeny Inference Package) is a package of
9489 programs for inferring phylogenies (evolutionary trees).")
9490 (license license:bsd-2)))
9492 (define-public phyml
9495 (version "3.3.20220408")
9499 (url "https://github.com/stephaneguindon/phyml")
9500 (commit (string-append "v" version))))
9501 (file-name (git-file-name name version))
9504 "03hdqmnsgnzkcrp9r9ajdfkj33jgq4b86kra8ssjlrph65y344sa"))
9506 '(delete-file "doc/phyml-manual.pdf"))))
9507 (build-system gnu-build-system)
9508 (supported-systems '("x86_64-linux"))
9510 (let ((default-flags (list "--disable-native")))
9512 (let ((build (lambda (what)
9514 (apply (assoc-ref %standard-phases 'configure)
9516 (list #:configure-flags
9517 (cons (format #false "--enable-~a" what)
9518 '() #;,default-flags))))
9519 (apply (assoc-ref %standard-phases 'build) args)
9520 (apply (assoc-ref %standard-phases 'install) args)))))
9521 (modify-phases %standard-phases
9522 ;; We cannot use --disable-native; see
9523 ;; https://github.com/stephaneguindon/phyml/issues/173. Instead we
9524 ;; patch the code to at least get rid of -march=native.
9525 (add-after 'unpack 'remove-march-native
9527 (substitute* "configure.ac"
9528 (("DEFAULT_VECTOR_FLAG=\"-march=native\"")
9529 "DEFAULT_VECTOR_FLAG=\"-march=athlon64-sse3\"\n"))))
9530 (add-after 'build 'build-manual
9532 (with-directory-excursion "doc"
9533 (invoke "make" "phyml-manual.pdf"))))
9534 (add-after 'build-manual 'install-manual
9535 (lambda* (#:key outputs #:allow-other-keys)
9536 (with-directory-excursion "doc"
9537 (install-file "phyml-manual.pdf"
9538 (string-append (assoc-ref outputs "out")
9539 "/share/doc/phyml")))))
9540 (add-after 'install 'build-phyml-mpi
9541 (build "phyml-mpi"))
9542 (add-after 'build-phyml-mpi 'build-rf
9544 (add-after 'build-rf 'build-phyrex
9545 (build "phyrex")))))))
9550 (texlive-updmap.cfg (list texlive-amsfonts
9556 texlive-latex-fancyvrb
9557 texlive-latex-graphics
9558 texlive-latex-psfrag
9560 (home-page "https://github.com/stephaneguindon/phyml")
9561 (synopsis "Phylogenetic estimation using maximum likelihood")
9563 "@code{PhyML} is a software package that uses modern statistical
9564 approaches to analyse alignments of nucleotide or amino acid sequences in a
9565 phylogenetic framework. The main tool in this package builds phylogenies
9566 under the maximum likelihood criterion. It implements a large number of
9567 substitution models coupled with efficient options to search the space of
9568 phylogenetic tree topologies. @code{PhyREX} fits the
9569 spatial-Lambda-Fleming-Viot model to geo-referenced genetic data. This model
9570 is similar to the structured coalescent but assumes that individuals are
9571 distributed along a spatial continuum rather than discrete demes.
9572 @code{PhyREX} can be used to estimate population densities and rates of
9573 dispersal. Its output can be processed by treeannotator (from the
9574 @code{BEAST} package) as well as @code{SPREAD}.")
9575 (license license:gpl3)))
9584 (uri (string-append "https://integrativemodeling.org/"
9585 version "/download/imp-" version ".tar.gz"))
9588 "05hsrnkpkajppa3f45x4qsarnkj616hlby749zxg4is3bv4i6b5y"))))
9589 (build-system cmake-build-system)
9591 `(#:tests? #false ; The test suite is notoriously fickle
9593 (let ((disabled-tests
9594 '("expensive" ;exclude expensive tests
9595 "IMP.modeller" ;fail to import its own modules
9596 "IMP.parallel-test_sge.py" ;fail in build container
9597 ;; The following test fails non-reproducibly on
9598 ;; an inexact numbers assertion.
9599 "IMP.em-medium_test_local_fitting.py"
9600 ;; The following test fails for unknown reasons
9601 "IMP.foxs-add-missing-residues.py")))
9604 "-DCMAKE_CTEST_ARGUMENTS="
9606 (list "-L" "-tests?-" ;select only tests
9607 "-E" (format #f "'(~a)'" (string-join disabled-tests "|")))
9610 `(("python" ,python-wrapper)
9619 ;; Enabling MPI causes the build to use all the available memory and
9620 ;; fail (tested on a machine with 32 GiB of RAM).
9624 (list python-numpy python-scipy python-pandas python-scikit-learn
9626 (home-page "https://integrativemodeling.org")
9627 (synopsis "Integrative modeling platform")
9628 (description "IMP's broad goal is to contribute to a comprehensive
9629 structural characterization of biomolecules ranging in size and complexity
9630 from small peptides to large macromolecular assemblies, by integrating data
9631 from diverse biochemical and biophysical experiments. IMP provides a C++ and
9632 Python toolbox for solving complex modeling problems, and a number of
9633 applications for tackling some common problems in a user-friendly way.")
9634 ;; IMP is largely available under the GNU Lesser GPL; see the file
9635 ;; COPYING.LGPL for the full text of this license. Some IMP modules are
9636 ;; available under the GNU GPL (see the file COPYING.GPL).
9637 (license (list license:lgpl2.1+
9640 ;; We use this seemingly arbitrary commit because of
9641 ;; https://github.com/3DGenomes/TADbit/issues/371
9642 (define-public tadbit
9643 (let ((commit "5c4c1ddaadfbaf7e6edc58173e46d801093bdc9b")
9647 (version (git-version "1.0.1" revision commit))
9651 (url "https://github.com/3DGenomes/TADbit")
9653 (file-name (git-file-name name version))
9656 "17nwlvjgqpa7x6jgh56m3di61ynaz34kl1jamyv7r2a5rhfcbkla"))))
9657 (build-system python-build-system)
9660 (modify-phases %standard-phases
9661 (add-after 'unpack 'fix-problems-with-setup.py
9662 (lambda* (#:key outputs #:allow-other-keys)
9663 (substitute* "src/test/Makefile"
9664 (("^CFLAGS=") "CFLAGS= -fcommon"))
9666 ;; Don't attempt to install the bash completions to
9667 ;; the home directory.
9668 (rename-file "extras/.bash_completion"
9670 (substitute* "setup.py"
9671 (("\\(path.expanduser\\('~'\\)")
9672 (string-append "(\""
9673 (assoc-ref outputs "out")
9674 "/etc/bash_completion.d\""))
9675 (("extras/\\.bash_completion")
9678 (lambda* (#:key tests? inputs outputs #:allow-other-keys)
9680 (add-installed-pythonpath inputs outputs)
9681 (invoke "python3" "test/test_all.py")))))))
9683 (list `(,glib "bin") ;for gtester
9686 ;; TODO: add Chimera for visualization
9695 (home-page "https://3dgenomes.github.io/TADbit/")
9696 (synopsis "Analyze, model, and explore 3C-based data")
9698 "TADbit is a complete Python library to deal with all steps to analyze,
9699 model, and explore 3C-based data. With TADbit the user can map FASTQ files to
9700 obtain raw interaction binned matrices (Hi-C like matrices), normalize and
9701 correct interaction matrices, identify and compare the so-called
9702 @dfn{Topologically Associating Domains} (TADs), build 3D models from the
9703 interaction matrices, and finally, extract structural properties from the
9704 models. TADbit is complemented by TADkit for visualizing 3D models.")
9705 (license license:gpl3+))))
9707 (define-public kentutils
9710 ;; 302.1.0 is out, but the only difference is the inclusion of
9711 ;; pre-built binaries.
9717 (url "https://github.com/ENCODE-DCC/kentUtils")
9718 (commit (string-append "v" version))))
9719 (file-name (git-file-name name version))
9722 "0n1wbyjpzii2b9qhyp9r1q76j623cggpg3y8fmw78ld3z4y7ivha"))
9723 (modules '((guix build utils)
9728 ;; Only the contents of the specified directories are free
9729 ;; for all uses, so we remove the rest. "hg/autoSql" and
9730 ;; "hg/autoXml" are nominally free, but they depend on a
9731 ;; library that is built from the sources in "hg/lib",
9732 ;; which is nonfree.
9733 (let ((free (list "." ".."
9734 "utils" "lib" "inc" "tagStorm"
9735 "parasol" "htslib"))
9736 (directory? (lambda (file)
9737 (eq? 'directory (stat:type (stat file))))))
9738 (for-each (lambda (file)
9739 (and (directory? file)
9740 (delete-file-recursively file)))
9741 (map (cut string-append "src/" <>)
9744 (not (member file free)))))))
9745 ;; Only make the utils target, not the userApps target,
9746 ;; because that requires libraries we won't build.
9747 (substitute* "Makefile"
9748 ((" userApps") " utils"))
9749 ;; Only build libraries that are free.
9750 (substitute* "src/makefile"
9751 (("DIRS =.*") "DIRS =\n")
9752 (("cd jkOwnLib.*") "")
9755 (substitute* "src/utils/makefile"
9756 ;; These tools depend on "jkhgap.a", which is part of the
9757 ;; nonfree "src/hg/lib" directory.
9759 (("pslLiftSubrangeBlat") "")
9761 ;; Do not build UCSC tools, which may require nonfree
9763 (("ALL_APPS =.*") "ALL_APPS = $(UTILS_APPLIST)\n"))
9765 (build-system gnu-build-system)
9767 `( ;; There is no global test target and the test target for
9768 ;; individual tools depends on input files that are not
9772 (modify-phases %standard-phases
9773 (add-after 'unpack 'fix-permissions
9774 (lambda _ (make-file-writable "src/inc/localEnvironment.mk") #t))
9775 (add-after 'unpack 'fix-paths
9777 (substitute* "Makefile"
9778 (("/bin/echo") (which "echo")))
9780 (add-after 'unpack 'prepare-samtabix
9781 (lambda* (#:key inputs #:allow-other-keys)
9782 (copy-recursively (assoc-ref inputs "samtabix")
9787 (lambda* (#:key outputs #:allow-other-keys)
9788 (let ((bin (string-append (assoc-ref outputs "out")
9790 (copy-recursively "bin" bin))
9794 ,(let ((commit "10fd107909c1ac4d679299908be4262a012965ba"))
9798 (url "http://genome-source.cse.ucsc.edu/samtabix.git")
9800 (file-name (git-file-name "samtabix" (string-take commit 7)))
9803 "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma")))))))
9809 ("mariadb-dev" ,mariadb "dev")
9810 ("openssl" ,openssl)))
9811 (home-page "https://genome.cse.ucsc.edu/index.html")
9812 (synopsis "Assorted bioinformatics utilities")
9813 (description "This package provides the kentUtils, a selection of
9814 bioinformatics utilities used in combination with the UCSC genome
9816 ;; Only a subset of the sources are released under a non-copyleft
9817 ;; free software license. All other sources are removed in a
9818 ;; snippet. See this bug report for an explanation of how the
9819 ;; license statements apply:
9820 ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
9821 (license (license:non-copyleft
9822 "http://genome.ucsc.edu/license/"
9823 "The contents of this package are free for all uses."))))
9825 (define-public f-seq
9826 (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
9830 (version (git-version "1.1" revision commit))
9834 (url "https://github.com/aboyle/F-seq")
9836 (file-name (git-file-name name version))
9839 "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
9840 (modules '((guix build utils)))
9841 ;; Remove bundled Java library archives.
9843 '(for-each delete-file (find-files "lib" ".*")))))
9844 (build-system ant-build-system)
9846 `(#:tests? #f ; no tests included
9848 (modify-phases %standard-phases
9850 (lambda* (#:key inputs outputs #:allow-other-keys)
9851 (let* ((target (assoc-ref outputs "out"))
9852 (bin (string-append target "/bin"))
9853 (doc (string-append target "/share/doc/f-seq"))
9854 (lib (string-append target "/lib")))
9857 (substitute* "bin/linux/fseq"
9858 (("java") (which "java"))
9859 (("\\$REALDIR/../lib/commons-cli-1.1.jar")
9860 (search-input-file inputs
9861 (string-append "/lib/m2/commons-cli/commons-cli/"
9862 ,(package-version java-commons-cli)
9864 ,(package-version java-commons-cli)
9867 (string-append "REALDIR=" bin "\n")))
9868 (install-file "README.txt" doc)
9869 (install-file "bin/linux/fseq" bin)
9870 (install-file "build~/fseq.jar" lib)
9871 (copy-recursively "lib" lib)))))))
9873 (list perl java-commons-cli))
9874 (home-page "https://fureylab.web.unc.edu/software/fseq/")
9875 (synopsis "Feature density estimator for high-throughput sequence tags")
9877 "F-Seq is a software package that generates a continuous tag sequence
9878 density estimation allowing identification of biologically meaningful sites
9879 such as transcription factor binding sites (ChIP-seq) or regions of open
9880 chromatin (DNase-seq). Output can be displayed directly in the UCSC Genome
9882 (license license:gpl3+))))
9884 (define-public bismark
9892 (url "https://github.com/FelixKrueger/Bismark")
9894 (file-name (git-file-name name version))
9897 "0xchm3rgilj6vfjnyzfzzymfd7djr64sbrmrvs3njbwi66jqbzw9"))))
9898 (build-system perl-build-system)
9900 `(#:tests? #f ; there are no tests
9901 #:modules ((guix build utils)
9904 (guix build perl-build-system))
9906 (modify-phases %standard-phases
9907 ;; The bundled plotly.js is minified.
9908 (add-after 'unpack 'replace-plotly.js
9909 (lambda* (#:key inputs #:allow-other-keys)
9910 (let* ((file (assoc-ref inputs "plotly.js"))
9911 (installed "plotly/plotly.js"))
9912 (let ((minified (open-pipe* OPEN_READ "uglifyjs" file)))
9913 (call-with-output-file installed
9914 (cut dump-port minified <>))))
9919 (lambda* (#:key inputs outputs #:allow-other-keys)
9920 (let* ((out (assoc-ref outputs "out"))
9921 (bin (string-append out "/bin"))
9922 (share (string-append out "/share/bismark"))
9923 (docdir (string-append out "/share/doc/bismark"))
9924 (docs '("Docs/Bismark_User_Guide.html"))
9925 (scripts '("bismark"
9926 "bismark_genome_preparation"
9927 "bismark_methylation_extractor"
9931 "deduplicate_bismark"
9932 "filter_non_conversion"
9936 (substitute* "bismark2report"
9937 (("\\$RealBin/plotly")
9938 (string-append share "/plotly")))
9942 (for-each (lambda (file) (install-file file bin))
9944 (for-each (lambda (file) (install-file file docdir))
9946 (copy-recursively "Docs/Images" (string-append docdir "/Images"))
9947 (copy-recursively "plotly"
9948 (string-append share "/plotly"))
9950 ;; Fix references to gunzip
9951 (substitute* (map (lambda (file)
9952 (string-append bin "/" file))
9955 (string-append "\"" (assoc-ref inputs "gzip")
9959 (list gzip perl-carp perl-getopt-long))
9964 (uri (string-append "https://raw.githubusercontent.com/plotly/plotly.js/"
9965 "v1.39.4/dist/plotly.js"))
9967 (base32 "138mwsr4nf5qif4mrxx286mpnagxd1xwl6k8aidrjgknaqg88zyr"))))
9968 ("uglifyjs" ,node-uglify-js)))
9969 (home-page "https://www.bioinformatics.babraham.ac.uk/projects/bismark/")
9970 (synopsis "Map bisulfite treated sequence reads and analyze methylation")
9971 (description "Bismark is a program to map bisulfite treated sequencing
9972 reads to a genome of interest and perform methylation calls in a single step.
9973 The output can be easily imported into a genome viewer, such as SeqMonk, and
9974 enables a researcher to analyse the methylation levels of their samples
9975 straight away. Its main features are:
9978 @item Bisulfite mapping and methylation calling in one single step
9979 @item Supports single-end and paired-end read alignments
9980 @item Supports ungapped and gapped alignments
9981 @item Alignment seed length, number of mismatches etc are adjustable
9982 @item Output discriminates between cytosine methylation in CpG, CHG
9985 (license license:gpl3+)))
9993 (uri (string-append "http://abacus.gene.ucl.ac.uk/software/"
9994 "paml" version ".tgz"))
9997 "13zf6h9fiqghwhch2h06x1zdr6s42plsnqahflp5g7myr3han3s6"))
9998 (modules '((guix build utils)))
9999 ;; Remove Windows binaries
10002 (for-each delete-file (find-files "." "\\.exe$"))
10003 ;; Some files in the original tarball have restrictive
10004 ;; permissions, which makes repackaging fail
10005 (for-each (lambda (file) (chmod file #o644)) (find-files "."))
10007 (build-system gnu-build-system)
10009 `(#:tests? #f ; there are no tests
10010 #:make-flags '("CC=gcc" "CFLAGS=-fcommon -O3")
10012 (modify-phases %standard-phases
10013 (replace 'configure
10015 (substitute* "src/BFdriver.c"
10016 (("/bin/bash") (which "bash")))
10019 (lambda* (#:key outputs #:allow-other-keys)
10020 (let ((tools '("baseml" "basemlg" "codeml"
10021 "pamp" "evolver" "yn00" "chi2"))
10022 (bin (string-append (assoc-ref outputs "out") "/bin"))
10023 (docdir (string-append (assoc-ref outputs "out")
10024 "/share/doc/paml")))
10026 (for-each (lambda (file) (install-file file bin)) tools)
10027 (copy-recursively "../doc" docdir)))))))
10028 (home-page "http://abacus.gene.ucl.ac.uk/software/paml.html")
10029 (synopsis "Phylogenetic analysis by maximum likelihood")
10030 (description "PAML (for Phylogenetic Analysis by Maximum Likelihood)
10031 contains a few programs for model fitting and phylogenetic tree reconstruction
10032 using nucleotide or amino-acid sequence data.")
10034 (license license:gpl3)))
10036 (define-public segemehl
10042 (uri (string-append "https://www.bioinf.uni-leipzig.de/Software"
10043 "/segemehl/downloads/segemehl-"
10044 version ".tar.gz"))
10047 "0lbzbb7i8zadsn9b99plairhq6s2h1z8qdn6n7djclfis01nycz4"))))
10048 (build-system gnu-build-system)
10051 (list (string-append "CC=" ,(cc-for-target))
10053 #:tests? #false ; there are none
10055 (modify-phases %standard-phases
10056 (delete 'configure)
10057 ;; There is no installation target
10059 (lambda* (#:key inputs outputs #:allow-other-keys)
10060 (let* ((out (assoc-ref outputs "out"))
10061 (bin (string-append out "/bin"))
10062 (exes (list "segemehl.x" "haarz.x")))
10064 (for-each (lambda (exe)
10065 (install-file exe bin))
10068 (list htslib ncurses zlib))
10071 (home-page "https://www.bioinf.uni-leipzig.de/Software/segemehl")
10072 (synopsis "Map short sequencer reads to reference genomes")
10073 (description "Segemehl is software to map short sequencer reads to
10074 reference genomes. Segemehl implements a matching strategy based on enhanced
10075 suffix arrays (ESA). It accepts fasta and fastq queries (gzip'ed and
10076 bgzip'ed). In addition to the alignment of reads from standard DNA- and
10077 RNA-seq protocols, it also allows the mapping of bisulfite converted
10078 reads (Lister and Cokus) and implements a split read mapping strategy. The
10079 output of segemehl is a SAM or BAM formatted alignment file.")
10080 (license license:gpl3+)))
;; kallisto: RNA-Seq quantification tool.  The source is a git checkout of
;; the pachterlab repository at tag v<version>; it is built with
;; cmake-build-system and the tests are disabled.
10082 (define-public kallisto
10088 (uri (git-reference
10089 (url "https://github.com/pachterlab/kallisto")
10090 (commit (string-append "v" version))))
10091 (file-name (git-file-name name version))
10094 "0ij5n7v3m90jdfi7sn8nvglfyf58abp1f5xq42r4k73l0lfds6xi"))
10095 (modules '((guix build utils)))
;; Remove the bundled copy of htslib from the checkout.
10097 '(delete-file-recursively "ext/htslib/"))))
10098 (build-system cmake-build-system)
10100 `(#:tests? #f ; no "check" target
10102 (modify-phases %standard-phases
;; Patch the CMake files: wrap the ExternalProject_Add call in an
;; if (NEVER) block, comment out the htslib_PREFIX include line, and link
;; against hts rather than ext/htslib/libhts.a.
10103 (add-after 'unpack 'do-not-use-bundled-htslib
10105 (substitute* "CMakeLists.txt"
10106 (("^ExternalProject_Add" m)
10107 (string-append "if (NEVER)\n" m))
10109 (string-append ")\nendif(NEVER)"))
10110 (("include_directories\\(\\$\\{htslib_PREFIX.*" m)
10111 (string-append "# " m)))
10112 (substitute* "src/CMakeLists.txt"
10113 (("target_link_libraries\\(kallisto kallisto_core pthread \
10114 \\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/../ext/htslib/libhts.a\\)")
10115 "target_link_libraries(kallisto kallisto_core pthread hts)")
10116 (("include_directories\\(\\.\\./ext/htslib\\)") ""))
10119 (list hdf5 htslib-1.9 zlib))
10120 (home-page "https://pachterlab.github.io/kallisto/")
10121 (synopsis "Near-optimal RNA-Seq quantification")
10123 "Kallisto is a program for quantifying abundances of transcripts from
10124 RNA-Seq data, or more generally of target sequences using high-throughput
10125 sequencing reads. It is based on the novel idea of pseudoalignment for
10126 rapidly determining the compatibility of reads with targets, without the need
10127 for alignment. Pseudoalignment of reads preserves the key information needed
10128 for quantification, and kallisto is therefore not only fast, but also as
10129 accurate as existing quantification tools.")
10130 (license license:bsd-2)))
;; libgff: GFF/GTF parser library.  The source is a git checkout of the
;; COMBINE-lab repository at tag v<version>; it is built with
;; cmake-build-system and has no tests.  The license is declared as an
;; x11-style license pointing at the Boost license text.
10132 (define-public libgff
10138 (uri (git-reference
10139 (url "https://github.com/COMBINE-lab/libgff")
10140 (commit (string-append "v" version))))
10141 (file-name (git-file-name name version))
10144 "0ds9r22y8bl1rj7bhl0003kgmm6aam7g8l41mnjfrzw15d9zf9k4"))))
10145 (build-system cmake-build-system)
10146 (arguments `(#:tests? #f)) ; no tests included
10147 (home-page "https://github.com/COMBINE-lab/libgff")
10148 (synopsis "Parser library for reading/writing GFF files")
10149 (description "This is a simple \"libraryfication\" of the GFF/GTF parsing
10150 code that is used in the Cufflinks codebase. The goal of this library is to
10151 provide this functionality without the necessity of drawing in a heavy-weight
10152 dependency like SeqAn.")
10153 (license (license:x11-style "https://www.boost.org/LICENSE_1_0.txt"))))
;; sailfish: isoform quantification from RNA-Seq reads.  The source is a git
;; checkout of the kingsfordgroup repository at tag v<version>, built with
;; cmake-build-system.  Several unpack-time phases redirect the build to
;; libraries taken from the package inputs.
10155 (define-public sailfish
10161 (uri (git-reference
10162 (url "https://github.com/kingsfordgroup/sailfish")
10163 (commit (string-append "v" version))))
10164 (file-name (git-file-name name version))
10167 "1amcc5hqvsl42hg4x19bi9vy47cl874s0lw1fmi0hwsdk9i8c03v"))
10168 (modules '((guix build utils)))
10169 ;; Delete bundled headers for eigen3.
10171 '(delete-file-recursively "include/eigen3/"))))
10172 (build-system cmake-build-system)
10174 `(#:configure-flags
;; Boost's include/library locations and the Boost libraries to link are
;; handed to CMake explicitly, and Boost is declared as found.
10175 ,#~(list (string-append "-DBOOST_INCLUDEDIR="
10176 #$(this-package-input "boost")
10178 (string-append "-DBOOST_LIBRARYDIR="
10179 #$(this-package-input "boost")
10181 (string-append "-DBoost_LIBRARIES="
10182 "-lboost_iostreams "
10183 "-lboost_filesystem "
10188 "-lboost_program_options")
10189 "-DBoost_FOUND=TRUE"
10190 ;; Don't download RapMap---we already have it!
10191 "-DFETCHED_RAPMAP=1")
10192 ;; Tests must be run after installation and the location of the test
10193 ;; data file must be overridden. But the tests fail. It looks like
10194 ;; they are not really meant to be run.
10197 (modify-phases %standard-phases
10198 ;; Boost cannot be found, even though it's right there.
10199 (add-after 'unpack 'do-not-look-for-boost
10200 (lambda* (#:key inputs #:allow-other-keys)
10201 (substitute* "CMakeLists.txt"
10202 (("find_package\\(Boost 1\\.53\\.0") "#"))))
;; Remove the CHAR_WIDTH constant definition from the bundled spdlog
;; format.cc.
10203 (add-after 'unpack 'do-not-assign-to-macro
10205 (substitute* "include/spdlog/details/format.cc"
10206 (("const unsigned CHAR_WIDTH = 1;") ""))))
;; Populate external/install with RapMap: source files found under the
;; rapmap input's src directory are installed into the src location and
;; its include directory is copied recursively.
10207 (add-after 'unpack 'prepare-rapmap
10208 (lambda* (#:key inputs #:allow-other-keys)
10209 (let ((src "external/install/src/rapmap/")
10210 (include "external/install/include/rapmap/")
10211 (rapmap (assoc-ref inputs "rapmap")))
10214 (for-each (lambda (file)
10215 (install-file file src))
10216 (find-files (string-append rapmap "/src") "\\.(c|cpp)"))
10217 (copy-recursively (string-append rapmap "/include") include))))
10218 (add-after 'unpack 'use-system-libraries
10219 (lambda* (#:key inputs #:allow-other-keys)
;; Strip the jellyfish/config.h include from the listed files.
10220 (substitute* '("src/SailfishIndexer.cpp"
10221 "src/SailfishUtils.cpp"
10222 "src/SailfishQuantify.cpp"
10223 "src/FASTAParser.cpp"
10225 "include/SailfishUtils.hpp"
10226 "include/SailfishIndex.hpp"
10227 "include/CollapsedEMOptimizer.hpp"
10228 "src/CollapsedEMOptimizer.cpp")
10229 (("#include \"jellyfish/config.h\"") ""))
;; Replace the external/install paths for jellyfish and libdivsufsort with
;; locations looked up in the inputs; libdivsufsort is switched from the
;; static .a files to the shared .so files.
10230 (substitute* "src/CMakeLists.txt"
10231 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
10232 (search-input-directory
10234 (string-append "/include/jellyfish-" ,(package-version jellyfish))))
10235 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
10236 (search-input-file inputs
10237 "/lib/libjellyfish-2.0.a"))
10238 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
10239 (search-input-file inputs
10240 "/lib/libdivsufsort.so"))
10241 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
10242 (search-input-file inputs
10243 "/lib/libdivsufsort64.so")))
;; Turn the ExternalProject_Add calls for the listed libraries into
;; message( calls and drop the Jellyfish find_package line.
10244 (substitute* "CMakeLists.txt"
10245 ;; Don't prefer static libs
10246 (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
10247 (("find_package\\(Jellyfish.*") "")
10248 (("ExternalProject_Add\\(libjellyfish") "message(")
10249 (("ExternalProject_Add\\(libgff") "message(")
10250 (("ExternalProject_Add\\(libsparsehash") "message(")
10251 (("ExternalProject_Add\\(libdivsufsort") "message("))
10253 ;; Ensure that Eigen headers can be found
10254 (setenv "CPLUS_INCLUDE_PATH"
10255 (string-append (search-input-directory
10256 inputs "/include/eigen3")
10258 (or (getenv "CPLUS_INCLUDE_PATH") ""))))))))
10262 ("jemalloc" ,jemalloc)
10263 ("jellyfish" ,jellyfish)
10264 ("sparsehash" ,sparsehash)
;; RapMap checkout at tag sf-v<version>; its snippet deletes include/spdlog
;; and the xxhash source and header.
10267 (uri (git-reference
10268 (url "https://github.com/COMBINE-lab/RapMap")
10269 (commit (string-append "sf-v" version))))
10270 (file-name (string-append "rapmap-sf-v" version "-checkout"))
10273 "1hv79l5i576ykv5a1srj2p0q36yvyl5966m0fcy2lbi169ipjakf"))
10274 (modules '((guix build utils)))
10275 ;; These files are expected to be excluded.
10277 '(begin (delete-file-recursively "include/spdlog")
10278 (for-each delete-file '("include/xxhash.h"
10279 "src/xxhash.c"))))))
10280 ("libdivsufsort" ,libdivsufsort)
10286 (home-page "https://www.cs.cmu.edu/~ckingsf/software/sailfish/")
10287 (synopsis "Mapping-based isoform quantification from RNA-Seq reads")
10288 (description "Sailfish is a tool for genomic transcript quantification
10289 from RNA-seq data. It requires a set of target transcripts (either from a
10290 reference or de-novo assembly) to quantify. All you need to run sailfish is a
10291 fasta file containing your reference transcripts and a (set of) fasta/fastq
10292 file(s) containing your reads.")
10293 (license license:gpl3+)))
;; libstadenio-for-salmon: non-public (plain define) variant of libstadenio
;; whose source is a git checkout of the COMBINE-lab staden-io_lib repository
;; at tag v<version>.  Built with gnu-build-system; tests are not run in
;; parallel and perl is listed for the tests.
10295 (define libstadenio-for-salmon
10297 (name "libstadenio")
10301 (uri (git-reference
10302 (url "https://github.com/COMBINE-lab/staden-io_lib")
10303 (commit (string-append "v" version))))
10304 (file-name (string-append name "-" version "-checkout"))
10307 "1x8kxxqxl892vwfbprlbyfwkkv7c34ggkc94892x9x0g37x5nbwx"))))
10308 (build-system gnu-build-system)
10309 (arguments '(#:parallel-tests? #f)) ; not supported
10313 `(("perl" ,perl))) ; for tests
10314 (home-page "https://github.com/COMBINE-lab/staden-io_lib")
10315 (synopsis "General purpose trace and experiment file library")
10316 (description "This package provides a library of file reading and writing
10317 code to provide a general purpose Trace file (and Experiment File) reading
10320 The following file formats are supported:
10323 @item SCF trace files
10324 @item ABI trace files
10325 @item ALF trace files
10326 @item ZTR trace files
10327 @item SFF trace archives
10328 @item SRF trace archives
10329 @item Experiment files
10330 @item Plain text files
10331 @item SAM/BAM sequence files
10332 @item CRAM sequence files
10334 (license license:bsd-3)))
;; salmon: transcript-level quantification from RNA-seq reads.  The source is
;; a git checkout of the COMBINE-lab repository at tag v<version>, built with
;; cmake-build-system.  The pufferfish sources are supplied as a separate
;; origin and staged into the build tree by the prepare-pufferfish phase.
10336 (define-public salmon
10342 (uri (git-reference
10343 (url "https://github.com/COMBINE-lab/salmon")
10344 (commit (string-append "v" version))))
10345 (file-name (git-file-name name version))
10348 "1370ry3jpj05gplzyny44mqg77a29a6gp8ijmjz135d2igf956r8"))
10349 (modules '((guix build utils)))
10351 ;; Delete bundled headers for eigen3.
10352 '(delete-file-recursively "include/eigen3/"))))
10353 (build-system cmake-build-system)
;; CMake flags: libgff location, TBB version taken from the tbb package,
;; pufferfish marked as already fetched, shared libraries requested.
10357 #~(list (string-append "-Dlibgff_DIR="
10358 #$(this-package-input "libgff") "/lib")
10359 "-DCMAKE_CXX_FLAGS=\"-DHAVE_NUMERIC_LIMITS128=1\""
10360 "-Dlibgff_FOUND=TRUE"
10362 #$(string-append "-DTBB_VERSION=" (package-version tbb))
10363 "-DFETCHED_PUFFERFISH=TRUE"
10364 "-DUSE_SHARED_LIBS=TRUE")
10366 '(modify-phases %standard-phases
;; Copy the pufferfish input to external/pufferfish, then install the
;; listed headers and source directories under external/install.
10367 (add-after 'unpack 'prepare-pufferfish
10368 (lambda* (#:key inputs #:allow-other-keys)
10369 (copy-recursively (assoc-ref inputs "pufferfish")
10370 "external/pufferfish")
10371 ;; This test isn't working correctly, so compilation aborts.
10372 (substitute* "external/pufferfish/include/string_view.hpp"
10373 (("#if __has_include\\(<string_view>\\)")
10375 (let ((headers "external/install/pufferfish/include/pufferfish")
10376 (source "external/install/src/pufferfish"))
10379 (for-each (lambda (file)
10380 (install-file (string-append "external/pufferfish/include/" file)
10382 (list "ProgOpts.hpp" "BooPHF.hpp" "SpinLock.hpp"
10383 "Kmer.hpp" "CanonicalKmer.hpp" "string_view.hpp"
10384 "CanonicalKmerIterator.hpp"
10385 "PufferfishBaseIndex.hpp"
10386 "PufferfishIndex.hpp"
10387 "PufferfishSparseIndex.hpp"
10388 "PufferfishLossyIndex.hpp"
10389 "PufferfishTypes.hpp"
10390 "rank9b.hpp" "rank9sel.hpp" "macros.hpp"
10391 "select.hpp" "Util.hpp"
10392 "PairedAlignmentFormatter.hpp"
10393 "SelectiveAlignmentUtils.hpp"
10394 "PuffAligner.hpp" "MemCollector.hpp"
10395 "MemChainer.hpp" "CommonTypes.hpp"
10396 "SAMWriter.hpp" "PufferfishConfig.hpp"
10397 "BulkChunk.hpp" "BinWriter.hpp"))
10399 (for-each (lambda (dir)
10401 (string-append "external/pufferfish/include/" dir)
10402 (string-append headers "/" dir)))
10413 (string-append "external/pufferfish/src/metro/")
10414 (string-append source "/metro"))
10416 (string-append "external/pufferfish/src/rank9b.cpp")
10419 ;; Do not complain about not having built libtbb
10420 (substitute* "external/pufferfish/external/twopaco/CMakeLists.txt"
10421 (("add_dependencies.*") "")))))
;; Replace getVersionMessage() in src/Salmon.cpp with an empty string
;; literal.
10422 (add-after 'unpack 'do-not-phone-home
10424 (substitute* "src/Salmon.cpp"
10425 (("getVersionMessage\\(\\)") "\"\""))))
10426 (add-after 'unpack 'use-system-libraries
10427 (lambda* (#:key inputs #:allow-other-keys)
10428 ;; Ensure that all headers can be found
10429 (setenv "CPLUS_INCLUDE_PATH"
10430 (string-append (or (getenv "CPLUS_INCLUDE_PATH") "")
10432 (getcwd) "/external/install/pufferfish/include:"
10433 (assoc-ref inputs "eigen")
10434 "/include/eigen3"))))
;; Rename SALMON_QUASI_INDEX_COMMAND to SALMON_QUASI_INDEX_CMD in the
;; quasi test CMake script.
10435 (add-after 'unpack 'fix-error-message-in-tests
10437 (substitute* "cmake/TestSalmonQuasi.cmake"
10438 (("SALMON_QUASI_INDEX_COMMAND")
10439 "SALMON_QUASI_INDEX_CMD")))))))
10449 libstadenio-for-salmon
10453 `(("pkg-config" ,pkg-config)
;; pufferfish checkout at tag salmon-v<version>, consumed by the
;; prepare-pufferfish phase.
10454 ("pufferfish" ,(origin
10456 (uri (git-reference
10457 (url "https://github.com/COMBINE-lab/pufferfish")
10458 (commit (string-append "salmon-v" version))))
10459 (file-name (git-file-name "pufferfish" version))
10462 "048a006mc2d0h78ym58mv67hl1pj480ilc5ifq0rlzfdyyfs1b8i"))))))
10463 (home-page "https://github.com/COMBINE-lab/salmon")
10464 (synopsis "Quantification from RNA-seq reads using lightweight alignments")
10465 (description "Salmon is a program to produce highly-accurate,
10466 transcript-level quantification estimates from RNA-seq data. Salmon achieves
10467 its accuracy and speed via a number of different innovations, including the
10468 use of lightweight alignments (accurate but fast-to-compute proxies for
10469 traditional read alignments) and massively-parallel stochastic collapsed
10470 variational inference.")
10471 (license license:gpl3+)))
;; python-loompy: library for .loom files.  The source is a git checkout of
;; the linnarsson-lab repository (chosen over PyPI so that the tests are
;; available), built with python-build-system.
10473 (define-public python-loompy
10475 (name "python-loompy")
10477 ;; The tarball on Pypi does not include the tests.
10480 (uri (git-reference
10481 (url "https://github.com/linnarsson-lab/loompy")
10483 (file-name (git-file-name name version))
10486 "0xmw2yv1y3y7vh5jcbrmlkn43nmfs0pf6z78k1yxqs3qy248m9b0"))))
10487 (build-system python-build-system)
10490 (modify-phases %standard-phases
10491 ;; See https://github.com/linnarsson-lab/loompy/issues/169
;; Pass an explicit 'a' mode to h5py.File in the attribute manager test.
10492 (add-after 'unpack 'fix-h5py-error
10494 (substitute* "tests/test_file_attribute_manager.py"
10495 (("h5py.File\\(f.name\\)")
10496 "h5py.File(f.name, 'a')"))))
10497 ;; Numba needs a writable dir to cache functions.
10498 (add-before 'check 'set-numba-cache-dir
10500 (setenv "NUMBA_CACHE_DIR" "/tmp")))
;; The tests are run by invoking pytest on the tests directory.
10502 (lambda* (#:key tests? #:allow-other-keys)
10504 (invoke "pytest" "tests")))))))
10510 python-numpy-groupies
10514 (list python-pytest))
10515 (home-page "https://github.com/linnarsson-lab/loompy")
10516 (synopsis "Work with .loom files for single-cell RNA-seq data")
10517 (description "The loom file format is an efficient format for very large
10518 omics datasets, consisting of a main matrix, optional additional layers, a
10519 variable number of row and column annotations. Loom also supports sparse
10520 graphs. This library makes it easy to work with @file{.loom} files for
10521 single-cell RNA-seq data.")
10522 (license license:bsd-3)))
;; python-biothings-client: Python client for BioThings API services.  The
;; source comes from PyPI (biothings_client); it is built with
;; python-build-system with tests disabled because they need network access.
;; python-requests is a propagated input.
10524 (define-public python-biothings-client
10526 (name "python-biothings-client")
10531 (uri (pypi-uri "biothings_client" version))
10533 (base32 "0bccs37d5saxn5xsd2rfpkrnc5a120xs3ibizai66fgvp1vxbnc4"))))
10534 (build-system python-build-system)
10535 (arguments `(#:tests? #false)) ; require internet access
10536 (propagated-inputs (list python-requests))
10537 (home-page "https://github.com/biothings/biothings_client.py")
10538 (synopsis "Python client for BioThings API services")
10539 (description "This package provides a Python client for BioThings
10541 (license license:bsd-3)))
;; python-mygene: Python wrapper for the MyGene.Info web services.  The
;; source comes from PyPI (mygene); it is built with python-build-system and
;; depends on python-biothings-client.
10543 (define-public python-mygene
10545 (name "python-mygene")
10550 (uri (pypi-uri "mygene" version))
10552 (base32 "1snszwdgfygchxshcbry3b5pbcw3g1isp8dw46razxccqaxwlag7"))))
10553 (build-system python-build-system)
10555 (list python-biothings-client))
10556 (home-page "https://github.com/biothings/mygene.py")
10557 (synopsis "Python Client for MyGene.Info services")
10558 (description "MyGene.Info provides simple-to-use REST web services
10559 to query/retrieve gene annotation data. It's designed with simplicity
10560 and performance emphasized. Mygene is a Python wrapper to access
10561 MyGene.Info services.")
10562 (license license:bsd-3)))
10564 ;; We cannot use the latest commit because it requires Java 9.
;; java-forester: phylogenomics libraries for Java, pinned to a specific git
;; commit of the cmzmasek repository.  The version string is built from the
;; revision and the first seven characters of the commit.  Built with
;; ant-build-system; there are no tests and no install target.
10565 (define-public java-forester
10566 (let ((commit "86b07efe302d5094b42deed9260f719a4c4ac2e6")
10569 (name "java-forester")
10570 (version (string-append "0-" revision "." (string-take commit 7)))
10573 (uri (git-reference
10574 (url "https://github.com/cmzmasek/forester")
10576 (file-name (string-append name "-" version "-checkout"))
10579 "0vxavc1yrf84yrnf20dq26hi0lglidk8d382xrxsy4qmlbjd276z"))
10580 (modules '((guix build utils)))
10583 ;; Delete bundled jars and pre-built classes
10584 (delete-file-recursively "forester/java/resources")
10585 (delete-file-recursively "forester/java/classes")
10586 (for-each delete-file (find-files "forester/java/" "\\.jar$"))
10587 ;; Delete bundled applications
10588 (delete-file-recursively "forester_applications")
10590 (build-system ant-build-system)
10592 `(#:tests? #f ; there are none
10594 #:modules ((guix build ant-build-system)
10596 (guix build java-utils)
10600 (modify-phases %standard-phases
;; The Java sources and build.xml live under forester/java.
10601 (add-after 'unpack 'chdir
10602 (lambda _ (chdir "forester/java") #t))
;; Rewrite build.xml through an SXML transformation that drops every unjar
;; element and leaves all other elements and text as they are; the result
;; is written to build.xml.new and then renamed over build.xml.
10603 (add-after 'chdir 'fix-dependencies
10605 (chmod "build.xml" #o664)
10606 (call-with-output-file "build.xml.new"
10610 (with-input-from-file "build.xml"
10611 (lambda _ (xml->sxml #:trim-whitespace? #t)))
10612 `(;; Remove all unjar tags to avoid repacking classes.
10613 (unjar . ,(lambda _ '()))
10614 (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
10615 (*text* . ,(lambda (_ txt) txt))))
10617 (rename-file "build.xml.new" "build.xml")
10619 ;; FIXME: itext is difficult to package as it depends on a few
10620 ;; unpackaged libraries.
;; PdfExporter.java is deleted and its call site in MainFrame.java is
;; replaced by a statement that throws an IOException.
10621 (add-after 'chdir 'remove-dependency-on-unpackaged-itext
10623 (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
10624 (substitute* "src/org/forester/archaeopteryx/MainFrame.java"
10625 (("pdf_written_to = PdfExporter.*")
10626 "throw new IOException(\"PDF export is not available.\");"))
10628 ;; There is no install target
10629 (replace 'install (install-jars ".")))))
10631 (list java-commons-codec java-openchart2))
10632 (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
10633 (synopsis "Phylogenomics libraries for Java")
10634 (description "Forester is a collection of Java libraries for
10635 phylogenomics and evolutionary biology research. It includes support for
10636 reading, writing, and exporting phylogenetic trees.")
10637 (license license:lgpl2.1+))))
;; java-forester-1.005: forester built from the sources jar published on
;; Maven Central under org/biojava/thirdparty/forester.  Built with
;; ant-build-system; there are no tests and no install target.  Two resource
;; files that the archive lacks are supplied as separate downloads.
10639 (define-public java-forester-1.005
10641 (name "java-forester")
10645 (uri (string-append "https://repo1.maven.org/maven2/"
10646 "org/biojava/thirdparty/forester/"
10647 version "/forester-" version "-sources.jar"))
10648 (file-name (string-append name "-" version ".jar"))
10651 "04r8qv4rk3p71z4ajrvp11py1z46qrx0047j3zzs79s6lnsm3lcv"))))
10652 (build-system ant-build-system)
10654 `(#:tests? #f ; there are none
10656 #:modules ((guix build ant-build-system)
10658 (guix build java-utils)
10662 (modify-phases %standard-phases
;; Generate a top-level build.xml from src/build.xml through an SXML
;; transformation that drops every unjar element.
10663 (add-after 'unpack 'fix-dependencies
10664 (lambda* (#:key inputs #:allow-other-keys)
10665 (call-with-output-file "build.xml"
10669 (with-input-from-file "src/build.xml"
10670 (lambda _ (xml->sxml #:trim-whitespace? #t)))
10671 `(;; Remove all unjar tags to avoid repacking classes.
10672 (unjar . ,(lambda _ '()))
10673 (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
10674 (*text* . ,(lambda (_ txt) txt))))
;; Copy the separately fetched resource files into the working directory
;; and make build.xml refer to the local copies.
10676 (copy-file (assoc-ref inputs "synth_look_and_feel_1.xml")
10677 "synth_look_and_feel_1.xml")
10678 (copy-file (assoc-ref inputs "phyloxml.xsd")
10680 (substitute* "build.xml"
10681 (("../resources/synth_laf/synth_look_and_feel_1.xml")
10682 "synth_look_and_feel_1.xml")
10683 (("../resources/phyloxml_schema/1.10/phyloxml.xsd")
10686 ;; FIXME: itext is difficult to package as it depends on a few
10687 ;; unpackaged libraries.
;; PdfExporter.java is deleted; in the two frame classes the export call is
;; replaced by a throw statement followed by an opening block comment, and
;; the later substitutions insert the matching comment terminator.
10688 (add-after 'unpack 'remove-dependency-on-unpackaged-itext
10690 (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
10691 (substitute* '("src/org/forester/archaeopteryx/MainFrame.java"
10692 "src/org/forester/archaeopteryx/MainFrameApplication.java")
10693 (("pdf_written_to = PdfExporter.*")
10694 "throw new IOException(\"PDF export is not available.\"); /*")
10695 ((".getPrintSizeX\\(\\), getOptions\\(\\).getPrintSizeY\\(\\) \\);") "*/")
10696 (("getCurrentTreePanel\\(\\).getHeight\\(\\) \\);") "*/"))
10698 (add-after 'unpack 'delete-pre-built-classes
10699 (lambda _ (delete-file-recursively "src/classes") #t))
10700 ;; There is no install target
10701 (replace 'install (install-jars ".")))))
10703 (list java-commons-codec java-openchart2))
10704 ;; The source archive does not contain the resources.
;; Both resource files are fetched from raw.githubusercontent.com at fixed
;; commits of the cmzmasek/forester repository.
10709 (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
10710 "b61cc2dcede0bede317db362472333115756b8c6/"
10711 "forester/resources/phyloxml_schema/1.10/phyloxml.xsd"))
10712 (file-name (string-append name "-phyloxml-" version ".xsd"))
10715 "1zxc4m8sn4n389nqdnpxa8d0k17qnr3pm2y5y6g6vh4k0zm52npv"))))
10716 ("synth_look_and_feel_1.xml"
10719 (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
10720 "29e04321615da6b35c1e15c60e52caf3f21d8e6a/"
10721 "forester/java/classes/resources/"
10722 "synth_look_and_feel_1.xml"))
10723 (file-name (string-append name "-synth-look-and-feel-" version ".xml"))
10726 "1gv5602gv4k7y7713y75a4jvj7i9s7nildsbdl7n9q10sc2ikg8h"))))))
10727 (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
10728 (synopsis "Phylogenomics libraries for Java")
10729 (description "Forester is a collection of Java libraries for
10730 phylogenomics and evolutionary biology research. It includes support for
10731 reading, writing, and exporting phylogenetic trees.")
10732 (license license:lgpl2.1+)))
;; java-biojava-core: core libraries of BioJava.  The source is a git
;; checkout of the biojava repository at tag biojava-<version>; only the
;; biojava-core subdirectory is compiled, using ant-build-system, into
;; biojava-core.jar.  Four tests are excluded, and resource directories are
;; copied into place before the build and check phases.
10734 (define-public java-biojava-core
10736 (name "java-biojava-core")
10740 (uri (git-reference
10741 (url "https://github.com/biojava/biojava")
10742 (commit (string-append "biojava-" version))))
10743 (file-name (string-append name "-" version "-checkout"))
10746 "1bvryh2bpsvash8ln79cmc9sqm8qw72hz4xzwqxcrjm8ssxszhqk"))))
10747 (build-system ant-build-system)
10750 #:jar-name "biojava-core.jar"
10751 #:source-dir "biojava-core/src/main/java/"
10752 #:test-dir "biojava-core/src/test"
10753 ;; These tests seem to require internet access.
10754 #:test-exclude (list "**/SearchIOTest.java"
10755 "**/BlastXMLParserTest.java"
10756 "**/GenbankCookbookTest.java"
10757 "**/GenbankProxySequenceReaderTest.java")
10759 (modify-phases %standard-phases
10760 (add-before 'build 'copy-resources
10762 (copy-recursively "biojava-core/src/main/resources"
10765 (add-before 'check 'copy-test-resources
10767 (copy-recursively "biojava-core/src/test/resources"
10768 "build/test-classes")
10771 (list java-log4j-api java-log4j-core java-slf4j-api
10772 java-slf4j-simple))
10774 (list java-junit java-hamcrest-core))
10775 (home-page "https://biojava.org")
10776 (synopsis "Core libraries of Java framework for processing biological data")
10777 (description "BioJava is a project dedicated to providing a Java framework
10778 for processing biological data. It provides analytical and statistical
10779 routines, parsers for common file formats, reference implementations of
10780 popular algorithms, and allows the manipulation of sequences and 3D
10781 structures. The goal of the biojava project is to facilitate rapid
10782 application development for bioinformatics.
10784 This package provides the core libraries.")
10785 (license license:lgpl2.1+)))
;; java-biojava-phylo: inherits from java-biojava-core and builds the
;; biojava-phylo subdirectory into biojava-phylo.jar, copying that module's
;; resource directories before the build and check phases.
10787 (define-public java-biojava-phylo
10788 (package (inherit java-biojava-core)
10789 (name "java-biojava-phylo")
10790 (build-system ant-build-system)
10793 #:jar-name "biojava-phylo.jar"
10794 #:source-dir "biojava-phylo/src/main/java/"
10795 #:test-dir "biojava-phylo/src/test"
10797 (modify-phases %standard-phases
10798 (add-before 'build 'copy-resources
10800 (copy-recursively "biojava-phylo/src/main/resources"
10803 (add-before 'check 'copy-test-resources
10805 (copy-recursively "biojava-phylo/src/test/resources"
10806 "build/test-classes")
10809 (list java-log4j-api
10816 (list java-junit java-hamcrest-core))
10817 (home-page "https://biojava.org")
10818 (synopsis "Biojava interface to the forester phylogenomics library")
10819 (description "The phylo module provides a biojava interface layer to the
10820 forester phylogenomics library for constructing phylogenetic trees.")))
;; java-biojava-alignment: inherits from java-biojava-core and builds the
;; biojava-alignment subdirectory into biojava-alignment.jar, copying that
;; module's resource directories before the build and check phases.
10822 (define-public java-biojava-alignment
10823 (package (inherit java-biojava-core)
10824 (name "java-biojava-alignment")
10825 (build-system ant-build-system)
10828 #:jar-name "biojava-alignment.jar"
10829 #:source-dir "biojava-alignment/src/main/java/"
10830 #:test-dir "biojava-alignment/src/test"
10832 (modify-phases %standard-phases
10833 (add-before 'build 'copy-resources
10835 (copy-recursively "biojava-alignment/src/main/resources"
10838 (add-before 'check 'copy-test-resources
10840 (copy-recursively "biojava-alignment/src/test/resources"
10841 "build/test-classes")
10844 (list java-log4j-api
10852 (list java-junit java-hamcrest-core))
10853 (home-page "https://biojava.org")
10854 (synopsis "Biojava API for genetic sequence alignment")
10855 (description "The alignment module of BioJava provides an API that
10859 @item implementations of dynamic programming algorithms for sequence
10861 @item reading and writing of popular alignment file formats;
10862 @item a single-, or multi- threaded multiple sequence alignment algorithm.
;; java-biojava-core-4.0: inherits everything from java-biojava-core and
;; overrides the source with a different checkout of the biojava repository
;; (tag biojava-<version>, with its own hash).
10865 (define-public java-biojava-core-4.0
10866 (package (inherit java-biojava-core)
10867 (name "java-biojava-core")
10871 (uri (git-reference
10872 (url "https://github.com/biojava/biojava")
10873 (commit (string-append "biojava-" version))))
10874 (file-name (string-append name "-" version "-checkout"))
10877 "13675f6y9aqi7bi2lk3s1z7a22ynccjiqwa8izh7p97xi9wsfmd8"))))))
;; java-biojava-phylo-4.0: the phylo module built on top of
;; java-biojava-core-4.0; its dependency list includes java-biojava-core-4.0
;; and java-forester-1.005.  The build settings mirror java-biojava-phylo.
10879 (define-public java-biojava-phylo-4.0
10880 (package (inherit java-biojava-core-4.0)
10881 (name "java-biojava-phylo")
10882 (build-system ant-build-system)
10885 #:jar-name "biojava-phylo.jar"
10886 #:source-dir "biojava-phylo/src/main/java/"
10887 #:test-dir "biojava-phylo/src/test"
10889 (modify-phases %standard-phases
10890 (add-before 'build 'copy-resources
10892 (copy-recursively "biojava-phylo/src/main/resources"
10895 (add-before 'check 'copy-test-resources
10897 (copy-recursively "biojava-phylo/src/test/resources"
10898 "build/test-classes")
10901 (list java-log4j-api
10905 java-biojava-core-4.0
10906 java-forester-1.005))
10908 (list java-junit java-hamcrest-core))
10909 (home-page "https://biojava.org")
10910 (synopsis "Biojava interface to the forester phylogenomics library")
10911 (description "The phylo module provides a biojava interface layer to the
10912 forester phylogenomics library for constructing phylogenetic trees.")))
;; java-biojava-alignment-4.0: the alignment module built on top of
;; java-biojava-core-4.0; its dependency list includes the 4.0 core and
;; phylo packages and java-forester-1.005.  The build settings mirror
;; java-biojava-alignment.
10914 (define-public java-biojava-alignment-4.0
10915 (package (inherit java-biojava-core-4.0)
10916 (name "java-biojava-alignment")
10917 (build-system ant-build-system)
10920 #:jar-name "biojava-alignment.jar"
10921 #:source-dir "biojava-alignment/src/main/java/"
10922 #:test-dir "biojava-alignment/src/test"
10924 (modify-phases %standard-phases
10925 (add-before 'build 'copy-resources
10927 (copy-recursively "biojava-alignment/src/main/resources"
10930 (add-before 'check 'copy-test-resources
10932 (copy-recursively "biojava-alignment/src/test/resources"
10933 "build/test-classes")
10936 (list java-log4j-api
10940 java-biojava-core-4.0
10941 java-biojava-phylo-4.0
10942 java-forester-1.005))
10944 (list java-junit java-hamcrest-core))
10945 (home-page "https://biojava.org")
10946 (synopsis "Biojava API for genetic sequence alignment")
10947 (description "The alignment module of BioJava provides an API that
10951 @item implementations of dynamic programming algorithms for sequence
10953 @item reading and writing of popular alignment file formats;
10954 @item a single-, or multi- threaded multiple sequence alignment algorithm.
;; dropseq-tools: tools for Drop-seq analyses.  The source is a zip archive
;; downloaded from mccarrolllab.com; bundled jars and the 3rdParty directory
;; are removed.  Built with ant-build-system (target all, tests disabled).
;; Java dependencies are symlinked into jar/lib for the build, and the
;; install phases copy dropseq.jar and the wrapper scripts into the output
;; and recreate the jar links there.
10957 (define-public dropseq-tools
10959 (name "dropseq-tools")
10964 (uri "http://mccarrolllab.com/download/1276/")
10965 (file-name (string-append "dropseq-tools-" version ".zip"))
10968 "0yrffckxqk5l8b5xb6z4laq157zd9mdypr2p4b4vq2bhjzi1sj0s"))
10969 ;; Delete bundled libraries
10970 (modules '((guix build utils)))
10973 (for-each delete-file (find-files "jar/lib" "\\.jar$"))
10974 (delete-file-recursively "3rdParty")))))
10975 (build-system ant-build-system)
10977 `(#:tests? #f ; test data are not included
10978 #:test-target "test"
10979 #:build-target "all"
10980 #:source-dir "public/src/"
10983 (list ,#~(string-append "-Dpicard.executable.dir="
10984 #$(this-package-input "java-picard")
10986 #:modules ((ice-9 match)
10989 (guix build java-utils)
10990 (guix build ant-build-system))
10992 (modify-phases %standard-phases
10993 ;; FIXME: fails with "java.io.FileNotFoundException:
10994 ;; /gnu/store/…-dropseq-tools-1.13/share/java/lib/biojava-alignment.jar"
10995 (delete 'generate-jar-indices)
10996 ;; All dependencies must be linked to "lib", because that's where
10997 ;; they will be searched for when the Class-Path property of the
10998 ;; manifest is computed.
10999 (add-after 'unpack 'record-references
11000 (lambda* (#:key inputs #:allow-other-keys)
11001 (mkdir-p "jar/lib")
;; Inputs are selected by name: the name must start with java- and must
;; not be java-testng.  Every jar found for the selected inputs is
;; symlinked into jar/lib.
11002 (let ((dirs (filter-map (match-lambda
11004 (if (and (string-prefix? "java-" name)
11005 (not (string=? name "java-testng")))
11008 (for-each (lambda (jar)
11009 (symlink jar (string-append "jar/lib/" (basename jar))))
11010 (append-map (lambda (dir) (find-files dir "\\.jar$"))
11012 ;; There is no installation target
11014 (lambda* (#:key inputs outputs #:allow-other-keys)
11015 (let* ((out (assoc-ref outputs "out"))
11016 (bin (string-append out "/bin"))
11017 (share (string-append out "/share/java/"))
11018 (lib (string-append share "/lib/"))
11019 (scripts (list "BAMTagHistogram"
11020 "BAMTagofTagCounts"
11021 "BaseDistributionAtReadPosition"
11022 "CollapseBarcodesInPlace"
11023 "CollapseTagWithContext"
11025 "CreateIntervalsFiles"
11026 "DetectBeadSynthesisErrors"
11027 "DigitalExpression"
11028 "Drop-seq_alignment.sh"
11031 "GatherGeneGCLength"
11032 "GatherMolecularBarcodeDistributionByGene"
11033 "GatherReadQualityMetrics"
11036 "SelectCellsByNumTranscripts"
11037 "SingleCellRnaSeqMetricsCollector"
11038 "TagBamWithReadSequenceExtended"
11039 "TagReadWithGeneExon"
11040 "TagReadWithInterval"
11041 "TrimStartingSequence"
11042 "ValidateReference")))
11043 (for-each mkdir-p (list bin share lib))
11044 (install-file "dist/dropseq.jar" share)
11045 (for-each (lambda (script)
11046 (chmod script #o555)
11047 (install-file script bin))
;; In the installed scripts, replace a leading java with the path returned
;; by which, and point jar_deploy_dir at the share directory.
11049 (substitute* (map (lambda (script)
11050 (string-append bin "/" script))
11052 (("^java") (which "java"))
11053 (("jar_deploy_dir=.*")
11054 (string-append "jar_deploy_dir=" share "\n"))))))
11055 ;; FIXME: We do this after stripping jars because we don't want it to
11056 ;; copy all these jars and strip them. We only want to install
11057 ;; links. Arguably, this is a problem with the ant-build-system.
11058 (add-after 'strip-jar-timestamps 'install-links
11059 (lambda* (#:key outputs #:allow-other-keys)
11060 (let* ((out (assoc-ref outputs "out"))
11061 (share (string-append out "/share/java/"))
11062 (lib (string-append share "/lib/")))
;; Each link under jar/lib is resolved with readlink and a new symlink to
;; the same target is created in the output's lib directory.
11063 (for-each (lambda (jar)
11064 (symlink (readlink jar)
11065 (string-append lib (basename jar))))
11066 (find-files "jar/lib" "\\.jar$"))))))))
11068 `(("jdk" ,icedtea-8)
11069 ("java-picard" ,java-picard-2.10.3)
11070 ("java-log4j-1.2-api" ,java-log4j-1.2-api)
11071 ("java-commons-math3" ,java-commons-math3)
11072 ("java-commons-jexl2" ,java-commons-jexl-2)
11073 ("java-commons-collections4" ,java-commons-collections4)
11074 ("java-commons-lang2" ,java-commons-lang)
11075 ("java-commons-io" ,java-commons-io)
11076 ("java-snappy-1.0.3-rc3" ,java-snappy-1)
11077 ("java-guava" ,java-guava)
11078 ("java-la4j" ,java-la4j)
11079 ("java-biojava-core" ,java-biojava-core-4.0)
11080 ("java-biojava-alignment" ,java-biojava-alignment-4.0)
11081 ("java-jdistlib" ,java-jdistlib)
11082 ("java-simple-xml" ,java-simple-xml)
11083 ("java-snakeyaml" ,java-snakeyaml)))
11085 (list unzip java-testng))
11086 (home-page "http://mccarrolllab.com/dropseq/")
11087 (synopsis "Tools for Drop-seq analyses")
11088 (description "Drop-seq is a technology to enable biologists to
11089 analyze RNA expression genome-wide in thousands of individual cells at
11090 once. This package provides tools to perform Drop-seq analyses.")
11091 (license license:expat)))
;; pigx-rnaseq: PiGX analysis pipeline for RNA sequencing experiments.  The
;; source is the release tarball from the BIMSBbioinfo GitHub project, built
;; with gnu-build-system.  Tests are not run in parallel and two
;; resource-intensive test scripts are removed from Makefile.in.
11093 (define-public pigx-rnaseq
11095 (name "pigx-rnaseq")
11099 (uri (string-append "https://github.com/BIMSBbioinfo/pigx_rnaseq/"
11100 "releases/download/v" version
11101 "/pigx_rnaseq-" version ".tar.gz"))
11104 "0acdjimfb9ywba8zsv7lavv436pmcmp8ra683h11wr4s3681pqk8"))))
11105 (build-system gnu-build-system)
11107 `(#:parallel-tests? #f ; not supported
11109 (modify-phases %standard-phases
11110 ;; "test.sh" runs the whole pipeline, which takes a long time and
11111 ;; might fail due to OOM. The MultiQC is also resource intensive.
11112 (add-after 'unpack 'disable-resource-intensive-test
11114 (substitute* "Makefile.in"
11115 (("^ tests/test_multiqc/test.sh") "")
11116 (("^ test.sh") ""))))
;; Makefile.in is patched above, so the build system is regenerated with
;; autoreconf -vif before the bootstrap phase.
11117 (add-before 'bootstrap 'autoreconf
11119 (invoke "autoreconf" "-vif")))
;; PYTHONPATH is set from GUIX_PYTHONPATH before the configure phase.
11120 (add-before 'configure 'set-PYTHONPATH
11122 (setenv "PYTHONPATH" (getenv "GUIX_PYTHONPATH"))))
11123 (add-before 'check 'set-timezone
11124 ;; The readr package is picky about timezones.
11125 (lambda* (#:key inputs #:allow-other-keys)
11126 (setenv "TZ" "UTC+1")
11128 (search-input-directory inputs
11129 "share/zoneinfo")))))))
11156 r-summarizedexperiment
11167 (list tzdata automake autoconf))
11168 (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
11169 (synopsis "Analysis pipeline for RNA sequencing experiments")
11170 (description "PiGX RNAseq is an analysis pipeline for preprocessing and
11171 reporting for RNA sequencing experiments. It is easy to use and produces high
11172 quality reports. The inputs are reads files from the sequencing experiment,
11173 and a configuration file which describes the experiment. In addition to
11174 quality control of the experiment, the pipeline produces a differential
11175 expression report comparing samples in an easily configurable manner.")
11176 (license license:gpl3+)))
;; pigx-chipseq: PiGX analysis pipeline for ChIP sequencing experiments.  The
;; source is the release tarball from the BIMSBbioinfo GitHub project, built
;; with gnu-build-system.  Tests are disabled because parts of them need
;; network access.
11178 (define-public pigx-chipseq
11180 (name "pigx-chipseq")
11184 (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/"
11185 "releases/download/v" version
11186 "/pigx_chipseq-" version ".tar.gz"))
11189 "008n6drj9q5av86xihxlj4py2c9p3c5z5ld89c3bksrp77zxiy67"))))
11190 (build-system gnu-build-system)
11192 `(#:tests? #f ; parts of the tests rely on access to the network
11194 (modify-phases %standard-phases
;; PYTHONPATH is set from GUIX_PYTHONPATH before the configure phase.
11195 (add-before 'configure 'set-PYTHONPATH
11197 (setenv "PYTHONPATH" (getenv "GUIX_PYTHONPATH")))))))
11212 r-genomicalignments
11248 (list python-pytest))
11249 (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
11250 (synopsis "Analysis pipeline for ChIP sequencing experiments")
11251 (description "PiGX ChIPseq is an analysis pipeline for preprocessing, peak
11252 calling and reporting for ChIP sequencing experiments. It is easy to use and
11253 produces high quality reports. The inputs are reads files from the sequencing
11254 experiment, and a configuration file which describes the experiment. In
11255 addition to quality control of the experiment, the pipeline enables to set up
11256 multiple peak calling analysis and allows the generation of a UCSC track hub
11257 in an easily configurable manner.")
11258 (license license:gpl3+)))
;; Package definition for the PiGx BSseq bisulfite sequencing pipeline,
;; fetched as the upstream release tarball from GitHub and built with the GNU
;; build system.  Extra phases export PYTHONPATH before 'configure and set
;; the timezone before 'check.
11260 (define-public pigx-bsseq
11262 (name "pigx-bsseq")
;; The download URL is assembled from the package version.
11266 (uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/"
11267 "releases/download/v" version
11268 "/pigx_bsseq-" version ".tar.gz"))
11271 "1s8zgrqxabrawrgkga5rmgb0gyzj7ck47p3rkicjkfv7r2yjy0d7"))))
11272 (build-system gnu-build-system)
11274 `(;; TODO: tests currently require 12+GB of RAM. See
11275 ;; https://github.com/BIMSBbioinfo/pigx_bsseq/issues/164
11278 (modify-phases %standard-phases
;; Copy the value of GUIX_PYTHONPATH into PYTHONPATH before 'configure.
11279 (add-before 'configure 'set-PYTHONPATH
11281 (setenv "PYTHONPATH" (getenv "GUIX_PYTHONPATH"))))
11282 (add-before 'check 'set-timezone
11283 ;; The readr package is picky about timezones.
11284 (lambda* (#:key inputs #:allow-other-keys)
11285 (setenv "TZ" "UTC+1")
11287 (search-input-directory inputs
11288 "share/zoneinfo")))))))
11323 (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
11324 (synopsis "Bisulfite sequencing pipeline from fastq to methylation reports")
11325 (description "PiGx BSseq is a data processing pipeline for raw fastq read
11326 data of bisulfite experiments; it produces reports on aggregate methylation
11327 and coverage and can be used to produce information on differential
11328 methylation and segmentation.")
11329 (license license:gpl3+)))
;; Package definition for the PiGx scRNAseq single-cell pipeline, fetched as
;; the upstream release tarball from GitHub and built with the GNU build
;; system.  One extra phase sets NUMBA_CACHE_DIR and PYTHONPATH before
;; 'configure runs.
11331 (define-public pigx-scrnaseq
11333 (name "pigx-scrnaseq")
;; The download URL is assembled from the package version.
11337 (uri (string-append "https://github.com/BIMSBbioinfo/pigx_scrnaseq/"
11338 "releases/download/v" version
11339 "/pigx_scrnaseq-" version ".tar.gz"))
11342 "0adx7877c3lhlrzfid76i8bc829wcmzvrm0jx47gyid8mxqb7vqs"))))
11343 (build-system gnu-build-system)
11346 (modify-phases %standard-phases
11347 (add-before 'configure 'set-additional-environment-variables
11349 ;; Needed because of loompy
11350 (setenv "NUMBA_CACHE_DIR" "/tmp")
11351 ;; Needed to capture environment
11352 (setenv "PYTHONPATH" (getenv "GUIX_PYTHONPATH")))))))
11375 r-delayedmatrixstats
11379 r-genomicalignments
11392 r-singlecellexperiment
11395 (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
11396 (synopsis "Analysis pipeline for single-cell RNA sequencing experiments")
11398 "PiGX scRNAseq is an analysis pipeline for preprocessing and
11399 quality control for single cell RNA sequencing experiments. The inputs are
11400 read files from the sequencing experiment, and a configuration file which
11401 describes the experiment. It produces processed files for downstream analysis
11402 and interactive quality reports. The pipeline is designed to work with UMI
11404 (license license:gpl3+)))
;; Package definition for the PiGx SARS-CoV-2 wastewater sequencing
;; pipeline, fetched as the upstream release tarball from GitHub and built
;; with the GNU build system.  Tests are disabled because they need a huge
;; kraken database.
11406 (define-public pigx-sars-cov-2
11408 (name "pigx-sars-cov-2")
;; The download URL is assembled from the package version.
11412 (uri (string-append "https://github.com/BIMSBbioinfo/pigx_sars-cov-2"
11413 "/releases/download/v" version
11414 "/pigx_sars-cov-2-" version ".tar.gz"))
11417 "1bqm03ypf7l8lrkjkydxzn7vy0qlps3v9c5cpz2wb008zw44bi3k"))))
11418 (build-system gnu-build-system)
11420 `(#:tests? #f ;requires huge kraken database
11422 (modify-phases %standard-phases
;; Before 'bootstrap: rewrite the R package check in m4/ax_r_package.m4
;; (packageDescription test replaced by a system.file test, see the linked
;; upstream issue) and then regenerate the build files with autoreconf.
11423 (add-before 'bootstrap 'autoreconf
11425 ;; https://github.com/BIMSBbioinfo/pigx_sars-cov-2/issues/123
11426 (substitute* "m4/ax_r_package.m4"
11427 (("if\\(is.na\\(packageDescription\\(\"PKG\"\\)\\)\\)")
11428 "if(system.file(package=\"PKG\") == \"\")"))
11429 (invoke "autoreconf" "-vif")))
11430 (add-before 'configure 'set-PYTHONPATH
11432 (setenv "PYTHONPATH" (getenv "GUIX_PYTHONPATH")))))))
11434 (list automake autoconf))
11466 (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
11467 (synopsis "Analysis pipeline for wastewater sequencing")
11468 (description "PiGx SARS-CoV-2 is a pipeline for analysing data from
11469 sequenced wastewater samples and identifying given variants-of-concern of
11470 SARS-CoV-2. The pipeline can be used for continuous sampling. The output
11471 report will provide an intuitive visual overview about the development of
11472 variant abundance over time and location.")
11473 (license license:gpl3+)))
;; Deprecated alias: the old package name "pigx-sars-cov2-ww" is kept and
;; mapped to pigx-sars-cov-2 through deprecated-package.
11475 (define-public pigx-sars-cov2-ww
11476 (deprecated-package "pigx-sars-cov2-ww" pigx-sars-cov-2))
;; Package definition for PiGx, the collection of PiGx genomics pipelines,
;; fetched as the upstream release tarball from GitHub and built with the GNU
;; build system.
11478 (define-public pigx
;; The download URL is assembled from the package version.
11484 (uri (string-append "https://github.com/BIMSBbioinfo/pigx/"
11485 "releases/download/v" version
11486 "/pigx-" version ".tar.gz"))
11489 "1i5njdy1clj5ncw45d16p7mwmqvb1ilikl9n797pxklc3f4s7mq7"))))
11490 (build-system gnu-build-system)
;; The list names python plus the BSseq, ChIPseq, RNAseq and scRNAseq
;; pipeline packages; pigx-sars-cov-2 is not part of it.
11492 (list python pigx-bsseq pigx-chipseq pigx-rnaseq pigx-scrnaseq))
11493 (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
11494 (synopsis "Analysis pipelines for genomics")
11495 (description "PiGx is a collection of genomics pipelines. It includes the
11496 following pipelines:
11499 @item PiGx BSseq for raw fastq read data of bisulfite experiments
11500 @item PiGx RNAseq for RNAseq samples
11501 @item PiGx scRNAseq for single cell dropseq analysis
11502 @item PiGx ChIPseq for reads from ChIPseq experiments
11505 All pipelines are easily configured with a simple sample sheet and a
11506 descriptive settings file. The result is a set of comprehensive, interactive
11507 HTML reports with interesting findings about your samples.")
11508 (license license:gpl3+)))
;; Package definition for Genrich, a peak caller for genomic enrichment
;; assays.  The source is the git tag "v" + version of the upstream
;; repository; there is no test suite and no 'configure phase, and the
;; "Genrich" executable is copied into the output's bin directory.
11510 (define-public genrich
11516 (uri (git-reference
11517 (url "https://github.com/jsh58/Genrich")
11518 (commit (string-append "v" version))))
11519 (file-name (git-file-name name version))
11522 "0x0q6z0208n3cxzqjla4rgjqpyqgwpmz27852lcvzkzaigymq4zp"))))
11523 (build-system gnu-build-system)
11525 `(#:tests? #f ; there are none
11527 (modify-phases %standard-phases
11528 (delete 'configure)
;; Install the single "Genrich" file into OUT/bin.
11530 (lambda* (#:key outputs #:allow-other-keys)
11531 (install-file "Genrich" (string-append (assoc-ref outputs "out") "/bin"))
11535 (home-page "https://github.com/jsh58/Genrich")
11536 (synopsis "Detecting sites of genomic enrichment")
11537 (description "Genrich is a peak-caller for genomic enrichment
11538 assays (e.g. ChIP-seq, ATAC-seq). It analyzes alignment files generated
11539 following the assay and produces a file detailing peaks of significant
11541 (license license:expat)))
;; Package definition for Mantis, a sequence-search index data structure.
;; It is built with CMake from a pinned commit of the upstream repository,
;; has no tests, passes -DNH=ON to CMake and is restricted to x86_64-linux.
11543 (define-public mantis
11544 ;; This is an arbitrary commit as a year has passed since 0.1 was tagged.
11545 (let ((commit "b6979a269172a45201c8366680d8b889f889432b")
;; The version string is derived from base version "0.1", the revision and
;; the commit via git-version.
11549 (version (git-version "0.1" revision commit))
11552 (uri (git-reference
11553 (url "https://github.com/splatlab/mantis")
11555 (file-name (git-file-name name version))
11558 "0dq8a785hnaxx5kq757m5czs8xpcjpcph1inq2nm8h6zfvqyj8xs"))))
11559 (build-system cmake-build-system)
11561 '(#:tests? #f ; there are none
11562 #:configure-flags (list "-DNH=ON"))) ; do not use SSE4.2 instructions
11564 (list sdsl-lite openssl zlib))
11567 (home-page "https://github.com/splatlab/mantis")
11568 (synopsis "Large-scale sequence-search index data structure")
11569 (description "Mantis is a space-efficient data structure that can be
11570 used to index thousands of raw-read genomics experiments and facilitate
11571 large-scale sequence searches on those experiments. Mantis uses counting
11572 quotient filters instead of Bloom filters, enabling rapid index builds and
11573 queries, small indexes, and exact results, i.e., no false positives or
11574 negatives. Furthermore, Mantis is also a colored de Bruijn graph
11575 representation, so it supports fast graph traversal and other topological
11576 analyses in addition to large-scale sequence-level searches.")
11577 ;; uses __uint128_t and inline assembly
11578 (supported-systems '("x86_64-linux"))
11579 (license license:bsd-3))))
;; Package definition for sjcount, a splice junction counting utility.  It
;; is built from a pinned commit of the sjcount-full repository with the GNU
;; build system.  Tests are disabled, the 'configure phase is replaced by a
;; patch of the makefile, and three executables are installed by hand.
11581 (define-public sjcount
11582 ;; There is no tag for version 3.2, nor is there a release archive.
11583 (let ((commit "292d3917cadb3f6834c81e509c30e61cd7ead6e5")
11587 (version (git-version "3.2" revision commit))
11590 (uri (git-reference
11591 (url "https://github.com/pervouchine/sjcount-full")
;; Use the git-file-name helper like every other git checkout in this file;
;; it produces the same NAME-VERSION-checkout string as the former
;; hand-written string-append.
11593 (file-name (git-file-name name version))
11596 "0gdgj35j249f04rqgq8ymcc1xg1vi9kzbajnjqpaq2wpbh8bl234"))))
11597 (build-system gnu-build-system)
11599 `(#:tests? #f ; requires a 1.4G test file
;; SAMTOOLS_DIR is built from the "samtools" input of this package.
11601 ,#~(list (string-append "SAMTOOLS_DIR="
11602 #$(this-package-input "samtools")
11605 (modify-phases %standard-phases
;; There is no configure script: instead rewrite the samtools include
;; flag in the makefile and add -lpthread after -lz.
11606 (replace 'configure
11607 (lambda* (#:key inputs #:allow-other-keys)
11608 (substitute* "makefile"
11609 (("-I \\$\\{SAMTOOLS_DIR\\}")
11610 (string-append "-I" (assoc-ref inputs "samtools")
11611 "/include/samtools"))
11612 (("-lz ") "-lz -lpthread "))))
11614 (lambda* (#:key outputs #:allow-other-keys)
11615 (for-each (lambda (tool)
11617 (string-append (assoc-ref outputs "out")
11619 '("j_count" "b_count" "sjcount")))))))
11621 (list samtools-0.1 zlib))
11622 (home-page "https://github.com/pervouchine/sjcount-full/")
11623 (synopsis "Annotation-agnostic splice junction counting pipeline")
11624 (description "Sjcount is a utility for fast quantification of splice
11625 junctions in RNA-seq data. It is annotation-agnostic and offset-aware. This
11626 version does count multisplits.")
11627 (license license:gpl3+))))
;; Package definition for minimap2, built from the upstream release tarball
;; with the GNU build system.  There are no tests and no 'configure phase.
;; The make flags set CC and dispatch on the target system string, and the
;; install step copies the executable, static library, man page and headers
;; by hand and writes a pkg-config file.
11629 (define-public minimap2
;; The download URL is assembled from the package version.
11636 (uri (string-append "https://github.com/lh3/minimap2/"
11637 "releases/download/v" version "/"
11638 "minimap2-" version ".tar.bz2"))
11641 "05d6h2c1h95s5vblf1fijn9g0r4g69nsvkabji42j642y0gw7m4x"))))
11642 (build-system gnu-build-system)
11644 `(#:tests? #f ; there are none
11645 #:modules ((guix build utils)
11646 (guix build gnu-build-system)
11649 (list (string-append "CC=" ,(cc-for-target))
;; Choose a flag from the prefix of the target system (or of the current
;; system when not cross-compiling).
11650 (let ((system ,(or (%current-target-system)
11651 (%current-system))))
11653 ((string-prefix? "x86_64" system)
11655 ((or (string-prefix? "i586" system)
11656 (string-prefix? "i686" system))
11658 ((string-prefix? "armhf" system)
11660 ((string-prefix? "aarch64" system)
11664 (modify-phases %standard-phases
11665 (delete 'configure)
11667 (lambda* (#:key outputs #:allow-other-keys)
11668 (let* ((out (assoc-ref outputs "out"))
11669 (bin (string-append out "/bin"))
11670 (lib (string-append out "/lib"))
11671 (inc (string-append out "/include"))
11672 (man (string-append out "/share/man/man1")))
11673 (install-file "minimap2" bin)
11674 (install-file "libminimap2.a" lib)
11675 (install-file "minimap2.1" man)
;; Install every header; for-each is used because only the side effect
;; matters and the result list of map was discarded.
11676 (for-each (cut install-file <> inc)
11677 (find-files "." "\\.h$"))
11679 (delete-file (string-append inc "/emmintrin.h"))
11680 (mkdir-p (string-append lib "/pkgconfig"))
11681 (with-output-to-file (string-append lib "/pkgconfig/minimap2.pc")
11683 (format #t "prefix=~a~@
11684 exec_prefix=${prefix}~@
11685 libdir=${exec_prefix}/lib~@
11686 includedir=${prefix}/include~@
11688 Name: libminimap2~@
11690 Description: A versatile pairwise aligner for genomic and spliced nucleotide sequence~@
11691 Libs: -L${libdir} -lminimap2~@
11692 Cflags: -I${includedir}~%"
11697 (home-page "https://lh3.github.io/minimap2/")
11698 (synopsis "Pairwise aligner for genomic and spliced nucleotide sequences")
11699 (description "Minimap2 is a versatile sequence alignment program that
11700 aligns DNA or mRNA sequences against a large reference database. Typical use
11704 @item mapping PacBio or Oxford Nanopore genomic reads to the human genome;
11705 @item finding overlaps between long reads with error rate up to ~15%;
11706 @item splice-aware alignment of PacBio Iso-Seq or Nanopore cDNA or Direct RNA
11707 reads against a reference genome;
11708 @item aligning Illumina single- or paired-end reads;
11709 @item assembly-to-assembly alignment;
11710 @item full-genome alignment between two closely related species with
11711 divergence below ~15%.
11713 (license license:expat)))
;; Package definition for mappy, the Python binding for minimap2.  The
;; source comes from PyPI (project name "mappy") and the package uses the
;; Python build system; python-cython appears in one of its input lists.
11715 (define-public python-mappy
11717 (name "python-mappy")
11721 (uri (pypi-uri "mappy" version))
11724 "1ycszza87p9qvx8mis9v1hry0ac465x1xcxbsn1k45qlxxrzp8im"))))
11725 (build-system python-build-system)
11727 (list python-cython))
11730 (home-page "https://github.com/lh3/minimap2")
11731 (synopsis "Python binding for minimap2")
11732 (description "This package provides a convenient interface to minimap2,
11733 a fast and accurate C program to align genomic and transcribe nucleotide
11735 (license license:expat)))
;; Package definition for miniasm, a de novo assembler for long noisy
;; reads.  The source is the git tag "v" + version of the upstream
;; repository; there are no tests and no 'configure phase, and the
;; "miniasm" and "minidot" files are copied into the output's bin directory.
11737 (define-public miniasm
11743 (uri (git-reference
11744 (url "https://github.com/lh3/miniasm")
11745 (commit (string-append "v" version))))
11746 (file-name (git-file-name name version))
11749 "04dv5wv8bhsw1imxwyd438bnn9kby7svp44nbcz8lsadzjjci5gs"))))
11750 (build-system gnu-build-system)
11754 `(#:tests? #f ; There are no tests.
11756 (modify-phases %standard-phases
11757 (delete 'configure)
;; Manual installation of the two executables into OUT/bin.
11759 (lambda* (#:key inputs outputs #:allow-other-keys)
11760 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
11761 (install-file "miniasm" bin)
11762 (install-file "minidot" bin)
11764 (home-page "https://github.com/lh3/miniasm")
11765 (synopsis "Ultrafast de novo assembly for long noisy reads")
11766 (description "Miniasm is a very fast OLC-based de novo assembler for noisy
11767 long reads. It takes all-vs-all read self-mappings (typically by minimap) as
11768 input and outputs an assembly graph in the GFA format. Different from
11769 mainstream assemblers, miniasm does not have a consensus step. It simply
11770 concatenates pieces of read sequences to generate the final unitig sequences.
11771 Thus the per-base error rate is similar to the raw input reads.")
11772 (license license:expat)))
;; Package definition for Bandage, a viewer for de novo assembly graphs.
;; The source is the git tag "v" + version of the upstream repository and
;; the package uses the Qt build system.  'configure is replaced by a qmake
;; call, the command line test script is run from the "tests" directory, and
;; the "Bandage" executable is installed into the output's bin directory.
11774 (define-public bandage
11781 (uri (git-reference
11782 (url "https://github.com/rrwick/Bandage")
11783 (commit (string-append "v" version))))
11784 (file-name (git-file-name name version))
11786 (base32 "1bbsn5f5x8wlspg4pbibqz6m5vin8c19nl224f3z3km0pkc97rwv"))))
11787 (build-system qt-build-system)
11790 (modify-phases %standard-phases
11791 (replace 'configure
11793 (invoke "qmake" "Bandage.pro")))
11795 (lambda* (#:key tests? #:allow-other-keys)
;; Point the test script at the Bandage executable in the current
;; directory, then run it from "tests" with XDG_RUNTIME_DIR set to that
;; directory.
11797 (substitute* "tests/bandage_command_line_tests.sh"
11798 (("^bandagepath=.*")
11799 (string-append "bandagepath=" (getcwd) "/Bandage\n")))
11800 (with-directory-excursion "tests"
11801 (setenv "XDG_RUNTIME_DIR" (getcwd))
11802 (invoke "./bandage_command_line_tests.sh")))
11805 (lambda* (#:key outputs #:allow-other-keys)
11806 (let ((out (assoc-ref outputs "out")))
11807 (install-file "Bandage" (string-append out "/bin"))
11810 (list qtbase-5 qtsvg-5))
11812 (list imagemagick))
11813 (home-page "https://rrwick.github.io/Bandage/")
11815 "Bioinformatics Application for Navigating De novo Assembly Graphs Easily")
11816 (description "Bandage is a program for visualising de novo assembly graphs.
11817 It allows users to interact with the assembly graphs made by de novo assemblers
11818 such as Velvet, SPAdes, MEGAHIT and others. De novo assembly graphs contain not
11819 only assembled contigs but also the connections between those contigs, which
11820 were previously not easily accessible. Bandage visualises assembly graphs, with
11821 connections, using graph layout algorithms. Nodes in the drawn graph, which
11822 represent contigs, can be automatically labelled with their ID, length or depth.
11823 Users can interact with the graph by moving, labelling and colouring nodes.
11824 Sequence information can also be extracted directly from the graph viewer. By
11825 displaying connections between contigs, Bandage opens up new possibilities for
11826 analysing and improving de novo assemblies that are not possible by looking at
11828 (license (list license:gpl2+ ; bundled ogdf
;; Package definition for libmaus2, a collection of data structures and
;; algorithms.  The source is a GitLab checkout whose tag is the version
;; followed by a release timestamp suffix; the package uses the GNU build
;; system with tests disabled.
11831 (define-public libmaus2
11834 (version "2.0.786")
11837 (uri (git-reference
11838 (url "https://gitlab.com/german.tischler/libmaus2")
;; The upstream tag name is VERSION-release-TIMESTAMP, so the suffix has to
;; be kept in step with the version above.
11839 (commit (string-append version "-release-20210531143054"))))
11840 (file-name (git-file-name name version))
11843 "1rxakmwjcx2yq5sjh3v849f7dfw4xzc2fyzf6s28s3p95z84w564"))))
11844 (build-system gnu-build-system)
11845 ;; The test suite attempts to execute ../test-driver, which does not exist.
11846 (arguments '(#:tests? #false))
11851 (home-page "https://gitlab.com/german.tischler/libmaus2")
11852 (synopsis "Collection of data structures and algorithms useful for bioinformatics")
11853 (description "libmaus2 is a collection of data structures and
11854 algorithms. It contains:
11857 @item I/O classes (single byte and UTF-8);
11858 @item @code{bitio} classes (input, output and various forms of bit level
11860 @item text indexing classes (suffix and LCP array, fulltext and minute (FM),
11862 @item BAM sequence alignment files input/output (simple and collating);
11863 and many lower level support classes.
11865 ;; The code is explicitly available under the terms of either GPLv2 or
11866 ;; GPLv3 according to the AUTHORS file, though most files have a GPLv3+
11868 (license (list license:gpl2+ license:gpl3+))))
;; Package definition for biobambam2, a set of tools for processing BAM
;; files.  The source is a GitLab checkout whose tag is the version followed
;; by a release timestamp suffix; the package uses the GNU build system and
;; is configured with --with-libmaus2 pointing at its libmaus2 input.
11870 (define-public biobambam2
11872 (name "biobambam2")
11873 (version "2.0.182")
11876 (uri (git-reference
11877 (url "https://gitlab.com/german.tischler/biobambam2")
;; The upstream tag name is VERSION-release-TIMESTAMP, so the suffix has to
;; be kept in step with the version above.
11878 (commit (string-append version "-release-20210412001032"))))
11879 (file-name (git-file-name name version))
11882 "0b7w7a2a7hpkgrdn0n7hy4pilzrj82zqrh7q4bg1l0cd6bqr60m5"))))
11883 (build-system gnu-build-system)
11885 ;; The test suite attempts to execute ../test-driver, which does not exist.
;; Pass the store path of the libmaus2 input to the configure script.
11888 ,#~(list (string-append "--with-libmaus2="
11889 #$(this-package-input "libmaus2")))))
11891 (list libmaus2 xerces-c))
11894 (home-page "https://gitlab.com/german.tischler/biobambam2")
11895 (synopsis "Tools for processing BAM files")
11896 (description "This package contains some tools for processing BAM files
11900 @item bamsormadup: parallel sorting and duplicate marking
11901 @item bamcollate2: reads BAM and writes BAM reordered such that alignment or
11902 collated by query name
11903 @item bammarkduplicates: reads BAM and writes BAM with duplicate alignments
11904 marked using the BAM flags field
11905 @item bammaskflags: reads BAM and writes BAM while masking (removing) bits
11906 from the flags column
11907 @item bamrecompress: reads BAM and writes BAM with a defined compression
11908 setting. This tool is capable of multi-threading.
11909 @item bamsort: reads BAM and writes BAM resorted by coordinates or query name
11910 @item bamtofastq: reads BAM and writes FastQ; output can be collated or
11911 uncollated by query name.
11914 ;; The COPYING file states that the code is distributed under version 3 of
11915 ;; the GPL, but the license headers include the "or later" clause.
11916 (license license:gpl3+)))
;; Package definition for the R package dyngen, a simulator for single-cell
;; omics analyses.  The source comes from CRAN and the package uses the R
;; build system.
11918 (define-public r-dyngen
11925 (uri (cran-uri "dyngen" version))
11928 "1qmqy0dyiz30zpf3ii4h2ip6hg2449ghb474sjzrqa1yk9mdpy4i"))))
;; Record the upstream package name as a package property.
11929 (properties `((upstream-name . "dyngen")))
11930 (build-system r-build-system)
11950 (home-page "https://github.com/dynverse/dyngen")
11951 (synopsis "Multi-Modal simulator for single-cell omics analyses")
11953 "This package provides a multi-modal simulation engine for studying
11954 dynamic cellular processes at single-cell resolution.")
11955 (license license:expat)))
;; Development snapshot of the R package OmnipathR, built from a pinned
;; commit of the upstream git repository with the R build system.  The
;; package name stays "r-omnipathr"; only the Scheme variable carries the
;; /devel suffix.  HOME is set to /tmp right after 'unpack.
11957 ;; Needed for r-liana
11958 (define-public r-omnipathr/devel
11959 (let ((commit "679bb79e319af246a16968d27d64d8d6937a331a")
11962 (name "r-omnipathr")
11963 (version (git-version "3.5.5" revision commit))
11966 (uri (git-reference
11967 (url "https://github.com/saezlab/omnipathr")
11969 (file-name (git-file-name name version))
11972 "10h6lyapyx4ik8r4kx5z2dly46jlf2v57caq4g6i0hzifyz2vgjq"))))
11973 (properties `((upstream-name . "OmnipathR")))
11974 (build-system r-build-system)
11977 (modify-phases %standard-phases
;; NOTE(review): presumably needed for the same ~/.config/OmnipathR lookup
;; that r-liana documents -- confirm.
11978 (add-after 'unpack 'set-HOME
11979 (lambda _ (setenv "HOME" "/tmp"))))))
12006 (native-inputs (list r-knitr))
12007 (home-page "https://github.com/saezlab/omnipathr")
12008 (synopsis "OmniPath web service client and more")
12010 "This package provides a client for the OmniPath web service and many
12011 other resources. It also includes functions to transform and pretty print
12012 some of the downloaded data, functions to access a number of other resources
12013 such as BioPlex, ConsensusPathDB, EVEX, Gene Ontology, Guide to
12014 Pharmacology (IUPHAR/BPS), Harmonizome, HTRIdb, Human Phenotype Ontology,
12015 InWeb InBioMap, KEGG Pathway, Pathway Commons, Ramilowski et al. 2015,
12016 RegNetwork, ReMap, TF census, TRRUST and Vinayagam et al. 2011. Furthermore,
12017 OmnipathR features a close integration with the NicheNet method for ligand
12018 activity prediction from transcriptomics data, and its R implementation
12019 @code{nichenetr}.")
12020 (license license:expat))))
;; Package definition for the R package LIANA, a ligand-receptor analysis
;; framework.  It is built from a pinned commit of the upstream git
;; repository with the R build system; HOME is set to /tmp right after
;; 'unpack.
12022 (define-public r-liana
12023 (let ((commit "efb1249af46f576d1d620956053cfa93b2cee961")
;; The version string is derived from base version "0.1.5", the revision and
;; the commit via git-version.
12027 (version (git-version "0.1.5" revision commit))
12030 (uri (git-reference
12031 (url "https://github.com/saezlab/liana/")
12033 (file-name (git-file-name name version))
12036 "0z645k26kqrfj5f1s412vwclw1q47h1zfxxrh9ijr30pxhpv6cv0"))))
12037 (properties `((upstream-name . "liana")))
12038 (build-system r-build-system)
12042 `(modify-phases %standard-phases
12043 ;; This is needed to find ~/.config/OmnipathR/omnipathr.yml
12044 (add-after 'unpack 'set-HOME
12045 (lambda _ (setenv "HOME" "/tmp"))))))
12047 (list r-complexheatmap
12061 r-singlecellexperiment
12066 (native-inputs (list r-knitr))
12067 (home-page "https://github.com/saezlab/liana/")
12068 (synopsis "LIANA: a LIgand-receptor ANalysis frAmework")
12070 "LIANA provides a number of methods and resources for ligand-receptor
12071 interaction inference from scRNA-seq data.")
12072 (license license:gpl3))))
;; Package definition for the R package ciRcus, for annotation, analysis
;; and visualization of circRNA data.  The source is the git tag
;; "v" + version of the upstream repository and the package uses the R build
;; system.
12074 (define-public r-circus
12081 (uri (git-reference
12082 (url "https://github.com/BIMSBbioinfo/ciRcus")
12083 (commit (string-append "v" version))))
12084 (file-name (git-file-name name version))
12087 "0jhjn3ilb057hbf6yzrihj13ifxxs32y7nkby8l3lkm28dg4p97h"))))
12088 (build-system r-build-system)
12090 (list r-annotationdbi
12104 r-summarizedexperiment))
12107 (home-page "https://github.com/BIMSBbioinfo/ciRcus")
12108 (synopsis "Annotation, analysis and visualization of circRNA data")
12109 (description "Circus is an R package for annotation, analysis and
12110 visualization of circRNA data. Users can annotate their circRNA candidates
12111 with host genes, gene features they are spliced from, and discriminate between
12112 known and yet unknown splice junctions. Circular-to-linear ratios of circRNAs
12113 can be calculated, and a number of descriptive plots easily generated.")
12114 (license license:artistic2.0)))
;; Package definition for the R package DoubletFinder, which identifies
;; doublets in single-cell RNA sequencing data.  It is built from a pinned
;; commit of the upstream git repository with the R build system.
12116 (define-public r-doubletfinder
12117 (let ((commit "554097ba4e2c0ed7c28dc7f0b5b75277f3a50551")
12120 (name "r-doubletfinder")
;; The version string is derived from base version "2.0.3", the revision and
;; the commit via git-version.
12121 (version (git-version "2.0.3" revision commit))
12125 (uri (git-reference
12126 (url "https://github.com/chris-mcginnis-ucsf/DoubletFinder")
12128 (file-name (git-file-name name version))
12130 (base32 "1q1pnqw7ry4syp04wjmvz5bws6z4vg4c340ky07lk0vp577x2773"))))
12131 (properties `((upstream-name . "DoubletFinder")))
12132 (build-system r-build-system)
12133 (propagated-inputs (list r-fields r-kernsmooth r-rocr))
12134 (home-page "https://github.com/chris-mcginnis-ucsf/DoubletFinder")
12135 (synopsis "Identify doublets in single-cell RNA sequencing data")
12137 "DoubletFinder identifies doublets by generating artificial doublets
12138 from existing scRNA-seq data and defining which real cells preferentially
12139 co-localize with artificial doublets in gene expression space. Other
12140 DoubletFinder package functions are used for fitting DoubletFinder to
12141 different scRNA-seq datasets. For example, ideal DoubletFinder performance in
12142 real-world contexts requires optimal pK selection and homotypic doublet
12143 proportion estimation. pK selection is achieved using pN-pK parameter sweeps
12144 and maxima identification in mean-variance-normalized bimodality coefficient
12145 distributions. Homotypic doublet proportion estimation is achieved by finding
12146 the sum of squared cell annotation frequencies.")
12147 (license license:cc0))))
;; Package definition for the R package CytoBackBone, which merges
;; phenotype information from different cytometric profiles.  It is built
;; from a pinned commit of the upstream git repository with the R build
;; system.
12149 ;; There have been no releases.
12150 (define-public r-cytobackbone
12151 (let ((commit "4c1a0a35cc5ae1f8f516127cec92351d96fe26e7")
12154 (name "r-cytobackbone")
;; The version string is derived from base version "1.0.0", the revision and
;; the commit via git-version.
12155 (version (git-version "1.0.0" revision commit))
12158 (uri (git-reference
12159 (url "https://github.com/tchitchek-lab/CytoBackBone")
12161 (file-name (git-file-name name version))
12164 "0ahiad14zcgdk42xzw5xryic2ibn2l8lkrcdvl2b5sz2js028yb3"))))
12165 (properties `((upstream-name . "CytoBackBone")))
12166 (build-system r-build-system)
12173 (native-inputs (list r-knitr))
12174 (home-page "https://github.com/tchitchek-lab/CytoBackBone")
12175 (synopsis "Merge phenotype information from different cytometric profiles")
12177 "This package implements an algorithm which increases the number of
12178 simultaneously measurable markers and in this way helps with study of the
12179 immune responses. Thus, the present algorithm, named @code{CytoBackBone},
12180 allows combining phenotypic information of cells from different cytometric
12181 profiles obtained from different cytometry panels. This computational
12182 approach is based on the principle that each cell has its own phenotypic and
12183 functional characteristics that can be used as an identification card.
12184 @code{CytoBackBone} uses a set of predefined markers, that we call the
12185 backbone, to define this identification card. The phenotypic information of
12186 cells with similar identification cards in the different cytometric profiles
12188 (license license:gpl2))))
;; Package definition for the R package Giotto, a toolbox for spatial
;; single-cell expression data.  It is built from a pinned commit of the
;; upstream git repository with the R build system.
12190 (define-public r-giotto
12191 (let ((commit "68d7390dce87223cac11d4d8f31705fe0144d011")
;; The version string is derived from base version "1.1.1", the revision and
;; the commit via git-version.
12195 (version (git-version "1.1.1" revision commit))
12199 (uri (git-reference
12200 (url "https://github.com/RubD/Giotto/")
12202 (file-name (git-file-name name version))
12204 (base32 "0mv60khc05wrxzr4ir6cirn7dpqvgwan5hm00lmafsyalr51nf5i"))))
12205 (properties `((upstream-name . "Giotto")))
12206 (build-system r-build-system)
12240 (native-inputs (list r-knitr))
12241 (home-page "https://github.com/RubD/Giotto/")
12242 (synopsis "Spatial single-cell transcriptomics toolbox")
12244 "This package provides a toolbox to process, analyze and visualize
12245 spatial single-cell expression data.")
12246 (license license:expat))))
;; Package definition for gffread, a GFF/GTF parsing and conversion tool.
;; It is built from a pinned commit with the GNU build system, without a
;; 'configure phase or tests.  A separate checkout of gclib is copied into
;; the source tree as "gclib", and the "gffread" executable is installed by
;; hand into the output's bin directory.
12248 (define-public gffread
12249 ;; We cannot use the tagged release because it is not in sync with gclib.
12250 ;; See https://github.com/gpertea/gffread/issues/26
12251 (let ((commit "ba7535fcb3cea55a6e5a491d916e93b454e87fd0")
12255 (version (git-version "0.9.12" revision commit))
12259 (uri (git-reference
12260 (url "https://github.com/gpertea/gffread")
12262 (file-name (git-file-name name version))
12265 "1dl2nbcg96lxpd0drg48ssa8343nf7pw9s9mkrc4mjjmfwsin3ki"))))
12266 (build-system gnu-build-system)
12268 `(#:tests? #f ; no check target
12270 (list "GCLDIR=gclib")
12272 (modify-phases %standard-phases
12273 (delete 'configure)
;; Copy the "gclib-source" input into ./gclib, the directory named by the
;; GCLDIR=gclib setting above.
12274 (add-after 'unpack 'copy-gclib-source
12275 (lambda* (#:key inputs #:allow-other-keys)
12277 (copy-recursively (assoc-ref inputs "gclib-source") "gclib")
12279 ;; There is no install target
12281 (lambda* (#:key outputs #:allow-other-keys)
12282 (let* ((out (assoc-ref outputs "out"))
12283 (bin (string-append out "/bin")))
12284 (install-file "gffread" bin))
;; The gclib checkout is pinned to its own version and commit,
;; independent of the gffread commit above.
12288 ,(let ((version "0.10.3")
12289 (commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
12293 (uri (git-reference
12294 (url "https://github.com/gpertea/gclib")
12296 (file-name (git-file-name "gclib" version))
12299 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
12300 (home-page "https://github.com/gpertea/gffread/")
12301 (synopsis "Parse and convert GFF/GTF files")
12303 "This package provides a GFF/GTF file parsing utility providing format
12304 conversions, region filtering, FASTA sequence extraction and more.")
12305 ;; gffread is under Expat, but gclib is under Artistic 2.0
12306 (license (list license:expat
12307 license:artistic2.0)))))
;; Package definition for FIt-SNE, an FFT-accelerated t-SNE implementation.
;; The source is the git tag "v" + version of the upstream repository.
;; Upstream ships no build system, so the program is compiled with a direct
;; g++ call into bin/fast_tsne; the contents of "bin" and the fast_tsne.R
;; script are then installed by hand.
12309 (define-public fit-sne
12316 (uri (git-reference
12317 (url "https://github.com/KlugerLab/FIt-SNE")
12318 (commit (string-append "v" version))))
12319 (file-name (git-file-name name version))
12322 "1imq4577awc226wvygf94kpz156qdfw8xl0w0f7ss4w10lhmpmf5"))))
12323 (build-system gnu-build-system)
12325 `(#:tests? #false ; there are none
12327 ;; There is no build system.
12328 (modify-phases %standard-phases
12329 (delete 'configure)
;; Compile with C++11 and -O3, linking against pthread, fftw3 and libm.
12332 (invoke "g++" "-std=c++11" "-O3"
12336 "-o" "bin/fast_tsne"
12337 "-pthread" "-lfftw3" "-lm"
12338 "-Wno-address-of-packed-member")))
12340 (lambda* (#:key outputs #:allow-other-keys)
12341 (let* ((out (assoc-ref outputs "out"))
12342 (bin (string-append out "/bin"))
12343 (share (string-append out "/share/fit-sne")))
12344 (for-each (lambda (file) (install-file file bin))
12345 (find-files "bin"))
;; Rewrite FAST_TSNE_SCRIPT_DIR in the R script to the output directory
;; before installing the script under share/fit-sne.
12347 (substitute* "fast_tsne.R"
12348 (("^FAST_TSNE_SCRIPT_DIR.*")
12349 (string-append "FAST_TSNE_SCRIPT_DIR = \"" out "\"\n")))
12350 (install-file "fast_tsne.R" share)))))))
12353 (home-page "https://github.com/KlugerLab/FIt-SNE")
12354 (synopsis "Fast Fourier Transform-accelerated interpolation-based t-SNE")
12355 (description "@dfn{t-Stochastic Neighborhood Embedding} (t-SNE) is a
12356 method for dimensionality reduction and visualization of high dimensional
12357 datasets. A popular implementation of t-SNE uses the Barnes-Hut algorithm to
12358 approximate the gradient at each iteration of gradient descent. This
12359 implementation differs in these ways:
12362 @item Instead of approximating the N-body simulation using Barnes-Hut, we
12363 interpolate onto an equispaced grid and use FFT to perform the convolution.
12364 @item Instead of computing nearest neighbors using vantage-point trees, we
12365 approximate nearest neighbors using the Annoy library. The neighbor lookups
12366 are multithreaded to take advantage of machines with multiple cores.
12369 ;; See LICENSE.txt for details on what license applies to what files.
12370 (license (list license:bsd-4 license:expat license:asl2.0))))
;; Scanpy, built from the upstream git repository.  The visible phases build
;; the package with flit, install every wheel found in "dist" with pip, point
;; Numba's cache at /tmp, and run pytest after deleting test files that need
;; network access or are known to fail.
12372 (define-public python-scanpy
12374 (name "python-scanpy")
12379 (uri (git-reference
12380 (url "https://github.com/theislab/scanpy")
12382 (file-name (git-file-name name version))
12385 "0k524xnx3dvpz5yx65p316wghvi01zs17is8w2m3w2qywiswk0sl"))))
12386 (build-system python-build-system)
12389 (modify-phases %standard-phases
12392 (setenv "SETUPTOOLS_SCM_PRETEND_VERSION" ,version)
12393 ;; ZIP does not support timestamps before 1980.
12394 (setenv "SOURCE_DATE_EPOCH" "315532800")
12395 (invoke "flit" "build")))
12397 (lambda* (#:key inputs outputs #:allow-other-keys)
12398 (add-installed-pythonpath inputs outputs)
12399 (let ((out (assoc-ref outputs "out")))
12400 (for-each (lambda (wheel)
;; Print the wheel name through an explicit directive so that a "~" in
;; the file name is never interpreted as a format directive.
12401 (format #true "~a~%" wheel)
12402 (invoke "python" "-m" "pip" "install"
12403 wheel (string-append "--prefix=" out)))
12404 (find-files "dist" "\\.whl$")))))
12405 ;; Numba needs a writable dir to cache functions.
12406 (add-before 'check 'set-numba-cache-dir
12408 (setenv "NUMBA_CACHE_DIR" "/tmp")))
12410 (lambda* (#:key tests? inputs #:allow-other-keys)
12412 ;; These tests require Internet access.
12413 (delete-file-recursively "scanpy/tests/notebooks")
12414 (delete-file "scanpy/tests/test_clustering.py")
12415 (delete-file "scanpy/tests/test_datasets.py")
12416 (delete-file "scanpy/tests/test_normalization.py")
12417 (delete-file "scanpy/tests/test_score_genes.py")
12418 (delete-file "scanpy/tests/test_highly_variable_genes.py")
12420 ;; TODO: I can't get the plotting tests to work, even with Xvfb.
12421 (delete-file "scanpy/tests/test_embedding_plots.py")
12422 (delete-file "scanpy/tests/test_preprocessing.py")
12423 (delete-file "scanpy/tests/test_read_10x.py")
12425 ;; These two fail with "ValueError: I/O operation on closed file."
12426 (delete-file "scanpy/tests/test_neighbors_key_added.py")
12428 ;; TODO: these fail with TypingError and "Use of unsupported
12429 ;; NumPy function 'numpy.split'".
12430 (delete-file "scanpy/tests/test_metrics.py")
12432 ;; The following tests require 'scanorama', which isn't
12434 (delete-file "scanpy/tests/external/test_scanorama_integrate.py")
;; Make the source tree and the anndata sources importable in addition to
;; whatever GUIX_PYTHONPATH already provides.
12436 (setenv "PYTHONPATH"
12437 (string-append (getcwd) ":"
12438 (assoc-ref inputs "python-anndata:source") ":"
12439 (getenv "GUIX_PYTHONPATH")))
12440 (invoke "pytest" "-vv"
12442 ;; Plot tests that fail.
12443 (string-append "not test_dotplot_matrixplot_stacked_violin"
12444 " and not test_violin_without_raw"
12445 " and not test_correlation"
12446 " and not test_scatterplots"
12447 " and not test_scatter_embedding_add_outline_vmin_vmax_norm"
12448 " and not test_paga"
12449 " and not test_paga_compare"
12450 " and not test_clustermap"
12452 ;; These try to connect to the network
12453 " and not test_scrublet_plots"
12454 " and not test_plot_rank_genes_groups_gene_symbols"
12455 " and not test_pca_n_pcs"
12456 " and not test_pca_chunked"
12457 " and not test_pca_sparse"
12458 " and not test_pca_reproducible"))))))))
12460 (list python-anndata
12465 python-legacy-api-wrap
12474 python-scikit-learn
12477 python-session-info
12483 python-umap-learn))
12485 `(;; This package needs anndata.tests, which is not installed.
12486 ("python-anndata:source" ,(package-source python-anndata))
12487 ("python-flit" ,python-flit)
12488 ("python-leidenalg" ,python-leidenalg)
12489 ("python-pytest" ,python-pytest)
12490 ("python-setuptools-scm" ,python-setuptools-scm)))
12491 (home-page "https://github.com/theislab/scanpy")
12492 (synopsis "Single-Cell Analysis in Python")
12493 (description "Scanpy is a scalable toolkit for analyzing single-cell gene
12494 expression data. It includes preprocessing, visualization, clustering,
12495 pseudotime and trajectory inference and differential expression testing. The
12496 Python-based implementation efficiently deals with datasets of more than one
12498 (license license:bsd-3)))
;; BBKNN, fetched from PyPI.  Tests are disabled; the extra phases set
;; NUMBA_CACHE_DIR and remove the 'sklearn' entry from setup.py.
12500 (define-public python-bbknn
12502 (name "python-bbknn")
12507 (uri (pypi-uri "bbknn" version))
12510 "0q11xdmjr2kf6f179a6kjizj3lllfrq743gslgw67qyzimvrrnhn"))))
12511 (build-system python-build-system)
12513 `(#:tests? #f ; no tests are included
12515 (modify-phases %standard-phases
12516 ;; Numba needs a writable dir to cache functions.
12517 (add-before 'check 'set-numba-cache-dir
12519 (setenv "NUMBA_CACHE_DIR" "/tmp")))
12520 (add-after 'unpack 'do-not-fail-to-find-sklearn
12522 ;; XXX: I have no idea why it cannot seem to find sklearn.
12523 (substitute* "setup.py"
12524 (("'sklearn'") "")))))))
12530 python-scikit-learn
12532 python-umap-learn))
12533 (home-page "https://github.com/Teichlab/bbknn")
12534 (synopsis "Batch balanced KNN")
12535 (description "BBKNN is a batch effect removal tool that can be directly
12536 used in the Scanpy workflow. It serves as an alternative to
12537 @code{scanpy.api.pp.neighbors()}, with both functions creating a neighbour
12538 graph for subsequent use in clustering, pseudotime and UMAP visualisation. If
12539 technical artifacts are present in the data, they will make it challenging to
12540 link corresponding cell types across different batches. BBKNN actively
12541 combats this effect by splitting your data into batches and finding a smaller
12542 number of neighbours for each cell within each of the groups. This helps
12543 create connections between analogous cells in different batches without
12544 altering the counts or PCA space.")
12545 (license license:expat)))
;; dRep, fetched from PyPI and built with python-build-system.
12547 (define-public python-drep
12549 (name "python-drep")
12554 (uri (pypi-uri "drep" version))
12557 "08vk0x6v5c5n7afgd5pcjhsvb424absypxy22hw1cm1n9kirbi77"))))
12558 (build-system python-build-system)
12560 (list python-biopython
12565 python-scikit-learn
12568 (home-page "https://github.com/MrOlm/drep")
12569 (synopsis "De-replication of microbial genomes assembled from multiple samples")
12571 "dRep is a Python program for rapidly comparing large numbers of genomes.
12572 dRep can also \"de-replicate\" a genome set by identifying groups of highly
12573 similar genomes and choosing the best representative genome for each genome
12575 (license license:expat)))
;; inStrain, fetched from PyPI.  The 'patch-relative-imports phase rewrites
;; "from job_utils" into a relative import in docker/run_instrain.py.
12577 (define-public instrain
12584 (uri (pypi-uri "inStrain" version))
12587 "05w1lw75x4lwkzg4qpi055g7hdjp9rnc4ksbxg2hfgksq9djk0hx"))))
12588 (build-system python-build-system)
12591 (modify-phases %standard-phases
12592 (add-after 'unpack 'patch-relative-imports
12594 (substitute* "docker/run_instrain.py"
12597 (("from job_utils")
12598 "from .job_utils")))))))
12600 (list python-biopython-1.73
12611 python-scikit-learn
12614 ;; drep is needed for deprecated plot utilities
12617 (list python-pytest))
12618 (home-page "https://github.com/MrOlm/inStrain")
12619 (synopsis "Calculation of strain-level metrics")
12621 "inStrain is a Python program for analysis of co-occurring genome
12622 populations from metagenomes that allows highly accurate genome comparisons,
12623 analysis of coverage, microdiversity, and linkage, and sensitive SNP detection
12624 with gene localization and synonymous non-synonymous identification.")
12625 ;; The tool itself says that the license is "MIT", but the repository
12626 ;; contains a LICENSE file with the GPLv3.
12627 ;; See https://github.com/MrOlm/inStrain/issues/51
12628 (license license:expat)))
;; gffcompare pinned to a git commit.  There is no configure step and no
;; check target; the gclib sources it builds against are supplied as the
;; "gclib-source" input (itself pinned to a commit) and made available at
;; "../gclib" before the build.  Only the gffcompare binary is installed.
12630 (define-public gffcompare
12631 (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41")
12634 (name "gffcompare")
12635 (version (git-version "0.10.15" revision commit))
12639 (uri (git-reference
12640 (url "https://github.com/gpertea/gffcompare/")
12642 (file-name (git-file-name name version))
12644 (base32 "0cp5qpxdhw4mxpya5dld8wi3jk00zyklm6rcri426wydinrnfmkg"))))
12645 (build-system gnu-build-system)
12647 `(#:tests? #f ; no check target
12649 (modify-phases %standard-phases
12650 (delete 'configure)
12651 (add-before 'build 'copy-gclib-source
12652 (lambda* (#:key inputs #:allow-other-keys)
12655 (assoc-ref inputs "gclib-source") "../gclib")
12658 (lambda* (#:key outputs #:allow-other-keys)
12659 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
12660 (install-file "gffcompare" bin)
12663 `(("gclib-source" ; see 'README.md' of gffcompare
12664 ,(let ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
12667 (version (git-version "0.10.3" revision commit)))
12670 (uri (git-reference
12671 (url "https://github.com/gpertea/gclib/")
12673 (file-name (git-file-name name version))
12675 (base32 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
12676 (home-page "https://github.com/gpertea/gffcompare/")
12677 (synopsis "Tool for comparing or classifying transcripts of RNA-Seq")
12679 "@code{gffcompare} is a tool that can:
12681 @item compare and evaluate the accuracy of RNA-Seq transcript assemblers
12682 (Cufflinks, Stringtie);
12683 @item collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g.
12684 resulted from assembly of different samples);
12685 @item classify transcripts from one or multiple GTF/GFF3 files as they relate to
12686 reference transcripts provided in an annotation file (also in GTF/GFF3 format).
12690 license:expat ;license for gffcompare
12691 license:artistic2.0))))) ;license for gclib
;; C++ interval tree library pinned to a git commit.  It has no tests and no
;; configure step; PREFIX is set to the package output and DESTDIR to an
;; empty string.
12693 (define-public intervaltree
12694 (let ((commit "b90527f9e6d51cd36ecbb50429e4524d3a418ea5"))
12696 (name "intervaltree")
12697 (version (git-version "0.0.0" "1" commit))
12701 (uri (git-reference
12702 (url "https://github.com/ekg/intervaltree/")
12704 (file-name (git-file-name name version))
12706 (base32 "0rgv6q5fl4x5d74n6p5wvdna6zmbdbqpb4jqqh6vq3670gn08xad"))))
12707 (build-system gnu-build-system)
12709 `(#:tests? #f ; No tests.
12711 ,#~(list (string-append "PREFIX=" #$output) "DESTDIR=\"\"")
12713 (modify-phases %standard-phases
12714 (delete 'configure)))) ; There is no configure phase.
12715 (home-page "https://github.com/ekg/intervaltree")
12716 (synopsis "Minimal C++ interval tree implementation")
12717 (description "An interval tree can be used to efficiently find a set of
12718 numeric intervals overlapping or containing another interval. This library
12719 provides a basic implementation of an interval tree using C++ templates,
12720 allowing the insertion of arbitrary types into the tree.")
12721 (license license:expat))))
;; Python intervaltree, fetched from PyPI.  PY_IGNORE_IMPORTMISMATCH is set
;; before the check phase so that pytest's import-mismatch check does not get
;; in the way of the tests.
12723 (define-public python-intervaltree
12725 (name "python-intervaltree")
12730 (uri (pypi-uri "intervaltree" version))
12733 "0bcm6c6r4ck9nfj9xwz4rm2swc5lrjvmw3lyl6rgj639jf41nawh"))))
12734 (build-system python-build-system)
12737 (modify-phases %standard-phases
12738 ;; pytest seems to have a check to make sure the user is testing
12739 ;; their checked-out code and not an installed, potentially
12740 ;; out-of-date copy. This is harmless here, since we just installed
12741 ;; the package, so we disable the check to avoid skipping tests
12743 (add-before 'check 'import-mismatch-error-workaround
12745 (setenv "PY_IGNORE_IMPORTMISMATCH" "1")
12748 (list python-sortedcontainers))
12750 (list python-pytest))
12751 (home-page "https://github.com/chaimleib/intervaltree")
12752 (synopsis "Editable interval tree data structure")
12754 "This package provides a mutable, self-balancing interval tree
12755 implementation for Python. Queries may be by point, by range overlap, or by
12756 range envelopment. This library was designed to allow tagging text and time
12757 intervals, where the intervals include the lower bound but not the upper
12759 (license license:asl2.0)))
;; Pypairix, built from the pairix git repository.  "make" is run before the
;; Python build, and the resulting "bin" directory is copied into the output
;; after the install phase.
12761 (define-public python-pypairix
12763 (name "python-pypairix")
12765 ;; The tarball on pypi does not include the makefile to build the
12770 (uri (git-reference
12771 (url "https://github.com/4dn-dcic/pairix")
12773 (file-name (git-file-name name version))
12776 "1snr3lrmsld8sy77ng6ba6wcmd33xjccf1l2f3m6pi29xis9nd6p"))))
12777 (build-system python-build-system)
12780 (modify-phases %standard-phases
12781 (add-before 'build 'build-programs
12782 (lambda _ (invoke "make")))
12783 (add-after 'install 'install-programs
12784 (lambda* (#:key outputs #:allow-other-keys)
12785 (copy-recursively "bin" (string-append
12786 (assoc-ref outputs "out")
12791 (home-page "https://github.com/4dn-dcic/pairix")
12792 (synopsis "Support for querying pairix-indexed bgzipped text files")
12794 "Pypairix is a Python module for fast querying on a pairix-indexed
12795 bgzipped text file that contains a pair of genomic coordinates per line.")
12796 (license license:expat)))
;; pyfaidx, fetched from PyPI and built with python-build-system.
12798 (define-public python-pyfaidx
12800 (name "python-pyfaidx")
12805 (uri (pypi-uri "pyfaidx" version))
12808 "038xi3a6zvrxbyyfpp64ka8pcjgsdq4fgw9cl5lpxbvmm1bzzw2q"))))
12809 (build-system python-build-system)
12812 (home-page "http://mattshirley.com")
12813 (synopsis "Random access to fasta subsequences")
12815 "This package provides procedures for efficient pythonic random access to
12816 fasta subsequences.")
12817 (license license:bsd-3)))
;; Cooler, fetched from PyPI.  The phases relax the cytoolz version pin, drop
;; the --flake8 pytest option, mark or rename tests that cannot run in the
;; build environment, and run the remaining tests with "python -m pytest".
12819 (define-public python-cooler
12821 (name "python-cooler")
12826 (uri (pypi-uri "cooler" version))
12829 "1i96fmpsimj4wrx51rxn8lw2gqxf5a2pvrj5rwdd6ivnm3pmhyrn"))))
12830 (build-system python-build-system)
12833 (modify-phases %standard-phases
12834 ;; cooler requests cytoolz<0.11. It only uses cytoolz for "compose",
12835 ;; which composes two functions.
12836 (add-after 'unpack 'use-recent-cytoolz
12838 (substitute* '("requirements.txt"
12839 "cooler.egg-info/requires.txt")
12840 (("cytoolz.*<.*0.11") "cytoolz"))))
12841 ;; This version of flake8 just won't work with this version of
12842 ;; pytest, because of dependency pinning.
12843 (add-after 'unpack 'do-not-use-flake8
12845 (substitute* "setup.cfg"
12846 (("addopts = --flake8") "addopts = "))))
12847 (add-after 'unpack 'patch-tests
12849 (substitute* "tests/test_create.py"
12850 (("def test_roundtrip")
12851 (string-append "@pytest.mark.skip(reason=\"requires network "
12852 "access to genome.ucsc.edu\")\n"
12853 "def test_roundtrip")))
12854 (substitute* "tests/test_util.py"
12855 (("def test_fetch_chromsizes")
12856 (string-append "@pytest.mark.skip(reason=\"requires network "
12857 "access to genome.ucsc.edu\")\n"
12858 "def test_fetch_chromsizes"))
12859 ;; See https://github.com/open2c/cooler/issues/287
12860 (("skipif\\(six.PY2, reason=\"Scipy on Py2 is too old\"")
12861 "skip(reason=\"Scipy is too new\""))
12862 ;; This test depends on ipytree, which contains a lot of minified
;; Renaming the function with a leading underscore keeps pytest from
;; collecting it.
12864 (substitute* "tests/test_fileops.py"
12865 (("def test_print_trees")
12866 "def _test_print_trees"))))
12868 (lambda* (#:key tests? #:allow-other-keys)
12870 (invoke "python" "-m" "pytest" "-v")))))))
12872 (list python-asciitree
12878 python-multiprocess
12890 (list python-codecov python-mock python-pytest python-pytest-cov
12891 python-pytest-flake8))
12892 ;; Almost all the projects of the Mirnylab are moved under Open2C umbrella
12893 (home-page "https://github.com/open2c/cooler")
12894 (synopsis "Sparse binary format for genomic interaction matrices")
12896 "Cooler is a support library for a sparse, compressed, binary persistent
12897 storage format, called @code{cool}, used to store genomic interaction data,
12898 such as Hi-C contact matrices.")
12899 (license license:bsd-3)))
;; HiCMatrix, built from the upstream git repository; the tests are run with
;; "python -m pytest".
12901 (define-public python-hicmatrix
12903 (name "python-hicmatrix")
12907 ;; The PyPI sources do not contain any tests.
12909 (uri (git-reference
12910 (url "https://github.com/deeptools/HiCMatrix")
12912 (file-name (git-file-name name version))
12915 "1dshjxgb16sdfg9k1bhw2yhyngac04k4ca7aqy8g3i3pprr068r5"))))
12916 (build-system python-build-system)
12919 (modify-phases %standard-phases
12921 (lambda* (#:key tests? #:allow-other-keys)
12923 (invoke "python" "-m" "pytest" "-v")))))))
12925 (list python-cooler
12926 python-intervaltree
12932 (list python-pytest))
12933 (home-page "https://github.com/deeptools/HiCMatrix/")
12934 (synopsis "HiCMatrix class for HiCExplorer and pyGenomeTracks")
12936 "This helper package implements the @code{HiCMatrix} class for
12937 the HiCExplorer and pyGenomeTracks packages.")
12938 (license license:gpl3+)))
;; HiCExplorer, built from the upstream git repository.  The
;; 'loosen-up-requirements phase edits setup.py after unpacking.
12940 (define-public python-hicexplorer
12942 (name "python-hicexplorer")
12946 ;; The latest version is not available on Pypi.
12948 (uri (git-reference
12949 (url "https://github.com/deeptools/HiCExplorer")
12951 (file-name (git-file-name name version))
12954 "0q5gpbzmrkvygqgw524q36b4nrivcmyi5v194vsx0qw7b3gcmq08"))))
12955 (build-system python-build-system)
12958 (modify-phases %standard-phases
12959 (add-after 'unpack 'loosen-up-requirements
12961 (substitute* "setup.py"
12965 (list python-biopython
12966 python-configparser
12969 python-intervaltree
12980 (home-page "https://hicexplorer.readthedocs.io")
12981 (synopsis "Process, analyze and visualize Hi-C data")
12983 "HiCExplorer is a powerful and easy to use set of tools to process,
12984 normalize and visualize Hi-C data. HiCExplorer facilitates the creation of
12985 contact matrices, correction of contacts, TAD detection, A/B compartments,
12986 merging, reordering of chromosomes, conversion from different formats
12987 including cooler and detection of long-range contacts. Moreover, it allows
12988 the visualization of multiple contact matrices along with other types of data
12989 like genes, compartments, ChIP-seq coverage tracks (and in general any type of
12990 genomic scores), long range contacts and the visualization of viewpoints.")
12991 (license license:gpl3)))
;; pyGenomeTracks, fetched from PyPI.  Tests are disabled, and the exact
;; matplotlib pin in setup.py is relaxed to a lower bound.
12993 (define-public python-pygenometracks
12995 (name "python-pygenometracks")
13000 (uri (pypi-uri "pyGenomeTracks" version))
13003 "16laa0wnf4qn9fb9ych4w1vqhqwjss70v0y0f6wp4gwqfrlgac0f"))))
13004 (build-system python-build-system)
13006 `(#:tests? #f ; there are none
13008 (modify-phases %standard-phases
13009 (add-after 'unpack 'relax-requirements
13011 (substitute* "setup.py"
13012 (("matplotlib ==3.1.1")
13013 "matplotlib >=3.1.1"))
13016 (list python-future
13019 python-intervaltree
13026 (list python-pytest))
13027 (home-page "https://pygenometracks.readthedocs.io")
13028 (synopsis "Program and library to plot beautiful genome browser tracks")
13030 "This package aims to produce high-quality genome browser tracks that
13031 are highly customizable. Currently, it is possible to plot: bigwig, bed (many
13032 options), bedgraph, links (represented as arcs), and Hi-C matrices.
13033 pyGenomeTracks can make plots with or without Hi-C data.")
13034 (license license:gpl3+)))
;; iced, fetched from PyPI; tests are disabled.
13036 (define-public python-iced
13038 (name "python-iced")
13043 (uri (pypi-uri "iced" version))
13046 "1avcjmpyyvhgbj5qca4l70ipiz7j3xmcw9p6rd9c06j99faa0r71"))))
13047 (build-system python-build-system)
13048 (arguments `(#:tests? #false)) ; there are none
13050 (list python-numpy python-pandas python-scipy python-scikit-learn))
13051 (home-page "https://github.com/hiclib/iced")
13052 (synopsis "ICE normalization")
13053 (description "This is a package for normalizing Hi-C contact counts
13055 (license license:bsd-3)))
;; hic2cool, built from the upstream git repository.  After unpacking, two
;; test-data files are made writable, h5py.File calls get an explicit mode,
;; and the test functions whose names start with "test_convert" are renamed
;; with a leading underscore.
13057 (define-public python-hic2cool
13059 (name "python-hic2cool")
13061 ;; pypi sources do not contain the test_data directory and no test can be
13066 (uri (git-reference
13067 (url "https://github.com/4dn-dcic/hic2cool")
13069 (file-name (git-file-name name version))
13072 "0dlnf0qfcp4jrc1nyya32a035c13xicyq16bwfnwhbb9s47mz7gl"))))
13073 (build-system python-build-system)
13076 (modify-phases %standard-phases
13077 ;; Two of the test-data files need to be writable.
13078 (add-after 'unpack 'make-test-data-writable
13080 (for-each make-file-writable
13081 (list "test_data/hic2cool_0.4.2_single_res.cool"
13082 "test_data/hic2cool_0.7.0_multi_res.mcool"))))
13083 ;; See https://github.com/4dn-dcic/hic2cool/issues/58
13084 (add-after 'unpack 'fix-incompatibility-with-h5py-3
13086 (substitute* "test.py"
13087 (("h5py.File\\(fname\\)") "h5py.File(fname, 'r')"))
13088 (substitute* "hic2cool/hic2cool_updates.py"
13089 (("h5py.File\\(writefile\\)")
13090 "h5py.File(writefile, 'a')"))))
13091 ;; These two tests fail for unknown reasons.
13092 (add-after 'unpack 'disable-broken-tests
13094 (substitute* "test.py"
13095 (("def test_convert") "def _test_convert")))))))
13097 (list python-cooler python-h5py python-numpy python-pandas
13099 (home-page "https://github.com/4dn-dcic/hic2cool")
13100 (synopsis "Converter for .hic and .cool files")
13102 "This package provides a converter between @code{.hic} files (from
13103 juicer) and single-resolution or multi-resolution @code{.cool} files (for
13104 cooler). Both @code{hic} and @code{cool} files describe Hi-C contact
13106 (license license:expat)))
;; Scanorama, fetched from PyPI and built with python-build-system.
13108 (define-public python-scanorama
13110 (name "python-scanorama")
13114 (uri (pypi-uri "scanorama" version))
13117 "0il7bf4c7vli2dm2jx7dskh3ymgv8nmk0y90jzgfrnqjzh250x5w"))))
13118 (build-system python-build-system)
13123 python-intervaltree
13126 python-scikit-learn
13128 (home-page "https://github.com/brianhie/scanorama")
13129 (synopsis "Panoramic stitching of heterogeneous single cell transcriptomic data")
13131 "Scanorama enables batch-correction and integration of heterogeneous
13132 scRNA-seq datasets, which is described in the paper \"Efficient integration of
13133 heterogeneous single-cell transcriptomes using Scanorama\" by Brian Hie, Bryan
13134 Bryson, and Bonnie Berger.")
13135 (license license:expat)))
;; poRe R package, downloaded from the SourceForge mirror; the upstream name
;; "poRe" is recorded in the package properties.
13137 (define-public r-pore
13145 (string-append "mirror://sourceforge/rpore/" version
13146 "/poRe_" version ".tar.gz"))
13148 (base32 "0pih9nljbv8g4x8rkk29i7aqq681b782r5s5ynp4nw9yzqnmmksv"))))
13149 (properties `((upstream-name . "poRe")))
13150 (build-system r-build-system)
13152 (list r-bit64 r-data-table r-rhdf5 r-shiny r-svdialogs))
13153 (home-page "https://sourceforge.net/projects/rpore/")
13154 (synopsis "Visualize Nanopore sequencing data")
13156 "This package provides graphical user interfaces to organize and visualize Nanopore
13158 ;; This is free software but the license variant is unclear:
13159 ;; <https://github.com/mw55309/poRe_docs/issues/10>.
13160 (license license:bsd-3)))
;; xbioc R package pinned to a git commit; the version string is derived from
;; "0.1.16", the revision and the commit.
13162 (define-public r-xbioc
13163 (let ((revision "1")
13164 (commit "6ff0670a37ab3036aaf1d94aa4b208310946b0b5"))
13167 (version (git-version "0.1.16" revision commit))
13170 (uri (git-reference
13171 (url "https://github.com/renozao/xbioc")
13173 (file-name (git-file-name name version))
13176 "0w8bsq5myiwkfhh83nm6is5ichiyvwa1axx2szvxnzq39x6knf66"))))
13177 (build-system r-build-system)
13179 (list r-annotationdbi
13188 (home-page "https://github.com/renozao/xbioc/")
13189 (synopsis "Extra base functions for Bioconductor")
13190 (description "This package provides extra utility functions to perform
13191 common tasks in the analysis of omics data, leveraging and enhancing features
13192 provided by Bioconductor packages.")
13193 (license license:gpl3+))))
;; csSAM R package pinned to a git commit; the version string is derived from
;; "1.4", the revision and the commit.
13195 (define-public r-cssam
13196 (let ((revision "1")
13197 (commit "9ec58c982fa551af0d80b1a266890d92954833f2"))
13200 (version (git-version "1.4" revision commit))
13203 (uri (git-reference
13204 (url "https://github.com/shenorrLab/csSAM")
13206 (file-name (git-file-name name version))
13209 "128syf9v39gk0z3ip000qpsjbg6l1siyq6c8b0hz41dzg5achyb3"))))
13210 (build-system r-build-system)
13218 (home-page "https://github.com/shenorrLab/csSAM/")
13219 (synopsis "Cell type-specific statistical analysis of microarray")
13220 (description "This package implements the method csSAM that computes
13221 cell-specific differential expression from measured cell proportions using
13224 (license license:lgpl2.1+))))
;; BSeq-sc R package pinned to a git commit; the version string is derived
;; from "1.0", the revision and the commit.
13226 (define-public r-bseqsc
13227 (let ((revision "1")
13228 (commit "fef3f3e38dcf3df37103348b5780937982b43b98"))
13231 (version (git-version "1.0" revision commit))
13234 (uri (git-reference
13235 (url "https://github.com/shenorrLab/bseqsc")
13237 (file-name (git-file-name name version))
13240 "1prw13wa20f7wlc3gkkls66n1kxz8d28qrb8icfqdwdnnv8w5qg8"))))
13241 (build-system r-build-system)
13260 (home-page "https://github.com/shenorrLab/bseqsc")
13261 (synopsis "Deconvolution of bulk sequencing experiments using single cell data")
13262 (description "BSeq-sc is a bioinformatics analysis pipeline that
13263 leverages single-cell sequencing data to estimate cell type proportion and
13264 cell type-specific gene expression differences from RNA-seq data from bulk
13265 tissue samples. This is a companion package to the publication \"A
13266 single-cell transcriptomic map of the human and mouse pancreas reveals inter-
13267 and intra-cell population structure.\" Baron et al. Cell Systems (2016)
13268 @url{https://www.ncbi.nlm.nih.gov/pubmed/27667365}.")
13269 (license license:gpl2+))))
;; Porechop pinned to a git commit of the upstream repository and built with
;; python-build-system.
13271 (define-public porechop
13272 ;; The recommended way to install is to clone the git repository
13273 ;; https://github.com/rrwick/Porechop#installation
13274 (let ((commit "289d5dca4a5fc327f97b3f8cecb68ecaf1014861")
13278 (version (git-version "0.2.3" revision commit))
13282 (uri (git-reference
13283 (url "https://github.com/rrwick/Porechop")
13285 (file-name (git-file-name name version))
13287 (base32 "05ps43gig0d3ia9x5lj84lb00hbsl6ba9n7y7jz927npxbr2ym23"))))
13288 (build-system python-build-system)
13289 (home-page "https://github.com/rrwick/porechop")
13290 (synopsis "Finding, trimming or splitting adapters, in Oxford Nanopore reads")
13292 "The porechop package is a tool for finding and removing adapters from Oxford
13293 Nanopore reads. Adapters on the ends of reads are trimmed off, and when a read
13294 has an adapter in its middle, it is treated as chimeric and chopped into
13295 separate reads. Porechop performs thorough alignments to effectively find
13296 adapters, even at low sequence identity. Porechop also supports demultiplexing
13297 of Nanopore reads that were barcoded with the Native Barcoding Kit, PCR
13298 Barcoding Kit or Rapid Barcoding Kit.")
13299 (license license:gpl3+))))
;; JAMM peak finder, fetched from the git tag "JAMMv" + version.  There is no
;; configure step and there are no tests.  The visible phase points sPath in
;; JAMM.sh and SignalGenerator.sh at libexec/jamm, installs helper files
;; there, and installs both scripts into bin wrapped with PATH, PERL5LIB and
;; R_LIBS_SITE.
13301 (define-public jamm
13304 (version "1.0.7.6")
13308 (uri (git-reference
13309 (url "https://github.com/mahmoudibrahim/JAMM")
13310 (commit (string-append "JAMMv" version))))
13311 (file-name (git-file-name name version))
13314 "0bsa5mf9n9q5jz7mmacrra41l7r8rac5vgsn6wv1fb52ya58b970"))))
13315 (build-system gnu-build-system)
13317 `(#:tests? #f ; there are none
13319 (modify-phases %standard-phases
13320 (delete 'configure)
13323 (lambda* (#:key inputs outputs #:allow-other-keys)
13324 (let* ((out (assoc-ref outputs "out"))
13325 (libexec (string-append out "/libexec/jamm"))
13326 (bin (string-append out "/bin")))
13327 (substitute* '("JAMM.sh"
13328 "SignalGenerator.sh")
13330 (string-append "sPath=\"" libexec "\"\n")))
13331 (for-each (lambda (file)
13332 (install-file file libexec))
13333 (list "bincalculator.r"
;; Make each script read-and-execute only (mode 555) before installing it.
13345 (chmod script #o555)
13346 (install-file script bin)
;; The wrapper prefixes PATH with the bin directories of the inputs
;; listed below and PERL5LIB / R_LIBS_SITE with their build-time values.
13347 (wrap-program (string-append bin "/" script)
13348 `("PATH" ":" prefix
13349 (,(string-append (assoc-ref inputs "coreutils") "/bin")
13350 ,(string-append (assoc-ref inputs "gawk") "/bin")
13351 ,(string-append (assoc-ref inputs "perl") "/bin")
13352 ,(string-append (assoc-ref inputs "r-minimal") "/bin")))
13353 `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")))
13354 `("R_LIBS_SITE" ":" prefix (,(getenv "R_LIBS_SITE")))))
13355 (list "JAMM.sh" "SignalGenerator.sh")))
13363 ;;("r-parallel" ,r-parallel)
13366 (home-page "https://github.com/mahmoudibrahim/JAMM")
13367 (synopsis "Peak finder for NGS datasets")
13369 "JAMM is a peak finder for next generation sequencing datasets (ChIP-Seq,
13370 ATAC-Seq, DNase-Seq, etc.) that can integrate replicates and assign peak
13371 boundaries accurately. JAMM is applicable to both broad and narrow
13373 (license license:gpl3+)))
;; NGLess, built with haskell-build-system from the git tag "v" + version
;; with one patch applied.  The phases write the versions of bwa, samtools,
;; prodigal and minimap2 into the Makefile, generate
;; NGLess/Dependencies/Versions.hs with make and the cabal file with hpack,
;; and symlink those four tools into the output's bin directory under
;; "ngless-VERSION-TOOL" names.
13375 (define-public ngless
13382 (uri (git-reference
13383 (url "https://github.com/ngless-toolkit/ngless.git")
13384 (commit (string-append "v" version))))
13385 (file-name (git-file-name name version))
13388 "0pb9f6b0yk9p4cdwiym8r190q1bcdiwvc7i2s6rw54qgi8r3g6pj"))
13389 (patches (search-patches "ngless-unliftio.patch"))))
13390 (build-system haskell-build-system)
13392 `(#:haddock? #f ; The haddock phase fails with: NGLess/CmdArgs.hs:20:1:
13393 ; error: parse error on input import
13394 ; import Options.Applicative
13396 (modify-phases %standard-phases
13397 (add-after 'unpack 'create-Versions.hs
13399 (substitute* "Makefile"
13400 (("BWA_VERSION = .*")
13401 (string-append "BWA_VERSION = "
13402 ,(package-version bwa) "\n"))
13403 (("SAM_VERSION = .*")
13404 (string-append "SAM_VERSION = "
13405 ,(package-version samtools) "\n"))
13406 (("PRODIGAL_VERSION = .*")
13407 (string-append "PRODIGAL_VERSION = "
13408 ,(package-version prodigal) "\n"))
13409 (("MINIMAP2_VERSION = .*")
13410 (string-append "MINIMAP2_VERSION = "
13411 ,(package-version minimap2) "\n")))
13412 (invoke "make" "NGLess/Dependencies/Versions.hs")
13414 (add-after 'create-Versions.hs 'create-cabal-file
13415 (lambda _ (invoke "hpack") #t))
13416 ;; These tools are expected to be installed alongside ngless.
13417 (add-after 'install 'link-tools
13418 (lambda* (#:key inputs outputs #:allow-other-keys)
13419 (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
13420 (symlink (search-input-file inputs "/bin/prodigal")
13421 (string-append bin "ngless-" ,version "-prodigal"))
13422 (symlink (search-input-file inputs "/bin/minimap2")
13423 (string-append bin "ngless-" ,version "-minimap2"))
13424 (symlink (search-input-file inputs "/bin/samtools")
13425 (string-append bin "ngless-" ,version "-samtools"))
13426 (symlink (search-input-file inputs "/bin/bwa")
13427 (string-append bin "ngless-" ,version "-bwa"))
13438 ghc-bytestring-lexing
13440 ghc-conduit-algorithms
13448 ghc-double-conversion
13460 ghc-int-interval-map
13462 ghc-optparse-applicative
13475 (list r-r6 r-hdf5r r-iterators r-itertools r-matrix))
13480 ghc-test-framework-hunit
13481 ghc-test-framework-quickcheck2
13482 ghc-test-framework-th))
13483 (home-page "https://ngless.embl.de/")
13484 (synopsis "DSL for processing next-generation sequencing data")
13485 (description "Ngless is a domain-specific language for
13486 @dfn{next-generation sequencing} (NGS) data processing.")
13487 (license license:expat)))
;; Haskell interval map library pinned to a git commit of the
;; ngless-toolkit/interval-to-int repository and built with
;; haskell-build-system.
13489 (define-public ghc-int-interval-map
13490 (let ((commit "678763de7fe6d7fa3f1c44b32d18ce58670270f4")
13493 (name "ghc-int-interval-map")
13494 (version "0.0.0.0")
13498 (uri (git-reference
13499 (url "https://github.com/ngless-toolkit/interval-to-int.git")
13501 (file-name (git-file-name name version))
13502 (sha256 (base32 "0fd728b5if89vj5j4f9y7k0b2xv2ycz5a21iy15wbdcf5bhim7i8"))))
13503 (build-system haskell-build-system)
13505 (list ghc-either ghc-primitive ghc-vector ghc-vector-algorithms))
13511 ghc-tasty-quickcheck
13513 (home-page "https://github.com/luispedro/interval-to-int#readme")
13514 (synopsis "Interval map structure in Haskell")
13515 (description "An interval map structure that is optimized for low
13516 memory (each interval is represented by about 3 words + whatever the
13517 cargo is) and has semantics that are appropriate for genomic intervals
13518 (namely, intervals can overlap and queries will return all matches
13519 together). It is also designed to be used in two phases: a construction
13520 phase + query phase.")
13521 (license license:expat))))
;; Filtlong pinned to a git commit.  There is no configure step and no check
;; target.  The install phase copies the filtlong binary into bin and two
;; helper scripts into share/filtlong/scripts; histogram.py is then wrapped
;; with GUIX_PYTHONPATH.
13523 (define-public filtlong
13524 ;; The recommended way to install is to clone the git repository
13525 ;; https://github.com/rrwick/Filtlong#installation
13526 ;; and the latest release is more than nine months old
13527 (let ((commit "d1bb46dfe8bc7efe6257b5ce222c04bfe8aedaab")
13531 (version (git-version "0.2.0" revision commit))
13535 (uri (git-reference
13536 (url "https://github.com/rrwick/Filtlong")
13538 (file-name (git-file-name name version))
13540 (base32 "1xr92r820x8qlkcr3b57iw223yq8vjgyi42jr79w2xgw47qzr575"))))
13541 (build-system gnu-build-system)
13543 `(#:tests? #f ; no check target
13545 (modify-phases %standard-phases
13546 (delete 'configure)
13548 (lambda* (#:key outputs #:allow-other-keys)
13549 (let* ((out (assoc-ref outputs "out"))
13550 (bin (string-append out "/bin"))
13551 (scripts (string-append out "/share/filtlong/scripts")))
13552 (install-file "bin/filtlong" bin)
13553 (install-file "scripts/histogram.py" scripts)
13554 (install-file "scripts/read_info_histograms.sh" scripts))
13556 (add-after 'install 'wrap-program
13557 (lambda* (#:key inputs outputs #:allow-other-keys)
13558 (let* ((out (assoc-ref outputs "out"))
13559 (path (getenv "GUIX_PYTHONPATH")))
13560 (wrap-program (string-append out
13561 "/share/filtlong/scripts/histogram.py")
13562 `("GUIX_PYTHONPATH" ":" prefix (,path))))
;; Despite its name, this phase does not touch any tests: it replaces "awk"
;; in the helper script with the result of (which "gawk").
13564 (add-before 'check 'patch-tests
13566 (substitute* "scripts/read_info_histograms.sh"
13567 (("awk") (which "gawk")))
13570 `(("gawk" ,gawk) ;for read_info_histograms.sh
13571 ("python" ,python-2) ;required for histogram.py
13573 (home-page "https://github.com/rrwick/Filtlong/")
13574 (synopsis "Tool for quality filtering of Nanopore and PacBio data")
13576 "The Filtlong package is a tool for filtering long reads by quality.
13577 It can take a set of long reads and produce a smaller, better subset. It uses
13578 both read length (longer is better) and read identity (higher is better) when
13579 choosing which reads pass the filter.")
13580 (license (list license:gpl3 ;filtlong
13581 license:asl2.0))))) ;histogram.py
;; nanopolish: signal-level analysis of Oxford Nanopore sequencing data,
;; packaged from a pinned commit of the upstream git repository and built
;; with gnu-build-system (no configure step, no test suite).
13583 (define-public nanopolish
13584 ;; The recommended way to install is to clone the git repository
13585 ;; <https://github.com/jts/nanopolish#installing-a-particular-release>.
13586 ;; Also, the differences between release and current version seem to be
13588 (let ((commit "6331dc4f15b9dfabb954ba3fae9d76b6c3ca6377")
13591 (name "nanopolish")
13592 (version (git-version "0.11.1" revision commit))
13596 (uri (git-reference
13597 (url "https://github.com/jts/nanopolish")
13600 (file-name (git-file-name name version))
13602 (base32 "15ikl3d37y49pwd7vx36xksgsqajhf24q7qqsnpl15dqqyy5qgbc"))
13603 (modules '((guix build utils)))
;; Drop the htslib directory that ships inside the source checkout.
13606 (delete-file-recursively "htslib")
13608 (build-system gnu-build-system)
;; NOTE(review): the "noinstall" values presumably stop the Makefile from
;; fetching/building its own HDF5, Eigen and htslib -- confirm upstream.
13611 `("HDF5=noinstall" "EIGEN=noinstall" "HTS=noinstall" "CC=gcc")
13612 #:tests? #f ; no check target
13614 (modify-phases %standard-phases
;; Locate the eigen3 include directory among the inputs and combine it
;; with any existing CPATH value.
13615 (add-after 'unpack 'find-eigen
13616 (lambda* (#:key inputs #:allow-other-keys)
13619 (search-input-directory inputs "/include/eigen3")
13620 ":" (or (getenv "CPATH") "")))))
13621 (delete 'configure)
13623 (lambda* (#:key outputs #:allow-other-keys)
13624 (let* ((out (assoc-ref outputs "out"))
13625 (bin (string-append out "/bin"))
13626 (scripts (string-append out "/share/nanopolish/scripts")))
;; The main binary goes to bin/; every file found under scripts/ is
;; copied into share/nanopolish/scripts.
13628 (install-file "nanopolish" bin)
13629 (for-each (lambda (file) (install-file file scripts))
13630 (find-files "scripts" ".*"))
;; Wrap the installed helper scripts so that they see the build-time
;; GUIX_PYTHONPATH (Python scripts) or PERL5LIB (Perl scripts).
13632 (add-after 'install 'wrap-programs
13633 (lambda* (#:key inputs outputs #:allow-other-keys)
13634 (let ((pythonpath (getenv "GUIX_PYTHONPATH"))
13635 (perl5lib (getenv "PERL5LIB"))
13636 (scripts (string-append (assoc-ref outputs "out")
13637 "/share/nanopolish/scripts"))
13638 (guile (search-input-file inputs "bin/guile")))
;; NOTE(review): the patterns "\\.py" and "\\.pl" are unanchored, so
;; they match these substrings anywhere in a file name.
13639 (for-each (lambda (file)
13640 (wrap-program file `("GUIX_PYTHONPATH" ":" prefix (,pythonpath))))
13641 (find-files scripts "\\.py"))
;; Perl scripts are wrapped with wrap-script, using the guile input as
;; the interpreter for the wrapper.
13642 (for-each (lambda (file)
13643 (wrap-script file #:guile guile
13644 `("PERL5LIB" ":" prefix (,perl5lib))))
13645 (find-files scripts "\\.pl"))))))))
13647 `(("guile" ,guile-3.0) ; for wrappers
13652 ("bioperl" ,bioperl-minimal)
13653 ("perl-getopt-long" ,perl-getopt-long)
13654 ("python" ,python-wrapper)
13655 ("python-biopython" ,python-biopython)
13656 ("python-numpy" ,python-numpy)
13657 ("python-pysam" ,python-pysam)
13658 ("python-scikit-learn" , python-scikit-learn)
13659 ("python-scipy" ,python-scipy)
13661 (home-page "https://github.com/jts/nanopolish")
13662 (synopsis "Signal-level analysis of Oxford Nanopore sequencing data")
13664 "This package analyses the Oxford Nanopore sequencing data at signal-level.
13665 Nanopolish can calculate an improved consensus sequence for a draft genome
13666 assembly, detect base modifications, call SNPs (Single nucleotide
13667 polymorphisms) and indels with respect to a reference genome and more.")
13668 (license license:expat))))
;; cnvkit: copy number variant detection from targeted DNA sequencing.
;; The source is the upstream git tag "v<version>"; the package is built
;; with python-build-system.
13670 (define-public cnvkit
13677 (uri (git-reference
13678 (url "https://github.com/etal/cnvkit")
13679 (commit (string-append "v" version))))
13680 (file-name (git-file-name name version))
13682 (base32 "0g2f78k68yglmj4fsfmgs8idqv3di9aj53fg0ld0hqljg8chhh82"))))
13683 (build-system python-build-system)
13685 (list python-biopython
13696 (home-page "https://cnvkit.readthedocs.org/")
13697 (synopsis "Copy number variant detection from targeted DNA sequencing")
13699 "CNVkit is a Python library and command-line software toolkit to infer
13700 and visualize copy number from high-throughput DNA sequencing data. It is
13701 designed for use with hybrid capture, including both whole-exome and custom
13702 target panels, and short-read sequencing platforms such as Illumina and Ion
13704 (license license:asl2.0)))
;; python-pyfit-sne: Cython wrapper for FIt-SNE (FFT-accelerated
;; interpolation-based t-SNE), fetched from the KlugerLab git repository and
;; built with python-build-system.  The test phase is disabled.
13706 (define-public python-pyfit-sne
13708 (name "python-pyfit-sne")
13713 (uri (git-reference
13714 (url "https://github.com/KlugerLab/pyFIt-SNE")
13716 (file-name (git-file-name name version))
13718 (base32 "0f3n7wcmxbnqiisgimhpa6p5chqpb1hj69i6rpg2hv2671i8nn68"))))
13719 (build-system python-build-system)
13720 (arguments '(#:tests? #false)) ; there are none
13722 (list python-numpy))
13726 (list python-cython))
13727 (home-page "https://github.com/KlugerLab/pyFIt-SNE")
13728 (synopsis "FFT-accelerated Interpolation-based t-SNE")
13730 "t-Stochastic Neighborhood Embedding (t-SNE) is a highly successful
13731 method for dimensionality reduction and visualization of high dimensional
13732 datasets.  A popular implementation of t-SNE uses the Barnes-Hut algorithm to
13733 approximate the gradient at each iteration of gradient descent.  This package
13734 is a Cython wrapper for FIt-SNE.")
13735 (license license:bsd-4)))
;; bbmap: short-read aligner and related tools, built from the SourceForge
;; release tarball with ant-build-system (target "dist").  Extra phases
;; build the JNI helper library and install the shell launchers and docs.
13737 (define-public bbmap
13743 (uri (string-append
13744 "mirror://sourceforge/bbmap/BBMap_" version ".tar.gz"))
13747 "1wb94bcc006qq86x77z2rz0lc8m9f1kpnw6gdhjfg9bdaqf56rm3"))))
13748 (build-system ant-build-system)
13750 `(#:build-target "dist"
13751 #:tests? #f ; there are none
;; The ant property "mpijar" is derived from the java-openmpi input.
13753 ,#~(list (string-append "-Dmpijar="
13754 #$(this-package-input "java-openmpi")
13756 #:modules ((guix build ant-build-system)
13758 (guix build java-utils))
13760 (modify-phases %standard-phases
;; The native library is built separately with the Linux makefile in jni/.
13761 (add-after 'build 'build-jni-library
13763 (with-directory-excursion "jni"
13764 (invoke "make" "-f" "makefile.linux"))))
13765 ;; There is no install target
13766 (replace 'install (install-jars "dist"))
13767 (add-after 'install 'install-scripts-and-documentation
13768 (lambda* (#:key outputs #:allow-other-keys)
;; Use the absolute path of awk in the pipeline inside calcmem.sh.
13769 (substitute* "calcmem.sh"
13770 (("\\| awk ") (string-append "| " (which "awk") " ")))
13771 (let* ((scripts (find-files "." "\\.sh$"))
13772 (out (assoc-ref outputs "out"))
13773 (bin (string-append out "/bin"))
13774 (doc (string-append out "/share/doc/bbmap"))
13775 (jni (string-append out "/lib/jni")))
;; Point every shell script at the installed locations: documentation
;; directory, BBTools.jar class path, JNI directory and the java binary.
13776 (substitute* scripts
13777 (("\\$DIR\"\"docs") doc)
13779 (string-append "CP=" out "/share/java/BBTools.jar\n"))
13780 (("^NATIVELIBDIR.*")
13781 (string-append "NATIVELIBDIR=" jni "\n"))
13783 (string-append "CMD=\"" (which "java"))))
13784 (for-each (lambda (script) (install-file script bin)) scripts)
13786 ;; Install JNI library
13787 (install-file "jni/libbbtoolsjni.so" jni)
13789 ;; Install documentation
13790 (install-file "docs/readme.txt" doc)
13791 (copy-recursively "docs/guides" doc))
13795 (list gawk java-eclipse-jdt-core java-eclipse-jdt-compiler-apt
13797 (home-page "https://sourceforge.net/projects/bbmap/")
13798 (synopsis "Aligner and other tools for short sequencing reads")
13800 "This package provides bioinformatic tools to align, deduplicate,
13801 reformat, filter and normalize DNA and RNA-seq data.  It includes the
13802 following tools: BBMap, a short read aligner for DNA and RNA-seq data; BBNorm,
13803 a kmer-based error-correction and normalization tool; Dedupe, a tool to
13804 simplify assemblies by removing duplicate or contained subsequences that share
13805 a target percent identity; Reformat, to convert reads between
13806 fasta/fastq/scarf/fasta+qual/sam, interleaved/paired, and ASCII-33/64, at over
13807 500 MB/s; and BBDuk, a tool to filter, trim, or mask reads with kmer matches
13808 to an artifact/contaminant file.")
13809 (license license:bsd-3)))
;; velvet: de novo assembler for very short reads, built from the upstream
;; tarball with gnu-build-system (no configure step; tests run via the
;; "test" make target; OpenMP enabled through the OPENMP=t make flag).
13811 (define-public velvet
13817 (uri (string-append "https://www.ebi.ac.uk/~zerbino/velvet/"
13818 "velvet_" version ".tgz"))
13821 "0h3njwy66p6bx14r3ar1byb0ccaxmxka4c65rn4iybyiqa4d8kc8"))
13822 ;; Delete bundled libraries
13823 (modules '((guix build utils)))
;; NOTE(review): Manual.pdf is deleted from the source here, yet the
;; install phase below installs a file of that name; presumably it is
;; regenerated during the build with the texlive native input -- confirm.
13826 (delete-file "Manual.pdf")
13827 (delete-file-recursively "third-party")))))
13828 (build-system gnu-build-system)
13830 `(#:make-flags '("OPENMP=t")
13831 #:test-target "test"
13833 (modify-phases %standard-phases
13834 (delete 'configure)
;; third-party/ was removed from the source, so the hard-coded relative
;; zlib header path is replaced by a plain "zlib.h" include.
13835 (add-after 'unpack 'fix-zlib-include
13837 (substitute* "src/binarySequences.c"
13838 (("../third-party/zlib-1.2.3/zlib.h") "zlib.h"))))
13840 (lambda* (#:key outputs #:allow-other-keys)
13841 (let* ((out (assoc-ref outputs "out"))
13842 (bin (string-append out "/bin"))
13843 (doc (string-append out "/share/doc/velvet")))
;; Executables go to bin/, the two PDF manuals to share/doc/velvet.
13846 (install-file "velveth" bin)
13847 (install-file "velvetg" bin)
13848 (install-file "Manual.pdf" doc)
13849 (install-file "Columbus_manual.pdf" doc)))))))
13851 (list openmpi zlib))
13853 `(("texlive" ,(texlive-updmap.cfg (list texlive-latex-graphics
13855 texlive-hyperref)))))
13856 (home-page "https://www.ebi.ac.uk/~zerbino/velvet/")
13857 (synopsis "Nucleic acid sequence assembler for very short reads")
13859 "Velvet is a de novo genomic assembler specially designed for short read
13860 sequencing technologies, such as Solexa or 454.  Velvet currently takes in
13861 short read sequences, removes errors then produces high quality unique
13862 contigs.  It then uses paired read information, if available, to retrieve the
13863 repeated areas between contigs.")
13864 (license license:gpl2+)))
;; python-velocyto: RNA velocity analysis for single cell RNA-seq data,
;; fetched from PyPI and built with python-build-system.
13866 (define-public python-velocyto
13868 (name "python-velocyto")
13869 (version "0.17.17")
13873 (uri (pypi-uri "velocyto" version))
13876 "0fgygyzqgrq32dv6a00biq1p1cwi6kbl5iqblxq1kklj6b2mzmhs"))
13877 (modules '((guix build utils)))
13878 ;; Delete generated C files.
;; NOTE(review): the pattern "\\.c" is unanchored, so it also matches file
;; names that merely contain ".c" -- confirm that this is intended.
13880 '(for-each delete-file (find-files "." "\\.c")))))
13881 (build-system python-build-system)
13884 (modify-phases %standard-phases
13885 ;; Numba needs a writable dir to cache functions.
13886 (add-before 'check 'set-numba-cache-dir
13888 (setenv "NUMBA_CACHE_DIR" "/tmp"))))))
13890 (list python-joblib))
13901 python-scikit-learn
13903 (home-page "https://github.com/velocyto-team/velocyto.py")
13904 (synopsis "RNA velocity analysis for single cell RNA-seq data")
13906 "Velocyto is a library for the analysis of RNA velocity.  Velocyto
13907 includes a command line tool and an analysis pipeline.")
13908 (license license:bsd-2)))
;; arriba: gene fusion detection from RNA-Seq data, built from the upstream
;; release tarball with gnu-build-system.  The configure phase is replaced
;; by source patching; files are installed by hand.
13910 (define-public arriba
13917 (uri (string-append "https://github.com/suhrig/arriba/releases/"
13918 "download/v" version "/arriba_v" version ".tar.gz"))
13921 "0jx9656ry766vb8z08m1c3im87b0c82qpnjby9wz4kcz8vn87dx2"))))
13922 (build-system gnu-build-system)
13924 `(#:tests? #f ; there are none
13926 (modify-phases %standard-phases
13927 (replace 'configure
13928 (lambda* (#:key inputs #:allow-other-keys)
13929 (let ((htslib (assoc-ref inputs "htslib")))
;; Use the headers of the htslib input and link against its shared
;; libhts.so instead of a static libhts.a from $(HTSLIB).
13930 (substitute* "Makefile"
13931 (("-I\\$\\(HTSLIB\\)/htslib")
13932 (string-append "-I" htslib "/include/htslib"))
13933 ((" \\$\\(HTSLIB\\)/libhts.a")
13934 (string-append " " htslib "/lib/libhts.so"))))
;; Replace the STAR and samtools command names in the wrapper script
;; with the absolute paths found at build time.
13935 (substitute* "run_arriba.sh"
13936 (("^STAR ") (string-append (which "STAR") " "))
13937 (("samtools --version-only")
13938 (string-append (which "samtools") " --version-only"))
13939 (("samtools index")
13940 (string-append (which "samtools") " index"))
13942 (string-append (which "samtools") " sort")))
13945 (lambda* (#:key outputs #:allow-other-keys)
13946 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
13947 (install-file "arriba" bin)
13948 (install-file "run_arriba.sh" bin)
13949 (install-file "draw_fusions.R" bin)
;; The R script is wrapped so that it sees the build-time R_LIBS_SITE.
13950 (wrap-program (string-append bin "/draw_fusions.R")
13951 `("R_LIBS_SITE" ":" prefix (,(getenv "R_LIBS_SITE")))))
13957 r-genomicalignments
13962 (home-page "https://github.com/suhrig/arriba")
13963 (synopsis "Gene fusion detection from RNA-Seq data")
13965 "Arriba is a command-line tool for the detection of gene fusions from
13966 RNA-Seq data.  It was developed for the use in a clinical research setting.
13967 Therefore, short runtimes and high sensitivity were important design criteria.
13968 It is based on the fast STAR aligner and the post-alignment runtime is
13969 typically just around two minutes.  In contrast to many other fusion detection
13970 tools which build on STAR, Arriba does not require to reduce the
13971 @code{alignIntronMax} parameter of STAR to detect small deletions.")
13972 ;; All code is under the Expat license with the exception of
13973 ;; "draw_fusions.R", which is under GPLv3.
13974 (license (list license:expat license:gpl3))))
;; adapterremoval: sequence adapter trimming, identification and read
;; merging.  Fetched from the upstream git tag "v<version>" and built with
;; gnu-build-system (no configure step; tests run via the "test" target).
13976 (define-public adapterremoval
13978 (name "adapterremoval")
13983 (uri (git-reference
13984 (url "https://github.com/MikkelSchubert/adapterremoval")
13985 (commit (string-append "v" version))))
13986 (file-name (git-file-name name version))
13989 "1nf3ki5pfzalhrx2fr1y6pfqfi133yj2m7q4fj9irf5fb94bapwr"))))
13990 (build-system gnu-build-system)
;; PREFIX is set to the package output; COLOR_BUILD is set to "no".
13993 ,#~(list "COLOR_BUILD=no"
13994 (string-append "PREFIX=" #$output))
13995 #:test-target "test"
13997 (modify-phases %standard-phases
13998 (delete 'configure))))
14001 (home-page "https://adapterremoval.readthedocs.io/")
14002 (synopsis "Rapid sequence adapter trimming, identification, and read merging")
14004 "This program searches for and removes remnant adapter sequences from
14005 @dfn{High-Throughput Sequencing} (HTS) data and (optionally) trims low quality
14006 bases from the 3' end of reads following adapter removal.  AdapterRemoval can
14007 analyze both single end and paired end data, and can be used to merge
14008 overlapping paired-ended reads into (longer) consensus sequences.
14009 Additionally, the AdapterRemoval may be used to recover a consensus adapter
14010 sequence for paired-ended data, for which this information is not available.")
14011 (license license:gpl3+)))
;; checkm: quality assessment of putative genome bins.  Fetched from PyPI
;; (project name "checkm-genome") and built with python-build-system; the
;; test suite is disabled.
14013 (define-public checkm
14020 (uri (pypi-uri "checkm-genome" version))
14023 "0i2nnki639hgjag17wlva2x0ymn37b4krqsf6akxddykhfbkdnkz"))))
14024 (build-system python-build-system)
14026 `(#:tests? #f ; Some tests fail for unknown reasons.
14028 (modify-phases %standard-phases
;; HOME is pointed at /tmp before the check phase.
14029 (add-before 'check 'set-HOME
14031 (setenv "HOME" "/tmp"))))))
14033 (list python-dendropy python-matplotlib python-numpy python-pysam
14035 (home-page "https://ecogenomics.github.io/CheckM/")
14036 (synopsis "Assess the quality of putative genome bins")
14038 "CheckM provides a set of tools for assessing the quality of genomes
14039 recovered from isolates, single cells, or metagenomes.  It provides robust
14040 estimates of genome completeness and contamination by using collocated sets of
14041 genes that are ubiquitous and single-copy within a phylogenetic lineage.
14042 Assessment of genome quality can also be examined using plots depicting key
14043 genomic characteristics (e.g., GC, coding density) which highlight sequences
14044 outside the expected distributions of a typical genome.  CheckM also provides
14045 tools for identifying genome bins that are likely candidates for merging based
14046 on marker set compatibility, similarity in genomic characteristics, and
14047 proximity within a reference genome.")
14048 (license license:gpl3+)))
;; umi-tools: tools for working with Unique Molecular Identifiers (UMIs).
;; Fetched from PyPI (project name "umi_tools") and built with
;; python-build-system.
14050 (define-public umi-tools
14057 (uri (pypi-uri "umi_tools" version))
14060 "08y3vz1vcx09whmbsn722lcs6jl9wyrh9i4p3k8j4cb1i32bij4a"))))
14061 (build-system python-build-system)
14063 (list python-pandas
14070 (list python-cython))
14071 (home-page "https://github.com/CGATOxford/UMI-tools")
14072 (synopsis "Tools for analyzing unique molecular identifiers")
14073 (description "This package provides tools for dealing with @dfn{Unique
14074 Molecular Identifiers} (UMIs) and @dfn{Random Molecular Tags} (RMTs) in
14075 genetic sequences.  There are six tools: the @code{extract} and
14076 @code{whitelist} commands are used to prepare a fastq containing UMIs @code{+/-}
14077 cell barcodes for alignment.  The remaining commands, @code{group},
14078 @code{dedup}, and @code{count}/@code{count_tab}, are used to identify PCR
14079 duplicates using the UMIs and perform different levels of analysis depending
14080 on the needs of the user.")
14081 (license license:expat)))
;; ataqv: quality control and visualization toolkit for ATAC-seq data.
;; Fetched from the ParkerLab git repository and built with
;; gnu-build-system (no configure step; tests run via the "test" target).
14083 (define-public ataqv
14090 (uri (git-reference
14091 (url "https://github.com/ParkerLab/ataqv")
14093 (file-name (git-file-name name version))
14096 "031xr6jx1aprh26y5b1lv3gzrlmzg4alfl73vvshymx8cq8asrqi"))))
14097 (build-system gnu-build-system)
;; The Makefile variables prefix, BOOST_ROOT and HTSLIB_ROOT are set to the
;; package output and to the boost and htslib inputs, respectively.
14100 ,#~(list (string-append "prefix=" #$output)
14101 (string-append "BOOST_ROOT="
14102 #$(this-package-input "boost"))
14103 (string-append "HTSLIB_ROOT="
14104 #$(this-package-input "htslib")))
14105 #:test-target "test"
14107 (modify-phases %standard-phases
14108 (delete 'configure))))
14110 (list boost htslib ncurses zlib))
14113 (home-page "https://github.com/ParkerLab/ataqv")
14114 (synopsis "Toolkit for quality control and visualization of ATAC-seq data")
14115 (description "This package provides a toolkit for measuring and comparing
14116 ATAC-seq results.  It was written to make it easier to spot differences that
14117 might be caused by ATAC-seq library prep or sequencing.  The main program,
14118 @code{ataqv}, examines aligned reads and reports some basic metrics.")
14119 (license license:gpl3+)))
;; r-psiplot: R package for plotting percent spliced-in (PSI) values.
;; Fetched from the upstream git tag "v<version>" and built with
;; r-build-system.
14121 (define-public r-psiplot
14128 (uri (git-reference
14129 (url "https://github.com/kcha/psiplot")
14130 (commit (string-append "v" version))))
14131 (file-name (git-file-name name version))
14133 (base32 "08438h16cfry5kqh3y9hs8q1b1a8bxhblsm75knviz5r6q0n1jxh"))))
14134 (build-system r-build-system)
14143 (home-page "https://github.com/kcha/psiplot")
14144 (synopsis "Plot percent spliced-in values of alternatively-spliced exons")
14146 "PSIplot is an R package for generating plots of @dfn{percent
14147 spliced-in} (PSI) values of alternatively-spliced exons that were computed by
14148 vast-tools, an RNA-Seq pipeline for alternative splicing analysis.  The plots
14149 are generated using @code{ggplot2}.")
14150 (license license:expat)))
;; r-scopeloomr: R package for building and reading .loom files.  Packaged
;; from a pinned commit of the aertslab git repository and built with
;; r-build-system.
14152 (define-public r-scopeloomr
14153 (let ((commit "99726f5f7da794042036b73924b6a10d6e7b4d5d")
14156 (name "r-scopeloomr")
14157 (version (git-version "0.13.0" revision commit))
14161 (uri (git-reference
14162 (url "https://github.com/aertslab/SCopeLoomR")
14164 (file-name (git-file-name name version))
14166 (base32 "1ci17ms0c0hf7yfp9ckcg7a2y1s0nm19jj3cifsd55hwc0gdglmz"))))
;; The upstream R package name differs in case from the Guix package name.
14167 (properties `((upstream-name . "SCopeLoomR")))
14168 (build-system r-build-system)
14170 (list r-base64enc r-hdf5r r-igraph r-matrix r-plyr r-rjson r-rlist))
14171 (home-page "https://github.com/aertslab/SCopeLoomR")
14172 (synopsis "Build .loom files and extract data from them")
14174 "This is an R package to build generic @code{.loom} files aligning with
14175 the default naming convention of the @code{.loom} format and to integrate
14176 other data types e.g.: regulons (SCENIC), clusters from Seurat, trajectory
14177 information...  The package can also be used to extract data from @code{.loom}
14179 (license license:expat))))
;; python-ctxcore: core functions for pycisTarget and the SCENIC tool suite.
;; Fetched from the aertslab git repository and built with
;; python-build-system.
14181 (define-public python-ctxcore
14183 (name "python-ctxcore")
14188 (uri (git-reference
14189 (url "https://github.com/aertslab/ctxcore")
14191 (file-name (git-file-name name version))
14194 "16nlj7z8pirgjad7vlgm7226b3hpw4a7n967vyfg26dsf5n8k70d"))))
14195 (build-system python-build-system)
14199 #~(modify-phases %standard-phases
14200 (add-before 'build 'pretend-version
14201 ;; The version string is usually derived via setuptools-scm, but
14202 ;; it doesn't work without the .git directory.
;; The package version is therefore supplied through the environment.
14204 (setenv "SETUPTOOLS_SCM_PRETEND_VERSION" #$version))))))
14206 (list python-cytoolz
14212 python-pyarrow-0.16
14215 (list python-pytest
14216 python-setuptools-scm))
14217 (home-page "https://github.com/aertslab/ctxcore")
14218 (synopsis "Core functions for pycisTarget and the SCENIC tool suite")
14220 "ctxcore is part of the SCENIC suite of tools.  It provides core functions for
14221 pycisTarget and SCENIC.")
14222 (license license:gpl3+)))
;; python-arboreto: gene regulatory network inference using tree-based
;; ensemble regressors.  The source is pinned to a fixed commit of the
;; aertslab git repository; the package is built with python-build-system
;; and its tests are disabled (see the comment below).
14224 (define-public python-arboreto
14226 (name "python-arboreto")
14230 (uri (git-reference
14231 (url "https://github.com/aertslab/arboreto")
14232 (commit "2f475dca08f47a60acc2beb8dd897e77b7495ca4")))
14233 (file-name (git-file-name name version))
14236 "0l0im8ay7l2d24f7vaha454vsaha9s36bfqhbijg3b8ir8apsd7l"))))
14237 (build-system python-build-system)
14238 ;; Lots of tests fail because python-distributed fails to start the
14239 ;; "Nanny" process.
14240 (arguments '(#:tests? #false))
14247 python-scikit-learn
14250 (home-page "https://github.com/aertslab/arboreto")
14251 (synopsis "Gene regulatory network inference using tree-based ensemble regressors")
14253 "This package implements scalable gene regulatory network inference using
14254 tree-based ensemble regressors.")
14255 (license license:bsd-3)))
;; pyscenic: Python implementation of the SCENIC pipeline.  Fetched from
;; the aertslab git repository and built with python-build-system.
14257 (define-public pyscenic
14264 (uri (git-reference
14265 (url "https://github.com/aertslab/pySCENIC")
14267 (file-name (git-file-name name version))
14270 "0pbmmr1zdb1vbbs6wx357s59d13pna6x03wq8blj6ckjws8bbq73"))))
14271 (build-system python-build-system)
14274 (modify-phases %standard-phases
14275 ;; Numba needs a writable dir to cache functions.
14276 (add-before 'check 'set-numba-cache-dir
14278 (setenv "NUMBA_CACHE_DIR" "/tmp")))
;; The test suite is run by invoking pytest in verbose mode.
14281 (invoke "pytest" "-v"))))))
14283 (list python-ctxcore
14285 python-multiprocessing-on-dill
14308 python-scikit-learn))
14310 (list python-pytest))
14311 (home-page "https://scenic.aertslab.org/")
14312 (synopsis "Single-Cell regulatory network inference and clustering")
14314 "pySCENIC is a Python implementation of the SCENIC pipeline (Single-Cell
14315 rEgulatory Network Inference and Clustering) which enables biologists to infer
14316 transcription factors, gene regulatory networks and cell types from
14317 single-cell RNA-seq data.")
14318 (license license:gpl3+)))
;; python-ikarus: machine learning classifier of tumor cells.  Fetched from
;; PyPI and built with python-build-system.
14320 (define-public python-ikarus
14322 (name "python-ikarus")
14327 (uri (pypi-uri "ikarus" version))
14330 "086czpvj4yafz4vrq5rx2gy0bj2l8nzwnkk0gw8qvy4w133xjysy"))))
14331 (build-system python-build-system)
14335 (modify-phases %standard-phases
14336 ;; See https://github.com/BIMSBbioinfo/ikarus/issues/12
;; References to pyscenic.genesig in the classifier module are redirected
;; to ctxcore.genesig; python-ctxcore is listed among the inputs for this.
14337 (add-after 'unpack 'fix-issue-12
14339 (substitute* "ikarus/classifier.py"
14340 (("pyscenic.genesig") "ctxcore.genesig"))))
14341 ;; Numba needs a writable dir to cache functions.
14342 (add-before 'check 'set-numba-cache-dir
14344 (setenv "NUMBA_CACHE_DIR" "/tmp"))))))
14351 python-ctxcore ;because of issue 12
14353 (home-page "https://github.com/BIMSBbioinfo/ikarus")
14354 (synopsis "Machine learning classifier of tumor cells")
14356 "ikarus is a stepwise machine learning pipeline that tries to cope with a task
14357 of distinguishing tumor cells from normal cells.  Leveraging multiple
14358 annotated single cell datasets it can be used to define a gene set specific to
14359 tumor cells.  First, the latter gene set is used to rank cells and then to
14360 train a logistic classifier for the robust classification of tumor and normal
14361 cells.  Finally, sensitivity is increased by propagating the cell labels based
14362 on a custom cell-cell network.  ikarus is tested on multiple single cell
14363 datasets to ascertain that it achieves high sensitivity and specificity in
14364 multiple experimental contexts.")
14365 (license license:expat)))
;; vbz-compression: VBZ compression plugin for nanopore signal data.
;; Fetched from the upstream git tag "v<version>" with a recursive checkout
;; and built with cmake-build-system.
14367 (define-public vbz-compression
14369 (name "vbz-compression")
14374 (uri (git-reference
14375 (url "https://github.com/nanoporetech/vbz_compression/")
14376 (commit (string-append "v" version))
14377 ;; We include the streamvbyte sources
14378 (recursive? #true)))
14379 (file-name (git-file-name name version))
14382 "1c6wsrnw03vsc5cfp2rdakly5xy55m9chjmy6v685yapdwirdky0"))))
14383 (build-system cmake-build-system)
;; Both the Conan integration and the Python bindings are switched off.
14385 `(#:configure-flags
14386 '("-DENABLE_CONAN=OFF"
14387 ;; Python things aren't even installed, so we might as well
14388 ;; disable building them.
14389 "-DENABLE_PYTHON=OFF")))
14391 (list ;("hdf5" ,hdf5-1.10)
14394 (list googlebenchmark))
14395 (home-page "https://github.com/nanoporetech/vbz_compression/")
14396 (synopsis "VBZ compression plugin for nanopore signal data")
14398 "VBZ Compression uses variable byte integer encoding to compress
14399 nanopore signal data.  The performance of VBZ is achieved by taking
14400 advantage of the properties of the raw signal and therefore is most
14401 effective when applied to the signal dataset.")
14402 (license license:mpl2.0)))
;; python-ont-fast5-api: interface to HDF5 files in the Oxford Nanopore
;; fast5 format.  Fetched from the upstream git tag "release_<version>" and
;; built with python-build-system.
14404 (define-public python-ont-fast5-api
14406 (name "python-ont-fast5-api")
14411 (uri (git-reference
14412 (url "https://github.com/nanoporetech/ont_fast5_api")
14413 (commit (string-append "release_" version))))
14414 (file-name (git-file-name name version))
14417 "01hj4751j424lzic2sc4bz1f8w7i7fpkjpy3rgghdyl5lyfyb4s4"))
14418 (modules '((guix build utils)))
;; The vbz_plugin directory shipped with the source is removed; the
;; copy-plugin phase below recreates it from the vbz-compression input.
14420 '(delete-file-recursively "ont_fast5_api/vbz_plugin"))))
14421 (build-system python-build-system)
14424 (modify-phases %standard-phases
;; Recreate the plugin directory and copy libvbz_hdf_plugin.so into it
;; from the vbz-compression input.
14425 (add-after 'unpack 'copy-plugin
14426 (lambda* (#:key inputs #:allow-other-keys)
14427 (mkdir-p "ont_fast5_api/vbz_plugin/")
14428 (install-file (string-append
14429 (assoc-ref inputs "vbz-compression")
14430 "/hdf5/lib/plugin/libvbz_hdf_plugin.so")
14431 "ont_fast5_api/vbz_plugin/"))))))
14433 (list vbz-compression))
14435 (list python-numpy python-h5py python-packaging python-progressbar33))
14436 (home-page "https://github.com/nanoporetech/ont_fast5_api")
14437 (synopsis "Interface to HDF5 files of the Oxford Nanopore fast5 file format")
14439 "This package provides a concrete implementation of the fast5 file schema
14440 using the generic @code{h5py} library, plain-named methods to interact with
14441 and reflect the fast5 file schema, and tools to convert between
14442 @code{multi_read} and @code{single_read} formats.")
14443 (license license:mpl2.0)))
;; tbsp: SNP-based trajectory inference from scRNA-Seq data.  Packaged from
;; a pinned commit of the upstream git repository and built with
;; python-build-system; no tests are run.
14445 (define-public tbsp
14446 (let ((commit "dc30c03868233c5504299c9cb0d7b2064ba9cb41")
14450 (version (git-version "1.0.0" revision commit))
14454 (uri (git-reference
14455 (url "https://github.com/phoenixding/tbsp")
14457 (file-name (git-file-name name version))
14460 "1im0bik2hxkcb7jzkcxp5nqb30hd8lfraxml6i5ik52j6z3qqln1"))))
14461 (build-system python-build-system)
14463 '(#:tests? #f ; no tests included
14465 (modify-phases %standard-phases
;; Remove the ", <3.0" upper version bound from setup.py.
14466 (add-after 'unpack 'relax-requirements
14468 (substitute* "setup.py"
14469 ((", <3.0") ""))))))) ; matplotlib
14471 (list python-matplotlib
14475 python-biopython-1.73
14476 python-scikit-learn
14478 (home-page "https://github.com/phoenixding/tbsp/")
14479 (synopsis "SNP-based trajectory inference")
14481 "Several studies focus on the inference of developmental and response
14482 trajectories from single cell RNA-Seq (scRNA-Seq) data.  A number of
14483 computational methods, often referred to as pseudo-time ordering, have been
14484 developed for this task.  CRISPR has also been used to reconstruct lineage
14485 trees by inserting random mutations.  The tbsp package implements an
14486 alternative method to detect significant, cell type specific sequence
14487 mutations from scRNA-Seq data.")
14488 (license license:expat))))
;; tabixpp: C++ wrapper around tabix.  Fetched from the upstream git tag
;; "v<version>" and built with gnu-build-system (no configure step, no
;; tests).  Besides the tabix++ program, a shared and a static library, the
;; header and a pkg-config file are installed.
14490 (define-public tabixpp
14496 (uri (git-reference
14497 (url "https://github.com/ekg/tabixpp")
14498 (commit (string-append "v" version))))
14499 (file-name (git-file-name name version))
14501 (base32 "1k2a3vbq96ic4lw72iwp5s3mwwc4xhdffjj584yn6l9637q9j1yd"))
14502 (modules '((guix build utils)))
;; Remove the htslib directory shipped in the source; htslib is listed
;; among the package inputs.
14505 (delete-file-recursively "htslib")))))
14506 (build-system gnu-build-system)
14508 (list bzip2 htslib xz zlib))
;; The compilers are taken from cc-for-target / cxx-for-target; HTS_LIB is
;; set to a file looked up among the build inputs.
14510 (list #:make-flags #~(list (string-append "CC=" #$(cc-for-target))
14511 (string-append "CXX=" #$(cxx-for-target))
14513 (string-append "HTS_LIB="
14514 (search-input-file %build-inputs
14517 #:tests? #f ; There are no tests to run.
14519 #~(modify-phases %standard-phases
14520 (delete 'configure) ; There is no configure phase.
14521 ;; Build shared and static libraries.
14522 (add-after 'build 'build-libraries
14523 (lambda* (#:key inputs #:allow-other-keys)
14524 (invoke #$(cxx-for-target)
14525 "-shared" "-o" "libtabixpp.so" "tabix.o" "-lhts")
14526 (invoke #$(ar-for-target) "rcs" "libtabixpp.a" "tabix.o")))
14528 (lambda* (#:key outputs #:allow-other-keys)
14529 (let* ((out (assoc-ref outputs "out"))
14530 (lib (string-append out "/lib"))
14531 (bin (string-append out "/bin")))
14532 (install-file "tabix++" bin)
14533 (install-file "libtabixpp.so" lib)
14534 (install-file "libtabixpp.a" lib)
14535 (install-file "tabix.hpp" (string-append out "/include"))
;; Write a pkg-config file; the format arguments are the output directory
;; and the package version.
14536 (mkdir-p (string-append lib "/pkgconfig"))
14537 (with-output-to-file (string-append lib "/pkgconfig/tabixpp.pc")
14539 (format #t "prefix=~a~@
14540 exec_prefix=${prefix}~@
14541 libdir=${exec_prefix}/lib~@
14542 includedir=${prefix}/include~@
14547 Description: C++ wrapper around tabix project~@
14548 Libs: -L${libdir} -ltabixpp~@
14549 Cflags: -I${includedir}~%"
14550 out #$version)))))))))
14551 (home-page "https://github.com/ekg/tabixpp")
14552 (synopsis "C++ wrapper around tabix project")
14553 (description "This is a C++ wrapper around the Tabix project which abstracts
14554 some of the details of opening and jumping in tabix-indexed files.")
14555 (license license:expat)))
;; smithwaterman: implementation of the Smith-Waterman algorithm.  Packaged
;; from a pinned commit of the upstream git repository and built with
;; gnu-build-system (no configure step, no tests).  A shared library, the
;; static libsw.a, the headers and a pkg-config file are installed next to
;; the smithwaterman program.
14557 (define-public smithwaterman
14558 (let ((commit "2610e259611ae4cde8f03c72499d28f03f6d38a7"))
14560 (name "smithwaterman")
14561 (version (git-version "0.0.0" "2" commit))
14564 (uri (git-reference
14565 (url "https://github.com/ekg/smithwaterman/")
14567 (file-name (git-file-name name version))
14569 (base32 "0i9d8zrxpiracw3mxzd9siybpy62p06rqz9mc2w93arajgbk45bs"))))
14570 (build-system gnu-build-system)
14573 #:tests? #f ; There are no tests to run.
14575 #~(list (string-append "CXX=" #$(cxx-for-target))
14578 #~(modify-phases %standard-phases
14579 (delete 'configure) ; There is no configure phase.
14580 (add-after 'unpack 'patch-source
;; Compile position-independent objects so that they can be linked into
;; the shared library built in the build-dynamic phase.
14582 (substitute* "Makefile"
14583 (("-c ") "-c -fPIC "))
;; Only when cross-compiling: use the target-prefixed ld and ar.
14584 #$@(if (%current-target-system)
14585 #~((substitute* "Makefile"
14587 (string-append "\t" #$(%current-target-system) "-ld"))
14589 (string-append "\t" #$(%current-target-system) "-ar"))))
14591 (add-after 'build 'build-dynamic
;; Link the object files into libsmithwaterman.so.
14593 (invoke #$(cxx-for-target)
14594 "-shared" "-o" "libsmithwaterman.so"
14595 "smithwaterman.o" "SmithWatermanGotoh.o"
14596 "disorder.o" "BandedSmithWaterman.o"
14597 "LeftAlign.o" "Repeats.o" "IndelAllele.o")))
14599 (lambda* (#:key outputs #:allow-other-keys)
14600 (let* ((out (assoc-ref outputs "out"))
14601 (bin (string-append out "/bin"))
14602 (lib (string-append out "/lib")))
14603 (install-file "smithwaterman" bin)
14606 (install-file file (string-append out "/include/smithwaterman")))
14607 (find-files "." "\\.h$"))
14608 (install-file "libsmithwaterman.so" lib)
14609 (install-file "libsw.a" lib)
;; Write a pkg-config file; the format arguments are the output directory
;; and the package version.
14610 (mkdir-p (string-append lib "/pkgconfig"))
14611 (with-output-to-file (string-append lib "/pkgconfig/smithwaterman.pc")
14613 (format #t "prefix=~a~@
14614 exec_prefix=${prefix}~@
14615 libdir=${exec_prefix}/lib~@
14616 includedir=${prefix}/include/smithwaterman~@
14619 Name: smithwaterman~@
14621 Description: smith-waterman-gotoh alignment algorithm~@
14622 Libs: -L${libdir} -lsmithwaterman~@
14623 Cflags: -I${includedir}~%"
14624 out #$version)))))))))
14625 (home-page "https://github.com/ekg/smithwaterman")
14626 (synopsis "Implementation of the Smith-Waterman algorithm")
14627 (description "Implementation of the Smith-Waterman algorithm.")
14628 ;; The licensing terms are unclear: https://github.com/ekg/smithwaterman/issues/9.
14629 (license (list license:gpl2 license:expat)))))
;; sylamer: finds over- or under-represented words in sequences according
;; to a sorted gene list.  The source is pinned to a fixed commit of the
;; upstream git repository; the package is built with gnu-build-system and
;; has no test target.
14631 (define-public sylamer
14637 (uri (git-reference
14638 (url "https://github.com/micans/sylamer/")
14639 (commit "aa75c3584797c0c15f860addb645f7bc1dd7627d")))
14640 (file-name (git-file-name name version))
14643 "1ddiwlrdghhb4574rvfw0brjp9gs5l6nfsy82h0m4mvz1dr3gkj5"))))
14644 (build-system gnu-build-system)
14647 #:tests? #f ; no test target
;; GSLPREFIX is set to the gsl input.
14649 #~(list (string-append "GSLPREFIX=" #$(this-package-input "gsl")))
14651 '(modify-phases %standard-phases
14652 (replace 'configure
14653 (lambda* (#:key outputs #:allow-other-keys)
;; The Makefile copies the binary to $(HOME)/local/bin; replace that
;; command with an "install -D -t" command targeting the package output.
14654 (substitute* "Makefile"
14655 (("cp sylamer \\$\\(HOME\\)/local/bin")
14656 (string-append "install -D -t " (assoc-ref outputs "out")
;; The patched Makefile is then copied into the src directory.
14658 (install-file "Makefile" "src")
14660 (inputs (list gsl zlib))
14661 (home-page "https://www.ebi.ac.uk/research/enright/software/sylamer")
14662 (synopsis "Assess microRNA binding and siRNA off-target effects")
14663 (description "Sylamer is a system for finding significantly over or
14664 under-represented words in sequences according to a sorted gene list.
14665 Typically it is used to find significant enrichment or depletion of microRNA
14666 or siRNA seed sequences from microarray expression data.  Sylamer is extremely
14667 fast and can be applied to genome-wide datasets with ease.  Results are
14668 plotted in terms of a significance landscape plot.  These plots show
14669 significance profiles for each word studied across the sorted genelist.")
14670 (license license:gpl3+)))
;; multichoose, fetched from the upstream Git tag "v" + version.  There is
;; no configure script and the tests are disabled because they need node.
14672 (define-public multichoose
14674 (name "multichoose")
14678 (uri (git-reference
14679 (url "https://github.com/ekg/multichoose/")
14680 (commit (string-append "v" version))))
14681 (file-name (git-file-name name version))
14683 (base32 "0ci5fqvmpamwgxvmyd79ygj6n3bnbl3vc7b6h1sxz58186sm3pfs"))))
14684 (build-system gnu-build-system)
14686 `(#:tests? #f ; Tests require node.
14688 (modify-phases %standard-phases
14689 (delete 'configure) ; There is no configure phase.
14691 (lambda* (#:key outputs #:allow-other-keys)
14692 (let* ((out (assoc-ref outputs "out"))
14693 (bin (string-append out "/bin"))
14694 (include (string-append out "/include")))
14695 ;; TODO: There are Python modules for these programs too.
;; Copy the two executables into bin/ and their headers into include/.
14696 (install-file "multichoose" bin)
14697 (install-file "multipermute" bin)
14698 (install-file "multichoose.h" include)
14699 (install-file "multipermute.h" include))
14701 (home-page "https://github.com/ekg/multichoose")
14702 (synopsis "Efficient loopless multiset combination generation algorithm")
14703 (description "This library implements an efficient loopless multiset
14704 combination generation algorithm which is (approximately) described in
14705 \"Loopless algorithms for generating permutations, combinations, and other
14706 combinatorial configurations.\", G. Ehrlich - Journal of the ACM (JACM),
14707 1973. (Algorithm 7.)")
14708 (license license:expat)))
;; fsom is pinned to a specific upstream commit; the version string is
;; derived from that commit with `git-version' (base version 0.0.0,
;; revision 1).
14710 (define-public fsom
14711 (let ((commit "a6ef318fbd347c53189384aef7f670c0e6ce89a3"))
14714 (version (git-version "0.0.0" "1" commit))
14717 (uri (git-reference
14718 (url "https://github.com/ekg/fsom/")
14720 (file-name (git-file-name name version))
14722 (base32 "0gw1lpvr812pywg9y546x0h1hhj261xwls41r6kqhddjlrcjc0pi"))))
14723 (build-system gnu-build-system)
14725 `(#:tests? #f ; There are no tests to run.
14727 (modify-phases %standard-phases
14728 (delete 'configure) ; There is no configure phase.
14730 (lambda* (#:key outputs #:allow-other-keys)
;; Install the `fsom' executable into bin/ of the output.
14731 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
14732 (install-file "fsom" bin)))))))
14735 (home-page "https://github.com/ekg/fsom")
14736 (synopsis "Manage SOM (Self-Organizing Maps) neural networks")
14737 (description "A tiny C library for managing SOM (Self-Organizing Maps)
14739 (license license:gpl3))))
;; fastahack: in addition to the executable, a shared library
;; (libfastahack.so), the headers and a generated pkg-config file are
;; installed.
14741 (define-public fastahack
14747 (uri (git-reference
14748 (url "https://github.com/ekg/fastahack/")
14749 (commit (string-append "v" version))))
14750 (file-name (git-file-name name version))
14752 (base32 "0rp1blskhzxf7vbh253ibpxbgl9wwgyzf1wbkxndi08d3j4vcss9"))))
14753 (build-system gnu-build-system)
14755 (list #:make-flags #~(list (string-append "CXX=" #$(cxx-for-target)))
14756 ;; Unclear how to run tests: https://github.com/ekg/fastahack/issues/15
14759 #~(modify-phases %standard-phases
14760 (delete 'configure) ; There is no configure phase.
14761 (add-after 'unpack 'patch-source
;; Compile with -fPIC so that the object files can be linked into the
;; shared library built in the `build-dynamic' phase.
14763 (substitute* "Makefile"
14764 (("-c ") "-c -fPIC "))))
14765 (add-after 'build 'build-dynamic
;; Link the object files into libfastahack.so.
14767 (invoke #$(cxx-for-target)
14768 "-shared" "-o" "libfastahack.so"
14769 "Fasta.o" "FastaHack.o" "split.o" "disorder.o")))
14771 (lambda* (#:key outputs #:allow-other-keys)
14772 (let* ((out (assoc-ref outputs "out"))
14773 (lib (string-append out "/lib"))
14774 (bin (string-append out "/bin")))
;; Header files found in the source tree go to include/fastahack.
14775 (mkdir-p (string-append out "/include/fastahack"))
14778 (install-file file (string-append out "/include/fastahack")))
14779 (find-files "." "\\.h$"))
14780 (install-file "fastahack" bin)
14781 (install-file "libfastahack.so" lib)
;; Generate lib/pkgconfig/fastahack.pc; the output directory and the
;; package version are the two `format' arguments.
14782 (mkdir-p (string-append lib "/pkgconfig"))
14783 (with-output-to-file (string-append lib "/pkgconfig/fastahack.pc")
14785 (format #t "prefix=~a~@
14786 exec_prefix=${prefix}~@
14787 libdir=${exec_prefix}/lib~@
14788 includedir=${prefix}/include/fastahack~@
14793 Description: Indexing and sequence extraction from FASTA files~@
14794 Libs: -L${libdir} -lfastahack~@
14795 Cflags: -I${includedir}~%"
14796 out #$version)))))))))
14797 (home-page "https://github.com/ekg/fastahack")
14798 (synopsis "Indexing and sequence extraction from FASTA files")
14799 (description "Fastahack is a small application for indexing and
14800 extracting sequences and subsequences from FASTA files. The included library
14801 provides a FASTA reader and indexer that can be embedded into applications
14802 which would benefit from directly reading subsequences from FASTA files. The
14803 library automatically handles index file generation and use.")
14804 (license (list license:expat license:gpl2))))
;; vcflib: the source snippet removes bundled third-party directories and
;; makes CMake look up fastahack and smithwaterman through pkg-config.  The
;; sources of filevercmp, fsom, intervaltree and multichoose are unpacked
;; again at build time from the corresponding "-src" inputs.
14806 (define-public vcflib
14813 (uri (git-reference
14814 (url "https://github.com/vcflib/vcflib")
14815 (commit (string-append "v" version))))
14816 (file-name (git-file-name name version))
14818 (base32 "1r7pnajg997zdjkf1b38m14v0zqnfx52w7nbldwh1xpbpahb1hjh"))
14819 (modules '((guix build utils)))
;; Drop every CMakeLists.txt line that mentions the bundled fastahack or
;; smithwaterman, then add pkg-config checks for them next to the
;; TABIXPP check and link against the libraries they report.
14822 (substitute* "CMakeLists.txt"
14823 ((".*fastahack.*") "")
14824 ((".*smithwaterman.*") "")
14825 (("(pkg_check_modules\\(TABIXPP)" text)
14827 "pkg_check_modules(FASTAHACK REQUIRED fastahack)\n"
14828 "pkg_check_modules(SMITHWATERMAN REQUIRED smithwaterman)\n"
14830 (("\\$\\{TABIXPP_LIBRARIES\\}" text)
14831 (string-append "${FASTAHACK_LIBRARIES} "
14832 "${SMITHWATERMAN_LIBRARIES} "
;; Rewrite header references in all C/C++ sources and headers so that
;; they name the smithwaterman/ and fastahack/ include sub-directories.
14834 (substitute* (find-files "." "\\.(h|c)(pp)?$")
14835 (("\"SmithWatermanGotoh.h\"") "<smithwaterman/SmithWatermanGotoh.h>")
14836 (("\"convert.h\"") "<smithwaterman/convert.h>")
14837 (("\"disorder.h\"") "<smithwaterman/disorder.h>")
14838 (("Fasta.h") "fastahack/Fasta.h"))
;; Delete the bundled third-party directories.
14839 (for-each delete-file-recursively
14840 '("fastahack" "filevercmp" "fsom" "googletest" "intervaltree"
14841 "libVCFH" "multichoose" "smithwaterman"))))))
14842 (build-system cmake-build-system)
14854 `(("pkg-config" ,pkg-config)
14856 ;; This package builds against the .o files so we need to extract the source.
14857 ("filevercmp-src" ,(package-source filevercmp))
14858 ("fsom-src" ,(package-source fsom))
14859 ("intervaltree-src" ,(package-source intervaltree))
14860 ("multichoose-src" ,(package-source multichoose))))
14862 (list #:configure-flags
;; Tell CMake which pkg-config executable to use.
14863 #~(list (string-append
14864 "-DPKG_CONFIG_EXECUTABLE="
14866 %build-inputs (string-append
14867 "/bin/" #$(pkg-config-for-target)))))
14868 #:tests? #f ; no tests
14870 #~(modify-phases %standard-phases
14871 (add-after 'unpack 'build-shared-library
;; Build libvcflib as a shared library instead of a static one, and
;; adjust the library name used in test/Makefile accordingly.
14873 (substitute* "CMakeLists.txt"
14874 (("vcflib STATIC") "vcflib SHARED"))
14875 (substitute* "test/Makefile"
14876 (("libvcflib.a") "libvcflib.so"))))
14877 (add-after 'unpack 'unpack-submodule-sources
14878 (lambda* (#:key inputs native-inputs #:allow-other-keys)
;; UNPACK looks up the input labelled SOURCE (in INPUTS first, then in
;; NATIVE-INPUTS) and places its contents in directory TARGET: a
;; directory is copied recursively, anything else is extracted with tar.
14879 (let ((unpack (lambda (source target)
14881 (with-directory-excursion target
14882 (let ((source (or (assoc-ref inputs source)
14883 (assoc-ref native-inputs source))))
14884 (if (file-is-directory? source)
14885 (copy-recursively source ".")
14886 (invoke "tar" "xvf"
14888 "--strip-components=1")))))))
14890 (unpack "filevercmp-src" "filevercmp")
14891 (unpack "fsom-src" "fsom")
14892 (unpack "intervaltree-src" "intervaltree")
14893 (unpack "multichoose-src" "multichoose")))))
14894 ;; This pkg-config file is provided by other distributions.
14895 (add-after 'install 'install-pkg-config-file
14896 (lambda* (#:key outputs #:allow-other-keys)
14897 (let* ((out (assoc-ref outputs "out"))
14898 (pkgconfig (string-append out "/lib/pkgconfig")))
14899 (mkdir-p pkgconfig)
;; Write vcflib.pc; the output directory and the package version are
;; the two `format' arguments.
14900 (with-output-to-file (string-append pkgconfig "/vcflib.pc")
14902 (format #t "prefix=~a~@
14903 exec_prefix=${prefix}~@
14904 libdir=${exec_prefix}/lib~@
14905 includedir=${prefix}/include~@
14909 Requires: smithwaterman, fastahack, tabixpp~@
14910 Description: C++ library for parsing and manipulating VCF files~@
14911 Libs: -L${libdir} -lvcflib~@
14912 Cflags: -I${includedir}~%"
14913 out #$version)))))))))
14914 (home-page "https://github.com/vcflib/vcflib/")
14915 (synopsis "Library for parsing and manipulating VCF files")
14916 (description "Vcflib provides methods to manipulate and interpret
14917 sequence variation as it can be described by VCF. It is both an API for parsing
14918 and operating on records of genomic variation as it can be described by the VCF
14919 format, and a collection of command-line utilities for executing complex
14920 manipulations on VCF files.")
14921 (license license:expat)))
;; freebayes: built with Meson.  The source snippet removes the bundled
;; htslib; fastahack, htslib, smithwaterman, tabixpp and vcflib are listed
;; as package inputs.
14923 (define-public freebayes
14929 (uri (git-reference
14930 (url "https://github.com/freebayes/freebayes")
14931 (commit (string-append "v" version))))
14932 (file-name (git-file-name name version))
14934 (base32 "1l0z88gq57kva677a6xri5g9k2d9h9lk5yk1q2xmq64wqhv7dvc3"))
14935 (modules '((guix build utils)))
14938 (delete-file-recursively "contrib/htslib")
14940 (build-system meson-build-system)
14942 (list fastahack htslib smithwaterman tabixpp vcflib))
14944 `(("bash-tap" ,bash-tap)
14946 ("grep" ,grep) ; Built with perl support.
14947 ("parallel" ,parallel)
14949 ("pkg-config" ,pkg-config)
14951 ("samtools" ,samtools)
14953 ;; This submodule is needed to run the tests.
14954 ("test-simple-bash-src"
14957 (uri (git-reference
14958 (url "https://github.com/ingydotnet/test-simple-bash/")
14959 (commit "124673ff204b01c8e96b7fc9f9b32ee35d898acc")))
14960 (file-name "test-simple-bash-src-checkout")
14962 (base32 "043plp6z0x9yf7mdpky1fw7zcpwn1p47px95w9mh16603zqqqpga"))))))
14965 (modify-phases %standard-phases
14966 (add-after 'unpack 'patch-source
14967 (lambda* (#:key inputs #:allow-other-keys)
;; Make the files under test/t refer to the "bash-tap" input by its
;; absolute location instead of the relative bash-tap path.
14968 (let ((bash-tap (assoc-ref inputs "bash-tap")))
14969 (substitute* (find-files "test/t")
14970 (("BASH_TAP_ROOT=bash-tap")
14971 (string-append "BASH_TAP_ROOT=" bash-tap "/bin"))
14972 (("bash-tap/bash-tap-bootstrap")
14973 (string-append bash-tap "/bin/bash-tap-bootstrap"))
14974 (("source.*bash-tap-bootstrap")
14975 (string-append "source " bash-tap "/bin/bash-tap-bootstrap")))
;; Refer to IntervalTree.h by name rather than through a relative
;; ../intervaltree path.
14976 (substitute* '("src/BedReader.cpp"
14978 (("../intervaltree/IntervalTree.h") "IntervalTree.h"))
14979 (substitute* "meson.build"
14980 ;; Our pkg-config file is vcflib.pc
14981 (("libvcflib") "vcflib")
14982 (("vcflib_inc,") ""))
14984 (add-after 'unpack 'unpack-submodule-sources
14985 (lambda* (#:key inputs #:allow-other-keys)
;; Copy the test-simple-bash checkout into test/test-simple-bash.
14986 (mkdir-p "test/test-simple-bash")
14987 (copy-recursively (assoc-ref inputs "test-simple-bash-src")
14988 "test/test-simple-bash")
14990 ;; The slow tests take longer than the specified timeout.
;; The following code is only spliced in when the current system is
;; armhf-linux or aarch64-linux.
14991 ,@(if (any (cute string=? <> (%current-system))
14992 '("armhf-linux" "aarch64-linux"))
14994 (lambda* (#:key tests? #:allow-other-keys)
;; Run the test suite with a timeout multiplier of 5.
14996 (invoke "meson" "test" "--timeout-multiplier" "5"))
14999 (home-page "https://github.com/freebayes/freebayes")
15000 (synopsis "Haplotype-based variant detector")
15001 (description "FreeBayes is a Bayesian genetic variant detector designed to
15002 find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms),
15003 indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and
15004 complex events (composite insertion and substitution events) smaller than the
15005 length of a short-read sequencing alignment.")
15006 (license license:expat)))
;; samblaster: there is no configure script and no test suite; the
;; `samblaster' executable is copied into bin/ of the output.
15008 (define-public samblaster
15010 (name "samblaster")
15014 (uri (git-reference
15015 (url "https://github.com/GregoryFaust/samblaster")
;; The Git reference is "v." followed by the version (note the dot).
15016 (commit (string-append "v." version))))
15017 (file-name (git-file-name name version))
15020 "0iv2ddfw8363vb2x8gr3p8g88whb6mb9m0pf71i2cqsbv6jghap7"))))
15021 (build-system gnu-build-system)
15023 `(#:tests? #f ; there are none
15025 (modify-phases %standard-phases
15026 (delete 'configure) ; There is no configure phase.
15028 (lambda* (#:key outputs #:allow-other-keys)
15029 (install-file "samblaster"
15030 (string-append (assoc-ref outputs "out") "/bin"))
15032 (home-page "https://github.com/GregoryFaust/samblaster")
15033 (synopsis "Mark duplicates in paired-end SAM files")
15034 (description "Samblaster is a fast and flexible program for marking
15035 duplicates in read-id grouped paired-end SAM files. It can also optionally
15036 output discordant read pairs and/or split read mappings to separate SAM files,
15037 and/or unmapped/clipped reads to a separate FASTQ file. When marking
15038 duplicates, samblaster will require approximately 20MB of memory per 1M read
15040 (license license:expat)))
;; r-velocyto is pinned to a specific upstream commit and built with
;; r-build-system; its version is derived with `git-version' from base
;; version 0.6, the revision and the commit.
15042 (define-public r-velocyto
15043 (let ((commit "d7790346cb99f49ab9c2b23ba70dcf9d2c9fc350")
15046 (name "r-velocyto")
15047 (version (git-version "0.6" revision commit))
15051 (uri (git-reference
15052 (url "https://github.com/velocyto-team/velocyto.R")
15054 (file-name (git-file-name name version))
15057 "16wqf70j7rd7pay2q513iyz12i8n9vrpg1bisah4lddbcpx5dz1n"))))
15058 (build-system r-build-system)
15068 ;; Suggested packages
15074 r-genomicalignments
15078 (home-page "https://velocyto.org")
15079 (synopsis "RNA velocity estimation in R")
15081 "This package provides basic routines for estimation of gene-specific
15082 transcriptional derivatives and visualization of the resulting velocity
15084 (license license:gpl3))))
;; MethylDackel: the `configure' phase is replaced by edits to the
;; Makefile; the test suite is run through the "test" target.
15086 (define-public methyldackel
15088 (name "methyldackel")
15092 (uri (git-reference
15093 (url "https://github.com/dpryan79/MethylDackel")
15095 (file-name (git-file-name name version))
15098 "1sfhf2ap75qxpnmy1ifgmxqs18rq8mah9mcgkby73vc6h0sw99ws"))))
15099 (build-system gnu-build-system)
15101 `(#:test-target "test"
;; The Makefile installs the executable directly into $(prefix), hence
;; prefix is set to the bin/ directory of the output.
15105 (string-append "prefix=" #$output "/bin/"))
15107 (modify-phases %standard-phases
15108 (replace 'configure
15109 (lambda* (#:key outputs #:allow-other-keys)
;; Also link against libBigWig, and create $(prefix) before the
;; executable is installed into it.
15110 (substitute* "Makefile"
15111 (("-lhts ") "-lhts -lBigWig ")
15112 (("install MethylDackel \\$\\(prefix\\)" match)
15113 (string-append "install -d $(prefix); " match))))))))
15115 (list curl ; XXX: needed by libbigwig
15116 htslib-1.9 libbigwig zlib))
15117 ;; Needed for tests
15119 `(("python" ,python-wrapper)))
15120 (home-page "https://github.com/dpryan79/MethylDackel")
15121 (synopsis "Universal methylation extractor for BS-seq experiments")
15123 "MethylDackel will process a coordinate-sorted and indexed BAM or CRAM
15124 file containing some form of BS-seq alignments and extract per-base
15125 methylation metrics from them. MethylDackel requires an indexed fasta file
15126 containing the reference genome as well.")
15127 ;; See https://github.com/dpryan79/MethylDackel/issues/85
15128 (license license:expat)))
15130 ;; This package bundles PCRE 8.02 and cannot be built with the current
15132 (define-public phast
;; The `configure' phase is replaced by a series of Makefile edits; only
;; the msa_view tests are run.
15138 (uri (git-reference
15139 (url "https://github.com/CshlSiepelLab/phast")
15140 (commit (string-append "v" version))))
15141 (file-name (git-file-name name version))
15144 "10lpbllvny923jjbbyrpxahhd1m5h7sbj9gx7rd123rg10mlidki"))))
15145 (build-system gnu-build-system)
15149 (string-append "DESTDIR=" #$output))
15151 (modify-phases %standard-phases
15152 (replace 'configure
15153 (lambda* (#:key inputs outputs #:allow-other-keys)
15155 (substitute* "test/Makefile"
;; Point CLAPACKPATH at the lib/ directory of the "clapack" input
;; instead of /usr/lib.
15157 (substitute* "Makefile"
15158 (("CLAPACKPATH=/usr/lib")
15159 (string-append "CLAPACKPATH="
15160 (assoc-ref inputs "clapack") "/lib")))
15161 ;; Renaming the libraries is not necessary with our version of
15163 (substitute* "src/lib/Makefile"
15164 (("ifdef CLAPACKPATH") "ifdef UNNECESSARY"))
;; Adjust library names and use the shared objects (.so) in place of
;; the static archives (.a).
15165 (substitute* "src/make-include.mk"
15166 (("-lblaswr") "-lblas")
15167 (("-ltmg") "-ltmglib")
15168 (("liblapack.a") "liblapack.so")
15169 (("libblas.a") "libblas.so")
15170 (("libf2c.a") "libf2c.so"))
15171 (substitute* "src/Makefile"
15172 (("/opt") "/share")
15178 (string-append (getcwd) "/bin:" (getenv "PATH")))
15179 ;; Disable broken test
15180 (substitute* "test/Makefile"
15181 ((".*if.*hmrc_summary" m) (string-append "#" m)))
15182 ;; Only run the msa_view tests because the others fail for
15183 ;; unknown reasons.
15184 (invoke "make" "-C" "test" "msa_view"))))))
15189 (home-page "http://compgen.cshl.edu/phast/")
15190 (synopsis "Phylogenetic analysis with space/time models")
15192 "Phylogenetic Analysis with Space/Time models (PHAST) is a collection of
15193 command-line programs and supporting libraries for comparative and
15194 evolutionary genomics. Best known as the search engine behind the
15195 Conservation tracks in the University of California, Santa Cruz (UCSC) Genome
15196 Browser, PHAST also includes several tools for phylogenetic modeling,
15197 functional element identification, as well as utilities for manipulating
15198 alignments, trees and genomic annotations.")
15199 (license license:bsd-3)))
;; python-gffutils, fetched from the upstream Git tag "v" + version and
;; built with python-build-system.  The tests are run with nosetests using
;; the attribute filter "!slow".
15201 (define-public python-gffutils
15203 (name "python-gffutils")
15208 (uri (git-reference
15209 (url "https://github.com/daler/gffutils")
15210 (commit (string-append "v" version))))
15211 (file-name (git-file-name name version))
15214 "1gkzk7ps6w3ai2r81js9s9bzpba0jmxychnd2da6n9ggdnf2xzqz"))))
15215 (build-system python-build-system)
15218 (modify-phases %standard-phases
15220 (lambda* (#:key tests? #:allow-other-keys)
15222 ;; Tests need to access the HOME directory
15223 (setenv "HOME" "/tmp")
15224 (invoke "nosetests" "-a" "!slow")))))))
15226 (list python-argcomplete
15234 (list python-nose))
15235 (home-page "https://github.com/daler/gffutils")
15236 (synopsis "Tool for manipulation of GFF and GTF files")
15238 "python-gffutils is a Python package for working with and manipulating
15239 the GFF and GTF format files typically used for genomic annotations. The
15240 files are loaded into a SQLite database, allowing much more complex
15241 manipulation of hierarchical features (e.g., genes, transcripts, and exons)
15242 than is possible with plain-text methods alone.")
15243 (license license:expat)))
;; InDelFixer: Java application built with ant-build-system into
;; InDelFixer.jar from src/main/java, with tests under src/test.
15245 (define-public indelfixer
15247 (name "indelfixer")
15251 (uri (git-reference
15252 (url "https://github.com/cbg-ethz/InDelFixer/")
15253 (commit (string-append "v" version))))
15254 (file-name (git-file-name name version))
15257 "10ak05x8i1bx2p7rriv2rglqg1wr7c8wrhjrqlq1wm7ka99w8i79"))))
15258 (build-system ant-build-system)
15260 `(#:jar-name "InDelFixer.jar"
15261 #:source-dir "src/main/java"
15262 #:test-dir "src/test"))
;; NOTE(review): the label says "java-commons-lang2" while the package
;; variable is `java-commons-lang' -- confirm the label is intentional.
15264 `(("java-commons-lang2" ,java-commons-lang)
15265 ("java-args4j" ,java-args4j)))
15268 (home-page "https://github.com/cbg-ethz/InDelFixer/")
15269 (synopsis "Iterative and sensitive NGS sequence aligner")
15270 (description "InDelFixer is a sensitive aligner for 454, Illumina and
15271 PacBio data, employing a full Smith-Waterman alignment against a reference.
15272 This Java command line application aligns Next-Generation Sequencing (NGS) and
15273 third-generation reads to a set of reference sequences, by a prior fast k-mer
15274 matching and removes indels, causing frame shifts. In addition, only a
15275 specific region can be considered. An iterative refinement of the alignment
15276 can be performed, by alignment against the consensus sequence with wobbles.
15277 The output is in SAM format.")
15278 (license license:gpl3+)))
;; libSBML: the "core" source tarball from the SourceForge mirror, built
;; with cmake-build-system; tests are run through the "test" target.
15280 (define-public libsbml
15286 (uri (string-append "mirror://sourceforge/sbml/libsbml/"
15287 version "/stable/libSBML-"
15288 version "-core-src.tar.gz"))
15291 "0slkagrk3nfi2qsksv6b1brj6zhx4bj4bkib2sdycvrcd10ql2lh"))))
15292 (build-system cmake-build-system)
15294 `(#:test-target "test"
;; WITH_CHECK=ON presumably enables building the unit tests (confirm
;; against upstream's CMake options).  The libxml2 library and include
;; locations are passed explicitly, taken from the "libxml2" input.
15296 ,#~(list "-DWITH_CHECK=ON"
15297 (string-append "-DLIBXML_LIBRARY="
15298 #$(this-package-input "libxml2")
15300 (string-append "-DLIBXML_INCLUDE_DIR="
15301 #$(this-package-input "libxml2")
15302 "/include/libxml2"))))
15307 (home-page "http://sbml.org/Software/libSBML")
15308 (synopsis "Process SBML files and data streams")
15309 (description "LibSBML is a library to help you read, write, manipulate,
15310 translate, and validate SBML files and data streams. The @dfn{Systems Biology
15311 Markup Language} (SBML) is an interchange format for computer models of
15312 biological processes. SBML is useful for models of metabolism, cell
15313 signaling, and more. It continues to be evolved and expanded by an
15314 international community.")
15315 (license license:lgpl2.1+)))
;; kraken2: make is run in src/ with KRAKEN2_DIR set to bin/ of the
;; output.  There is no configure script; an extra `install-scripts' phase
;; patches the files under scripts/ and installs them.
15317 (define-public kraken2
15323 (uri (git-reference
15324 (url "https://github.com/DerrickWood/kraken2")
15325 (commit (string-append "v" version))))
15326 (file-name (git-file-name name version))
15329 "1pl6ml1ldg2hnhy8ps56q0fl1wq3g91qkhinj6pb4yjjhv1rxsjf"))))
15330 (build-system gnu-build-system)
15332 `(#:tests? #false ; there are none
15334 ,#~(list "-C" "src"
15335 (string-append "KRAKEN2_DIR=" #$output "/bin"))
15337 (modify-phases %standard-phases
15338 (delete 'configure)
15339 (add-before 'install 'install-scripts
15340 (lambda* (#:key outputs #:allow-other-keys)
;; REPLACEMENTS holds the values substituted for the #####=KEY=#####
;; placeholders found in the scripts.
15341 (let* ((bin (string-append (assoc-ref outputs "out") "/bin"))
15342 (replacements `(("KRAKEN2_DIR" . ,bin)
15343 ("VERSION" . ,,version))))
15346 (with-directory-excursion "scripts"
15347 (let ((scripts (find-files "." ".*")))
;; Fill in the placeholders; an unknown key raises an error instead of
;; being silently left in place.
15348 (substitute* scripts
15349 (("#####=([^=]+)=#####" _ key)
15350 (or (assoc-ref replacements key)
15351 (error (format #false "unknown key: ~a~%" key)))))
15352 (substitute* "kraken2"
15353 (("compression_program = \"bzip2\"")
15354 (string-append "compression_program = \""
15357 (("compression_program = \"gzip\"")
15358 (string-append "compression_program = \""
;; In the substitutions below, tool names used by the scripts are
;; replaced with the absolute file names returned by `which'.
15361 (substitute* '("download_genomic_library.sh"
15362 "download_taxonomy.sh"
15363 "16S_gg_installation.sh"
15364 "16S_silva_installation.sh"
15365 "16S_rdp_installation.sh")
15366 (("wget") (which "wget")))
15367 (substitute* '("download_taxonomy.sh"
15368 "download_genomic_library.sh"
15369 "rsync_from_ncbi.pl")
15371 (string-append (which "rsync") " -")))
15372 (substitute* "mask_low_complexity.sh"
15373 (("which") (which "which")))
15374 (substitute* '("mask_low_complexity.sh"
15375 "download_genomic_library.sh"
15376 "16S_silva_installation.sh")
15378 (string-append (which "sed") " -e ")))
15379 (substitute* '("rsync_from_ncbi.pl"
15380 "16S_rdp_installation.sh"
15381 "16S_silva_installation.sh"
15382 "16S_gg_installation.sh"
15383 "download_taxonomy.sh"
15384 "download_genomic_library.sh")
15385 (("gunzip") (which "gunzip")))
;; Set mode 555 on each script and install it into bin/.
15386 (for-each (lambda (script)
15387 (chmod script #o555)
15388 (install-file script bin))
15397 (home-page "https://github.com/DerrickWood/kraken2")
15398 (synopsis "Taxonomic sequence classification system")
15399 (description "Kraken is a taxonomic sequence classifier that assigns
15400 taxonomic labels to DNA sequences. Kraken examines the k-mers within a query
15401 sequence and uses the information within those k-mers to query a
15402 database. That database maps k-mers to the lowest common ancestor (LCA) of all
15403 genomes known to contain a given k-mer.")
15404 (license license:expat)))
;; LoFreq, fetched from the upstream Git tag "v" + version and built with
;; gnu-build-system.
15406 (define-public lofreq
15412 (uri (git-reference
15413 (url "https://github.com/CSB5/lofreq")
15414 (commit (string-append "v" version))))
15415 (file-name (git-file-name name version))
15418 "0qssrn3mgjak7df6iqc1rljqd3g3a5syvg0lsv4vds43s3fq23bl"))))
15419 (build-system gnu-build-system)
;; The test target is recorded as "bug-tests", but tests are switched
;; off for the reason given on the next line.
15421 '(#:test-target "bug-tests"
15422 #:tests? #false)) ; test data are not included
15424 `(("htslib" ,htslib)
15425 ("python" ,python-wrapper)
15428 (list autoconf automake which))
15429 (home-page "https://csb5.github.io/lofreq/")
15430 (synopsis "Sensitive variant calling from sequencing data")
15431 (description "LoFreq is a fast and sensitive variant-caller for inferring
15432 SNVs and indels from next-generation sequencing data. It makes full use of
15433 base-call qualities and other sources of errors inherent in
15434 sequencing (e.g. mapping or base/indel alignment uncertainty), which are
15435 usually ignored by other methods or only used for filtering.")
15436 (license license:expat)))
;; iVar, fetched from the upstream Git tag "v" + version and built with
;; gnu-build-system; parallel test execution is turned off.
15438 (define-public ivar
15444 (uri (git-reference
15445 (url "https://github.com/andersen-lab/ivar")
15446 (commit (string-append "v" version))))
15447 (file-name (git-file-name name version))
15450 "044xa0hm3b8fga64csrdx05ih8w7kwmvcdrdrhkg8j11ml4bi4xv"))))
15451 (build-system gnu-build-system)
15452 (arguments `(#:parallel-tests? #false)) ; not supported
15454 (list htslib zlib))
15456 (list autoconf automake))
15457 (home-page "https://andersen-lab.github.io/ivar/html/")
15458 (synopsis "Tools for amplicon-based sequencing")
15459 (description "iVar is a computational package that contains functions
15460 broadly useful for viral amplicon-based sequencing.")
15461 (license license:gpl3+)))
;; PyLiftover is fetched from Git (see the note on the PyPI release
;; below) and built with python-build-system; the tests are disabled
;; because they access the web.
15463 (define-public python-pyliftover
15465 (name "python-pyliftover")
15467 ;; The version of pypi does not include test data.
15470 (uri (git-reference
15471 (url "https://github.com/konstantint/pyliftover")
15473 (file-name (git-file-name name version))
15476 "1j8jp9iynv2l3jv5pr0pn0p3azlama1bqg233piglzm6bqh3m2m3"))))
15477 (build-system python-build-system)
15478 (arguments `(#:tests? #false)) ; the tests access the web
15480 (list python-pytest))
15481 (home-page "https://github.com/konstantint/pyliftover")
15482 (synopsis "Python implementation of UCSC liftOver genome coordinate conversion")
15484 "PyLiftover is a library for quick and easy conversion of genomic (point)
15485 coordinates between different assemblies.")
15486 (license license:expat)))
;; CGAT-core: references to bash and time in the pipeline execution
;; module are replaced with absolute file names, and a `check' phase is
;; added after `install'.
15488 (define-public python-cgatcore
15490 (name "python-cgatcore")
15492 ;; The version of pypi does not include test data.
15495 (uri (git-reference
15496 (url "https://github.com/cgat-developers/cgat-core")
15497 (commit (string-append "v" version))))
15498 (file-name (git-file-name name version))
15501 "17vk88v1bx7x02ibzkc9i7ir4b5p1hcjr38jpsfzyzxr68352d5k"))))
15502 (build-system python-build-system)
15505 (modify-phases %standard-phases
15506 (add-after 'unpack 'fix-references
;; Use the absolute file names of `bash' and `time' as returned by
;; `which' at build time.
15508 (substitute* "cgatcore/pipeline/execution.py"
15509 (("#!/bin/bash") (string-append "#!" (which "bash")))
15510 (("executable=\"/bin/bash\"")
15511 (string-append "executable=\"" (which "bash") "\""))
15512 (("\\\\time") (which "time")))))
15514 (add-after 'install 'check
15515 (lambda* (#:key tests? inputs outputs #:allow-other-keys)
;; Presumably makes the installed modules importable for the test run
;; -- confirm against `add-installed-pythonpath'.
15517 (add-installed-pythonpath inputs outputs)
15518 ;; Requires network access
15519 (delete-file "tests/test_pipeline_execution.py")
15520 (invoke "python" "-m" "pytest" "-v")))))))
15522 `(("python-pytest" ,python-pytest)
15524 ("hostname" ,inetutils)
15525 ("openssl" ,openssl)))
15535 python-sqlalchemy))
15536 (home-page "https://github.com/cgat-developers/cgat-core")
15537 (synopsis "Computational genomics analysis toolkit")
15539 "CGAT-core is a set of libraries and helper functions used to enable
15540 researchers to design and build computational workflows for the analysis of
15541 large-scale data-analysis.")
15542 (license license:expat)))
;; cworld-dekker: built with perl-build-system.  References to bedtools and
;; Rscript are hard-coded, and the contents of scripts/ are copied to
;; share/cworld-dekker, made executable and wrapped.
15544 (define-public perl-cworld-dekker
15546 (name "perl-cworld-dekker")
15550 (uri (git-reference
15551 (url "https://github.com/dekkerlab/cworld-dekker.git")
15552 (commit (string-append "v" version))))
15553 (file-name (git-file-name name version))
15556 "1dvh23fx52m59y6304xi2j2pl2hiqadlqg8jyv2pm14j1hy71ych"))))
15557 (build-system perl-build-system)
15559 `(#:modules ((guix build perl-build-system)
15563 (modify-phases %standard-phases
15564 (add-after 'unpack 'hardcode-references
15565 (lambda* (#:key inputs #:allow-other-keys)
;; Replace invocations of `bedtools' and `Rscript' with the absolute
;; file names below the "bedtools" and "r-minimal" inputs.
15566 (let ((bedtools (assoc-ref inputs "bedtools"))
15567 (r (assoc-ref inputs "r-minimal")))
15568 (substitute* '("scripts/python/getEigenVectors.py"
15569 "scripts/python/matrix2EigenVectors.py")
15570 (("bedtools intersect")
15571 (string-append bedtools "/bin/bedtools intersect")))
15572 (substitute* "lib/cworld/dekker.pm"
15573 (("bedtools --version")
15574 (string-append bedtools "/bin/bedtools --version")))
15575 (substitute* '("scripts/perl/correlateMatrices.pl"
15576 "scripts/perl/matrix2scaling.pl"
15577 "scripts/perl/matrix2distance.pl"
15578 "scripts/perl/coverageCorrect.pl"
15579 "scripts/perl/matrix2anchorPlot.pl"
15580 "scripts/python/matrix2EigenVectors.py"
15581 "scripts/python/matrix2insulation-lite.py"
15582 "scripts/perl/matrix2compartment.pl"
15583 "scripts/perl/anchorPurge.pl"
15584 "scripts/perl/applyCorrection.pl"
15585 "scripts/perl/compareInsulation.pl"
15586 "scripts/perl/fillMissingData.pl"
15587 "scripts/perl/matrix2loess.pl"
15588 "scripts/python/getEigenVectors.py"
15589 "scripts/perl/aggregateBED.pl"
15590 "scripts/perl/collapseMatrix.pl"
15591 "scripts/perl/matrix2direction.pl"
15592 "scripts/perl/singletonRemoval.pl"
15593 "lib/cworld/dekker.pm"
15594 "scripts/perl/matrix2insulation.pl")
;; The match keeps the opening backtick or double quote (PRE) and puts
;; the absolute Rscript file name after it.
15595 (("(`|\")Rscript" _ pre)
15596 (string-append pre r "/bin/Rscript"))))))
15597 (add-after 'install 'install-scripts
15598 (lambda* (#:key outputs #:allow-other-keys)
15599 (let* ((out (assoc-ref outputs "out"))
15600 (share (string-append out "/share/cworld-dekker")))
15602 (copy-recursively "scripts" share)
15604 ;; Make all scripts executable and wrap them.
15605 (let ((r (find-files share "\\.R$"))
15606 (py (find-files share "\\.py$"))
15607 (pl (find-files share "\\.pl$"))
;; WRAP wraps SCRIPT so that environment variable VAR is prefixed with a
;; path built from VAR's build-time value (presumably with EXTRA
;; appended -- confirm).
15608 (wrap (lambda* (script var #:optional (extra ""))
15609 (let ((path (string-append (getenv var)
15611 (wrap-program script
15612 `(,var ":" prefix (,path)))))))
15613 (for-each (cut chmod <> #o555) (append r py pl))
15614 (for-each (cut wrap <> "PERL5LIB"
15615 (string-append ":" out
15616 "/lib/perl5/site_perl"))
15618 (for-each (cut wrap <> "GUIX_PYTHONPATH") py))))))))
15621 ("perl-gd" ,perl-gd)
15622 ("bedtools" ,bedtools)
15623 ("python" ,python-wrapper)
15624 ("python-scipy" ,python-scipy)
15625 ("python-numpy" ,python-numpy)
15626 ("python-matplotlib" ,python-matplotlib)
15627 ("python-h5py" ,python-h5py)
15628 ("python-scikit-learn" ,python-scikit-learn)
15629 ("r-minimal" ,r-minimal)))
15631 (list perl-module-build))
15632 (home-page "https://github.com/dekkerlab/cworld-dekker")
15633 (synopsis "Utility and analysis scripts for 3C, 4C, 5C, and Hi-C data")
15634 (description "This package is a collection of Perl, Python, and R
15635 scripts for manipulating 3C/4C/5C/Hi-C data.")
15636 (license license:asl2.0)))
;; Ensembl VEP: the package version is API-VERSION followed by ".1".  The
;; Ensembl API modules are separate Git checkouts, each fetched at the Git
;; reference "release/" + API-VERSION and provided as "api-module-*" inputs.
15638 (define-public ensembl-vep
15639 (let* ((api-version "103")
;; Return an origin for the Ensembl repository NAME at the Git reference
;; "release/" + API-VERSION, with HASH as its base32 sha256.
15641 (lambda (name hash)
15642 (origin (method git-fetch)
15643 (uri (git-reference
15644 (url (string-append "https://github.com/Ensembl/"
15646 (commit (string-append "release/" api-version))))
15647 (file-name (string-append name "-" api-version "-checkout"))
15648 (sha256 (base32 hash))))))
15650 (name "ensembl-vep")
15651 (version (string-append api-version ".1"))
15655 (uri (git-reference
15656 (url "https://github.com/Ensembl/ensembl-vep.git")
15657 (commit (string-append "release/" version))))
15658 (file-name (git-file-name name version))
15661 "1iq7p72cv9b38jz2v8a4slzy2n8y0md487943180ym9xc8qvw09c"))))
15662 (build-system gnu-build-system)
15664 `(#:modules ((guix build gnu-build-system)
15668 (modify-phases %standard-phases
15669 (delete 'configure)
15671 ;; Tests need to run after installation
15674 (lambda* (#:key inputs outputs #:allow-other-keys)
;; MODULES pairs each API module name with the sub-directory of
;; Bio/EnsEMBL it provides.  SCRIPTS pairs a script with the name it is
;; installed under; #f keeps the script's own base name.
15675 (let* ((modules '(("ensembl" "/")
15676 ("ensembl-variation" "/Variation")
15677 ("ensembl-funcgen" "/Funcgen")
15678 ("ensembl-io" "/")))
15679 (scripts '(("convert_cache.pl" "vep_convert_cache.pl")
15680 ("INSTALL.pl" "vep_install.pl")
15682 ("variant_recoder" #f)
15685 (out (assoc-ref outputs "out"))
15686 (bin (string-append out "/bin"))
15687 (perl (string-append out "/lib/perl5/site_perl")))
15691 (let ((dir (string-append perl "/Bio/EnsEMBL" path)))
15694 (string-append (assoc-ref inputs (string-append "api-module-" name))
15695 "/modules/Bio/EnsEMBL" path)
15698 (copy-recursively "modules/" perl)
;; Each script is copied into bin/ (under its new name, if any), given
;; mode 555 and wrapped with a PERL5LIB prefix.
15703 (let ((location (string-append bin "/"
15704 (or new-name (basename script)))))
15705 (copy-file script location)
15706 (chmod location #o555)
15707 (wrap-program location
15708 `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")
15712 ;; Fix path to tools
15713 (with-directory-excursion (string-append perl "/Bio/EnsEMBL")
15714 (substitute* '("Funcgen/RunnableDB/ProbeMapping/PrePipelineChecks.pm"
15715 "VEP/BaseRunner.pm"
15717 "VEP/AnnotationSource/Cache/VariationTabix.pm"
15718 "VEP/AnnotationSource/Cache/BaseSerialized.pm"
15719 "Variation/Utils/BaseVepTabixPlugin.pm"
15720 "Variation/Utils/VEP.pm"
15721 "Variation/Pipeline/ReleaseDataDumps/PreRunChecks.pm")
15724 (assoc-ref inputs "which")
15725 "/bin/which")))))))
15726 (add-after 'install 'check
15727 (lambda* (#:key tests? inputs outputs #:allow-other-keys)
15730 (string-append (getenv "PERL5LIB")
15732 (assoc-ref outputs "out")
15733 "/lib/perl5/site_perl"))
;; The test directory "t" is copied out of the "source" input; the
;; files under /tmp/t are then made writable.
15734 (copy-recursively (string-append (assoc-ref inputs "source") "/t")
15736 (for-each make-file-writable (find-files "/tmp/t"))
15737 ;; TODO: haplo needs Set/IntervalTree.pm
15738 (invoke "perl" "-e" (string-append "
15739 use Test::Harness; use Test::Exception;
15740 my $dirname = \"/tmp\";
15741 opendir TEST, \"$dirname\\/t\";
15742 my @test_files = map {\"$dirname\\/t\\/\".$_} grep {!/^\\./ && /\\.t$/} readdir TEST; closedir TEST;
15743 @test_files = grep {!/Haplo/} @test_files;
15744 runtests(@test_files);
15747 (list bioperl-minimal
15760 ("api-module-ensembl"
15761 ,(api-module "ensembl"
15762 "0s59rj905g72hljzfpvnx5nxwz925b917y4jp912i23f5gwxh14v"))
15763 ("api-module-ensembl-variation"
15764 ,(api-module "ensembl-variation"
15765 "1dvwdzzfjhzymq02b6n4p6j3a9q4jgq0g89hs7hj1apd7zhirgkq"))
15766 ("api-module-ensembl-funcgen"
15767 ,(api-module "ensembl-funcgen"
15768 "1x23pv38dmv0w0gby6rv3wds50qghb4v3v1mf43vk55msfxzry8n"))
15769 ("api-module-ensembl-io"
15770 ,(api-module "ensembl-io"
15771 "14adb2x934lzsq20035mazdkhrkcw0qzb0xhz6zps9vk4wixwaix"))
15772 ("perl-test-harness" ,perl-test-harness)
15773 ("perl-test-exception" ,perl-test-exception)))
15774 (home-page "http://www.ensembl.org/vep")
15775 (synopsis "Predict functional effects of genomic variants")
15777 "This package provides a Variant Effect Predictor, which predicts
15778 the functional effects of genomic variants. It also provides
15779 Haplosaurus, which uses phased genotype data to predict
15780 whole-transcript haplotype sequences, and Variant Recoder, which
15781 translates between different variant encodings.")
15782 (license license:asl2.0))))
;; Signac is packaged from a pinned Git commit of the upstream repository;
;; the version string is derived from base version 1.6.0 with `git-version'.
15784 (define-public r-signac
15785 (let ((commit "458e647b503c3472b0b98c0aeca934f452e039ee")
15789 (version (git-version "1.6.0" revision commit))
15793 (uri (git-reference
15794 (url "https://github.com/timoast/signac/")
15796 (file-name (git-file-name name version))
15798 (base32 "1hgwpgighkvfkai80n4d2252s4sdpa4faag4ncdiylicl5wa7lbj"))))
;; The upstream R package name is capitalized, unlike the Guix package name.
15799 (properties `((upstream-name . "Signac")))
15800 (build-system r-build-system)
15801 (inputs (list zlib))
15803 (list r-biocgenerics
15832 (home-page "https://github.com/timoast/signac/")
15833 (synopsis "Analysis of single-cell chromatin data")
15835 "This package provides a framework for the analysis and exploration of
15836 single-cell chromatin data. The Signac package contains functions for
15837 quantifying single-cell chromatin data, computing per-cell quality control
15838 metrics, dimension reduction and normalization, visualization, and DNA
15839 sequence motif analysis.")
15840 (license license:expat))))
;; Tombo is fetched from PyPI under the upstream project name "ont-tombo" and
;; built with the Python build system.  The test phase is disabled.
15842 (define-public tombo
15849 (uri (pypi-uri "ont-tombo" version))
15852 "1023hadgcsgi53kz53ql45207hfizf9sw57z0qij3ay1bx68zbpm"))))
15853 (build-system python-build-system)
15855 '(#:tests? #false)) ;no tests
15857 (list python-cython python-nose2))
15858 ;; The package mainly consists of a command-line tool, but also has a
15859 ;; Python-API. Thus these must be propagated.
15861 (list python-future
15868 (home-page "https://github.com/nanoporetech/tombo")
15869 (synopsis "Analysis of raw nanopore sequencing data")
15870 (description "Tombo is a suite of tools primarily for the identification of
15871 modified nucleotides from nanopore sequencing data. Tombo also provides tools
15872 for the analysis and visualization of raw nanopore signal.")
15873 ;; Some parts may be BSD-3-licensed.
15874 (license license:mpl2.0)))
;; PyVCF is built from a pinned Git commit (base version 0.6.8).  Two custom
;; phases are visible below: one patches a sample script right after unpacking
;; and one deletes files from the installed site-packages directory.
15876 (define-public python-pyvcf
15877 (let ((commit "476169cd457ba0caa6b998b301a4d91e975251d9")
15880 (name "python-pyvcf")
15881 (version (git-version "0.6.8" revision commit))
15882 ;; Use git, because the PyPI tarballs lack test data.
15886 (uri (git-reference
15887 (url "https://github.com/jamescasbon/PyVCF.git")
15888 ;; Latest release is not tagged.
15890 (file-name (git-file-name name version))
15893 "0qf9lwj7r2hjjp4bd4vc7nayrhblfm4qcqs4dbd43a6p4bj2jv5p"))))
15894 (build-system python-build-system)
15897 (modify-phases %standard-phases
15898 (add-after 'unpack 'patch-sample-script
15900 ;; Add Python 3 compatibility to this sample script.
;; The substitution rewrites each "print ARG" statement line into the
;; function-call form "print(ARG)".
15901 (substitute* "scripts/vcf_sample_filter.py"
15902 (("print (.*)\n" _ arg)
15903 (string-append "print(" arg ")\n")))))
15904 (add-after 'install 'remove-installed-tests
15905 ;; Do not install test files.
15906 (lambda* (#:key inputs outputs #:allow-other-keys)
15907 (delete-file-recursively (string-append
15908 (site-packages inputs outputs)
15911 ;; Older setuptools is needed for use_2to3.
15912 (list python-cython python-setuptools-for-tensorflow))
15914 (list python-pysam python-rpy2))
15915 (home-page "https://github.com/jamescasbon/PyVCF")
15916 (synopsis "Variant Call Format parser for Python")
15917 (description "This package provides a @acronym{VCF,Variant Call Format}
15918 parser for Python.")
15919 (license license:expat))))
;; NanoSV is fetched from PyPI under the upstream name "NanoSV" and built with
;; the Python build system.
15921 (define-public nanosv
15927 (uri (pypi-uri "NanoSV" version))
15930 "1wl2daj0bwrl8fx5xi8j8hfs3mp3vg3qycy66538n032v1qkc6xg"))))
15931 (build-system python-build-system)
15933 (list python-configparser python-pysam python-pyvcf))
15934 (home-page "https://github.com/mroosmalen/nanosv")
15935 (synopsis "Structural variation detection tool for Oxford Nanopore data")
15936 (description "NanoSV is a software package that can be used to identify
15937 structural genomic variations in long-read sequencing data, such as data
15938 produced by Oxford Nanopore Technologies’ MinION, GridION or PromethION
15939 instruments, or Pacific Biosciences RSII or Sequel sequencers.")
15940 (license license:expat)))
;; Python bindings for Straw, fetched from PyPI under the upstream name
;; "strawC" and built with the Python build system.
15942 (define-public python-strawc
15944 (name "python-strawc")
15945 (version "0.0.2.1")
15949 (uri (pypi-uri "strawC" version))
15952 "1z1gy8n56lhriy6hdkh9r82ndikndipq2cy2wh8q185qig4rimr6"))))
15953 (build-system python-build-system)
15958 (home-page "https://github.com/aidenlab/straw")
15959 (synopsis "Stream data from .hic files")
15960 (description "Straw is a library which allows rapid streaming of contact
15961 data from @file{.hic} files. This package provides Python bindings.")
15962 (license license:expat)))
;; Python bindings to the UCSC Big Binary file library, fetched from PyPI.
;; The build sets CC to gcc after unpacking; the check phase visible below
;; copies the tests to /tmp/tests and runs pytest from there against the
;; installed package.
15964 (define-public python-pybbi
15966 (name "python-pybbi")
15971 (uri (pypi-uri "pybbi" version))
15974 "1hvy2f28i2b41l1pq15vciqbj538n0lichp8yr6413jmgg06xdsk"))))
15975 (build-system python-build-system)
15977 `(#:tests? #false ; tests require network access
15979 (modify-phases %standard-phases
15980 (add-after 'unpack 'set-cc
15981 (lambda _ (setenv "CC" "gcc")))
15983 (lambda* (#:key inputs outputs tests? #:allow-other-keys)
15985 (add-installed-pythonpath inputs outputs)
15986 (copy-recursively "tests" "/tmp/tests")
15987 (with-directory-excursion "/tmp/tests"
15988 (invoke "python" "-m" "pytest" "-v"))))))))
15990 (list pkg-config python-pkgconfig python-pytest))
15992 (list libpng openssl zlib))
15994 (list python-cython python-numpy python-pandas python-six))
15995 (home-page "https://github.com/nvictus/pybbi")
15996 (synopsis "Python bindings to UCSC Big Binary file library")
15998 "This package provides Python bindings to the UCSC Big
15999 Binary (bigWig/bigBed) file library. This provides read-level access to local
16000 and remote bigWig and bigBed files but no write capabilities. The main
16001 feature is fast retrieval of range queries into numpy arrays.")
16002 (license license:expat)))
;; DNA Features Viewer is fetched from PyPI under the upstream name
;; "dna_features_viewer".  The test phase is disabled because upstream has no
;; tests.
16004 (define-public python-dna-features-viewer
16006 (name "python-dna-features-viewer")
16011 (uri (pypi-uri "dna_features_viewer" version))
16014 "0vci6kg2id6r6rh3cifq7ccnh7j0mb8iqg3hji6rva0ayrdqzafc"))))
16015 (build-system python-build-system)
16016 (arguments '(#:tests? #false)) ; there are none
16018 (list python-biopython python-matplotlib))
16020 "https://github.com/Edinburgh-Genome-Foundry/DnaFeaturesViewer")
16021 (synopsis "Plot features from DNA sequences")
16023 "DNA Features Viewer is a Python library to visualize DNA features,
16024 e.g. from GenBank or Gff files, or Biopython SeqRecords.")
16025 (license license:expat)))
;; CoolBox is fetched from PyPI and built with the Python build system.  The
;; test phase is disabled because upstream has no tests.
16027 (define-public python-coolbox
16029 (name "python-coolbox")
16034 (uri (pypi-uri "coolbox" version))
16037 "0gqp76285w9klswr47y6kxbzwhv033b26jfa179kccfhiaq5p2xa"))))
16038 (build-system python-build-system)
16039 (arguments '(#:tests? #false)) ; there are none
16043 (list python-cooler
16044 python-dna-features-viewer
16047 python-intervaltree
16063 (home-page "https://github.com/GangCaoLab/CoolBox")
16064 (synopsis "Genomic data visualization toolkit")
16066 "CoolBox is a toolkit for visual analysis of genomics data. It aims to
16067 be highly compatible with the Python ecosystem, easy to use and highly
16068 customizable with a well-designed user interface. It can be used in various
16069 visualization situations, for example, to produce high-quality genome track
16070 plots or fetch commonly used genomic data files with a Python script or command
16071 line, interactively explore genomic data within Jupyter environment or web
16073 (license license:gpl3+)))
;; Python bindings for spoa.  The source is a recursive Git checkout of the
;; tag "v<version>".  A custom phase prepares the bundled library in
;; "src/build" before the regular build, and the check phase runs
;; tests/test_pyspoa.py against the installed package.
16075 (define-public python-pyspoa
16077 (name "python-pyspoa")
16082 (uri (git-reference
16083 (url "https://github.com/nanoporetech/pyspoa")
16084 (commit (string-append "v" version))
16085 (recursive? #true)))
16086 (file-name (git-file-name name version))
16089 "1lgf2shzhxkcsircd6vy46h27pjljd5q95fyz1cm3lkk702qbnzx"))))
16090 (build-system python-build-system)
16093 (modify-phases %standard-phases
16094 (add-before 'build 'build-libspoa
16096 (mkdir-p "src/build")
16097 (with-directory-excursion "src/build"
;; NOTE(review): these look like CMake configure flags; the command that
;; receives them is not visible here -- confirm.
16099 "-Dspoa_optimize_for_portability=ON"
16100 "-DCMAKE_BUILD_TYPE=Release"
16101 "-DCMAKE_CXX_FLAGS=\"-I ../vendor/cereal/include/\" -fPIC"
16105 (lambda* (#:key inputs outputs tests? #:allow-other-keys)
16107 (add-installed-pythonpath inputs outputs)
16108 (invoke "python" "tests/test_pyspoa.py")))))))
16112 `(("cmake" ,cmake-minimal)))
16113 (home-page "https://github.com/nanoporetech/pyspoa")
16114 (synopsis "Python bindings for the SIMD partial order alignment library")
16116 "This package provides Python bindings for spoa, a C++ implementation of
16117 the @dfn{partial order alignment} (POA) algorithm (as described in
16118 10.1093/bioinformatics/18.3.452) which is used to generate consensus
16120 (license license:expat)))
;; Python bindings to bwa, fetched from PyPI.  The origin snippet deletes all
;; "*.o" object files found in the tarball.  The build relaxes the "wheel"
;; version requirement in setup.py, and the sanity-check phase is removed
;; (see the TODO below).
16122 (define-public python-bwapy
16124 (name "python-bwapy")
16129 (uri (pypi-uri "bwapy" version))
16131 (base32 "090qwx3vl729zn3a7sksbviyg04kc71gpbm3nd8dalqp673x1npw"))
16132 (modules '((guix build utils)))
16134 '(for-each delete-file (find-files "." "\\.o$")))))
16135 (build-system python-build-system)
16138 (modify-phases %standard-phases
16139 (add-after 'unpack 'relax-requirements
16141 (substitute* "setup.py"
16142 (("wheel>=0.34") "wheel>=0.30"))))
16143 ;; TODO: it's possible that the import error points to a real
16144 ;; problem with the C sources.
16145 (delete 'sanity-check))))
16147 (list python-cffi python-setuptools python-wheel))
16150 (home-page "https://github.com/ACEnglish/bwapy")
16151 (synopsis "Python bindings to bwa aligner")
16152 (description "This package provides Python bindings to the bwa mem
16154 ;; These Python bindings are licensed under Mozilla Public License 2.0,
16155 ;; bwa itself is licensed under GNU General Public License v3.0.
16156 (license license:mpl2.0)))
;; scVelo is fetched from PyPI.  Numba is given a writable cache directory
;; before the tests run, and the check phase runs pytest from the installed
;; copy of the test directory.
16158 (define-public scvelo
16165 (uri (pypi-uri "scvelo" version))
16167 (base32 "0h5ha1459ljs0qgpnlfsw592i8dxqn6p9bl08l1ikpwk36baxb7z"))))
16168 (build-system python-build-system)
16171 (modify-phases %standard-phases
16172 ;; Numba needs a writable dir to cache functions.
16173 (add-before 'check 'set-numba-cache-dir
16175 (setenv "NUMBA_CACHE_DIR" "/tmp")))
16177 (lambda* (#:key inputs outputs tests? #:allow-other-keys)
16179 ;; The discovered test file names must match the names of the
16180 ;; compiled files, so we cannot run the tests from
16181 ;; /tmp/guix-build-*.
;; Compute the site-packages directory from the Python input instead of
;; hard-coding "python3.9", so the phase survives a Python version bump.
16182 (with-directory-excursion
16183 (string-append (site-packages inputs outputs)
16184 "/scvelo/core/tests/")
16185 (invoke "pytest" "-v"))))))))
16187 (list python-anndata
16198 python-scikit-learn
16208 python-setuptools-scm
16210 (home-page "https://scvelo.org")
16211 (synopsis "RNA velocity generalized through dynamical modeling")
16212 (description "ScVelo is a scalable toolkit for RNA velocity analysis in
16213 single cells. RNA velocity enables the recovery of directed dynamic
16214 information by leveraging splicing kinetics. scVelo generalizes the concept of
16215 RNA velocity by relaxing previously made assumptions with a stochastic and a
16216 dynamical model that solves the full transcriptional dynamics. It thereby
16217 adapts RNA velocity to widely varying specifications such as non-stationary
16219 (license license:bsd-3)))
;; Scregseg is built from the Git tag "v<version>".  The origin snippet deletes
;; src/scregseg/_utils.c, and the build removes the 'sklearn' entry from
;; setup.py.  Tests are disabled because they need network access.
16221 (define-public scregseg
16227 (uri (git-reference
16228 (url "https://github.com/BIMSBbioinfo/scregseg")
16229 (commit (string-append "v" version))))
16230 (file-name (git-file-name name version))
16233 "1k8hllr5if6k2mm2zj391fv40sfc008cjm04l9vgfsdppb80i112"))
16236 (use-modules ((guix build utils)))
;; NOTE(review): presumably a pre-generated Cython output that should be
;; rebuilt from source -- confirm.
16237 (delete-file "src/scregseg/_utils.c")))))
16238 (build-system python-build-system)
16240 `(#:tests? #false ; tests require network access
16242 (modify-phases %standard-phases
16243 (add-after 'unpack 'do-not-fail-to-find-sklearn
16245 ;; XXX: I have no idea why it cannot seem to find sklearn.
16246 (substitute* "setup.py"
16247 (("'sklearn',") "")))))))
16249 (list python-cython))
16251 (list python-scikit-learn
16264 (home-page "https://github.com/BIMSBbioinfo/scregseg")
16265 (synopsis "Single-cell regulatory landscape segmentation")
16266 (description "Scregseg (Single-Cell REGulatory landscape SEGmentation) is a
16267 tool that facilitates the analysis of single cell ATAC-seq data by an
16268 HMM-based segmentation algorithm. Scregseg uses an HMM with
16269 Dirichlet-Multinomial emission probabilities to segment the genome either
16270 according to distinct relative cross-cell accessibility profiles or (after
16271 collapsing the single-cell tracks to pseudo-bulk tracks) to capture distinct
16272 cross-cluster accessibility profiles.")
16273 (license license:gpl3+)))
;; Megadepth is built with CMake from a Git checkout.  The build uses
;; upstream's "CMakeLists.txt.ci" as its CMakeLists.txt, comments out the
;; static targets, and installs the dynamically linked binary as
;; "bin/megadepth".  Tests are disabled by default.
16275 (define-public megadepth
16281 (uri (git-reference
16282 (url "https://github.com/ChristopherWilks/megadepth")
16284 (file-name (git-file-name name version))
16287 "0hj69d2dgmk2zwgazik7xzc04fxxlk93p888kpgc52fmhd95qph7"))))
16288 (build-system cmake-build-system)
16290 `(#:tests? #false ; some tests seem to require connection to
16291 ; www.ebi.ac.uk; this may be caused by htslib.
16293 (modify-phases %standard-phases
16294 (add-after 'unpack 'prepare-CMakeLists.txt
16296 (rename-file "CMakeLists.txt.ci" "CMakeLists.txt")
;; Substitute the package version for the shell command that reads VERSION,
;; and turn the static executable/link declarations into comments.
16297 (substitute* "CMakeLists.txt"
16298 (("`cat ../VERSION`") ,version)
16299 (("target_link_libraries\\(megadepth_static") "#")
16300 (("target_link_libraries\\(megadepth_statlib") "#")
16301 (("add_executable\\(megadepth_static") "#")
16302 (("add_executable\\(megadepth_statlib") "#"))
16304 (substitute* "tests/test.sh"
16305 ;; Disable remote test
16306 (("./megadepth http://stingray.cs.jhu.edu/data/temp/test.bam") "#")
16307 ;; Prior to installation the binary's name differs from what
16308 ;; the test script assumes.
16309 (("./megadepth") "../build/megadepth_dynamic"))))
16311 (lambda* (#:key tests? #:allow-other-keys)
16313 (with-directory-excursion "../source"
16314 (invoke "bash" "tests/test.sh" "use-local-test-data")))))
16316 (lambda* (#:key outputs #:allow-other-keys)
16317 (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
16319 (copy-file "megadepth_dynamic"
16320 (string-append bin "/megadepth"))))))))
16322 (list diffutils perl grep))
16324 (list curl htslib libdeflate libbigwig zlib))
16325 (home-page "https://github.com/ChristopherWilks/megadepth")
16326 (synopsis "BigWig and BAM/CRAM related utilities")
16327 (description "Megadepth is an efficient tool for extracting coverage
16328 related information from RNA and DNA-seq BAM and BigWig files. It supports
16329 reading whole-genome coverage from BAM files and writing either indexed TSV or
16330 BigWig files, as well as efficient region coverage summary over intervals from
16331 both types of files.")
16332 (license license:expat)))
;; Mudskipper is fetched from crates.io and built with the Cargo build system.
;; The test phase is disabled (see the comment on #:tests? below).
16334 (define-public mudskipper
16336 (name "mudskipper")
16340 (uri (crate-uri "mudskipper" version))
16341 (file-name (string-append name "-" version ".tar.gz"))
16344 "1y7fnlz6irmxdmv6bxzm95w4ws4vzldlrh8npvgxmdnrz9pgb1dv"))))
16345 (build-system cargo-build-system)
16347 `(#:tests? #false ;fail because the "mudskipper" crate cannot be found
16349 (("rust-bio" ,rust-bio-0.39)
16350 ("rust-bio-types" ,rust-bio-types-0.12)
16351 ("rust-clap" ,rust-clap-2)
16352 ("rust-coitrees" ,rust-coitrees-0.2)
16353 ("rust-env-logger" ,rust-env-logger-0.9)
16354 ("rust-fnv" ,rust-fnv-1)
16355 ("rust-indicatif" ,rust-indicatif-0.16)
16356 ("rust-libradicl" ,rust-libradicl-0.4)
16357 ("rust-linecount" ,rust-linecount-0.1)
16358 ("rust-log" ,rust-log-0.4)
16359 ("rust-num-cpus" ,rust-num-cpus-1)
16360 ("rust-rust-htslib" ,rust-rust-htslib-0.38))))
16362 (list cmake pkg-config))
16365 (home-page "https://github.com/OceanGenomics/mudskipper")
16366 (synopsis "Convert genomic alignments to transcriptomic BAM/RAD files")
16367 (description "Mudskipper is a tool for projecting genomic alignments to
16368 transcriptomic coordinates.")
16369 (license license:bsd-3)))
;; ASCAT is built from the Git tag "v<version>".  The build changes into the
;; "ASCAT" subdirectory of the checkout after unpacking.  Note that the source
;; URL and the home page point to different GitHub organizations.
16371 (define-public r-ascat
16377 (uri (git-reference
16378 (url "https://github.com/Crick-CancerGenomics/ascat.git")
16379 (commit (string-append "v" version))))
16380 (file-name (git-file-name name version))
16383 "0cc0y3as6cb64iwnx0pgbajiig7m4z723mns9d5i4j09ccid3ccm"))))
16384 (build-system r-build-system)
16387 (modify-phases %standard-phases
16388 (add-after 'unpack 'move-to-ascat-dir
16390 (chdir "ASCAT"))))))
16392 (list r-rcolorbrewer))
16393 (home-page "https://github.com/VanLoo-lab/ascat/")
16394 (synopsis "Allele-Specific Copy Number Analysis of Tumors in R")
16395 (description "This package provides the @acronym{ASCAT,Allele-Specific Copy
16396 Number Analysis of Tumors} R package that can be used to infer tumour purity,
16397 ploidy and allele-specific copy number profiles.")
16398 (license license:gpl3)))
;; Battenberg is built from the Git tag "v<version>" of the Wedge-lab
;; repository using the R build system.
16400 (define-public r-battenberg
16402 (name "r-battenberg")
16406 (uri (git-reference
16407 (url "https://github.com/Wedge-lab/battenberg.git")
16408 (commit (string-append "v" version))))
16409 (file-name (git-file-name name version))
16412 "0nmcq4c7y5g8h8lxsq9vadz9bj4qgqn118alip520ny6czaxki4h"))))
16413 (build-system r-build-system)
16423 (home-page "https://github.com/Wedge-lab/battenberg")
16424 (synopsis "Subclonal copy number estimation in R")
16425 (description "This package contains the Battenberg R package for subclonal
16426 copy number estimation, as described by
16427 @url{doi:10.1016/j.cell.2012.04.023,Nik-Zainal et al.}")
16428 (license license:gpl3)))
;; CaTCH is built from a pinned Git commit (base version 1.0).  The build
;; changes into the "CaTCH" subdirectory of the checkout after unpacking.
;; Note that the source URL and the home page name different repositories.
16430 (define-public r-catch
16431 (let ((commit "196ddd5a51b1a5f5daa01de53fdaad9b7505e084")
16435 (version (git-version "1.0" revision commit))
16438 (uri (git-reference
16439 (url "https://github.com/zhanyinx/CaTCH")
16441 (file-name (git-file-name name version))
16444 "11c7f1fc8f57wnwk1hrgr5y814m80zj8gkz5021vxyxy2v02cqgd"))))
16445 (build-system r-build-system)
16448 (modify-phases %standard-phases
16449 (add-after 'unpack 'chdir
16450 (lambda _ (chdir "CaTCH"))))))
16451 (home-page "https://github.com/zhanyinx/CaTCH_R")
16452 (synopsis "Call a hierarchy of domains based on Hi-C data")
16453 (description "This package allows building the hierarchy of domains
16454 starting from Hi-C data. Each hierarchical level is identified by a minimum
16455 value of physical insulation between neighboring domains.")
16456 (license license:gpl2+))))
;; Spectre is built from a pinned Git commit (base version 0.5.5) using the R
;; build system.
16458 (define-public r-spectre
16459 (let ((commit "f6648ab3eb9499300d86502b5d60ec370ae9b61a")
16463 (version (git-version "0.5.5" revision commit))
16467 (uri (git-reference
16468 (url "https://github.com/ImmuneDynamics/Spectre")
16470 (file-name (git-file-name name version))
16473 "0g38grrhbqqa4bmcilvdyawbkcnax6k4vffx2giywp18mbirmj0x"))))
;; The upstream R package name is capitalized, unlike the Guix package name.
16474 (properties `((upstream-name . "Spectre")))
16475 (build-system r-build-system)
16522 (home-page "https://github.com/ImmuneDynamics/Spectre")
16523 (synopsis "High-dimensional cytometry and imaging analysis")
16525 "This package provides a computational toolkit in R for the
16526 integration, exploration, and analysis of high-dimensional single-cell
16527 cytometry and imaging data.")
16528 (license license:expat))))
;; Data package for the "Computational Genomics with R" book, built from a
;; pinned Git commit (base version 0.1.0) using the R build system.
16530 (define-public r-compgenomrdata
16531 (let ((commit "24484cb77631e1123ead6c329b9d62c160e600c6")
16534 (name "r-compgenomrdata")
16535 (version (git-version "0.1.0" revision commit))
16538 (uri (git-reference
16539 (url "https://github.com/compgenomr/compGenomRData")
16541 (file-name (git-file-name name version))
16544 "06gdvz4m4qlb1ylv10qfx09zv4c95cm7nps4y2s67m946kv8czv8"))))
;; The upstream R package name uses mixed case, unlike the Guix package name.
16545 (properties `((upstream-name . "compGenomRData")))
16546 (build-system r-build-system)
16547 (home-page "https://github.com/compgenomr/compGenomRData")
16548 (synopsis "Data for Computational Genomics with R book")
16549 (description "This package provides data for the book \"Computational
16550 Genomics with R\".")
16551 (license license:gpl3))))
;; CytoNorm is built from a pinned Git commit (base version 0.0.7) using the R
;; build system.
16553 (define-public r-cytonorm
16554 (let ((commit "e4b9d343ee65db3c422800f1db3e77c25abde987")
16557 (name "r-cytonorm")
16558 (version (git-version "0.0.7" revision commit))
16562 (uri (git-reference
16563 (url "https://github.com/saeyslab/CytoNorm")
16565 (file-name (git-file-name name version))
16568 "0h2rdy15i4zymd4dv60n5w0frbsdbmzpv99dgm0l2dn041qv7fah"))))
;; The upstream R package name uses mixed case, unlike the Guix package name.
16569 (properties `((upstream-name . "CytoNorm")))
16570 (build-system r-build-system)
16582 (home-page "https://github.com/saeyslab/CytoNorm")
16583 (synopsis "Normalize cytometry data measured across multiple batches")
16585 "This package can be used to normalize cytometry samples when a control
16586 sample is taken along in each of the batches. This is done by first
16587 identifying multiple clusters/cell types, learning the batch effects from the
16588 control samples and applying quantile normalization on all markers of
16590 (license license:gpl2+))))
;; kBET is built from a pinned Git commit (base version 0.99.6) using the R
;; build system; r-knitr is a build-time (native) input.
16592 (define-public r-kbet
16593 (let ((commit "f35171dfb04c7951b8a09ac778faf7424c4b6bc0")
16597 (version (git-version "0.99.6" revision commit))
16600 (uri (git-reference
16601 (url "https://github.com/theislab/kBET")
16603 (file-name (git-file-name name version))
16606 "1r91prl2kki3zk694vhlmxdlqh0ixlhs8jfcqw6wc7cdsa0nv67k"))))
;; The upstream R package name uses mixed case, unlike the Guix package name.
16607 (properties `((upstream-name . "kBET")))
16608 (build-system r-build-system)
16609 (propagated-inputs (list r-cluster r-fnn r-ggplot2 r-mass r-rcolorbrewer))
16610 (native-inputs (list r-knitr))
16611 (home-page "https://github.com/theislab/kBET")
16612 (synopsis "k-nearest neighbour batch effect test")
16614 "This tool detects batch effects in high-dimensional data based on chi^2-test.")
16615 ;; Any version of the GPL
16616 (license license:gpl3+))))
;; ccwl is built from a release tarball with the GNU build system.  After
;; installation the "bin/ccwl" program is wrapped so that GUILE_LOAD_PATH and
;; GUILE_LOAD_COMPILED_PATH include both the package's own Guile directories
;; and the values those variables have at build time.
16618 (define-public ccwl
16625 (uri (string-append "https://ccwl.systemreboot.net/releases/ccwl-"
16626 version ".tar.lz"))
16629 "1ar8rfz3zrksgygrv67zv77y8gfvvz54zcs546jn6j28y20basla"))))
16630 (build-system gnu-build-system)
16632 `(#:make-flags '("GUILE_AUTO_COMPILE=0") ; to prevent guild warnings
;; Only `target-guile-effective-version' is imported from the Guile build
;; system; it is used by the wrap phase below.
16633 #:modules (((guix build guile-build-system)
16634 #:select (target-guile-effective-version))
16635 ,@%gnu-build-system-modules)
16636 #:imported-modules ((guix build guile-build-system)
16637 ,@%gnu-build-system-modules)
16639 (modify-phases %standard-phases
16640 (add-after 'install 'wrap
16641 (lambda* (#:key inputs outputs #:allow-other-keys)
16642 (let ((out (assoc-ref outputs "out"))
16643 (effective-version (target-guile-effective-version)))
16644 (wrap-program (string-append out "/bin/ccwl")
16645 `("GUILE_LOAD_PATH" prefix
16646 (,(string-append out "/share/guile/site/" effective-version)
16647 ,(getenv "GUILE_LOAD_PATH")))
16648 `("GUILE_LOAD_COMPILED_PATH" prefix
16649 (,(string-append out "/lib/guile/" effective-version "/site-ccache")
16650 ,(getenv "GUILE_LOAD_COMPILED_PATH"))))))))))
16652 `(("bash" ,bash-minimal)
16653 ("guile" ,guile-3.0)
16654 ("guile-libyaml" ,guile-libyaml)))
16658 ;; To build documentation
16662 (home-page "https://ccwl.systemreboot.net")
16663 (synopsis "Concise common workflow language")
16664 (description "The @acronym{ccwl, Concise Common Workflow Language} is a
16665 concise syntax to express CWL workflows. ccwl is a compiler to generate CWL
16666 workflows from concise descriptions in ccwl. It is implemented as an
16667 @acronym{EDSL, Embedded Domain Specific Language} in the Scheme programming
16669 (license license:gpl3+)))
;; HH-suite is built with CMake from the Git tag "v<version>".  The origin
;; snippet deletes the "lib/simde" directory from the checkout; the separately
;; packaged simde is listed among the inputs.  Tests are disabled because
;; there is no test target.
16671 (define-public hh-suite
16678 (uri (git-reference
16679 (url "https://github.com/soedinglab/hh-suite")
16680 (commit (string-append "v" version))))
16681 (file-name (git-file-name name version))
16684 "1bcmzg0ii6nkda2xm5jdddbwkgsag7k38j20af0c9chr2mbxwx4d"))
16685 (modules '((guix build utils)))
16687 '(delete-file-recursively "lib/simde"))))
16688 (build-system cmake-build-system)
16689 (arguments '(#:tests? #false)) ;no test target
16691 (list openmpi simde))
16693 (list perl pkg-config xxd))
16694 (home-page "https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-019-3019-7")
16695 (synopsis "Remote protein homology detection suite")
16696 (description "The HH-suite is a software package for sensitive protein sequence searching
16697 based on the pairwise alignment of hidden Markov models (HMMs).")
16698 (license license:gpl3+)))
;; wfmash is built with CMake from an upstream release tarball.  The origin
;; snippet unbundles atomic-queue, strips "-march=native" from the CMake files
;; and patches an x86_64-only preprocessor check.  The check phase runs
;; bin/wfmash on bundled data sets and post-processes the results with
;; samtools, head and cut; the three LPA-subset tests are wrapped in
;; `(not (target-riscv64?))' conditionals because they are very slow on
;; riscv64-linux.
16700 (define-public wfmash
16707 (uri (string-append "https://github.com/ekg/wfmash/releases/download/v"
16708 version "/wfmash-v" version ".tar.gz"))
16711 "031cm1arpfckvihb28vlk69mirpnmlag81zcscfba1bac58wvr7c"))
16714 (use-modules (guix build utils))
16715 ;; Unbundle atomic-queue.
16716 (delete-file-recursively "src/common/atomic_queue")
16717 (substitute* "src/align/include/computeAlignments.hpp"
16718 (("\"common/atomic_queue/atomic_queue.h\"")
16719 "<atomic_queue/atomic_queue.h>"))
16720 ;; Remove compiler optimizations.
16721 (substitute* (find-files "." "CMakeLists\\.txt")
16723 (("-march=native ") ""))
16724 ;; Allow building on architectures other than x86_64.
16725 (substitute* "src/common/dset64.hpp"
16726 (("!__x86_64__") "0"))))))
16727 (build-system cmake-build-system)
16731 #~(modify-phases %standard-phases
16733 ;; Adapted from .github/workflows/test_on_push.yml
16734 (lambda* (#:key tests? inputs #:allow-other-keys)
16736 (let ((samtools (search-input-file inputs "/bin/samtools")))
16737 ;; This is the easiest way to access the data
16738 ;; needed for the test suite.
16739 (symlink (string-append "../wfmash-v" #$version "/data")
16742 ;; This test takes 60 minutes on riscv64-linux.
16743 #$@(if (not (target-riscv64?))
16745 ;; Test with a subset of the LPA dataset (PAF output)
16746 (setenv "ASAN_OPTIONS" "detect_leaks=1:symbolize=1")
16747 (setenv "LSAN_OPTIONS" "verbosity=0:log_threads=1")
16748 (with-output-to-file "LPA.subset.paf"
16750 (invoke "bin/wfmash"
16751 "data/LPA.subset.fa.gz"
16752 "data/LPA.subset.fa.gz"
16753 "-X" "-n" "10" "-T" "wflign_info."
16755 (invoke "head" "LPA.subset.paf")))
16757 ;; This test takes about 5 hours on riscv64-linux.
16758 #$@(if (not (target-riscv64?))
16760 ;; Test with a subset of the LPA dataset (SAM output)
16761 (setenv "ASAN_OPTIONS" "detect_leaks=1:symbolize=1")
16762 (setenv "LSAN_OPTIONS" "verbosity=0:log_threads=1")
16763 (with-output-to-file "LPA.subset.sam"
16765 (invoke "bin/wfmash"
16766 "data/LPA.subset.fa.gz"
16767 "data/LPA.subset.fa.gz"
16768 "-X" "-N" "-a" "-T" "wflign_info.")))
;; Each intermediate result is written to its own file because the steps
;; are run one at a time rather than through a shell pipeline.
16769 (with-output-to-file "LPA.subset.sam-view"
16771 (invoke samtools "view" "LPA.subset.sam" "-bS")))
16772 (with-output-to-file "LPA.subset.bam"
16774 (invoke samtools "sort" "LPA.subset.sam-view")))
16775 (invoke samtools "index" "LPA.subset.bam")
16776 ;; samtools view LPA.subset.bam | head | cut -f 1-9
16777 ;(invoke samtools "view" "LPA.subset.bam")
16778 ;; There should be an easier way to do this with pipes.
16779 (with-output-to-file "LPA.subset.bam-incr1"
16781 (invoke samtools "view" "LPA.subset.bam")))
16782 (with-output-to-file "LPA.subset.bam-incr2"
16784 (invoke "head" "LPA.subset.bam-incr1")))
16785 (invoke "cut" "-f" "1-9" "LPA.subset.bam-incr2")))
16787 ;; This test takes 60 minutes on riscv64-linux.
16788 #$@(if (not (target-riscv64?))
16790 ;; Test with a subset of the LPA dataset,
16791 ;; setting a lower identity threshold (PAF output)
16792 (setenv "ASAN_OPTIONS" "detect_leaks=1:symbolize=1")
16793 (setenv "LSAN_OPTIONS" "verbosity=0:log_threads=1")
16794 (with-output-to-file "LPA.subset.p90.paf"
16796 (invoke "bin/wfmash"
16797 "data/LPA.subset.fa.gz"
16798 "data/LPA.subset.fa.gz"
16799 "-X" "-p" "90" "-n" "10"
16800 "-T" "wflign_info.")))
16801 (invoke "head" "LPA.subset.p90.paf")))
16804 ;; Test aligning short reads (500 bps) to a reference (SAM output)
16805 (setenv "ASAN_OPTIONS" "detect_leaks=1:symbolize=1")
16806 (setenv "LSAN_OPTIONS" "verbosity=0:log_threads=1")
16807 (with-output-to-file "reads.500bps.sam"
16809 (invoke "bin/wfmash"
16810 "data/reference.fa.gz"
16811 "data/reads.500bps.fa.gz"
16812 "-s" "0.5k" "-N" "-a")))
16813 (with-output-to-file "reads.500bps.sam-view"
16815 (invoke samtools "view" "reads.500bps.sam" "-bS")))
16816 (with-output-to-file "reads.500bps.bam"
16818 (invoke samtools "sort" "reads.500bps.sam-view")))
16819 (invoke samtools "index" "reads.500bps.bam")
16820 (with-output-to-file "reads.500bps.bam-view"
16822 (invoke samtools "view" "reads.500bps.bam")))
16823 (invoke "head" "reads.500bps.bam-view"))
16825 ;; Test with few very short reads (255bps) (PAF output)
16826 (setenv "ASAN_OPTIONS" "detect_leaks=1:symbolize=1")
16827 (setenv "LSAN_OPTIONS" "verbosity=0:log_threads=1")
16828 (with-output-to-file "reads.255bps.paf"
16830 (invoke "bin/wfmash"
16831 "data/reads.255bps.fa.gz"
16832 "data/reads.255bps.fa.gz"
16834 (invoke "head" "reads.255bps.paf"))))))))))
16843 (synopsis "Base-accurate DNA sequence aligner")
16844 (description "@code{wfmash} is a DNA sequence read mapper based on mash
16845 distances and the wavefront alignment algorithm. It is a fork of MashMap that
16846 implements base-level alignment via the wflign tiled wavefront global
16847 alignment algorithm. It completes MashMap with a high-performance alignment
16848 module capable of computing base-level alignments for very large sequences.")
16849 (home-page "https://github.com/ekg/wfmash")
16850 (license license:expat)))
;; FLAIR is built from the Git tag "v<version>".  The custom phases visible
;; below build a wheel with "python -m build" (without build isolation) and
;; install every wheel found in "dist" with pip into the package output.
;; There is no test suite.
16852 (define-public flair
16859 (uri (git-reference
16860 (url "https://github.com/BrooksLabUCSC/flair")
16861 (commit (string-append "v" version))))
16862 (file-name (git-file-name name version))
16865 "106swb2q7l20ki58fca1hg95q5f79bgp9gjb0clr2243ycrzyxf8"))))
16866 (build-system python-build-system)
16869 #:tests? #false ;there are none
16871 #~(modify-phases %standard-phases
16872 ;; TODO: implement as a feature of python-build-system (PEP-621,
16873 ;; PEP-631, PEP-660)
;; The package version is handed to setuptools-scm through the environment.
16876 (setenv "SETUPTOOLS_SCM_PRETEND_VERSION" #$version)
16877 ;; ZIP does not support timestamps before 1980.
16878 (setenv "SOURCE_DATE_EPOCH" "315532800")
16879 (invoke "python" "-m" "build" "--wheel" "--no-isolation" ".")))
16882 (apply invoke "pip" "--no-cache-dir" "--no-input"
16883 "install" "--no-deps" "--prefix" #$output
16884 (find-files "dist" "\\.whl$")))))))
16892 (list python-pypa-build python-setuptools))
16893 (home-page "https://flair.readthedocs.io/en/latest/")
16894 (synopsis "Full-length alternative isoform analysis of RNA")
16895 (description "This package implements FLAIR (Full-Length Alternative
16896 Isoform analysis of RNA) for the correction, isoform definition, and
16897 alternative splicing analysis of noisy reads. FLAIR has primarily been used
16898 for nanopore cDNA, native RNA, and PacBio sequencing reads.")
16899 (license license:bsd-3)))
;; biogo graph library, pinned to a Go pseudo-version; the commit to fetch is
;; derived from that version string with go-version->git-ref.
16901 (define-public go-github-com-biogo-graph
16903 (name "go-github-com-biogo-graph")
16904 (version "0.0.0-20150317020928-057c1989faed")
16907 (uri (git-reference
16908 (url "https://github.com/biogo/graph")
16909 (commit (go-version->git-ref version))))
16910 (file-name (git-file-name name version))
16913 "1kpzs5dfd5dsk4mg1g2qjz1prqd84ixhrcxxnf90hq25vxcnk7lh"))))
16914 (build-system go-build-system)
;; The test suite is switched off here; the TODO below records why.
16916 '(#:import-path "github.com/biogo/graph"
16917 #:tests? #false)) ;TODO: one of 13 tests fails for unknown reasons
;; NOTE(review): the field that receives this list is not visible here.
16919 (list go-gopkg-in-check-v1))
16920 (home-page "https://github.com/biogo/graph")
16921 (synopsis "Undirected graph analysis for biogo")
16922 (description "The package @code{graph} implements graph manipulation
16924 (license license:bsd-3)))
;; Interval store from the biogo/store repository, pinned to a Go
;; pseudo-version whose commit is derived with go-version->git-ref.  The
;; kdtree, llrb and step store packages inherit from this definition.
16926 (define-public go-github-com-biogo-store-interval
16928 (name "go-github-com-biogo-store-interval")
16929 (version "0.0.0-20201120204734-aad293a2328f")
16932 (uri (git-reference
16933 (url "https://github.com/biogo/store")
16934 (commit (go-version->git-ref version))))
16935 (file-name (git-file-name name version))
16938 "0skizrp1j6vgbl0g1kmh73picagqlvwckaqs0gkl6rai5lckxj8a"))))
16939 (build-system go-build-system)
;; The import path names the interval subdirectory, while the unpack path
;; names the root of the repository that contains it.
16941 '(#:import-path "github.com/biogo/store/interval"
16942 #:unpack-path "github.com/biogo/store"))
;; NOTE(review): the field that receives this list is not visible here.
16944 (list go-gopkg-in-check-v1
16945 go-github-com-kr-pretty))
16946 (home-page "https://github.com/biogo/store")
16947 (synopsis "Interval store type for biogo")
16949 "The @code{store} package provides a number of data store types that are
16950 useful for bioinformatic analysis.")
16951 (license license:bsd-3)))
;; kdtree store from biogo/store.  Inherits from the interval store package
;; and overrides the name, the import path (kdtree subdirectory) and the
;; synopsis; the unpack path stays at the repository root.
16953 (define-public go-github-com-biogo-store-kdtree
16955 (inherit go-github-com-biogo-store-interval)
16956 (name "go-github-com-biogo-store-kdtree")
16958 '(#:import-path "github.com/biogo/store/kdtree"
16959 #:unpack-path "github.com/biogo/store"))
;; NOTE(review): the field that receives this list is not visible here.
16961 (list go-gopkg-in-check-v1
16962 go-github-com-kr-pretty))
16963 (synopsis "kdtree store type for biogo")))
;; LLRB store from biogo/store.  Inherits from the interval store package and
;; overrides the name, the import path (llrb subdirectory) and the synopsis;
;; the unpack path stays at the repository root.
16965 (define-public go-github-com-biogo-store-llrb
16967 (inherit go-github-com-biogo-store-interval)
16968 (name "go-github-com-biogo-store-llrb")
16970 '(#:import-path "github.com/biogo/store/llrb"
16971 #:unpack-path "github.com/biogo/store"))
;; NOTE(review): the field that receives this list is not visible here.
16973 (list go-gopkg-in-check-v1
16974 go-github-com-kr-pretty))
16975 (synopsis "LLRB store for biogo")))
;; Step store from biogo/store.  Inherits from the interval store package and
;; overrides the name, the import path (step subdirectory) and the synopsis;
;; the unpack path stays at the repository root.
16977 (define-public go-github-com-biogo-store-step
16979 (inherit go-github-com-biogo-store-interval)
16980 (name "go-github-com-biogo-store-step")
16982 '(#:import-path "github.com/biogo/store/step"
16983 #:unpack-path "github.com/biogo/store"))
;; NOTE(review): the field that receives this list is not visible here.
16985 (list go-gopkg-in-check-v1
16986 go-github-com-kr-pretty))
16987 (synopsis "Step store for biogo")))
;; BAM module of the biogo/hts repository, fetched from the Git tag formed by
;; prefixing the version with v.  The other biogo hts packages below inherit
;; from this definition.
;; NOTE(review): the version field is not visible in this listing.
16989 (define-public go-github-com-biogo-hts-bam
16991 (name "go-github-com-biogo-hts-bam")
16995 (uri (git-reference
16996 (url "https://github.com/biogo/hts")
16997 (commit (string-append "v" version))))
16998 (file-name (git-file-name name version))
17001 "013ga6ilc4m3hyfr3yyiva9g4vs81afhj73v2sy7r75b5zxw7lx1"))))
17002 (build-system go-build-system)
;; The import path names the bam subdirectory, while the unpack path names
;; the root of the repository that contains it.
17004 '(#:import-path "github.com/biogo/hts/bam"
17005 #:unpack-path "github.com/biogo/hts"))
;; NOTE(review): the field that receives this list is not visible here.
17007 (list go-gopkg-in-check-v1))
17008 (home-page "https://github.com/biogo/hts")
17009 (synopsis "HTS BAM module for biogo")
17010 (description "This package provides tools for handling BAM files.")
17011 (license license:bsd-3)))
;; SAM module of biogo/hts.  Inherits from the BAM package and overrides the
;; name, the import path (sam subdirectory), the synopsis and the description.
17013 (define-public go-github-com-biogo-hts-sam
17015 (inherit go-github-com-biogo-hts-bam)
17016 (name "go-github-com-biogo-hts-sam")
17018 '(#:import-path "github.com/biogo/hts/sam"
17019 #:unpack-path "github.com/biogo/hts"))
;; NOTE(review): the field that receives this list is not visible here.
17021 (list go-gopkg-in-check-v1))
17022 (synopsis "HTS SAM module for biogo")
17023 (description "This package provides tools for handling SAM files.")))
;; Tabix module of biogo/hts.  Inherits from the BAM package and overrides the
;; name, the import path (tabix subdirectory), the synopsis and the
;; description.
17025 (define-public go-github-com-biogo-hts-tabix
17027 (inherit go-github-com-biogo-hts-bam)
17028 (name "go-github-com-biogo-hts-tabix")
17030 '(#:import-path "github.com/biogo/hts/tabix"
17031 #:unpack-path "github.com/biogo/hts"))
;; NOTE(review): the field that receives this list is not visible here.
17033 (list go-gopkg-in-check-v1))
17034 (synopsis "HTS Tabix module for biogo")
17035 (description "This package provides tools for handling Tabix files.")))
;; bgzf module of biogo/hts.  Inherits from the BAM package and overrides the
;; name, the import path (bgzf subdirectory), the synopsis and the
;; description.
17037 (define-public go-github-com-biogo-hts-bgzf
17039 (inherit go-github-com-biogo-hts-bam)
17040 (name "go-github-com-biogo-hts-bgzf")
17042 '(#:import-path "github.com/biogo/hts/bgzf"
17043 #:unpack-path "github.com/biogo/hts"))
;; NOTE(review): the field that receives this list is not visible here.
17045 (list go-gopkg-in-check-v1))
17046 (synopsis "HTS bgzf module for biogo")
17047 (description "This package provides tools for handling bgzf files.")))
;; CRAM module of biogo/hts.  Inherits from the BAM package and overrides the
;; name, the import path (cram subdirectory), the synopsis and the
;; description.  Unlike its siblings it also disables the tests and lists two
;; extra packages next to the check library.
17049 (define-public go-github-com-biogo-hts-cram
17051 (inherit go-github-com-biogo-hts-bam)
17052 (name "go-github-com-biogo-hts-cram")
17054 '(#:import-path "github.com/biogo/hts/cram"
17055 #:unpack-path "github.com/biogo/hts"
17056 #:tests? #false)) ;require network access
;; NOTE(review): the field that receives this list is not visible here.  The
;; xz entry is spelled with a dot (go-github.com-...), unlike the other
;; names; it must match the variable defined elsewhere -- confirm.
17058 (list go-gopkg-in-check-v1
17059 go-github.com-ulikunitz-xz
17060 go-github-com-kortschak-utter))
17061 (synopsis "HTS CRAM module for biogo")
17062 (description "This package provides tools for handling CRAM files.")))
;; CSI module of biogo/hts.  Inherits from the BAM package and overrides the
;; name, the import path (csi subdirectory), the synopsis and the description.
;; NOTE(review): the description string is cut off in this listing; the
;; embedded numbers jump from 17074 to 17077.
17064 (define-public go-github-com-biogo-hts-csi
17066 (inherit go-github-com-biogo-hts-bam)
17067 (name "go-github-com-biogo-hts-csi")
17069 '(#:import-path "github.com/biogo/hts/csi"
17070 #:unpack-path "github.com/biogo/hts"))
;; NOTE(review): the field that receives this list is not visible here.
17072 (list go-gopkg-in-check-v1))
17073 (synopsis "Coordinate sorted indexing for biogo")
17074 (description "This package implements CSIv1 and CSIv2 coordinate sorted
;; FAI module of biogo/hts.  Inherits from the BAM package and overrides the
;; name, the import path (fai subdirectory), the synopsis and the description.
;; NOTE(review): the description string is cut off in this listing; the
;; embedded numbers jump from 17087 to 17090.
17077 (define-public go-github-com-biogo-hts-fai
17079 (inherit go-github-com-biogo-hts-bam)
17080 (name "go-github-com-biogo-hts-fai")
17082 '(#:import-path "github.com/biogo/hts/fai"
17083 #:unpack-path "github.com/biogo/hts"))
;; NOTE(review): the field that receives this list is not visible here.
17085 (list go-gopkg-in-check-v1))
17086 (synopsis "Fasta sequence file index handling for biogo")
17087 (description "This package implements FAI fasta sequence file index
;; The biogo library itself, fetched from the Git tag formed by prefixing the
;; version with v and built with go-build-system from the repository root
;; import path.
;; NOTE(review): the version field is not visible in this listing.
17090 (define-public go-github-com-biogo-biogo
17092 (name "go-github-com-biogo-biogo")
17096 (uri (git-reference
17097 (url "https://github.com/biogo/biogo")
17098 (commit (string-append "v" version))))
17099 (file-name (git-file-name name version))
17102 "0ali1mqf3dc26myv6l7wmqfr8i25461rbq3qdad8s0wi29622199"))))
17103 (build-system go-build-system)
17105 '(#:import-path "github.com/biogo/biogo"))
;; The list below names the check library plus the biogo store, hts-bam and
;; graph packages defined earlier in this file.
;; NOTE(review): the field that receives this list is not visible here.
17107 (list go-gopkg-in-check-v1
17108 go-github-com-biogo-store-interval
17109 go-github-com-biogo-store-kdtree
17110 go-github-com-biogo-store-llrb
17111 go-github-com-biogo-store-step
17112 go-github-com-biogo-hts-bam
17113 go-github-com-biogo-graph))
17114 (home-page "https://github.com/biogo/biogo")
17115 (synopsis "Bioinformatics library for Go")
17117 "Bíogo is a bioinformatics library for the Go language.")
17118 (license license:bsd-3)))
17121 ;;; Avoid adding new packages to the end of this file. To reduce the chances
17122 ;;; of a merge conflict, place them above, next to existing packages with
17123 ;;; similar functionality or similar names.