gnu: Add cd-hit-auxtools.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
4 ;;; Copyright © 2015, 2016, 2018, 2019, 2020 Pjotr Prins <pjotr.guix@thebird.nl>
5 ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
6 ;;; Copyright © 2016, 2020, 2021 Roel Janssen <roel@gnu.org>
7 ;;; Copyright © 2016, 2017, 2018, 2019, 2020, 2021, 2022 Efraim Flashner <efraim@flashner.co.il>
8 ;;; Copyright © 2016, 2020, 2022 Marius Bakke <marius@gnu.org>
9 ;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
10 ;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
11 ;;; Copyright © 2017, 2021, 2022 Arun Isaac <arunisaac@systemreboot.net>
12 ;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
13 ;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
14 ;;; Copyright © 2018, 2019, 2020, 2021, 2022 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
15 ;;; Copyright © 2019, 2020, 2021 Maxim Cournoyer <maxim.cournoyer@gmail.com>
16 ;;; Copyright © 2019 Brian Leung <bkleung89@gmail.com>
17 ;;; Copyright © 2019 Brett Gilio <brettg@gnu.org>
18 ;;; Copyright © 2020 Björn Höfling <bjoern.hoefling@bjoernhoefling.de>
19 ;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
20 ;;; Copyright © 2020 Pierre Langlois <pierre.langlois@gmx.com>
21 ;;; Copyright © 2020 Bonface Munyoki Kilyungi <bonfacemunyoki@gmail.com>
22 ;;; Copyright © 2021 Tim Howes <timhowes@lavabit.com>
23 ;;; Copyright © 2021 Hong Li <hli@mdc-berlin.de>
24 ;;; Copyright © 2021, 2022 Simon Tournier <zimon.toutoune@gmail.com>
25 ;;; Copyright © 2021 Felix Gruber <felgru@posteo.net>
26 ;;;
27 ;;; This file is part of GNU Guix.
28 ;;;
29 ;;; GNU Guix is free software; you can redistribute it and/or modify it
30 ;;; under the terms of the GNU General Public License as published by
31 ;;; the Free Software Foundation; either version 3 of the License, or (at
32 ;;; your option) any later version.
33 ;;;
34 ;;; GNU Guix is distributed in the hope that it will be useful, but
35 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
36 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37 ;;; GNU General Public License for more details.
38 ;;;
39 ;;; You should have received a copy of the GNU General Public License
40 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
41
42 (define-module (gnu packages bioinformatics)
43 #:use-module ((guix licenses) #:prefix license:)
44 #:use-module (guix packages)
45 #:use-module (guix gexp)
46 #:use-module (guix utils)
47 #:use-module (guix download)
48 #:use-module (guix git-download)
49 #:use-module (guix hg-download)
50 #:use-module (guix build-system ant)
51 #:use-module (guix build-system gnu)
52 #:use-module (guix build-system cargo)
53 #:use-module (guix build-system cmake)
54 #:use-module (guix build-system go)
55 #:use-module (guix build-system haskell)
56 #:use-module (guix build-system meson)
57 #:use-module (guix build-system ocaml)
58 #:use-module (guix build-system perl)
59 #:use-module (guix build-system python)
60 #:use-module (guix build-system qt)
61 #:use-module (guix build-system r)
62 #:use-module (guix build-system ruby)
63 #:use-module (guix build-system trivial)
64 #:use-module (guix deprecation)
65 #:use-module (gnu packages)
66 #:use-module (gnu packages admin)
67 #:use-module (gnu packages assembly)
68 #:use-module (gnu packages autotools)
69 #:use-module (gnu packages algebra)
70 #:use-module (gnu packages base)
71 #:use-module (gnu packages bash)
72 #:use-module (gnu packages bison)
73 #:use-module (gnu packages bioconductor)
74 #:use-module (gnu packages boost)
75 #:use-module (gnu packages check)
76 #:use-module (gnu packages code)
77 #:use-module (gnu packages cmake)
78 #:use-module (gnu packages compression)
79 #:use-module (gnu packages cpp)
80 #:use-module (gnu packages cpio)
81 #:use-module (gnu packages cran)
82 #:use-module (gnu packages crates-io)
83 #:use-module (gnu packages curl)
84 #:use-module (gnu packages documentation)
85 #:use-module (gnu packages databases)
86 #:use-module (gnu packages datastructures)
87 #:use-module (gnu packages dlang)
88 #:use-module (gnu packages file)
89 #:use-module (gnu packages flex)
90 #:use-module (gnu packages gawk)
91 #:use-module (gnu packages gcc)
92 #:use-module (gnu packages gd)
93 #:use-module (gnu packages golang)
94 #:use-module (gnu packages glib)
95 #:use-module (gnu packages graph)
96 #:use-module (gnu packages graphics)
97 #:use-module (gnu packages graphviz)
98 #:use-module (gnu packages groff)
99 #:use-module (gnu packages gtk)
100 #:use-module (gnu packages guile)
101 #:use-module (gnu packages guile-xyz)
102 #:use-module (gnu packages haskell-check)
103 #:use-module (gnu packages haskell-web)
104 #:use-module (gnu packages haskell-xyz)
105 #:use-module (gnu packages image)
106 #:use-module (gnu packages image-processing)
107 #:use-module (gnu packages imagemagick)
108 #:use-module (gnu packages java)
109 #:use-module (gnu packages java-compression)
110 #:use-module (gnu packages jemalloc)
111 #:use-module (gnu packages jupyter)
112 #:use-module (gnu packages libffi)
113 #:use-module (gnu packages linux)
114 #:use-module (gnu packages llvm)
115 #:use-module (gnu packages logging)
116 #:use-module (gnu packages lsof)
117 #:use-module (gnu packages machine-learning)
118 #:use-module (gnu packages man)
119 #:use-module (gnu packages maths)
120 #:use-module (gnu packages mpi)
121 #:use-module (gnu packages ncurses)
122 #:use-module (gnu packages node)
123 #:use-module (gnu packages ocaml)
124 #:use-module (gnu packages pcre)
125 #:use-module (gnu packages parallel)
126 #:use-module (gnu packages pdf)
127 #:use-module (gnu packages perl)
128 #:use-module (gnu packages perl-check)
129 #:use-module (gnu packages pkg-config)
130 #:use-module (gnu packages popt)
131 #:use-module (gnu packages protobuf)
132 #:use-module (gnu packages python)
133 #:use-module (gnu packages python-build)
134 #:use-module (gnu packages python-check)
135 #:use-module (gnu packages python-compression)
136 #:use-module (gnu packages python-crypto)
137 #:use-module (gnu packages python-science)
138 #:use-module (gnu packages python-web)
139 #:use-module (gnu packages python-xyz)
140 #:use-module (gnu packages qt)
141 #:use-module (gnu packages rdf)
142 #:use-module (gnu packages readline)
143 #:use-module (gnu packages rpc)
144 #:use-module (gnu packages rsync)
145 #:use-module (gnu packages ruby)
146 #:use-module (gnu packages serialization)
147 #:use-module (gnu packages shells)
148 #:use-module (gnu packages skribilo)
149 #:use-module (gnu packages sphinx)
150 #:use-module (gnu packages sqlite)
151 #:use-module (gnu packages statistics)
152 #:use-module (gnu packages swig)
153 #:use-module (gnu packages tbb)
154 #:use-module (gnu packages tex)
155 #:use-module (gnu packages texinfo)
156 #:use-module (gnu packages textutils)
157 #:use-module (gnu packages time)
158 #:use-module (gnu packages tls)
159 #:use-module (gnu packages uglifyjs)
160 #:use-module (gnu packages vim)
161 #:use-module (gnu packages web)
162 #:use-module (gnu packages wget)
163 #:use-module (gnu packages xml)
164 #:use-module (gnu packages xorg)
165 #:use-module (guix platform)
166 #:use-module (srfi srfi-1)
167 #:use-module (srfi srfi-26)
168 #:use-module (ice-9 match))
169
(define-public aragorn
  ;; The build compiles the single versioned C file directly; the binary and
  ;; its man page are then installed by hand.
  (package
    (name "aragorn")
    (version "1.2.38")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
                    version ".tgz"))
              (sha256
               (base32
                "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #f                       ; there are no tests
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure)
          (replace 'build
            (lambda _
              ;; Invoke the compiler for the target rather than a hard-coded
              ;; "gcc", so that cross-compilation picks the right toolchain.
              (invoke #$(cc-for-target)
                      "-O3"
                      "-ffast-math"
                      "-finline-functions"
                      "-o"
                      "aragorn"
                      (string-append "aragorn" #$version ".c"))))
          (replace 'install
            (lambda _
              (let ((bin (string-append #$output "/bin"))
                    (man (string-append #$output "/share/man/man1")))
                (install-file "aragorn" bin)
                (install-file "aragorn.1" man)))))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
214
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.5.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pezmaster31/bamtools")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0nfb2ypcx9959xnbz6wxh6py3xfizgmg8nrknxl95c507m9hmq8b"))))
    (build-system cmake-build-system)
    (arguments
     (list
      #:tests? #f                       ;no "check" target
      #:phases
      #~(modify-phases %standard-phases
          (add-before 'configure 'set-ldflags
            (lambda _
              ;; Hand the linker an rpath that points at the package's own
              ;; lib/bamtools directory.  The phase's return value is
              ;; ignored, so no trailing #t is needed.
              (setenv "LDFLAGS"
                      (string-append "-Wl,-rpath=" #$output
                                     "/lib/bamtools")))))))
    (inputs (list zlib))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
248
(define-public bamutils
  (package
    (name "bamutils")
    (version "1.0.14")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/statgen/bamUtil")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0i2r332k1kz0jysyg89d858wqq59n16lw6dv5qmilcwshb77r9v7"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #f                       ; Unclear how to run tests
      #:make-flags
      #~(list "USER_WARNINGS=-std=gnu++98"
              (string-append "INSTALLDIR=" #$output "/bin"))
      #:phases
      #~(modify-phases %standard-phases
          ;; There is no configure script to run; this phase instead pins the
          ;; DATE variable in the Makefile to a constant and places a copy of
          ;; the libStatGen sources next to the build tree.
          (replace 'configure
            (lambda* (#:key inputs #:allow-other-keys)
              (substitute* "src/Makefile"
                (("^DATE=.*") "DATE=\"1970-01-01\"\n"))
              (copy-recursively (assoc-ref inputs "libstatgen")
                                "../libStatGen"))))))
    (inputs
     (list zlib))
    (native-inputs
     ;; The "libstatgen" label is looked up by the 'configure' phase above.
     `(("libstatgen"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/statgen/libStatGen/")
                 (commit (string-append "v" version))))
           (file-name (git-file-name "libstatgen" version))
           (sha256
            (base32
             "0q9iyk046r4m7qnav8c3f28zsar25lj9nydiklwaswmzdijhi4p1"))))))
    (home-page "https://genome.sph.umich.edu/wiki/BamUtil")
    (synopsis "Programs for working on SAM/BAM files")
    (description "This package provides several programs that perform
operations on SAM/BAM files. All of these programs are built into a
single executable called @code{bam}.")
    (license license:gpl3+)))
295
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.14")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/samtools/bcftools/"
                                  "releases/download/"
                                  version "/bcftools-" version ".tar.bz2"))
              (sha256
               (base32
                "1jqrma16fx8kpvb3c0462dg0asvmiv5yi8myqmc5ddgwi6p8ivxp"))
              (modules '((guix build utils)))
              (snippet '(begin
                          ;; Delete bundled htslib.
                          (delete-file-recursively "htslib-1.14")))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:test-target "test"
      #:configure-flags
      #~(list "--enable-libgsl")
      #:phases
      #~(modify-phases %standard-phases
          (add-before 'check 'patch-tests
            (lambda _
              ;; Point the test driver at the bash found on PATH instead of
              ;; the hard-coded /bin/bash.
              (substitute* "test/test.pl"
                (("/bin/bash") (which "bash"))))))))
    (inputs
     (list gsl zlib))
    (native-inputs
     (list htslib perl))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
335
(define-public bcftools-1.12
  ;; Variant of bcftools pinned to version 1.12 and tested with the matching
  ;; htslib-1.12; everything else is inherited.
  (package/inherit bcftools
    (version "1.12")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/samtools/bcftools/"
                                  "releases/download/"
                                  version "/bcftools-" version ".tar.bz2"))
              (modules '((guix build utils)))
              (snippet '(begin
                          ;; Delete bundled htslib.
                          (delete-file-recursively "htslib-1.12")))
              (sha256
               (base32
                "1x94l1hy2pi3lbz0sxlbw0g6q5z5apcrhrlcwda94ns9n4r6a3ks"))))
    (native-inputs
     (list htslib-1.12 perl))))
352
(define-public bcftools-1.10
  ;; Variant of bcftools pinned to version 1.10 and tested with the matching
  ;; htslib-1.10; everything else is inherited.
  (package/inherit bcftools
    (version "1.10")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/samtools/bcftools/"
                                  "releases/download/"
                                  version "/bcftools-" version ".tar.bz2"))
              (modules '((guix build utils)))
              (snippet '(begin
                          ;; Delete bundled htslib.
                          (delete-file-recursively "htslib-1.10")))
              (sha256
               (base32
                "10xgwfdgqb6dsmr3ndnpb77mc3a38dy8kh2c6czn6wj7jhdp4dra"))))
    (native-inputs
     (list htslib-1.10 perl))))
369
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.35")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/bedops/bedops")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0mmgsgwz5r9w76hzgxkxc9s9lkdhhaf7vr6i02b09vbswvs1fyqx"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #f
      ;; Refer to the output through a gexp rather than the deprecated
      ;; build-side '%output' variable.
      #:make-flags
      #~(list (string-append "BINDIR=" #$output "/bin"))
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure)
          (add-after 'unpack 'unpack-tarballs
            (lambda _
              ;; FIXME: Bedops includes tarballs of minimally patched upstream
              ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
              ;; libraries because at least one of the libraries (zlib) is
              ;; patched to add a C++ function definition (deflateInit2cpp).
              ;; Until the Bedops developers offer a way to link against system
              ;; libraries we have to build the in-tree copies of these three
              ;; libraries.

              ;; See upstream discussion:
              ;; https://github.com/bedops/bedops/issues/124

              ;; Unpack the tarballs to benefit from shebang patching.
              (with-directory-excursion "third-party"
                (for-each (lambda (tarball)
                            (invoke "tar" "xvf" tarball))
                          '("jansson-2.6.tar.bz2"
                            "zlib-1.2.7.tar.bz2"
                            "bzip2-1.0.6.tar.bz2")))
              ;; Disable unpacking of tarballs in Makefile.
              (substitute* "system.mk/Makefile.linux"
                (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
                (("\\./configure") "CONFIG_SHELL=bash ./configure"))
              (substitute* "third-party/zlib-1.2.7/Makefile.in"
                (("^SHELL=.*$") "SHELL=bash\n")))))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
429
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.30.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                                  "download/v" version "/"
                                  "bedtools-" version ".tar.gz"))
              (sha256
               (base32
                "1f2hh79l7dn147c2xyfgf5wfjvlqfw32kjfnnh2n1qy6rpzx2fik"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:test-target "test"
      ;; The install location is passed to make directly; the 'configure'
      ;; phase is removed below.
      #:make-flags
      #~(list (string-append "prefix=" #$output))
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure))))
    (inputs
     (list samtools zlib))
    (native-inputs
     `(("python" ,python-wrapper)))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:expat)))
464
465 ;; Later releases of bedtools produce files with more columns than
466 ;; what Ribotaper expects.
(define-public bedtools-2.18
  (package (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/arq5x/bedtools2/"
                                  "releases/download/v" version
                                  "/bedtools-" version ".tar.gz"))
              (sha256
               (base32
                "11rvca19ncg03kxd0wzlfx5ws7r3nisd0z8s9j9n182d8ksp2pxz"))))
    (inputs
     (list samtools zlib))
    (native-inputs
     `(("python" ,python-wrapper)))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Source tweaks applied right after unpacking: a 'const float'
         ;; static member becomes 'constexpr', and a template template
         ;; parameter that reuses the name T is renamed to U.
         (add-after 'unpack 'compatibility
           (lambda _
             (substitute* "src/utils/fileType/FileRecordTypeChecker.h"
               (("static const float PERCENTAGE")
                "static constexpr float PERCENTAGE"))
             (substitute* "src/utils/general/DualQueue.h"
               (("template <class T, template<class T> class CompareFunc>")
                "template <class T, template<class U> class CompareFunc>"))))
         ;; Copy every file found under the build tree's "bin" directory
         ;; into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (target (string-append out "/bin/")))
               (for-each (lambda (program)
                           (install-file program target))
                         (find-files "bin" ".*"))))))))))
502
(define-public pbcopper
  ;; This is the latest commit at the time of this writing.
  (let ((revision "1")
        (commit "ad4143afd25a0bd6adc977c544865c992a515841"))
    (package
      (name "pbcopper")
      (version (git-version "1.9.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/PacificBiosciences/pbcopper")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1qxkbpdkamfisnk36lpi1vdvf3p1lg2hdqna3xgd94pz52bwbmp7"))))
      (build-system meson-build-system)
      (arguments
       (list
        #:phases
        #~(modify-phases %standard-phases
            (add-after 'unpack 'patch-meson-files
              (lambda _
                ;; Strip the SSE4.1 compiler flag from the build description.
                (substitute* "meson.build"
                  (("-msse4.1") "")))))))
      (native-inputs
       (list googletest pkg-config))
      (inputs
       (list boost))
      (home-page "https://github.com/PacificBiosciences/pbcopper")
      (synopsis "Data structures, algorithms, and utilities for PacBio C++ applications")
      (description
       "The pbcopper library provides a suite of data structures, algorithms,
and utilities for PacBio C++ applications.")
      (license license:bsd-3))))
537
(define-public pbbam
  (package
    (name "pbbam")
    (version "1.7.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/PacificBiosciences/pbbam")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1avdm5hwhr5ls79017blyalx1npzbf1aa6dgb6j6lg8sq4nk9yyg"))))
    (build-system meson-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; Disable this test.  I tried fixing it by including
             ;; optional_io.hpp, but there's a type error.
             (substitute* "tests/src/meson.build"
               (("'test_ReadGroupInfo.cpp',") ""))
             ;; The "#;" datum comment keeps the attempted fix in the file
             ;; without evaluating it.
             #;
             (substitute* "include/pbbam/ReadGroupInfo.h"
               (("#include <boost/optional.hpp>" m)
                (string-append m "\n#include <boost/optional/optional_io.hpp>")))
             ;; Replace the hard-coded '/bin/sh' in the test scripts with
             ;; the shell found on PATH.
             (substitute* '("tests/scripts/cram/_test.py"
                            "tests/scripts/cram/_main.py")
               (("'/bin/sh'")
                (string-append "'" (which "sh") "'"))))))))
    (native-inputs
     `(("googletest" ,googletest)
       ("pkg-config" ,pkg-config)
       ("python" ,python-wrapper)))     ; for tests
    (inputs
     (list boost samtools))
    ;; These libraries are listed as "Required" in the pkg-config file.
    (propagated-inputs
     (list htslib pbcopper zlib))
    (home-page "https://github.com/PacificBiosciences/pbbam")
    (synopsis "Work with PacBio BAM files")
    (description
     "The pbbam software package provides components to create, query, and
edit PacBio BAM files and associated indices. These components include a core
C++ library, bindings for additional languages, and command-line utilities.
This library is not intended to be used as a general-purpose BAM utility - all
input and output BAMs must adhere to the PacBio BAM format specification.
Non-PacBio BAMs will cause exceptions to be thrown.")
    (license license:bsd-3)))
588
(define-public pbgzip
  ;; Built from a fixed commit; the version string is derived from it with
  ;; 'git-version'.
  (let ((commit "2b09f97b5f20b6d83c63a5c6b408d152e3982974")
        (revision "0"))
    (package
      (name "pbgzip")
      (version (git-version "0.0.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/nh13/pbgzip")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1mlmq0v96irbz71bgw5zcc43g1x32zwnxx21a5p1f1ch4cikw1yd"))))
      (build-system gnu-build-system)
      (inputs
       (list zlib))
      (native-inputs
       (list autoconf automake))
      (home-page "https://github.com/nh13/pbgzip")
      (synopsis "Parallel Block GZIP")
      (description "This package implements parallel block gzip. For many
formats, in particular genomics data formats, data are compressed in
fixed-length blocks such that they can be easily indexed based on a (genomic)
coordinate order, since typically each block is sorted according to this order.
This allows for each block to be individually compressed (deflated), or more
importantly, decompressed (inflated), with the latter enabling random retrieval
of data in large files (gigabytes to terabytes). @code{pbgzip} is not limited
to any particular format, but certain features are tailored to genomics data
formats when enabled. Parallel decompression is somewhat faster, but the true
speedup comes during compression.")
      (license license:expat))))
621
(define-public blasr-libcpp
  (package
    (name "blasr-libcpp")
    (version "5.3.5")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/PacificBiosciences/blasr_libcpp")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "07cdfnfl29zf2j7fpaaqaxghq3p0wnc109razs0icwm2q6l3gycb"))))
    (build-system meson-build-system)
    (arguments
     (list
      ;; TODO: unittest/libblasr_unittest cannot be linked
      ;; ld: ;; unittest/df08227@@libblasr_unittest@exe/alignment_utils_FileUtils_gtest.cpp.o:
      ;; undefined reference to symbol
      ;; '_ZN7testing8internal9DeathTest6CreateEPKcPKNS0_2REES3_iPPS1_'
      ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
      ;; error adding symbols: DSO missing from command line
      #:tests? #f
      #:configure-flags
      #~(list "-Dtests=false")
      #:phases
      #~(modify-phases %standard-phases
          ;; Prepend explicit find_library calls for hdf5 and hdf5_cpp to the
          ;; dependency list in meson.build, pointing at the hdf5 input.
          (add-after 'unpack 'link-with-hdf5
            (lambda* (#:key inputs #:allow-other-keys)
              (let ((hdf5 (assoc-ref inputs "hdf5")))
                (substitute* "meson.build"
                  (("libblasr_deps = \\[" m)
                   (string-append
                    m
                    (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                            hdf5 hdf5)))))))
          (add-after 'unpack 'find-googletest
            (lambda* (#:key inputs native-inputs #:allow-other-keys)
              ;; It doesn't find gtest_main because there's no pkg-config file
              ;; for it.  Find it another way.  googletest is a native input,
              ;; so it must be looked up in NATIVE-INPUTS when
              ;; cross-compiling; NATIVE-INPUTS is #f for native builds.
              (substitute* "unittest/meson.build"
                (("libblasr_gtest_dep = dependency\\('gtest_main'.*")
                 (format #f "cpp = meson.get_compiler('cpp')
libblasr_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                         (assoc-ref (or native-inputs inputs)
                                    "googletest")))))))))
    (inputs
     (list boost hdf5 htslib pbbam zlib))
    (native-inputs
     (list googletest pkg-config))
    (home-page "https://github.com/PacificBiosciences/blasr_libcpp")
    (synopsis "Library for analyzing PacBio genomic sequences")
    (description
     "This package provides three libraries used by applications for analyzing
PacBio genomic sequences. This library contains three sub-libraries: pbdata,
hdf and alignment.")
    (license license:bsd-3)))
679
(define-public blasr
  (package
    (name "blasr")
    (version "5.3.5")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/PacificBiosciences/blasr")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0axyd06gn2xa0p0k76fihsbxpfxvhlb18jn6bf97c0ii58r1wc0k"))))
    (build-system meson-build-system)
    (arguments
     `(;; Tests require "cram" executable, which is not packaged.
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         ;; Prepend explicit find_library calls for hdf5 and hdf5_cpp to the
         ;; dependency list in meson.build, pointing at the hdf5 input.
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((hdf5-prefix (assoc-ref inputs "hdf5")))
               (substitute* "meson.build"
                 (("blasr_deps = \\[" m)
                  (string-append
                   m
                   (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                           hdf5-prefix hdf5-prefix))))))))))
    (native-inputs
     (list pkg-config))
    (inputs
     (list boost blasr-libcpp hdf5 pbbam zlib))
    (home-page "https://github.com/PacificBiosciences/blasr")
    (synopsis "PacBio long read aligner")
    (description
     "Blasr is a genomic sequence aligner for processing PacBio long reads.")
    (license license:bsd-3)))
719
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                                  "files/RiboTaper/RiboTaper_Version_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'install 'wrap-executables
            (lambda _
              ;; Wrap each installed script so that it runs with R_LIBS_SITE
              ;; set to the value this variable has at build time.
              (for-each
               (lambda (script)
                 (wrap-program (string-append #$output "/bin/" script)
                   `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
               '("create_annotations_files.bash"
                 "create_metaplots.bash"
                 "Ribotaper_ORF_find.sh"
                 "Ribotaper.sh")))))))
    (inputs
     (list bash-minimal                 ;for wrap-program
           bedtools-2.18
           samtools-0.1
           r-minimal
           r-foreach
           r-xnomial
           r-domc
           r-multitaper
           r-seqinr))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
764
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/lh3/bioawk")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1pxc3zdnirxbf9a0az698hd8xdik7qkhypm7v6hn922x8y9qmspm"))))
    (build-system gnu-build-system)
    (inputs
     (list zlib))
    (native-inputs
     (list bison))
    (arguments
     (list
      #:tests? #f                       ; There are no tests to run.
      ;; Bison must generate files, before other targets can build.
      #:parallel-build? #f
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure)           ; There is no configure phase.
          (replace 'install
            (lambda _
              ;; Install the binary, and install the upstream "awk.1" manual
              ;; page under the name "bioawk.1".  The phase's return value
              ;; is ignored, so no trailing #t is needed.
              (let ((bin (string-append #$output "/bin"))
                    (man (string-append #$output "/share/man/man1")))
                (mkdir-p man)
                (copy-file "awk.1" (string-append man "/bioawk.1"))
                (install-file "bioawk" bin)))))))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
808
(define-public python-cellbender
  (package
    (name "python-cellbender")
    (version "0.2.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/broadinstitute/CellBender")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1zav2q8nnss80i25y06fccagkvrqsy7lpylsl4dxv4qkj8p4fnv3"))))
    (build-system python-build-system)
    ;; There are no tests.
    (arguments
     '(#:tests? #f))
    (propagated-inputs
     (list python-anndata
           python-matplotlib
           python-numpy
           python-pandas
           python-pyro-ppl
           python-scikit-learn
           python-scipy
           python-sphinx
           python-sphinx-argparse
           python-sphinx-autodoc-typehints
           python-sphinx-rtd-theme
           python-sphinxcontrib-programoutput
           python-tables))
    (home-page "https://cellbender.rtfd.io/")
    (synopsis "Eliminate technical artifacts from single-cell RNA-seq data")
    (description
     "CellBender is a software package for eliminating technical artifacts
from high-throughput single-cell RNA sequencing (scRNA-seq) data.")
    (license license:bsd-3)))
846
(define-public python-htsget
  ;; Fetched from PyPI and built with the stock Python build system; no
  ;; build arguments are overridden.
  (package
    (name "python-htsget")
    (version "0.2.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "htsget" version))
       (sha256
        (base32 "0ic07q85vhw9djf23k57b21my7i5xp400m8gfqgr5gcryqvdr0yk"))))
    (build-system python-build-system)
    (propagated-inputs
     (list python-humanize python-requests python-six))
    (native-inputs
     (list python-setuptools-scm))
    (home-page "https://pypi.org/project/htsget/")
    (synopsis "Python API and command line interface for the GA4GH htsget API")
    (description "This package is a client implementation of the GA4GH htsget
protocol. It provides a simple and reliable way to retrieve genomic data from
servers supporting the protocol.")
    (license license:asl2.0)))
868
;; Python wrapper around BEDtools.  The build regenerates the Cython
;; extensions from the .pyx sources and runs the test suite from a copy
;; placed in /tmp/test.
(define-public python-pybedtools
  (package
    (name "python-pybedtools")
    (version "0.9.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "18rhzk08d3rpxhi5xh6pqg64x6v5q3daw6y3v54k85v4swncjrwj"))))
    (build-system python-build-system)
    (arguments
     `(#:modules ((srfi srfi-26)        ;provides 'cut', used below
                  (guix build utils)
                  (guix build python-build-system))
       ;; See https://github.com/daler/pybedtools/issues/192
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             ;; Tests are disabled by renaming them so that pytest no longer
             ;; collects them.
             (substitute* "pybedtools/test/test_helpers.py"
               ;; Requires internet access.
               (("def test_chromsizes")
                "def _do_not_test_chromsizes")
               ;; Broken as a result of the workaround used in the check phase
               ;; (see: https://github.com/daler/pybedtools/issues/192).
               (("def test_getting_example_beds")
                "def _do_not_test_getting_example_beds"))
             ;; This issue still occurs on python2
             (substitute* "pybedtools/test/test_issues.py"
               (("def test_issue_303")
                "def _test_issue_303"))))
         ;; Force the Cythonization of C++ files to guard against compilation
         ;; problems.
         (add-after 'unpack 'remove-cython-generated-files
           (lambda _
             ;; CYTHON-SOURCES holds the .pyx file names without their
             ;; extension; a C/C++ file is deleted only when its own
             ;; extension-less name is in that list.
             (let ((cython-sources (map (cut string-drop-right <> 4)
                                        (find-files "." "\\.pyx$")))
                   (c/c++-files (find-files "." "\\.(c|cpp|cxx)$")))
               (define (strip-extension filename)
                 (string-take filename (string-index-right filename #\.)))
               (define (cythonized? c/c++-file)
                 (member (strip-extension c/c++-file) cython-sources))
               (for-each delete-file (filter cythonized? c/c++-files)))))
         (add-after 'remove-cython-generated-files 'generate-cython-extensions
           (lambda _
             (invoke "python" "setup.py" "cythonize")))
         (replace 'check
           ;; Honor #:tests? so that the test suite can be skipped on
           ;; request (e.g. "--without-tests").
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; The tests need to be run from elsewhere...
               (mkdir-p "/tmp/test")
               (copy-recursively "pybedtools/test" "/tmp/test")
               (with-directory-excursion "/tmp/test"
                 (invoke "pytest" "-v" "--doctest-modules"))))))))
    (propagated-inputs
     (list bedtools samtools python-matplotlib python-pysam
           python-pyyaml))
    (native-inputs
     (list python-numpy
           python-pandas
           python-cython
           kentutils                    ; for bedGraphToBigWig
           python-six
           ;; For the test suite.
           python-pytest
           python-psutil))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "This package is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    ;; pypi lists GPLv2 in the PKG-INFO and website, but was relicensed in
    ;; version 0.9.0 and the LICENSE.txt is consistent with the source code.
    ;;
    ;; pybedtools/include/gzstream.cpp and pybedtools/include/gzstream.h are
    ;; licensed lgpl2.1+
    (license (list license:expat license:lgpl2.1+))))
948
;; BIOM format utilities.  The generated C files are removed from the source
;; and USE_CYTHON is set to "1" for the build.
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.10")
    (source
     (origin
       (method git-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (git-reference
             (url "https://github.com/biocore/biom-format")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0i62j6ksmp78ap2dnl969gq6vprc3q87zc8ksj9if8g2603iq6i8"))
       (modules '((guix build utils)))
       ;; Delete generated C files.  The pattern is anchored with "$" so
       ;; that only names ending in ".c" match; unanchored, it would also
       ;; match names that merely contain ".c", such as "*.cfg" or "*.csv".
       (snippet
        '(for-each delete-file (find-files "." "\\.c$")))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-cython
           (lambda _ (setenv "USE_CYTHON" "1")))
         ;; Drop the upper bound on the pytest version in setup.py.
         (add-after 'unpack 'relax
           (lambda _
             (substitute* "setup.py"
               (("pytest < 5.3.4") "pytest"))))
         ;; Put a skip decorator, at the test's own indentation, in front of
         ;; the two tests named in the patterns below.
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             (substitute* "biom/tests/test_cli/test_validate_table.py"
               (("^(.+)def test_invalid_hdf5" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m)))
             (substitute* "biom/tests/test_table.py"
               (("^(.+)def test_from_hdf5_issue_731" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m))))))))
    (propagated-inputs
     (list python-anndata
           python-numpy
           python-scipy
           python-flake8
           python-future
           python-click
           python-h5py
           python-pandas))
    (native-inputs
     (list python-cython python-pytest python-pytest-cov python-nose))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)))
1010
;; Command-line framework for processing Hi-C sequencing data.
(define-public python-pairtools
  (package
    (name "python-pairtools")
    (version "0.3.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/mirnylab/pairtools")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0gr8y13q7sd6yai6df4aavl2470n1f9s3cib6r473z4hr8hcbwmc"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace the hard-coded "/bin/bash" with the bash found on PATH
         ;; at build time.
         (add-after 'unpack 'fix-references
           (lambda _
             (substitute* '("pairtools/pairtools_merge.py"
                            "pairtools/pairtools_sort.py")
               (("/bin/bash") (which "bash")))
             #t))
         (replace 'check
           ;; Honor #:tests? so that the test suite can be skipped on
           ;; request (e.g. "--without-tests").
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             (when tests?
               ;; Make the freshly installed modules importable, then run
               ;; pytest from /tmp.
               (add-installed-pythonpath inputs outputs)
               (with-directory-excursion "/tmp"
                 (invoke "pytest" "-v"))))))))
    (native-inputs
     (list python-cython python-nose python-pytest))
    (inputs
     `(("python" ,python-wrapper)))
    (propagated-inputs
     (list htslib                       ; for bgzip, looked up in PATH
           samtools                     ; looked up in PATH
           lz4                          ; for lz4c
           python-click
           python-numpy))
    (home-page "https://github.com/mirnylab/pairtools")
    (synopsis "Process mapped Hi-C data")
    (description "Pairtools is a simple and fast command-line framework to
process sequencing data from a Hi-C experiment. Process paired-end sequence
alignments and perform the following operations:

@itemize
@item detect ligation junctions (a.k.a. Hi-C pairs) in aligned paired-end
sequences of Hi-C DNA molecules
@item sort @code{.pairs} files for downstream analyses
@item detect, tag and remove PCR/optical duplicates
@item generate extensive statistics of Hi-C datasets
@item select Hi-C pairs given flexibly defined criteria
@item restore @code{.sam} alignments from Hi-C pairs.
@end itemize
")
    (license license:expat)))
1066
;; BioPerl, with its "bin/*.pl" scripts wrapped so that they find their Perl
;; modules at run time.
(define-public bioperl-minimal
  (package
    (name "bioperl-minimal")
    (version "1.7.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bioperl/bioperl-live")
             ;; The tag is "release-" followed by the version with every
             ;; dot turned into a dash.
             (commit (string-append
                      "release-"
                      (string-map (lambda (ch)
                                    (if (char=? ch #\.) #\- ch))
                                  version)))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0wl8yvzcls59pwwk6m8ahy87pwg6nnibzy5cldbvmcwg2x2w7783"))))
    (build-system perl-build-system)
    (arguments
     ;; INPUT-NAMES is computed on the host side: the de-duplicated package
     ;; names of the transitive target inputs of this package's inputs.
     (let ((input-names
            (map (compose package-name cadr)
                 (delete-duplicates
                  (concatenate
                   (map (compose package-transitive-target-inputs cadr)
                        (package-inputs this-package)))))))
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; PERL5LIB also lists the native inputs, so it cannot be
               ;; reused as is.  The search path is instead assembled from
               ;; this package's own site_perl directory plus the entries
               ;; of %build-inputs named in INPUT-NAMES.
               (let* ((output (assoc-ref outputs "out"))
                      (scripts-dir (string-append output "/bin/"))
                      (perl5lib
                       (string-join
                        (cons (string-append output "/lib/perl5/site_perl")
                              (map (lambda (label)
                                     (assoc-ref %build-inputs label))
                                   ',input-names))
                        ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               `("PERL5LIB" ":" prefix (,perl5lib))))
                           (find-files scripts-dir "\\.pl$"))
                 #t)))))))
    (inputs
     (list perl-module-build perl-data-stag perl-libwww perl-uri))
    (native-inputs
     (list perl-test-most))
    (home-page "https://metacpan.org/release/BioPerl")
    (synopsis "Bioinformatics toolkit")
    (description
     "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
    (license license:perl-license)))
1130
;; Perl bindings to the HTS library, fetched from the CPAN mirror.
(define-public perl-bio-db-hts
  (package
    (name "perl-bio-db-hts")
    (version "3.01")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AV/AVULLO/Bio-DB-HTS-"
             version ".tar.gz"))
       (sha256
        (base32 "0hjg0igfkpvh27zdkdr6pa7cqm9n6r7cwz0np74cl4wmawgvr9hj"))))
    (build-system perl-build-system)
    ;; Tools needed only while building.
    (native-inputs
     (list perl-module-build pkg-config))
    ;; Note the pinned htslib variant.
    (propagated-inputs
     (list bioperl-minimal htslib-1.9))
    (home-page "https://metacpan.org/release/Bio-DB-HTS")
    (synopsis "Perl interface to HTS library for DNA sequencing")
    (description "This is a Perl interface to the HTS library for DNA
sequencing.")
    (license license:asl2.0)))
1153
;; Biopython, taken from the PyPI release.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.76")
    (source
     (origin
       (method url-fetch)
       ;; PyPI is preferred over biopython.org because it makes updating
       ;; easier.
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "0wlch9xpa0fpgjzyxi6jsfca6iakaq9a05927xg8vqnmvaccnwrq"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; A home directory must exist for some of the tests, so HOME is
         ;; pointed at /tmp before the check phase.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (propagated-inputs
     (list python-numpy))
    (home-page "https://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
1184
;; Variant of python-biopython pinned at 1.73; only the version and the
;; source differ from the inherited package.
(define-public python-biopython-1.73
  (package
    (inherit python-biopython)
    (version "1.73")
    (source
     (origin
       (method url-fetch)
       ;; PyPI is preferred over biopython.org because it makes updating
       ;; easier.
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "1q55jhf76z3k6is3psis0ckbki7df26x7dikpcc3vhk1vhkwribh"))))))
1196
;; Small FASTA/FASTQ parser, taken from the PyPI release.
(define-public python-fastalite
  (package
    (name "python-fastalite")
    (version "0.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "fastalite" version))
       (sha256
        (base32 "1qli6pxp77i9xn2wfciq2zaxhl82bdxb33cpzqzj1z25yd036wqj"))))
    (build-system python-build-system)
    ;; The tests cannot run: the data they need is not distributed.
    (arguments
     (list #:tests? #f))
    (home-page "https://github.com/nhoffman/fastalite")
    (synopsis "Simplest possible FASTA parser")
    (description "This library implements a FASTA and a FASTQ parser without
relying on a complex dependency tree.")
    (license license:expat)))
1216
;; Header-only C++ support library; the commit below is pinned by hash.
(define-public biosoup
  (package
    (name "biosoup")
    (version "0.10.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/rvaser/biosoup")
             ;; This is the commit of version 0.10.0.
             (commit "38181f09854ff42cbd9632200a2ec9fb37a4b7b6")))
       (file-name (git-file-name name version))
       (sha256
        (base32 "02hvyka703zagx0nvv2yx3dkc748zc8g6qbrpya7r8kfkcl7y8hw"))))
    (build-system cmake-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run the test executable directly, and only when tests are
         ;; requested.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "./bin/biosoup_test")))))))
    (native-inputs
     (list googletest))
    (home-page "https://github.com/rvaser/biosoup")
    (synopsis "C++ support library for bioinformatics tools")
    (description "Biosoup is a C++ collection of header-only data structures
used for storage and logging in bioinformatics tools.")
    (license license:expat)))
1247
;; Header-only C++ parsing library; the commit below is pinned by hash.
(define-public bioparser
  (package
    (name "bioparser")
    (version "3.0.13")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/rvaser/bioparser")
             ;; This is the commit of tag 3.0.13.
             (commit "13341e6e0855c6b358ffcea6dad216e1009e1287")))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0c5p2dl8jb12ci9f427jzrmmm9cgvc1k4fxsn2ggkfsin6r1r82i"))))
    (build-system cmake-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run the test executable directly, and only when tests are
         ;; requested.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "./bin/bioparser_test")))))))
    (inputs
     (list biosoup))
    (propagated-inputs
     (list zlib))
    (native-inputs
     (list googletest))
    (home-page "https://github.com/rvaser/bioparser")
    (synopsis "C++ library for parsing several formats in bioinformatics")
    (description "Bioparser is a C++ header only parsing library for several
bioinformatics formats (FASTA/Q, MHAP/PAF/SAM), with support for zlib
compressed files.")
    (license license:expat)))
1283
;; Rust helpers for the CIRI toolkit; the checkout is recursive and the
;; vendored spoa CMake project is patched before building.
(define-public circtools
  (package
    (name "circtools")
    (version "1.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Kevinzjy/circtools")
             ;; This is the commit of tag v1.0.0.
             (commit "79380de59013601021ca3b1352d6f64d2fb89646")
             (recursive? #t)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0wg1s927g32k25j967kfr8l30nmr4c0p4zvy5igvy7cs6chd60lh"))))
    (build-system cargo-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-writable
           (lambda _
             (for-each make-file-writable (find-files "."))))
         ;; Switch the bioparser and biosoup lookups from QUIET to CONFIG
         ;; mode, and replace GTest_FOUND with TRUE.
         (add-after 'unpack 'prepare-spoa-dependencies
           (lambda _
             (substitute* "vendor/spoa/CMakeLists.txt"
               (("find_package\\(bioparser 3.0.13 QUIET\\)")
                "find_package(bioparser 3.0.13 CONFIG)")
               (("find_package\\(biosoup 0.10.0 QUIET\\)")
                "find_package(biosoup 0.10.0 CONFIG)")
               (("GTest_FOUND") "TRUE")))))
       #:cargo-inputs
       (("rust-anyhow" ,rust-anyhow-1)
        ("rust-bio" ,rust-bio-0.33)
        ("rust-chrono" ,rust-chrono-0.4)
        ("rust-docopt" ,rust-docopt-1)
        ("rust-flate2" ,rust-flate2-1)
        ("rust-indicatif" ,rust-indicatif-0.15)
        ("rust-libc" ,rust-libc-0.2)
        ("rust-serde" ,rust-serde-1)
        ("rust-seq-io" ,rust-seq-io-0.3))))
    (inputs
     (list bioparser biosoup))
    (native-inputs
     (list cmake pkg-config googletest))
    (home-page "https://github.com/Kevinzjy/circtools")
    (synopsis "Accelerating functions in CIRI toolkit")
    (description "This package provides accelerated functions for the CIRI
toolkit. It also provides the @code{ccs} executable to scan for circular
consensus sequences.")
    (license license:expat)))
1335
;; CIRI-long; the bundled "ccs" binary is deleted and the one from the
;; circtools input is referenced instead.
(define-public ciri-long
  (package
    (name "ciri-long")
    (version "1.0.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bioinfo-biols/CIRI-long")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "10k88i1fcqchrrjv82rmylwvbwqfba0n51palhig9hsg71xs0dbi"))
       ;; Get rid of the pre-built binary shipped in the repository.
       (snippet '(delete-file "libs/ccs"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               ;; argparse is only needed as a separate package on python2.
               (("'argparse[^']*',") "")
               ;; Turn exact version pins into lower bounds.
               (("==") ">="))))
         (add-before 'build 'build-libssw
           (lambda _
             (with-directory-excursion "libs/striped_smith_waterman"
               (invoke "make" "libssw.so"))))
         (add-before 'build 'fix-reference-to-ccs
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((ccs (string-append (assoc-ref inputs "circtools")
                                       "/bin/ccs")))
               ;; Call "ccs" by its absolute file name.
               (substitute* "CIRI_long/pipeline.py"
                 (("'ccs -i")
                  (string-append "'" ccs " -i")))
               ;; Ugly: strip the os.chmod call on lib_path.
               ;; NOTE(review): presumably because the installed files
               ;; cannot be chmod'ed at run time -- confirm.
               (substitute* "CIRI_long/main.py"
                 (("os.chmod\\(lib_path.*") ""))))))))
    (inputs
     (list circtools
           python-biopython
           python-bwapy
           python-levenshtein
           python-mappy
           python-numpy
           python-pandas
           python-pysam
           python-pyspoa
           python-scikit-learn
           python-scipy))
    (native-inputs
     (list python-cython python-nose python-setuptools))
    (home-page "https://ciri-cookbook.readthedocs.io/")
    (synopsis "Circular RNA identification for Nanopore sequencing")
    (description "CIRI-long is a package for circular RNA identification using
long-read sequencing data.")
    (license license:expat)))
1394
;; QTLtools, built from the upstream source tarball with the Makefile in the
;; "qtltools" sub-directory.
(define-public qtltools
  (package
    (name "qtltools")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch/tarbomb)
       (uri (string-append "https://qtltools.github.io/qtltools/"
                           "binaries/QTLtools_" version
                           "_source.tar.gz"))
       (sha256
        (base32 "13gdry5l43abn3464fmk8qzrxgxnxah2612r66p9dzhhl92j30cd"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests included
       ;; Point the Makefile at the include and lib directories of the
       ;; boost, htslib and rmath-standalone inputs.
       #:make-flags
       ,#~(list (string-append "BOOST_INC="
                               #$(this-package-input "boost") "/include")
                (string-append "BOOST_LIB="
                               #$(this-package-input "boost") "/lib")
                (string-append "HTSLD_INC="
                               #$(this-package-input "htslib") "/include")
                (string-append "HTSLD_LIB="
                               #$(this-package-input "htslib") "/lib")
                (string-append "RMATH_INC="
                               #$(this-package-input "rmath-standalone")
                               "/include")
                (string-append "RMATH_LIB="
                               #$(this-package-input "rmath-standalone")
                               "/lib"))
       #:phases
       (modify-phases %standard-phases
         ;; Use the shared Boost libraries and link against openblas
         ;; instead of blas.
         (add-after 'unpack 'fix-linkage
           (lambda _
             (substitute* "qtltools/Makefile"
               (("libboost_iostreams.a")
                "libboost_iostreams.so")
               (("libboost_program_options.a")
                "libboost_program_options.so")
               (("-lblas") "-lopenblas"))))
         (add-before 'build 'chdir
           (lambda _ (chdir "qtltools")))
         ;; No configure script is run; the Makefile is edited instead.
         ;; NOTE(review): 'fix-linkage has already rewritten the Boost ".a"
         ;; names by the time this phase runs, so the LIB_FILES pattern
         ;; below may no longer match -- confirm against the Makefile.
         (replace 'configure
           (lambda _
             (substitute* "qtltools/Makefile"
               (("LIB_FLAGS=-lz")
                "LIB_FLAGS=-lz -lcrypto -lssl")
               (("LIB_FILES=\\$\\(RMATH_LIB\\)/libRmath.a \
\\$\\(HTSLD_LIB\\)/libhts.a \
\\$\\(BOOST_LIB\\)/libboost_iostreams.a \
\\$\\(BOOST_LIB\\)/libboost_program_options.a")
                "LIB_FILES=$(RMATH_LIB)/libRmath.so \
$(HTSLD_LIB)/libhts.so \
$(BOOST_LIB)/libboost_iostreams.so \
$(BOOST_LIB)/libboost_program_options.so"))))
         ;; Copy the single executable into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bindir)
               (install-file "bin/QTLtools" bindir)))))))
    (inputs
     (list curl
           gsl
           boost
           rmath-standalone
           htslib-1.3
           openssl
           openblas
           zlib))
    (home-page "https://qtltools.github.io/qtltools/")
    (synopsis "Tool set for molecular QTL discovery and analysis")
    (description "QTLtools is a tool set for molecular QTL discovery
and analysis. It allows going from the raw genetic sequence data to
collection of molecular @dfn{Quantitative Trait Loci} (QTLs) in few
easy-to-perform steps.")
    (license license:gpl3+)))
1471
(define-public bpp-core
  ;; Upstream's last release dates from 2014 and it recommends installing
  ;; from a clone of the Git repository, hence the pinned commit.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582")
        (revision "1"))
    (package
      (name "bpp-core")
      ;; Expands to "2.2.0-1." followed by the first 7 characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      ;; Parallel builds are turned off.
      (arguments
       `(#:parallel-build? #f))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
1501
(define-public bpp-phyl
  ;; Upstream's last release dates from 2014 and it recommends installing
  ;; from a clone of the Git repository, hence the pinned commit.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2")
        (revision "1"))
    (package
      (name "bpp-phyl")
      ;; Expands to "2.2.0-1." followed by the first 7 characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; Build inside the source tree: in an out-of-source build the
         ;; test data is not copied to the build directory and the tests
         ;; fail.
         #:out-of-source? #f))
      (inputs
       (list bpp-core bpp-seq))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
1534
(define-public bpp-popgen
  ;; Upstream's last release dates from 2014 and it recommends installing
  ;; from a clone of the Git repository, hence the pinned commit.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f")
        (revision "1"))
    (package
      (name "bpp-popgen")
      ;; Expands to "2.2.0-1." followed by the first 7 characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (inputs
       (list bpp-core bpp-seq))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
      (license license:cecill-c))))
1565
(define-public bpp-seq
  ;; Upstream's last release dates from 2014 and it recommends installing
  ;; from a clone of the Git repository, hence the pinned commit.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33")
        (revision "1"))
    (package
      (name "bpp-seq")
      ;; Expands to "2.2.0-1." followed by the first 7 characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-seq")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; Build inside the source tree: in an out-of-source build the
         ;; test data is not copied to the build directory and the tests
         ;; fail.
         #:out-of-source? #f))
      (inputs
       (list bpp-core))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
      (license license:cecill-c))))
1598
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       (list groff man-db texinfo))
      ;; Each Bio++ library gets its own input label.  bpp-popgen used to be
      ;; registered under the label "bpp-phyl", duplicating the label of
      ;; bpp-phyl itself.
      (inputs
       (list bpp-core bpp-seq bpp-phyl bpp-popgen))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
1634
;; NCBI BLAST+.  The source snippet strips bundled libraries and makes the
;; build reproducible; the phases rewrite hard-coded tool paths and run the
;; upstream "configure.orig" script with explicit output directories.
(define-public blast+
  (package
    (name "blast+")
    (version "2.11.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32 "0m0r9vkw631ky1za1wilsfk9k9spwqh22nkrb9a57rbwmrc1i3nq"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copies of bzip2, zlib and pcre.
           (for-each delete-file-recursively
                     '("c++/src/util/compress/bzip2"
                       "c++/src/util/compress/zlib"
                       "c++/src/util/regexp"))
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           ;; The msbuild directory serves no purpose here.
           (delete-file-recursively
            "c++/src/build-system/project_tree_builder/msbuild")

           ;; Reproducibility fixes follow.
           ;; Keep the kernel version out of the build.
           (substitute* "c++/src/build-system/configure"
             (("kver=.*") "kver=\"\""))
           ;; Use fixed values instead of generated random numbers.
           (substitute* "c++/scripts/common/impl/define_random_macros.sh"
             (("#define NCBI_RANDOM_VALUE_MAX 0xffffffffu" m)
              (string-append m "
#define NCBI_RANDOM_VALUE_0 2845495105u
#define NCBI_RANDOM_VALUE_1 2158634051u
#define NCBI_RANDOM_VALUE_2 4072202242u
#define NCBI_RANDOM_VALUE_3 902228395u
#define NCBI_RANDOM_VALUE_4 1353323915u
#define NCBI_RANDOM_VALUE_5 574823513u
#define NCBI_RANDOM_VALUE_6 4119501261u
#define NCBI_RANDOM_VALUE_7 2477640938u
#define NCBI_RANDOM_VALUE_8 2776595395u
#define NCBI_RANDOM_VALUE_9 270550684u
"))
             (("cksum") "cksum >/dev/null"))))))
    (build-system gnu-build-system)
    (arguments
     `(;; This massive library has only two(!) tests, and both of them fail
       ;; with "unparsable timing stats".
       ;; ERR [127] -- [serial/datatool] datatool.sh (unparsable timing stats)
       ;; ERR [127] -- [serial/datatool] datatool_xml.sh (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-HOME
           ;; The configure phase needs $HOME to be set at some point.
           (lambda _
             (setenv "HOME" "/tmp")
             #t))
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "c++")
             #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the replacement for the tool CMD: a fixed "date"
             ;; invocation, the absolute file name found by 'which', or #f
             ;; (after printing a warning) when the tool cannot be found.
             (define (which* cmd)
               (if (string=? cmd "date")
                   ;; make call to "date" deterministic
                   "date -d @0"
                   (or (which cmd)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 cmd)
                         #f))))

             ;; Replace hard-coded /bin and /usr/bin tool paths; a match is
             ;; left untouched when no replacement is found.
             (substitute* (append '("src/build-system/configure.ac"
                                    "src/build-system/configure"
                                    "src/build-system/helpers/run_with_lock.c"
                                    "scripts/common/impl/if_diff.sh"
                                    "scripts/common/impl/run_with_lock.sh"
                                    "src/build-system/Makefile.configurables.real"
                                    "src/build-system/Makefile.in.top"
                                    "src/build-system/Makefile.meta.gmake=no"
                                    "src/build-system/Makefile.meta.in"
                                    "src/build-system/Makefile.meta_l"
                                    "src/build-system/Makefile.meta_p"
                                    "src/build-system/Makefile.meta_r"
                                    "src/build-system/Makefile.mk.in"
                                    "src/build-system/Makefile.requirements"
                                    "src/build-system/Makefile.rules_with_autodep.in")
                                  (find-files "scripts/common/check" "\\.sh$"))
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
                (or (which* cmd) all)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; The check script must use "/tmp" rather than "/var/tmp".
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; Keep the scripts from resetting PATH.
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((prefix-dir (assoc-ref outputs "out"))
                   (lib-dir (string-append (assoc-ref outputs "lib") "/lib"))
                   (include-dir (string-append (assoc-ref outputs "include")
                                               "/include/ncbi-tools++")))
               ;; "configure.orig" is called directly because the
               ;; 'configure' script does not recognize options such as
               ;; '--enable-fast-install'.
               (invoke "./configure.orig"
                       (string-append "--with-build-root=" (getcwd) "/build")
                       (string-append "--prefix=" prefix-dir)
                       (string-append "--libdir=" lib-dir)
                       (string-append "--includedir=" include-dir)
                       (string-append "--with-bz2="
                                      (assoc-ref inputs "bzip2"))
                       (string-append "--with-z="
                                      (assoc-ref inputs "zlib"))
                       (string-append "--with-pcre="
                                      (assoc-ref inputs "pcre"))
                       ;; By default every library is built twice: once
                       ;; with "-static" in its name and once without.
                       "--without-static"
                       "--with-dll")
               #t))))))
    (outputs '("out"                    ; 21 MB
               "lib"                    ; 226 MB
               "include"))              ; 33 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("lmdb" ,lmdb)
       ("zlib" ,zlib)
       ("pcre" ,pcre)
       ("perl" ,perl)
       ("python" ,python-wrapper)))
    (native-inputs
     (list cpio))
    (home-page "https://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; The bulk of the sources is in the public domain; the exceptions are:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
1805
;; BLESS: Bloom-filter-based error corrector for NGS reads.  Bundled
;; third-party sources that are available as inputs are deleted in the
;; source snippet; there is no configure script, no "check" target, and
;; installation is done by hand.
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, google-sparsehash, zlib, and
                  ;; the .git directory.
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags
       ;; Point the Makefile at the zlib input and link against the
       ;; system-provided boost libraries.
       ,#~(list (string-append "ZLIB="
                               #$(this-package-input "zlib")
                               "/lib/libz.so")
                (string-append "LDFLAGS="
                               (string-join '("-lboost_filesystem"
                                              "-lboost_system"
                                              "-lboost_iostreams"
                                              "-lz"
                                              "-fopenmp"))))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled pigz directory was deleted by the snippet, so the
         ;; Makefile must not try to enter it.
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda _
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))))
         ;; Hardcode absolute file names: kmc is installed into our own
         ;; output, pigz comes from the "pigz" input.
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))))
         ;; This phase replaces the standard install phase and copies the
         ;; two executables by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc")))))
         (delete 'configure))))
    (native-inputs
     (list perl))
    (inputs
     (list openmpi boost sparsehash pigz zlib))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized bloom
filter. BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
1881
;; Bowtie 2 read aligner.  The build host and build time normally embedded
;; by the Makefile are replaced with constants in the source snippet so the
;; build is deterministic.
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.3.4.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/BenLangmead/bowtie2")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1zl3cf327y2p7p03cavymbh7b00djc7lncfaqih33n96iy9q8ibp"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (substitute* "Makefile"
                    ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                    (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                    (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       ,#~(list "allall"
                "WITH_TBB=1"
                (string-append "prefix=" #$output))
       #:phases
       (modify-phases %standard-phases
         ;; The standard configure phase is replaced by a source fix-up.
         (replace 'configure
           (lambda _
             ;; This "extended character" is not considered valid.
             (substitute* "processor_support.h"
               (("“") "\"")
               (("”") "\""))))
         ;; Run the upstream Perl test driver against the freshly built
         ;; binaries, but only when tests are enabled so that passing
         ;; #:tests? #f (e.g. via a package transformation) is honored.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "perl"
                       "scripts/test/simple_tests.pl"
                       "--bowtie2=./bowtie2"
                       "--bowtie2-build=./bowtie2-build")))))))
    (inputs
     `(("tbb" ,tbb-2020)
       ("zlib" ,zlib)
       ("python" ,python-wrapper)))
    (native-inputs
     (list perl perl-clone perl-test-deep perl-test-simple))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
1940
;; Original Bowtie short-read aligner, built straight from the upstream
;; Makefile (no configure script, no test target).
(define-public bowtie1
  (package
    (name "bowtie1")
    (version "1.3.0")
    (home-page "http://bowtie-bio.sourceforge.net/index.shtml")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bowtie-bio/bowtie/"
                                  version "/bowtie-" version "-src.zip"))
              (modules '((guix build utils)))
              (snippet
               ;; Pin BUILD_HOST and BUILD_TIME to constants so that the
               ;; build is deterministic.
               '(substitute* "Makefile"
                  (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                  (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))
              (sha256
               (base32
                "11dbihdnrizc6qhx9xsw77w3q5ssx642alaqzvhxx32ak9glvq04"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The configure phase is not used for this package.
         (delete 'configure))
       #:make-flags
       ,#~(list "CC=gcc" "all"
                (string-append "prefix=" #$output))
       ;; The Makefile has no "check" target.
       #:tests? #f))
    (inputs
     (list python-wrapper tbb-2020 zlib))
    (supported-systems '("x86_64-linux"))
    (synopsis "Fast aligner for short nucleotide sequence reads")
    (description
     "Bowtie is a fast, memory-efficient short read aligner. It aligns short
DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp
reads per hour. Bowtie indexes the genome with a Burrows-Wheeler index to
keep its memory footprint small: typically about 2.2 GB for the human
genome (2.9 GB for paired-end).")
    (license license:artistic2.0)))
1979
;; TopHat spliced read mapper.  The bundled SeqAn and samtools copies are
;; deleted from the source; the build is patched to use the "seqan" and
;; "samtools" inputs instead.
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.1")
    (home-page "https://ccb.jhu.edu/software/tophat/index.shtml")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
                    version ".tar.gz"))
              (sha256
               (base32
                "19add02kv2xhd6ihd779dr7x35ggym3jqr0m5c4315i1yfb0p11p"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Drop the bundled copies of SeqAn and samtools.
                  (delete-file-recursively "src/SeqAn-1.4.2")
                  (delete-file-recursively "src/samtools-0.1.18")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f             ;not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'set-paths 'hide-default-gcc
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Drop the default GCC's C++ header directory from
             ;; CPLUS_INCLUDE_PATH so it cannot conflict with the GCC 5
             ;; input.
             (let* ((unwanted (string-append (assoc-ref inputs "gcc")
                                             "/include/c++"))
                    (entries (string-split (getenv "CPLUS_INCLUDE_PATH")
                                           #\:))
                    (kept (delete unwanted entries)))
               (setenv "CPLUS_INCLUDE_PATH" (string-join kept ":"))
               #t)))
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Remove every reference to the bundled samtools program and
             ;; library from the Makefile template.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ lhs) lhs)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Refer to the samtools executable found on PATH by its
             ;; absolute file name.
             (substitute* '("src/common.cpp"
                            "src/tophat.py")
               (("samtools_0.1.18") (which "samtools")))
             ;; Include the samtools headers from their installed location.
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (native-inputs
     `(("gcc@5" ,gcc-5)))               ;doesn't build with later versions
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python-2)
       ("samtools" ,samtools-0.1)
       ("seqan" ,seqan-1)
       ("zlib" ,zlib)))
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
2058
;; Burrows-Wheeler Aligner.  Built with plain make; installation is done by
;; hand.  Note: 'arguments' stays a quoted list because bwa-pssm extends it
;; with substitute-keyword-arguments.
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.17")
    (home-page "http://bio-bwa.sourceforge.net/")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/lh3/bwa/releases/download/v"
                    version "/bwa-" version ".tar.bz2"))
              (sha256
               (base32
                "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags '("CFLAGS=-fcommon")
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Pair each artifact with its destination below the output
             ;; directory and install them in order.
             (let ((out (assoc-ref outputs "out")))
               (for-each (lambda (file subdir)
                           (install-file file (string-append out subdir)))
                         '("bwa" "libbwa.a" "README.md" "bwa.1")
                         '("/bin" "/lib" "/share/doc/bwa"
                           "/share/man/man1"))))))))
    (inputs (list zlib))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
2107
;; BWA-PSSM: a BWA derivative using position specific scoring matrices.
;; Inherits from bwa and reuses its build arguments, adding one source
;; patch phase.
(define-public bwa-pssm
  (package (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pkerpedjiev/bwa-pssm")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "076c4q0cdqz8jgylb067y9zmvxglppnzi3qiscn0xiypgc6lgb5r"))))
    (build-system gnu-build-system)
    (arguments
     ;; Start from bwa's arguments and extend its phases.
     (substitute-keyword-arguments (package-arguments bwa)
       ((#:phases phases '%standard-phases)
        `(modify-phases ,phases
           ;; Turn the "inline int map" definition in pssm.c into a plain
           ;; function definition.
           (add-after 'unpack 'patch-C-error
             (lambda _
               (substitute* "pssm.c"
                 (("inline int map") "int map"))))))))
    (inputs
     (list gdsl zlib perl))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
2143
;; bwa-meth: BS-Seq read alignment on top of BWA.  The script is patched to
;; call bwa by absolute file name.
(define-public bwa-meth
  (package
    (name "bwa-meth")
    (version "0.2.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/brentp/bwa-meth")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0c695lkrr0996zwkibl7324wg2vxmn6522sz30xv4a9gaf0lnbh3"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'keep-references-to-bwa
           ;; 'which' looks bwa up on PATH, so no phase keys are needed.
           (lambda _
             (substitute* "bwameth.py"
               ;; Replace "bwa mem" / "bwa index" invocations with the
               ;; absolute file name of bwa.
               (("bwa (mem|index)" _ command)
                (string-append (which "bwa") " " command))
               ;; There's an ill-advised check for "samtools" on PATH.
               (("^checkX.*") "")))))))
    (inputs
     (list bwa))
    (native-inputs
     (list python-toolshed))
    (home-page "https://github.com/brentp/bwa-meth")
    (synopsis "Fast and accurate alignment of BS-Seq reads")
    (description
     "BWA-Meth works for single-end reads and for paired-end reads from the
directional protocol (most common). It uses the method employed by
methylcoder and Bismark of in silico conversion of all C's to T's in both
reference and reads. It recovers the original read (needed to tabulate
methylation) by attaching it as a comment which BWA appends as a tag to the
read. It performs favorably to existing aligners gauged by number of on and
off-target reads for a capture method that targets CpG-rich region.")
    (license license:expat)))
2183
;; bx-python, fetched from git at a pinned commit rather than a tag.
(define-public python-bx-python
  (package
    (name "python-bx-python")
    (version "0.8.12")
    (home-page "https://github.com/bxlab/bx-python")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bxlab/bx-python")
             (commit "f4e6a5c93e719db69b5798b6fdd9b167da358316")))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0mclahslz34vq9x424jmzsxk0nmpm1j716fa8h3zwr9ssvch7skc"))))
    (build-system python-build-system)
    (native-inputs
     (list python-lzo python-nose python-cython))
    (inputs
     (list zlib))
    (propagated-inputs
     (list python-numpy))
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
2210
;; pyega3: download client for the EGA archive, fetched from PyPI.
(define-public python-pyega3
  (package
    (name "python-pyega3")
    (version "3.4.1")
    (home-page "https://github.com/EGA-archive/ega-download-client")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyega3" version))
       (sha256
        (base32 "1k736in8g27rarx65ym9xk50x53zjg75h37bb8ljynxv04rypx2q"))))
    (build-system python-build-system)
    ;; The test suite is disabled because it needs network access.
    (arguments
     '(#:tests? #f))
    (propagated-inputs
     (list python-requests python-tqdm python-urllib3 python-responses))
    (native-inputs
     (list python-psutil python-htsget))
    (synopsis "Python client for EGA")
    (description "This package is a python-based tool for viewing and
downloading files from authorized EGA datasets. It uses the EGA data API and
has several key features:
@itemize
@item Files are transferred over secure https connections and received
unencrypted, so no need for decryption after download.
@item Downloads resume from where they left off in the event that the
connection is interrupted.
@item Supports file segmenting and parallelized download of segments,
improving overall performance.
@item After download completes, file integrity is verified using checksums.
@item Implements the GA4GH-compliant htsget protocol for download of genomic
ranges for data files with accompanying index files.
@end itemize\n")
    (license license:asl2.0)))
2245
;; pysam: Python bindings for the SAMtools C API, built against the htslib
;; package instead of the bundled copy.
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.18.0")
    (home-page "https://github.com/pysam-developers/pysam")
    (source (origin
              ;; Fetch from git: the PyPI release lacks the test data.
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pysam-developers/pysam")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "042ca27r6634xg2ixgvq1079cp714wmm6ml7bwc1snn0wxxzywfg"))
              (modules '((guix build utils)))
              (snippet '(begin
                          ;; FIXME: Unbundle samtools and bcftools.
                          (delete-file-recursively "htslib")))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'build 'set-flags
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Tell setup.py to use the htslib input and link ncurses.
             (let ((htslib (assoc-ref inputs "htslib")))
               (setenv "HTSLIB_MODE" "external")
               (setenv "HTSLIB_LIBRARY_DIR" (string-append htslib "/lib"))
               (setenv "HTSLIB_INCLUDE_DIR"
                       (string-append htslib "/include"))
               (setenv "LDFLAGS" "-lncurses")
               (setenv "CFLAGS" "-D_CURSES_LIB=1"))))
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; Run from the "tests" directory so that Python does not
               ;; import pysam from the current working directory.
               (with-directory-excursion "tests"
                 (setenv "HOME" "/tmp")
                 ;; Generate the test data first.
                 (invoke "make" "-C" "pysam_data")
                 (invoke "make" "-C" "cbcf_data")
                 ;; FileHTTP tests need network access; deselect them.
                 (invoke "pytest" "-k" "not FileHTTP"))))))))
    (propagated-inputs
     (list htslib))                     ;included from installed header files
    (inputs
     (list ncurses curl zlib))
    (native-inputs
     (list python-cython
           python-pytest
           ;; The remaining entries are needed by the test suite only.
           samtools
           bcftools))
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (license license:expat)))
2304
;; twobitreader: reader for .2bit files, fetched from the upstream git tag.
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.6")
    (home-page "https://github.com/benjschiller/twobitreader")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/benjschiller/twobitreader")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1qbxvv1h58cismbk1anpjrkpghsaiy64a11ir3lhy6qch6xf8n62"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f))                    ;tests are not included
    (native-inputs
     (list python-sphinx))
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (license license:artistic2.0)))
2329
;; plastid: genomic analysis library, fetched from PyPI.
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.5.1")
    (home-page "https://github.com/joshuagryphon/plastid")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plastid" version))
       (sha256
        (base32 "1a7mdky2xw02y88l51f58pqk8039ahdp6sblj3zx58zarmy2pqyl"))))
    (build-system python-build-system)
    (arguments
     '(#:tests? #f))                    ;some test files are not included
    (native-inputs
     (list python-cython python-nose))
    (propagated-inputs
     (list python-numpy
           python-scipy
           python-pandas
           python-pysam
           python-matplotlib
           python-biopython
           python-twobitreader
           python-termcolor))
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (license license:bsd-3)))
2361
;; TEtranscripts / TEcount.  The scripts shell out to sort, rm, Rscript and
;; bamToBed; those calls are rewritten to absolute file names, and the
;; installed executables are wrapped so they find the R packages.
(define-public tetoolkit
  (package
    (name "tetoolkit")
    (version "2.2.1b")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/mhammell-laboratory/TEtranscripts")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1m3xsydakhdan9gp9mfdz7llka5g6ak91d0mbl1cmmxq9qs6an4y"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'adjust-requirements
           (lambda _
             (substitute* "setup.py"
               ;; This defunct dependency isn't required for Python 3 (see:
               ;; https://github.com/mhammell-laboratory/TEtranscripts/issues/111).
               ((".*'argparse'.*") ""))))
         (add-after 'unpack 'patch-invocations
           (lambda* (#:key inputs #:allow-other-keys)
             ;; External commands in the entry-point scripts.
             (substitute* '("bin/TEtranscripts"
                            "bin/TEcount")
               (("'sort ")
                (string-append "'" (search-input-file inputs "bin/sort") " "))
               (("'rm -f ")
                (string-append "'" (search-input-file inputs "bin/rm") " -f "))
               (("'Rscript'")
                (string-append "'" (search-input-file inputs "bin/Rscript")
                               "'")))
             ;; External commands in the library modules.
             (substitute* "TEToolkit/IO/ReadInputs.py"
               (("BamToBED")
                (search-input-file inputs "bin/bamToBed")))
             (substitute* "TEToolkit/Normalization.py"
               (("\"Rscript\"")
                (string-append "\"" (search-input-file inputs "bin/Rscript")
                               "\"")))))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure the executables find R packages.
             (for-each (lambda (script)
                         (wrap-program script
                           `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
                       (list (search-input-file outputs "bin/TEtranscripts")
                             (search-input-file outputs "bin/TEcount"))))))))
    (inputs
     (list bash-minimal
           coreutils
           bedtools
           python-pysam
           r-minimal
           r-deseq2))
    (home-page "https://github.com/mhammell-laboratory/TEtranscripts")
    (synopsis "Transposable elements in differential enrichment analysis")
    (description
     "This is a package for including transposable elements in differential
enrichment analysis of sequencing datasets. TEtranscripts and TEcount take
RNA-seq (and similar data) and annotates reads to both genes and transposable
elements. TEtranscripts then performs differential analysis using DESeq2.
Note that TEtranscripts and TEcount rely on specially curated GTF files, which
are not included due to their size.")
    (license license:gpl3+)))
2428
;; CD-HIT sequence clustering.  Built with plain make; build timestamps are
;; removed from the sources for reproducibility.
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.8.1")
    (home-page "http://weizhongli-lab.org/cd-hit/")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/weizhongli/cdhit"
                                  "/releases/download/V" version
                                  "/cd-hit-v" version
                                  "-2019-0228.tar.gz"))
              (sha256
               (base32
                "1phmfhgcpyfd6kj7jwzw976613lcpv1wc2pzfdfaxla062x2s5r6"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #f                       ;there are no tests
      #:make-flags
      ;; The Makefile copies executables straight into PREFIX, so point it
      ;; at the "bin" directory of the output.
      #~(list (string-append "PREFIX=" #$output "/bin")
              ;; Raise the limit to allow longer sequences (e.g. Pacbio
              ;; sequences).
              "MAX_SEQ=60000000")
      #:phases
      '(modify-phases %standard-phases
         ;; Strip __DATE__ and __TIME__ from the sources; they are a source
         ;; of non-determinism.
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))))
         ;; There is no "configure" script.
         (delete 'configure)
         ;; "make install" expects the target directory to exist already.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (mkdir-p (string-append out "/bin"))))))))
    (inputs
     (list perl zlib))
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    ;; The manual says: "It can be copied under the GNU General Public License
    ;; version 2 (GPLv2)."
    (license license:gpl2)))
2478
;; Auxiliary tools built from the "cd-hit-auxtools" subdirectory of the
;; cd-hit sources.  Everything not overridden here is inherited from cd-hit.
(define-public cd-hit-auxtools
  (package
    (inherit cd-hit)
    (name "cd-hit-auxtools")
    (inputs '())
    (arguments
     (list
      #:tests? #f                       ;there are no tests
      #:phases
      #~(modify-phases %standard-phases
          ;; Build inside the auxiliary tools' subdirectory.
          (add-after 'unpack 'chdir
            (lambda _
              (chdir "cd-hit-auxtools")))
          ;; There is no "configure" script.
          (delete 'configure)
          ;; There is no "install" target; copy the executables by hand.
          (replace 'install
            (lambda _
              (for-each (lambda (program)
                          (install-file program
                                        (string-append #$output "/bin")))
                        '("cd-hit-dup" "cd-hit-lap" "read-linker")))))))))
2498
;; CLIPper peak caller.  A pre-compiled shared object shipped in the
;; repository is deleted in the source snippet.
(define-public clipper
  (package
    (name "clipper")
    (version "2.0.1")
    (home-page "https://github.com/YeoLab/clipper")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/YeoLab/clipper")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0508rgnfjk5ar5d1mjbjyrnarv4kw9ksq0m3jw2bmgabmb5v6ikk"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete pre-compiled files.
                  (delete-file "clipper/src/peaks.so")))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #false
       #:phases
       (modify-phases %standard-phases
         ;; Set the Cython language level to 3 for both extension modules
         ;; by inserting two assignments right before the setup() call.
         (add-after 'unpack 'use-python3-for-cython
           (lambda _
             (substitute* "setup.py"
               (("^setup")
                "\
peaks.cython_directives = {'language_level': '3'}
readsToWiggle.cython_directives = {'language_level': '3'}
setup"))))
         (add-after 'unpack 'disable-nondeterministic-test
           (lambda _
             ;; Rename the test whose outcome varies from run to run.
             (substitute* "clipper/test/test_call_peak.py"
               (("test_get_FDR_cutoff_mean") "_test_get_FDR_cutoff_mean"))))
         ;; This doesn't work because "usage" is executed, and that calls
         ;; exit(8).
         (replace 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             (when tests?
               (add-installed-pythonpath inputs outputs)
               (with-directory-excursion "clipper/test"
                 (invoke "python" "-m" "unittest")))))
         ;; This is not a library
         (delete 'sanity-check))))
    (inputs
     (list htseq
           python-pybedtools
           python-cython
           python-scikit-learn
           python-matplotlib
           python-pandas
           python-pysam
           python-numpy
           python-scipy))
    (native-inputs
     (list python-setuptools-git
           python-mock                  ;for tests
           python-nose                  ;for tests
           python-pytz))                ;for tests
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
2565
;; CodingQuarry gene predictor.  Built with plain make and installed by
;; hand; the data directory is exposed through QUARRY_PATH.
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/codingquarry/CodingQuarry_v"
                    version ".tar.gz"))
              (sha256
               (base32
                "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/codingquarry")))
               ;; Documentation and data files first, then the programs.
               (install-file "INSTRUCTIONS.pdf" doc)
               (copy-recursively "QuarryFiles"
                                 (string-append out "/QuarryFiles"))
               (for-each (lambda (program)
                           (install-file program bin))
                         '("CodingQuarry"
                           "CufflinksGTF_to_CodingQuarryGFF3.py")))
             #t)))))
    (native-inputs `(("python" ,python-2))) ;only Python 2 is supported
    (inputs (list openmpi))
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (license license:gpl3+)))
2606
;; Clustal Omega multiple sequence aligner; uses gnu-build-system with no
;; custom arguments.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.4")
    (home-page "http://www.clustal.org/omega/")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32 "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
    (build-system gnu-build-system)
    (inputs
     (list argtable))
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
2629
;; CrossMap, fetched from PyPI.  Pre-built artifacts shipped in the source
;; distribution are removed by the source snippet.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.6.1")
    (home-page "http://crossmap.sourceforge.net/")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "CrossMap" version))
              (sha256
               (base32
                "0hqminh5wn1p3x481jbyc7gmncp5xc196hpvki7k25vzbryhwcix"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove byte-compiled Python files and the ".eggs"
                  ;; directory.
                  (for-each delete-file (find-files "." "\\.pyc$"))
                  (delete-file-recursively ".eggs")))))
    (build-system python-build-system)
    (native-inputs
     (list python-cython python-nose))
    (inputs
     (list python-bx-python python-numpy python-pybigwig python-pysam
           zlib))
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
2659
(define-public python-dnaio
  (package
    (name "python-dnaio")
    (version "0.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "dnaio" version))
       (sha256
        (base32
         "14v5yyasq2bz34j38wi3xfcp06jj7l35ppibjcn95l2n73hz3zwi"))))
    (build-system python-build-system)
    ;; Build- and test-time only tools.
    (native-inputs
     (list python-cython python-pytest))
    ;; xopen is needed when the library is imported, not just while building
    ;; or testing, so it must be propagated to users of this package.
    ;; Propagated inputs remain available during the build and check phases.
    (propagated-inputs
     (list python-xopen))
    (home-page "https://github.com/marcelm/dnaio/")
    (synopsis "Read FASTA and FASTQ files efficiently")
    (description
     "dnaio is a Python library for fast parsing of FASTQ and also FASTA
files. The code was previously part of the cutadapt tool.")
    (license license:expat)))
2680
(define-public python-deeptoolsintervals
  ;; Fetched from PyPI; zlib is the only declared input.
  (package
    (name "python-deeptoolsintervals")
    (version "0.1.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "deeptoolsintervals" version))
       (sha256
        (base32 "1xnl80nblysj6dylj4683wgrfa425rkx4dp5k65hvwdns9pw753x"))))
    (build-system python-build-system)
    (inputs (list zlib))
    (home-page "https://github.com/deeptools/deeptools_intervals")
    (synopsis "Create GTF-based interval trees with associated meta-data")
    (description
     "This package provides a Python module creating/accessing GTF-based
interval trees with associated meta-data. It is primarily used by the
@code{deeptools} package.")
    (license license:expat)))
2701
(define-public python-deeptools
  (package
    (name "python-deeptools")
    (version "3.4.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/deepTools")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0l09vyynz6s6w7fnyd94rpys4a6aja6kp4gli64pngdxdz3md1nl"))))
    (build-system python-build-system)
    (native-inputs (list python-mock python-nose))
    (propagated-inputs
     (list python-matplotlib
           python-numpy
           python-numpydoc
           python-py2bit
           python-pybigwig
           python-pysam
           python-scipy
           python-deeptoolsintervals
           python-plotly-2.4.1))
    (home-page "https://pypi.org/project/deepTools/")
    (synopsis "Useful tools for exploring deep sequencing data")
    (description "This package addresses the challenge of handling large amounts
of data that are now routinely generated from DNA sequencing centers.
@code{deepTools} contains useful modules to process the mapped reads data for
multiple quality checks, creating normalized coverage files in standard bedGraph
and bigWig file formats, that allow comparison between different files. Finally,
using such normalized and standardized files, deepTools can create many
publication-ready visualizations to identify enrichments and for functional
annotations of the genome.")
    ;; Dual licensing: deeptools/cm.py is under the BSD license, everything
    ;; else is under the MIT (Expat) license.
    (license (list license:bsd-3 license:expat))))

;; Keep the old variable name working as a deprecated alias.
(define-deprecated deeptools python-deeptools)
2743
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "2.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "cutadapt" version))
       (sha256
        (base32 "1vqmsfkm6llxzmsz9wcfcvzx9a9f8iabvwik2rbyn7nc4wm25z89"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove the pre-generated C file and make setup.py always run
         ;; Cython.
         (add-after 'unpack 'always-cythonize
           (lambda _
             (delete-file "src/cutadapt/_align.c")
             ;; setup.py skips Cython when PKG-INFO is present, so have it
             ;; look for a file that never exists instead.
             (substitute* "setup.py"
               (("os.path.exists\\('PKG-INFO'\\):")
                "os.path.exists('totally-does-not-exist'):")))))))
    (native-inputs
     (list python-cython
           python-pytest
           python-setuptools-scm))
    (inputs
     (list python-dnaio
           python-xopen))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
2775
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.4.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dpryan79/libBigWig")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "09693dmf1scdac5pyq6qyn8b4mcipvnmc370k9a5z41z81m3dcsj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       ;; The test suite needs access to the web, so it stays disabled.
       #:tests? #f
       ;; The 'configure phase is deleted below, so the compiler and the
       ;; installation prefix are passed to make directly.
       #:make-flags
       ,#~(list "CC=gcc"
                (string-append "prefix=" #$output))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs (list zlib curl))
    (native-inputs
     `(("doxygen" ,doxygen)
       ;; Needed for the tests.
       ("python" ,python-2)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
2811
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.3.17")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32 "157x6v48y299zm382krf1dw08fdxg95im8lnabhp5vc94s04zxj1"))
       (modules '((guix build utils)))
       ;; Drop the bundled copy of libBigWig; the packaged library from the
       ;; inputs is linked in instead (see the phase below).
       (snippet
        '(begin
           (delete-file-recursively "libBigWig")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Prepend "BigWig" to the library list in setup.py so the
         ;; extension links against libBigWig.
         (add-after 'unpack 'link-with-libBigWig
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (inputs
     (list libbigwig zlib curl))
    (propagated-inputs
     (list python-numpy))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
2847
(define-public python-schema-salad
  (package
    (name "python-schema-salad")
    (version "8.2.20211116214159")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "schema-salad" version))
       (sha256
        (base32 "005dh2y45x92zl8sf2sqjmfvcqr4hrz8dfckgkckv87003v7lwqc"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'check 'skip-failing-tests
           (lambda _
             ;; These tests need network access, so put a pytest "skip"
             ;; marker in front of each matching test definition.
             (substitute* "schema_salad/tests/test_cwl11.py"
               (("^def test_(secondaryFiles|outputBinding)" all)
                (string-append "@pytest.mark.skip(reason="
                               "\"test requires network access\")\n"
                               all))))))))
    (native-inputs
     (list python-black
           python-pytest
           python-pytest-runner))
    (propagated-inputs
     (list python-cachecontrol
           python-lockfile
           python-mistune
           python-rdflib
           python-rdflib-jsonld
           python-requests
           python-ruamel.yaml
           python-typing-extensions))
    (home-page "https://github.com/common-workflow-language/schema_salad")
    (synopsis "Schema Annotations for Linked Avro Data (SALAD)")
    (description
     "Salad is a schema language for describing JSON or YAML structured linked
data documents. Salad schema describes rules for preprocessing, structural
validation, and hyperlink checking for documents described by a Salad schema.
Salad supports rich data modeling with inheritance, template specialization,
object identifiers, object references, documentation generation, code
generation, and transformation to RDF. Salad provides a bridge between document
and record oriented data modeling and the Semantic Web.")
    (license license:asl2.0)))
2893
(define-public cwltool
  (package
    (name "cwltool")
    (version "3.1.20220119140128")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/common-workflow-language/cwltool")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1jmrm0qrqgka79avc1kq63fgh20gx6g07fc8p3iih4k85vhdyl3f"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Relax the exact version pin on "prov" in setup.py.
         (add-after 'unpack 'loosen-version-restrictions
           (lambda _
             (substitute* "setup.py"
               (("== 1.5.1") ">=1.5.1"))))
         ;; Replace the git timestamp lookup with an expression built from
         ;; the package version with its first four characters dropped.
         (add-after 'unpack 'dont-use-git
           (lambda _
             (let ((stamp (string-drop ,version 4)))
               (substitute* "gittaggers.py"
                 (("self.git_timestamp_tag\\(\\)")
                  (string-append
                   "time.strftime('.%Y%m%d%H%M%S', time.gmtime(int("
                   stamp ")))"))))))
         (add-after 'unpack 'modify-tests
           (lambda _
             ;; SKIP puts a pytest "skip" decorator line in front of the
             ;; matched test definition.
             (let ((skip (lambda (definition)
                           (string-append
                            "@pytest.mark.skip(reason=\"Disabled by Guix\")\n"
                            definition))))
               ;; These tests try to connect to the internet.
               (for-each delete-file
                         '("tests/test_content_type.py"
                           "tests/test_udocker.py"
                           "tests/test_http_input.py"))
               (substitute* "tests/test_load_tool.py"
                 (("def test_load_graph_fragment_from_packed" definition)
                  (skip definition)))
               (substitute* "tests/test_examples.py"
                 (("def test_env_filtering" definition)
                  (skip definition))
                 ;; This one tries to use cwl-runners.
                 (("def test_v1_0_arg_empty_prefix_separate_false" definition)
                  (skip definition)))

               ;; Point the test workflows at the shell found on PATH
               ;; instead of the hard-coded "/bin/sh".
               (substitute* '("cwltool/schemas/v1.1/tests/env-tool1.cwl"
                              "cwltool/schemas/v1.1/tests/env-tool2.cwl"
                              "cwltool/schemas/v1.1/tests/imported-hint.cwl"
                              "tests/subgraph/env-tool2.cwl"
                              "tests/subgraph/env-tool2_req.cwl"
                              "tests/subgraph/env-wf2_subwf-packed.cwl"
                              "tests/subgraph/env-tool2_no_env.cwl")
                 (("\"/bin/sh\"") (string-append "\"" (which "sh") "\"")))
               ;; Pytest doesn't know what to do with "-n auto".
               (substitute* "tox.ini"
                 (("-n auto") ""))))))))
    (inputs
     (list python-argcomplete
           python-bagit
           python-coloredlogs
           python-mypy-extensions
           python-prov
           python-pydot
           python-psutil
           python-rdflib
           python-requests
           python-ruamel.yaml
           python-schema-salad
           python-shellescape
           python-typing-extensions
           ;; Not listed as needed but still necessary:
           node))
    (native-inputs
     (list python-arcp
           python-humanfriendly
           python-mock
           python-pytest
           python-pytest-cov
           python-pytest-mock
           python-pytest-runner))
    (home-page
     "https://github.com/common-workflow-language/common-workflow-language")
    (synopsis "Common Workflow Language reference implementation")
    (description
     "This is the reference implementation of the @acronym{CWL, Common Workflow
Language} standards. The CWL open standards are for describing analysis
workflows and tools in a way that makes them portable and scalable across a
variety of software and hardware environments, from workstations to cluster,
cloud, and high performance computing (HPC) environments. CWL is designed to
meet the needs of data-intensive science, such as Bioinformatics, Medical
Imaging, Astronomy, Physics, and Chemistry. The @acronym{cwltool, CWL reference
implementation} is intended to be feature complete and to provide comprehensive
validation of CWL files as well as provide other tools related to working with
CWL descriptions.")
    (license license:asl2.0)))
2991
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.5.1")
    (source
     (origin
       ;; Fetch from GitHub so that the tests are included.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/jeetsukumaran/DendroPy")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0lrfzjqzbpk1rrra9vd7z2j7q09jy9w1ss7wn2rd85i4k5y3xz8l"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'skip-broken-tests
           (lambda _
             ;; These tests fail because there is no "paup" executable.
             ;; Prefix the listed names with "_skip_"; the alternation
             ;; regexp is built from the list of names.
             (substitute* "tests/test_datamodel_split_bitmasks.py"
               (((format #false "(~{~a~^|~})"
                         '("test_group1"
                           "test_basic_split_counting_under_different_rootings"
                           "test_basic_split_count_with_incorrect_weight_treatment_raises_error"
                           "test_basic_split_count_with_incorrect_rootings_raises_error"))
                 test-name)
                (string-append "_skip_" test-name)))
             (delete-file "tests/test_paup.py")
             (delete-file "tests/test_dataio_nexml_reader_tree_list.py")
             ;; Assert error for unknown reasons.
             (substitute* "tests/test_protractedspeciation.py"
               (("test_by_num_lineages" test-name)
                (string-append "_skip_" test-name))))))))
    (home-page "https://dendropy.org/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)))
3034
(define-public python-py2bit
  ;; Fetched from PyPI and built with the Python build system; no extra
  ;; inputs are declared.
  (package
    (name "python-py2bit")
    (version "0.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "py2bit" version))
       (sha256
        (base32 "1vw2nvw1yrl7ikkqsqs1pg239yr5nspvd969r1x9arms1k25a1a5"))))
    (build-system python-build-system)
    (home-page "https://github.com/dpryan79/py2bit")
    (synopsis "Access 2bit files using lib2bit")
    (description
     "This package provides Python bindings for lib2bit to access 2bit files
with Python.")
    (license license:expat)))
3053
(define-public delly
  (package
    (name "delly")
    (version "0.8.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dellytools/delly")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1ibnplgfzj96w8glkx17v7sld3pm402fr5ybmf3h0rlcryabxrqy"))
       (modules '((guix build utils)))
       ;; Remove the bundled htslib sources; htslib is listed in the
       ;; package inputs.
       (snippet
        '(begin
           (delete-file-recursively "src/htslib")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       #:make-flags
       ,#~(list "PARALLEL=1"            ; Allow parallel execution at run-time.
                (string-append "prefix=" #$output))
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure phase.
         (delete 'configure)
         ;; Copy the "excludeTemplates" directory into the output under
         ;; share/delly/templates.
         (add-after 'install 'install-templates
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/share/delly/templates")))
               (mkdir-p target)
               (copy-recursively "excludeTemplates" target)
               #t))))))
    (inputs
     (list boost
           bzip2
           htslib
           zlib))
    (home-page "https://github.com/dellytools/delly")
    (synopsis "Integrated structural variant prediction method")
    (description "Delly is an integrated structural variant prediction method
that can discover and genotype deletions, tandem duplications, inversions and
translocations at single-nucleotide resolution in short-read massively parallel
sequencing data. It uses paired-ends and split-reads to sensitively and
accurately delineate genomic rearrangements throughout the genome.")
    (license license:gpl3+)))
3097
(define-public trf
  ;; Built from the upstream git tag "v<version>" with the unmodified GNU
  ;; build system.
  (package
    (name "trf")
    (version "4.09.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Benson-Genomics-Lab/TRF")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0fhwr4s1mf8nw8fr5imwjvjr42b59p97zr961ifm8xl1bajz4wpg"))))
    (build-system gnu-build-system)
    (home-page "https://github.com/Benson-Genomics-Lab/TRF")
    (synopsis "Tandem Repeats Finder: a program to analyze DNA sequences")
    (description "A tandem repeat in DNA is two or more adjacent, approximate
copies of a pattern of nucleotides. Tandem Repeats Finder is a program to
locate and display tandem repeats in DNA sequences. In order to use the
program, the user submits a sequence in FASTA format. The output consists of
two files: a repeat table file and an alignment file. Submitted sequences may
be of arbitrary length. Repeats with pattern size in the range from 1 to 2000
bases are detected.")
    (license license:agpl3+)))
3121
(define-public repeat-masker
  (package
    (name "repeat-masker")
    (version "4.1.2-p1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://www.repeatmasker.org/"
                                  "RepeatMasker/RepeatMasker-"
                                  version ".tar.gz"))
              (sha256
               (base32 "15hfdfpzmdjcx7ng7rjfid69bmvgn3z9g9r43qhjnhjhq3v4prab"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #false                  ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; There is nothing to compile.  Copy the whole source tree to
         ;; share/RepeatMasker in the output and run the Perl "configure"
         ;; script from there, pointing it at the trf executable and the
         ;; hmmer "bin" directory.
         (replace 'build
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((share (string-append (assoc-ref outputs "out")
                                         "/share/RepeatMasker")))
               (mkdir-p share)
               (copy-recursively "." share)
               (with-directory-excursion share
                 (invoke "perl" "configure"
                         "--trf_prgm" (which "trf")
                         "--hmmer_dir"
                         (string-append (assoc-ref inputs "hmmer")
                                        "/bin"))))))
         ;; Install the RepeatMasker script to bin/ and wrap it so that Perl
         ;; finds both the build-time PERL5LIB entries and the modules that
         ;; stay in share/RepeatMasker.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out   (assoc-ref outputs "out"))
                    (share (string-append out "/share/RepeatMasker"))
                    (bin   (string-append out "/bin"))
                    (path  (getenv "PERL5LIB")))
               (install-file (string-append share "/RepeatMasker") bin)
               (wrap-program (string-append bin "/RepeatMasker")
                 `("PERL5LIB" ":" prefix (,path ,share)))))))))
    (inputs
     (list perl
           perl-text-soundex
           python
           python-h5py
           hmmer
           trf))
    ;; The home page, synopsis and description used to be copies of the
    ;; "trf" package metadata; they now describe RepeatMasker itself.
    (home-page "https://www.repeatmasker.org/")
    (synopsis "Screen DNA sequences for interspersed repeats")
    (description "RepeatMasker is a program that screens DNA sequences for
interspersed repeats and low complexity DNA sequences.  The output of the
program is a detailed annotation of the repeats that are present in the query
sequence as well as a modified version of the query sequence in which all the
annotated repeats have been masked.")
    (license license:osl2.1)))
3177
(define-public diamond
  (package
    (name "diamond")
    (version "0.9.30")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bbuchfink/diamond")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0k6f3kb6cniw11xw6763kkbs1sl0yack7xsy7q5fl5v170ssphq4"))))
    (build-system cmake-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Strip "-march=native" from CMakeLists.txt.
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (inputs (list zlib))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (license license:agpl3+)))
3211
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/maaskola/discrover")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "173fwi2vb6a5kp406hm3jj6j7v4whww796f2qcygp4rpvamh307y"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; For reproducible dates, texlive needs this to respect
         ;; SOURCE_DATE_EPOCH.
         (add-before 'build 'set-force-source-date
           (lambda _
             (setenv "FORCE_SOURCE_DATE" "1")))
         (add-after 'unpack 'fix-latex-errors
           (lambda _
             ;; Edit the bibliography with the default port encoding
             ;; unset, replacing the author name with LaTeX umlaut escapes.
             (with-fluids ((%default-port-encoding #f))
               (substitute* "doc/references.bib"
                 (("\\{S\\}illanp[^,]+,")
                  "{S}illanp{\\\"a}{\\\"a},")))
             ;; XXX: pdflatex keeps complaining about these characters,
             ;; which reach the manual through the generated
             ;; discrover-cli-help.txt, so spell them out in ASCII.
             (substitute* "src/hmm/cli.cpp"
               (("µ") "mu")
               (("η") "eta")
               (("≤") "<="))
             ;; This seems to be a syntax error.
             (substitute* "doc/discrover-manual.tex"
               (("theverbbox\\[t\\]") "theverbbox"))))
         ;; Add "#include <random>" right after the include guard of two
         ;; headers.
         (add-after 'unpack 'add-missing-includes
           (lambda _
             (substitute* "src/executioninformation.hpp"
               (("#define EXECUTIONINFORMATION_HPP" guard)
                (string-append guard "\n#include <random>")))
             (substitute* "src/plasma/fasta.hpp"
               (("#define FASTA_HPP" guard)
                (string-append guard "\n#include <random>"))))))))
    (inputs
     (list boost
           cairo
           rmath-standalone))
    (native-inputs
     `(("texlive" ,(texlive-updmap.cfg
                    (list texlive-cm
                          texlive-amsfonts
                          texlive-doi
                          texlive-fonts-ec
                          texlive-latex-examplep
                          texlive-hyperref
                          texlive-latex-ms
                          texlive-latex-natbib
                          texlive-bibtex ; style files used by natbib
                          texlive-pgf    ; tikz
                          texlive-latex-verbatimbox)))
       ("imagemagick" ,imagemagick)))
    (home-page "https://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (license license:gpl3+)))
3280
(define-public eigensoft
  (package
    (name "eigensoft")
    (version "7.2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/DReichLab/EIG")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1c141fqvhnzibmnf22sv23vbmzm20kjjyrib44cfh75wyndp2d9k"))
       (modules '((guix build utils)))
       ;; Remove the pre-built binaries and leave an empty "bin" directory
       ;; in their place.
       (snippet
        '(begin
           (delete-file-recursively "bin")
           (mkdir "bin")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; There is nothing to configure, but the Makefile is in a
         ;; sub-directory, so just enter it.
         (replace 'configure
           (lambda _
             (chdir "src")
             #t))
         ;; The upstream install target only copies the executables to the
         ;; "bin" directory of the build root; copy them to the output.
         (add-after 'install 'actually-install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (for-each (lambda (program)
                           (install-file program bin))
                         (find-files "../bin" ".*"))
               #t))))))
    (inputs
     (list gsl
           lapack
           openblas
           perl
           `(,gfortran "lib")))
    (home-page "https://github.com/DReichLab/EIG")
    (synopsis "Tools for population genetics")
    (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
    ;; The eigensoft tools themselves are under the Expat license, but they
    ;; link with the GNU Scientific Library (GSL), so the effective license
    ;; is the GPL.
    (license license:gpl3+)))
3336
(define-public edirect
  (package
    (name "edirect")
    (version "13.3.20200128")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect"
                           "/versions/" version
                           "/edirect-" version ".tar.gz"))
       (sha256
        (base32 "093zp7klv81ph0y8mm8d78a9hnpfxbv2kdym70gzdf3vz176rw33"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file "Mozilla-CA.tar.gz")
           ;; The fiam/gounidecode library does not have any license, so
           ;; import rainycape/unidecode instead.
           (substitute* "rchive.go"
             (("github.com/fiam/gounidecode/unidecode")
              "golang.org/rainycape/unidecode"))
           #t))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         ;; The standard check is replaced by a simple one that runs after
         ;; installation (see the end of this phase list).
         (delete 'check)
         (add-after 'unpack 'patch-programs
           (lambda _
             ;; Ignore errors about missing xtract.Linux and rchive.Linux.
             (substitute* "pm-refresh"
               (("cat \\\"\\$target")
                "grep ^[[:digit:]] \"$target"))
             #t))
         ;; Install the scripts, then link the Go programs built by
         ;; edirect-go-programs under their short names.
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
                   (go-programs (assoc-ref inputs "edirect-go-programs")))
               (for-each
                (lambda (script)
                  (install-file script bin))
                '("archive-pubmed" "asp-cp" "asp-ls" "download-ncbi-data"
                  "download-pubmed" "edirect.pl" "efetch" "epost" "esearch"
                  "fetch-pubmed" "ftp-cp" "ftp-ls" "has-asp" "index-pubmed"
                  "pm-prepare" "pm-refresh" "pm-stash" "pm-collect"
                  "pm-index" "pm-invert" "pm-merge" "pm-promote"))
               (symlink (string-append go-programs "/bin/xtract.Linux")
                        (string-append bin "/xtract"))
               (symlink (string-append go-programs "/bin/rchive.Linux")
                        (string-append bin "/rchive")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure everything can run in a pure environment.  The
             ;; PATH prefix is the output's own bin directory followed by
             ;; the directories of the external tools.
             (let* ((out (assoc-ref outputs "out"))
                    (perl-path (getenv "PERL5LIB"))
                    (tool-dirs
                     (cons (string-append out "/bin")
                           (map (lambda (tool)
                                  (dirname (which tool)))
                                '("sed" "gzip" "grep" "perl" "uname")))))
               (for-each
                (lambda (program)
                  (wrap-program program
                    `("PERL5LIB" ":" prefix (,perl-path)))
                  (wrap-program program
                    `("PATH" ":" prefix ,tool-dirs)))
                (find-files out ".")))
             #t))
         ;; Smoke test: run the installed, wrapped edirect.pl with "-help".
         (add-after 'wrap-program 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (invoke (string-append (assoc-ref outputs "out")
                                    "/bin/edirect.pl")
                     "-filter" "-help")
             #t)))))
    (inputs
     (list edirect-go-programs
           perl-html-parser
           perl-encode-locale
           perl-file-listing
           perl-html-tagset
           perl-html-tree
           perl-http-cookies
           perl-http-date
           perl-http-message
           perl-http-negotiate
           perl-lwp-mediatypes
           perl-lwp-protocol-https
           perl-net-http
           perl-uri
           perl-www-robotrules
           perl-xml-simple
           perl))
    (home-page "https://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (native-search-paths
     ;; Ideally this should be set for LWP somewhere.
     (list (search-path-specification
            (variable "PERL_LWP_SSL_CA_FILE")
            (file-type 'regular)
            (separator #f)
            (files '("/etc/ssl/certs/ca-certificates.crt")))))
    (license license:public-domain)))
3452
(define-public edirect-go-programs
  ;; The Go helper programs from the edirect source, built with the Go
  ;; build system.  Every field not overridden below is inherited from
  ;; edirect.
  (package
    (inherit edirect)
    (name "edirect-go-programs")
    (build-system go-build-system)
    (arguments
     `(#:install-source? #f
       #:tests? #f                      ; No tests.
       #:import-path "ncbi.nlm.nih.gov/entrez/edirect"
       #:phases
       (modify-phases %standard-phases
         ;; Run "go build -v -x" once per program, in the listed order.
         ;; Each entry holds the remaining command-line arguments.
         (replace 'build
           (lambda* (#:key import-path #:allow-other-keys)
             (with-directory-excursion (string-append "src/" import-path)
               (for-each
                (lambda (build-arguments)
                  (apply invoke "go" "build" "-v" "-x" build-arguments))
                '(("j2x.go")
                  ("t2x.go")
                  ("-o" "xtract.Linux" "xtract.go" "common.go")
                  ("-o" "rchive.Linux" "rchive.go" "common.go")
                  ("-o" "symbols.Linux" "s2p.go"))))))
         ;; Copy the resulting executables into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs import-path #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
                   (build-directory (string-append "src/" import-path "/")))
               (for-each (lambda (program)
                           (format #t "installing ~a~%" program)
                           (install-file
                            (string-append build-directory program)
                            bin))
                         '("j2x" "t2x" "symbols.Linux" "xtract.Linux"
                           "rchive.Linux"))
               #t))))))
    (native-inputs '())
    (propagated-inputs '())
    (inputs
     (list go-github-com-fatih-color
           go-github-com-fogleman-gg
           go-github-com-gedex-inflector
           go-github-com-golang-freetype
           go-github-com-klauspost-cpuid
           go-github-com-pbnjay-memory
           go-github-com-surgebase-porter2
           go-golang-org-rainycape-unidecode
           go-golang-org-x-image
           go-golang-org-x-text))))
3496
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     ;; Building in parallel fails on some machines.
     `(#:parallel-build? #f))
    (native-inputs
     (list pkg-config))
    (inputs
     (list glib))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    ;; The description used to read "using a many alignment models".
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
3526
;; eXpress, built with CMake against the shared Boost, BamTools and protobuf
;; libraries provided as inputs.
(define-public express
  (package
    (name "express")
    (version "1.5.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/adarob/eXpress")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "18nb22n7x820fzjngf4qgyb3mspqkw7xyk7v7s5ps6wfrd8qwscb"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Point the build at the bamtools input instead of a bundled copy
         ;; and link Boost and protobuf dynamically.
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib"))
                 (("libprotobuf.a") "libprotobuf.so"))
               #t)))
         ;; Strip the include and the call of the version update check.
         (add-after 'unpack 'remove-update-check
           (lambda _
             (substitute* "src/main.cpp"
               (("#include \"update_check.h\"") "")
               (("check_version\\(PACKAGE_VERSION\\);") ""))
             #t)))))
    (inputs
     (list boost bamtools protobuf zlib))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
3574
;; Express Beta Diversity: built with plain "make" from the "source"
;; sub-directory; the binary and the helper script are installed by hand.
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.8")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/dparks1134/ExpressBetaDiversity")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0s0yzg5c21349rh7x4w9266jsvnp7j1hp9cf8sk32hz8nvrj745x"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'enter-source
           (lambda _ (chdir "source") #t))
         ;; Run the built binary with "-u" as the test.  The phase honours
         ;; #:tests? so that disabling tests actually skips it.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "../bin/ExpressBetaDiversity" "-u"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "../scripts/convertToEBD.py" bin)
               (install-file "../bin/ExpressBetaDiversity" bin)
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (home-page "https://github.com/dparks1134/ExpressBetaDiversity")
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
3612
;; FastTree is distributed as a single C file, so there is nothing to unpack
;; or configure: the source is compiled directly, once without and once with
;; OpenMP support.
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.10")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://www.microbesonline.org/fasttree/FastTree-"
                    version ".c"))
              (sha256
               (base32
                "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Flags shared by the serial and the OpenMP build.
             (let ((optimizations '("-O3"
                                    "-finline-functions"
                                    "-funroll-loops"
                                    "-Wall")))
               (apply invoke "gcc"
                      (append optimizations
                              (list "-o" "FastTree" source "-lm")))
               (apply invoke "gcc" "-DOPENMP" "-fopenmp"
                      (append optimizations
                              (list "-o" "FastTreeMP" source "-lm")))
               #t)))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("FastTree" "FastTreeMP"))
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    (description
     "FastTree can handle alignments with up to a million of sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
3668
;; FASTX-Toolkit release tarball, built with the GNU build system using
;; GCC 6 as the compiler.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32 "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     (list gcc-6                        ;; doesn't build with later versions
           pkg-config))
    (inputs
     (list libgtextutils))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
3701
;; Flexbar, built with CMake.  The upstream test script is run from the
;; source tree against the freshly built binary.
(define-public flexbar
  (package
    (name "flexbar")
    (version "3.4.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/seqan/flexbar")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1pq9sxvdnldl14libk234m72dqhwgzs3acgl943wchwdqlcsi5r2"))))
    (build-system cmake-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-tune-to-CPU
           (lambda _
             (substitute* "src/CMakeLists.txt"
               ((" -march=native") ""))))
         ;; The phase honours #:tests? so that disabling tests actually
         ;; skips the test script.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; Put the build directory first in PATH so that the test
               ;; script finds the "flexbar" binary that was just built.
               (setenv "PATH" (string-append (getcwd) ":" (getenv "PATH")))
               (with-directory-excursion "../source/test"
                 (invoke "bash" "flexbar_test.sh")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "flexbar"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/")))))))
    (inputs
     (list tbb-2020 zlib))
    (native-inputs
     (list pkg-config seqan-2))
    (home-page "https://github.com/seqan/flexbar")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:bsd-3)))
3747
;; fxtract, built with its Makefile.  The "util" directory of the checkout
;; is replaced with a pinned checkout of ctSkennerton/util before building.
(define-public fxtract
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ctSkennerton/fxtract")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0hab3gpwf4w9s87qlbswq6ws1qqybh4dcqk79q1ahyldzai5fgp5"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags ,#~(list
                          (string-append "PREFIX=" #$output)
                          "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Swap the "util" directory for the separately fetched
           ;; "ctskennerton-util" input.
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       (list pcre zlib))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util")
                   (commit util-commit)))
             ;; The checkout name previously misspelled the author's name
             ;; as "ctstennerton".
             (file-name (string-append
                         "ctskennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
3811
;; GEMMA, built with its Makefile after the "contrib" directory has been
;; removed from the source checkout.
(define-public gemma
  (package
    (name "gemma")
    (version "0.98.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/genetics-statistics/GEMMA")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1p8a7kkfn1mmrg017aziy544aha8i9h6wd1x2dk3w2794wl33qb7"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file-recursively "contrib")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Create the output directory used by the Makefile and replace
         ;; the hard-coded OpenBLAS location with the openblas input.
         (add-after 'unpack 'prepare-build
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "bin")
             (substitute* "Makefile"
               (("/usr/local/opt/openblas")
                (assoc-ref inputs "openblas")))
             #t))
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; 'make slow-check' expects shunit2-2.0.3.
               (with-directory-excursion "test"
                 (invoke "./test_suite.sh"))
               #t)))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "bin/gemma" bindir)
               #t))))))
    (native-inputs
     `(("catch" ,catch2-1)
       ("perl" ,perl)
       ("shunit2" ,shunit2)
       ("which" ,which)))
    (inputs
     (list gsl openblas zlib))
    (home-page "https://github.com/genetics-statistics/GEMMA")
    (synopsis "Tool for genome-wide efficient mixed model association")
    (description
     "@acronym{GEMMA, Genome-wide Efficient Mixed Model Association} provides a
standard linear mixed model resolver with application in @acronym{GWAS,
genome-wide association studies}.")
    (license license:gpl3)))
3868
;; HISAT, built from the upstream zip archive with its Makefile ("allall"
;; target) and installed by copying the hisat* programs into bin/.
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.6")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
                    version "-beta-source.zip"))
              (sha256
               (base32
                "177z85vqp9b30vgxl5py5hz4mm37ila37nzhfam23ci9iyfxgyv9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; replace BUILD_HOST and BUILD_TIME for deterministic build
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             ;; This "extended character" is not considered valid.
             (substitute* "processor_support.h"
               (("“") "\"")
               (("”") "\""))))
         ;; Install every file whose name matches the hisat program pattern.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out") "/bin/"))
                    (programs
                     (find-files
                      "."
                      "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         programs)))))))
    (native-inputs
     (list unzip))
    (inputs
     (list perl python zlib))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "https://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
3935
;; HISAT2, built with its Makefile ("allall" target).  The manual is
;; generated with "make doc" and installed alongside the programs.
(define-public hisat2
  (package
    (name "hisat2")
    (version "2.2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/DaehwanKimLab/hisat2/")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0lmzdhzjkvxw7n5w40pbv5fgzd4cz0f9pxczswn3d4cr0k10k754"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags (list "CC=gcc" "CXX=g++" "allall")
       #:modules ((guix build gnu-build-system)
                  (guix build utils)
                  (srfi srfi-26))
       #:phases
       (modify-phases %standard-phases
         ;; Replace the embedded build date with a constant.
         (add-after 'unpack 'make-deterministic
           (lambda _
             (substitute* "Makefile"
               (("`date`") "0"))))
         (delete 'configure)
         (add-before 'build 'build-manual
           (lambda _
             (mkdir-p "doc")
             (invoke "make" "doc")))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/"))
                    (docdir (string-append out "/share/doc/hisat2/"))
                    (programs
                     (find-files
                      "."
                      "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         programs)
               (mkdir-p docdir)
               (install-file "doc/manual.inc.html" docdir)))))))
    (native-inputs
     (list perl pandoc))                ; for documentation
    (inputs
     `(("python" ,python-wrapper)))
    (home-page "https://daehwankimlab.github.io/hisat2/")
    (synopsis "Graph-based alignment of genomic sequencing reads")
    (description "HISAT2 is a fast and sensitive alignment program for mapping
next-generation sequencing reads (both DNA and RNA) to a population of human
genomes (as well as to a single reference genome). In addition to using one
global @dfn{graph FM} (GFM) index that represents a population of human
genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
the whole genome. These small indexes, combined with several alignment
strategies, enable rapid and accurate alignment of sequencing reads. This new
indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
    ;; HISAT2 contains files from Bowtie2, which is released under
    ;; GPLv2 or later.  The HISAT2 source files are released under
    ;; GPLv3 or later.
    (license license:gpl3+)))
3997
;; HMMER release tarball, built with the unmodified GNU build system.
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.3.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/hmmer/hmmer-"
                           version ".tar.gz"))
       (sha256
        (base32 "0s9wf6n0qanbx8qs6igfl3vyjikwbrvh4d9d6mv54yp3xysykzlj"))))
    (build-system gnu-build-system)
    (native-inputs
     (list perl python))                ; for tests
    ;; hmmer uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (home-page "http://hmmer.org/")
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (license license:bsd-3)))
4023
;; HTSeq, built from the Git repository with the Python build system and
;; tested with pytest.
(define-public htseq
  (package
    (name "htseq")
    (version "2.0.2")
    ;; Sources on pypi do not include everything needed to run the tests.
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/htseq/htseq")
                    (commit (string-append "release_" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1kbr4ydjjhizz6r5m3xd4f0wj7qnn8zs0vnzghhgaa0yhbya5r19"))))
    (build-system python-build-system)
    (arguments
     (list
      #:phases
      '(modify-phases %standard-phases
         ;; Avoid rebuilding the extension.  Everything is built during the
         ;; 'install phase anyway.
         (delete 'build)
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "pytest" "-v")))))))
    (propagated-inputs
     (list python-matplotlib
           python-numpy
           python-pysam))
    (native-inputs
     (list python-cython
           python-pandas
           python-pytest
           python-scipy
           swig))
    (home-page "https://github.com/htseq")
    (synopsis "Framework for analyzing high-throughput sequencing data")
    ;; The description is a full sentence, so it ends with a period like the
    ;; other descriptions in this file.
    (description
     "This package provides a framework to process and analyze data from
high-throughput sequencing (HTS) assays.")
    (license license:gpl3+)))
4066
;; HTSJDK, built with Ant from a source tree whose "lib" directory of
;; pre-built binaries has been emptied.
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "2.3.0") ; last version without build dependency on gradle
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1b178ixcabanm834ydjl3jiakpyxdmki32hqfv2abrzn3rcwa28i"))
              (modules '((guix build utils)))
              (snippet
               ;; Delete pre-built binaries
               '(begin
                  (delete-file-recursively "lib")
                  (mkdir-p "lib")
                  #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Internet access
       #:jdk ,icedtea-8
       ;; Make the "dist" directory point into the output.
       #:make-flags
       ,#~(list (string-append "-Ddist=" #$output "/share/java/htsjdk/"))
       #:build-target "all"
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     (list java-testng))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
4115
;; Newer HTSJDK.  The upstream build.xml and the test sources are removed
;; after unpacking, and the jar is built via #:jar-name.
(define-public java-htsjdk-latest
  (package
    (name "java-htsjdk")
    (version "2.14.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk")
                    (commit version)))
              (file-name (string-append name "-" version "-checkout"))
              (sha256
               (base32
                "1lmya1fdjy03mz6zmdmd86j9v9vfhqb3952mqq075navx1i6g4bc"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Scala
       #:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _ (delete-file "build.xml") #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _ (delete-file-recursively "src/test") #t)))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     (list java-junit))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
4158
4159 ;; This is needed for picard 2.10.3
;; Variant of java-htsjdk-latest pinned to version 2.10.1.  Fields not listed
;; here are taken from the parent package.
(define-public java-htsjdk-2.10.1
  (package
    (inherit java-htsjdk-latest)
    (name "java-htsjdk")
    (version "2.10.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/samtools/htsjdk")
             (commit version)))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32 "1kxh7slm2pm3x9p6jxa1wqsq9a31dhiiflhxnxqcisan4k3rwia2"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Scala
       #:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _
             (delete-file "build.xml")
             #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _
             (delete-file-recursively "src/test")
             #t)))))))
4185
4186 ;; This version matches java-htsjdk 2.3.0. Later versions also require a more
4187 ;; recent version of java-htsjdk, which depends on gradle.
;; Picard, built with Ant against the java-htsjdk input instead of a bundled
;; htsjdk.  The source snippet removes pre-built binaries and adjusts the
;; target dependencies in build.xml.
(define-public java-picard
  (package
    (name "java-picard")
    (version "2.3.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/broadinstitute/picard")
                    (commit version)))
              (file-name (string-append "java-picard-" version "-checkout"))
              (sha256
               (base32
                "1ll7mf4r3by92w2nhlmpa591xd1f46xlkwh59mq6fvbb5pdwzvx6"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete pre-built binaries.
                  (delete-file-recursively "lib")
                  (mkdir-p "lib")
                  (substitute* "build.xml"
                    ;; Remove build-time dependency on git.
                    (("failifexecutionfails=\"true\"")
                     "failifexecutionfails=\"false\"")
                    ;; Use our htsjdk.
                    (("depends=\"compile-htsjdk, ")
                     "depends=\"")
                    (("depends=\"compile-htsjdk-tests, ")
                     "depends=\"")
                    ;; Build picard-lib.jar before building picard.jar
                    (("name=\"picard-jar\" depends=\"" target-prefix)
                     (string-append target-prefix "picard-lib-jar, ")))
                  #t))))
    (build-system ant-build-system)
    (arguments
     `(#:build-target "picard-jar"
       #:test-target "test"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:make-flags
       ,#~(list (string-append "-Dhtsjdk_lib_dir="
                               #$(this-package-input "java-htsjdk")
                               "/share/java/htsjdk/")
                "-Dhtsjdk-classes=dist/tmp"
                (string-append "-Dhtsjdk-version="
                               #$(package-version java-htsjdk)))
       #:jdk ,icedtea-8
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         (add-after 'unpack 'use-our-htsjdk
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htsjdk-jars (search-input-directory inputs
                                                        "share/java/htsjdk")))
               (substitute* "build.xml"
                 (("\\$\\{htsjdk\\}/lib") htsjdk-jars)))))
         ;; Drop "compile" from the dependencies of the "test" target.
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\""))
             #t))
         (replace 'install (install-jars "dist")))))
    (inputs
     (list java-htsjdk java-guava))
    (native-inputs
     (list java-testng))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
4263
4264 ;; This is needed for dropseq-tools
;; Picard 2.10.3, built with a build.xml generated through #:jar-name.  The
;; generated build.xml is rewritten so that the jar manifest gets a
;; Class-Path entry pointing at the htsjdk jar.
(define-public java-picard-2.10.3
  (package
    (name "java-picard")
    (version "2.10.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/broadinstitute/picard")
                    (commit version)))
              (file-name (string-append "java-picard-" version "-checkout"))
              (sha256
               (base32
                "1ajlx31l6i1k3y2rhnmgq07sz99g2czqfqgkr9mihmdjp3gwjhvi"))))
    (build-system ant-build-system)
    (arguments
     `(#:jar-name "picard.jar"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:jdk ,icedtea-8
       #:main-class "picard.cmdline.PicardCommandLine"
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform)
                  (sxml xpath))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _ (delete-file "build.xml") #t))
         ;; This is necessary to ensure that htsjdk is found when using
         ;; picard.jar as an executable.
         (add-before 'build 'edit-classpath-in-manifest
           (lambda* (#:key inputs #:allow-other-keys)
             (chmod "build.xml" #o664)
             ;; FIXME: We're breaking the line early with a dummy path to
             ;; ensure that the store reference isn't broken apart and can
             ;; still be found by the reference scanner.
             (let ((msg (format #f
                                "\
Class-Path: /~a \
~a/share/java/htsjdk.jar${line.separator}${line.separator}"
                                ;; maximum line length is 70
                                (string-tabulate (const #\b) 57)
                                (assoc-ref inputs "java-htsjdk"))))
               (call-with-output-file "build.xml.new"
                 (lambda (port)
                   (sxml->xml
                    (pre-post-order
                     (with-input-from-file "build.xml"
                       (lambda _ (xml->sxml #:trim-whitespace? #t)))
                     ;; Only "target" elements whose name list contains
                     ;; "manifest" get the extra tasks appended; every
                     ;; other node is rebuilt unchanged.
                     `((target . ,(lambda (tag . kids)
                                    (let ((name ((sxpath '(name *text*))
                                                 (car kids))))
                                      (if (member "manifest" name)
                                          `(,tag ,@kids
                                                 (replaceregexp
                                                  (@ (file "${manifest.file}")
                                                     (match "\\r\\n\\r\\n")
                                                     (replace "${line.separator}")))
                                                 (echo
                                                  (@ (message ,msg)
                                                     (file "${manifest.file}")
                                                     (append "true"))))
                                          `(,tag ,@kids)))))
                       (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                       (*text* . ,(lambda (_ txt) txt))))
                    port))))
             (rename-file "build.xml.new" "build.xml")
             #t)))))
    (propagated-inputs
     (list java-htsjdk-2.10.1))
    (native-inputs
     (list java-testng java-guava))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
4352
4353 ;; This is the last version of Picard to provide net.sf.samtools
(define-public java-picard-1.113
  ;; Variant of java-picard pinned to release 1.113.  The bundled "lib"
  ;; directory is removed from the source, so the IntelDeflater JNI library
  ;; is rebuilt in the 'build-jni phase and installed to lib/jni.
  (package
    (inherit java-picard)
    (name "java-picard")
    (version "1.113")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard")
             (commit version)))
       (file-name (string-append "java-picard-" version "-checkout"))
       (sha256
        (base32 "0lkpvin2fz3hhly4l02kk56fqy8lmlgyzr9kmvljk6ry6l1hw973"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the pre-built binaries and leave an empty "lib"
           ;; directory in their place.
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:ant ,ant/java8
       #:build-target "picard-jar"
       #:test-target "test"
       ;; FIXME: the class path at test time is wrong.
       ;; [testng] Error: A JNI error has occurred, please check your installation and try again
       ;; [testng] Exception in thread "main" java.lang.NoClassDefFoundError: com/beust/jcommander/ParameterException
       #:tests? #f
       ;; This is only used for tests.
       #:make-flags
       (list "-Dsamjdk.intel_deflater_so_path=lib/jni/libIntelDeflater.so")
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: This phase fails.
         (delete 'generate-jar-indices)
         ;; Point build.xml at the ant.jar from the inputs instead of the
         ;; bundled ant bzip2 jar.
         (add-after 'unpack 'use-ant-bzip
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("\\$\\{lib\\}/apache-ant-1.8.2-bzip2.jar")
                (search-input-file inputs "/lib/ant.jar")))))
         ;; Make "compile" depend on compile-src only, and let "test" pull in
         ;; compile-tests by itself.
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\"compile-tests, ")
               (("name=\"compile\" depends=\"compile-src, compile-tests\"")
                "name=\"compile\" depends=\"compile-src\""))
             #t))
         ;; Replace the null default of "intel_deflater_so_path" with the
         ;; location the library is installed to.
         (add-after 'unpack 'fix-deflater-path
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((deflater (string-append (assoc-ref outputs "out")
                                            "/lib/jni/libIntelDeflater.so")))
               (substitute* "src/java/net/sf/samtools/Defaults.java"
                 (("getStringProperty\\(\"intel_deflater_so_path\", null\\)")
                  (string-append
                   "getStringProperty(\"intel_deflater_so_path\", \""
                   deflater
                   "\")"))))
             #t))
         ;; Build the deflater library, because we've previously deleted the
         ;; pre-built one.  This can only be built with access to the JDK
         ;; sources.
         (add-after 'build 'build-jni
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((jdk-tarball (assoc-ref inputs "jdk-src"))
                   (jdk-headers (string-append "-I" (assoc-ref inputs "jdk")
                                               "/include/linux")))
               (for-each mkdir-p '("lib/jni" "jdk-src"))
               (invoke "tar" "--strip-components=1" "-C" "jdk-src"
                       "-xf" jdk-tarball)
               ;; Generate the JNI header for IntelDeflater into "lib/".
               (invoke "javah" "-jni"
                       "-classpath" "classes"
                       "-d" "lib/"
                       "net.sf.samtools.util.zip.IntelDeflater")
               (with-directory-excursion "src/c/inteldeflater"
                 (invoke "gcc" "-I../../../lib" "-I."
                         jdk-headers
                         "-I../../../jdk-src/src/share/native/common/"
                         "-I../../../jdk-src/src/solaris/native/common/"
                         "-c" "-O3" "-fPIC" "IntelDeflater.c")
                 (invoke "gcc" "-shared"
                         "-o" "../../../lib/jni/libIntelDeflater.so"
                         "IntelDeflater.o" "-lz" "-lstdc++")))
             #t))
         ;; We can only build everything else after building the JNI library.
         (add-after 'build-jni 'build-rest
           (lambda* (#:key make-flags #:allow-other-keys)
             (apply invoke "ant" "all" make-flags)
             #t))
         (add-before 'build 'set-JAVA6_HOME
           (lambda _
             (setenv "JAVA6_HOME" (getenv "JAVA_HOME"))
             #t))
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-jni-lib
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (jni-dir (string-append out "/lib/jni")))
               (mkdir-p jni-dir)
               (install-file "lib/jni/libIntelDeflater.so" jni-dir)
               #t))))))
    (inputs
     `(("java-snappy-1" ,java-snappy-1)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-cofoja" ,java-cofoja)
       ("ant" ,ant/java8)               ; for bzip2 support at runtime
       ("zlib" ,zlib)))
    (native-inputs
     `(("ant-apache-bcel" ,ant-apache-bcel)
       ("ant-junit" ,ant-junit)
       ("java-testng" ,java-testng)
       ("java-commons-bcel" ,java-commons-bcel)
       ("java-jcommander" ,java-jcommander)
       ("jdk" ,icedtea-8 "jdk")
       ;; The "jdk-drop" native input of icedtea-8, unpacked by 'build-jni.
       ("jdk-src" ,(car (assoc-ref (package-native-inputs icedtea-8)
                                   "jdk-drop")))))))
4469
(define-public fastqc
  ;; FastQC is built with ant from the upstream source zip.  The bundled jar
  ;; names in build.xml are redirected to jars from the inputs, and since
  ;; there is no install target the "bin" tree is copied to share/fastqc and
  ;; the launcher script is symlinked into bin.
  (package
    (name "fastqc")
    (version "0.11.5")
    (source
     (origin
       (method url-fetch)
       ;; Fetch over HTTPS, like the home page on the same host.
       (uri (string-append "https://www.bioinformatics.babraham.ac.uk/"
                           "projects/fastqc/fastqc_v"
                           version "_source.zip"))
       (sha256
        (base32
         "18rrlkhcrxvvvlapch4dpj6xc6mpayzys8qfppybi8jrpgx5cc5f"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:build-target "build"
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Replace the jar names referenced by build.xml with absolute
             ;; file names found among the inputs.
             (substitute* "build.xml"
               (("jbzip2-0.9.jar")
                (search-input-file inputs "/share/java/jbzip2.jar"))
               (("sam-1.103.jar")
                (search-input-file inputs
                                   "/share/java/sam-1.112.jar"))
               (("cisd-jhdf5.jar")
                (search-input-file inputs
                                   "/share/java/sis-jhdf5.jar")))))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    ;; No trailing slash, so that EXE and the symlink target
                    ;; below do not contain a doubled "//".
                    (share (string-append out "/share/fastqc"))
                    (exe (string-append share "/fastqc")))
               (for-each mkdir-p (list bin share))
               (copy-recursively "bin" share)
               ;; Hard-code the java executable used by the launcher script.
               (substitute* exe
                 (("my \\$java_bin = 'java';")
                  (string-append "my $java_bin = '"
                                 (assoc-ref inputs "java")
                                 "/bin/java';")))
               (chmod exe #o555)
               (symlink exe (string-append bin "/fastqc"))
               #t))))))
    (inputs
     `(("java" ,icedtea)
       ("perl" ,perl)                   ; needed for the wrapper script
       ("java-cisd-jhdf5" ,java-cisd-jhdf5)
       ("java-picard-1.113" ,java-picard-1.113)
       ("java-jbzip2" ,java-jbzip2)))
    (native-inputs
     (list unzip))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/")
    (synopsis "Quality control tool for high throughput sequence data")
    (description
     "FastQC aims to provide a simple way to do some quality control
checks on raw sequence data coming from high throughput sequencing
pipelines. It provides a modular set of analyses which you can use to
give a quick impression of whether your data has any problems of which
you should be aware before doing any further analysis.

The main functions of FastQC are:

@itemize
@item Import of data from BAM, SAM or FastQ files (any variant);
@item Providing a quick overview to tell you in which areas there may
be problems;
@item Summary graphs and tables to quickly assess your data;
@item Export of results to an HTML based permanent report;
@item Offline operation to allow automated generation of reports
without running the interactive application.
@end itemize\n")
    (license license:gpl3+)))
4546
(define-public fastp
  ;; fastp has no configure script and no tests; it is built with plain
  ;; make, with PREFIX pointing at the output.
  (package
    (name "fastp")
    (version "0.20.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/OpenGene/fastp")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0ly8mxdvrcy23jwxyppysx3dhb1lwsqhfbgpyvargxhfk6k700x4"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:make-flags ,#~(list (string-append "PREFIX=" #$output))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Create the "bin" directory before the install phase runs.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (mkdir-p (string-append out "/bin"))))))))
    (inputs (list zlib))
    (home-page "https://github.com/OpenGene/fastp/")
    (synopsis "All-in-one FastQ preprocessor")
    (description
     "Fastp is a tool designed to provide fast all-in-one preprocessing for
FastQ files. This tool has multi-threading support to afford high
performance.")
    (license license:expat)))
4581
(define-public htslib
  ;; Current HTSlib release; the older htslib-* variants in this file
  ;; inherit from this definition.
  (package
    (name "htslib")
    (version "1.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "0pwk8yhhvb85mi1d2qhwsb4samc3rmbcrq7b1s0jz0glaa7in8pd"))))
    (build-system gnu-build-system)
    (arguments
     ;; Let htslib translate "gs://" and "s3://" to regular https links with
     ;; "--enable-gcs" and "--enable-s3".  For these options to work, we also
     ;; need to set "--enable-libcurl".
     `(#:configure-flags
       (list "--enable-gcs"
             "--enable-libcurl"
             "--enable-s3")))
    (native-inputs (list perl))
    (inputs (list bzip2 curl openssl xz))
    ;; This is referred to in the pkg-config file as a required library.
    (propagated-inputs (list zlib))
    (home-page "https://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the @command{bgzip}, @command{htsfile}, and
@command{tabix} utilities.")
    ;; Files under cram/ are released under the modified BSD license;
    ;; the rest is released under the Expat license
    (license (list license:expat license:bsd-3))))
4618
(define-public htslib-1.12
  ;; htslib at version 1.12; only the version and source differ.
  (package/inherit htslib
    (version "1.12")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "1jplnvizgr0fyyvvmkfmnsywrrpqhid3760vw15bllz98qdi9012"))))))
4630
(define-public htslib-1.10
  ;; htslib at version 1.10; only the version and source differ.
  (package/inherit htslib
    (version "1.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "0wm9ay7qgypj3mwx9zl1mrpnr36298b1aj5vx69l4k7bzbclvr3s"))))))
4642
(define-public htslib-1.9
  ;; htslib at version 1.9; only the version and source differ.
  (package/inherit htslib
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "16ljv43sc3fxmv63w7b2ff8m1s7h89xhazwmbm1bicz8axq8fjz0"))))))
4654
4655 ;; This package should be removed once no packages rely upon it.
(define htslib-1.3
  ;; Private (not exported) htslib at version 1.3.1; only the version and
  ;; source differ from htslib.
  (package/inherit htslib
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))))
4667
(define htslib-for-samtools-1.2
  ;; Private htslib 1.2.1.  It replaces the inherited arguments and inputs
  ;; entirely: no extra configure flags, zlib as a plain input, and a patch
  ;; for the shell path used by the test script.
  (package/inherit htslib
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32 "1c32ssscbnjwfw3dra140fq7riarp2x990qxybh34nr1p5r17nxx"))))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; Use the bash found on PATH instead of /bin/bash.
             (substitute* "test/test.pl"
               (("/bin/bash")
                (which "bash"))))))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("zlib" ,zlib)))))
4690
(define htslib-for-stringtie
  ;; Private htslib 1.12 with "htslib-for-stringtie.patch" applied and
  ;; configured with libdeflate instead of the inherited flags.
  (package
    (inherit htslib-1.12)
    (source
     (origin
       (inherit (package-source htslib-1.12))
       (patches (search-patches "htslib-for-stringtie.patch"))))
    (arguments
     `(#:configure-flags '("--with-libdeflate")))
    (inputs (list bzip2 libdeflate openssl))))
4702
(define-public idr
  ;; IDR is built from a git checkout; the generated C file is deleted from
  ;; the source, and python-cython is a native input.
  (package
    (name "idr")
    (version "2.0.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nboley/idr")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "04j876h6z444v2q79drxx283d3k5snd72kj895wbalnl42206x9g"))
       (snippet
        '(begin
           ;; Delete generated C code.
           (delete-file "idr/inv_cdf.c")
           #t))))
    (build-system python-build-system)
    ;; There is only one test ("test_inv_cdf.py") and it tests features that
    ;; are no longer part of this package.  It also asserts False, which
    ;; causes the tests to always fail.
    (arguments
     `(#:tests? #f))
    (native-inputs (list python-cython))
    (propagated-inputs
     (list python-scipy python-sympy python-numpy python-matplotlib))
    (home-page "https://github.com/nboley/idr")
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (license license:gpl2+)))
4735
(define-public jellyfish
  ;; Jellyfish has two outputs: "out" for the library and "python" for the
  ;; Python bindings, which are installed via --enable-python-binding.
  (package
    (name "jellyfish")
    (version "2.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32 "0npa62wzasdibas5zp3n8j3armsci4kyvh0jw7jr0am4gg7vg5g1"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for library
               "python"))               ;for Python bindings
    (arguments
     `(#:configure-flags
       ,#~(list
           ;; The configure script probes for CPU features when SSE is
           ;; enabled.
           "--without-sse"
           (string-append "--enable-python-binding=" #$output:python))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; generator_manager.hpp either uses /bin/sh or $SHELL
             ;; to run tests.
             (let ((bash (which "bash")))
               (setenv "SHELL" bash)))))))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("python" ,python-wrapper)
       ("pkg-config" ,pkg-config)))
    (inputs (list htslib))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; JELLYFISH seems to be 64-bit only.
    (supported-systems '("x86_64-linux" "aarch64-linux" "mips64el-linux"))
    ;; One of these licenses may be picked
    (license (list license:gpl3+ license:bsd-3))))
4783
(define-public khmer
  ;; khmer is built from git with its bundled zlib, bzip2 and seqan removed
  ;; and its pre-generated Cython C++ files deleted, so setup.py is patched
  ;; to build the extensions from the .pyx sources.
  (package
    (name "khmer")
    (version "3.0.0a3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dib-lab/khmer")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "01l4jczglkl7yfhgvzx8j0df7k54bk1r8sli9ll16i1mis0d8f37"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled libraries.  We do not replace the bundled seqan
           ;; as it is a modified subset of the old version 1.4.1.
           ;;
           ;; We do not replace the bundled MurmurHash as the canonical
           ;; repository for this code 'SMHasher' is unsuitable for providing
           ;; a library.  See
           ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
           (for-each delete-file-recursively
                     '("third-party/zlib"
                       "third-party/bzip2"
                       "third-party/seqan"))
           ;; Link against the system z and bz2 and drop the bundled
           ;; include directories.
           (substitute* "setup.cfg"
             (("# libraries = z,bz2")
              "libraries = z,bz2")
             (("include:third-party/zlib:third-party/bzip2")
              "include:"))
           ;; Delete generated Cython CPP files.
           (for-each delete-file
                     (find-files "khmer/_oxli/" "\\.cpp$"))))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-cc
           (lambda _
             (setenv "CC" "gcc")))
         (add-after 'unpack 'python-3.8-compatibility
           (lambda _
             ;; Python 3.8 removed time.clock().
             (substitute* "sandbox/sweep-reads.py"
               (("time\\.clock") "time.process_time"))))
         (add-after 'unpack 'do-use-cython
           (lambda _
             ;; Switch setup.py to Cython's Extension and build_ext and to
             ;; the .pyx sources.
             (substitute* "setup.py"
               (("from setuptools import Extension as CyExtension")
                "from Cython.Distutils import Extension as CyExtension")
               (("from setuptools.command.build_ext import build_ext as _build_ext")
                "from Cython.Distutils import build_ext as _build_ext")
               (("HAS_CYTHON = False")
                "HAS_CYTHON = True")
               (("cy_ext = 'cpp'")
                "cy_ext = 'pyx'"))))
         (add-before 'build 'build-extensions
           (lambda _
             ;; Cython extensions have to be built before running the tests.
             (invoke "python" "setup.py" "build_ext" "--inplace")))
         (replace 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             (when tests?
               (add-installed-pythonpath inputs outputs)
               (invoke "pytest" "-v")))))))
    (native-inputs
     (list python-cython python-pytest python-pytest-runner))
    (inputs
     (list zlib bzip2 seqan-1 python-screed python-bz2file))
    (home-page "https://khmer.readthedocs.org/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    ;; When building on i686, armhf and mips64el, we get the following error:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux" "aarch64-linux"))
    (license license:bsd-3)))
4863
(define-public kaiju
  ;; Kaiju is built with make from its "src" directory.  The install phase
  ;; copies "../bin" to the output and wraps three scripts so that the
  ;; "bin" directories of all inputs are on their PATH.
  (package
    (name "kaiju")
    (version "1.9.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bioinformatics-centre/kaiju")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1hfmadkfs6jjd7l3byly5xxb0ifm3dm1wis11sjbqfcv6l89snmg"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #f                       ; There are no tests.
      #:phases
      #~(modify-phases %standard-phases
          (delete 'configure)
          (add-before 'build 'move-to-src-dir
            (lambda _
              (chdir "src")))
          (replace 'install
            (lambda _
              (let* ((bin (string-append #$output "/bin"))
                     ;; "bin" directories of every input of this package.
                     (path (search-path-as-list
                            '("bin")
                            '#$(match (package-inputs this-package)
                                 (((_ pkg) ...) pkg))))
                     (wrap (lambda (script)
                             (let ((exe (string-append bin "/" script)))
                               (chmod exe #o555)
                               (wrap-script exe
                                 #:guile #$(file-append guile-3.0 "/bin/guile")
                                 `("PATH" ":" prefix ,path))))))
                (mkdir-p bin)
                (copy-recursively "../bin" bin)
                (for-each wrap
                          (list "kaiju-convertMAR.py"
                                "kaiju-gbk2faa.pl"
                                "kaiju-makedb"))))))))
    (inputs
     (list bzip2
           coreutils
           curl
           gawk
           guile-3.0                    ;for wrap-script
           gzip
           perl
           python-wrapper
           tar
           wget
           zlib))
    (home-page "http://kaiju.binf.ku.dk/")
    (synopsis "Fast and sensitive taxonomic classification for metagenomics")
    (description "Kaiju is a program for sensitive taxonomic classification
of high-throughput sequencing reads from metagenomic whole genome sequencing
experiments.")
    (license license:gpl3+)))
4921
(define-public macs
  ;; MACS is built from git.  The C files generated from .pyx sources are
  ;; deleted from the checkout, and python-cython is a native input.
  (package
    (name "macs")
    (version "2.2.7.1")
    (source
     (origin
       ;; The PyPi tarball does not contain tests.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/macs3-project/MACS")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "08zsgh65xbpv1md2s3wqmrk9g2mz6izmn59ryw5lbac54120p291"))
       (modules '((guix build utils)))
       ;; Remove files generated by Cython
       (snippet
        '(begin
           (for-each
            (lambda (pyx)
              ;; "foo.pyx" -> "foo.c": drop "pyx", append "c".
              (let ((c-file (string-append (string-drop-right pyx 3) "c")))
                (when (file-exists? c-file)
                  (delete-file c-file))))
            (find-files "." "\\.pyx$"))
           (delete-file "MACS2/IO/CallPeakUnitPrecompiled.c")))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'build 'set-HOME
           (lambda _
             (setenv "HOME" "/tmp")))
         (replace 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             (when tests?
               (add-installed-pythonpath inputs outputs)
               (invoke "pytest" "-v")))))))
    (native-inputs (list python-cython python-pytest))
    (inputs (list python-numpy))
    (home-page "https://github.com/macs3-project/MACS")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
4972
(define-public mafft
  ;; MAFFT is built with make from its "core" directory.  The Makefile is
  ;; patched to drop some specialised programs, tool names in the sources
  ;; are replaced with absolute file names, and the installed programs are
  ;; wrapped so that coreutils is on their PATH.
  (package
    (name "mafft")
    (version "7.475")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "0i2i2m3blh2xkbkdk48hxfssks30ny0v381gdl7zwhcvp0axs26r"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags ,#~(list (string-append "PREFIX=" #$output)
                             (string-append "BINDIR="
                                            (string-append #$output "/bin")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS
               ;; NOTE(review): the pattern says "2cl" while the replacement
               ;; says "f2cl"; confirm against the upstream Makefile that
               ;; this pattern still matches.
               (("splittbfast disttbfast tbfast mafft-profile 2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               ;; Comment out the clean-up commands for the removed programs.
               ;; The second replacement used to be ")#"; the stray ")" would
               ;; have corrupted the Makefile if the line matched.
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               (("^rm -f mafft-distance mafft-distance.exe") "#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Replace perl, awk and grep with the absolute file names
             ;; found on PATH at build time.
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    ;; No trailing ":" here: wrap-program inserts the
                    ;; separator itself, and an extra one would leave an
                    ;; empty PATH element, i.e. the current directory.
                    (path (string-append
                           (assoc-ref inputs "coreutils") "/bin")))
               (for-each (lambda (file)
                           (wrap-program file
                             `("PATH" ":" prefix (,path))))
                         (find-files bin))))))))
    (inputs
     (list perl ruby gawk grep coreutils))
    (home-page "https://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "https://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
5052
(define-public mash
  ;; Mash is built against capnproto, gsl and htslib from the inputs.  The
  ;; bundled kseq.h is deleted from the source and the includes are
  ;; redirected to htslib's copy.
  (package
    (name "mash")
    (version "2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/marbl/mash")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "049hwcc059p2fd9vwndn63laifvvsi0wmv84i6y1fr79k15dxwy6"))
       (modules '((guix build utils)))
       (snippet
        ;; Delete bundled kseq.
        ;; TODO: Also delete bundled murmurhash and open bloom filter.
        '(delete-file "src/mash/kseq.h"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; No tests.
       #:make-flags (list "CC=gcc")
       #:configure-flags
       ,#~(list
           (string-append "--with-capnp=" #$(this-package-input "capnproto"))
           (string-append "--with-gsl=" #$(this-package-input "gsl")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-includes
           (lambda _
             ;; Use kseq.h from htslib in place of the deleted bundled copy.
             (substitute* '("src/mash/Sketch.cpp"
                            "src/mash/CommandFind.cpp"
                            "src/mash/CommandScreen.cpp")
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'fix-includes 'use-c++14
           (lambda _
             ;; capnproto 0.7 requires c++14 to build
             (substitute* '("configure.ac" "Makefile.in")
               (("c\\+\\+11") "c++14"))
             #t)))))
    (native-inputs (list autoconf))
    (inputs
     (list
      ;; Capnproto and htslib are statically embedded in the final
      ;; application.  Therefore we also list their licenses, below.
      capnproto
      htslib
      gsl
      zlib))
    (supported-systems '("x86_64-linux"))
    (home-page "https://mash.readthedocs.io")
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
5116
(define-public metabat
  ;; MetaBAT is built with CMake against zlib, htslib and boost from the
  ;; inputs.  The version header is filled in by hand, and the CMake lines
  ;; matched in 'do-not-use-bundled-libraries are removed.
  (package
    (name "metabat")
    (version "2.15")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://bitbucket.org/berkeleylab/metabat.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0v3gsps0ypani14102z2y1a2wignhpf7s1h45mxmj5f783rkhqd9"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       ,#~(list
           (string-append "-Dzlib_LIB="
                          #$(this-package-input "zlib")
                          "/lib/libz.so")
           (string-append "-Dhtslib_LIB="
                          #$(this-package-input "htslib")
                          "/lib/libhts.so")
           (string-append "-DBOOST_ROOT="
                          #$(this-package-input "boost")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'configure-version-file
           (lambda _
             ;; Instantiate the version header template with fixed values.
             (copy-file "metabat_version.h.in" "metabat_version.h")
             (substitute* "metabat_version.h"
               (("@_time_stamp@") "19700101")
               (("@GIT_IS_DIRTY@") "0")
               (("@GIT_RETRIEVED_STATE@") "0")
               (("@GIT_HEAD_SHA1@") (string-append "v" ,version)))))
         (add-after 'unpack 'do-not-use-bundled-libraries
           (lambda _
             (substitute* "CMakeLists.txt"
               (("include\\(cmake.*") ""))
             (substitute* "src/CMakeLists.txt"
               (("set\\(Boost.*") "")
               (("add_dependencies.*") "")))))))
    (inputs (list zlib perl samtools htslib boost))
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    ;; The source code contains inline assembly.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license (license:non-copyleft "file://license.txt"
                                   "See license.txt in the distribution."))))
5171
(define-public minced
  ;; MinCED has no configure script and no install target.  The install
  ;; phase copies minced.jar to bin and writes a small shell wrapper that
  ;; runs it with the JRE from the inputs.
  (package
    (name "minced")
    (version "0.3.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ctSkennerton/minced")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1f5h9him0gd355cnx7p6pnxpknhckd4g0v62mg8zyhfbx9as25fv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; Fix test for latest version.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         (replace 'install              ; No install target.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (wrapper (string-append bin "/minced"))
                    (shell (string-append (assoc-ref inputs "bash")
                                          "/bin/sh"))
                    (java (string-append (assoc-ref inputs "jre")
                                         "/bin/java")))
               ;; Minced comes with a wrapper script that tries to figure out where
               ;; it is located before running the JAR.  Since these paths are known
               ;; to us, we build our own wrapper to avoid coreutils dependency.
               (install-file "minced.jar" bin)
               (call-with-output-file wrapper
                 (lambda (port)
                   (display
                    (string-append
                     "#!" shell "\n\n"
                     java " -jar "
                     bin "/minced.jar \"$@\"\n")
                    port)))
               (chmod wrapper #o555))
             #t)))))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (home-page "https://github.com/ctSkennerton/minced")
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (license license:gpl3+)))
5228
(define-public muscle
  ;; MUSCLE has no configure script, test suite or install target: the
  ;; check phase just runs the binary, and the install phase copies it
  ;; to bin.
  (package
    (name "muscle")
    (version "3.8.1551")
    (source
     (origin
       (method url-fetch/tarbomb)
       (uri (string-append "http://www.drive5.com/muscle/muscle_src_"
                           version ".tar.gz"))
       (sha256
        (base32 "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags (list "LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so just test if it runs.
           (lambda _
             (invoke "./muscle" "-version")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "muscle" bin)
               #t))))))
    (home-page "http://www.drive5.com/muscle")
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    ;; License information found in 'muscle -h' and usage.cpp.
    (license license:public-domain)))
5263
(define-public newick-utils
  ;; There are no recent releases so we package from git.  The version
  ;; string embeds the first eight characters of the commit.
  (let ((git-commit "da121155a977197cab9fbb15953ca1b40b11eb87"))
    (package
      (name "newick-utils")
      (version (string-append "1.6-1." (string-take git-commit 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tjunier/newick_utils")
               (commit git-commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
      (build-system gnu-build-system)
      (arguments
       '(#:make-flags
         (list "CFLAGS=-O2 -g -fcommon")))
      (native-inputs (list autoconf automake libtool))
      (inputs
       ;; XXX: TODO: Enable Lua and Guile bindings.
       ;; https://github.com/tjunier/newick_utils/issues/13
       (list libxml2 flex bison))
      (home-page "https://github.com/tjunier/newick_utils")
      (synopsis "Programs for working with newick format phylogenetic trees")
      (description
       "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
      (license license:bsd-3))))
5295
(define-public orfm
  (package
    (name "orfm")
    (version "0.7.1")
    (source
     (origin
       (method url-fetch)
       ;; Release tarball attached to the upstream "v<version>" release.
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32
         "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
    (build-system gnu-build-system)
    (native-inputs
     (list ruby-bio-commandeer ruby-rspec ruby))
    (inputs
     (list zlib))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
5318
(define-public prank
  (package
    (name "prank")
    (version "170427")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://wasabiapp.org/download/prank/prank.source."
             version ".tgz"))
       (sha256
        (base32
         "0nc8g9c5rkdxcir46s0in9ci1sxwzbjibxrvkksf22ybnplvagk2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Both phases below are inserted directly after 'unpack, so the one
         ;; added last ('remove-m64-flag) runs first; that is why it still
         ;; addresses the Makefile as "src/Makefile".
         (add-after 'unpack 'enter-src-dir
           (lambda _ (chdir "src")))
         (add-after 'unpack 'remove-m64-flag
           ;; Prank will build with the correct 'bit-ness' without this flag
           ;; and this allows building on 32-bit machines.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))))
         (delete 'configure)
         (replace 'install
           ;; Install the executable and its man page by hand, and wrap the
           ;; executable so that its helper programs are found on PATH.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin"))
                    (mandir (string-append out "/share/man/man1"))
                    ;; Colon-separated list of the helpers' bin/ directories.
                    (helper-path
                     (string-join
                      (map (lambda (input)
                             (string-append (assoc-ref inputs input) "/bin"))
                           '("mafft" "exonerate" "bppsuite"))
                      ":")))
               (install-file "prank" bindir)
               (wrap-program (string-append bindir "/prank")
                 `("PATH" ":" prefix (,helper-path)))
               (install-file "prank.1" mandir)))))))
    (inputs
     (list mafft exonerate bppsuite))
    (home-page "http://wasabiapp.org/software/prank/")
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (license license:gpl2+)))
5371
(define-public proteinortho
  (package
    (name "proteinortho")
    (version "6.0.14")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://gitlab.com/paulklemm_PHD/proteinortho.git")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0pmy617zy2z2w6hjqxjhf3rzikf5n3mpia80ysq8233vfr7wrzff"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; remove pre-built scripts
                  (delete-file-recursively "src/BUILD/")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           ;; There is no configure script, so we modify the Makefile directly.
           (lambda* (#:key outputs #:allow-other-keys)
             (substitute* "Makefile"
               (("INSTALLDIR=.*")
                (string-append
                 "INSTALLDIR=" (assoc-ref outputs "out") "/bin\n")))))
         (add-before 'install 'make-install-directory
           ;; The install directory is not created during 'make install'.
           (lambda* (#:key outputs #:allow-other-keys)
             (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))))
         (add-after 'install 'wrap-programs
           ;; Wrap the main program and every installed *.pl / *.py script
           ;; so that they run with the build-time PATH prepended.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((path (getenv "PATH"))
                   (out (assoc-ref outputs "out"))
                   (guile (search-input-file inputs "bin/guile")))
               (for-each (lambda (script)
                           (wrap-script script #:guile guile
                             `("PATH" ":" prefix (,path))))
                         (cons (string-append out "/bin/proteinortho")
                               (find-files out "\\.(pl|py)$")))))))))
    (inputs
     `(("guile" ,guile-3.0)             ; for wrap-script
       ("diamond" ,diamond)
       ("perl" ,perl)
       ("python" ,python-wrapper)
       ("blast+" ,blast+)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (native-inputs
     (list which))
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (license license:gpl3+)))
5439
(define-public prodigal
  (package
    (name "prodigal")
    ;; Check for a new home page when updating this package:
    ;; https://github.com/hyattpd/Prodigal/issues/36#issuecomment-536617588
    (version "2.6.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/hyattpd/Prodigal")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1fs1hqk83qjbjhrvhw6ni75zakx5ki1ayy3v6wwkn3xvahc9hi5s"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       ;; The Makefile is told where to install through INSTALLDIR.
       #:make-flags
       ,#~(list (string-append "INSTALLDIR=" #$output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "https://github.com/hyattpd/Prodigal")
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
5471
(define-public roary
  (package
    (name "roary")
    (version "3.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "0qxrds9wx7cfhlkihrp6697kx0flhhxymap9fwan0b3rbdhcnmff"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly.  The source tree's bin/ and lib/ directories are
             ;; put in front of PATH and PERL5LIB for the test run.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             (for-each (lambda (file)
                         (display file)(display "\n")
                         (invoke "perl" file))
                       (find-files "t" ".*\\.t$"))))
         (replace 'install
           ;; There is no 'install' target in the Makefile.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl))))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; Wrap every file from the source tree's bin/ whose name does
               ;; not end in "R"; a single wrapper sets both variables.
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/" prog)
                             `("PERL5LIB" ":" prefix
                               (,(string-append perl5lib ":" out
                                                "/lib/perl5/site_perl")))
                             `("PATH" ":" prefix
                               (,(string-append path ":" out "/bin")))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script gets its own environment.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (dirname (search-input-file inputs "bin/chmod"))))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/")))
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))))))
    (native-inputs
     (list perl-env-path perl-test-files perl-test-most perl-test-output))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-digest-md5-file" ,perl-digest-md5-file)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r-minimal" ,r-minimal)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "https://sanger-pathogens.github.io/Roary/")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
5585
(define-public raxml
  (package
    (name "raxml")
    (version "8.2.12")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/stamatak/standard-RAxML")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1jqjzhch0rips0vp04prvb8vmc20c5pdmsqn8knadcf91yy859fh"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       ;; Use the "HYBRID" Makefile rather than the SSE or AVX ones.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           ;; Install the executable by hand and provide a short "raxml"
           ;; alias for it.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; The link itself must live in the output's bin/ directory;
               ;; a bare "raxml" would be created in the build directory
               ;; and never installed.
               (symlink (string-append bin "/" executable)
                        (string-append bin "/raxml"))))))))
    (inputs
     (list openmpi))
    (home-page "https://cme.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:gpl2+)))
5626
(define-public rsem
  (package
    (name "rsem")
    (version "1.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deweylab/RSEM")
             (commit (string-append "v" version))))
       (sha256
        (base32 "1yl4i7z20n2p84j1lmk15aiak3yqc6fiw0q5a4pndw7pxfiq3rzp"))
       (file-name (git-file-name name version))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove bundled copy of boost and samtools
           (delete-file-recursively "boost")
           (delete-file-recursively "samtools-1.3")))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; Point the Makefile at the packaged Boost and htslib instead of the
       ;; bundled copies deleted by the source snippet.
       #:make-flags
       ,#~(list (string-append "BOOST="
                               #$(this-package-input "boost")
                               "/include/")
                (string-append "SAMHEADERS="
                               #$(this-package-input "htslib")
                               "/include/htslib/sam.h")
                (string-append "SAMLIBS="
                               #$(this-package-input "htslib")
                               "/lib/libhts.so"))
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : \\$\\(PROGRAMS\\).*") "all: $(PROGRAMS)\n")
               ;; Do not build bundled samtools library.
               (("^\\$\\(SAMLIBS\\).*") "")
               ;; Needed for Boost
               (("gnu\\+\\+98") "gnu++11"))
             ;; C++11 compatibility
             (substitute* "buildReadIndex.cpp"
               (("success = \\(getline")
                "success = (bool)(getline"))
             (substitute* '("PairedEndHit.h"
                            "SingleHit.h")
               (("return \\(in>>sid>>pos")
                "return (bool)(in>>sid>>pos"))))
         (replace 'install
           ;; Install every file matching "rsem-.*" found in the build tree
           ;; into bin/, plus the shared Perl module.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/"))
                    (perl-dir (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bindir)
               (mkdir-p perl-dir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perl-dir))))
         (add-after 'install 'wrap-program
           ;; Wrap the listed programs so that the installed Perl module
           ;; directory is on PERL5LIB.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/bin/" prog)
                             `("PERL5LIB" ":" prefix
                               (,(string-append out "/lib/perl5/site_perl")))))
                         '("rsem-calculate-expression"
                           "rsem-control-fdr"
                           "rsem-generate-data-matrix"
                           "rsem-generate-ngvector"
                           "rsem-plot-transcript-wiggles"
                           "rsem-prepare-reference"
                           "rsem-run-ebseq"
                           "rsem-run-prsem-testing-procedure"))))))))
    (inputs
     (list boost r-minimal perl htslib-1.3 zlib))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
5717
(define-public rseqc
  (package
    (name "rseqc")
    (version "3.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rseqc/"
                           "RSeQC-" version ".tar.gz"))
       (sha256
        (base32
         "0gbb9iyb7swiv5455fm5rg98r7l6qn27v564yllqjd574hncpx6m"))))
    (build-system python-build-system)
    (native-inputs
     (list python-nose))
    (inputs
     (list python-cython
           python-bx-python
           python-pybigwig
           python-pysam
           python-numpy
           zlib))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
5750
(define-public seek
  ;; There are no release tarballs. And the installation instructions at
  ;; http://seek.princeton.edu/installation.jsp only mention a mercurial
  ;; changeset ID. This is a git repository, though. So we just take the
  ;; most recent commit.
  (let ((commit "196ed4c7633246e9c628e4330d77577ccfd7f1e5")
        (revision "1"))
    (package
      (name "seek")
      (version (git-version "1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/FunctionLab/sleipnir.git")
               (commit commit)
               (recursive? #true)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0c658n8nz563a96dsi4gl2685vxph0yfmmqq5yjc6i4xin1jy1ab"))))
      (build-system cmake-build-system)
      (arguments
       `(#:configure-flags
         ;; Tell CMake where the libsvm library and headers are.
         ,#~(list (string-append "-DSVM_LIBRARY="
                                 #$(this-package-input "libsvm")
                                 "/lib/libsvm.so.2")
                  (string-append "-DSVM_INCLUDE="
                                 #$(this-package-input "libsvm")
                                 "/include"))
         #:tests? #false          ; tests only fail in the build container
         #:phases
         (modify-phases %standard-phases
           ;; The check phase expects to find the unit_tests executable in the
           ;; "build/bin" directory, but it is actually in "build/tests".
           (replace 'check
             (lambda* (#:key tests? #:allow-other-keys)
               (when tests?
                 (invoke "tests/unit_tests")))))))
      (native-inputs
       (list pkg-config))
      (inputs
       `(("apache-thrift:include" ,apache-thrift "include")
         ("apache-thrift:lib" ,apache-thrift "lib")
         ("gsl" ,gsl)
         ("boost" ,boost)
         ("gengetopt" ,gengetopt)
         ("libsvm" ,libsvm)
         ("log4cpp" ,log4cpp)
         ("python" ,python)
         ("readline" ,readline)))
      (home-page "http://seek.princeton.edu")
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (license license:cc-by3.0))))
5811
(define-public samtools
  (package
    (name "samtools")
    (version "1.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "0x3xdda78ac5vx66b3jdsv9sfhyz4npl4znl1zbaf3lbm6xdlhck"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.14")))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags (list "--with-ncurses")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             (substitute* "test/test.pl"
               ;; The test script calls out to /bin/bash
               (("/bin/bash") (which "bash"))))))))
    (native-inputs
     (list pkg-config))
    (inputs
     (list htslib ncurses perl python zlib))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
5850
(define-public samtools-1.12
  ;; Variant of samtools that additionally installs the static "libbam.a"
  ;; library and the top-level header files.
  (package/inherit samtools
    (version "1.12")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "1jrdj2idpma5ja9cg0rr73b565vdbr9wyy6zig54bidicc2pg8vd"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.12")))))
    (arguments
     (substitute-keyword-arguments (package-arguments samtools)
       ;; (ice-9 ftw) and (ice-9 regex) provide 'scandir' and 'string-match',
       ;; which the 'install-headers phase uses.
       ((#:modules _ #f)
        '((ice-9 ftw)
          (ice-9 regex)
          (guix build gnu-build-system)
          (guix build utils)))
       ((#:phases phases)
        `(modify-phases ,phases
           (add-after 'install 'install-library
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((libdir (string-append (assoc-ref outputs "out") "/lib")))
                 (install-file "libbam.a" libdir))))
           (add-after 'install 'install-headers
             ;; Copy every "*.h" file of the top-level directory.
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((include-dir (string-append (assoc-ref outputs "out")
                                                 "/include/samtools/")))
                 (for-each (lambda (header)
                             (install-file header include-dir))
                           (scandir "." (lambda (entry)
                                          (string-match "\\.h$" entry)))))))))))
    (native-inputs (list pkg-config))
    (inputs
     (list htslib-1.12 ncurses perl python zlib))))
5891
(define-public samtools-1.10
  ;; Same recipe as 'samtools', pinned to version 1.10 and built against the
  ;; matching htslib-1.10.
  (package (inherit samtools)
    (name "samtools")
    (version "1.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "119ms0dpydw8dkh3zc4yyw9zhdzgv12px4l2kayigv31bpqcb7kv"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.10")
           #t))))
    (inputs
     (list htslib-1.10 ncurses perl python zlib))))
5912
(define-public samtools-1.2
  (package (inherit samtools)
    (name "samtools")
    (version "1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32
         "1akdqb685pk9xk1nb6sa9aq8xssjjhvvc06kp4cpdqvz2157l3j2"))
       (modules '((guix build utils)))
       (snippet
        ;; Delete bundled htslib and Windows binaries
        '(for-each delete-file-recursively (list "win32" "htslib-1.2.1")))))
    (arguments
     `(#:make-flags
       ;; There is no 'configure' phase here, so the installation prefix and
       ;; the htslib locations are passed straight to make.
       ,#~(list (string-append "prefix=" #$output)
                (string-append "BGZIP="
                               #$(this-package-input "htslib")
                               "/bin/bgzip")
                (string-append "HTSLIB="
                               #$(this-package-input "htslib")
                               "/lib/libhts.so")
                (string-append "HTSDIR="
                               #$(this-package-input "htslib")
                               "/include"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-makefile-curses
           ;; Link against ncurses and drop the Makefile's include line
           ;; that refers to HTSDIR.
           (lambda _
             (substitute* "Makefile"
               (("-lcurses") "-lncurses")
               (("include \\$\\(HTSDIR.*") ""))))
         (add-after 'unpack 'patch-tests
           (lambda _
             (substitute* "test/test.pl"
               ;; The test script calls out to /bin/bash
               (("/bin/bash") (which "bash"))
               ;; There are two failing tests upstream relating to the "stats"
               ;; subcommand in test_usage_subcommand ("did not have Usage"
               ;; and "usage did not mention samtools stats"), so we disable
               ;; them.
               (("(test_usage_subcommand\\(.*\\);)" cmd)
                (string-append "unless ($subcommand eq 'stats') {" cmd "};")))
             ;; This test fails because the grep output doesn't look as
             ;; expected; it is correct, though.
             (substitute* "test/mpileup/mpileup.reg"
               (("P 52.out.*") ""))))
         (delete 'configure))))
    (native-inputs
     (list grep gawk pkg-config))
    (inputs
     (list htslib-for-samtools-1.2 ncurses perl python zlib))))
5969
(define-public samtools-0.1
  ;; This is the most recent version of the 0.1 line of samtools. The input
  ;; and output formats differ greatly from that used and produced by samtools
  ;; 1.x and is still used in many bioinformatics pipelines.
  (package (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/samtools/samtools/"
                       version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags
       (list "LIBCURSES=-lncurses")
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ;; (ice-9 ftw) and (ice-9 regex) provide 'scandir' and
           ;; 'string-match', which the 'install-headers phase uses.
           ((#:modules _ #f)
            '((ice-9 ftw)
              (ice-9 regex)
              (guix build gnu-build-system)
              (guix build utils)))
           ((#:phases phases)
            `(modify-phases ,phases
               (replace 'install
                 ;; Install the executable by hand; 'install-file' creates
                 ;; the target directory itself.
                 (lambda* (#:key outputs #:allow-other-keys)
                   (install-file "samtools"
                                 (string-append (assoc-ref outputs "out")
                                                "/bin"))))
               (add-after 'install 'install-library
                 (lambda* (#:key outputs #:allow-other-keys)
                   (let ((lib (string-append (assoc-ref outputs "out") "/lib")))
                     (install-file "libbam.a" lib))))
               (add-after 'install 'install-headers
                 ;; Copy every "*.h" file of the top-level directory.
                 (lambda* (#:key outputs #:allow-other-keys)
                   (let ((include (string-append (assoc-ref outputs "out")
                                                 "/include/samtools/")))
                     (for-each (lambda (file)
                                 (install-file file include))
                               (scandir "." (lambda (name)
                                              (string-match "\\.h$" name)))))))
               ;; Tests are disabled above and no 'configure' step is run.
               (delete 'patch-tests)
               (delete 'configure))))))))
6017
(define-public mosaik
  (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source (origin
                ;; There are no release tarballs nor tags.
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/wanpinglee/MOSAIK")
                      (commit commit)))
                (file-name (string-append name "-" version))
                (sha256
                 (base32
                  "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; Instead of configuring, just enter the "src" directory, where
           ;; the remaining phases run.
           (replace 'configure
             (lambda _ (chdir "src")))
           (replace 'install
             ;; Copy the contents of the top-level "bin" directory (one
             ;; level above "src") into the output.
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out")
                                         "/bin")))
                 (mkdir-p bin)
                 (copy-recursively "../bin" bin)))))))
      (inputs
       `(("perl" ,perl)
         ("zlib:static" ,zlib "static")
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://github.com/wanpinglee/MOSAIK")
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
      ;; code released into the public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
6065
(define-public mosaicatcher
  (package
    (name "mosaicatcher")
    (version "0.3.1")
    (source
     (origin
       ;; There are no release tarballs; fetch the upstream git ref named
       ;; "<version>-dev".
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/friendsofstrandseq/mosaicatcher")
             (commit (string-append version "-dev"))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1n2s5wvvj2y0vfgjkg1q11xahpbagxz7h2vf5q7qyy25s12kbzbd"))
       (patches (search-patches "mosaicatcher-unbundle-htslib.patch"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #false                  ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'chdir
           (lambda _ (chdir "src")))
         (replace 'install
           ;; Install the "mosaic" executable and copy the "R" directory
           ;; (one level above the current directory) to share/mosaicatcher.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin"))
                    (sharedir (string-append out "/share/mosaicatcher")))
               (install-file "mosaic" bindir)
               (mkdir-p sharedir)
               (copy-recursively "../R" sharedir)))))))
    (inputs
     (list boost htslib))
    (home-page "https://github.com/friendsofstrandseq/mosaicatcher")
    (synopsis "Count and classify Strand-seq reads")
    (description
     "Mosaicatcher counts Strand-seq reads and classifies strand states of
each chromosome in each cell using a Hidden Markov Model.")
    (license license:expat)))
6104
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "2.10.5")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ncbi/ngs")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1ix51c25hjn57w93qmwzw80xh2i34wx8j2hn7szh8p6w8i3az5qa"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Build from the "ngs-sdk" sub-directory of the checkout.  Packages
         ;; inheriting from this one replace this phase to pick another
         ;; sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "ngs-sdk")))
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Allow 'konfigure.perl' to find 'package.prl'.
               (setenv "PERL5LIB"
                       (string-append ".:" (getenv "PERL5LIB")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out))))))))
    (native-inputs (list perl))
    ;; According to the test
    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
6152
(define-public java-ngs
  ;; Built from the same checkout as ngs-sdk, but from its "ngs-java"
  ;; sub-directory.
  (package (inherit ngs-sdk)
    (name "java-ngs")
    (arguments
     `(,@(substitute-keyword-arguments
             `(#:modules ((guix build gnu-build-system)
                          (guix build utils)
                          (srfi srfi-1)
                          (srfi srfi-26))
               ,@(package-arguments ngs-sdk))
           ((#:phases phases)
            `(modify-phases ,phases
               ;; Enter "ngs-java" instead of the directory used by ngs-sdk.
               (replace 'enter-dir
                 (lambda _ (chdir "ngs-java"))))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
6170
;; NCBI-VDB, built with its own non-Autoconf './configure' script.  Besides
;; the regular installation, the "ilib" interface libraries, the interface
;; headers and two ".kfg" files are copied into the output.
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.10.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/ncbi-vdb")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0m8hlxscidsfqm9x9fyi62q6lpf1dv5115kgjjgnrkl49q9c27m6"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:make-flags '("HAVE_HDF5=1")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-files-writable
           (lambda _
             (for-each make-file-writable (find-files "." ".*"))
             #t))
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (setenv "PERL5LIB"
                     (string-append (getcwd) "/setup:"
                                    (getenv "PERL5LIB")))
             #t))
         ;; See https://github.com/ncbi/ncbi-vdb/issues/14
         (add-after 'unpack 'patch-krypto-flags
           (lambda _
             (substitute* "libs/krypto/Makefile"
               (("-Wa,-march=generic64\\+aes") "")
               (("-Wa,-march=generic64\\+sse4") ""))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((destination (assoc-ref outputs "out")))
               ;; Override include path for libmagic
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include'")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; Append "-msse2 " right after every "CFLAGS =".
               (substitute* "build/Makefile.env"
                 (("CFLAGS =" prefix)
                  (string-append prefix "-msse2 ")))

               ;; Override search path for ngs-java
               (substitute* "setup/package.prl"
                 (("/usr/local/ngs/ngs-java")
                  (assoc-ref inputs "java-ngs")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" destination)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5")))
               #t)))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (ilib (string-append out "/ilib")))
               ;; Install interface libraries.  On i686 the interface
               ;; libraries are installed to "linux/gcc/i386", so we need to
               ;; use the Linux architecture name ("i386") instead of the
               ;; target system prefix ("i686").
               (mkdir ilib)
               (copy-recursively
                (string-append "build/ncbi-vdb/linux/gcc/"
                               ,(platform-linux-architecture
                                 (lookup-platform-by-target-or-system
                                  (or (%current-target-system)
                                      (%current-system))))
                               "/rel/ilib")
                ilib)
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include"))
               #t)))
         ;; These files are needed by sra-tools.
         (add-after 'install 'install-configuration-files
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((kfg-dir (string-append (assoc-ref outputs "out") "/kfg")))
               (mkdir kfg-dir)
               (for-each (lambda (file)
                           (install-file file kfg-dir))
                         '("libs/kfg/default.kfg"
                           "libs/kfg/certs.kfg")))
             #t)))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs (list perl))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
6290
;; PLINK 1.07.  There is no 'configure' script and the 'install' phase is
;; replaced by a plain copy of the "plink" binary.
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags
       ,#~(list (string-append "LIB_LAPACK="
                               #$(this-package-input "lapack")
                               "/lib/liblapack.so")
                "WITH_LAPACK=1"
                "FORCE_DYNAMIC=1"
                ;; disable phoning home
                "WITH_WEBCHECK=")
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (install-file "plink" bin)))))))
    (native-inputs
     (list unzip))
    (inputs
     (list zlib lapack))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
6341
;; PLINK 2, built in "2.0/build_dynamic".  Synopsis and description are
;; inherited from 'plink'.
(define-public plink-ng
  (package
    (inherit plink)
    (name "plink-ng")
    (version "2.00a3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/chrchang/plink-ng")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0m8wkyvbgvcr5kzc284w8fbhpxwglh2c1xq0yc3yv00a53gs7rv0"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #false ;TEST_EXTRACT_CHR doesn't produce expected files
      #:make-flags
      #~(list "BLASFLAGS=-llapack -lopenblas"
              "NO_SSE42=1"
              "NO_AVX2=1"
              "STATIC_ZSTD="
              (string-append "CC=" #$(cc-for-target))
              (string-append "PREFIX=" #$output)
              "DESTDIR=")
      #:phases
      '(modify-phases %standard-phases
         (add-after 'unpack 'chdir
           (lambda _
             (chdir "2.0/build_dynamic")))
         (delete 'configure)            ; no "configure" script
         (replace 'check
           (lambda* (#:key tests? inputs #:allow-other-keys)
             (when tests?
               ;; Put the build directory at the front of PATH before
               ;; running the test script.
               (setenv "PATH" (string-append (getcwd) ":" (getenv "PATH")))
               (with-directory-excursion "../Tests"
                 ;; Prefix the matched script invocations with the absolute
                 ;; file name of "bash".
                 (substitute* "run_tests.sh"
                   (("^./run_tests" script)
                    (string-append (which "bash") " " script)))
                 (invoke "bash" "run_tests.sh")))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "plink2" bin)))))))
    (inputs
     (list lapack openblas zlib `(,zstd "lib")))
    (native-inputs
     (list diffutils plink python simde)) ; for tests
    (home-page "https://www.cog-genomics.org/plink/")
    (license license:gpl3+)))
6392
;; Smithlab C++ helper library, pinned to a Git commit.  There is no
;; 'configure' script; the 'install' phase copies the object files to "lib"
;; and the ".hpp" headers to "include/smithlab-cpp".
(define-public smithlab-cpp
  (let ((revision "1")
        (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/smithlabcode/smithlab_cpp")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:tests? #f                    ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'use-samtools-headers
             (lambda _
               ;; Refer to the header as "samtools/sam.h".  The dot is
               ;; escaped so that only a literal "sam.h" is rewritten; an
               ;; unescaped "." would match any character.
               (substitute* '("SAM.cpp"
                              "SAM.hpp")
                 (("sam\\.h") "samtools/sam.h"))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (lib (string-append out "/lib"))
                      (include (string-append out "/include/smithlab-cpp")))
                 (mkdir-p lib)
                 (mkdir-p include)
                 ;; Object files go to "lib", headers to the include
                 ;; directory.
                 (for-each (cut install-file <> lib)
                           (find-files "." "\\.o$"))
                 (for-each (cut install-file <> include)
                           (find-files "." "\\.hpp$")))
               #t))
           (delete 'configure))))
      (inputs
       (list samtools-0.1 zlib))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
6444
;; preseq, built with plain 'make' (no 'configure' script).  The bundled
;; "samtools" directory is removed from the source by the snippet.
(define-public preseq
  (package
    (name "preseq")
    (version "2.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/smithlabcode/preseq/"
                           "releases/download/v" version
                           "/preseq_v" version ".tar.bz2"))
       (sha256
        (base32 "149x9xmk1wy1gff85325yfzqc0qk4sgp1w6gbyj9cnji4x1dszbl"))
       (modules '((guix build utils)))
       ;; Remove bundled samtools.
       (snippet '(delete-file-recursively "samtools"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags
       ,#~(list (string-append "PREFIX=" #$output)
                (string-append "LIBBAM="
                               #$(this-package-input "samtools")
                               "/lib/libbam.a")
                (string-append "SMITHLAB_CPP="
                               #$(this-package-input "smithlab-cpp")
                               "/lib")
                "PROGS=preseq"
                "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     (list gsl samtools-0.1 smithlab-cpp zlib))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
6488
;; Screed, fetched from PyPI and built with the default phases of
;; 'python-build-system'.
(define-public python-screed
  (package
    (name "python-screed")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32 "148vcb7w2wr6a4w6vs2bsxanbqibxfk490zbcbg4m61s8669zdjx"))))
    (build-system python-build-system)
    (inputs
     (list python-bz2file))
    (native-inputs
     (list python-pytest
           python-pytest-cov
           python-pytest-runner))
    (home-page "https://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
6511
;; TagGD.  The snippet deletes every ".c" file below "taggd" from the
;; source, and one test is renamed so that it is not collected.
(define-public python-taggd
  (package
    (name "python-taggd")
    (version "0.3.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/SpatialTranscriptomicsResearch/taggd")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0j19ah81z7aqrdljah9hyarp91gvgbk63pz6fz3pdpksy1yqyi6k"))
       (modules '((guix build utils)))
       (snippet
        '(for-each delete-file
                   (find-files "taggd" "\\.c$")))))
    (build-system python-build-system)
    (arguments
     (list
      #:phases
      '(modify-phases %standard-phases
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             ;; Give the test a name that no longer starts with "test".
             (substitute* "tests/taggd_demultiplex_test.py"
               (("def test_normal_bam_run")
                "def _disabled_test_normal_bam_run")))))))
    (native-inputs
     (list python-cython))
    (propagated-inputs
     (list python-numpy python-pysam python-setuptools))
    (home-page "https://github.com/SpatialTranscriptomicsResearch/taggd")
    (synopsis "Genetic barcode demultiplexing")
    (description "This package provides TagGD barcode demultiplexing utilities
for Spatial Transcriptomics data.")
    (license license:bsd-3)))
6548
;; ST Pipeline, fetched from PyPI.  The "argparse" entry is stripped from
;; "requirements.txt" before the build.
(define-public stpipeline
  (package
    (name "stpipeline")
    (version "1.8.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "stpipeline" version))
       (sha256
        (base32 "0har2g42fvaqpiz66lincy86aj1hvwzds26kxhxfamvyvv4721wk"))))
    (build-system python-build-system)
    (arguments
     (list
      #:phases
      '(modify-phases %standard-phases
         (add-after 'unpack 'relax-requirements
           (lambda _
             ;; Drop "argparse" and whatever follows it on the same line.
             (substitute* "requirements.txt"
               (("argparse.*") "")))))))
    (propagated-inputs
     (list htseq
           python-cython
           python-invoke
           python-numpy
           python-pandas
           python-pympler
           python-pysam
           python-regex
           python-scikit-learn
           python-scipy
           python-seaborn
           python-setuptools
           python-sqlitedict
           python-taggd
           samtools
           star))
    (home-page "https://github.com/SpatialTranscriptomicsResearch/st_pipeline")
    (synopsis "Pipeline for spatial mapping of unique transcripts")
    (description
     "This package provides an automated pipeline for spatial mapping of
unique transcripts.")
    (license license:expat)))
6591
;; SRA Toolkit, configured against the installed 'ncbi-vdb' package rather
;; than against an ncbi-vdb build tree.
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.10.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/sra-tools")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1cr2mijkfs5sm35ffjs6861qsd1qkgnhnbavdv65zg5d655abbjf"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:make-flags
       ;; The ".kfg" files and the library directory all come from the
       ;; "ncbi-vdb" input; the library sub-directory is "/lib64" when the
       ;; system string starts with "x86_64" and "/lib32" otherwise.
       ,#~(list (string-append "DEFAULT_CRT="
                               #$(this-package-input "ncbi-vdb")
                               "/kfg/certs.kfg")
                (string-append "DEFAULT_KFG="
                               #$(this-package-input "ncbi-vdb")
                               "/kfg/default.kfg")
                (string-append "VDB_LIBDIR="
                               #$(this-package-input "ncbi-vdb")
                               #$(if (string-prefix? "x86_64"
                                                     (or (%current-target-system)
                                                         (%current-system)))
                                     "/lib64"
                                     "/lib32")))
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (setenv "PERL5LIB"
                     (string-append (getcwd) "/setup:"
                                    (getenv "PERL5LIB")))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The build system expects a directory containing the sources
             ;; and raw build output of ncbi-vdb, including files that are
             ;; not installed.  Since we are building against an installed
             ;; version of ncbi-vdb, the following modifications are needed.
             (substitute* "setup/konfigure.perl"
               ;; Make the configure script look for the "ilib" directory of
               ;; "ncbi-vdb" without first checking for the existence of a
               ;; matching library in its "lib" directory.
               (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                "my $f = File::Spec->catdir($ilibdir, $ilib);")
               ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
               (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

             ;; Dynamic linking
             (substitute* "tools/copycat/Makefile"
               (("smagic-static") "lmagic"))
             (substitute* "tools/driver-tool/utf8proc/Makefile"
               (("CC\\?=gcc") "myCC=gcc")
               (("\\(CC\\)") "(myCC)"))

             ;; The 'configure' script doesn't recognize things like
             ;; '--enable-fast-install'.
             (let ((ncbi-vdb (assoc-ref inputs "ncbi-vdb")))
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" (assoc-ref outputs "out"))
                       "--debug"
                       (string-append "--with-fuse-prefix="
                                      (assoc-ref inputs "fuse"))
                       (string-append "--with-magic-prefix="
                                      (assoc-ref inputs "libmagic"))
                       ;; TODO: building with libxml2 fails with linker errors
                       #;
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ncbi-vdb-sources=" ncbi-vdb)
                       (string-append "--with-ncbi-vdb-build=" ncbi-vdb)
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5"))))
             #t)))))
    (native-inputs (list perl))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5-1.10)
       ("zlib" ,zlib)
       ("python" ,python-wrapper)))
    (home-page
     "https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
6696
;; SeqAn 3, built with CMake.  The test suite is driven through "ctest".
(define-public seqan
  (package
    (name "seqan")
    (version "3.0.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/seqan/seqan3/releases/"
                                  "download/" version "/seqan3-"
                                  version "-Source.tar.xz"))
              (sha256
               (base32
                "1h2z0cvgidhkmh5xsbw75waqbrqbbv6kkrvb0b92xfh3gqpaiz22"))))
    (build-system cmake-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'check
           ;; Honour #:tests? so that the test suite can be switched off
           ;; (for instance with "--without-tests"); the replacement phase
           ;; previously ran "ctest" unconditionally.
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "ctest" "test" "--output-on-failure")))))))
    (native-inputs
     (list bzip2 cereal zlib))
    (home-page "https://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
6727
;; SeqAn 2.  Nothing is compiled: the builder unpacks the release tarball
;; and copies "include" to the "out" output and "share" to the "doc" output.
(define-public seqan-2
  (package
    (inherit seqan)
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/seqan/seqan/releases/"
                           "download/seqan-v" version
                           "/seqan-library-" version ".tar.xz"))
       (sha256
        (base32
         "19a1rlxx03qy1i1iriicly68w64yjxbv24g9gdywnfmq998v35yx"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       ,#~(begin
            (use-modules (guix build utils))
            (let ((tar-dir #$(this-package-native-input "tar"))
                  (xz-dir #$(this-package-native-input "xz"))
                  (include-target (string-append #$output "/include"))
                  (share-target (string-append #$output:doc "/share")))
              ;; PATH holds only the tools needed to unpack the tarball.
              (setenv "PATH" (string-append tar-dir "/bin:" xz-dir "/bin"))
              (invoke "tar" "xvf" #$(this-package-native-input "source"))
              (chdir (string-append "seqan-library-" #$version))
              (copy-recursively "include" include-target)
              (copy-recursively "share" share-target)))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("xz" ,xz)))))
6762
;; SeqAn 1.  Nothing is compiled: the builder unpacks the release tarball
;; and copies "include" to the "out" output and "share" to the "doc" output.
(define-public seqan-1
  (package
    (inherit seqan)
    (name "seqan")
    (version "1.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://packages.seqan.de/seqan-library/"
                           "seqan-library-" version ".tar.bz2"))
       (sha256
        (base32
         "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       ,#~(begin
            (use-modules (guix build utils))
            (let ((tar-dir #$(this-package-native-input "tar"))
                  (bzip2-dir #$(this-package-native-input "bzip2"))
                  (include-target (string-append #$output "/include"))
                  (share-target (string-append #$output:doc "/share")))
              ;; PATH holds only the tools needed to unpack the tarball.
              (setenv "PATH" (string-append tar-dir "/bin:" bzip2-dir "/bin"))
              (invoke "tar" "xvf" #$(this-package-native-input "source"))
              (chdir (string-append "seqan-library-" #$version))
              (copy-recursively "include" include-target)
              (copy-recursively "share" share-target)))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))))
6796
;; Seqmagick, fetched from PyPI and built with the default phases of
;; 'python-build-system'.
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "seqmagick" version))
       (sha256
        (base32 "0pf98da7i59q47gwrbx0wjk6xlvbybiwphw80w7h4ydjj0579a2b"))))
    (build-system python-build-system)
    (native-inputs
     (list python-nose))
    (inputs
     (list python-biopython))
    (home-page "https://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
6822
;; Seqtk, built with plain 'make' (no 'configure' script).  The 'install'
;; phase copies the "seqtk" binary to "bin".
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/lh3/seqtk")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1bfzlqa84b5s1qi22blmmw2s8xdyp9h9ydcq22pfjhh5gab3yz6l"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so we just run a sanity check.  It is
           ;; skipped when #:tests? is false; the replacement phase
           ;; previously ran unconditionally.
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "./seqtk" "seq"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "seqtk" bin)
               #t))))))
    (inputs
     (list zlib))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
6858
;; SNAP, built with plain 'make' (no 'configure' script).  The 'check'
;; phase runs "./unit_tests" and the 'install' phase copies the two
;; programs to "bin".
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "2.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/amplab/snap")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0yag3jcazdqfxgmw0vbi91i98kj9sr0aqx83qqj6m5b45wxs7jms"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "./unit_tests"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (for-each (lambda (program)
                           (install-file program bin))
                         '("snap-aligner" "SNAPCommand"))))))))
    (native-inputs
     (list zlib))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; 32-bit systems are not supported by the unpatched code.
    ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
6900
;; SortMeRNA, built with CMake.  The binary goes to the "out" output and the
;; FASTA databases shipped in the source go to the "db" output.
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "4.3.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biocore/sortmerna")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0f8jfc8vsq6llhbb92p9yv7nbp566yqwfcmq3g2hw0n7d8hyl3a8"))))
    (build-system cmake-build-system)
    (outputs '("out"                    ;for binaries
               "db"))                   ;for sequence databases
    (arguments
     (list
      #:tests? #false                   ;unclear how to run them
      #:configure-flags
      #~(list "-DWITH_TESTS=ON"
              "-DCMAKE_CXX_FLAGS=-pthread"
              "-DZLIB_STATIC=OFF"
              "-DROCKSDB_STATIC=OFF"
              "-DPORTABLE=OFF"          ;do not use static linking
              (string-append "-DROCKSDB_HOME="
                             #$(this-package-input "rocksdb"))
              (string-append "-DRAPIDJSON_HOME="
                             #$(this-package-input "rapidjson"))
              (string-append "-DRapidJson_DIR="
                             #$(this-package-input "rapidjson")
                             "/lib/cmake/RapidJSON")
              (string-append "-DRapidJSON_INCLUDE_DIR="
                             #$(this-package-input "rapidjson")
                             "/include"))
      #:phases
      '(modify-phases %standard-phases
         (add-after 'unpack 'find-concurrentqueue-headers
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Ensure that headers can be found
             (let ((headers (search-input-directory
                             inputs "/include/concurrentqueue"))
                   (previous (or (getenv "CPLUS_INCLUDE_PATH") "")))
               (setenv "CPLUS_INCLUDE_PATH"
                       (string-append headers ":" previous)))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((bin (string-append (assoc-ref outputs "out") "/bin"))
                    (databases
                     (string-append (assoc-ref outputs "db")
                                    "/share/sortmerna/rRNA_databases")))
               (install-file "src/sortmerna" bin)
               (for-each (lambda (fasta)
                           (install-file fasta databases))
                         (find-files "../source/data/rRNA_databases"
                                     ".*fasta"))))))))
    (inputs
     (list concurrentqueue
           gflags                       ; because of rocksdb
           rapidjson rocksdb zlib))
    (native-inputs
     (list pkg-config))
    (home-page "https://bioinfo.lifl.fr/RNA/sortmerna/")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and @acronym{OTU, operational taxonomic unit} picking of @acronym{NGS, next
generation sequencing} reads. The core algorithm is based on approximate seeds
and allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:lgpl3)))
6975
;; STAR, built from the "source" sub-directory with plain 'make' (no
;; 'configure' script).  Pre-built binaries and the bundled htslib are
;; removed from the source by the snippet; the 'htslib' input is used
;; instead.
(define-public star
  (package
    (name "star")
    (version "2.7.8a")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/alexdobin/STAR")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0zc5biymja9zml9yizcj1h68fq9c6sxfcav8a0lbgvgsm44rvans"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (substitute* "source/Makefile"
                    (("/bin/rm") "rm"))
                  ;; Remove pre-built binaries and bundled htslib sources.
                  (delete-file-recursively "bin/MacOSX_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64_static")
                  (delete-file-recursively "source/htslib")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-source-dir
           (lambda _ (chdir "source") #t))
         (add-after 'enter-source-dir 'make-reproducible
           (lambda _
             ;; Replace the value of COMPILATION_TIME_PLACE with a fixed
             ;; string.
             (substitute* "Makefile"
               (("(COMPILATION_TIME_PLACE=\")(.*)(\")" _ pre mid post)
                (string-append pre "Built with Guix" post)))
             #t))
         ;; See https://github.com/alexdobin/STAR/pull/562
         (add-after 'enter-source-dir 'add-missing-header
           (lambda _
             (substitute* "SoloReadFeature_inputRecords.cpp"
               (("#include \"binarySearch2.h\"" h)
                (string-append h "\n#include <math.h>")))
             #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop the "htslib" prerequisite of "Depend.list".
             (substitute* "Makefile"
               (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ prefix) prefix))
             ;; Turn quoted includes of the bundled copy into system
             ;; includes.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             ;; The dot before "h" is escaped, like in the pattern above, so
             ;; that it only matches a literal ".h" suffix.
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+\\.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "STAR" bin))
             #t))
         (delete 'configure))))
    (native-inputs
     (list xxd))
    (inputs
     (list htslib zlib))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; Only 64-bit systems are supported according to the README.
    (supported-systems '("x86_64-linux" "mips64el-linux"))
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
7062
;; STAR 2.7.3a.  Only the version and the source differ from 'star'; all
;; other fields are inherited.
(define-public star-for-pigx
  (package
    (inherit star)
    (name "star")
    (version "2.7.3a")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/alexdobin/STAR")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1hgiqw5qhs0pc1xazzihcfd92na02xyq2kb469z04y1v51kpvvjq"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "source/Makefile"
             (("/bin/rm") "rm"))
           ;; Remove pre-built binaries and bundled htslib sources.
           (for-each delete-file-recursively
                     '("bin/MacOSX_x86_64"
                       "bin/Linux_x86_64"
                       "bin/Linux_x86_64_static"
                       "source/htslib"))
           #t))))))
7088
;; Variant of `star' that builds the "STARlong" make target and installs
;; the resulting "STARlong" executable.
(define-public starlong
  (package
    (inherit star)
    (name "starlong")
    (arguments
     (substitute-keyword-arguments (package-arguments star)
       ;; The inherited flags are not reused: only "STARlong" is passed.
       ((#:make-flags flags)
        '(list "STARlong"))
       ((#:phases phases)
        `(modify-phases ,phases
           ;; Allow extra long sequence reads.
           (add-after 'unpack 'make-extra-long
             (lambda _
               (substitute* "source/IncludeDefine.h"
                 ;; Keep the "#define NAME " prefix and replace the value.
                 (("(#define DEF_readNameLengthMax ).*" _ definition)
                  (string-append definition "900000\n")))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bindir (string-append (assoc-ref outputs "out")
                                            "/bin/")))
                 (install-file "STARlong" bindir))
               #t))))))))
7110
(define-public subread
  (package
    (name "subread")
    (version "2.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/subread/subread-"
                           version "/subread-" version "-source.tar.gz"))
       (sha256
        (base32
         "0szmllia7jl0annk5568xjhw6cc8yj1c5mb961qk5m0lz6ig7kjn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; Upstream's CC and CCFLAGS carry many x86_64 optimizations by
       ;; default.  CCFLAGS is rebuilt here so that the x86_64-specific
       ;; flags are appended only when the build target is an x86_64 system.
       #:make-flags
       (list (let* ((system ,(or (%current-target-system)
                                 (%current-system)))
                    (generic '("-ggdb" "-fomit-frame-pointer"
                               "-ffast-math" "-funroll-loops"
                               "-fmessage-length=0" "-fcommon"
                               "-O9" "-Wall" "-DMAKE_FOR_EXON"
                               "-DMAKE_STANDALONE"
                               "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
                    (x86-64-only '("-mmmx" "-msse" "-msse2" "-msse3"))
                    (selected (if (string-prefix? "x86_64" system)
                                  (append generic x86-64-only)
                                  generic)))
               (string-append "CCFLAGS=" (string-join selected)))
             "-f" "Makefile.Linux"
             "CC=gcc ${CCFLAGS}")
       #:phases
       (modify-phases %standard-phases
         ;; The build is run from the "src" sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src")))
         ;; Copy everything from "../bin" into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin/")))
               (mkdir-p bindir)
               (copy-recursively "../bin" bindir))))
         ;; no "configure" script
         (delete 'configure))))
    (inputs (list zlib))
    (home-page "http://subread.sourceforge.net/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
7164
(define-public stringtie
  (package
    (name "stringtie")
    (version "2.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                           "stringtie-" version ".tar.gz"))
       (sha256
        (base32
         "08w3ish4y9kf9acp7k38iwi8ixa6j51m6qyf0vvfj7yz78a3ai3x"))
       ;; The tarball bundles third-party sources; the "htslib" directory
       ;; is removed from it.
       (modules '((guix build utils)))
       (snippet
        '(delete-file-recursively "htslib"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no test suite
       #:make-flags '("LIBDEFLATE=-ldeflate"
                      "LIBBZ2=-lbz2"
                      "LIBLZMA=-llzma")
       #:phases
       (modify-phases %standard-phases
         ;; no configure script
         (delete 'configure)
         ;; Link with "-lhts" and drop the reference to the static
         ;; ${HTSLIB}/libhts.a archive from the Makefile.
         (add-before 'build 'use-system-samtools
           (lambda _
             (substitute* "Makefile"
               ((" -lm") " -lm -lhts")
               ((" \\$\\{HTSLIB\\}/libhts\\.a") " "))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin/")))
               (install-file "stringtie" bindir)))))))
    (inputs
     (list bzip2 htslib-for-stringtie libdeflate zlib))
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (license license:expat)))
7215
(define-public taxtastic
  (package
    (name "taxtastic")
    (version "0.9.2")
    (source
     (origin
       ;; Fetched from git because the Pypi version does not include tests.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/fhcrc/taxtastic")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1k6wg9ych1j3srnhdny1y4470qlhfg730rb3rm3pq7l7gw62vmgb"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'prepare-directory
           (lambda _
             ;; This test fails, but the error is not caught by the test
             ;; framework, so the tests fail; give cmd_fails a non-empty
             ;; argument instead.
             (substitute* "tests/test_taxit.py"
               (("self.cmd_fails\\(''\\)")
                "self.cmd_fails('nothing')"))
             ;; The version file is expected to be created with git
             ;; describe; write the package version into it instead.
             (mkdir-p "taxtastic/data")
             (with-output-to-file "taxtastic/data/ver"
               (lambda ()
                 (display ,version)))))
         (replace 'check
           ;; Note, this fails to run with "-v" as it tries to write to a
           ;; closed output stream.
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "python" "-m" "unittest"))))
         ;; This fails because it cannot find psycopg2 even though it is
         ;; available.
         (delete 'sanity-check))))
    (propagated-inputs
     (list python-sqlalchemy
           python-decorator
           python-biopython
           python-pandas
           python-psycopg2
           python-fastalite
           python-pyyaml
           python-six
           python-jinja2
           python-dendropy))
    (home-page "https://github.com/fhcrc/taxtastic")
    (synopsis "Tools for taxonomic naming and annotation")
    (description
     "Taxtastic is software written in python used to build and maintain
reference packages i.e. collections of reference trees, reference alignments,
profiles, and associated taxonomic information.")
    (license license:gpl3+)))
7272
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.16")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/vcftools/vcftools/releases/download/v"
             version "/vcftools-" version ".tar.gz"))
       (sha256
        (base32
         "1qqlx7flfv7axrjwkaz6njkscsl1d0jw98ns8d8bh1n1hd1pgz6v"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       ;; The make flags are a G-expression so that the output directory
       ;; can be referenced through #$output.
       #:make-flags
       ,#~(list
           "CFLAGS=-O2"                 ; override "-m64" flag
           (string-append "PREFIX=" #$output)
           (string-append "MANDIR=" #$output "/share/man/man1"))))
    (native-inputs
     (list pkg-config))
    (inputs
     (list perl zlib))
    (home-page "https://vcftools.github.io/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at https://vcftools.github.io/license.html
    (license license:lgpl3)))
7307
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/infernal/"
                           "infernal-" version ".tar.gz"))
       (sha256
        (base32
         "1z4mgwqg1j4n5ika08ai8mg9yjyjhf4821jp83v2bgwzxrykqjgr"))))
    (build-system gnu-build-system)
    (native-inputs
     (list perl python))                ; for tests
    (home-page "http://eddylab.org/infernal/")
    (synopsis "Inference of RNA alignments")
    (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    ;; Infernal requires VMX or SSE capability for parallel instructions,
    ;; hence the restricted list of systems.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (license license:bsd-3)))
7335
;; Built from a fixed upstream commit; the version string is derived from
;; "0.2.0", the revision and the commit.
(define-public r-gutils
  (let ((commit "10e36c7b580aacb2d952140a3fdd82418aaddea6")
        (revision "1"))
    (package
      (name "r-gutils")
      (version (git-version "0.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/mskilab/gUtils")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1wq9kd1afzy7ii510r20c4n9fkykj6p15q5c85ws27h1q5w4ghxy"))))
      (properties '((upstream-name . "gUtils")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-biocgenerics
             r-data-table
             r-genomeinfodb
             r-genomicranges
             r-iranges
             r-matrix
             r-s4vectors
             r-stringr))
      (home-page "https://github.com/mskilab/gUtils")
      (synopsis "Additional capabilities and speed for GenomicRanges operations")
      (description
       "This is an R package providing additional capabilities and speed for
@code{GenomicRanges} operations.")
      (license license:gpl2))))
7368
;; Built from a fixed upstream commit; the version string is derived from
;; "0.0.0.9000", the revision and the commit.
(define-public r-bamutils
  (let ((commit "639dba901f16944fa1b7a8d7048701ba86a2cdb8")
        (revision "1"))
    (package
      (name "r-bamutils")
      (version (git-version "0.0.0.9000" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/mskilab/bamutils/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0qwby2v5rydnipvf1iv1wz9nf02yq98k0xbc4inf9mqc54jwacs0"))))
      (properties '((upstream-name . "bamUtils")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-abind
             r-biocgenerics
             r-data-table
             r-genomicalignments
             r-genomicranges
             r-gutils
             r-rsamtools
             r-variantannotation))
      (home-page "https://github.com/mskilab/bamutils/")
      (synopsis "Utility functions for manipulating BAMs")
      (description "This package provides utility functions for manipulating
BAM files.")
      (license license:gpl2))))
7400
;; Built from a fixed upstream commit; the version string is derived from
;; "0.1.0", the revision and the commit.
(define-public r-gtrack
  (let ((commit "a694fa36cedafca2658da79fc8e5b673535b15e5")
        (revision "1"))
    (package
      (name "r-gtrack")
      (version (git-version "0.1.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/mskilab/gTrack/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "070qlrbqsbj9max2vx740zigqh0ymvnw2pm1ia5la3wb4dbfwh2b"))))
      (properties '((upstream-name . "gTrack")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-biocgenerics
             r-data-table
             r-genomeinfodb
             r-genomicranges
             r-gutils
             r-iranges
             r-matrix
             r-rcolorbrewer
             r-rcpp
             r-rcurl
             r-rtracklayer
             r-s4vectors))
      (home-page "https://github.com/mskilab/gTrack/")
      (synopsis "Plot tracks of complex genomic data across multiple genomic windows")
      (description
       "This package provides an object for plotting GRanges, RleList, UCSC
file formats, and ffTrack objects in multi-track panels.")
      (license license:gpl2))))
7437
;; Built from a fixed upstream commit; the version string is derived from
;; "0.2.0", the revision and the commit.
(define-public r-gchain
  (let ((commit "dc393e8dd0d8efaf36270c04d7112db8553db36a")
        (revision "1"))
    (package
      (name "r-gchain")
      (version (git-version "0.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/mskilab/gChain/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "105wgi5w2fhwq1grsvj6zjigwg0sny3z7zr577q8ki3qffjwdkj0"))))
      (properties '((upstream-name . "gChain")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-bamutils
             r-biostrings
             r-data-table
             r-genomicalignments
             r-genomicranges
             r-gtrack
             r-gutils
             r-matrix
             r-rtracklayer))
      (home-page "https://github.com/mskilab/gChain/")
      (synopsis "Additional capabilities and speed for GenomicRanges operations")
      (description
       "This R package provides additional capabilities and speed for
GenomicRanges operations.")
      (license license:gpl2))))
7471
;; Built from a fixed upstream commit; the version string is derived from
;; "0.0.0.9000", the revision and the commit.
(define-public r-skitools
  (let ((commit "22d107d32f063eb891eb5e7fb36996d1c0b0d2bc")
        (revision "1"))
    (package
      (name "r-skitools")
      (version (git-version "0.0.0.9000" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/mskilab/skitools/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1977d9bkdk9l2n6niahfj9vksh9l1ga4g7c3b3x27lj1gc0qgr4z"))))
      (properties `((upstream-name . "skitools")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-biostrings
             r-complexheatmap
             r-data-table
             r-devtools
             r-dt
             r-gchain
             r-genomeinfodb
             r-genomicranges
             r-ggplot2
             r-gplots
             r-gutils
             r-htmlwidgets
             r-hwriter
             r-igraph
             r-iranges
             r-plotly
             r-rcolorbrewer
             r-reshape2
             r-s4vectors
             r-stringr
             r-variantannotation))
      (home-page "https://github.com/mskilab/skitools/")
      ;; Spelling corrected: the synopsis previously read "utilties".
      (synopsis "Various mskilab R utilities")
      (description
       "This package provides R miscellaneous utilities for basic data
manipulation, debugging, visualization, lsf management, and common mskilab
tasks.")
      (license license:expat))))
7518
;; Built from a fixed upstream commit; the version string is derived from
;; "0.0.1", the revision and the commit.
(define-public r-chromunity
  (let ((commit "09fce8bc12cb84b45a6ea25bf8db6e5b75113d4f")
        (revision "1"))
    (package
      (name "r-chromunity")
      (version (git-version "0.0.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/mskilab/chromunity")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0lp0h614k8fq6h9gpbylk4chh7q6w4qda8lx03ajrpppxmg7al2d"))))
      (properties '((upstream-name . "chromunity")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-arrow
             r-biocgenerics
             r-data-table
             r-gchain
             r-genomicranges
             r-gutils
             r-igraph
             r-magrittr
             r-mass
             r-matrix
             r-pbmcapply
             r-plyr
             r-r6
             r-skitools
             r-zoo))
      (home-page "https://github.com/mskilab/chromunity")
      (synopsis "Discovery of communities in Pore-C concatemers")
      (description "This is a package for the discovery of communities in
Pore-C concatemers.")
      (license license:gpl3))))
7557
;; Built from a fixed upstream commit; the version string is derived from
;; "1.0.0", the revision and the commit.
(define-public r-presto
  (let ((commit "052085db9c88aa70a28d11cc58ebc807999bf0ad")
        (revision "0"))
    (package
      (name "r-presto")
      (version (git-version "1.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/immunogenomics/presto")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1c3fmag4r4p2lvbvxlxyck9dvfw1prbwcl9665mmlx4a35750hk8"))))
      ;; The property key is `upstream-name', as in the other R packages of
      ;; this file; it was previously misspelled as `upstream'.
      (properties `((upstream-name . "presto")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-data-table
             r-deseq2
             r-dplyr
             r-matrix
             r-rcpp
             r-rcpparmadillo
             r-reshape2
             r-rlang
             r-tidyr))
      (home-page "https://github.com/immunogenomics/presto")
      (synopsis "Fast Functions for Differential Expression using Wilcox and AUC")
      (description "This package performs a fast Wilcoxon rank sum test and
auROC analysis.")
      (license license:gpl3))))
7590
;; Built from a fixed upstream commit; the version string is derived from
;; "0.7.0", the revision and the commit.
(define-public r-sccustomize
  (let ((commit "8414d1f5fb32277855b0619191a568932b7baeb0")
        (revision "1"))
    (package
      (name "r-sccustomize")
      (version (git-version "0.7.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/samuel-marsh/scCustomize")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1wcgfq7lx83a2kf8pjbw524gdvxf351n08cwd5wzmmy57kf4knbj"))))
      (properties '((upstream-name . "scCustomize")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-circlize
             r-colorway
             r-cowplot
             r-data-table
             r-dittoseq
             r-dplyr
             r-forcats
             r-ggbeeswarm
             r-ggplot2
             r-ggprism
             r-ggpubr
             r-ggrastr
             r-ggrepel
             r-glue
             r-janitor
             r-magrittr
             r-matrix
             r-paletteer
             r-patchwork
             r-pbapply
             r-purrr
             r-remotes
             r-scales
             r-scattermore
             r-seurat
             r-seuratobject
             r-stringi
             r-stringr
             r-tibble
             r-tidyr
             r-tidyselect
             r-viridis))
      (native-inputs
       (list r-knitr))
      (home-page "https://github.com/samuel-marsh/scCustomize")
      (synopsis "Custom visualization and analyses of single-cell sequencing")
      (description
       "This is a collection of functions created and/or curated to aid in the
visualization and analysis of single-cell data using R.")
      (license license:gpl3+))))
7648
(define-public r-snapatac
  (package
    (name "r-snapatac")
    (version "2.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/r3fang/SnapATAC")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "037jzlbl436fi7lkpq7d83i2vd1crnrik3vac2x6xj75dbikb2av"))))
    (properties `((upstream-name . "SnapATAC")))
    (build-system r-build-system)
    (propagated-inputs
     (list r-bigmemory
           r-doparallel
           r-dosnow
           r-edger
           r-foreach
           r-genomicranges
           r-igraph
           r-iranges
           r-irlba
           r-matrix
           r-plyr
           r-plot3d
           r-rann
           r-raster
           r-rcolorbrewer
           r-rhdf5
           r-rtsne
           r-scales
           r-viridis))
    (home-page "https://github.com/r3fang/SnapATAC")
    (synopsis "Single nucleus analysis package for ATAC-Seq")
    ;; "large-scale" is kept on a single line: a line break after the
    ;; hyphen would be rendered as "large- scale".
    (description
     "This package provides a fast and accurate analysis toolkit for single
cell ATAC-seq (Assay for transposase-accessible chromatin using sequencing).
Single cell ATAC-seq can resolve the heterogeneity of a complex tissue and
reveal cell-type specific regulatory landscapes. However, the exceeding data
sparsity has posed unique challenges for the data analysis. This package
@code{r-snapatac} is an end-to-end bioinformatics pipeline for analyzing
large-scale single cell ATAC-seq data which includes quality control,
normalization, clustering analysis, differential analysis, motif inference and
exploration of single cell ATAC-seq sequencing data.")
    (license license:gpl3)))
7697
;; Built from a fixed upstream commit; the version string is derived from
;; "0.0.1", the revision and the commit.
(define-public r-umi4cpackage
  (let ((commit "88b07d896a137418ba6c31c2474b9dbe1d86fc20")
        (revision "1"))
    (package
      (name "r-umi4cpackage")
      (version (git-version "0.0.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tanaylab/umi4cpackage")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0bjzamdw2lcfhlbzc0vdva87c3wwnij8jsvnrpx4wyyxvpcz13m5"))))
      (properties '((upstream-name . "umi4cPackage")))
      (build-system r-build-system)
      (arguments
       '(#:phases
         (modify-phases %standard-phases
           ;; Record the locations of "bowtie2" and "perl", as found by
           ;; `which' at build time, in the configuration file and in the
           ;; Perl import script.
           (add-after 'unpack 'fix-references
             (lambda _
               (substitute* "inst/conf/paths.conf"
                 (("TG3C\\.bowtie2_bin=.*")
                  (string-append "TG3C.bowtie2_bin="
                                 (which "bowtie2")
                                 "\n")))
               (substitute* "inst/perl/map3c/TG3C/import3C.pl"
                 (("\"perl")
                  (string-append "\"" (which "perl")))))))))
      (inputs
       (list perl bowtie))
      (propagated-inputs
       (list r-misha r-zoo))
      (native-inputs
       (list r-knitr))
      (home-page "https://github.com/tanaylab/umi4cpackage")
      (synopsis "Processing and analysis of UMI-4C contact profiles")
      (description "This is a package that lets you process UMI-4C data from
scratch to produce nice plots.")
      (license license:expat))))
7737
;; Built from a fixed upstream commit; the version string is derived from
;; "2.0.0", the revision and the commit.
(define-public r-shinycell
  (let ((commit "aecbd56e66802f28e397f5ae1f19403aadd12163")
        (revision "1"))
    (package
      (name "r-shinycell")
      (version (git-version "2.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/SGDDNB/ShinyCell")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "13jn2ikmvljnzayk485g1mmq5abcp9m1b8n1djdb1agmn83zaki5"))))
      (properties '((upstream-name . "ShinyCell")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-data-table
             r-ggplot2
             r-glue
             r-gridextra
             r-hdf5r
             r-matrix
             r-r-utils
             r-rcolorbrewer
             r-readr
             r-reticulate))
      (home-page "https://github.com/SGDDNB/ShinyCell")
      (synopsis "Shiny interactive web apps for single-cell data")
      (description
       "This package provides Shiny apps for interactive exploration of
single-cell data.")
      (license license:gpl3))))
7774
;; Built from a fixed upstream commit; the version string is derived from
;; "1.0.1", the revision and the commit.
(define-public r-archr
  (let ((commit "92ab814f86be0cea75c661f9827a9549c2cf47f5")
        (revision "1"))
    (package
      (name "r-archr")
      (version (git-version "1.0.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/GreenleafLab/ArchR")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1m1vp3kkpvd0fcviv5vb3gcbm3w91ih6gm9ivg48swnbqny44kqb"))))
      (properties '((upstream-name . "ArchR")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-biocgenerics
             r-biostrings
             r-chromvar
             r-complexheatmap
             r-data-table
             r-genomicranges
             r-ggplot2
             r-ggrepel
             r-gridextra
             r-gtable
             r-gtools
             r-magrittr
             r-matrix
             r-matrixstats
             r-motifmatchr
             r-nabor
             r-plyr
             r-rcpp
             r-rhdf5
             r-rsamtools
             r-s4vectors
             r-stringr
             r-summarizedexperiment
             r-uwot))
      (home-page "https://github.com/GreenleafLab/ArchR")
      (synopsis "Analyze single-cell regulatory chromatin in R")
      (description
       "This package is designed to streamline scATAC analyses in R.")
      (license license:gpl2+))))
7822
(define-public r-icellnet
  ;; v1.0 tagged in 2020, last commit contains many fixes.
  ;; DESCRIPTION says Version: 0.0.0.9000.
  ;;
  ;; NOTE(review): the commit string below is 39 hexadecimal digits long,
  ;; one short of a full SHA-1; confirm the complete identifier against the
  ;; upstream repository.
  (let ((commit "b9c05488fb8b5ea69bd560018966eaf4e25f82a")
        (revision "0"))
    (package
      (name "r-icellnet")
      (version (git-version "1.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/soumelis-lab/ICELLNET")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0cld7d6xqnvd0zpcpg3sx73an6vdc9divzywgnn6zxnqcd987cnw"))))
      (build-system r-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; The build is run from the "icellnet" sub-directory of the
           ;; checkout.
           (add-after 'unpack 'enter-dir
             (lambda _ (chdir "icellnet"))))))
      (propagated-inputs
       (list r-annotationdbi
             r-data-table
             r-dplyr
             r-ggplot2
             r-hgu133plus2-db
             r-jetset
             r-psych
             r-reshape2
             r-rlist))
      (home-page "https://github.com/soumelis-lab/ICELLNET")
      (synopsis "Transcriptomic-based framework to dissect cell communication")
      ;; The doubled article ("a a") was removed from the first sentence.
      (description "This package provides a transcriptomic-based framework
to dissect cell communication in a global manner. It integrates an original
expert-curated database of ligand-receptor interactions taking into account
multiple subunits expression. Based on transcriptomic profiles (gene
expression), this package allows to compute communication scores between cells
and provides several visualization modes that can be helpful to dig into
cell-cell interaction mechanism and extend biological knowledge.")
      (license license:gpl3))))
7866
(define-public r-scde
  (package
    (name "r-scde")
    (version "1.99.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/hms-dbmi/scde")
             ;; The version string itself is used as the git reference.
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "10na2gyka24mszdxf92wz9h2c13hdf1ww30c68gfsw53lvvhhhxb"))))
    (build-system r-build-system)
    (propagated-inputs
     (list r-rcpp
           r-rcpparmadillo
           r-mgcv
           r-rook
           r-rjson
           r-cairo
           r-rcolorbrewer
           r-edger
           r-quantreg
           r-nnet
           r-rmtstat
           r-extremes
           r-pcamethods
           r-biocparallel
           r-flexmix))
    (home-page "https://hms-dbmi.github.io/scde/")
    (synopsis "R package for analyzing single-cell RNA-seq data")
    (description "The SCDE package implements a set of statistical methods for
analyzing single-cell RNA-seq data. SCDE fits individual error models for
single-cell RNA-seq measurements. These models can then be used for
assessment of differential expression between groups of cells, as well as
other types of analysis. The SCDE package also contains the pagoda framework
which applies pathway and gene set overdispersion analysis to identify aspects
of transcriptional heterogeneity among single cells.")
    ;; See https://github.com/hms-dbmi/scde/issues/38
    (license license:gpl2)))
7908
(define-public r-millefy
  (package
    (name "r-millefy")
    (version "0.1.9-beta")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/yuifu/millefy")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0z2y0x99f761pxvg6n37cmnyrnj699jhjk43pvk05sa86iykgizl"))))
    (properties `((upstream-name . "millefy")))
    (build-system r-build-system)
    (propagated-inputs
     (list r-data-table
           r-destiny
           r-dplyr
           r-genomicranges
           r-iranges
           r-magrittr
           r-rsamtools
           r-rtracklayer
           r-tidyr))
    (home-page "https://github.com/yuifu/millefy")
    (synopsis "Make millefy plot with single-cell RNA-seq data")
    ;; Wording fixes: a space now separates the @dfn term from its
    ;; parenthetical expansion, and "By ... reorder" reads "By ... reordering".
    (description "@code{Millefy} is a tool for visualizing read coverage of
@dfn{scRNA-seq} (single-cell RNA sequencing) datasets in genomic contexts. By
dynamically and automatically reordering single cells based on locus-specific
pseudo time, @code{Millefy} highlights cell-to-cell heterogeneity in read coverage
of scRNA-seq data.")
    (license license:expat)))
7943
(define-public r-misha
  (package
    (name "r-misha")
    (version "4.1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/tanaylab/misha")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0bgivx3lzjh3173jsfrhb5kvhjsn53br0n4hmyx7i3dwy2cnnp2p"))
       ;; Delete bundled executable.
       (snippet
        '(delete-file "exec/bigWigToWig"))))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Make the R code refer to the bigWigToWig program under the
         ;; "kentutils" input instead of the (deleted) bundled copy.
         (add-after 'unpack 'do-not-use-bundled-bigWigToWig
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((kentutils (assoc-ref inputs "kentutils")))
               (substitute* "R/misha.R"
                 (("get\\(\".GLIBDIR\"\\), \"/exec/bigWigToWig")
                  (string-append "\"" kentutils "/bin/bigWigToWig")))))))))
    (inputs
     (list kentutils))
    (home-page "https://github.com/tanaylab/misha")
    (synopsis "Toolkit for analysis of genomic data")
    (description "This package is intended to help users to efficiently
analyze genomic data resulting from various experiments.")
    (license license:gpl2)))
7979
;; Built from a fixed upstream commit; the version string is derived from
;; "0", the revision and the commit.
(define-public r-scseqcomm
  (let ((commit "01076e703999f1a5aa76419d821b50aebe2b777a")
        (revision "0"))
    (package
      (name "r-scseqcomm")
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://gitlab.com/sysbiobig/scseqcomm")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1fw5si47d6agnz5fibmp2b1sv08pbpwv1j71w57xbav9044i032q"))
         ;; Delete bundled dependency.
         (modules '((guix build utils)))
         (snippet
          '(delete-file-recursively "other_deps"))))
      (build-system r-build-system)
      ;; The R libraries this package depends on are propagated, like those
      ;; of the other R packages in this file, so that they are installed
      ;; alongside it; they were previously listed as plain `inputs'.
      (propagated-inputs
       (list r-add2ggplot
             r-chorddiag
             r-doparallel
             r-dplyr
             r-foreach
             ;;r-grid ;; listed in DESCRIPTION
             r-gridextra
             r-ggplot2
             r-gtable
             r-htmlwidgets
             r-igraph
             r-matrix
             ;;r-methods ;; listed in DESCRIPTION
             r-org-hs-eg-db
             r-psych
             r-rcolorbrewer
             r-rlang
             r-scico
             r-tidygraph
             r-topgo))
      (native-inputs
       (list r-knitr))
      (home-page "https://gitlab.com/sysbiobig/scseqcomm")
      (synopsis "Inter- and intra- cellular signaling from single cell RNA-seq")
      (description "This package provides tools for analysing intercellular and
intracellular signaling from single cell RNA-seq (scRNA-seq) data.")
      (license license:gpl3))))
8029
;; Built from a fixed upstream commit; the version string is derived from
;; the "2.0" release string, the revision and the commit.
(define-public r-shaman
  (let ((commit "d6944e8ac7bd1dbd5c6cec646eafc1d19d0ca96f")
        (release "2.0")
        (revision "2"))
    (package
      (name "r-shaman")
      (version (git-version release revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tanaylab/shaman")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "03sx138dzpfiq23j49z0m0s4j79855mrg64hpj9c83408wzphxi6"))
         (snippet
          ;; This file will be generated.
          '(delete-file "inst/doc/shaman-package.R"))))
      (build-system r-build-system)
      (propagated-inputs
       (list r-data-table
             r-domc
             r-ggplot2
             r-gviz
             r-misha
             r-plyr
             r-rann
             r-rcpp
             r-reshape2
             ;; For vignettes
             r-rmarkdown
             r-knitr))
      (home-page "https://github.com/tanaylab/shaman")
      (synopsis "Sampling HiC contact matrices for a-parametric normalization")
      (description "The Shaman package implements functions for
resampling Hi-C matrices in order to generate expected contact
distributions given constraints on marginal coverage and
contact-distance probability distributions. The package also provides
support for visualizing normalized matrices and statistical analysis
of contact distributions around selected landmarks.")
      ;; Any version of the GPL
      (license license:gpl3+))))
8073
(define-public r-centipede
  (package
    (name "r-centipede")
    (version "1.2")
    (source
     (origin
       (method url-fetch)
       ;; The tarball name embeds the version: CENTIPEDE_<version>.tar.gz.
       (uri (string-append "http://download.r-forge.r-project.org/"
                           "src/contrib/CENTIPEDE_" version ".tar.gz"))
       (sha256
        (base32 "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
    (build-system r-build-system)
    (synopsis "Predict transcription factor binding sites")
    (description
     "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
    (home-page "http://centipede.uchicago.edu/")
    (license (list license:gpl2+ license:gpl3+))))
8097
(define-public r-demultiplex
  ;; Packaged from a pinned commit of the MULTI-seq repository.
  (let ((revision "1")
        (commit "6e2a1422c8e6f418cfb271997eebc91f9195f299"))
    (package
      (name "r-demultiplex")
      (version (git-version "1.0.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/chris-mcginnis-ucsf/MULTI-seq")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "01kv88wp8vdaq07sjk0d3d1cb553mq1xqg0war81pgmg63bgi38w"))))
      ;; The upstream R package name uses mixed case.
      (properties `((upstream-name . "deMULTIplex")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-kernsmooth
             r-reshape2
             r-rtsne
             r-shortread
             r-stringdist))
      (synopsis "MULTI-seq pre-processing and classification tools")
      (description
       "deMULTIplex is an R package for analyzing single-cell RNA sequencing
data generated with the MULTI-seq sample multiplexing method. The package
includes software to

@enumerate
@item Convert raw MULTI-seq sample barcode library FASTQs into a sample
barcode UMI count matrix, and
@item Classify cell barcodes into sample barcode groups.
@end enumerate
")
      (home-page "https://github.com/chris-mcginnis-ucsf/MULTI-seq")
      (license license:cc0))))
8132
(define-public gdc-client
  (package
    (name "gdc-client")
    (version "1.6.0")
    (source
     (origin
       (method git-fetch)
       ;; Upstream tags releases with the bare version number.
       (uri (git-reference
             (url "https://github.com/NCI-GDC/gdc-client.git")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0cagawlzjwj3wam10lv64xgbfx4zcnzxi5sjpsdhq7rn4z24mzc2"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; requirements.txt pins exact versions with "=="; turn every pin
         ;; into a lower bound.
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "requirements.txt"
               (("==") ">=")))))))
    (inputs
     (list python-cryptography python-intervaltree python-jsonschema
           python-lxml python-ndg-httpsclient python-progressbar2
           python-pyasn1 python-pyopenssl python-pyyaml
           python-requests python-termcolor))
    (synopsis "GDC data transfer tool")
    (description
     "The gdc-client provides several convenience functions over
the GDC API which provides general download/upload via HTTPS.")
    (home-page "https://gdc.nci.nih.gov/access-data/gdc-data-transfer-tool")
    (license license:asl2.0)))
8172
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.9.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/torognes/vsearch")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0vhrpjfdf75ba04b24xknp41790cvcgwl0vgpy7qbzj5xh2521ss"))
       (patches (search-patches "vsearch-unbundle-cityhash.patch"))
       (snippet
        ;; Remove bundled cityhash sources.  The vsearch source is adjusted
        ;; for this in the patch.  No trailing #t: like the other snippets
        ;; and phases in this file, the return value is not relied upon.
        '(for-each delete-file
                   '("src/city.h"
                     "src/citycrc.h"
                     "src/city.cc")))))
    (build-system gnu-build-system)
    (inputs
     (list zlib bzip2 cityhash))
    (native-inputs
     (list autoconf automake))
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    (home-page "https://github.com/torognes/vsearch")
    ;; vsearch uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux"))
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3 license:bsd-2))))
8215
(define-public pardre
  (package
    (name "pardre")
    (version "2.2.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                           version ".tar.gz"))
       (sha256
        (base32 "105s4f8zs8hh0sc32r9p725n7idza9cj5jvp5z1m5pljjhgk3if5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; tests require "prove"
       #:phases
       (modify-phases %standard-phases
         ;; The configure phase is not used for this package.
         (delete 'configure)
         ;; Copy the "ParDRe" executable into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "ParDRe"
                           (string-append (assoc-ref outputs "out")
                                          "/bin")))))))
    (inputs
     (list openmpi-c++
           zlib))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description
     "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
8253
(define-public ngshmmalign
  (package
    (name "ngshmmalign")
    (version "0.1.1")
    (source
     (origin
       (method url-fetch)
       ;; Fetch the release tarball attached to the GitHub release.
       (uri (string-append "https://github.com/cbg-ethz/ngshmmalign/"
                           "releases/download/" version
                           "/ngshmmalign-" version ".tar.bz2"))
       (sha256
        (base32 "0jryvlssi2r2ii1dxnx39yk6bh4yqgq010fnxrgfgbaj3ykijlzv"))))
    (build-system cmake-build-system)
    (arguments
     (list #:tests? #false))            ; there are none
    (inputs (list boost))
    (synopsis "Profile HMM aligner for NGS reads")
    (description
     "ngshmmalign is a profile HMM aligner for NGS reads designed particularly
for small genomes (such as those of RNA viruses like HIV-1 and HCV) that
experience substantial biological insertions and deletions.")
    (home-page "https://github.com/cbg-ethz/ngshmmalign/")
    (license license:gpl2+)))
8278
(define-public prinseq
  (package
    (name "prinseq")
    (version "0.20.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/prinseq/standalone/"
                           "prinseq-lite-" version ".tar.gz"))
       (sha256
        (base32
         "0vxmzvmm67whxrqdaaamwgjk7cf0fzfs5s673jgg00kz7g70splv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #false                  ; no check target
       #:phases
       (modify-phases %standard-phases
         ;; The package consists of Perl scripts only: nothing to configure
         ;; or compile.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (scripts (find-files "." "prinseq.*.pl"))
                    (guile (search-input-file inputs "bin/guile"))
                    ;; Look perl up among the inputs, like guile above,
                    ;; rather than through the build-time PATH.
                    (perl (search-input-file inputs "bin/perl")))
               ;; The scripts shell out to a bare "perl -pe"; make that call
               ;; use the absolute file name of the perl input.
               (substitute* scripts
                 (("\"perl -pe")
                  (string-append "\"" perl " -pe")))
               ;; Install every script and wrap it so that the Perl modules
               ;; available at build time are on PERL5LIB at run time.
               (for-each (lambda (file)
                           (chmod file #o555)
                           (install-file file bin)
                           (wrap-script (string-append bin "/" (basename file))
                             #:guile guile
                             `("PERL5LIB" ":" prefix
                               (,(getenv "PERL5LIB")))))
                         scripts)))))))
    (inputs
     (list guile-3.0                    ; for wrapper scripts
           perl
           perl-cairo
           perl-data-dumper
           perl-digest-md5
           perl-getopt-long
           perl-json
           perl-statistics-pca))
    (home-page "http://prinseq.sourceforge.net/")
    (synopsis "Preprocess sequence data in FASTA or FASTQ formats")
    (description
     "PRINSEQ is a bioinformatics tool to help you preprocess your genomic or
metagenomic sequence data in FASTA or FASTQ formats. The tool is written in
Perl and can be helpful if you want to filter, reformat, or trim your sequence
data. It also generates basic statistics for your sequences.")
    (license license:gpl3+)))
8332
(define-public shorah
  (package
    (name "shorah")
    (version "1.99.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/cbg-ethz/shorah"
                           "/releases/download/v" version
                           "/shorah-" version ".tar.xz"))
       (sha256
        (base32
         "158dir9qcqspknlnyfr9zwk41x48nrh5wcg10k2grh9cidp9daiq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Make the end-to-end test run the installed script from the
         ;; output's bin directory instead of "${interpreter} ../${testscript}".
         (add-after 'unpack 'fix-test-wrapper
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (substitute* "examples/run_end2end_test"
                 (("\\$\\{interpreter\\} ../\\$\\{testscript\\}")
                  (string-append bin "/${testscript}"))))))
         ;; The standard check phase is removed here and re-added after
         ;; 'wrap-programs below, so the tests run against the installed,
         ;; wrapped program.
         (delete 'check)
         (add-after 'install 'wrap-programs
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (site (string-append
                           out "/lib/python"
                           ,(version-major+minor
                             (package-version python))
                           "/site-packages"))
                    (pythonpath (getenv "GUIX_PYTHONPATH"))
                    (script (string-append out "/bin/shorah")))
               (chmod script #o555)
               (wrap-program script
                 `("GUIX_PYTHONPATH" ":" prefix (,site ,pythonpath))))))
         (add-after 'wrap-programs 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "make" "check")))))))
    (inputs
     (list boost
           htslib
           python
           python-biopython
           python-numpy
           zlib))
    (native-inputs
     (list pkg-config))
    ;; The home page used to be the empty string; point it at the upstream
    ;; repository the release tarball is fetched from.
    (home-page "https://github.com/cbg-ethz/shorah")
    (synopsis "Short reads assembly into haplotypes")
    (description
     "ShoRAH is a project for the analysis of next generation sequencing data.
It is designed to analyse genetically heterogeneous samples. Its tools
provide error correction, haplotype reconstruction and estimation of the
frequency of the different genetic variants present in a mixed sample.")
    (license license:gpl3+)))
8390
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source
     (origin
       (method url-fetch)
       ;; The gem is published as "bio-kseq", without the "ruby-" prefix.
       (uri (rubygems-uri "bio-kseq" version))
       (sha256
        (base32 "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    (arguments
     ;; Run the "spec" target as the test suite.
     (list #:test-target "spec"))
    (inputs (list zlib))
    (native-inputs
     (list bundler
           ruby-rspec
           ruby-rake-compiler))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description
     "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
8416
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-locus" version))
       (sha256
        (base32 "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    ;; rspec is listed as a build-time (native) input only.
    (native-inputs (list ruby-rspec))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
8440
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    (propagated-inputs
     (list ruby-bio-logger ruby-nokogiri))
    ;; rspec is a test framework, so it is declared as a native input, as
    ;; the other Ruby packages in this file do.
    (native-inputs
     (list ruby-rspec))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
8463
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.2")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "1d56amdsjv1mag7m6gv2w0xij8hqx1v5xbdjsix8sp3yp36m7938"))))
    (build-system ruby-build-system)
    (propagated-inputs
     (list ruby-libxml))
    (native-inputs
     (list which))                      ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The tests hard-code /bin/sh, /bin/ls, /bin/echo and a bare
         ;; "which"; replace each with the absolute file name found on PATH.
         (add-before 'build 'patch-test-command
           (lambda _
             ;; substitute* applies its clauses in order to each line, so
             ;; one call is equivalent to the former four separate passes
             ;; over this file.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which"))
               (("/bin/echo") (which "echo")))
             ;; This file was previously named in a quoted list containing a
             ;; stray comma, which only worked because the enclosing
             ;; quasiquote evaluated the unquoted string.
             (substitute* "test/data/command/echoarg2.sh"
               (("/bin/echo") (which "echo"))))))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
8506
(define-public centrifuge
  (package
    (name "centrifuge")
    (version "1.0.4")
    (source
     (origin
       (method git-fetch)
       ;; Release tags carry a "v" prefix.
       (uri (git-reference
             (url "https://github.com/DaehwanKimLab/centrifuge.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "167610gbz1rrh6ir3j7jcmhzg3x5msn7x7a3dpv7wmwdndnnqvg0"))))
    (build-system gnu-build-system)
    (arguments
     (list
      ;; The configure phase is not used; the installation prefix is passed
      ;; to make directly.
      #:phases
      '(modify-phases %standard-phases
         (delete 'configure))
      #:make-flags
      #~(list (string-append "prefix=" #$output))
      #:tests? #false))                 ; no check target
    (inputs
     (list python-wrapper))
    (native-inputs
     (list pandoc                       ;for documentation
           perl
           which))
    (synopsis "Classifier for metagenomic sequences")
    (description
     "Centrifuge is a microbial classification engine that enables
rapid, accurate and sensitive labeling of reads and quantification of species
on desktop computers. The system uses an indexing scheme based on the
@dfn{Burrows-Wheeler transform} (BWT) and the @dfn{Ferragina-Manzini} (FM)
index, optimized specifically for the metagenomic classification problem.
Centrifuge requires a relatively small index (4.7 GB for all complete
bacterial and viral genomes plus the human genome) and classifies sequences at
very high speed, allowing it to process the millions of reads from a typical
high-throughput DNA sequencing run within a few minutes.")
    (home-page "https://github.com/DaehwanKimLab/centrifuge/")
    (license license:gpl3+)))
8546
(define-public bio-vcf
  (package
    (name "bio-vcf")
    (version "0.9.5")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-vcf" version))
       (sha256
        (base32 "1glw5pn9s8z13spxk6yyfqaz80n9lga67f33w35nkpq9dwi2vg6g"))))
    (build-system ruby-build-system)
    ;; cucumber is listed as a build-time (native) input only.
    (native-inputs (list ruby-cucumber))
    (home-page "https://github.com/vcflib/bio-vcf")
    (synopsis "Smart VCF parser DSL")
    (description
     "Bio-vcf provides a @acronym{DSL, domain specific language} for processing
the VCF format. Record named fields can be queried with regular expressions.
Bio-vcf is a new generation VCF parser, filter and converter. Bio-vcf is not
only very fast for genome-wide (WGS) data, it also comes with a filtering,
evaluation and rewrite language and can output any type of textual data,
including VCF header and contents in RDF and JSON.")
    (license license:expat)))
8571
(define-public r-phantompeakqualtools
  (let ((commit "8d2b2d18c686d894ef5908b37da7adf72a07ef42")
        (revision "1"))
    (package
      (name "r-phantompeakqualtools")
      (version (git-version "1.2.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/kundajelab/phantompeakqualtools")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "00anrvcwsp02d98qhj1xpj85644h2pp4kfzq6dgbmwmdr6jvy7p4"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests.
         #:phases
         (modify-phases %standard-phases
           ;; Only a single R script is installed, so the configure and
           ;; build phases are dropped.
           (delete 'configure)
           (delete 'build)
           ;; Install run_spp.R under share/scripts.  The phase only needs
           ;; the outputs, so the unused `inputs' key is no longer declared.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((script (string-append (assoc-ref outputs "out")
                                            "/share/scripts")))
                 (install-file "run_spp.R" script)))))))
      ;; New-style input list, matching the rest of this file; nothing in
      ;; this package refers to the old "r" label.
      (inputs
       (list r-minimal))
      (propagated-inputs
       (list r-catools
             r-snow
             r-snowfall
             r-bitops
             r-rsamtools
             r-spp
             gawk
             samtools
             boost
             gzip))
      (home-page "https://github.com/kundajelab/phantompeakqualtools")
      (synopsis "Informative enrichment for ChIP-seq data")
      (description "This package computes informative enrichment and quality
measures for ChIP-seq/DNase-seq/FAIRE-seq/MNase-seq data. It can also be
used to obtain robust estimates of the predominant fragment length or
characteristic tag shift values in these assays.")
      (license license:bsd-3))))
8620
(define-public r-r4rna
  (package
    (name "r-r4rna")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       ;; Fetch over HTTPS, matching the home page on the same host.  The
       ;; content hash below is unchanged.
       (uri (string-append "https://www.e-rna.org/r-chie/files/R4RNA_"
                           version ".tar.gz"))
       (sha256
        (base32
         "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
    (build-system r-build-system)
    (propagated-inputs
     (list r-optparse r-rcolorbrewer))
    (home-page "https://www.e-rna.org/r-chie/index.cgi")
    (synopsis "Analysis framework for RNA secondary structure")
    (description
     "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
    (license license:gpl3+)))
8642
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                           "releases/download/v" version
                           "/rcas-web-" version ".tar.gz"))
       (sha256
        (base32
         "0wq951aj45gqki1bickg876i993lmawkp8x24agg264br5x716db"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'configure 'find-RCAS
           ;; The configure script can't find non-1.3.x versions of RCAS because
           ;; its R expression ‘1.10.1 >= 1.3.4’ evaluates to false.
           (lambda _
             (substitute* "configure"
               (("1\\.3\\.4") "0.0.0"))))
         ;; Wrap the executable so that it finds the Guile libraries and the
         ;; R site library that are visible at build time.  The phases no
         ;; longer end in #t, like the other phases in this file.
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (json (assoc-ref inputs "guile-json"))
                    (redis (assoc-ref inputs "guile-redis"))
                    (path (string-append
                           json "/share/guile/site/2.2:"
                           redis "/share/guile/site/2.2")))
               (wrap-program (string-append out "/bin/rcas-web")
                 `("GUILE_LOAD_PATH" ":" = (,path))
                 `("GUILE_LOAD_COMPILED_PATH" ":" = (,path))
                 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE"))))))))))
    ;; The labels "guile-json" and "guile-redis" are looked up by the
    ;; 'wrap-executable phase above, so the labelled input style is kept.
    (inputs
     `(("r-minimal" ,r-minimal)
       ("r-rcas" ,r-rcas)
       ("guile" ,guile-2.2)
       ("guile-json" ,guile-json-1)
       ("guile-redis" ,guile2.2-redis)))
    (native-inputs
     (list pkg-config))
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (license license:agpl3+)))
8693
(define-public r-chipkernels
  ;; Packaged from a pinned upstream commit.
  (let ((revision "1")
        (commit "c9cfcacb626b1221094fb3490ea7bac0fd625372"))
    (package
      (name "r-chipkernels")
      (version (git-version "1.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ManuSetty/ChIPKernels")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
      (build-system r-build-system)
      (propagated-inputs
       (list r-iranges r-xvector r-biostrings r-bsgenome r-gtools
             r-genomicranges r-sfsmisc r-kernlab r-s4vectors
             r-biocgenerics))
      (synopsis "Build string kernels for DNA Sequence analysis")
      (description
       "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
      (home-page "https://github.com/ManuSetty/ChIPKernels")
      (license license:gpl2+))))
8729
(define-public r-seqgl
  (package
    (name "r-seqgl")
    (version "1.1.4")
    (source
     (origin
       (method git-fetch)
       ;; The version string itself is used as the commit reference.
       (uri (git-reference
             (url "https://github.com/ManuSetty/SeqGL")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1r6ywvhxl3ffv48lgj7sbd582mcc6dha3ksgc2qjlvjrnkbj3799"))))
    (build-system r-build-system)
    (propagated-inputs
     (list r-biostrings r-chipkernels r-genomicranges
           r-spams r-wgcna r-fastcluster))
    (synopsis "Group lasso for Dnase/ChIP-seq data")
    (description
     "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
    (home-page "https://github.com/ManuSetty/SeqGL")
    (license license:gpl2+)))
8760
(define-public emboss
  (package
    (name "emboss")
    (version "6.5.7")
    (source (origin
              (method url-fetch)
              (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                                  (version-major+minor version) ".0/"
                                  "EMBOSS-" version ".tar.gz"))
              (sha256
               (base32
                "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    ;; The arguments are a plain list of keywords and G-expressions instead
    ;; of a quasiquoted list with an embedded ,#~ escape.  The phases no
    ;; longer end in #t, like the other phases in this file.
    (arguments
     (list
      #:configure-flags
      #~(list (string-append "--with-hpdf="
                             #$(this-package-input "libharu")))
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'fix-checks
            (lambda _
              ;; The PNGDRIVER tests check for the presence of libgd, libpng
              ;; and zlib, but assume that they are all found at the same
              ;; prefix.
              (substitute* "configure.in"
                (("CHECK_PNGDRIVER")
                 "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))))
          (add-after 'fix-checks 'disable-update-check
            (lambda _
              ;; At build time there is no connection to the Internet, so
              ;; looking for updates will not work.
              (substitute* "Makefile.am"
                (("\\$\\(bindir\\)/embossupdate") ""))))
          ;; configure.in and Makefile.am were edited above, so the build
          ;; system files are regenerated.
          (add-after 'disable-update-check 'autogen
            (lambda _
              (invoke "autoreconf" "-vif"))))))
    (inputs
     (list perl
           libpng
           gd
           libx11
           libharu
           zlib))
    (native-inputs
     (list autoconf automake libtool pkg-config))
    (home-page "http://emboss.sourceforge.net")
    (synopsis "Molecular biology analysis suite")
    (description "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (license license:gpl2+)))
8820
(define-public bits
  (let ((revision "1")
        (commit "3cc4567896d9d6442923da944beb704750a08d2d"))
    (package
      (name "bits")
      ;; The version is 2.13.0 even though no release archives have been
      ;; published as yet.
      (version (git-version "2.13.0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/arq5x/bits")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      ;; The phases no longer end in #t, like the other phases in this file.
      (arguments
       `(#:tests? #f                    ;no tests included
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'remove-cuda
             (lambda _
               (substitute* "Makefile"
                 ;; Blank out everything up to and including "_cuda" on
                 ;; lines that mention it ...
                 ((".*_cuda") "")
                 ;; ... and drop the trailing backslash after
                 ;; "bits_test_intersections".
                 (("(bits_test_intersections) \\\\" _ match) match))))
           ;; Copy the contents of "bin" into the output's bin directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (copy-recursively
                "bin" (string-append (assoc-ref outputs "out") "/bin")))))))
      (inputs
       (list gsl zlib))
      (home-page "https://github.com/arq5x/bits")
      (synopsis "Implementation of binary interval search algorithm")
      (description "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (license license:gpl2))))
8865
(define-public piranha
  ;; There is no release tarball for the latest version. The latest commit is
  ;; older than one year at the time of this writing.
  (let ((commit "0466d364b71117d01e4471b74c514436cc281233")
        (revision "1"))
    (package
      (name "piranha")
      (version (git-version "1.2.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/piranha")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (arguments
       `(#:configure-flags
         ;; Point the configure script at the bamtools headers and library.
         ,#~(list (string-append "--with-bam_tools_headers="
                                 #$(this-package-input "bamtools") "/include/bamtools")
                  (string-append "--with-bam_tools_library="
                                 #$(this-package-input "bamtools") "/lib/bamtools"))
         #:test-target "test"
         #:phases
         (modify-phases %standard-phases
           ;; Copy every file of the "smithlab-cpp" input into
           ;; src/smithlab_cpp/ of the unpacked source tree.
           (add-after 'unpack 'copy-smithlab-cpp
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((smithlab-files
                      (find-files (assoc-ref inputs "smithlab-cpp"))))
                 (for-each (lambda (source-file)
                             (install-file source-file "./src/smithlab_cpp/"))
                           smithlab-files))))
           ;; Copy everything found under "bin" into the output's bin
           ;; directory.
           (add-after 'install 'install-to-store
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (for-each (lambda (program)
                             (install-file program bin))
                           (find-files "bin" ".*"))))))))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ;; Source checkout consumed by the 'copy-smithlab-cpp phase.
         ("smithlab-cpp"
          ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp")
                     (commit commit)))
               (file-name (string-append "smithlab_cpp-" commit "-checkout"))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (native-inputs
       `(("python" ,python-2)))
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (home-page "https://github.com/smithlabcode/piranha")
      (license license:gpl3+))))
8930
(define-public filevercmp
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7")
        (revision "1"))
    (package
      (name "filevercmp")
      (version (git-version "0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/filevercmp")
                      (commit commit)))
                ;; Name the checkout after the package version, as every
                ;; other git origin in this file does, instead of the raw
                ;; commit hash.  The content hash is unaffected.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1j9vxsy0y050v59h0q1d6501fcw1kjvj0d18l1xk2zyg0jzj247c"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           ;; Install the executable and its header by hand.  The phase no
           ;; longer ends in #t, like the other phases in this file.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((out (assoc-ref outputs "out")))
                 (install-file "filevercmp" (string-append out "/bin"))
                 (install-file "filevercmp.h"
                               (string-append out "/include"))))))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
8963
(define-public multiqc
  (package
    (name "multiqc")
    (version "1.10.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "multiqc" version))
       (sha256
        (base32
         "0y9sgjca3bp0kk3ngry4zf4q2diyzp5bvzsx5l23nsysfbfkigm4"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run the unit tests against a copy of the "tests" input placed
         ;; under /tmp.
         (replace 'check
           (lambda* (#:key inputs tests? #:allow-other-keys)
             (when tests?
               (setenv "HOME" "/tmp")
               (let ((build-dir (getcwd)))
                 (copy-recursively (assoc-ref inputs "tests") "/tmp/tests")
                 ;; ModuleNotFoundError: No module named 'multiqc.modules.ccs'
                 (delete-file "/tmp/tests/unit_tests/test_ccs.py")
                 (with-directory-excursion "/tmp/tests"
                   ;; Prepend the build directory to the module search path.
                   (setenv "GUIX_PYTHONPATH"
                           (string-append build-dir ":"
                                          (getenv "GUIX_PYTHONPATH")))
                   (invoke "python" "-munittest" "discover"))))))
         ;; TODO: importing the picard and gatk modules fails for unknown
         ;; reasons.
         (delete 'sanity-check))))
    (propagated-inputs
     (list python-click
           python-coloredlogs
           python-future
           python-jinja2
           python-lzstring
           python-markdown
           python-matplotlib
           python-networkx
           python-numpy
           python-pyyaml
           python-requests
           python-rich
           python-simplejson
           python-spectra))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ;; Test data fetched separately; the 'check phase looks it up under
       ;; the "tests" label.
       ("tests"
        ,(let ((commit "02272d48a382beb27489fcf9e6308a0407dc3c2e"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ewels/MultiQC_TestData")
                   (commit commit)))
             (file-name (git-file-name "multiqc-test-data" commit))
             (sha256
              (base32
               "1bha64wanrigczw4yn81din56396n61j5gqdrkslhslmskcafi91")))))))
    (home-page "https://multiqc.info")
    (synopsis "Aggregate bioinformatics analysis reports")
    (description
     "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report.  It contains modules for a large number of
common bioinformatics tools.")
    (license license:gpl3+)))
9029
(define-public variant-tools
  (package
    (name "variant-tools")
    (version "3.1.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/vatlab/varianttools")
             ;; There is no tag corresponding to version 3.1.2
             (commit "813ae4a90d25b69abc8a40f4f70441fe09015249")))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "12ibdmksj7icyqhks4xyvd61bygk4pjmxn618kp6vgk1af01y34g"))))
    (build-system python-build-system)
    (inputs
     (list boost c-blosc gsl hdf5 hdf5-blosc python-cython zlib))
    (propagated-inputs
     (list python-numpy
           python-pycurl
           python-pyzmq
           python-scipy
           python-tables))
    (home-page "https://vatlab.github.io/vat-docs/")
    (synopsis "Analyze genetic variants from Next-Gen sequencing studies")
    (description
     "Variant tools is a tool for the manipulation, annotation,
selection, simulation, and analysis of variants in the context of next-gen
sequencing analysis.  Unlike some other tools used for next-gen sequencing
analysis, variant tools is project based and provides a whole set of tools to
manipulate and analyze genetic variants.")
    (license license:gpl3+)))
9066
(define-public r-chromvarmotifs
  (let ((commit "38bed559c1f4770b6c91c80bf3f8ea965da26076")
        (revision "1"))
    (package
      (name "r-chromvarmotifs")
      (version (git-version "0.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/GreenleafLab/chromVARmotifs")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0i9v1m1hrg1lkd2pnkj5nnrpks6vhhhpbdhsfl2lmjak4npxxr5q"))))
      (properties `((upstream-name . "chromVARmotifs")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-tfbstools))
      (home-page "https://github.com/GreenleafLab/chromVARmotifs")
      (synopsis "Stores motif collections for use with motifmatchr or chromVAR")
      (description
       "This package stores motif collections as lists of @dfn{position
frequency matrix} (PWMatrixList) objects provided by the @code{TFBSTools}
package for use in R with packages like @code{motifmatchr} or
@code{chromVAR}.")
      (license license:expat))))
9094
(define-public r-raremetals2
  (package
    (name "r-raremetals2")
    (version "0.1")
    (source
     (origin
       (method url-fetch)
       ;; The tarball name embeds the version number.
       (uri (string-append "http://genome.sph.umich.edu/w/images/"
                           "b/b7/RareMETALS2_" version ".tar.gz"))
       (sha256
        (base32
         "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
    (properties `((upstream-name . "RareMETALS2")))
    (build-system r-build-system)
    (propagated-inputs
     (list r-seqminer
           r-mvtnorm
           r-mass
           r-compquadform
           r-getopt))
    (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
    (synopsis "Analyze gene-level association tests for binary trait")
    (description
     "The R package rareMETALS2 is an extension of the R package rareMETALS.
It was designed to meta-analyze gene-level association tests for binary trait.
While rareMETALS offers a near-complete solution for meta-analysis of
gene-level tests for quantitative trait, it does not offer the optimal
solution for binary trait.  The package rareMETALS2 offers improved features
for analyzing gene-level association tests in meta-analyses for binary
trait.")
    (license license:gpl3)))
9122
(define-public r-rnaseqdtu
  (let ((commit "5bee1e769d2e1dc6a3f1cecb78078050eeb5b9ac")
        (revision "1"))
    (package
      (name "r-rnaseqdtu")
      (version (git-version "2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/mikelove/rnaseqDTU/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0jfi1ydsk8m5nadwnih48v87nnxdc7s3f0pny4axmnj40dd42as0"))))
      (properties `((upstream-name . "rnaseqDTU")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-deseq2 r-devtools r-dexseq r-drimseq r-edger r-rafalib
             r-stager))
      (native-inputs
       (list r-knitr))
      (home-page "https://github.com/mikelove/rnaseqDTU/")
      (synopsis "RNA-seq workflow for differential transcript usage")
      (description
       "This package provides an RNA-seq workflow for differential transcript
usage (DTU) following Salmon quantification.  This workflow performs a DTU
analysis on simulated data.  It also shows how to use stageR to perform
two-stage testing of DTU, a statistical framework to screen at the gene level
and then confirm which transcripts within the significant genes show evidence
of DTU.")
      (license license:artistic2.0))))
9159
(define-public r-dropbead
  (let ((commit "d746c6f3b32110428ea56d6a0001ce52a251c247")
        (revision "2"))
    (package
      (name "r-dropbead")
      ;; Upstream has no releases, hence the "0" base version.  Use
      ;; 'git-version' like the other commit-pinned packages in this file
      ;; instead of assembling the string by hand; the resulting version
      ;; string is unchanged.
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rajewsky-lab/dropbead")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0sbzma49aiiyw8b0jpr7fnhzys9nsqmp4hy4hdz1gzyg1lhnca26"))))
      (build-system r-build-system)
      (propagated-inputs
       (list r-ggplot2 r-rcolorbrewer r-gridextra r-gplots r-plyr))
      (home-page "https://github.com/rajewsky-lab/dropbead")
      (synopsis "Basic exploration and analysis of Drop-seq data")
      (description "This package offers a quick and straight-forward way to
explore and perform basic analysis of single cell sequencing data coming from
droplet sequencing.  It has been particularly tailored for Drop-seq.")
      (license license:gpl3))))
9185
(define-public r-cellchat
  (let ((commit "21edd226ca408e4c413408f98562d71ee0b54e5d")
        (revision "1"))
    (package
      (name "r-cellchat")
      (version (git-version "1.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/sqjin/CellChat")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0cvzl9mi8jjznpql2gv67swnk1dndn3a2h22z5l84h7lwpwjmh53"))
         ;; Delete the shared library and object files that are checked in
         ;; under "src".
         (snippet
          '(for-each delete-file
                     '("src/CellChat.so"
                       "src/CellChat_Rcpp.o"
                       "src/RcppExports.o")))))
      (properties `((upstream-name . "CellChat")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-biocgenerics r-circlize r-colorspace r-complexheatmap
             r-cowplot r-dplyr r-expm r-fnn r-forcats r-future
             r-future-apply r-gg-gap r-ggalluvial r-ggplot2 r-ggrepel
             r-igraph r-irlba r-magrittr r-matrix r-nmf r-patchwork
             r-pbapply r-rcolorbrewer r-rcpp r-rcppeigen r-reshape2
             r-reticulate r-rspectra r-rtsne r-scales r-shape r-sna
             r-stringr r-svglite))
      (native-inputs
       (list r-knitr))
      (home-page "https://github.com/sqjin/CellChat")
      (synopsis "Analysis of cell-cell communication from single-cell transcriptomics data")
      (description
       "This package infers, visualizes and analyzes the cell-cell
communication networks from scRNA-seq data.")
      (license license:gpl3))))
9251
(define-public r-copykat
  (let ((commit                         ;no tag
         "256de33dfc1b80a1a0ac9e098c5557f95a4e0d53")
        (revision "0"))
    (package
      (name "r-copykat")
      (version (git-version "1.0.8" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/navinlabcode/copykat")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0ckyqnial3imcqlgd6xfgwk5w977l1i87sx4kdbwdvg40w0vh1j8"))))
      (properties `((upstream-name . "copykat")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-cluster
             r-dlm
             r-gplots
             r-mcmcpack
             r-mixtools
             r-paralleldist
             r-rcolorbrewer))
      (native-inputs (list r-knitr))
      (home-page "https://github.com/navinlabcode/copykat")
      (synopsis "Inference of genomic copy number from single cell RNAseq data")
      ;; The first sentence used to run the package's expanded name into the
      ;; verb without any punctuation; it is now set off as an apposition.
      (description
       "This package, Copynumber KAryotyping of Tumors, infers genomic copy
number and subclonal structure of human tumors using integrative Bayesian
approaches to identify genome-wide aneuploidy at 5MB resolution in
single-cell data.  It separates tumor cells and tumor subclones from normal
cells using high-throughput sc-RNAseq data.")
      (license license:gpl2))))
9289
(define-public sambamba
  (package
    (name "sambamba")
    (version "0.8.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biod/sambamba")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1zdkd1md5wk4la71p82pbclqqcm55abk23fk087da6186i1bsihl"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test target
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Substitute the absolute ldmd2 path for the Makefile's shell
         ;; lookup and select the C compiler and the linker via the
         ;; environment.
         (add-after 'unpack 'prepare-build-tools
           (lambda _
             (substitute* "Makefile"
               (("\\$\\(shell which ldmd2\\)") (which "ldmd2")))
             (setenv "CC" "gcc")
             (setenv "D_LD" (which "ld.gold"))))
         ;; Link with -llz4 rather than the static archive under lz4/lib
         ;; and drop the lz4-static prerequisite.
         (add-after 'unpack 'unbundle-prerequisites
           (lambda _
             (substitute* "Makefile"
               (("= lz4/lib/liblz4.a") "= -L-llz4")
               (("ldc_version_info lz4-static") "ldc_version_info"))))
         ;; The built executable carries the version in its file name;
         ;; install it as plain "sambamba".
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir)
               (copy-file (string-append "bin/sambamba-" ,version)
                          (string-append bindir "/sambamba"))))))))
    (native-inputs
     `(("ld-gold-wrapper"
        ;; Importing (gnu packages commencement) would introduce a cycle.
        ,(module-ref (resolve-interface
                      '(gnu packages commencement))
                     'ld-gold-wrapper))
       ("binutils-gold" ,binutils-gold)
       ("python" ,python)))
    (inputs
     (list ldc lz4 zlib))
    (home-page "https://github.com/biod/sambamba")
    (synopsis "Tools for working with SAM/BAM data")
    (description "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
working with SAM and BAM files.  Current parallelised functionality is
an important subset of samtools functionality, including view, index,
sort, markdup, and depth.")
    (license license:gpl2+)))
9346
(define-public ritornello
  (package
    (name "ritornello")
    (version "2.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/KlugerLab/Ritornello")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1xahvq215qld7x1w8vpa5zbrsj6p9crb9shqa2x89sb0aaxa02jk"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; Include sam.h through the "samtools/" directory prefix.
         (add-after 'unpack 'patch-samtools-references
           (lambda _
             (substitute* '("src/SamStream.h" "src/FLD.cpp")
               (("<sam.h>") "<samtools/sam.h>"))
             #t))
         (delete 'configure)
         ;; Copy the single executable into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin/")))
               (mkdir-p bindir)
               (install-file "bin/Ritornello" bindir)
               #t))))))
    (inputs
     (list samtools-0.1 fftw boost zlib))
    (home-page "https://github.com/KlugerLab/Ritornello")
    (synopsis "Control-free peak caller for ChIP-seq data")
    (description "Ritornello is a ChIP-seq peak calling algorithm based on
signal processing that can accurately call binding events without the need to
do a pair total DNA input or IgG control sample.  It has been tested for use
with narrow binding events such as transcription factor ChIP-seq.")
    (license license:gpl3+)))
9388
(define-public trim-galore
  (package
    (name "trim-galore")
    (version "0.6.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/TrimGalore")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0yrwg6325j4sb9vnplvl3jplzab0qdhp92wl480qjinpfq88j4rs"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda _
             ;; Trim Galore tries to figure out what version of Python
             ;; cutadapt is using by looking at the shebang.  Of course that
             ;; doesn't work, because cutadapt is wrapped in a shell script.
             (substitute* "trim_galore"
               (("my \\$python_return.*")
                "my $python_return = \"Python 3.999\";\n"))
             #t))
         (delete 'build)
         ;; Replace bare tool names in the script with paths into the
         ;; corresponding inputs.
         (add-after 'unpack 'hardcode-tool-references
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((cutadapt (assoc-ref inputs "cutadapt"))
                   (gzip     (assoc-ref inputs "gzip"))
                   (pigz     (assoc-ref inputs "pigz")))
               (substitute* "trim_galore"
                 (("\\$path_to_cutadapt = 'cutadapt'")
                  (string-append "$path_to_cutadapt = '"
                                 cutadapt "/bin/cutadapt'"))
                 (("\\$compression_path = \"gzip\"")
                  (string-append "$compression_path = \""
                                 gzip "/bin/gzip\""))
                 (("\"gunzip")
                  (string-append "\"" gzip "/bin/gunzip"))
                 (("\"pigz")
                  (string-append "\"" pigz "/bin/pigz"))))
             #t))
         ;; Only the script itself is installed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir)
               (install-file "trim_galore" bindir)
               #t))))))
    (inputs
     (list gzip perl pigz cutadapt))
    (native-inputs
     (list unzip))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
    (synopsis "Wrapper around Cutadapt and FastQC")
    (description "Trim Galore! is a wrapper script to automate quality and
adapter trimming as well as quality control, with some added functionality to
remove biased methylation positions for RRBS sequence files.")
    (license license:gpl3+)))
9455
(define-public phylip
  (package
    (name "phylip")
    (version "3.697")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://evolution.gs.washington.edu/phylip/"
                           "download/phylip-" version ".tar.gz"))
       (sha256
        (base32
         "1h8h0nafnlbqryswxgplx80k2044yhfz97jh13vsgzlaifqdh9ls"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags (list "-f" "Makefile.unx" "CFLAGS=-fcommon" "install")
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; The build is run from within the "src" directory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src")))
         (delete 'configure)
         ;; Install every file found under "../exe" into the output's bin
         ;; directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p bindir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "../exe" ".*"))))))))
    (home-page "http://evolution.genetics.washington.edu/phylip/")
    (synopsis "Tools for inferring phylogenies")
    (description "PHYLIP (the PHYLogeny Inference Package) is a package of
programs for inferring phylogenies (evolutionary trees).")
    (license license:bsd-2)))
9491
(define-public phyml
  (package
    (name "phyml")
    (version "3.3.20220408")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/stephaneguindon/phyml")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "03hdqmnsgnzkcrp9r9ajdfkj33jgq4b86kra8ssjlrph65y344sa"))
              ;; Delete the checked-in manual; the 'build-manual phase
              ;; regenerates it.
              (snippet
               '(delete-file "doc/phyml-manual.pdf"))))
    (build-system gnu-build-system)
    (supported-systems '("x86_64-linux"))
    (arguments
     ;; DEFAULT-FLAGS is currently unused: the place where it would be
     ;; spliced in below is commented out with "#;" (see the note on
     ;; --disable-native further down).
     (let ((default-flags (list "--disable-native")))
       `(#:phases
         ;; BUILD returns a phase that runs the standard configure, build
         ;; and install phases again with "--enable-WHAT" as the only
         ;; configure flag.
         (let ((build (lambda (what)
                        (lambda args
                          (apply (assoc-ref %standard-phases 'configure)
                                 (append args
                                         (list #:configure-flags
                                               (cons (format #false "--enable-~a" what)
                                                     '() #;,default-flags))))
                          (apply (assoc-ref %standard-phases 'build) args)
                          (apply (assoc-ref %standard-phases 'install) args)))))
           (modify-phases %standard-phases
             ;; We cannot use --disable-native; see
             ;; https://github.com/stephaneguindon/phyml/issues/173 Instead we
             ;; patch the code to at least get rid of -march=native.
             (add-after 'unpack 'remove-march-native
               (lambda _
                 (substitute* "configure.ac"
                   (("DEFAULT_VECTOR_FLAG=\"-march=native\"")
                    "DEFAULT_VECTOR_FLAG=\"-march=athlon64-sse3\"\n"))))
             (add-after 'build 'build-manual
               (lambda _
                 (with-directory-excursion "doc"
                   (invoke "make" "phyml-manual.pdf"))))
             (add-after 'build-manual 'install-manual
               (lambda* (#:key outputs #:allow-other-keys)
                 (with-directory-excursion "doc"
                   (install-file "phyml-manual.pdf"
                                 (string-append (assoc-ref outputs "out")
                                                "/share/doc/phyml")))))
             ;; Build and install the additional program variants one after
             ;; the other.
             (add-after 'install 'build-phyml-mpi
               (build "phyml-mpi"))
             (add-after 'build-phyml-mpi 'build-rf
               (build "rf"))
             (add-after 'build-rf 'build-phyrex
               (build "phyrex")))))))
    (native-inputs
     (list automake
           autoconf
           openmpi
           (texlive-updmap.cfg (list texlive-amsfonts
                                     texlive-caption
                                     texlive-cite
                                     texlive-fonts-ec
                                     texlive-grfext
                                     texlive-hyperref
                                     texlive-latex-fancyvrb
                                     texlive-latex-graphics
                                     texlive-latex-psfrag
                                     texlive-xcolor))))
    (home-page "https://github.com/stephaneguindon/phyml")
    ;; The previous synopsis ("Programs for working on SAM/BAM files")
    ;; described a different kind of tool; this one follows the description
    ;; below.
    (synopsis "Phylogenetic estimation using maximum likelihood")
    (description
     "@code{PhyML} is a software package that uses modern statistical
approaches to analyse alignments of nucleotide or amino acid sequences in a
phylogenetic framework.  The main tool in this package builds phylogenies
under the maximum likelihood criterion.  It implements a large number of
substitution models coupled with efficient options to search the space of
phylogenetic tree topologies.  @code{PhyREX} fits the
spatial-Lambda-Fleming-Viot model to geo-referenced genetic data.  This model
is similar to the structured coalescent but assumes that individuals are
distributed along a spatial continuum rather than discrete demes.
@code{PhyREX} can be used to estimate population densities and rates of
dispersal.  Its output can be processed by treeannotator (from the
@code{BEAST} package) as well as @code{SPREAD}.")
    (license license:gpl3)))
9576
(define-public imp
  (package
    (name "imp")
    (version "2.15.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://integrativemodeling.org/"
                           version "/download/imp-" version ".tar.gz"))
       (sha256
        (base32
         "05hsrnkpkajppa3f45x4qsarnkj616hlby749zxg4is3bv4i6b5y"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #false                  ; The test suite is notoriously fickle
       #:configure-flags
       ;; Assemble a single -DCMAKE_CTEST_ARGUMENTS flag: a label selector
       ;; plus an exclusion pattern built from the disabled tests.
       (let* ((disabled-tests
               '("expensive"            ;exclude expensive tests
                 "IMP.modeller"         ;fail to import its own modules
                 "IMP.parallel-test_sge.py" ;fail in build container
                 ;; The following test fails non-reproducibly on
                 ;; an inexact numbers assertion.
                 "IMP.em-medium_test_local_fitting.py"
                 ;; The following test fails for unknown reasons
                 "IMP.foxs-add-missing-residues.py"))
              (exclusion
               (format #f "'(~a)'" (string-join disabled-tests "|")))
              (ctest-arguments
               (list "-L" "-tests?-"    ;select only tests
                     "-E" exclusion)))
         (list (string-append "-DCMAKE_CTEST_ARGUMENTS="
                              (string-join ctest-arguments ";"))))))
    (native-inputs
     `(("python" ,python-wrapper)
       ("swig" ,swig)))
    (inputs
     (list boost
           cgal
           gsl
           hdf5
           fftw
           eigen
           ;; Enabling MPI causes the build to use all the available memory and
           ;; fail (tested on a machine with 32 GiB of RAM).
           ;;("mpi" ,openmpi)
           opencv))
    (propagated-inputs
     (list python-numpy
           python-scipy
           python-pandas
           python-scikit-learn
           python-networkx))
    (home-page "https://integrativemodeling.org")
    (synopsis "Integrative modeling platform")
    (description "IMP's broad goal is to contribute to a comprehensive
structural characterization of biomolecules ranging in size and complexity
from small peptides to large macromolecular assemblies, by integrating data
from diverse biochemical and biophysical experiments.  IMP provides a C++ and
Python toolbox for solving complex modeling problems, and a number of
applications for tackling some common problems in a user-friendly way.")
    ;; IMP is largely available under the GNU Lesser GPL; see the file
    ;; COPYING.LGPL for the full text of this license.  Some IMP modules are
    ;; available under the GNU GPL (see the file COPYING.GPL).
    (license (list license:lgpl2.1+
                   license:gpl3+))))
9639
9640 ;; We use this seemingly arbitrary commit because of
9641 ;; https://github.com/3DGenomes/TADbit/issues/371
(define-public tadbit
  (let ((commit "5c4c1ddaadfbaf7e6edc58173e46d801093bdc9b")
        (revision "1"))
    (package
      (name "tadbit")
      (version (git-version "1.0.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/3DGenomes/TADbit")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "17nwlvjgqpa7x6jgh56m3di61ynaz34kl1jamyv7r2a5rhfcbkla"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'fix-problems-with-setup.py
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((completion-dir
                      (string-append (assoc-ref outputs "out")
                                     "/etc/bash_completion.d")))
                 ;; Add -fcommon to the test Makefile's CFLAGS.
                 (substitute* "src/test/Makefile"
                   (("^CFLAGS=") "CFLAGS= -fcommon"))

                 ;; Don't attempt to install the bash completions to
                 ;; the home directory.
                 (rename-file "extras/.bash_completion"
                              "extras/tadbit")
                 (substitute* "setup.py"
                   (("\\(path.expanduser\\('~'\\)")
                    (string-append "(\"" completion-dir "\""))
                   (("extras/\\.bash_completion")
                    "extras/tadbit")))))
           ;; Run the test script against the installed modules.
           (replace 'check
             (lambda* (#:key tests? inputs outputs #:allow-other-keys)
               (when tests?
                 (add-installed-pythonpath inputs outputs)
                 (invoke "python3" "test/test_all.py")))))))
      (native-inputs
       (list `(,glib "bin")             ;for gtester
             pkg-config))
      (inputs
       ;; TODO: add Chimera for visualization
       (list imp
             mcl
             python-future
             python-h5py
             python-scipy
             python-numpy
             python-matplotlib
             python-pysam))
      (home-page "https://3dgenomes.github.io/TADbit/")
      (synopsis "Analyze, model, and explore 3C-based data")
      (description
       "TADbit is a complete Python library to deal with all steps to analyze,
model, and explore 3C-based data.  With TADbit the user can map FASTQ files to
obtain raw interaction binned matrices (Hi-C like matrices), normalize and
correct interaction matrices, identify and compare the so-called
@dfn{Topologically Associating Domains} (TADs), build 3D models from the
interaction matrices, and finally, extract structural properties from the
models.  TADbit is complemented by TADkit for visualizing 3D models.")
      (license license:gpl3+))))
9706
(define-public kentutils
  (package
    (name "kentutils")
    ;; 302.1.0 is out, but the only difference is the inclusion of
    ;; pre-built binaries.
    (version "302.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ENCODE-DCC/kentUtils")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0n1wbyjpzii2b9qhyp9r1q76j623cggpg3y8fmw78ld3z4y7ivha"))
       (modules '((guix build utils)
                  (srfi srfi-26)
                  (ice-9 ftw)))
       (snippet
        '(begin
           ;; Only the contents of the specified directories are free
           ;; for all uses, so we remove the rest.  "hg/autoSql" and
           ;; "hg/autoXml" are nominally free, but they depend on a
           ;; library that is built from the sources in "hg/lib",
           ;; which is nonfree.
           (let ((keep (list "." ".."
                             "utils" "lib" "inc" "tagStorm"
                             "parasol" "htslib"))
                 (directory? (lambda (entry)
                               (eq? 'directory (stat:type (stat entry))))))
             (for-each (lambda (entry)
                         (when (directory? entry)
                           (delete-file-recursively entry)))
                       (map (cut string-append "src/" <>)
                            (scandir "src"
                                     (lambda (entry)
                                       (not (member entry keep)))))))
           ;; Only make the utils target, not the userApps target,
           ;; because that requires libraries we won't build.
           (substitute* "Makefile"
             ((" userApps") " utils"))
           ;; Only build libraries that are free.
           (substitute* "src/makefile"
             (("DIRS =.*") "DIRS =\n")
             (("cd jkOwnLib.*") "")
             ((" hgLib") "")
             (("cd hg.*") ""))
           (substitute* "src/utils/makefile"
             ;; These tools depend on "jkhgap.a", which is part of the
             ;; nonfree "src/hg/lib" directory.
             (("raSqlQuery") "")
             (("pslLiftSubrangeBlat") "")

             ;; Do not build UCSC tools, which may require nonfree
             ;; components.
             (("ALL_APPS =.*") "ALL_APPS = $(UTILS_APPLIST)\n"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There is no global test target and the test target for
       ;; individual tools depends on input files that are not
       ;; included.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-permissions
           (lambda _
             (make-file-writable "src/inc/localEnvironment.mk")
             #t))
         ;; Use the absolute file name of "echo" in the Makefile.
         (add-after 'unpack 'fix-paths
           (lambda _
             (substitute* "Makefile"
               (("/bin/echo") (which "echo")))
             #t))
         ;; Copy the separately fetched samtabix sources into the build
         ;; tree.
         (add-after 'unpack 'prepare-samtabix
           (lambda* (#:key inputs #:allow-other-keys)
             (copy-recursively (assoc-ref inputs "samtabix")
                               "samtabix")
             #t))
         (delete 'configure)
         ;; Copy the whole "bin" directory into the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (copy-recursively "bin" bindir))
             #t)))))
    (native-inputs
     `(("samtabix"
        ,(let ((commit "10fd107909c1ac4d679299908be4262a012965ba"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "http://genome-source.cse.ucsc.edu/samtabix.git")
                   (commit commit)))
             (file-name (git-file-name "samtabix" (string-take commit 7)))
             (sha256
              (base32
               "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma")))))))
    (inputs
     `(("zlib" ,zlib)
       ("tcsh" ,tcsh)
       ("perl" ,perl)
       ("libpng" ,libpng)
       ("mariadb-dev" ,mariadb "dev")
       ("openssl" ,openssl)))
    (home-page "https://genome.cse.ucsc.edu/index.html")
    (synopsis "Assorted bioinformatics utilities")
    (description "This package provides the kentUtils, a selection of
bioinformatics utilities used in combination with the UCSC genome
browser.")
    ;; Only a subset of the sources are released under a non-copyleft
    ;; free software license.  All other sources are removed in a
    ;; snippet.  See this bug report for an explanation of how the
    ;; license statements apply:
    ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
    (license (license:non-copyleft
              "http://genome.ucsc.edu/license/"
              "The contents of this package are free for all uses."))))
9824
(define-public f-seq
  (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
        (revision "1"))
    (package
      (name "f-seq")
      (version (git-version "1.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/aboyle/F-seq")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
         (modules '((guix build utils)))
         ;; Remove bundled Java library archives.
         (snippet
          '(for-each delete-file (find-files "lib" ".*")))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; no tests included
         #:phases
         (modify-phases %standard-phases
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out    (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin"))
                      (docdir (string-append out "/share/doc/f-seq"))
                      (libdir (string-append out "/lib")))
                 (mkdir-p out)
                 (mkdir-p docdir)
                 ;; Make the launcher script refer to absolute file names:
                 ;; the java executable, the commons-cli jar from the
                 ;; inputs, and the output's bin directory as REALDIR.
                 (substitute* "bin/linux/fseq"
                   (("java") (which "java"))
                   (("\\$REALDIR/../lib/commons-cli-1.1.jar")
                    (search-input-file
                     inputs
                     (string-append "/lib/m2/commons-cli/commons-cli/"
                                    ,(package-version java-commons-cli)
                                    "/commons-cli-"
                                    ,(package-version java-commons-cli)
                                    ".jar")))
                   (("REALDIR=.*")
                    (string-append "REALDIR=" bindir "\n")))
                 (install-file "README.txt" docdir)
                 (install-file "bin/linux/fseq" bindir)
                 (install-file "build~/fseq.jar" libdir)
                 (copy-recursively "lib" libdir)))))))
      (inputs
       (list perl java-commons-cli))
      (home-page "https://fureylab.web.unc.edu/software/fseq/")
      (synopsis "Feature density estimator for high-throughput sequence tags")
      (description
       "F-Seq is a software package that generates a continuous tag sequence
density estimation allowing identification of biologically meaningful sites
such as transcription factor binding sites (ChIP-seq) or regions of open
chromatin (DNase-seq).  Output can be displayed directly in the UCSC Genome
Browser.")
      (license license:gpl3+))))
9883
(define-public bismark
  (package
    (name "bismark")
    (version "0.20.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/Bismark")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0xchm3rgilj6vfjnyzfzzymfd7djr64sbrmrvs3njbwi66jqbzw9"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:modules ((guix build utils)
                  (ice-9 popen)         ; open-pipe*, close-pipe
                  (srfi srfi-26)        ; cut
                  (guix build perl-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled plotly.js is minified.  Overwrite it with the
         ;; output of running uglifyjs on the "plotly.js" input.
         (add-after 'unpack 'replace-plotly.js
           (lambda* (#:key inputs #:allow-other-keys)
             (let* ((file (assoc-ref inputs "plotly.js"))
                    (installed "plotly/plotly.js")
                    (minified (open-pipe* OPEN_READ "uglifyjs" file)))
               (call-with-output-file installed
                 (cut dump-port minified <>))
               ;; Close the pipe and inspect the exit status of uglifyjs;
               ;; otherwise a failed run would silently leave an empty or
               ;; truncated plotly.js behind.
               (unless (zero? (close-pipe minified))
                 (error "uglifyjs failed to minify" file)))))
         (delete 'configure)
         (delete 'build)
         ;; Install the Perl scripts, the documentation and the plotly
         ;; directory by hand.
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (share (string-append out "/share/bismark"))
                    (docdir (string-append out "/share/doc/bismark"))
                    (docs '("Docs/Bismark_User_Guide.html"))
                    (scripts '("bismark"
                               "bismark_genome_preparation"
                               "bismark_methylation_extractor"
                               "bismark2bedGraph"
                               "bismark2report"
                               "coverage2cytosine"
                               "deduplicate_bismark"
                               "filter_non_conversion"
                               "bam2nuc"
                               "bismark2summary"
                               "NOMe_filtering")))
               ;; bismark2report looks for plotly next to itself; point it
               ;; at the share directory instead.
               (substitute* "bismark2report"
                 (("\\$RealBin/plotly")
                  (string-append share "/plotly")))
               (mkdir-p share)
               (mkdir-p docdir)
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin))
                         scripts)
               (for-each (lambda (file) (install-file file docdir))
                         docs)
               (copy-recursively "Docs/Images" (string-append docdir "/Images"))
               (copy-recursively "plotly"
                                 (string-append share "/plotly"))

               ;; Fix references to gunzip
               (substitute* (map (lambda (file)
                                   (string-append bin "/" file))
                                 scripts)
                 (("\"gunzip -c")
                  (string-append "\"" (assoc-ref inputs "gzip")
                                 "/bin/gunzip -c"))))))))
    (inputs
     (list gzip perl-carp perl-getopt-long))
    (native-inputs
     `(("plotly.js"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/plotly/plotly.js/"
                               "v1.39.4/dist/plotly.js"))
           (sha256
            (base32 "138mwsr4nf5qif4mrxx286mpnagxd1xwl6k8aidrjgknaqg88zyr"))))
       ("uglifyjs" ,node-uglify-js)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/bismark/")
    (synopsis "Map bisulfite treated sequence reads and analyze methylation")
    (description "Bismark is a program to map bisulfite treated sequencing
reads to a genome of interest and perform methylation calls in a single step.
The output can be easily imported into a genome viewer, such as SeqMonk, and
enables a researcher to analyse the methylation levels of their samples
straight away. Its main features are:

@itemize
@item Bisulfite mapping and methylation calling in one single step
@item Supports single-end and paired-end read alignments
@item Supports ungapped and gapped alignments
@item Alignment seed length, number of mismatches etc are adjustable
@item Output discriminates between cytosine methylation in CpG, CHG
and CHH context
@end itemize\n")
    (license license:gpl3+)))
9986
(define-public paml
  (package
    (name "paml")
    (version "4.9e")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://abacus.gene.ucl.ac.uk/software/"
                                  "paml" version ".tgz"))
              (sha256
               (base32
                "13zf6h9fiqghwhch2h06x1zdr6s42plsnqahflp5g7myr3han3s6"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Remove Windows binaries
                  (for-each delete-file (find-files "." "\\.exe$"))
                  ;; Some files in the original tarball have restrictive
                  ;; permissions, which makes repackaging fail
                  (for-each (lambda (file) (chmod file #o644)) (find-files "."))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       ;; Use the compiler for the target instead of a hard-coded "gcc" so
       ;; that cross-compilation picks the right toolchain.
       #:make-flags
       (list (string-append "CC=" ,(cc-for-target))
             "CFLAGS=-fcommon -O3")
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script; patch the shell reference and
         ;; enter the directory that is built by the following phases.
         (replace 'configure
           (lambda _
             (substitute* "src/BFdriver.c"
               (("/bin/bash") (which "bash")))
             (chdir "src")))
         ;; Copy the listed executables and the documentation by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((tools '("baseml" "basemlg" "codeml"
                            "pamp" "evolver" "yn00" "chi2"))
                   (bin (string-append (assoc-ref outputs "out") "/bin"))
                   (docdir (string-append (assoc-ref outputs "out")
                                          "/share/doc/paml")))
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin)) tools)
               ;; The current directory is "src", so the documentation is
               ;; one level up.
               (copy-recursively "../doc" docdir)))))))
    (home-page "http://abacus.gene.ucl.ac.uk/software/paml.html")
    (synopsis "Phylogenetic analysis by maximum likelihood")
    (description "PAML (for Phylogenetic Analysis by Maximum Likelihood)
contains a few programs for model fitting and phylogenetic tree reconstruction
using nucleotide or amino-acid sequence data.")
    ;; GPLv3 only
    (license license:gpl3)))
10035
(define-public segemehl
  (package
    (name "segemehl")
    (version "0.3.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://www.bioinf.uni-leipzig.de/Software"
                           "/segemehl/downloads/segemehl-"
                           version ".tar.gz"))
       (sha256
        (base32 "0lbzbb7i8zadsn9b99plairhq6s2h1z8qdn6n7djclfis01nycz4"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:make-flags
       (list (string-append "CC=" ,(cc-for-target))
             "all")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The Makefile has no installation target, so copy the two
         ;; executables into the output by hand.
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin"))
                   (programs '("segemehl.x" "haarz.x")))
               (mkdir-p bindir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         programs)))))))
    (inputs
     (list htslib ncurses zlib))
    (native-inputs
     (list pkg-config))
    (home-page "https://www.bioinf.uni-leipzig.de/Software/segemehl")
    (synopsis "Map short sequencer reads to reference genomes")
    (description "Segemehl is software to map short sequencer reads to
reference genomes. Segemehl implements a matching strategy based on enhanced
suffix arrays (ESA). It accepts fasta and fastq queries (gzip'ed and
bgzip'ed). In addition to the alignment of reads from standard DNA- and
RNA-seq protocols, it also allows the mapping of bisulfite converted
reads (Lister and Cokus) and implements a split read mapping strategy. The
output of segemehl is a SAM or BAM formatted alignment file.")
    (license license:gpl3+)))
10081
(define-public kallisto
  (package
    (name "kallisto")
    (version "0.46.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pachterlab/kallisto")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0ij5n7v3m90jdfi7sn8nvglfyf58abp1f5xq42r4k73l0lfds6xi"))
       (modules '((guix build utils)))
       ;; Remove the bundled copy of htslib.
       (snippet
        '(delete-file-recursively "ext/htslib/"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-htslib
           (lambda _
             ;; Top-level CMakeLists.txt: wrap the ExternalProject_Add
             ;; form in a never-true conditional and comment out the
             ;; include directory of the bundled htslib.
             (substitute* "CMakeLists.txt"
               (("^ExternalProject_Add" m)
                (string-append "if (NEVER)\n" m))
               (("^\\)")
                (string-append ")\nendif(NEVER)"))
               (("include_directories\\(\\$\\{htslib_PREFIX.*" m)
                (string-append "# " m)))
             ;; src/CMakeLists.txt: link against "hts" rather than the
             ;; bundled static archive.
             (substitute* "src/CMakeLists.txt"
               (("target_link_libraries\\(kallisto kallisto_core pthread \
\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/../ext/htslib/libhts.a\\)")
                "target_link_libraries(kallisto kallisto_core pthread hts)")
               (("include_directories\\(\\.\\./ext/htslib\\)") ""))
             #t)))))
    (inputs
     (list hdf5 htslib-1.9 zlib))
    (home-page "https://pachterlab.github.io/kallisto/")
    (synopsis "Near-optimal RNA-Seq quantification")
    (description
     "Kallisto is a program for quantifying abundances of transcripts from
RNA-Seq data, or more generally of target sequences using high-throughput
sequencing reads. It is based on the novel idea of pseudoalignment for
rapidly determining the compatibility of reads with targets, without the need
for alignment. Pseudoalignment of reads preserves the key information needed
for quantification, and kallisto is therefore not only fast, but also as
accurate as existing quantification tools.")
    (license license:bsd-2)))
10131
(define-public libgff
  (package
    (name "libgff")
    (version "2.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/libgff")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0ds9r22y8bl1rj7bhl0003kgmm6aam7g8l41mnjfrzw15d9zf9k4"))))
    (build-system cmake-build-system)
    (arguments
     ;; No tests included.
     `(#:tests? #f))
    (home-page "https://github.com/COMBINE-lab/libgff")
    (synopsis "Parser library for reading/writing GFF files")
    (description "This is a simple \"libraryfication\" of the GFF/GTF parsing
code that is used in the Cufflinks codebase. The goal of this library is to
provide this functionality without the necessity of drawing in a heavy-weight
dependency like SeqAn.")
    (license (license:x11-style "https://www.boost.org/LICENSE_1_0.txt"))))
10154
(define-public sailfish
  (package
    (name "sailfish")
    (version "0.10.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/kingsfordgroup/sailfish")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1amcc5hqvsl42hg4x19bi9vy47cl874s0lw1fmi0hwsdk9i8c03v"))
       (modules '((guix build utils)))
       ;; Delete bundled headers for eigen3.
       (snippet
        '(delete-file-recursively "include/eigen3/"))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       ,#~(list (string-append "-DBOOST_INCLUDEDIR="
                               #$(this-package-input "boost")
                               "/include/")
                (string-append "-DBOOST_LIBRARYDIR="
                               #$(this-package-input "boost")
                               "/lib/")
                (string-append "-DBoost_LIBRARIES="
                               "-lboost_iostreams "
                               "-lboost_filesystem "
                               "-lboost_system "
                               "-lboost_thread "
                               "-lboost_timer "
                               "-lboost_chrono "
                               "-lboost_program_options")
                "-DBoost_FOUND=TRUE"
                ;; Don't download RapMap---we already have it!
                "-DFETCHED_RAPMAP=1")
       ;; Tests must be run after installation and the location of the test
       ;; data file must be overridden.  But the tests fail.  It looks like
       ;; they are not really meant to be run.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.53\\.0") "#"))))
         (add-after 'unpack 'do-not-assign-to-macro
           (lambda _
             (substitute* "include/spdlog/details/format.cc"
               (("const unsigned CHAR_WIDTH = 1;") ""))))
         ;; Put the RapMap sources and headers from the "rapmap" input
         ;; into the directories under "external/install".
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((rapmap-checkout (assoc-ref inputs "rapmap"))
                   (source-dir "external/install/src/rapmap/")
                   (include-dir "external/install/include/rapmap/"))
               (mkdir-p source-dir)
               (mkdir-p include-dir)
               (for-each (lambda (file)
                           (install-file file source-dir))
                         (find-files (string-append rapmap-checkout "/src")
                                     "\\.(c|cpp)"))
               (copy-recursively (string-append rapmap-checkout "/include")
                                 include-dir))))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Drop the include of jellyfish's config.h.
             (substitute* '("src/SailfishIndexer.cpp"
                            "src/SailfishUtils.cpp"
                            "src/SailfishQuantify.cpp"
                            "src/FASTAParser.cpp"
                            "include/PCA.hpp"
                            "include/SailfishUtils.hpp"
                            "include/SailfishIndex.hpp"
                            "include/CollapsedEMOptimizer.hpp"
                            "src/CollapsedEMOptimizer.cpp")
               (("#include \"jellyfish/config.h\"") ""))
             ;; Replace references to files under "external/install" with
             ;; the corresponding files found among the inputs.
             (substitute* "src/CMakeLists.txt"
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
                (search-input-directory
                 inputs
                 (string-append "/include/jellyfish-"
                                ,(package-version jellyfish))))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
                (search-input-file inputs "/lib/libjellyfish-2.0.a"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                (search-input-file inputs "/lib/libdivsufsort.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                (search-input-file inputs "/lib/libdivsufsort64.so")))
             (substitute* "CMakeLists.txt"
               ;; Don't prefer static libs
               (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
               (("find_package\\(Jellyfish.*") "")
               (("ExternalProject_Add\\(libjellyfish") "message(")
               (("ExternalProject_Add\\(libgff") "message(")
               (("ExternalProject_Add\\(libsparsehash") "message(")
               (("ExternalProject_Add\\(libdivsufsort") "message("))

             ;; Ensure that Eigen headers can be found
             (setenv "CPLUS_INCLUDE_PATH"
                     (string-append
                      (search-input-directory inputs "/include/eigen3")
                      ":"
                      (or (getenv "CPLUS_INCLUDE_PATH") ""))))))))
    (inputs
     `(("boost" ,boost)
       ("eigen" ,eigen)
       ("jemalloc" ,jemalloc)
       ("jellyfish" ,jellyfish)
       ("sparsehash" ,sparsehash)
       ("rapmap"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/COMBINE-lab/RapMap")
                 (commit (string-append "sf-v" version))))
           (file-name (string-append "rapmap-sf-v" version "-checkout"))
           (sha256
            (base32 "1hv79l5i576ykv5a1srj2p0q36yvyl5966m0fcy2lbi169ipjakf"))
           (modules '((guix build utils)))
           ;; These files are expected to be excluded.
           (snippet
            '(begin (delete-file-recursively "include/spdlog")
                    (for-each delete-file '("include/xxhash.h"
                                            "src/xxhash.c"))))))
       ("libdivsufsort" ,libdivsufsort)
       ("libgff" ,libgff)
       ("tbb" ,tbb-2020)
       ("zlib" ,zlib)))
    (native-inputs
     (list pkg-config))
    (home-page "https://www.cs.cmu.edu/~ckingsf/software/sailfish/")
    (synopsis "Mapping-based isoform quantification from RNA-Seq reads")
    (description "Sailfish is a tool for genomic transcript quantification
from RNA-seq data. It requires a set of target transcripts (either from a
reference or de-novo assembly) to quantify. All you need to run sailfish is a
fasta file containing your reference transcripts and a (set of) fasta/fastq
file(s) containing your reads.")
    (license license:gpl3+)))
10294
;; Private variant of libstadenio fetched from the COMBINE-lab fork; it is
;; listed among the inputs of the salmon package.
(define libstadenio-for-salmon
  (package
    (name "libstadenio")
    (version "1.14.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/staden-io_lib")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1x8kxxqxl892vwfbprlbyfwkkv7c34ggkc94892x9x0g37x5nbwx"))))
    (build-system gnu-build-system)
    (arguments
     ;; Running the tests in parallel is not supported.
     '(#:parallel-tests? #f))
    (inputs
     (list zlib))
    (native-inputs
     (list perl))                       ; for tests
    (home-page "https://github.com/COMBINE-lab/staden-io_lib")
    (synopsis "General purpose trace and experiment file library")
    (description "This package provides a library of file reading and writing
code to provide a general purpose Trace file (and Experiment File) reading
interface.

The following file formats are supported:

@enumerate
@item SCF trace files
@item ABI trace files
@item ALF trace files
@item ZTR trace files
@item SFF trace archives
@item SRF trace archives
@item Experiment files
@item Plain text files
@item SAM/BAM sequence files
@item CRAM sequence files
@end enumerate\n")
    (license license:bsd-3)))
10335
(define-public salmon
  (package
    (name "salmon")
    (version "1.9.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/salmon")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1370ry3jpj05gplzyny44mqg77a29a6gp8ijmjz135d2igf956r8"))
       (modules '((guix build utils)))
       ;; Delete bundled headers for eigen3.
       (snippet
        '(delete-file-recursively "include/eigen3/"))))
    (build-system cmake-build-system)
    (arguments
     (list
      #:configure-flags
      #~(list (string-append "-Dlibgff_DIR="
                             #$(this-package-input "libgff") "/lib")
              "-DCMAKE_CXX_FLAGS=\"-DHAVE_NUMERIC_LIMITS128=1\""
              "-Dlibgff_FOUND=TRUE"
              "-DTBB_FOUND=TRUE"
              #$(string-append "-DTBB_VERSION=" (package-version tbb))
              "-DFETCHED_PUFFERFISH=TRUE"
              "-DUSE_SHARED_LIBS=TRUE")
      #:phases
      '(modify-phases %standard-phases
         ;; Unpack the "pufferfish" input into the source tree and copy
         ;; the headers and sources listed below to the directories under
         ;; "external/install".
         (add-after 'unpack 'prepare-pufferfish
           (lambda* (#:key inputs #:allow-other-keys)
             (copy-recursively (assoc-ref inputs "pufferfish")
                               "external/pufferfish")
             ;; This test isn't working correctly, so compilation aborts.
             (substitute* "external/pufferfish/include/string_view.hpp"
               (("#if __has_include\\(<string_view>\\)")
                "#if 0"))
             (let ((header-dir "external/install/pufferfish/include/pufferfish")
                   (source-dir "external/install/src/pufferfish"))
               (mkdir-p header-dir)
               (mkdir-p source-dir)
               ;; Individual header files.
               (for-each
                (lambda (file)
                  (install-file
                   (string-append "external/pufferfish/include/" file)
                   header-dir))
                (list "ProgOpts.hpp" "BooPHF.hpp" "SpinLock.hpp"
                      "Kmer.hpp" "CanonicalKmer.hpp" "string_view.hpp"
                      "CanonicalKmerIterator.hpp"
                      "PufferfishBaseIndex.hpp"
                      "PufferfishIndex.hpp"
                      "PufferfishSparseIndex.hpp"
                      "PufferfishLossyIndex.hpp"
                      "PufferfishTypes.hpp"
                      "rank9b.hpp" "rank9sel.hpp" "macros.hpp"
                      "select.hpp" "Util.hpp"
                      "PairedAlignmentFormatter.hpp"
                      "SelectiveAlignmentUtils.hpp"
                      "PuffAligner.hpp" "MemCollector.hpp"
                      "MemChainer.hpp" "CommonTypes.hpp"
                      "SAMWriter.hpp" "PufferfishConfig.hpp"
                      "BulkChunk.hpp" "BinWriter.hpp"))

               ;; Whole header directories.
               (for-each
                (lambda (dir)
                  (copy-recursively
                   (string-append "external/pufferfish/include/" dir)
                   (string-append header-dir "/" dir)))
                (list "libdivide"
                      "ksw2pp"
                      "compact_vector"
                      "itlib"
                      "metro"
                      "chobo"
                      "sparsepp"
                      "simde"
                      "tsl"))
               (copy-recursively "external/pufferfish/src/metro/"
                                 (string-append source-dir "/metro"))
               (install-file "external/pufferfish/src/rank9b.cpp"
                             source-dir)

               ;; Do not complain about not having built libtbb
               (substitute* "external/pufferfish/external/twopaco/CMakeLists.txt"
                 (("add_dependencies.*") "")))))
         (add-after 'unpack 'do-not-phone-home
           (lambda _
             (substitute* "src/Salmon.cpp"
               (("getVersionMessage\\(\\)") "\"\""))))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Ensure that all headers can be found
             (setenv "CPLUS_INCLUDE_PATH"
                     (string-append
                      (or (getenv "CPLUS_INCLUDE_PATH") "")
                      ":"
                      (getcwd) "/external/install/pufferfish/include:"
                      (assoc-ref inputs "eigen")
                      "/include/eigen3"))))
         (add-after 'unpack 'fix-error-message-in-tests
           (lambda _
             (substitute* "cmake/TestSalmonQuasi.cmake"
               (("SALMON_QUASI_INDEX_COMMAND")
                "SALMON_QUASI_INDEX_CMD")))))))
    (inputs
     (list boost
           bzip2
           cereal-1.3.0
           curl
           eigen
           jemalloc
           libgff
           tbb
           libstadenio-for-salmon
           xz
           zlib))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("pufferfish"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/COMBINE-lab/pufferfish")
                 (commit (string-append "salmon-v" version))))
           (file-name (git-file-name "pufferfish" version))
           (sha256
            (base32
             "048a006mc2d0h78ym58mv67hl1pj480ilc5ifq0rlzfdyyfs1b8i"))))))
    (home-page "https://github.com/COMBINE-lab/salmon")
    (synopsis "Quantification from RNA-seq reads using lightweight alignments")
    (description "Salmon is a program to produce highly-accurate,
transcript-level quantification estimates from RNA-seq data. Salmon achieves
its accuracy and speed via a number of different innovations, including the
use of lightweight alignments (accurate but fast-to-compute proxies for
traditional read alignments) and massively-parallel stochastic collapsed
variational inference.")
    (license license:gpl3+)))
10472
(define-public python-loompy
  (package
    (name "python-loompy")
    (version "3.0.7")
    ;; The tarball on Pypi does not include the tests.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/linnarsson-lab/loompy")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0xmw2yv1y3y7vh5jcbrmlkn43nmfs0pf6z78k1yxqs3qy248m9b0"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; See https://github.com/linnarsson-lab/loompy/issues/169
         (add-after 'unpack 'fix-h5py-error
           (lambda _
             (substitute* "tests/test_file_attribute_manager.py"
               (("h5py.File\\(f.name\\)")
                "h5py.File(f.name, 'a')"))))
         ;; Numba needs a writable dir to cache functions.
         (add-before 'check 'set-numba-cache-dir
           (lambda _
             (setenv "NUMBA_CACHE_DIR" "/tmp")))
         ;; Run the test suite with pytest, unless tests are disabled.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "pytest" "tests")))))))
    (propagated-inputs
     (list python-click
           python-h5py
           python-numba
           python-numpy
           python-numpy-groupies
           python-pandas
           python-scipy))
    (native-inputs
     (list python-pytest))
    (home-page "https://github.com/linnarsson-lab/loompy")
    (synopsis "Work with .loom files for single-cell RNA-seq data")
    (description "The loom file format is an efficient format for very large
omics datasets, consisting of a main matrix, optional additional layers, a
variable number of row and column annotations. Loom also supports sparse
graphs. This library makes it easy to work with @file{.loom} files for
single-cell RNA-seq data.")
    (license license:bsd-3)))
10523
(define-public python-biothings-client
  (package
    (name "python-biothings-client")
    (version "0.2.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "biothings_client" version))
       (sha256
        (base32
         "0bccs37d5saxn5xsd2rfpkrnc5a120xs3ibizai66fgvp1vxbnc4"))))
    (build-system python-build-system)
    (arguments
     ;; The tests require internet access.
     `(#:tests? #f))
    (propagated-inputs
     (list python-requests))
    (home-page "https://github.com/biothings/biothings_client.py")
    (synopsis "Python client for BioThings API services")
    (description "This package provides a Python client for BioThings
API services.")
    (license license:bsd-3)))
10542
(define-public python-mygene
  (package
    (name "python-mygene")
    (version "3.2.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "mygene" version))
       (sha256
        (base32
         "1snszwdgfygchxshcbry3b5pbcw3g1isp8dw46razxccqaxwlag7"))))
    (build-system python-build-system)
    ;; The BioThings client is propagated as the only declared dependency.
    (propagated-inputs
     (list python-biothings-client))
    (home-page "https://github.com/biothings/mygene.py")
    (synopsis "Python Client for MyGene.Info services")
    (description "MyGene.Info provides simple-to-use REST web services
to query/retrieve gene annotation data. It's designed with simplicity
and performance emphasized. Mygene is a Python wrapper to access
MyGene.Info services.")
    (license license:bsd-3)))
10563
;; We cannot use the latest commit because it requires Java 9.
(define-public java-forester
  (let ((commit "86b07efe302d5094b42deed9260f719a4c4ac2e6")
        (revision "1"))
    (package
      (name "java-forester")
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/cmzmasek/forester")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0vxavc1yrf84yrnf20dq26hi0lglidk8d382xrxsy4qmlbjd276z"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Delete bundled jars and pre-built classes
             (delete-file-recursively "forester/java/resources")
             (delete-file-recursively "forester/java/classes")
             (for-each delete-file (find-files "forester/java/" "\\.jar$"))
             ;; Delete bundled applications
             (delete-file-recursively "forester_applications")
             #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:jdk ,icedtea-8
         #:modules ((guix build ant-build-system)
                    (guix build utils)
                    (guix build java-utils)
                    (sxml simple)
                    (sxml transform))
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'chdir
             (lambda _
               (chdir "forester/java")
               #t))
           ;; Rewrite build.xml through SXML: the transformed document is
           ;; written to a temporary file, which then replaces the
           ;; original.
           (add-after 'chdir 'fix-dependencies
             (lambda _
               (chmod "build.xml" #o664)
               (call-with-output-file "build.xml.new"
                 (lambda (port)
                   (sxml->xml
                    (pre-post-order
                     (with-input-from-file "build.xml"
                       (lambda _ (xml->sxml #:trim-whitespace? #t)))
                     `(;; Remove all unjar tags to avoid repacking classes.
                       (unjar . ,(lambda _ '()))
                       (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                       (*text* . ,(lambda (_ txt) txt))))
                    port)))
               (rename-file "build.xml.new" "build.xml")
               #t))
           ;; FIXME: itext is difficult to package as it depends on a few
           ;; unpackaged libraries.
           (add-after 'chdir 'remove-dependency-on-unpackaged-itext
             (lambda _
               (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
               (substitute* "src/org/forester/archaeopteryx/MainFrame.java"
                 (("pdf_written_to = PdfExporter.*")
                  "throw new IOException(\"PDF export is not available.\");"))
               #t))
           ;; There is no install target
           (replace 'install (install-jars ".")))))
      (propagated-inputs
       (list java-commons-codec java-openchart2))
      (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
      (synopsis "Phylogenomics libraries for Java")
      (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
      (license license:lgpl2.1+))))
10638
(define-public java-forester-1.005
  (package
    (name "java-forester")
    (version "1.005")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://repo1.maven.org/maven2/"
                           "org/biojava/thirdparty/forester/"
                           version "/forester-" version "-sources.jar"))
       (file-name (string-append name "-" version ".jar"))
       (sha256
        (base32 "04r8qv4rk3p71z4ajrvp11py1z46qrx0047j3zzs79s6lnsm3lcv"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:jdk ,icedtea-8
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform))
       #:phases
       (modify-phases %standard-phases
         ;; Generate a top-level build.xml from src/build.xml via SXML,
         ;; copy the two resource files from the inputs next to it, and
         ;; adjust the references to them.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (call-with-output-file "build.xml"
               (lambda (port)
                 (sxml->xml
                  (pre-post-order
                   (with-input-from-file "src/build.xml"
                     (lambda _ (xml->sxml #:trim-whitespace? #t)))
                   `(;; Remove all unjar tags to avoid repacking classes.
                     (unjar . ,(lambda _ '()))
                     (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                     (*text* . ,(lambda (_ txt) txt))))
                  port)))
             (copy-file (assoc-ref inputs "synth_look_and_feel_1.xml")
                        "synth_look_and_feel_1.xml")
             (copy-file (assoc-ref inputs "phyloxml.xsd")
                        "phyloxml.xsd")
             (substitute* "build.xml"
               (("../resources/synth_laf/synth_look_and_feel_1.xml")
                "synth_look_and_feel_1.xml")
               (("../resources/phyloxml_schema/1.10/phyloxml.xsd")
                "phyloxml.xsd"))
             #t))
         ;; FIXME: itext is difficult to package as it depends on a few
         ;; unpackaged libraries.
         (add-after 'unpack 'remove-dependency-on-unpackaged-itext
           (lambda _
             (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
             ;; Replace the PdfExporter call with a thrown exception; the
             ;; following two patterns insert the "*/" that closes the
             ;; comment opened by the first replacement.
             (substitute* '("src/org/forester/archaeopteryx/MainFrame.java"
                            "src/org/forester/archaeopteryx/MainFrameApplication.java")
               (("pdf_written_to = PdfExporter.*")
                "throw new IOException(\"PDF export is not available.\"); /*")
               ((".getPrintSizeX\\(\\), getOptions\\(\\).getPrintSizeY\\(\\) \\);") "*/")
               (("getCurrentTreePanel\\(\\).getHeight\\(\\) \\);") "*/"))
             #t))
         (add-after 'unpack 'delete-pre-built-classes
           (lambda _
             (delete-file-recursively "src/classes")
             #t))
         ;; There is no install target
         (replace 'install (install-jars ".")))))
    (propagated-inputs
     (list java-commons-codec java-openchart2))
    ;; The source archive does not contain the resources.
    (native-inputs
     `(("phyloxml.xsd"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "b61cc2dcede0bede317db362472333115756b8c6/"
                               "forester/resources/phyloxml_schema/1.10/phyloxml.xsd"))
           (file-name (string-append name "-phyloxml-" version ".xsd"))
           (sha256
            (base32
             "1zxc4m8sn4n389nqdnpxa8d0k17qnr3pm2y5y6g6vh4k0zm52npv"))))
       ("synth_look_and_feel_1.xml"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "29e04321615da6b35c1e15c60e52caf3f21d8e6a/"
                               "forester/java/classes/resources/"
                               "synth_look_and_feel_1.xml"))
           (file-name (string-append name "-synth-look-and-feel-" version ".xml"))
           (sha256
            (base32
             "1gv5602gv4k7y7713y75a4jvj7i9s7nildsbdl7n9q10sc2ikg8h"))))))
    (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
    (synopsis "Phylogenomics libraries for Java")
    (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
    (license license:lgpl2.1+)))
10733
;; Core BioJava library (4.2.x series), built from the "biojava-core"
;; sub-directory of the upstream git checkout with the Ant build system.
(define-public java-biojava-core
  (package
    (name "java-biojava-core")
    (version "4.2.11")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             (commit (string-append "biojava-" version))))
       (file-name (string-append name "-" version "-checkout"))
       (sha256
        (base32
         "1bvryh2bpsvash8ln79cmc9sqm8qw72hz4xzwqxcrjm8ssxszhqk"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-core.jar"
       #:source-dir "biojava-core/src/main/java/"
       #:test-dir "biojava-core/src/test"
       ;; These tests seem to require internet access.
       #:test-exclude (list "**/SearchIOTest.java"
                            "**/BlastXMLParserTest.java"
                            "**/GenbankCookbookTest.java"
                            "**/GenbankProxySequenceReaderTest.java")
       #:phases
       (modify-phases %standard-phases
         ;; Stage the main resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-core/src/main/resources"
                               "build/classes")))
         ;; Likewise for the resources needed by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-core/src/test/resources"
                               "build/test-classes"))))))
    (propagated-inputs
     (list java-log4j-api java-log4j-core java-slf4j-api
           java-slf4j-simple))
    (native-inputs
     (list java-junit java-hamcrest-core))
    (home-page "https://biojava.org")
    (synopsis "Core libraries of Java framework for processing biological data")
    (description "BioJava is a project dedicated to providing a Java framework
for processing biological data. It provides analytical and statistical
routines, parsers for common file formats, reference implementations of
popular algorithms, and allows the manipulation of sequences and 3D
structures. The goal of the biojava project is to facilitate rapid
application development for bioinformatics.

This package provides the core libraries.")
    (license license:lgpl2.1+)))
10786
;; BioJava "phylo" module; same source checkout as java-biojava-core, built
;; from the "biojava-phylo" sub-directory.
(define-public java-biojava-phylo
  (package
    (inherit java-biojava-core)
    (name "java-biojava-phylo")
    ;; The build system, native inputs, home page and license are inherited
    ;; unchanged from java-biojava-core, so they are not repeated here.
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Stage the main resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")))
         ;; Likewise for the resources needed by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes"))))))
    (propagated-inputs
     (list java-log4j-api
           java-log4j-core
           java-slf4j-api
           java-slf4j-simple
           java-biojava-core
           java-forester))
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
10821
;; BioJava "alignment" module; same source checkout as java-biojava-core,
;; built from the "biojava-alignment" sub-directory.
(define-public java-biojava-alignment
  (package
    (inherit java-biojava-core)
    (name "java-biojava-alignment")
    ;; The build system, native inputs, home page and license are inherited
    ;; unchanged from java-biojava-core, so they are not repeated here.
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Stage the main resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")))
         ;; Likewise for the resources needed by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes"))))))
    (propagated-inputs
     (list java-log4j-api
           java-log4j-core
           java-slf4j-api
           java-slf4j-simple
           java-biojava-core
           java-biojava-phylo
           java-forester))
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
10864
;; The 4.0.0 release of java-biojava-core.  Only the version and the source
;; origin are overridden; every other field comes from java-biojava-core.
;; In this file it is the parent of the "-4.0" phylo and alignment variants
;; and an input of dropseq-tools.
(define-public java-biojava-core-4.0
  (package (inherit java-biojava-core)
    (name "java-biojava-core")
    (version "4.0.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/biojava/biojava")
                    ;; Upstream tags are of the form "biojava-<version>".
                    (commit (string-append "biojava-" version))))
              (file-name (string-append name "-" version "-checkout"))
              (sha256
               (base32
                "13675f6y9aqi7bi2lk3s1z7a22ynccjiqwa8izh7p97xi9wsfmd8"))))))
10878
;; BioJava "phylo" module from the 4.0 series, built against
;; java-biojava-core-4.0 and java-forester-1.005.
(define-public java-biojava-phylo-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-phylo")
    ;; The build system, native inputs, home page and license are inherited
    ;; unchanged from java-biojava-core-4.0, so they are not repeated here.
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Stage the main resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")))
         ;; Likewise for the resources needed by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes"))))))
    (propagated-inputs
     (list java-log4j-api
           java-log4j-core
           java-slf4j-api
           java-slf4j-simple
           java-biojava-core-4.0
           java-forester-1.005))
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
10913
;; BioJava "alignment" module from the 4.0 series, built against the other
;; "-4.0" BioJava packages and java-forester-1.005.
(define-public java-biojava-alignment-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-alignment")
    ;; The build system, native inputs, home page and license are inherited
    ;; unchanged from java-biojava-core-4.0, so they are not repeated here.
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Stage the main resources next to the compiled classes.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")))
         ;; Likewise for the resources needed by the test suite.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes"))))))
    (propagated-inputs
     (list java-log4j-api
           java-log4j-core
           java-slf4j-api
           java-slf4j-simple
           java-biojava-core-4.0
           java-biojava-phylo-4.0
           java-forester-1.005))
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
10956
;; Drop-seq command line tools.  The upstream zip bundles third-party jars,
;; which are deleted in the source snippet; the Guix-provided jars are
;; symlinked into "jar/lib" for the build and into share/java/lib of the
;; output afterwards.
(define-public dropseq-tools
  (package
    (name "dropseq-tools")
    (version "1.13")
    (source
     (origin
       (method url-fetch)
       (uri "http://mccarrolllab.com/download/1276/")
       (file-name (string-append "dropseq-tools-" version ".zip"))
       (sha256
        (base32
         "0yrffckxqk5l8b5xb6z4laq157zd9mdypr2p4b4vq2bhjzi1sj0s"))
       ;; Delete bundled libraries
       (modules '((guix build utils)))
       (snippet
        '(begin
           (for-each delete-file (find-files "jar/lib" "\\.jar$"))
           (delete-file-recursively "3rdParty")))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; test data are not included
       #:test-target "test"
       #:build-target "all"
       #:source-dir "public/src/"
       #:jdk ,icedtea-8
       #:make-flags
       (list ,#~(string-append "-Dpicard.executable.dir="
                               #$(this-package-input "java-picard")
                               "/share/java/"))
       #:modules ((ice-9 match)
                  (srfi srfi-1)
                  (guix build utils)
                  (guix build java-utils)
                  (guix build ant-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: fails with "java.io.FileNotFoundException:
         ;; /gnu/store/…-dropseq-tools-1.13/share/java/lib/biojava-alignment.jar"
         (delete 'generate-jar-indices)
         ;; All dependencies must be linked to "lib", because that's where
         ;; they will be searched for when the Class-Path property of the
         ;; manifest is computed.
         (add-after 'unpack 'record-references
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "jar/lib")
             ;; Keep the directories of every input whose label starts with
             ;; "java-", except for the test-only java-testng.
             (let ((dirs (filter-map (match-lambda
                                       ((name . dir)
                                        (and (string-prefix? "java-" name)
                                             (not (string=? name "java-testng"))
                                             dir)))
                                     inputs)))
               (for-each (lambda (jar)
                           (symlink jar (string-append "jar/lib/" (basename jar))))
                         (append-map (lambda (dir) (find-files dir "\\.jar$"))
                                     dirs)))))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    ;; SHARE keeps its trailing slash: it is written verbatim
                    ;; into the scripts' "jar_deploy_dir=" line below.
                    (share (string-append out "/share/java/"))
                    (lib (string-append share "lib/"))
                    (scripts (list "BAMTagHistogram"
                                   "BAMTagofTagCounts"
                                   "BaseDistributionAtReadPosition"
                                   "CollapseBarcodesInPlace"
                                   "CollapseTagWithContext"
                                   "ConvertToRefFlat"
                                   "CreateIntervalsFiles"
                                   "DetectBeadSynthesisErrors"
                                   "DigitalExpression"
                                   "Drop-seq_alignment.sh"
                                   "FilterBAM"
                                   "FilterBAMByTag"
                                   "GatherGeneGCLength"
                                   "GatherMolecularBarcodeDistributionByGene"
                                   "GatherReadQualityMetrics"
                                   "PolyATrimmer"
                                   "ReduceGTF"
                                   "SelectCellsByNumTranscripts"
                                   "SingleCellRnaSeqMetricsCollector"
                                   "TagBamWithReadSequenceExtended"
                                   "TagReadWithGeneExon"
                                   "TagReadWithInterval"
                                   "TrimStartingSequence"
                                   "ValidateReference")))
               (for-each mkdir-p (list bin share lib))
               (install-file "dist/dropseq.jar" share)
               (for-each (lambda (script)
                           (chmod script #o555)
                           (install-file script bin))
                         scripts)
               ;; Point the installed wrapper scripts at the absolute "java"
               ;; found on PATH and at the installed jar directory.
               (substitute* (map (lambda (script)
                                   (string-append bin "/" script))
                                 scripts)
                 (("^java") (which "java"))
                 (("jar_deploy_dir=.*")
                  (string-append "jar_deploy_dir=" share "\n"))))))
         ;; FIXME: We do this after stripping jars because we don't want it to
         ;; copy all these jars and strip them.  We only want to install
         ;; links.  Arguably, this is a problem with the ant-build-system.
         (add-after 'strip-jar-timestamps 'install-links
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((lib (string-append (assoc-ref outputs "out")
                                       "/share/java/lib/")))
               ;; Re-create each build-time link in the output, pointing at
               ;; the same target.
               (for-each (lambda (jar)
                           (symlink (readlink jar)
                                    (string-append lib (basename jar))))
                         (find-files "jar/lib" "\\.jar$"))))))))
    ;; The labels matter: 'record-references selects inputs by their "java-"
    ;; label prefix.
    (inputs
     `(("jdk" ,icedtea-8)
       ("java-picard" ,java-picard-2.10.3)
       ("java-log4j-1.2-api" ,java-log4j-1.2-api)
       ("java-commons-math3" ,java-commons-math3)
       ("java-commons-jexl2" ,java-commons-jexl-2)
       ("java-commons-collections4" ,java-commons-collections4)
       ("java-commons-lang2" ,java-commons-lang)
       ("java-commons-io" ,java-commons-io)
       ("java-snappy-1.0.3-rc3" ,java-snappy-1)
       ("java-guava" ,java-guava)
       ("java-la4j" ,java-la4j)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-alignment" ,java-biojava-alignment-4.0)
       ("java-jdistlib" ,java-jdistlib)
       ("java-simple-xml" ,java-simple-xml)
       ("java-snakeyaml" ,java-snakeyaml)))
    (native-inputs
     (list unzip java-testng))
    (home-page "http://mccarrolllab.com/dropseq/")
    (synopsis "Tools for Drop-seq analyses")
    (description "Drop-seq is a technology to enable biologists to
analyze RNA expression genome-wide in thousands of individual cells at
once. This package provides tools to perform Drop-seq analyses.")
    (license license:expat)))
11092
;; PiGx RNAseq pipeline, built from the upstream release tarball with the
;; GNU build system after regenerating the autotools files.
(define-public pigx-rnaseq
  (package
    (name "pigx-rnaseq")
    (version "0.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_rnaseq/"
                           "releases/download/v" version
                           "/pigx_rnaseq-" version ".tar.gz"))
       (sha256
        (base32 "0acdjimfb9ywba8zsv7lavv436pmcmp8ra683h11wr4s3681pqk8"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-tests? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; Running "test.sh" means running the whole pipeline: slow, and it
         ;; may run out of memory.  The MultiQC test is heavy as well, so
         ;; both entries are blanked out of the Makefile template.
         (add-after 'unpack 'disable-resource-intensive-test
           (lambda _
             (substitute* "Makefile.in"
               (("^ tests/test_multiqc/test.sh") "")
               (("^ test.sh") ""))))
         (add-before 'bootstrap 'autoreconf
           (lambda _ (invoke "autoreconf" "-vif")))
         ;; Expose Guix's Python search path under the name PYTHONPATH.
         (add-before 'configure 'set-PYTHONPATH
           (lambda _
             (setenv "PYTHONPATH" (getenv "GUIX_PYTHONPATH"))))
         ;; The readr package is picky about timezones.
         (add-before 'check 'set-timezone
           (lambda* (#:key inputs #:allow-other-keys)
             (setenv "TZ" "UTC+1")
             (setenv "TZDIR"
                     (search-input-directory inputs "share/zoneinfo")))))))
    (inputs
     (list coreutils sed gzip
           snakemake megadepth multiqc star-for-pigx hisat2 fastp htseq
           samtools
           r-minimal r-rmarkdown r-ggplot2 r-ggpubr r-ggrepel r-gprofiler2
           r-deseq2 r-dt r-knitr r-pheatmap r-corrplot r-reshape2 r-plotly
           r-scales r-summarizedexperiment r-crosstalk r-tximport
           r-rtracklayer r-rjson
           salmon pandoc
           python-wrapper python-deeptools python-pyyaml))
    (native-inputs
     (list tzdata automake autoconf))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for RNA sequencing experiments")
    (description "PiGX RNAseq is an analysis pipeline for preprocessing and
reporting for RNA sequencing experiments. It is easy to use and produces high
quality reports. The inputs are reads files from the sequencing experiment,
and a configuration file which describes the experiment. In addition to
quality control of the experiment, the pipeline produces a differential
expression report comparing samples in an easily configurable manner.")
    (license license:gpl3+)))
11177
;; PiGx ChIPseq pipeline, built from the upstream release tarball with the
;; GNU build system.
(define-public pigx-chipseq
  (package
    (name "pigx-chipseq")
    (version "0.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/"
                           "releases/download/v" version
                           "/pigx_chipseq-" version ".tar.gz"))
       (sha256
        (base32 "008n6drj9q5av86xihxlj4py2c9p3c5z5ld89c3bksrp77zxiy67"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f          ; parts of the tests rely on access to the network
       #:phases
       (modify-phases %standard-phases
         ;; Expose Guix's Python search path under the name PYTHONPATH.
         (add-before 'configure 'set-PYTHONPATH
           (lambda _
             (setenv "PYTHONPATH" (getenv "GUIX_PYTHONPATH")))))))
    (inputs
     (list grep coreutils
           r-minimal r-argparser r-biocparallel r-biostrings r-chipseq
           r-corrplot r-data-table r-deseq2 r-dplyr r-dt r-genomation
           r-genomicalignments r-genomicranges r-ggplot2 r-ggrepel
           r-gprofiler2 r-heatmaply r-hexbin r-htmlwidgets r-jsonlite
           r-pheatmap r-plotly r-rmarkdown r-rsamtools r-rsubread
           r-rtracklayer r-s4vectors r-stringr r-tibble r-tidyr
           python-wrapper python-pyyaml python-magic python-xlrd
           trim-galore macs multiqc perl pandoc fastqc bowtie idr
           snakemake samtools bedtools kentutils))
    (native-inputs
     (list python-pytest))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for ChIP sequencing experiments")
    (description "PiGX ChIPseq is an analysis pipeline for preprocessing, peak
calling and reporting for ChIP sequencing experiments. It is easy to use and
produces high quality reports. The inputs are reads files from the sequencing
experiment, and a configuration file which describes the experiment. In
addition to quality control of the experiment, the pipeline enables to set up
multiple peak calling analysis and allows the generation of a UCSC track hub
in an easily configurable manner.")
    (license license:gpl3+)))
11259
;; PiGx BSseq pipeline, built from the upstream release tarball with the
;; GNU build system.
(define-public pigx-bsseq
  (package
    (name "pigx-bsseq")
    (version "0.1.8")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/"
                                  "releases/download/v" version
                                  "/pigx_bsseq-" version ".tar.gz"))
              (sha256
               (base32
                "1s8zgrqxabrawrgkga5rmgb0gyzj7ck47p3rkicjkfv7r2yjy0d7"))))
    (build-system gnu-build-system)
    (arguments
     `(;; TODO: tests currently require 12+GB of RAM.  See
       ;; https://github.com/BIMSBbioinfo/pigx_bsseq/issues/164
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Expose Guix's Python search path under the name PYTHONPATH.
         (add-before 'configure 'set-PYTHONPATH
           (lambda _
             (setenv "PYTHONPATH" (getenv "GUIX_PYTHONPATH"))))
         (add-before 'check 'set-timezone
           ;; The readr package is picky about timezones.
           (lambda* (#:key inputs #:allow-other-keys)
             (setenv "TZ" "UTC+1")
             (setenv "TZDIR"
                     (search-input-directory inputs
                                             "share/zoneinfo")))))))
    (native-inputs
     (list tzdata))
    ;; Each package is listed exactly once; r-ggbio used to appear twice.
    (inputs
     (list coreutils
           sed
           grep
           r-minimal
           r-annotationhub
           r-dt
           r-genomation
           r-ggbio
           r-ggrepel
           r-matrixstats
           r-methylkit
           r-reshape2
           r-rtracklayer
           r-rmarkdown
           r-bookdown
           r-ggplot2
           pandoc
           python-wrapper
           python-pyyaml
           snakemake
           bismark
           bowtie
           bwa-meth
           fastqc
           methyldackel
           multiqc
           trim-galore
           cutadapt
           samblaster
           samtools))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Bisulfite sequencing pipeline from fastq to methylation reports")
    (description "PiGx BSseq is a data processing pipeline for raw fastq read
data of bisulfite experiments; it produces reports on aggregate methylation
and coverage and can be used to produce information on differential
methylation and segmentation.")
    (license license:gpl3+)))
11330
;; PiGx scRNAseq pipeline, built from the upstream release tarball with the
;; GNU build system.
(define-public pigx-scrnaseq
  (package
    (name "pigx-scrnaseq")
    (version "1.1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_scrnaseq/"
                           "releases/download/v" version
                           "/pigx_scrnaseq-" version ".tar.gz"))
       (sha256
        (base32 "0adx7877c3lhlrzfid76i8bc829wcmzvrm0jx47gyid8mxqb7vqs"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-additional-environment-variables
           (lambda _
             ;; loompy needs a Numba cache directory.
             (setenv "NUMBA_CACHE_DIR" "/tmp")
             ;; Needed so that the environment gets captured.
             (setenv "PYTHONPATH" (getenv "GUIX_PYTHONPATH")))))))
    (inputs
     (list coreutils perl fastqc flexbar icedtea-8 jellyfish
           python-wrapper python-pyyaml python-pandas python-magic
           python-numpy python-loompy
           pandoc samtools snakemake star-for-pigx
           r-minimal r-argparser r-cowplot r-data-table r-delayedarray
           r-delayedmatrixstats r-dplyr r-dropbead r-dt r-genomicalignments
           r-genomicfiles r-genomicranges r-ggplot2 r-hdf5array r-pheatmap
           r-rmarkdown r-rsamtools r-rtracklayer r-rtsne r-scater r-scran
           r-seurat r-singlecellexperiment r-stringr r-yaml))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for single-cell RNA sequencing experiments")
    (description
     "PiGX scRNAseq is an analysis pipeline for preprocessing and
quality control for single cell RNA sequencing experiments. The inputs are
read files from the sequencing experiment, and a configuration file which
describes the experiment. It produces processed files for downstream analysis
and interactive quality reports. The pipeline is designed to work with UMI
based methods.")
    (license license:gpl3+)))
11405
;; PiGx SARS-CoV-2 wastewater pipeline, built from the upstream release
;; tarball with the GNU build system after regenerating the autotools files.
(define-public pigx-sars-cov-2
  (package
    (name "pigx-sars-cov-2")
    (version "0.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_sars-cov-2"
                           "/releases/download/v" version
                           "/pigx_sars-cov-2-" version ".tar.gz"))
       (sha256
        (base32 "1bqm03ypf7l8lrkjkydxzn7vy0qlps3v9c5cpz2wb008zw44bi3k"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;requires huge kraken database
       #:phases
       (modify-phases %standard-phases
         (add-before 'bootstrap 'autoreconf
           (lambda _
             ;; Patch the R package check in the m4 macro, then regenerate
             ;; the build system.  See
             ;; https://github.com/BIMSBbioinfo/pigx_sars-cov-2/issues/123
             (substitute* "m4/ax_r_package.m4"
               (("if\\(is.na\\(packageDescription\\(\"PKG\"\\)\\)\\)")
                "if(system.file(package=\"PKG\") == \"\")"))
             (invoke "autoreconf" "-vif")))
         ;; Expose Guix's Python search path under the name PYTHONPATH.
         (add-before 'configure 'set-PYTHONPATH
           (lambda _
             (setenv "PYTHONPATH" (getenv "GUIX_PYTHONPATH")))))))
    (native-inputs
     (list automake autoconf))
    (inputs
     (list bash-minimal bedtools bwa ensembl-vep fastp fastqc ivar kraken2
           krona-tools lofreq multiqc
           python-pyyaml python-wrapper
           r-base64url r-dplyr r-dt r-ggplot2 r-magrittr r-minimal r-plotly
           r-qpcr r-r-utils r-reshape2 r-rmarkdown r-stringr r-tidyr
           r-viridis
           samtools snakemake wget))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for wastewater sequencing")
    (description "PiGx SARS-CoV-2 is a pipeline for analysing data from
sequenced wastewater samples and identifying given variants-of-concern of
SARS-CoV-2. The pipeline can be used for continuous sampling. The output
report will provide an intuitive visual overview about the development of
variant abundance over time and location.")
    (license license:gpl3+)))
11474
;; Keeps the former package name "pigx-sars-cov2-ww" available as a
;; deprecated alias of pigx-sars-cov-2.
(define-public pigx-sars-cov2-ww
  (deprecated-package "pigx-sars-cov2-ww" pigx-sars-cov-2))
11477
;; Umbrella package for the PiGx pipelines; its inputs are the four
;; pipeline packages named in the description, plus python.
(define-public pigx
  (package
    (name "pigx")
    (version "0.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx/"
                           "releases/download/v" version
                           "/pigx-" version ".tar.gz"))
       (sha256
        (base32 "1i5njdy1clj5ncw45d16p7mwmqvb1ilikl9n797pxklc3f4s7mq7"))))
    (build-system gnu-build-system)
    (inputs
     (list python
           pigx-bsseq
           pigx-chipseq
           pigx-rnaseq
           pigx-scrnaseq))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipelines for genomics")
    (description "PiGx is a collection of genomics pipelines. It includes the
following pipelines:

@itemize
@item PiGx BSseq for raw fastq read data of bisulfite experiments
@item PiGx RNAseq for RNAseq samples
@item PiGx scRNAseq for single cell dropseq analysis
@item PiGx ChIPseq for reads from ChIPseq experiments
@end itemize

All pipelines are easily configured with a simple sample sheet and a
descriptive settings file. The result is a set of comprehensive, interactive
HTML reports with interesting findings about your samples.")
    (license license:gpl3+)))
11509
;; Genrich peak caller.  There is no configure phase, and the single
;; "Genrich" executable is installed by hand.
(define-public genrich
  (package
    (name "genrich")
    (version "0.5")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/jsh58/Genrich")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0x0q6z0208n3cxzqjla4rgjqpyqgwpmz27852lcvzkzaigymq4zp"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy the built executable into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "Genrich" bin)))))))
    (inputs
     (list zlib))
    (home-page "https://github.com/jsh58/Genrich")
    (synopsis "Detecting sites of genomic enrichment")
    (description "Genrich is a peak-caller for genomic enrichment
assays (e.g. ChIP-seq, ATAC-seq). It analyzes alignment files generated
following the assay and produces a file detailing peaks of significant
enrichment.")
    (license license:expat)))
11542
(define-public mantis
  ;; A year has passed since 0.1 was tagged, so an arbitrary later commit is
  ;; packaged instead of the tag.
  (let ((commit "b6979a269172a45201c8366680d8b889f889432b")
        (revision "2"))
    (package
      (name "mantis")
      (version (git-version "0.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/splatlab/mantis")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0dq8a785hnaxx5kq757m5czs8xpcjpcph1inq2nm8h6zfvqyj8xs"))))
      (build-system cmake-build-system)
      (arguments
       '(#:tests? #f                    ; there are none
         ;; Do not use SSE4.2 instructions.
         #:configure-flags (list "-DNH=ON")))
      (inputs
       (list sdsl-lite openssl zlib))
      (native-inputs
       (list gcc-7))
      (home-page "https://github.com/splatlab/mantis")
      (synopsis "Large-scale sequence-search index data structure")
      (description "Mantis is a space-efficient data structure that can be
used to index thousands of raw-read genomics experiments and facilitate
large-scale sequence searches on those experiments. Mantis uses counting
quotient filters instead of Bloom filters, enabling rapid index builds and
queries, small indexes, and exact results, i.e., no false positives or
negatives. Furthermore, Mantis is also a colored de Bruijn graph
representation, so it supports fast graph traversal and other topological
analyses in addition to large-scale sequence-level searches.")
      ;; The code relies on __uint128_t and on inline assembly.
      (supported-systems '("x86_64-linux"))
      (license license:bsd-3))))
11580
(define-public sjcount
  ;; Version 3.2 has neither a tag nor a release archive, so a fixed commit
  ;; is used.
  (let ((commit "292d3917cadb3f6834c81e509c30e61cd7ead6e5")
        (revision "1"))
    (package
      (name "sjcount")
      (version (git-version "3.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/pervouchine/sjcount-full")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "0gdgj35j249f04rqgq8ymcc1xg1vi9kzbajnjqpaq2wpbh8bl234"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; requires a 1.4G test file
         #:make-flags
         ,#~(list (string-append "SAMTOOLS_DIR="
                                 #$(this-package-input "samtools")
                                 "/lib/"))
         #:phases
         (modify-phases %standard-phases
           ;; There is nothing to configure; patch the makefile instead so
           ;; that the samtools headers are found and libpthread is linked.
           (replace 'configure
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((include-flag
                      (string-append "-I" (assoc-ref inputs "samtools")
                                     "/include/samtools")))
                 (substitute* "makefile"
                   (("-I \\$\\{SAMTOOLS_DIR\\}") include-flag)
                   (("-lz ") "-lz -lpthread ")))))
           ;; Install the three executables into the output's bin directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (for-each (lambda (tool)
                             (install-file tool bin))
                           '("j_count" "b_count" "sjcount"))))))))
      (inputs
       (list samtools-0.1 zlib))
      (home-page "https://github.com/pervouchine/sjcount-full/")
      (synopsis "Annotation-agnostic splice junction counting pipeline")
      (description "Sjcount is a utility for fast quantification of splice
junctions in RNA-seq data. It is annotation-agnostic and offset-aware. This
version does count multisplits.")
      (license license:gpl3+))))
11628
;; minimap2 is built with plain "make": the 'configure phase is deleted and
;; the 'install phase is replaced by a hand-written one that also generates a
;; pkg-config file for the static library.
(define-public minimap2
  (package
    (name "minimap2")
    (version "2.24")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/lh3/minimap2/"
                           "releases/download/v" version "/"
                           "minimap2-" version ".tar.bz2"))
       (sha256
        (base32
         "05d6h2c1h95s5vblf1fijn9g0r4g69nsvkabji42j642y0gw7m4x"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:modules ((guix build utils)
                  (guix build gnu-build-system)
                  (srfi srfi-26))
       #:make-flags
       (list (string-append "CC=" ,(cc-for-target))
             ;; Pick the extra make argument that matches the system we are
             ;; building for (the cross target when there is one).
             (let ((system ,(or (%current-target-system)
                                (%current-system))))
               (cond
                ((string-prefix? "x86_64" system)
                 "all")
                ((or (string-prefix? "i586" system)
                     (string-prefix? "i686" system))
                 "sse2only=1")
                ((string-prefix? "armhf" system)
                 "arm_neon=1")
                ((string-prefix? "aarch64" system)
                 "aarch64=1")
                (else ""))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (lib (string-append out "/lib"))
                    (inc (string-append out "/include"))
                    (man (string-append out "/share/man/man1")))
               (install-file "minimap2" bin)
               (install-file "libminimap2.a" lib)
               (install-file "minimap2.1" man)
               ;; Install every header; this is done for effect only, so
               ;; use 'for-each' rather than 'map'.
               (for-each (cut install-file <> inc)
                         (find-files "." "\\.h$"))
               ;; Not this file: emmintrin.h was picked up by the header
               ;; search above, so remove it from the output again.
               (delete-file (string-append inc "/emmintrin.h"))
               ;; Write a pkg-config file describing the static library.
               (mkdir-p (string-append lib "/pkgconfig"))
               (with-output-to-file (string-append lib "/pkgconfig/minimap2.pc")
                 (lambda _
                   (format #t "prefix=~a~@
                           exec_prefix=${prefix}~@
                           libdir=${exec_prefix}/lib~@
                           includedir=${prefix}/include~@
                           ~@
                           Name: libminimap2~@
                           Version: ~a~@
                           Description: A versatile pairwise aligner for genomic and spliced nucleotide sequence~@
                           Libs: -L${libdir} -lminimap2~@
                           Cflags: -I${includedir}~%"
                           out ,version)))))))))
    (inputs
     (list zlib))
    (home-page "https://lh3.github.io/minimap2/")
    (synopsis "Pairwise aligner for genomic and spliced nucleotide sequences")
    (description "Minimap2 is a versatile sequence alignment program that
aligns DNA or mRNA sequences against a large reference database. Typical use
cases include:

@enumerate
@item mapping PacBio or Oxford Nanopore genomic reads to the human genome;
@item finding overlaps between long reads with error rate up to ~15%;
@item splice-aware alignment of PacBio Iso-Seq or Nanopore cDNA or Direct RNA
reads against a reference genome;
@item aligning Illumina single- or paired-end reads;
@item assembly-to-assembly alignment;
@item full-genome alignment between two closely related species with
divergence below ~15%.
@end enumerate\n")
    (license license:expat)))
11714
;; Python binding for minimap2, fetched from PyPI.
(define-public python-mappy
  (package
    (name "python-mappy")
    (version "2.24")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "mappy" version))
              (sha256
               (base32
                "1ycszza87p9qvx8mis9v1hry0ac465x1xcxbsn1k45qlxxrzp8im"))))
    (build-system python-build-system)
    (native-inputs
     (list python-cython))
    (inputs
     (list zlib))
    (home-page "https://github.com/lh3/minimap2")
    (synopsis "Python binding for minimap2")
    (description "This package provides a convenient interface to minimap2,
a fast and accurate C program to align genomic and transcribed nucleotide
sequences.")
    (license license:expat)))
11736
;; miniasm is built with plain "make": there is no 'configure phase, and the
;; 'install phase is replaced to copy the two executables into the output.
(define-public miniasm
  (package
    (name "miniasm")
    (version "0.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/lh3/miniasm")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "04dv5wv8bhsw1imxwyd438bnn9kby7svp44nbcz8lsadzjjci5gs"))))
    (build-system gnu-build-system)
    (inputs
     (list zlib))
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           ;; Only the "out" output is needed here.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "miniasm" bin)
               (install-file "minidot" bin)))))))
    (home-page "https://github.com/lh3/miniasm")
    (synopsis "Ultrafast de novo assembly for long noisy reads")
    (description "Miniasm is a very fast OLC-based de novo assembler for noisy
long reads. It takes all-vs-all read self-mappings (typically by minimap) as
input and outputs an assembly graph in the GFA format. Different from
mainstream assemblers, miniasm does not have a consensus step. It simply
concatenates pieces of read sequences to generate the final unitig sequences.
Thus the per-base error rate is similar to the raw input reads.")
    (license license:expat)))
11773
;; Bandage is configured with qmake.  The 'check phase runs the command line
;; test script from the "tests" directory against the freshly built binary,
;; and the 'install phase copies that binary into the output.
(define-public bandage
  (package
    (name "bandage")
    (version "0.8.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/rrwick/Bandage")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1bbsn5f5x8wlspg4pbibqz6m5vin8c19nl224f3z3km0pkc97rwv"))))
    (build-system qt-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda _
             (invoke "qmake" "Bandage.pro")))
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; Point the test script at the executable in the build
               ;; directory.
               (let ((executable (string-append (getcwd) "/Bandage")))
                 (substitute* "tests/bandage_command_line_tests.sh"
                   (("^bandagepath=.*")
                    (string-append "bandagepath=" executable "\n"))))
               (with-directory-excursion "tests"
                 ;; XDG_RUNTIME_DIR is set to the tests directory itself.
                 (setenv "XDG_RUNTIME_DIR" (getcwd))
                 (invoke "./bandage_command_line_tests.sh")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "Bandage"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (inputs
     (list qtbase-5 qtsvg-5))
    (native-inputs
     (list imagemagick))
    (home-page "https://rrwick.github.io/Bandage/")
    (synopsis
     "Bioinformatics Application for Navigating De novo Assembly Graphs Easily")
    (description "Bandage is a program for visualising de novo assembly graphs.
It allows users to interact with the assembly graphs made by de novo assemblers
such as Velvet, SPAdes, MEGAHIT and others. De novo assembly graphs contain not
only assembled contigs but also the connections between those contigs, which
were previously not easily accessible. Bandage visualises assembly graphs, with
connections, using graph layout algorithms. Nodes in the drawn graph, which
represent contigs, can be automatically labelled with their ID, length or depth.
Users can interact with the graph by moving, labelling and colouring nodes.
Sequence information can also be extracted directly from the graph viewer. By
displaying connections between contigs, Bandage opens up new possibilities for
analysing and improving de novo assemblies that are not possible by looking at
contigs alone.")
    (license (list license:gpl2+        ; bundled ogdf
                   license:gpl3+))))
11830
;; Support library used by biobambam2; zlib is propagated to its users.
(define-public libmaus2
  (package
    (name "libmaus2")
    (version "2.0.786")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://gitlab.com/german.tischler/libmaus2")
             ;; Upstream tags carry a release timestamp suffix.
             (commit (string-append version "-release-20210531143054"))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1rxakmwjcx2yq5sjh3v849f7dfw4xzc2fyzf6s28s3p95z84w564"))))
    (build-system gnu-build-system)
    (arguments
     ;; The test suite attempts to execute ../test-driver, which does not
     ;; exist.
     (list #:tests? #false))
    (propagated-inputs
     (list zlib))
    (native-inputs
     (list pkg-config))
    (home-page "https://gitlab.com/german.tischler/libmaus2")
    (synopsis "Collection of data structures and algorithms useful for bioinformatics")
    (description "libmaus2 is a collection of data structures and
algorithms. It contains:

@itemize
@item I/O classes (single byte and UTF-8);
@item @code{bitio} classes (input, output and various forms of bit level
manipulation);
@item text indexing classes (suffix and LCP array, fulltext and minute (FM),
etc.);
@item BAM sequence alignment files input/output (simple and collating);
and many lower level support classes.
@end itemize\n")
    ;; The code is explicitly available under the terms of either GPLv2 or
    ;; GPLv3 according to the AUTHORS file, though most files have a GPLv3+
    ;; license header.
    (license (list license:gpl2+ license:gpl3+))))
11869
;; BAM processing tools built on top of libmaus2; the location of libmaus2
;; is passed to the configure script explicitly.
(define-public biobambam2
  (package
    (name "biobambam2")
    (version "2.0.182")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://gitlab.com/german.tischler/biobambam2")
                    ;; Upstream tags carry a release timestamp suffix.
                    (commit (string-append version "-release-20210412001032"))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0b7w7a2a7hpkgrdn0n7hy4pilzrj82zqrh7q4bg1l0cd6bqr60m5"))))
    (build-system gnu-build-system)
    (arguments
     ;; The test suite attempts to execute ../test-driver, which does not exist.
     `(#:tests? #false
       #:configure-flags
       ,#~(list (string-append "--with-libmaus2="
                               #$(this-package-input "libmaus2")))))
    (inputs
     (list libmaus2 xerces-c))
    (native-inputs
     (list pkg-config))
    (home-page "https://gitlab.com/german.tischler/biobambam2")
    (synopsis "Tools for processing BAM files")
    (description "This package contains some tools for processing BAM files
including:

@itemize
@item bamsormadup: parallel sorting and duplicate marking
@item bamcollate2: reads BAM and writes BAM reordered such that alignments are
collated by query name
@item bammarkduplicates: reads BAM and writes BAM with duplicate alignments
marked using the BAM flags field
@item bammaskflags: reads BAM and writes BAM while masking (removing) bits
from the flags column
@item bamrecompress: reads BAM and writes BAM with a defined compression
setting. This tool is capable of multi-threading.
@item bamsort: reads BAM and writes BAM resorted by coordinates or query name
@item bamtofastq: reads BAM and writes FastQ; output can be collated or
uncollated by query name.
@end itemize
")
    ;; The COPYING file states that the code is distributed under version 3 of
    ;; the GPL, but the license headers include the "or later" clause.
    (license license:gpl3+)))
11917
;; CRAN package; all R dependencies are propagated.
(define-public r-dyngen
  (package
    (name "r-dyngen")
    (version "1.0.4")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "dyngen" version))
              (sha256
               (base32
                "1qmqy0dyiz30zpf3ii4h2ip6hg2449ghb474sjzrqa1yk9mdpy4i"))))
    (properties '((upstream-name . "dyngen")))
    (build-system r-build-system)
    (propagated-inputs
     (list r-assertthat
           r-dplyr
           r-dynutils
           r-ggplot2
           r-ggraph
           r-ggrepel
           r-gillespiessa2
           r-igraph
           r-lmds
           r-matrix
           r-patchwork
           r-pbapply
           r-purrr
           r-rlang
           r-tibble
           r-tidygraph
           r-tidyr
           r-viridis))
    (home-page "https://github.com/dynverse/dyngen")
    (synopsis "Multi-Modal simulator for single-cell omics analyses")
    (description
     "This package provides a multi-modal simulation engine for studying
dynamic cellular processes at single-cell resolution.")
    (license license:expat)))
11956
;; Needed for r-liana
;; This variant is built from a pinned git commit of the upstream repository.
(define-public r-omnipathr/devel
  (let ((commit "679bb79e319af246a16968d27d64d8d6937a331a")
        (revision "1"))
    (package
      (name "r-omnipathr")
      (version (git-version "3.5.5" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/saezlab/omnipathr")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "10h6lyapyx4ik8r4kx5z2dly46jlf2v57caq4g6i0hzifyz2vgjq"))))
      (properties '((upstream-name . "OmnipathR")))
      (build-system r-build-system)
      (arguments
       '(#:phases
         (modify-phases %standard-phases
           ;; Point HOME at /tmp for the remainder of the build.
           (add-after 'unpack 'set-HOME
             (lambda _
               (setenv "HOME" "/tmp"))))))
      (propagated-inputs
       (list r-checkmate
             r-crayon
             r-curl
             r-digest
             r-dplyr
             r-httr
             r-igraph
             r-jsonlite
             r-later
             r-logger
             r-magrittr
             r-progress
             r-purrr
             r-rappdirs
             r-readr
             r-readxl
             r-rlang
             r-rvest
             r-stringr
             r-tibble
             r-tidyr
             r-tidyselect
             r-withr
             r-xml2
             r-yaml))
      (native-inputs
       (list r-knitr))
      (home-page "https://github.com/saezlab/omnipathr")
      (synopsis "OmniPath web service client and more")
      (description
       "This package provides a client for the OmniPath web service and many
other resources. It also includes functions to transform and pretty print
some of the downloaded data, functions to access a number of other resources
such as BioPlex, ConsensusPathDB, EVEX, Gene Ontology, Guide to
Pharmacology (IUPHAR/BPS), Harmonizome, HTRIdb, Human Phenotype Ontology,
InWeb InBioMap, KEGG Pathway, Pathway Commons, Ramilowski et al. 2015,
RegNetwork, ReMap, TF census, TRRUST and Vinayagam et al. 2011. Furthermore,
OmnipathR features a close integration with the NicheNet method for ligand
activity prediction from transcriptomics data, and its R implementation
@code{nichenetr}.")
      (license license:expat))))
12021
;; Built from a pinned git commit; depends on the development variant of
;; OmnipathR (r-omnipathr/devel).
(define-public r-liana
  (let ((commit "efb1249af46f576d1d620956053cfa93b2cee961")
        (revision "1"))
    (package
      (name "r-liana")
      (version (git-version "0.1.5" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/saezlab/liana/")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0z645k26kqrfj5f1s412vwclw1q47h1zfxxrh9ijr30pxhpv6cv0"))))
      (properties `((upstream-name . "liana")))
      (build-system r-build-system)
      (arguments
       (list
        #:phases
        `(modify-phases %standard-phases
           ;; This is needed to find ~/.config/OmnipathR/omnipathr.yml
           (add-after 'unpack 'set-HOME
             (lambda _ (setenv "HOME" "/tmp"))))))
      (propagated-inputs
       (list r-complexheatmap
             r-dplyr
             r-ggplot2
             r-magrittr
             r-omnipathr/devel
             r-purrr
             r-rcolorbrewer
             r-readr
             r-reticulate
             r-rlang
             r-scater
             r-scran
             r-scuttle
             r-seuratobject
             r-singlecellexperiment
             r-stringr
             r-tibble
             r-tidyr
             r-tidyselect))
      (native-inputs (list r-knitr))
      (home-page "https://github.com/saezlab/liana/")
      (synopsis "LIANA: a LIgand-receptor ANalysis frAmework")
      (description
       "LIANA provides a number of methods and resources for ligand-receptor
interaction inference from scRNA-seq data.")
      (license license:gpl3))))
12073
;; Fetched from the upstream git repository at the "v" + version tag.
(define-public r-circus
  (package
    (name "r-circus")
    (version "0.1.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BIMSBbioinfo/ciRcus")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0jhjn3ilb057hbf6yzrihj13ifxxs32y7nkby8l3lkm28dg4p97h"))))
    (build-system r-build-system)
    (propagated-inputs
     (list r-annotationdbi
           r-annotationhub
           r-biomart
           r-data-table
           r-dbi
           r-genomicfeatures
           r-genomicranges
           r-ggplot2
           r-hash
           r-iranges
           r-rcolorbrewer
           r-rmysql
           r-s4vectors
           r-stringr
           r-summarizedexperiment))
    (native-inputs
     (list r-knitr))
    (home-page "https://github.com/BIMSBbioinfo/ciRcus")
    (synopsis "Annotation, analysis and visualization of circRNA data")
    (description "Circus is an R package for annotation, analysis and
visualization of circRNA data. Users can annotate their circRNA candidates
with host genes, gene features they are spliced from, and discriminate between
known and yet unknown splice junctions. Circular-to-linear ratios of circRNAs
can be calculated, and a number of descriptive plots easily generated.")
    (license license:artistic2.0)))
12115
;; Built from a pinned git commit of the upstream repository.
(define-public r-doubletfinder
  (let ((commit "554097ba4e2c0ed7c28dc7f0b5b75277f3a50551")
        (revision "1"))
    (package
      (name "r-doubletfinder")
      (version (git-version "2.0.3" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/chris-mcginnis-ucsf/DoubletFinder")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1q1pnqw7ry4syp04wjmvz5bws6z4vg4c340ky07lk0vp577x2773"))))
      (properties '((upstream-name . "DoubletFinder")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-fields
             r-kernsmooth
             r-rocr))
      (home-page "https://github.com/chris-mcginnis-ucsf/DoubletFinder")
      (synopsis "Identify doublets in single-cell RNA sequencing data")
      (description
       "DoubletFinder identifies doublets by generating artificial doublets
from existing scRNA-seq data and defining which real cells preferentially
co-localize with artificial doublets in gene expression space. Other
DoubletFinder package functions are used for fitting DoubletFinder to
different scRNA-seq datasets. For example, ideal DoubletFinder performance in
real-world contexts requires optimal pK selection and homotypic doublet
proportion estimation. pK selection is achieved using pN-pK parameter sweeps
and maxima identification in mean-variance-normalized bimodality coefficient
distributions. Homotypic doublet proportion estimation is achieved by finding
the sum of squared cell annotation frequencies.")
      (license license:cc0))))
12148
;; There have been no releases, so the package is built from a pinned git
;; commit.
(define-public r-cytobackbone
  (let ((commit "4c1a0a35cc5ae1f8f516127cec92351d96fe26e7")
        (revision "1"))
    (package
      (name "r-cytobackbone")
      (version (git-version "1.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tchitchek-lab/CytoBackBone")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0ahiad14zcgdk42xzw5xryic2ibn2l8lkrcdvl2b5sz2js028yb3"))))
      (properties '((upstream-name . "CytoBackBone")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-flowcore
             r-flowutils
             r-fnn
             r-ggplot2
             r-preprocesscore))
      (native-inputs
       (list r-knitr))
      (home-page "https://github.com/tchitchek-lab/CytoBackBone")
      (synopsis "Merge phenotype information from different cytometric profiles")
      (description
       "This package implements an algorithm which increases the number of
simultaneously measurable markers and in this way helps with study of the
immune responses. Thus, the present algorithm, named @code{CytoBackBone},
allows combining phenotypic information of cells from different cytometric
profiles obtained from different cytometry panels. This computational
approach is based on the principle that each cell has its own phenotypic and
functional characteristics that can be used as an identification card.
@code{CytoBackBone} uses a set of predefined markers, that we call the
backbone, to define this identification card. The phenotypic information of
cells with similar identification cards in the different cytometric profiles
is then merged.")
      (license license:gpl2))))
12189
;; Built from a pinned git commit of the upstream repository.
(define-public r-giotto
  (let ((commit "68d7390dce87223cac11d4d8f31705fe0144d011")
        (revision "1"))
    (package
      (name "r-giotto")
      (version (git-version "1.1.1" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/RubD/Giotto/")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0mv60khc05wrxzr4ir6cirn7dpqvgwan5hm00lmafsyalr51nf5i"))))
      (properties '((upstream-name . "Giotto")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-clusterr
             r-complexheatmap
             r-cowplot
             r-data-table
             r-dbscan
             r-deldir
             r-farver
             r-fitdistrplus
             r-ggdendro
             r-ggplot2
             r-ggraph
             r-ggrepel
             r-igraph
             r-irlba
             r-lfa
             r-limma
             r-magick
             r-magrittr
             r-matrix
             r-matrixstats
             r-plotly
             r-qvalue
             r-r-utils
             r-rcolorbrewer
             r-rcpp
             r-reshape2
             r-reticulate
             r-rfast
             r-rlang
             r-rtsne
             r-scales
             r-uwot))
      (native-inputs
       (list r-knitr))
      (home-page "https://github.com/RubD/Giotto/")
      (synopsis "Spatial single-cell transcriptomics toolbox")
      (description
       "This package provides a toolbox to process, analyze and visualize
spatial single-cell expression data.")
      (license license:expat))))
12247
(define-public gffread
  ;; We cannot use the tagged release because it is not in sync with gclib.
  ;; See https://github.com/gpertea/gffread/issues/26
  (let ((commit "ba7535fcb3cea55a6e5a491d916e93b454e87fd0")
        (revision "1"))
    (package
      (name "gffread")
      (version (git-version "0.9.12" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffread")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1dl2nbcg96lxpd0drg48ssa8343nf7pw9s9mkrc4mjjmfwsin3ki"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:make-flags
         ;; Must match the directory populated by 'copy-gclib-source below.
         (list "GCLDIR=gclib")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Copy the gclib sources (the "gclib-source" native input) into
           ;; a "gclib" directory inside the build tree.
           (add-after 'unpack 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir-p "gclib")
               (copy-recursively (assoc-ref inputs "gclib-source") "gclib")))
           ;; There is no install target
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "gffread" bin)))))))
      (native-inputs
       ;; The label is looked up by name in the 'copy-gclib-source phase.
       `(("gclib-source"
          ,(let ((version "0.10.3")
                 (commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib")
                     (commit commit)))
               (file-name (git-file-name "gclib" version))
               (sha256
                (base32
                 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffread/")
      (synopsis "Parse and convert GFF/GTF files")
      (description
       "This package provides a GFF/GTF file parsing utility providing format
conversions, region filtering, FASTA sequence extraction and more.")
      ;; gffread is under Expat, but gclib is under Artistic 2.0
      (license (list license:expat
                     license:artistic2.0)))))
12308
;; FIt-SNE has no build system of its own: the single executable is compiled
;; by invoking the C++ compiler directly, and the files are installed by hand.
(define-public fit-sne
  (package
    (name "fit-sne")
    (version "1.2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/KlugerLab/FIt-SNE")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1imq4577awc226wvygf94kpz156qdfw8xl0w0f7ss4w10lhmpmf5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #false                  ; there are none
       #:phases
       ;; There is no build system.
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; Use the compiler for the target system rather than a
             ;; hard-coded "g++" so that cross builds pick the right one.
             (invoke ,(cxx-for-target) "-std=c++11" "-O3"
                     "src/sptree.cpp"
                     "src/tsne.cpp"
                     "src/nbodyfft.cpp"
                     "-o" "bin/fast_tsne"
                     "-pthread" "-lfftw3" "-lm"
                     "-Wno-address-of-packed-member")))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (share (string-append out "/share/fit-sne")))
               (for-each (lambda (file) (install-file file bin))
                         (find-files "bin"))

               ;; Make the R wrapper refer to the output directory before
               ;; installing it.
               (substitute* "fast_tsne.R"
                 (("^FAST_TSNE_SCRIPT_DIR.*")
                  (string-append "FAST_TSNE_SCRIPT_DIR = \"" out "\"\n")))
               (install-file "fast_tsne.R" share)))))))
    (inputs
     (list fftw))
    (home-page "https://github.com/KlugerLab/FIt-SNE")
    (synopsis "Fast Fourier Transform-accelerated interpolation-based t-SNE")
    (description "@dfn{t-Stochastic Neighborhood Embedding} (t-SNE) is a
method for dimensionality reduction and visualization of high dimensional
datasets. A popular implementation of t-SNE uses the Barnes-Hut algorithm to
approximate the gradient at each iteration of gradient descent. This
implementation differs in these ways:

@itemize
@item Instead of approximating the N-body simulation using Barnes-Hut, we
interpolate onto an equispaced grid and use FFT to perform the convolution.
@item Instead of computing nearest neighbors using vantage-point trees, we
approximate nearest neighbors using the Annoy library. The neighbor lookups
are multithreaded to take advantage of machines with multiple cores.
@end itemize
")
    ;; See LICENSE.txt for details on what license applies to what files.
    (license (list license:bsd-4 license:expat license:asl2.0))))
12371
;; Scanpy is built with flit into a wheel, which is then installed with pip.
;; The 'check phase removes a number of test files before running pytest with
;; further tests deselected.
(define-public python-scanpy
  (package
    (name "python-scanpy")
    (version "1.9.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/theislab/scanpy")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0k524xnx3dvpz5yx65p316wghvi01zs17is8w2m3w2qywiswk0sl"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'build
           (lambda _
             (setenv "SETUPTOOLS_SCM_PRETEND_VERSION" ,version)
             ;; ZIP does not support timestamps before 1980.
             (setenv "SOURCE_DATE_EPOCH" "315532800")
             (invoke "flit" "build")))
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (add-installed-pythonpath inputs outputs)
             (let ((out (assoc-ref outputs "out")))
               (for-each (lambda (wheel)
                           ;; Log the wheel being installed.  The file name
                           ;; is passed as an argument, never as the format
                           ;; control string, so that a "~" in it cannot be
                           ;; misread as a directive.
                           (format #true "~a~%" wheel)
                           (invoke "python" "-m" "pip" "install"
                                   wheel (string-append "--prefix=" out)))
                         (find-files "dist" "\\.whl$")))))
         ;; Numba needs a writable dir to cache functions.
         (add-before 'check 'set-numba-cache-dir
           (lambda _
             (setenv "NUMBA_CACHE_DIR" "/tmp")))
         (replace 'check
           (lambda* (#:key tests? inputs #:allow-other-keys)
             (when tests?
               ;; These tests require Internet access.
               (delete-file-recursively "scanpy/tests/notebooks")
               (delete-file "scanpy/tests/test_clustering.py")
               (delete-file "scanpy/tests/test_datasets.py")
               (delete-file "scanpy/tests/test_normalization.py")
               (delete-file "scanpy/tests/test_score_genes.py")
               (delete-file "scanpy/tests/test_highly_variable_genes.py")

               ;; TODO: I can't get the plotting tests to work, even with Xvfb.
               (delete-file "scanpy/tests/test_embedding_plots.py")
               (delete-file "scanpy/tests/test_preprocessing.py")
               (delete-file "scanpy/tests/test_read_10x.py")

               ;; This one fails with "ValueError: I/O operation on closed
               ;; file."
               (delete-file "scanpy/tests/test_neighbors_key_added.py")

               ;; TODO: these fail with TypingError and "Use of unsupported
               ;; NumPy function 'numpy.split'".
               (delete-file "scanpy/tests/test_metrics.py")

               ;; The following test requires 'scanorama', which isn't
               ;; packaged yet.
               (delete-file "scanpy/tests/external/test_scanorama_integrate.py")

               ;; Put the source tree and the anndata sources (needed for
               ;; anndata.tests) in front of the regular search path.
               (setenv "PYTHONPATH"
                       (string-append (getcwd) ":"
                                      (assoc-ref inputs "python-anndata:source") ":"
                                      (getenv "GUIX_PYTHONPATH")))
               (invoke "pytest" "-vv"
                       "-k"
                       ;; Plot tests that fail.
                       (string-append "not test_dotplot_matrixplot_stacked_violin"
                                      " and not test_violin_without_raw"
                                      " and not test_correlation"
                                      " and not test_scatterplots"
                                      " and not test_scatter_embedding_add_outline_vmin_vmax_norm"
                                      " and not test_paga"
                                      " and not test_paga_compare"
                                      " and not test_clustermap"

                                      ;; These try to connect to the network
                                      " and not test_scrublet_plots"
                                      " and not test_plot_rank_genes_groups_gene_symbols"
                                      " and not test_pca_n_pcs"
                                      " and not test_pca_chunked"
                                      " and not test_pca_sparse"
                                      " and not test_pca_reproducible"))))))))
    (propagated-inputs
     (list python-anndata
           python-dask
           python-h5py
           python-igraph
           python-joblib
           python-legacy-api-wrap
           python-louvain
           python-matplotlib
           python-natsort
           python-networkx
           python-numba
           python-packaging
           python-pandas
           python-patsy
           python-scikit-learn
           python-scipy
           python-seaborn
           python-session-info
           python-sinfo
           python-statsmodels
           python-tables
           python-pytoml
           python-tqdm
           python-umap-learn))
    (native-inputs
     `(;; This package needs anndata.tests, which is not installed.
       ("python-anndata:source" ,(package-source python-anndata))
       ("python-flit" ,python-flit)
       ("python-leidenalg" ,python-leidenalg)
       ("python-pytest" ,python-pytest)
       ("python-setuptools-scm" ,python-setuptools-scm)))
    (home-page "https://github.com/theislab/scanpy")
    (synopsis "Single-Cell Analysis in Python")
    (description "Scanpy is a scalable toolkit for analyzing single-cell gene
expression data. It includes preprocessing, visualization, clustering,
pseudotime and trajectory inference and differential expression testing. The
Python-based implementation efficiently deals with datasets of more than one
million cells.")
    (license license:bsd-3)))
12499
;; Fetched from PyPI.  setup.py is patched after unpacking to drop its
;; 'sklearn' entry.
(define-public python-bbknn
  (package
    (name "python-bbknn")
    (version "1.5.1")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "bbknn" version))
              (sha256
               (base32
                "0q11xdmjr2kf6f179a6kjizj3lllfrq743gslgw67qyzimvrrnhn"))))
    (build-system python-build-system)
    (arguments
     '(#:tests? #f                      ; no tests are included
       #:phases
       (modify-phases %standard-phases
         ;; Numba needs a writable dir to cache functions.
         (add-before 'check 'set-numba-cache-dir
           (lambda _
             (setenv "NUMBA_CACHE_DIR" "/tmp")))
         (add-after 'unpack 'do-not-fail-to-find-sklearn
           (lambda _
             ;; XXX: I have no idea why it cannot seem to find sklearn.
             (substitute* "setup.py"
               (("'sklearn'") "")))))))
    (propagated-inputs
     (list python-annoy
           python-cython
           python-numpy
           python-pandas
           python-scikit-learn
           python-scipy
           python-umap-learn))
    (home-page "https://github.com/Teichlab/bbknn")
    (synopsis "Batch balanced KNN")
    (description "BBKNN is a batch effect removal tool that can be directly
used in the Scanpy workflow. It serves as an alternative to
@code{scanpy.api.pp.neighbors()}, with both functions creating a neighbour
graph for subsequent use in clustering, pseudotime and UMAP visualisation. If
technical artifacts are present in the data, they will make it challenging to
link corresponding cell types across different batches. BBKNN actively
combats this effect by splitting your data into batches and finding a smaller
number of neighbours for each cell within each of the groups. This helps
create connections between analogous cells in different batches without
altering the counts or PCA space.")
    (license license:expat)))
12546
(define-public python-drep
  (package
    (name "python-drep")
    (version "3.2.0")
    ;; Release tarball from PyPI.
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "drep" version))
       (sha256
        (base32 "08vk0x6v5c5n7afgd5pcjhsvb424absypxy22hw1cm1n9kirbi77"))))
    (build-system python-build-system)
    (propagated-inputs
     (list python-biopython
           python-matplotlib
           python-numpy
           python-pandas
           python-pytest
           python-scikit-learn
           python-seaborn
           python-tqdm))
    (home-page "https://github.com/MrOlm/drep")
    (synopsis
     "De-replication of microbial genomes assembled from multiple samples")
    (description
     "dRep is a Python program for rapidly comparing large numbers of genomes.
dRep can also \"de-replicate\" a genome set by identifying groups of highly
similar genomes and choosing the best representative genome for each genome
set.")
    (license license:expat)))
12576
(define-public instrain
  (package
    (name "instrain")
    (version "1.5.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "inStrain" version))
       (sha256
        (base32 "05w1lw75x4lwkzg4qpi055g7hdjp9rnc4ksbxg2hfgksq9djk0hx"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Rewrite the imports of the sibling modules "s3_utils" and
         ;; "job_utils" as relative imports.
         (add-after 'unpack 'patch-relative-imports
           (lambda _
             (substitute* "docker/run_instrain.py"
               (("from (s3_utils|job_utils)" _ module)
                (string-append "from ." module))))))))
    (native-inputs
     (list python-pytest))
    (inputs
     (list python-biopython-1.73
           python-boto3
           python-h5py
           python-lmfit
           python-matplotlib
           python-networkx
           python-numba
           python-numpy
           python-pandas
           python-psutil
           python-pysam
           python-scikit-learn
           python-seaborn
           python-tqdm
           ;; The deprecated plot utilities still need drep.
           python-drep))
    (home-page "https://github.com/MrOlm/inStrain")
    (synopsis "Calculation of strain-level metrics")
    (description
     "inStrain is a Python program for analysis of co-occurring genome
populations from metagenomes that allows highly accurate genome comparisons,
analysis of coverage, microdiversity, and linkage, and sensitive SNP detection
with gene localization and synonymous non-synonymous identification.")
    ;; The tool reports its own license as "MIT", yet the repository ships a
    ;; LICENSE file containing the GPLv3.
    ;; See https://github.com/MrOlm/inStrain/issues/51
    (license license:expat)))
12629
(define-public gffcompare
  ;; Built from a pinned upstream commit.  The gclib sources are fetched as a
  ;; separate origin and copied next to the build tree as "../gclib" before
  ;; the build phase (see 'README.md' of gffcompare).
  (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41")
        (revision "1"))
    (package
      (name "gffcompare")
      (version (git-version "0.10.15" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffcompare/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0cp5qpxdhw4mxpya5dld8wi3jk00zyklm6rcri426wydinrnfmkg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-before 'build 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir "../gclib")
               (copy-recursively
                (assoc-ref inputs "gclib-source") "../gclib")))
           ;; Only the "gffcompare" executable is installed.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (install-file "gffcompare"
                             (string-append (assoc-ref outputs "out")
                                            "/bin")))))))
      (native-inputs
       `(("gclib-source"                ; see 'README.md' of gffcompare
          ;; 'let*' is required here: with a plain 'let', the 'revision' and
          ;; 'commit' used to compute VERSION would be the enclosing
          ;; gffcompare bindings, and the gclib checkout would be named
          ;; after the wrong commit.
          ,(let* ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
                  (revision "1")
                  (name "gclib")
                  (version (git-version "0.10.3" revision commit)))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib/")
                     (commit commit)))
               (file-name (git-file-name name version))
               (sha256
                (base32 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffcompare/")
      (synopsis "Tool for comparing or classifying transcripts of RNA-Seq")
      (description
       "@code{gffcompare} is a tool that can:
@enumerate
@item compare and evaluate the accuracy of RNA-Seq transcript assemblers
(Cufflinks, Stringtie);
@item collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g.
resulted from assembly of different samples);
@item classify transcripts from one or multiple GTF/GFF3 files as they relate to
reference transcripts provided in an annotation file (also in GTF/GFF3 format).
@end enumerate")
      (license
       (list
        license:expat                   ;license for gffcompare
        license:artistic2.0)))))        ;license for gclib
12692
(define-public intervaltree
  ;; Packaged from a pinned commit.
  (let ((commit "b90527f9e6d51cd36ecbb50429e4524d3a418ea5")
        (revision "1"))
    (package
      (name "intervaltree")
      (version (git-version "0.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/intervaltree/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0rgv6q5fl4x5d74n6p5wvdna6zmbdbqpb4jqqh6vq3670gn08xad"))))
      (build-system gnu-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; There is no configure phase.
           (delete 'configure))
         #:make-flags
         ,#~(list (string-append "PREFIX=" #$output) "DESTDIR=\"\"")
         ;; No tests.
         #:tests? #f))
      (home-page "https://github.com/ekg/intervaltree")
      (synopsis "Minimal C++ interval tree implementation")
      (description
       "An interval tree can be used to efficiently find a set of
numeric intervals overlapping or containing another interval. This library
provides a basic implementation of an interval tree using C++ templates,
allowing the insertion of arbitrary types into the tree.")
      (license license:expat))))
12722
(define-public python-intervaltree
  (package
    (name "python-intervaltree")
    (version "3.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "intervaltree" version))
       (sha256
        (base32 "0bcm6c6r4ck9nfj9xwz4rm2swc5lrjvmw3lyl6rgj639jf41nawh"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; pytest appears to verify that the code under test is the
         ;; checked-out tree rather than an installed, possibly stale copy.
         ;; The package has only just been installed, so the mismatch is
         ;; harmless; silence the check so that tests are not skipped
         ;; entirely.
         (add-before 'check 'import-mismatch-error-workaround
           (lambda _
             (setenv "PY_IGNORE_IMPORTMISMATCH" "1"))))))
    (native-inputs
     (list python-pytest))
    (propagated-inputs
     (list python-sortedcontainers))
    (home-page "https://github.com/chaimleib/intervaltree")
    (synopsis "Editable interval tree data structure")
    (description
     "This package provides a mutable, self-balancing interval tree
implementation for Python. Queries may be by point, by range overlap, or by
range envelopment. This library was designed to allow tagging text and time
intervals, where the intervals include the lower bound but not the upper
bound.")
    (license license:asl2.0)))
12760
(define-public python-pypairix
  (package
    (name "python-pypairix")
    (version "0.3.7")
    ;; Fetch from git: the PyPI tarball lacks the makefile that is needed to
    ;; build the programs.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/4dn-dcic/pairix")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1snr3lrmsld8sy77ng6ba6wcmd33xjccf1l2f3m6pi29xis9nd6p"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run "make" before the Python build.
         (add-before 'build 'build-programs
           (lambda _
             (invoke "make")))
         ;; Copy the contents of the "bin" directory into the output.
         (add-after 'install 'install-programs
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (copy-recursively "bin" bin)))))))
    (inputs
     (list zlib))
    (home-page "https://github.com/4dn-dcic/pairix")
    (synopsis "Support for querying pairix-indexed bgzipped text files")
    (description
     "Pypairix is a Python module for fast querying on a pairix-indexed
bgzipped text file that contains a pair of genomic coordinates per line.")
    (license license:expat)))
12797
(define-public python-pyfaidx
  (package
    (name "python-pyfaidx")
    (version "0.5.8")
    ;; Release tarball from PyPI.
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyfaidx" version))
       (sha256
        (base32 "038xi3a6zvrxbyyfpp64ka8pcjgsdq4fgw9cl5lpxbvmm1bzzw2q"))))
    (build-system python-build-system)
    (propagated-inputs
     (list python-six))
    (home-page "http://mattshirley.com")
    (synopsis "Random access to fasta subsequences")
    (description
     "This package provides procedures for efficient pythonic random access to
fasta subsequences.")
    (license license:bsd-3)))
12818
(define-public python-cooler
  (package
    (name "python-cooler")
    (version "0.8.11")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "cooler" version))
       (sha256
        (base32 "1i96fmpsimj4wrx51rxn8lw2gqxf5a2pvrj5rwdd6ivnm3pmhyrn"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; cooler asks for cytoolz<0.11, but it only uses cytoolz for
         ;; "compose", which composes two functions; drop the upper bound.
         (add-after 'unpack 'use-recent-cytoolz
           (lambda _
             (substitute* '("requirements.txt"
                            "cooler.egg-info/requires.txt")
               (("cytoolz.*<.*0.11") "cytoolz"))))
         ;; Because of dependency pinning, this version of flake8 does not
         ;; work with this version of pytest.
         (add-after 'unpack 'do-not-use-flake8
           (lambda _
             (substitute* "setup.cfg"
               (("addopts = --flake8") "addopts = "))))
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; Decorator line placed in front of tests that need the
             ;; network.
             (let ((skip-network
                    (string-append
                     "@pytest.mark.skip(reason=\"requires network "
                     "access to genome.ucsc.edu\")\n")))
               (substitute* "tests/test_create.py"
                 (("def test_roundtrip")
                  (string-append skip-network "def test_roundtrip")))
               (substitute* "tests/test_util.py"
                 (("def test_fetch_chromsizes")
                  (string-append skip-network "def test_fetch_chromsizes"))
                 ;; See https://github.com/open2c/cooler/issues/287
                 (("skipif\\(six.PY2, reason=\"Scipy on Py2 is too old\"")
                  "skip(reason=\"Scipy is too new\"")))
             ;; This test depends on ipytree, which contains a lot of
             ;; minified JavaScript.
             (substitute* "tests/test_fileops.py"
               (("def test_print_trees")
                "def _test_print_trees"))))
         ;; Run the test suite through pytest directly.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "python" "-m" "pytest" "-v")))))))
    (native-inputs
     (list python-codecov
           python-mock
           python-pytest
           python-pytest-cov
           python-pytest-flake8))
    (propagated-inputs
     (list python-asciitree
           python-biopython
           python-click
           python-cytoolz
           python-dask
           python-h5py
           python-multiprocess
           python-numpy
           python-pandas
           python-pyfaidx
           python-pypairix
           python-pysam
           python-pyyaml
           python-scipy
           python-simplejson
           python-six
           python-sparse))
    ;; Almost all the projects of the Mirnylab are moved under Open2C umbrella
    (home-page "https://github.com/open2c/cooler")
    (synopsis "Sparse binary format for genomic interaction matrices")
    (description
     "Cooler is a support library for a sparse, compressed, binary persistent
storage format, called @code{cool}, used to store genomic interaction data,
such as Hi-C contact matrices.")
    (license license:bsd-3)))
12900
(define-public python-hicmatrix
  (package
    (name "python-hicmatrix")
    (version "15")
    (source
     (origin
       ;; The PyPI sources do not contain any tests, so fetch from git.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/HiCMatrix")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1dshjxgb16sdfg9k1bhw2yhyngac04k4ca7aqy8g3i3pprr068r5"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run the test suite through pytest directly.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "python" "-m" "pytest" "-v")))))))
    (native-inputs
     (list python-pytest))
    (propagated-inputs
     (list python-cooler
           python-intervaltree
           python-numpy
           python-pandas
           python-scipy
           python-tables))
    (home-page "https://github.com/deeptools/HiCMatrix/")
    (synopsis "HiCMatrix class for HiCExplorer and pyGenomeTracks")
    (description
     "This helper package implements the @code{HiCMatrix} class for
the HiCExplorer and pyGenomeTracks packages.")
    (license license:gpl3+)))
12939
(define-public python-hicexplorer
  (package
    (name "python-hicexplorer")
    (version "2.1.4")
    (source
     (origin
       ;; The latest version is not available on Pypi.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/HiCExplorer")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0q5gpbzmrkvygqgw524q36b4nrivcmyi5v194vsx0qw7b3gcmq08"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Turn every exact "==" version pin in setup.py into a ">=" lower
         ;; bound.
         (add-after 'unpack 'loosen-up-requirements
           (lambda _
             (substitute* "setup.py"
               (("==") ">=")))))))
    (propagated-inputs
     (list python-biopython
           python-configparser
           python-cooler
           python-future
           python-intervaltree
           python-jinja2
           python-matplotlib
           python-numpy
           python-pandas
           python-pybigwig
           python-pysam
           python-scipy
           python-six
           python-tables
           python-unidecode))
    (home-page "https://hicexplorer.readthedocs.io")
    (synopsis "Process, analyze and visualize Hi-C data")
    (description
     "HiCExplorer is a powerful and easy to use set of tools to process,
normalize and visualize Hi-C data. HiCExplorer facilitates the creation of
contact matrices, correction of contacts, TAD detection, A/B compartments,
merging, reordering of chromosomes, conversion from different formats
including cooler and detection of long-range contacts. Moreover, it allows
the visualization of multiple contact matrices along with other types of data
like genes, compartments, ChIP-seq coverage tracks (and in general any type of
genomic scores), long range contacts and the visualization of viewpoints.")
    (license license:gpl3)))
12992
(define-public python-pygenometracks
  (package
    (name "python-pygenometracks")
    (version "3.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyGenomeTracks" version))
       (sha256
        (base32 "16laa0wnf4qn9fb9ych4w1vqhqwjss70v0y0f6wp4gwqfrlgac0f"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace the exact matplotlib pin in setup.py with a lower bound.
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               (("matplotlib ==3.1.1")
                "matplotlib >=3.1.1")))))
       ;; There are no tests.
       #:tests? #f))
    (native-inputs
     (list python-pytest))
    (propagated-inputs
     (list python-future
           python-gffutils
           python-hicmatrix
           python-intervaltree
           python-matplotlib
           python-numpy
           python-pybigwig
           python-pysam
           python-tqdm))
    (home-page "https://pygenometracks.readthedocs.io")
    (synopsis "Program and library to plot beautiful genome browser tracks")
    (description
     "This package aims to produce high-quality genome browser tracks that
are highly customizable. Currently, it is possible to plot: bigwig, bed (many
options), bedgraph, links (represented as arcs), and Hi-C matrices.
pyGenomeTracks can make plots with or without Hi-C data.")
    (license license:gpl3+)))
13035
(define-public python-iced
  (package
    (name "python-iced")
    (version "0.5.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "iced" version))
       (sha256
        (base32 "1avcjmpyyvhgbj5qca4l70ipiz7j3xmcw9p6rd9c06j99faa0r71"))))
    (build-system python-build-system)
    ;; There are no tests.
    (arguments
     (list #:tests? #false))
    (propagated-inputs
     (list python-numpy
           python-pandas
           python-scipy
           python-scikit-learn))
    (home-page "https://github.com/hiclib/iced")
    (synopsis "ICE normalization")
    (description
     "This is a package for normalizing Hi-C contact counts
efficiently.")
    (license license:bsd-3)))
13056
(define-public python-hic2cool
  (package
    (name "python-hic2cool")
    (version "0.8.3")
    ;; Fetch from git: the PyPI sources lack the test_data directory, so no
    ;; test could be run from them.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/4dn-dcic/hic2cool")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0dlnf0qfcp4jrc1nyya32a035c13xicyq16bwfnwhbb9s47mz7gl"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Two of the test-data files need to be writable.
         (add-after 'unpack 'make-test-data-writable
           (lambda _
             (for-each make-file-writable
                       '("test_data/hic2cool_0.4.2_single_res.cool"
                         "test_data/hic2cool_0.7.0_multi_res.mcool"))))
         ;; Pass an explicit file mode to h5py.File.
         ;; See https://github.com/4dn-dcic/hic2cool/issues/58
         (add-after 'unpack 'fix-incompatibility-with-h5py-3
           (lambda _
             (substitute* "test.py"
               (("h5py.File\\(fname\\)")
                "h5py.File(fname, 'r')"))
             (substitute* "hic2cool/hic2cool_updates.py"
               (("h5py.File\\(writefile\\)")
                "h5py.File(writefile, 'a')"))))
         ;; These two tests fail for unknown reasons.
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             (substitute* "test.py"
               (("def test_convert") "def _test_convert")))))))
    (propagated-inputs
     (list python-cooler
           python-h5py
           python-numpy
           python-pandas
           python-scipy))
    (home-page "https://github.com/4dn-dcic/hic2cool")
    (synopsis "Converter for .hic and .cool files")
    (description
     "This package provides a converter between @code{.hic} files (from
juicer) and single-resolution or multi-resolution @code{.cool} files (for
cooler). Both @code{hic} and @code{cool} files describe Hi-C contact
matrices.")
    (license license:expat)))
13107
(define-public python-scanorama
  (package
    (name "python-scanorama")
    (version "1.7.2")
    ;; Release tarball from PyPI.
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "scanorama" version))
       (sha256
        (base32 "0il7bf4c7vli2dm2jx7dskh3ymgv8nmk0y90jzgfrnqjzh250x5w"))))
    (build-system python-build-system)
    (propagated-inputs
     (list python-annoy
           python-fbpca
           python-geosketch
           python-intervaltree
           python-matplotlib
           python-numpy
           python-scikit-learn
           python-scipy))
    (home-page "https://github.com/brianhie/scanorama")
    (synopsis
     "Panoramic stitching of heterogeneous single cell transcriptomic data")
    (description
     "Scanorama enables batch-correction and integration of heterogeneous
scRNA-seq datasets, which is described in the paper \"Efficient integration of
heterogeneous single-cell transcriptomes using Scanorama\" by Brian Hie, Bryan
Bryson, and Bonnie Berger.")
    (license license:expat)))
13136
(define-public r-pore
  (package
    (name "r-pore")
    (version "0.24")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rpore/" version
                           "/poRe_" version ".tar.gz"))
       (sha256
        (base32 "0pih9nljbv8g4x8rkk29i7aqq681b782r5s5ynp4nw9yzqnmmksv"))))
    ;; The upstream name is spelled "poRe".
    (properties `((upstream-name . "poRe")))
    (build-system r-build-system)
    (propagated-inputs
     (list r-bit64
           r-data-table
           r-rhdf5
           r-shiny
           r-svdialogs))
    (home-page "https://sourceforge.net/projects/rpore/")
    (synopsis "Visualize Nanopore sequencing data")
    (description
     "This package provides graphical user interfaces to organize and visualize Nanopore
sequencing data.")
    ;; This is free software, but which license variant applies is unclear:
    ;; <https://github.com/mw55309/poRe_docs/issues/10>.
    (license license:bsd-3)))
13161
(define-public r-xbioc
  ;; Packaged from a pinned commit.
  (let ((commit "6ff0670a37ab3036aaf1d94aa4b208310946b0b5")
        (revision "1"))
    (package
      (name "r-xbioc")
      (version (git-version "0.1.16" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/renozao/xbioc")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0w8bsq5myiwkfhh83nm6is5ichiyvwa1axx2szvxnzq39x6knf66"))))
      (build-system r-build-system)
      (propagated-inputs
       (list r-annotationdbi
             r-assertthat
             r-biobase
             r-biocmanager
             r-digest
             r-pkgmaker
             r-plyr
             r-reshape2
             r-stringr))
      (home-page "https://github.com/renozao/xbioc/")
      (synopsis "Extra base functions for Bioconductor")
      (description
       "This package provides extra utility functions to perform
common tasks in the analysis of omics data, leveraging and enhancing features
provided by Bioconductor packages.")
      (license license:gpl3+))))
13194
(define-public r-cssam
  ;; Packaged from a pinned commit.
  (let ((commit "9ec58c982fa551af0d80b1a266890d92954833f2")
        (revision "1"))
    (package
      (name "r-cssam")
      (version (git-version "1.4" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/csSAM")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "128syf9v39gk0z3ip000qpsjbg6l1siyq6c8b0hz41dzg5achyb3"))))
      (build-system r-build-system)
      (propagated-inputs
       (list r-formula
             r-ggplot2
             r-pkgmaker
             r-plyr
             r-rngtools
             r-scales))
      (home-page "https://github.com/shenorrLab/csSAM/")
      (synopsis "Cell type-specific statistical analysis of microarray")
      (description
       "This package implements the method csSAM that computes
cell-specific differential expression from measured cell proportions using
SAM.")
      ;; Any version
      (license license:lgpl2.1+))))
13225
(define-public r-bseqsc
  ;; Packaged from a pinned commit.
  (let ((commit "fef3f3e38dcf3df37103348b5780937982b43b98")
        (revision "1"))
    (package
      (name "r-bseqsc")
      (version (git-version "1.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/bseqsc")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1prw13wa20f7wlc3gkkls66n1kxz8d28qrb8icfqdwdnnv8w5qg8"))))
      (build-system r-build-system)
      (propagated-inputs
       (list r-abind
             r-annotationdbi
             r-biobase
             r-cssam
             r-dplyr
             r-e1071
             r-edger
             r-ggplot2
             r-nmf
             r-openxlsx
             r-pkgmaker
             r-plyr
             r-preprocesscore
             r-rngtools
             r-scales
             r-stringr
             r-xbioc))
      (home-page "https://github.com/shenorrLab/bseqsc")
      (synopsis
       "Deconvolution of bulk sequencing experiments using single cell data")
      (description
       "BSeq-sc is a bioinformatics analysis pipeline that
leverages single-cell sequencing data to estimate cell type proportion and
cell type-specific gene expression differences from RNA-seq data from bulk
tissue samples. This is a companion package to the publication \"A
single-cell transcriptomic map of the human and mouse pancreas reveals inter-
and intra-cell population structure.\" Baron et al. Cell Systems (2016)
@url{https://www.ncbi.nlm.nih.gov/pubmed/27667365}.")
      (license license:gpl2+))))
13270
(define-public porechop
  ;; Upstream recommends installing from a clone of the git repository:
  ;; https://github.com/rrwick/Porechop#installation
  (let ((commit "289d5dca4a5fc327f97b3f8cecb68ecaf1014861")
        (revision "1"))
    (package
      (name "porechop")
      (version (git-version "0.2.3" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Porechop")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "05ps43gig0d3ia9x5lj84lb00hbsl6ba9n7y7jz927npxbr2ym23"))))
      (build-system python-build-system)
      (home-page "https://github.com/rrwick/porechop")
      (synopsis
       "Finding, trimming or splitting adapters, in Oxford Nanopore reads")
      (description
       "The porechop package is a tool for finding and removing adapters from Oxford
Nanopore reads. Adapters on the ends of reads are trimmed off, and when a read
has an adapter in its middle, it is treated as chimeric and chopped into
separate reads. Porechop performs thorough alignments to effectively find
adapters, even at low sequence identity. Porechop also supports demultiplexing
of Nanopore reads that were barcoded with the Native Barcoding Kit, PCR
Barcoding Kit or Rapid Barcoding Kit.")
      (license license:gpl3+))))
13300
(define-public jamm
  (package
    (name "jamm")
    (version "1.0.7.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/mahmoudibrahim/JAMM")
             (commit (string-append "JAMMv" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0bsa5mf9n9q5jz7mmacrra41l7r8rac5vgsn6wv1fb52ya58b970"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         ;; Install the helper scripts under libexec and the two entry-point
         ;; shell scripts under bin, wrapped so that they find their tools.
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (libexec (string-append out "/libexec/jamm"))
                    (launchers '("JAMM.sh" "SignalGenerator.sh"))
                    ;; "bin" directories of the tools put on PATH.
                    (tool-path
                     (map (lambda (label)
                            (string-append (assoc-ref inputs label) "/bin"))
                          '("coreutils" "gawk" "perl" "r-minimal"))))
               ;; Point the launchers at the installed helper scripts.
               (substitute* launchers
                 (("^sPath=.*")
                  (string-append "sPath=\"" libexec "\"\n")))
               (for-each (lambda (helper)
                           (install-file helper libexec))
                         '("bincalculator.r"
                           "peakfinder.r"
                           "peakhelper.r"
                           "signalmaker.r"
                           "xcorr.r"
                           "xcorrhelper.r"
                           ;; Perl scripts
                           "peakfilter.pl"
                           "readshifter.pl"))
               (for-each
                (lambda (launcher)
                  (chmod launcher #o555)
                  (install-file launcher bin)
                  (wrap-program (string-append bin "/" launcher)
                    (list "PATH" ":" 'prefix tool-path)
                    (list "PERL5LIB" ":" 'prefix
                          (list (getenv "PERL5LIB")))
                    (list "R_LIBS_SITE" ":" 'prefix
                          (list (getenv "R_LIBS_SITE")))))
                launchers)))))))
    (inputs
     (list bash
           coreutils
           gawk
           perl
           r-minimal
           ;;("r-parallel" ,r-parallel)
           r-signal
           r-mclust))
    (home-page "https://github.com/mahmoudibrahim/JAMM")
    (synopsis "Peak finder for NGS datasets")
    (description
     "JAMM is a peak finder for next generation sequencing datasets (ChIP-Seq,
ATAC-Seq, DNase-Seq, etc.) that can integrate replicates and assign peak
boundaries accurately. JAMM is applicable to both broad and narrow
datasets.")
    (license license:gpl3+)))
13374
(define-public ngless
  (package
    (name "ngless")
    (version "1.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ngless-toolkit/ngless.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0pb9f6b0yk9p4cdwiym8r190q1bcdiwvc7i2s6rw54qgi8r3g6pj"))
       (patches (search-patches "ngless-unliftio.patch"))))
    (build-system haskell-build-system)
    (arguments
     `(;; The haddock phase fails with:
       ;;   NGLess/CmdArgs.hs:20:1: error: parse error on input import
       ;;   import Options.Applicative
       #:haddock? #f
       #:phases
       (modify-phases %standard-phases
         ;; Record the versions of the packaged tools in the Makefile, then
         ;; have "make" generate NGLess/Dependencies/Versions.hs.
         (add-after 'unpack 'create-Versions.hs
           (lambda _
             (substitute* "Makefile"
               (("BWA_VERSION = .*")
                (string-append "BWA_VERSION = "
                               ,(package-version bwa) "\n"))
               (("SAM_VERSION = .*")
                (string-append "SAM_VERSION = "
                               ,(package-version samtools) "\n"))
               (("PRODIGAL_VERSION = .*")
                (string-append "PRODIGAL_VERSION = "
                               ,(package-version prodigal) "\n"))
               (("MINIMAP2_VERSION = .*")
                (string-append "MINIMAP2_VERSION = "
                               ,(package-version minimap2) "\n")))
             (invoke "make" "NGLess/Dependencies/Versions.hs")))
         (add-after 'create-Versions.hs 'create-cabal-file
           (lambda _
             (invoke "hpack")))
         ;; These tools are expected to be installed alongside ngless, under
         ;; the name "ngless-VERSION-TOOL".
         (add-after 'install 'link-tools
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each
                (lambda (tool)
                  (symlink (search-input-file inputs
                                              (string-append "/bin/" tool))
                           (string-append bin "ngless-" ,version "-" tool)))
                '("prodigal" "minimap2" "samtools" "bwa"))))))))
    (native-inputs
     (list ghc-hpack
           ghc-quickcheck
           ghc-test-framework
           ghc-test-framework-hunit
           ghc-test-framework-quickcheck2
           ghc-test-framework-th))
    (inputs
     (list prodigal
           bwa
           samtools
           minimap2
           ghc-aeson
           ghc-ansi-terminal
           ghc-async
           ghc-atomic-write
           ghc-bytestring-lexing
           ghc-conduit
           ghc-conduit-algorithms
           ghc-conduit-extra
           ghc-configurator
           ghc-convertible
           ghc-data-default
           ghc-diagrams-core
           ghc-diagrams-lib
           ghc-diagrams-svg
           ghc-double-conversion
           ghc-edit-distance
           ghc-either
           ghc-errors
           ghc-extra
           ghc-filemanip
           ghc-file-embed
           ghc-gitrev
           ghc-hashtables
           ghc-http-conduit
           ghc-inline-c
           ghc-inline-c-cpp
           ghc-int-interval-map
           ghc-missingh
           ghc-optparse-applicative
           ghc-regex
           ghc-safe
           ghc-safeio
           ghc-strict
           ghc-tar
           ghc-tar-conduit
           ghc-unliftio
           ghc-unliftio-core
           ghc-vector
           ghc-yaml
           ghc-zlib))
    (propagated-inputs
     (list r-r6
           r-hdf5r
           r-iterators
           r-itertools
           r-matrix))
    (home-page "https://ngless.embl.de/")
    (synopsis "DSL for processing next-generation sequencing data")
    (description
     "Ngless is a domain-specific language for
@dfn{next-generation sequencing} (NGS) data processing.")
    (license license:expat)))
13488
(define-public ghc-int-interval-map
  ;; The source is pinned to COMMIT.  REVISION is bound here but the version
  ;; field below is a fixed string that does not use it.
  (let ((commit "678763de7fe6d7fa3f1c44b32d18ce58670270f4")
        (revision "1"))
    (package
      (name "ghc-int-interval-map")
      (version "0.0.0.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ngless-toolkit/interval-to-int.git")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256 (base32 "0fd728b5if89vj5j4f9y7k0b2xv2ycz5a21iy15wbdcf5bhim7i8"))))
      (build-system haskell-build-system)
      (inputs
       (list ghc-either ghc-primitive ghc-vector ghc-vector-algorithms))
      (native-inputs
       (list ghc-hedgehog
             ghc-tasty
             ghc-tasty-hedgehog
             ghc-tasty-hunit
             ghc-tasty-quickcheck
             ghc-tasty-th))
      (home-page "https://github.com/luispedro/interval-to-int#readme")
      (synopsis "Interval map structure in Haskell")
      ;; Grammar fixed ("It is also designed") and the unbalanced closing
      ;; parenthesis at the end of the last sentence removed.
      (description "An interval map structure that is optimized for low
memory (each interval is represented by about 3 words + whatever the
cargo is) and has semantics that are appropriate for genomic intervals
(namely, intervals can overlap and queries will return all matches
together). It is also designed to be used in two phases: a construction
phase + query phase.")
      (license license:expat))))
13522
(define-public filtlong
  ;; Upstream recommends installing from a clone of the git repository, see
  ;; https://github.com/rrwick/Filtlong#installation
  ;; and the latest release is more than nine months old.
  (let ((commit "d1bb46dfe8bc7efe6257b5ce222c04bfe8aedaab")
        (revision "1"))
    (package
      (name "filtlong")
      (version (git-version "0.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Filtlong")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1xr92r820x8qlkcr3b57iw223yq8vjgyi42jr79w2xgw47qzr575"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Copy the binary to bin/ and the two helper scripts to
           ;; share/filtlong/scripts by hand.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((prefix (assoc-ref outputs "out"))
                      (script-dir (string-append prefix
                                                 "/share/filtlong/scripts")))
                 (install-file "bin/filtlong"
                               (string-append prefix "/bin"))
                 (for-each (lambda (script)
                             (install-file script script-dir))
                           '("scripts/histogram.py"
                             "scripts/read_info_histograms.sh")))))
           ;; Wrap histogram.py so that it runs with the GUIX_PYTHONPATH
           ;; value of the build environment prepended.
           (add-after 'install 'wrap-program
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let ((python-path (getenv "GUIX_PYTHONPATH"))
                     (histogram (string-append
                                 (assoc-ref outputs "out")
                                 "/share/filtlong/scripts/histogram.py")))
                 (wrap-program histogram
                   `("GUIX_PYTHONPATH" ":" prefix (,python-path))))))
           ;; Point the shell script at the absolute file name of gawk.
           (add-before 'check 'patch-tests
             (lambda _
               (substitute* "scripts/read_info_histograms.sh"
                 (("awk") (which "gawk"))))))))
      (inputs
       `(("gawk" ,gawk)                 ;for read_info_histograms.sh
         ("python" ,python-2)           ;required for histogram.py
         ("zlib" ,zlib)))
      (home-page "https://github.com/rrwick/Filtlong/")
      (synopsis "Tool for quality filtering of Nanopore and PacBio data")
      (description
       "The Filtlong package is a tool for filtering long reads by quality.
It can take a set of long reads and produce a smaller, better subset. It uses
both read length (longer is better) and read identity (higher is better) when
choosing which reads pass the filter.")
      (license (list license:gpl3       ;filtlong
                     license:asl2.0))))) ;histogram.py
13582
(define-public nanopolish
  ;; The recommended way to install is to clone the git repository
  ;; <https://github.com/jts/nanopolish#installing-a-particular-release>.
  ;; Also, the differences between release and current version seem to be
  ;; significant.
  (let ((commit "6331dc4f15b9dfabb954ba3fae9d76b6c3ca6377")
        (revision "1"))
    (package
      (name "nanopolish")
      (version (git-version "0.11.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/jts/nanopolish")
               (commit commit)
               (recursive? #t)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "15ikl3d37y49pwd7vx36xksgsqajhf24q7qqsnpl15dqqyy5qgbc"))
         (modules '((guix build utils)))
         ;; Drop the htslib directory from the checkout; htslib is listed
         ;; among the inputs below.
         (snippet
          '(begin
             (delete-file-recursively "htslib")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags
         '("HDF5=noinstall" "EIGEN=noinstall" "HTS=noinstall" "CC=gcc")
         #:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           ;; Prepend the eigen3 include directory to CPATH.
           (add-after 'unpack 'find-eigen
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((eigen-include
                      (search-input-directory inputs "/include/eigen3")))
                 (setenv "CPATH"
                         (string-append eigen-include
                                        ":" (or (getenv "CPATH") ""))))))
           (delete 'configure)
           ;; Install the binary and everything found under scripts/.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((prefix (assoc-ref outputs "out"))
                      (script-dir (string-append
                                   prefix "/share/nanopolish/scripts")))
                 (install-file "nanopolish"
                               (string-append prefix "/bin"))
                 (for-each (lambda (file)
                             (install-file file script-dir))
                           (find-files "scripts" ".*")))))
           ;; Python scripts get a GUIX_PYTHONPATH wrapper, Perl scripts a
           ;; PERL5LIB wrapper written with wrap-script.
           (add-after 'install 'wrap-programs
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let ((python-path (getenv "GUIX_PYTHONPATH"))
                     (perl-path (getenv "PERL5LIB"))
                     (script-dir (string-append
                                  (assoc-ref outputs "out")
                                  "/share/nanopolish/scripts"))
                     (guile (search-input-file inputs "bin/guile")))
                 (for-each
                  (lambda (script)
                    (wrap-program script
                      `("GUIX_PYTHONPATH" ":" prefix (,python-path))))
                  (find-files script-dir "\\.py"))
                 (for-each
                  (lambda (script)
                    (wrap-script script #:guile guile
                                 `("PERL5LIB" ":" prefix (,perl-path))))
                  (find-files script-dir "\\.pl"))))))))
      (inputs
       `(("guile" ,guile-3.0)           ; for wrappers
         ("eigen" ,eigen)
         ("hdf5" ,hdf5)
         ("htslib" ,htslib)
         ("perl" ,perl)
         ("bioperl" ,bioperl-minimal)
         ("perl-getopt-long" ,perl-getopt-long)
         ("python" ,python-wrapper)
         ("python-biopython" ,python-biopython)
         ("python-numpy" ,python-numpy)
         ("python-pysam" ,python-pysam)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-scipy" ,python-scipy)
         ("zlib" ,zlib)))
      (home-page "https://github.com/jts/nanopolish")
      (synopsis "Signal-level analysis of Oxford Nanopore sequencing data")
      (description
       "This package analyses the Oxford Nanopore sequencing data at signal-level.
Nanopolish can calculate an improved consensus sequence for a draft genome
assembly, detect base modifications, call SNPs (Single nucleotide
polymorphisms) and indels with respect to a reference genome and more.")
      (license license:expat))))
13669
(define-public cnvkit
  (package
    (name "cnvkit")
    (version "0.9.5")
    ;; Fetched from git at the tag "v" followed by the version.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/etal/cnvkit")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0g2f78k68yglmj4fsfmgs8idqv3di9aj53fg0ld0hqljg8chhh82"))))
    (build-system python-build-system)
    (propagated-inputs
     (list
      ;; Python packages
      python-biopython
      python-future
      python-matplotlib
      python-numpy
      python-reportlab
      python-pandas
      python-pysam
      python-pyfaidx
      python-scipy
      ;; R packages
      r-dnacopy))
    (home-page "https://cnvkit.readthedocs.org/")
    (synopsis "Copy number variant detection from targeted DNA sequencing")
    (description
     "CNVkit is a Python library and command-line software toolkit to infer
and visualize copy number from high-throughput DNA sequencing data. It is
designed for use with hybrid capture, including both whole-exome and custom
target panels, and short-read sequencing platforms such as Illumina and Ion
Torrent.")
    (license license:asl2.0)))
13705
(define-public python-pyfit-sne
  (package
    (name "python-pyfit-sne")
    (version "1.2.1")
    ;; Fetched from git; the tag is the bare version string.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/KlugerLab/pyFIt-SNE")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0f3n7wcmxbnqiisgimhpa6p5chqpb1hj69i6rpg2hv2671i8nn68"))))
    (build-system python-build-system)
    (arguments
     (list #:tests? #false))            ; there are none
    (propagated-inputs
     (list python-numpy))
    (inputs
     (list fftw))
    (native-inputs
     (list python-cython))
    (home-page "https://github.com/KlugerLab/pyFIt-SNE")
    (synopsis "FFT-accelerated Interpolation-based t-SNE")
    (description
     "t-Stochastic Neighborhood Embedding (t-SNE) is a highly successful
method for dimensionality reduction and visualization of high dimensional
datasets. A popular implementation of t-SNE uses the Barnes-Hut algorithm to
approximate the gradient at each iteration of gradient descent. This package
is a Cython wrapper for FIt-SNE.")
    (license license:bsd-4)))
13736
(define-public bbmap
  (package
    (name "bbmap")
    (version "38.90")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/bbmap/BBMap_" version ".tar.gz"))
              (sha256
               (base32
                "1wb94bcc006qq86x77z2rz0lc8m9f1kpnw6gdhjfg9bdaqf56rm3"))))
    (build-system ant-build-system)
    (arguments
     `(#:build-target "dist"
       #:tests? #f                      ; there are none
       ;; Tell ant where mpi.jar of the java-openmpi input lives.
       #:make-flags
       ,#~(list (string-append "-Dmpijar="
                               #$(this-package-input "java-openmpi")
                               "/lib/mpi.jar"))
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils))
       #:phases
       (modify-phases %standard-phases
         ;; The JNI library has its own makefile in the jni directory.
         (add-after 'build 'build-jni-library
           (lambda _
             (with-directory-excursion "jni"
               (invoke "make" "-f" "makefile.linux"))))
         ;; There is no install target
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-scripts-and-documentation
           (lambda* (#:key outputs #:allow-other-keys)
             (substitute* "calcmem.sh"
               (("\\| awk ") (string-append "| " (which "awk") " ")))
             (let* ((shell-scripts (find-files "." "\\.sh$"))
                    (prefix (assoc-ref outputs "out"))
                    (bin-dir (string-append prefix "/bin"))
                    (doc-dir (string-append prefix "/share/doc/bbmap"))
                    (jni-dir (string-append prefix "/lib/jni")))
               ;; Hard-code the installed locations of the documentation,
               ;; the jar, the JNI directory and the java executable in
               ;; every shell script.
               (substitute* shell-scripts
                 (("\\$DIR\"\"docs") doc-dir)
                 (("^CP=.*")
                  (string-append "CP=" prefix "/share/java/BBTools.jar\n"))
                 (("^NATIVELIBDIR.*")
                  (string-append "NATIVELIBDIR=" jni-dir "\n"))
                 (("CMD=\"java")
                  (string-append "CMD=\"" (which "java"))))
               (for-each (lambda (script)
                           (install-file script bin-dir))
                         shell-scripts)

               ;; Install JNI library
               (install-file "jni/libbbtoolsjni.so" jni-dir)

               ;; Install documentation
               (install-file "docs/readme.txt" doc-dir)
               (copy-recursively "docs/guides" doc-dir)))))
       #:jdk ,openjdk11))
    (inputs
     (list gawk java-eclipse-jdt-core java-eclipse-jdt-compiler-apt
           java-openmpi))
    (home-page "https://sourceforge.net/projects/bbmap/")
    (synopsis "Aligner and other tools for short sequencing reads")
    (description
     "This package provides bioinformatic tools to align, deduplicate,
reformat, filter and normalize DNA and RNA-seq data. It includes the
following tools: BBMap, a short read aligner for DNA and RNA-seq data; BBNorm,
a kmer-based error-correction and normalization tool; Dedupe, a tool to
simplify assemblies by removing duplicate or contained subsequences that share
a target percent identity; Reformat, to convert reads between
fasta/fastq/scarf/fasta+qual/sam, interleaved/paired, and ASCII-33/64, at over
500 MB/s; and BBDuk, a tool to filter, trim, or mask reads with kmer matches
to an artifact/contaminant file.")
    (license license:bsd-3)))
13810
(define-public velvet
  (package
    (name "velvet")
    (version "1.2.10")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://www.ebi.ac.uk/~zerbino/velvet/"
                                  "velvet_" version ".tgz"))
              (sha256
               (base32
                "0h3njwy66p6bx14r3ar1byb0ccaxmxka4c65rn4iybyiqa4d8kc8"))
              ;; Remove the shipped Manual.pdf and the bundled libraries
              ;; in the third-party directory.
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file "Manual.pdf")
                  (delete-file-recursively "third-party")))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags '("OPENMP=t")
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The source includes zlib.h from the deleted third-party
         ;; directory; include the plain header name instead.
         (add-after 'unpack 'fix-zlib-include
           (lambda _
             (substitute* "src/binarySequences.c"
               (("../third-party/zlib-1.2.3/zlib.h") "zlib.h"))))
         ;; Install the two programs and the two manuals by hand.
         ;; install-file creates the target directory when needed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bin-dir (string-append prefix "/bin"))
                    (doc-dir (string-append prefix "/share/doc/velvet")))
               (for-each (lambda (program)
                           (install-file program bin-dir))
                         '("velveth" "velvetg"))
               (for-each (lambda (manual)
                           (install-file manual doc-dir))
                         '("Manual.pdf" "Columbus_manual.pdf"))))))))
    (inputs
     (list openmpi zlib))
    (native-inputs
     `(("texlive" ,(texlive-updmap.cfg (list texlive-latex-graphics
                                             texlive-fonts-ec
                                             texlive-hyperref)))))
    (home-page "https://www.ebi.ac.uk/~zerbino/velvet/")
    (synopsis "Nucleic acid sequence assembler for very short reads")
    (description
     "Velvet is a de novo genomic assembler specially designed for short read
sequencing technologies, such as Solexa or 454. Velvet currently takes in
short read sequences, removes errors then produces high quality unique
contigs. It then uses paired read information, if available, to retrieve the
repeated areas between contigs.")
    (license license:gpl2+)))
13865
(define-public python-velocyto
  (package
    (name "python-velocyto")
    (version "0.17.17")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "velocyto" version))
       (sha256
        (base32
         "0fgygyzqgrq32dv6a00biq1p1cwi6kbl5iqblxq1kklj6b2mzmhs"))
       (modules '((guix build utils)))
       ;; Delete generated C files.
       (snippet
        '(for-each delete-file (find-files "." "\\.c")))))
    (build-system python-build-system)
    (arguments
     (list
      #:phases
      '(modify-phases %standard-phases
         ;; Numba needs a writable dir to cache functions.
         (add-before 'check 'set-numba-cache-dir
           (lambda _
             (setenv "NUMBA_CACHE_DIR" "/tmp"))))))
    (native-inputs
     (list python-joblib))
    (propagated-inputs
     (list python-click
           python-cython
           python-h5py
           python-loompy
           python-matplotlib
           python-numba
           python-numpy
           python-pandas
           python-pysam
           python-scikit-learn
           python-scipy))
    (home-page "https://github.com/velocyto-team/velocyto.py")
    (synopsis "RNA velocity analysis for single cell RNA-seq data")
    (description
     "Velocyto is a library for the analysis of RNA velocity. Velocyto
includes a command line tool and an analysis pipeline.")
    (license license:bsd-2)))
13909
(define-public arriba
  (package
    (name "arriba")
    (version "1.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/suhrig/arriba/releases/"
                           "download/v" version "/arriba_v" version ".tar.gz"))
       (sha256
        (base32
         "0jx9656ry766vb8z08m1c3im87b0c82qpnjby9wz4kcz8vn87dx2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         ;; Instead of running a configure script, patch the Makefile to
         ;; use the htslib input (headers and shared library) and hard-code
         ;; the STAR and samtools executables in run_arriba.sh.
         (replace 'configure
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((hts-prefix (assoc-ref inputs "htslib")))
               (substitute* "Makefile"
                 (("-I\\$\\(HTSLIB\\)/htslib")
                  (string-append "-I" hts-prefix "/include/htslib"))
                 ((" \\$\\(HTSLIB\\)/libhts.a")
                  (string-append " " hts-prefix "/lib/libhts.so"))))
             (substitute* "run_arriba.sh"
               (("^STAR ")
                (string-append (which "STAR") " "))
               (("samtools --version-only")
                (string-append (which "samtools") " --version-only"))
               (("samtools index")
                (string-append (which "samtools") " index"))
               (("samtools sort")
                (string-append (which "samtools") " sort")))))
         ;; Install the three executables by hand and wrap the R script
         ;; with the R_LIBS_SITE value of the build environment.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin-dir (string-append (assoc-ref outputs "out")
                                           "/bin")))
               (for-each (lambda (program)
                           (install-file program bin-dir))
                         '("arriba" "run_arriba.sh" "draw_fusions.R"))
               (wrap-program (string-append bin-dir "/draw_fusions.R")
                 `("R_LIBS_SITE" ":" prefix
                   (,(getenv "R_LIBS_SITE"))))))))))
    (inputs
     (list htslib
           r-minimal
           r-circlize
           r-genomicalignments
           r-genomicranges
           samtools
           star
           zlib))
    (home-page "https://github.com/suhrig/arriba")
    (synopsis "Gene fusion detection from RNA-Seq data ")
    (description
     "Arriba is a command-line tool for the detection of gene fusions from
RNA-Seq data. It was developed for the use in a clinical research setting.
Therefore, short runtimes and high sensitivity were important design criteria.
It is based on the fast STAR aligner and the post-alignment runtime is
typically just around two minutes. In contrast to many other fusion detection
tools which build on STAR, Arriba does not require to reduce the
@code{alignIntronMax} parameter of STAR to detect small deletions.")
    ;; All code is under the Expat license with the exception of
    ;; "draw_fusions.R", which is under GPLv3.
    (license (list license:expat license:gpl3))))
13975
(define-public adapterremoval
  (package
    (name "adapterremoval")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/MikkelSchubert/adapterremoval")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1nf3ki5pfzalhrx2fr1y6pfqfi133yj2m7q4fj9irf5fb94bapwr"))))
    (build-system gnu-build-system)
    (arguments
     `(;; The installation prefix is handed to make directly; the
       ;; configure phase is deleted below.
       #:make-flags
       ,#~(list "COLOR_BUILD=no"
                (string-append "PREFIX=" #$output))
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     (list zlib))
    (home-page "https://adapterremoval.readthedocs.io/")
    (synopsis "Rapid sequence adapter trimming, identification, and read merging")
    (description
     "This program searches for and removes remnant adapter sequences from
@dfn{High-Throughput Sequencing} (HTS) data and (optionally) trims low quality
bases from the 3' end of reads following adapter removal. AdapterRemoval can
analyze both single end and paired end data, and can be used to merge
overlapping paired-ended reads into (longer) consensus sequences.
Additionally, the AdapterRemoval may be used to recover a consensus adapter
sequence for paired-ended data, for which this information is not available.")
    (license license:gpl3+)))
14012
(define-public checkm
  (package
    (name "checkm")
    (version "1.1.3")
    ;; The PyPI project is named "checkm-genome".
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "checkm-genome" version))
       (sha256
        (base32
         "0i2nnki639hgjag17wlva2x0ymn37b4krqsf6akxddykhfbkdnkz"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                 ; Some tests fail for unknown reasons.
       #:phases
       (modify-phases %standard-phases
         ;; Point HOME at /tmp before the check phase.
         (add-before 'check 'set-HOME
           (lambda _
             (setenv "HOME" "/tmp"))))))
    (inputs
     (list python-dendropy python-matplotlib python-numpy python-pysam
           python-scipy))
    (home-page "https://ecogenomics.github.io/CheckM/")
    (synopsis "Assess the quality of putative genome bins")
    (description
     "CheckM provides a set of tools for assessing the quality of genomes
recovered from isolates, single cells, or metagenomes. It provides robust
estimates of genome completeness and contamination by using collocated sets of
genes that are ubiquitous and single-copy within a phylogenetic lineage.
Assessment of genome quality can also be examined using plots depicting key
genomic characteristics (e.g., GC, coding density) which highlight sequences
outside the expected distributions of a typical genome. CheckM also provides
tools for identifying genome bins that are likely candidates for merging based
on marker set compatibility, similarity in genomic characteristics, and
proximity within a reference genome.")
    (license license:gpl3+)))
14049
(define-public umi-tools
  (package
    (name "umi-tools")
    (version "1.0.0")
    ;; The PyPI project is named "umi_tools" (with an underscore).
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "umi_tools" version))
       (sha256
        (base32
         "08y3vz1vcx09whmbsn722lcs6jl9wyrh9i4p3k8j4cb1i32bij4a"))))
    (build-system python-build-system)
    (inputs
     (list python-pandas
           python-future
           python-scipy
           python-matplotlib
           python-regex
           python-pysam))
    (native-inputs
     (list python-cython))
    (home-page "https://github.com/CGATOxford/UMI-tools")
    ;; UMI stands for "unique molecular identifier", as the description
    ;; spells out; the synopsis used to say "modular".
    (synopsis "Tools for analyzing unique molecular identifiers")
    ;; "@{count}" was invalid Texinfo: "@{" is an escaped brace, which left
    ;; the closing brace unmatched.  It is meant to be "@code{count}".
    (description "This package provides tools for dealing with @dfn{Unique
Molecular Identifiers} (UMIs) and @dfn{Random Molecular Tags} (RMTs) in
genetic sequences. There are six tools: the @code{extract} and
@code{whitelist} commands are used to prepare a fastq containing UMIs @code{+/-}
cell barcodes for alignment. The remaining commands, @code{group},
@code{dedup}, and @code{count}/@code{count_tab}, are used to identify PCR
duplicates using the UMIs and perform different levels of analysis depending
on the needs of the user.")
    (license license:expat)))
14082
(define-public ataqv
  (package
    (name "ataqv")
    (version "1.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ParkerLab/ataqv")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "031xr6jx1aprh26y5b1lv3gzrlmzg4alfl73vvshymx8cq8asrqi"))))
    (build-system gnu-build-system)
    (arguments
     `(;; The installation prefix and the locations of the boost and
       ;; htslib inputs are handed to make directly; the configure phase
       ;; is deleted below.
       #:make-flags
       ,#~(list (string-append "prefix=" #$output)
                (string-append "BOOST_ROOT="
                               #$(this-package-input "boost"))
                (string-append "HTSLIB_ROOT="
                               #$(this-package-input "htslib")))
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     (list boost htslib ncurses zlib))
    (native-inputs
     (list lcov))
    (home-page "https://github.com/ParkerLab/ataqv")
    (synopsis "Toolkit for quality control and visualization of ATAC-seq data")
    (description "This package provides a toolkit for measuring and comparing
ATAC-seq results. It was written to make it easier to spot differences that
might be caused by ATAC-seq library prep or sequencing. The main program,
@code{ataqv}, examines aligned reads and reports some basic metrics.")
    (license license:gpl3+)))
14120
(define-public r-psiplot
  (package
    (name "r-psiplot")
    (version "2.3.0")
    ;; Fetched from git at the tag "v" followed by the version.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/kcha/psiplot")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "08438h16cfry5kqh3y9hs8q1b1a8bxhblsm75knviz5r6q0n1jxh"))))
    (build-system r-build-system)
    (propagated-inputs
     (list r-mass
           r-dplyr
           r-tidyr
           r-purrr
           r-readr
           r-magrittr
           r-ggplot2))
    (home-page "https://github.com/kcha/psiplot")
    (synopsis "Plot percent spliced-in values of alternatively-spliced exons")
    (description
     "PSIplot is an R package for generating plots of @dfn{percent
spliced-in} (PSI) values of alternatively-spliced exons that were computed by
vast-tools, an RNA-Seq pipeline for alternative splicing analysis. The plots
are generated using @code{ggplot2}.")
    (license license:expat)))
14151
(define-public r-scopeloomr
  ;; Built from a fixed commit; the version combines "0.13.0" with the
  ;; revision and the commit via git-version.
  (let ((commit "99726f5f7da794042036b73924b6a10d6e7b4d5d")
        (revision "1"))
    (package
      (name "r-scopeloomr")
      (version (git-version "0.13.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/aertslab/SCopeLoomR")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1ci17ms0c0hf7yfp9ckcg7a2y1s0nm19jj3cifsd55hwc0gdglmz"))))
      (properties `((upstream-name . "SCopeLoomR")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-base64enc
             r-hdf5r
             r-igraph
             r-matrix
             r-plyr
             r-rjson
             r-rlist))
      (home-page "https://github.com/aertslab/SCopeLoomR")
      (synopsis "Build .loom files and extract data from them")
      (description
       "This is an R package to build generic @code{.loom} files aligning with
the default naming convention of the @code{.loom} format and to integrate
other data types e.g.: regulons (SCENIC), clusters from Seurat, trajectory
information... The package can also be used to extract data from @code{.loom}
files.")
      (license license:expat))))
14180
(define-public python-ctxcore
  (package
    (name "python-ctxcore")
    (version "0.1.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/aertslab/ctxcore")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "16nlj7z8pirgjad7vlgm7226b3hpw4a7n967vyfg26dsf5n8k70d"))))
    (build-system python-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          ;; setuptools-scm normally derives the version string, but that
          ;; does not work without the .git directory, so supply the
          ;; version through the environment instead.
          (add-before 'build 'pretend-version
            (lambda _
              (setenv "SETUPTOOLS_SCM_PRETEND_VERSION" #$version))))))
    (propagated-inputs
     (list python-cytoolz
           python-numba
           python-frozendict
           python-numpy
           python-pandas
           python-pyyaml
           python-pyarrow-0.16
           python-tqdm))
    (native-inputs
     (list python-pytest
           python-setuptools-scm))
    (home-page "https://github.com/aertslab/ctxcore")
    (synopsis "Core functions for pycisTarget and the SCENIC tool suite")
    (description
     "ctxcore is part of the SCENIC suite of tools. It provides core functions for
pycisTarget and SCENIC.")
    (license license:gpl3+)))
14223
(define-public python-arboreto
  (package
    (name "python-arboreto")
    (version "0.1.6")
    ;; The source is pinned to a literal commit hash rather than a tag.
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/aertslab/arboreto")
                    (commit "2f475dca08f47a60acc2beb8dd897e77b7495ca4")))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0l0im8ay7l2d24f7vaha454vsaha9s36bfqhbijg3b8ir8apsd7l"))))
    (build-system python-build-system)
    ;; Tests are disabled: lots of them fail because python-distributed
    ;; fails to start the "Nanny" process.
    (arguments (list #:tests? #false))
    (propagated-inputs
     (list python-bokeh
           python-dask
           python-distributed
           python-numpy
           python-pandas
           python-scikit-learn
           python-scipy
           python-tornado-6))
    (home-page "https://github.com/aertslab/arboreto")
    (synopsis "Gene regulatory network inference using tree-based ensemble regressors")
    (description
     "This package implements scalable gene regulatory network inference using
tree-based ensemble regressors.")
    (license license:bsd-3)))
14256
(define-public pyscenic
  (package
    (name "pyscenic")
    (version "0.11.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/aertslab/pySCENIC")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0pbmmr1zdb1vbbs6wx357s59d13pna6x03wq8blj6ckjws8bbq73"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Numba needs a writable dir to cache functions.
         (add-before 'check 'set-numba-cache-dir
           (lambda _
             (setenv "NUMBA_CACHE_DIR" "/tmp")))
         ;; Run the test suite through pytest in verbose mode.
         (replace 'check
           (lambda _
             (invoke "pytest" "-v"))))))
    (propagated-inputs
     (list python-ctxcore
           python-cytoolz
           python-multiprocessing-on-dill
           python-llvmlite
           python-numba
           python-attrs
           python-frozendict
           python-numpy
           python-pandas
           python-cloudpickle
           python-dask
           python-distributed
           python-arboreto
           python-boltons
           python-setuptools
           python-pyyaml
           python-tqdm
           python-interlap
           python-umap-learn
           python-loompy
           python-networkx
           python-scipy
           python-fsspec
           python-requests
           python-aiohttp
           python-scikit-learn))
    (native-inputs
     (list python-pytest))
    (home-page "https://scenic.aertslab.org/")
    (synopsis "Single-Cell regulatory network inference and clustering")
    (description
     "pySCENIC is a Python implementation of the SCENIC pipeline (Single-Cell
rEgulatory Network Inference and Clustering) which enables biologists to infer
transcription factors, gene regulatory networks and cell types from
single-cell RNA-seq data.")
    (license license:gpl3+)))
14319
;; ikarus, fetched from PyPI.  Tests are disabled; the source is patched to
;; import genesig from ctxcore instead of pyscenic.
(define-public python-ikarus
  (package
    (name "python-ikarus")
    (version "0.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "ikarus" version))
       (sha256
        (base32 "086czpvj4yafz4vrq5rx2gy0bj2l8nzwnkk0gw8qvy4w133xjysy"))))
    (build-system python-build-system)
    (propagated-inputs
     (list python-numpy
           python-pandas
           python-scipy
           python-scanpy
           python-anndata
           python-ctxcore               ;because of issue 12
           pyscenic))
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; The numba function cache must live in a writable directory.
         (add-before 'check 'set-numba-cache-dir
           (lambda _
             (setenv "NUMBA_CACHE_DIR" "/tmp")))
         ;; Point the genesig import at ctxcore; see
         ;; https://github.com/BIMSBbioinfo/ikarus/issues/12
         (add-after 'unpack 'fix-issue-12
           (lambda _
             (substitute* "ikarus/classifier.py"
               (("pyscenic.genesig") "ctxcore.genesig")))))
       #:tests? #false))
    (home-page "https://github.com/BIMSBbioinfo/ikarus")
    (synopsis "Machine learning classifier of tumor cells")
    (description
     "ikarus is a stepwise machine learning pipeline that tries to cope with a task
of distinguishing tumor cells from normal cells. Leveraging multiple
annotated single cell datasets it can be used to define a gene set specific to
tumor cells. First, the latter gene set is used to rank cells and then to
train a logistic classifier for the robust classification of tumor and normal
cells. Finally, sensitivity is increased by propagating the cell labels based
on a custom cell-cell network. ikarus is tested on multiple single cell
datasets to ascertain that it achieves high sensitivity and specificity in
multiple experimental contexts.")
    (license license:expat)))
14366
;; VBZ compression plugin, built with CMake from a recursive Git checkout.
;; Conan and the Python bindings are switched off at configure time.
(define-public vbz-compression
  (package
    (name "vbz-compression")
    (version "1.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nanoporetech/vbz_compression/")
             (commit (string-append "v" version))
             ;; Fetch recursively so that the streamvbyte sources are
             ;; included.
             (recursive? #true)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1c6wsrnw03vsc5cfp2rdakly5xy55m9chjmy6v685yapdwirdky0"))))
    (build-system cmake-build-system)
    (arguments
     (list #:configure-flags
           #~(list "-DENABLE_CONAN=OFF"
                   ;; The Python parts are not installed anyway, so do not
                   ;; bother building them.
                   "-DENABLE_PYTHON=OFF")))
    (native-inputs
     (list googlebenchmark))
    (inputs
     ;; Previously also listed (commented out): ("hdf5" ,hdf5-1.10)
     (list (list zstd "lib")))
    (home-page "https://github.com/nanoporetech/vbz_compression/")
    (synopsis "VBZ compression plugin for nanopore signal data")
    (description
     "VBZ Compression uses variable byte integer encoding to compress
nanopore signal data. The performance of VBZ is achieved by taking
advantage of the properties of the raw signal and therefore is most
effective when applied to the signal dataset.")
    (license license:mpl2.0)))
14403
;; ont_fast5_api.  The vbz_plugin directory shipped in the source is removed
;; by the origin snippet and repopulated at build time with the HDF5 plugin
;; taken from the vbz-compression input.
(define-public python-ont-fast5-api
  (package
    (name "python-ont-fast5-api")
    (version "4.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nanoporetech/ont_fast5_api")
             (commit (string-append "release_" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "01hj4751j424lzic2sc4bz1f8w7i7fpkjpy3rgghdyl5lyfyb4s4"))
       (modules '((guix build utils)))
       ;; Drop the plugin directory that comes with the source.
       (snippet
        '(delete-file-recursively "ont_fast5_api/vbz_plugin"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'copy-plugin
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Locate the plugin by file name rather than by input label;
             ;; 'search-input-file' raises a descriptive error when the
             ;; file is missing.  'install-file' creates the target
             ;; directory itself.
             (install-file
              (search-input-file inputs
                                 "/hdf5/lib/plugin/libvbz_hdf_plugin.so")
              "ont_fast5_api/vbz_plugin/"))))))
    (inputs
     (list vbz-compression))
    (propagated-inputs
     (list python-numpy python-h5py python-packaging python-progressbar33))
    (home-page "https://github.com/nanoporetech/ont_fast5_api")
    (synopsis "Interface to HDF5 files of the Oxford Nanopore fast5 file format")
    (description
     "This package provides a concrete implementation of the fast5 file schema
using the generic @code{h5py} library, plain-named methods to interact with
and reflect the fast5 file schema, and tools to convert between
@code{multi_read} and @code{single_read} formats.")
    (license license:mpl2.0)))
14444
;; tbsp, pinned to a specific Git commit (revision 2 on top of 1.0.0).
;; No tests are included, so the check phase is disabled.
(define-public tbsp
  (let* ((revision "2")
         (commit "dc30c03868233c5504299c9cb0d7b2064ba9cb41"))
    (package
      (name "tbsp")
      (version (git-version "1.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/phoenixding/tbsp")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1im0bik2hxkcb7jzkcxp5nqb30hd8lfraxml6i5ik52j6z3qqln1"))))
      (build-system python-build-system)
      (inputs
       (list python-matplotlib
             python-networkx
             python-numpy
             python-pybigwig
             python-biopython-1.73
             python-scikit-learn
             python-scipy))
      (arguments
       '(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'relax-requirements
             (lambda _
               ;; Strip the "<3.0" version bound (matplotlib) from
               ;; setup.py.
               (substitute* "setup.py"
                 ((", <3.0") "")))))
         #:tests? #f))                  ;no tests included
      (home-page "https://github.com/phoenixding/tbsp/")
      (synopsis "SNP-based trajectory inference")
      (description
       "Several studies focus on the inference of developmental and response
trajectories from single cell RNA-Seq (scRNA-Seq) data. A number of
computational methods, often referred to as pseudo-time ordering, have been
developed for this task. CRISPR has also been used to reconstruct lineage
trees by inserting random mutations. The tbsp package implements an
alternative method to detect significant, cell type specific sequence
mutations from scRNA-Seq data.")
      (license license:expat))))
14489
;; tabixpp: the bundled htslib is deleted from the source and the package is
;; built against the htslib input.  There is no configure script and no test
;; suite; installation is done by hand and also writes a pkg-config file.
(define-public tabixpp
  (package
    (name "tabixpp")
    (version "1.1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ekg/tabixpp")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1k2a3vbq96ic4lw72iwp5s3mwwc4xhdffjj584yn6l9637q9j1yd"))
       (modules '((guix build utils)))
       ;; Remove the bundled copy of htslib.
       (snippet
        #~(begin
            (delete-file-recursively "htslib")))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #f                       ;there are no tests to run
      #:make-flags
      #~(list (string-append "CC=" #$(cc-for-target))
              (string-append "CXX=" #$(cxx-for-target))
              "HTS_HEADERS="
              (string-append "HTS_LIB="
                             (search-input-file %build-inputs
                                                "/lib/libhts.a"))
              "INCLUDES=")
      #:phases
      #~(modify-phases %standard-phases
          ;; The project has no configure script.
          (delete 'configure)
          ;; Turn tabix.o into both a shared and a static library.
          (add-after 'build 'build-libraries
            (lambda _
              (invoke #$(cxx-for-target)
                      "-shared" "-o" "libtabixpp.so" "tabix.o" "-lhts")
              (invoke #$(ar-for-target) "rcs" "libtabixpp.a" "tabix.o")))
          ;; Install the program, the libraries and the header, then
          ;; generate tabixpp.pc.
          (replace 'install
            (lambda* (#:key outputs #:allow-other-keys)
              (let* ((prefix (assoc-ref outputs "out"))
                     (libdir (string-append prefix "/lib"))
                     (pcdir (string-append libdir "/pkgconfig")))
                (install-file "tabix++" (string-append prefix "/bin"))
                (for-each (lambda (library)
                            (install-file library libdir))
                          '("libtabixpp.so" "libtabixpp.a"))
                (install-file "tabix.hpp" (string-append prefix "/include"))
                (mkdir-p pcdir)
                (with-output-to-file (string-append pcdir "/tabixpp.pc")
                  (lambda _
                    (format #t "prefix=~a~@
                            exec_prefix=${prefix}~@
                            libdir=${exec_prefix}/lib~@
                            includedir=${prefix}/include~@
                            ~@
                            ~@
                            Name: libtabixpp~@
                            Version: ~a~@
                            Description: C++ wrapper around tabix project~@
                            Libs: -L${libdir} -ltabixpp~@
                            Cflags: -I${includedir}~%"
                            prefix #$version)))))))))
    (inputs
     (list bzip2 htslib xz zlib))
    (home-page "https://github.com/ekg/tabixpp")
    (synopsis "C++ wrapper around tabix project")
    (description "This is a C++ wrapper around the Tabix project which abstracts
some of the details of opening and jumping in tabix-indexed files.")
    (license license:expat)))
14556
;; smithwaterman, pinned to a Git commit.  Objects are compiled with -fPIC so
;; that a shared library can be linked from them after the regular build;
;; installation is done by hand and also writes a pkg-config file.
(define-public smithwaterman
  (let ((revision "2")
        (commit "2610e259611ae4cde8f03c72499d28f03f6d38a7"))
    (package
      (name "smithwaterman")
      (version (git-version "0.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/smithwaterman/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0i9d8zrxpiracw3mxzd9siybpy62p06rqz9mc2w93arajgbk45bs"))))
      (build-system gnu-build-system)
      (arguments
       (list
        #:make-flags
        #~(list (string-append "CXX=" #$(cxx-for-target))
                "libsw.a" "all")
        #:tests? #f                     ;there are no tests to run
        #:phases
        #~(modify-phases %standard-phases
            ;; The project has no configure script.
            (delete 'configure)
            (add-after 'unpack 'patch-source
              (lambda _
                ;; Build position-independent objects.
                (substitute* "Makefile"
                  (("-c ") "-c -fPIC "))
                ;; When cross-compiling, prefix ld and ar with the target
                ;; triplet.
                #$@(if (%current-target-system)
                       #~((substitute* "Makefile"
                            (("\tld")
                             (string-append "\t" #$(%current-target-system)
                                            "-ld"))
                            (("\tar")
                             (string-append "\t" #$(%current-target-system)
                                            "-ar"))))
                       '())))
            ;; Link the already-built objects into a shared library.
            (add-after 'build 'build-dynamic
              (lambda _
                (apply invoke #$(cxx-for-target)
                       "-shared" "-o" "libsmithwaterman.so"
                       '("smithwaterman.o" "SmithWatermanGotoh.o"
                         "disorder.o" "BandedSmithWaterman.o"
                         "LeftAlign.o" "Repeats.o" "IndelAllele.o"))))
            (replace 'install
              (lambda* (#:key outputs #:allow-other-keys)
                (let* ((prefix (assoc-ref outputs "out"))
                       (libdir (string-append prefix "/lib"))
                       (incdir (string-append prefix
                                              "/include/smithwaterman"))
                       (pcdir (string-append libdir "/pkgconfig")))
                  (install-file "smithwaterman"
                                (string-append prefix "/bin"))
                  (for-each (lambda (header)
                              (install-file header incdir))
                            (find-files "." "\\.h$"))
                  (for-each (lambda (library)
                              (install-file library libdir))
                            '("libsmithwaterman.so" "libsw.a"))
                  (mkdir-p pcdir)
                  (with-output-to-file
                      (string-append pcdir "/smithwaterman.pc")
                    (lambda _
                      (format #t "prefix=~a~@
                              exec_prefix=${prefix}~@
                              libdir=${exec_prefix}/lib~@
                              includedir=${prefix}/include/smithwaterman~@
                              ~@
                              ~@
                              Name: smithwaterman~@
                              Version: ~a~@
                              Description: smith-waterman-gotoh alignment algorithm~@
                              Libs: -L${libdir} -lsmithwaterman~@
                              Cflags: -I${includedir}~%"
                              prefix #$version)))))))))
      (home-page "https://github.com/ekg/smithwaterman")
      (synopsis "Implementation of the Smith-Waterman algorithm")
      (description "Implementation of the Smith-Waterman algorithm.")
      ;; The licensing terms are unclear: https://github.com/ekg/smithwaterman/issues/9.
      (license (list license:gpl2 license:expat)))))
14630
;; Sylamer, fetched at a fixed Git commit.  There is no configure script:
;; the 'configure' phase instead patches the Makefile's install rule, copies
;; the Makefile into "src" and continues the build from there.
(define-public sylamer
  (package
    (name "sylamer")
    (version "18-131")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/micans/sylamer/")
                    (commit "aa75c3584797c0c15f860addb645f7bc1dd7627d")))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1ddiwlrdghhb4574rvfw0brjp9gs5l6nfsy82h0m4mvz1dr3gkj5"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:tests? #f                       ; no test target
      #:make-flags
      #~(list (string-append "GSLPREFIX=" #$(this-package-input "gsl")))
      #:phases
      #~(modify-phases %standard-phases
          (replace 'configure
            (lambda _
              ;; Install into this package's output instead of
              ;; $(HOME)/local/bin.
              (substitute* "Makefile"
                (("cp sylamer \\$\\(HOME\\)/local/bin")
                 (string-append "install -D -t " #$output
                                "/bin sylamer")))
              ;; The remaining phases run inside "src", with the patched
              ;; Makefile copied there.
              (install-file "Makefile" "src")
              (chdir "src"))))))
    (inputs (list gsl zlib))
    (home-page "https://www.ebi.ac.uk/research/enright/software/sylamer")
    (synopsis "Assess microRNA binding and siRNA off-target effects")
    (description "Sylamer is a system for finding significantly over or
under-represented words in sequences according to a sorted gene list.
Typically it is used to find significant enrichment or depletion of microRNA
or siRNA seed sequences from microarray expression data. Sylamer is extremely
fast and can be applied to genome-wide datasets with ease. Results are
plotted in terms of a significance landscape plot. These plots show
significance profiles for each word studied across the sorted genelist.")
    (license license:gpl3+)))
14671
;; multichoose: two command-line programs and their headers, installed by
;; hand because the project has neither a configure script nor an install
;; target that is used here.  Tests are disabled.
(define-public multichoose
  (package
    (name "multichoose")
    (version "1.0.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ekg/multichoose/")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "0ci5fqvmpamwgxvmyd79ygj6n3bnbl3vc7b6h1sxz58186sm3pfs"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; Tests require node.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (include (string-append out "/include")))
               ;; TODO: There are Python modules for these programs too.
               (for-each (lambda (program)
                           (install-file program bin))
                         '("multichoose" "multipermute"))
               (for-each (lambda (header)
                           (install-file header include))
                         '("multichoose.h" "multipermute.h"))))))))
    (home-page "https://github.com/ekg/multichoose")
    (synopsis "Efficient loopless multiset combination generation algorithm")
    (description "This library implements an efficient loopless multiset
combination generation algorithm which is (approximately) described in
\"Loopless algorithms for generating permutations, combinations, and other
combinatorial configurations.\", G. Ehrlich - Journal of the ACM (JACM),
1973. (Algorithm 7.)")
    (license license:expat)))
14709
;; fsom, pinned to a Git commit and built with gcc-6.  Only the "fsom"
;; executable is installed; there is no configure script and no test suite.
(define-public fsom
  (let ((revision "1")
        (commit "a6ef318fbd347c53189384aef7f670c0e6ce89a3"))
    (package
      (name "fsom")
      (version (git-version "0.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/fsom/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0gw1lpvr812pywg9y546x0h1hhj261xwls41r6kqhddjlrcjc0pi"))))
      (build-system gnu-build-system)
      (native-inputs
       (list gcc-6))
      (arguments
       '(#:phases
         (modify-phases %standard-phases
           ;; The project has no configure script.
           (delete 'configure)
           ;; Copy the built program into the output's bin directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (install-file "fsom"
                             (string-append (assoc-ref outputs "out")
                                            "/bin")))))
         #:tests? #f))                  ;there are no tests to run
      (home-page "https://github.com/ekg/fsom")
      (synopsis "Manage SOM (Self-Organizing Maps) neural networks")
      (description "A tiny C library for managing SOM (Self-Organizing Maps)
neural networks.")
      (license license:gpl3))))
14740
;; fastahack: objects are compiled with -fPIC so that a shared library can be
;; linked from them after the regular build.  Installation is done by hand
;; and also writes a pkg-config file.
(define-public fastahack
  (package
    (name "fastahack")
    (version "1.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ekg/fastahack/")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0rp1blskhzxf7vbh253ibpxbgl9wwgyzf1wbkxndi08d3j4vcss9"))))
    (build-system gnu-build-system)
    (arguments
     (list
      ;; Unclear how to run tests: https://github.com/ekg/fastahack/issues/15
      #:tests? #f
      #:make-flags
      #~(list (string-append "CXX=" #$(cxx-for-target)))
      #:phases
      #~(modify-phases %standard-phases
          ;; The project has no configure script.
          (delete 'configure)
          ;; Build position-independent objects.
          (add-after 'unpack 'patch-source
            (lambda _
              (substitute* "Makefile"
                (("-c ") "-c -fPIC "))))
          ;; Link the already-built objects into a shared library.
          (add-after 'build 'build-dynamic
            (lambda _
              (apply invoke #$(cxx-for-target)
                     "-shared" "-o" "libfastahack.so"
                     '("Fasta.o" "FastaHack.o" "split.o" "disorder.o"))))
          (replace 'install
            (lambda* (#:key outputs #:allow-other-keys)
              (let* ((prefix (assoc-ref outputs "out"))
                     (libdir (string-append prefix "/lib"))
                     (incdir (string-append prefix "/include/fastahack"))
                     (pcdir (string-append libdir "/pkgconfig")))
                (mkdir-p incdir)
                (for-each (lambda (header)
                            (install-file header incdir))
                          (find-files "." "\\.h$"))
                (install-file "fastahack" (string-append prefix "/bin"))
                (install-file "libfastahack.so" libdir)
                (mkdir-p pcdir)
                (with-output-to-file (string-append pcdir "/fastahack.pc")
                  (lambda _
                    (format #t "prefix=~a~@
                            exec_prefix=${prefix}~@
                            libdir=${exec_prefix}/lib~@
                            includedir=${prefix}/include/fastahack~@
                            ~@
                            ~@
                            Name: fastahack~@
                            Version: ~a~@
                            Description: Indexing and sequence extraction from FASTA files~@
                            Libs: -L${libdir} -lfastahack~@
                            Cflags: -I${includedir}~%"
                            prefix #$version)))))))))
    (home-page "https://github.com/ekg/fastahack")
    (synopsis "Indexing and sequence extraction from FASTA files")
    (description "Fastahack is a small application for indexing and
extracting sequences and subsequences from FASTA files. The included library
provides a FASTA reader and indexer that can be embedded into applications
which would benefit from directly reading subsequences from FASTA files. The
library automatically handles index file generation and use.")
    (license (list license:expat license:gpl2))))
14805
;; vcflib, built with CMake.  The origin snippet deletes the bundled
;; submodule directories and rewrites CMakeLists.txt and the #include lines
;; so that fastahack and smithwaterman are found through pkg-config.  The
;; sources of filevercmp, fsom, intervaltree and multichoose are copied back
;; into the tree at build time, and a vcflib.pc file is generated after
;; installation.
14806 (define-public vcflib
14807 (package
14808 (name "vcflib")
14809 (version "1.0.3")
14810 (source
14811 (origin
14812 (method git-fetch)
14813 (uri (git-reference
14814 (url "https://github.com/vcflib/vcflib")
14815 (commit (string-append "v" version))))
14816 (file-name (git-file-name name version))
14817 (sha256
14818 (base32 "1r7pnajg997zdjkf1b38m14v0zqnfx52w7nbldwh1xpbpahb1hjh"))
14819 (modules '((guix build utils)))
;; The substitute* clauses below are applied in order to each line: lines
;; mentioning fastahack or smithwaterman are blanked first, then
;; pkg_check_modules calls for both are inserted in front of the TABIXPP
;; one, and their *_LIBRARIES variables are added in front of
;; ${TABIXPP_LIBRARIES}.
14820 (snippet
14821 #~(begin
14822 (substitute* "CMakeLists.txt"
14823 ((".*fastahack.*") "")
14824 ((".*smithwaterman.*") "")
14825 (("(pkg_check_modules\\(TABIXPP)" text)
14826 (string-append
14827 "pkg_check_modules(FASTAHACK REQUIRED fastahack)\n"
14828 "pkg_check_modules(SMITHWATERMAN REQUIRED smithwaterman)\n"
14829 text))
14830 (("\\$\\{TABIXPP_LIBRARIES\\}" text)
14831 (string-append "${FASTAHACK_LIBRARIES} "
14832 "${SMITHWATERMAN_LIBRARIES} "
14833 text)))
;; Rewrite includes of the formerly bundled headers to the
;; smithwaterman/ and fastahack/ prefixed forms.
14834 (substitute* (find-files "." "\\.(h|c)(pp)?$")
14835 (("\"SmithWatermanGotoh.h\"") "<smithwaterman/SmithWatermanGotoh.h>")
14836 (("\"convert.h\"") "<smithwaterman/convert.h>")
14837 (("\"disorder.h\"") "<smithwaterman/disorder.h>")
14838 (("Fasta.h") "fastahack/Fasta.h"))
;; Remove the bundled submodule directories.
14839 (for-each delete-file-recursively
14840 '("fastahack" "filevercmp" "fsom" "googletest" "intervaltree"
14841 "libVCFH" "multichoose" "smithwaterman"))))))
14842 (build-system cmake-build-system)
14843 (inputs
14844 (list bzip2
14845 htslib
14846 fastahack
14847 perl
14848 python
14849 smithwaterman
14850 tabixpp
14851 xz
14852 zlib))
14853 (native-inputs
14854 `(("pkg-config" ,pkg-config)
14855 ;; Submodules.
14856 ;; This package builds against the .o files so we need to extract the source.
14857 ("filevercmp-src" ,(package-source filevercmp))
14858 ("fsom-src" ,(package-source fsom))
14859 ("intervaltree-src" ,(package-source intervaltree))
14860 ("multichoose-src" ,(package-source multichoose))))
14861 (arguments
;; Point CMake at the pkg-config executable for the current target.
14862 (list #:configure-flags
14863 #~(list (string-append
14864 "-DPKG_CONFIG_EXECUTABLE="
14865 (search-input-file
14866 %build-inputs (string-append
14867 "/bin/" #$(pkg-config-for-target)))))
14868 #:tests? #f ; no tests
14869 #:phases
14870 #~(modify-phases %standard-phases
;; Build libvcflib as a shared library instead of a static one.
14871 (add-after 'unpack 'build-shared-library
14872 (lambda _
14873 (substitute* "CMakeLists.txt"
14874 (("vcflib STATIC") "vcflib SHARED"))
14875 (substitute* "test/Makefile"
14876 (("libvcflib.a") "libvcflib.so"))))
;; Recreate the four submodule directories from the "*-src" inputs.  Each
;; source is looked up in INPUTS first and then in NATIVE-INPUTS; a
;; directory is copied, anything else is extracted with tar.
14877 (add-after 'unpack 'unpack-submodule-sources
14878 (lambda* (#:key inputs native-inputs #:allow-other-keys)
14879 (let ((unpack (lambda (source target)
14880 (mkdir target)
14881 (with-directory-excursion target
14882 (let ((source (or (assoc-ref inputs source)
14883 (assoc-ref native-inputs source))))
14884 (if (file-is-directory? source)
14885 (copy-recursively source ".")
14886 (invoke "tar" "xvf"
14887 source
14888 "--strip-components=1")))))))
14889 (and
14890 (unpack "filevercmp-src" "filevercmp")
14891 (unpack "fsom-src" "fsom")
14892 (unpack "intervaltree-src" "intervaltree")
14893 (unpack "multichoose-src" "multichoose")))))
14894 ;; This pkg-config file is provided by other distributions.
14895 (add-after 'install 'install-pkg-config-file
14896 (lambda* (#:key outputs #:allow-other-keys)
14897 (let* ((out (assoc-ref outputs "out"))
14898 (pkgconfig (string-append out "/lib/pkgconfig")))
14899 (mkdir-p pkgconfig)
14900 (with-output-to-file (string-append pkgconfig "/vcflib.pc")
14901 (lambda _
14902 (format #t "prefix=~a~@
14903 exec_prefix=${prefix}~@
14904 libdir=${exec_prefix}/lib~@
14905 includedir=${prefix}/include~@
14906 ~@
14907 Name: vcflib~@
14908 Version: ~a~@
14909 Requires: smithwaterman, fastahack, tabixpp~@
14910 Description: C++ library for parsing and manipulating VCF files~@
14911 Libs: -L${libdir} -lvcflib~@
14912 Cflags: -I${includedir}~%"
14913 out #$version)))))))))
14914 (home-page "https://github.com/vcflib/vcflib/")
14915 (synopsis "Library for parsing and manipulating VCF files")
14916 (description "Vcflib provides methods to manipulate and interpret
14917 sequence variation as it can be described by VCF. It is both an API for parsing
14918 and operating on records of genomic variation as it can be described by the VCF
14919 format, and a collection of command-line utilities for executing complex
14920 manipulations on VCF files.")
14921 (license license:expat)))
14922
;; FreeBayes, built with Meson against the packaged fastahack, htslib,
;; smithwaterman, tabixpp and vcflib.  The bundled htslib is removed from the
;; source; the test scripts are patched to use the packaged bash-tap, and the
;; test-simple-bash submodule is copied in for the tests.
(define-public freebayes
  (package
    (name "freebayes")
    (version "1.3.5")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/freebayes/freebayes")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "1l0z88gq57kva677a6xri5g9k2d9h9lk5yk1q2xmq64wqhv7dvc3"))
              (modules '((guix build utils)))
              ;; Remove the bundled copy of htslib.
              (snippet
               '(begin
                  (delete-file-recursively "contrib/htslib")
                  #t))))
    (build-system meson-build-system)
    (inputs
     (list fastahack htslib smithwaterman tabixpp vcflib))
    (native-inputs
     `(("bash-tap" ,bash-tap)
       ("bc" ,bc)
       ("grep" ,grep)                   ; Built with perl support.
       ("parallel" ,parallel)
       ("perl" ,perl)
       ("pkg-config" ,pkg-config)
       ("python" ,python)
       ("samtools" ,samtools)
       ("simde" ,simde)
       ;; This submodule is needed to run the tests.
       ("test-simple-bash-src"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/ingydotnet/test-simple-bash/")
                 (commit "124673ff204b01c8e96b7fc9f9b32ee35d898acc")))
           (file-name "test-simple-bash-src-checkout")
           (sha256
            (base32 "043plp6z0x9yf7mdpky1fw7zcpwn1p47px95w9mh16603zqqqpga"))))))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-source
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bash-tap (assoc-ref inputs "bash-tap")))
               ;; Make the test scripts use the bash-tap input.
               (substitute* (find-files "test/t")
                 (("BASH_TAP_ROOT=bash-tap")
                  (string-append "BASH_TAP_ROOT=" bash-tap "/bin"))
                 (("bash-tap/bash-tap-bootstrap")
                  (string-append bash-tap "/bin/bash-tap-bootstrap"))
                 (("source.*bash-tap-bootstrap")
                  (string-append "source " bash-tap "/bin/bash-tap-bootstrap")))
               ;; Include IntervalTree.h without the relative submodule
               ;; path.
               (substitute* '("src/BedReader.cpp"
                              "src/BedReader.h")
                 (("../intervaltree/IntervalTree.h") "IntervalTree.h"))
               (substitute* "meson.build"
                 ;; Our pkg-config file is vcflib.pc
                 (("libvcflib") "vcflib")
                 (("vcflib_inc,") "")))))
         (add-after 'unpack 'unpack-submodule-sources
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "test/test-simple-bash")
             (copy-recursively (assoc-ref inputs "test-simple-bash-src")
                               "test/test-simple-bash")))
         ;; The slow tests take longer than the specified timeout.
         ,@(if (any (cute string=? <> (%current-system))
                    '("armhf-linux" "aarch64-linux"))
               '((replace 'check
                   (lambda* (#:key tests? #:allow-other-keys)
                     (when tests?
                       (invoke "meson" "test" "--timeout-multiplier" "5")))))
               '()))))
    (home-page "https://github.com/freebayes/freebayes")
    (synopsis "Haplotype-based variant detector")
    (description "FreeBayes is a Bayesian genetic variant detector designed to
find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms),
indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and
complex events (composite insertion and substitution events) smaller than the
length of a short-read sequencing alignment.")
    (license license:expat)))
15007
;; samblaster: a single executable, installed by hand.  The project has no
;; configure script and no test suite.
(define-public samblaster
  (package
    (name "samblaster")
    (version "0.1.24")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/GregoryFaust/samblaster")
                    (commit (string-append "v." version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0iv2ddfw8363vb2x8gr3p8g88whb6mb9m0pf71i2cqsbv6jghap7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         ;; Copy the built program into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "samblaster"
                           (string-append (assoc-ref outputs "out")
                                          "/bin")))))))
    (home-page "https://github.com/GregoryFaust/samblaster")
    (synopsis "Mark duplicates in paired-end SAM files")
    (description "Samblaster is a fast and flexible program for marking
duplicates in read-id grouped paired-end SAM files. It can also optionally
output discordant read pairs and/or split read mappings to separate SAM files,
and/or unmapped/clipped reads to a separate FASTQ file. When marking
duplicates, samblaster will require approximately 20MB of memory per 1M read
pairs.")
    (license license:expat)))
15041
;; velocyto.R, pinned to a Git commit (revision 1 on top of 0.6) and built
;; with the R build system.
(define-public r-velocyto
  (let* ((revision "1")
         (commit "d7790346cb99f49ab9c2b23ba70dcf9d2c9fc350"))
    (package
      (name "r-velocyto")
      (version (git-version "0.6" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/velocyto-team/velocyto.R")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "16wqf70j7rd7pay2q513iyz12i8n9vrpg1bisah4lddbcpx5dz1n"))))
      (build-system r-build-system)
      (propagated-inputs
       (list r-hdf5r
             r-mass
             r-mgcv
             r-pcamethods
             r-rcpp
             r-rcpparmadillo
             ;; The remaining entries are suggested packages.
             r-rtsne
             r-cluster
             r-abind
             r-h5
             r-biocgenerics
             r-genomicalignments
             r-rsamtools
             r-edger
             r-igraph))
      (inputs
       (list boost))
      (home-page "https://velocyto.org")
      (synopsis "RNA velocity estimation in R")
      (description
       "This package provides basic routines for estimation of gene-specific
transcriptional derivatives and visualization of the resulting velocity
patterns.")
      (license license:gpl3))))
15085
;; MethylDackel.  There is no configure script: the 'configure' phase patches
;; the Makefile so that libBigWig is linked and the install directory is
;; created before the program is copied into it.
(define-public methyldackel
  (package
    (name "methyldackel")
    (version "0.5.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/dpryan79/MethylDackel")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1sfhf2ap75qxpnmy1ifgmxqs18rq8mah9mcgkby73vc6h0sw99ws"))))
    (build-system gnu-build-system)
    (arguments
     (list
      #:test-target "test"
      #:make-flags
      ;; Use the compiler for the current target rather than a hard-coded
      ;; "gcc", so that cross-compilation picks the right tool.
      #~(list (string-append "CC=" #$(cc-for-target))
              "CFLAGS=-fcommon"
              (string-append "prefix=" #$output "/bin/"))
      #:phases
      #~(modify-phases %standard-phases
          (replace 'configure
            (lambda _
              (substitute* "Makefile"
                ;; Link against libBigWig in addition to htslib.
                (("-lhts ") "-lhts -lBigWig ")
                ;; Create $(prefix) before installing into it.
                (("install MethylDackel \\$\\(prefix\\)" command)
                 (string-append "install -d $(prefix); " command))))))))
    (inputs
     (list curl                         ; XXX: needed by libbigwig
           htslib-1.9 libbigwig zlib))
    ;; Needed for tests
    (native-inputs
     `(("python" ,python-wrapper)))
    (home-page "https://github.com/dpryan79/MethylDackel")
    (synopsis "Universal methylation extractor for BS-seq experiments")
    (description
     "MethylDackel will process a coordinate-sorted and indexed BAM or CRAM
file containing some form of BS-seq alignments and extract per-base
methylation metrics from them. MethylDackel requires an indexed fasta file
containing the reference genome as well.")
    ;; See https://github.com/dpryan79/MethylDackel/issues/85
    (license license:expat)))
15129
15130 ;; This package bundles PCRE 8.02 and cannot be built with the current
15131 ;; version.
;; PHAST.  There is no configure script: the 'configure' phase patches the
;; Makefiles to use the clapack input and shared libraries, and the 'check'
;; phase runs only the msa_view tests.
15132 (define-public phast
15133 (package
15134 (name "phast")
15135 (version "1.5")
15136 (source (origin
15137 (method git-fetch)
15138 (uri (git-reference
15139 (url "https://github.com/CshlSiepelLab/phast")
15140 (commit (string-append "v" version))))
15141 (file-name (git-file-name name version))
15142 (sha256
15143 (base32
15144 "10lpbllvny923jjbbyrpxahhd1m5h7sbj9gx7rd123rg10mlidki"))))
15145 (build-system gnu-build-system)
15146 (arguments
;; NOTE(review): "CC=gcc" is hard-coded here, whereas other packages in this
;; file pass (cc-for-target); presumably this breaks cross-compilation --
;; confirm.
15147 `(#:make-flags
15148 ,#~(list "CC=gcc"
15149 (string-append "DESTDIR=" #$output))
15150 #:phases
15151 (modify-phases %standard-phases
15152 (replace 'configure
15153 (lambda* (#:key inputs outputs #:allow-other-keys)
15154 ;; Fix syntax
;; NOTE(review): as written, the pattern and the replacement below are the
;; same single space, which is a no-op.  The whitespace looks collapsed;
;; presumably a run of spaces was meant to become a tab -- confirm against
;; the upstream file.
15155 (substitute* "test/Makefile"
15156 ((" ") " "))
;; Look for CLAPACK in the clapack input instead of /usr/lib.
15157 (substitute* "Makefile"
15158 (("CLAPACKPATH=/usr/lib")
15159 (string-append "CLAPACKPATH="
15160 (assoc-ref inputs "clapack") "/lib")))
15161 ;; Renaming the libraries is not necessary with our version of
15162 ;; CLAPACK.
15163 (substitute* "src/lib/Makefile"
15164 (("ifdef CLAPACKPATH") "ifdef UNNECESSARY"))
;; Adjust library names and link against shared objects instead of static
;; archives.
15165 (substitute* "src/make-include.mk"
15166 (("-lblaswr") "-lblas")
15167 (("-ltmg") "-ltmglib")
15168 (("liblapack.a") "liblapack.so")
15169 (("libblas.a") "libblas.so")
15170 (("libf2c.a") "libf2c.so"))
;; Rewrite the /opt and /usr/ path prefixes used in src/Makefile.
15171 (substitute* "src/Makefile"
15172 (("/opt") "/share")
15173 (("/usr/") "/"))
15174 #t))
15175 (replace 'check
15176 (lambda _
;; Put the freshly built programs in ./bin on PATH for the tests.
15177 (setenv "PATH"
15178 (string-append (getcwd) "/bin:" (getenv "PATH")))
15179 ;; Disable broken test
15180 (substitute* "test/Makefile"
15181 ((".*if.*hmrc_summary" m) (string-append "#" m)))
15182 ;; Only run the msa_view tests because the others fail for
15183 ;; unknown reasons.
15184 (invoke "make" "-C" "test" "msa_view"))))))
15185 (inputs
15186 (list clapack))
15187 (native-inputs
15188 (list perl))
15189 (home-page "http://compgen.cshl.edu/phast/")
15190 (synopsis "Phylogenetic analysis with space/time models")
15191 (description
15192 "Phylogenetic Analysis with Space/Time models (PHAST) is a collection of
15193 command-line programs and supporting libraries for comparative and
15194 evolutionary genomics. Best known as the search engine behind the
15195 Conservation tracks in the University of California, Santa Cruz (UCSC) Genome
15196 Browser, PHAST also includes several tools for phylogenetic modeling,
15197 functional element identification, as well as utilities for manipulating
15198 alignments, trees and genomic annotations.")
15199 (license license:bsd-3)))
15200
(define-public python-gffutils
  (package
    (name "python-gffutils")
    (version "0.10.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/daler/gffutils")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1gkzk7ps6w3ai2r81js9s9bzpba0jmxychnd2da6n9ggdnf2xzqz"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run the test suite with nose, leaving out tests tagged "slow".
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; The tests want a HOME directory they can use.
               (setenv "HOME" "/tmp")
               (invoke "nosetests" "-a" "!slow")))))))
    (native-inputs
     (list python-nose))
    (propagated-inputs
     (list python-argcomplete
           python-argh
           python-biopython
           python-pybedtools
           python-pyfaidx
           python-simplejson
           python-six))
    (home-page "https://github.com/daler/gffutils")
    (synopsis "Tool for manipulation of GFF and GTF files")
    (description
     "python-gffutils is a Python package for working with and manipulating
the GFF and GTF format files typically used for genomic annotations. The
files are loaded into a SQLite database, allowing much more complex
manipulation of hierarchical features (e.g., genes, transcripts, and exons)
than is possible with plain-text methods alone.")
    (license license:expat)))
15244
(define-public indelfixer
  (package
    (name "indelfixer")
    (version "1.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/cbg-ethz/InDelFixer/")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "10ak05x8i1bx2p7rriv2rglqg1wr7c8wrhjrqlq1wm7ka99w8i79"))))
    (build-system ant-build-system)
    ;; Name of the jar to build and where the Java sources and tests live.
    (arguments
     '(#:jar-name "InDelFixer.jar"
       #:source-dir "src/main/java"
       #:test-dir "src/test"))
    (native-inputs
     (list java-junit))
    (inputs
     `(("java-commons-lang2" ,java-commons-lang)
       ("java-args4j" ,java-args4j)))
    (home-page "https://github.com/cbg-ethz/InDelFixer/")
    (synopsis "Iterative and sensitive NGS sequence aligner")
    (description "InDelFixer is a sensitive aligner for 454, Illumina and
PacBio data, employing a full Smith-Waterman alignment against a reference.
This Java command line application aligns Next-Generation Sequencing (NGS) and
third-generation reads to a set of reference sequences, by a prior fast k-mer
matching and removes indels, causing frame shifts. In addition, only a
specific region can be considered. An iterative refinement of the alignment
can be performed, by alignment against the consensus sequence with wobbles.
The output is in SAM format.")
    (license license:gpl3+)))
15279
(define-public libsbml
  (package
    (name "libsbml")
    (version "5.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/sbml/libsbml/"
                           version "/stable/libSBML-"
                           version "-core-src.tar.gz"))
       (sha256
        (base32 "0slkagrk3nfi2qsksv6b1brj6zhx4bj4bkib2sdycvrcd10ql2lh"))))
    (build-system cmake-build-system)
    (arguments
     `(#:test-target "test"
       #:configure-flags
       ;; Enable the test suite and tell CMake where the libxml2 library
       ;; and headers are.
       ,#~(let ((libxml2-prefix #$(this-package-input "libxml2")))
            (list "-DWITH_CHECK=ON"
                  (string-append "-DLIBXML_LIBRARY="
                                 libxml2-prefix "/lib/libxml2.so")
                  (string-append "-DLIBXML_INCLUDE_DIR="
                                 libxml2-prefix "/include/libxml2")))))
    (native-inputs
     (list check swig))
    (propagated-inputs
     (list libxml2))
    (home-page "http://sbml.org/Software/libSBML")
    (synopsis "Process SBML files and data streams")
    (description "LibSBML is a library to help you read, write, manipulate,
translate, and validate SBML files and data streams. The @dfn{Systems Biology
Markup Language} (SBML) is an interchange format for computer models of
biological processes. SBML is useful for models of metabolism, cell
signaling, and more. It continues to be evolved and expanded by an
international community.")
    (license license:lgpl2.1+)))
15316
(define-public kraken2
  (package
    (name "kraken2")
    (version "2.1.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/DerrickWood/kraken2")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1pl6ml1ldg2hnhy8ps56q0fl1wq3g91qkhinj6pb4yjjhv1rxsjf"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #false                  ; there are none
       #:make-flags
       ,#~(list "-C" "src"
                (string-append "KRAKEN2_DIR=" #$output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Fill in the placeholders of the helper scripts, replace the
         ;; names of external tools with the file names found on PATH, and
         ;; copy the scripts to the "bin" directory of the output.
         (add-before 'install 'install-scripts
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out") "/bin"))
                    ;; Values for the "#####=KEY=#####" placeholders.
                    (placeholders (list (cons "KRAKEN2_DIR" bindir)
                                        (cons "VERSION" ,version))))
               (mkdir-p bindir)

               (with-directory-excursion "scripts"
                 (let ((all-scripts (find-files "." ".*")))
                   ;; An unexpected placeholder key aborts the build.
                   (substitute* all-scripts
                     (("#####=([^=]+)=#####" _ key)
                      (or (assoc-ref placeholders key)
                          (error (format #false "unknown key: ~a~%" key)))))
                   (substitute* "kraken2"
                     (("compression_program = \"bzip2\"")
                      (string-append "compression_program = \""
                                     (which "bzip2")
                                     "\""))
                     (("compression_program = \"gzip\"")
                      (string-append "compression_program = \""
                                     (which "gzip")
                                     "\"")))
                   (substitute* '("download_genomic_library.sh"
                                  "download_taxonomy.sh"
                                  "16S_gg_installation.sh"
                                  "16S_silva_installation.sh"
                                  "16S_rdp_installation.sh")
                     (("wget") (which "wget")))
                   (substitute* '("download_taxonomy.sh"
                                  "download_genomic_library.sh"
                                  "rsync_from_ncbi.pl")
                     (("rsync -")
                      (string-append (which "rsync") " -")))
                   (substitute* "mask_low_complexity.sh"
                     (("which") (which "which")))
                   (substitute* '("mask_low_complexity.sh"
                                  "download_genomic_library.sh"
                                  "16S_silva_installation.sh")
                     (("sed -e ")
                      (string-append (which "sed") " -e ")))
                   (substitute* '("rsync_from_ncbi.pl"
                                  "16S_rdp_installation.sh"
                                  "16S_silva_installation.sh"
                                  "16S_gg_installation.sh"
                                  "download_taxonomy.sh"
                                  "download_genomic_library.sh")
                     (("gunzip") (which "gunzip")))
                   ;; Mark each script read-only and executable, then
                   ;; install it.
                   (for-each (lambda (file)
                               (chmod file #o555)
                               (install-file file bindir))
                             all-scripts)))))))))
    (inputs
     (list gzip
           perl
           rsync
           sed
           wget
           which))
    (home-page "https://github.com/DerrickWood/kraken2")
    (synopsis "Taxonomic sequence classification system")
    (description "Kraken is a taxonomic sequence classifier that assigns
taxonomic labels to DNA sequences. Kraken examines the k-mers within a query
sequence and uses the information within those k-mers to query a
database. That database maps k-mers to the lowest common ancestor (LCA) of all
genomes known to contain a given k-mer.")
    (license license:expat)))
15405
(define-public lofreq
  (package
    (name "lofreq")
    (version "2.1.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/CSB5/lofreq")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0qssrn3mgjak7df6iqc1rljqd3g3a5syvg0lsv4vds43s3fq23bl"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #false                  ; test data are not included
       #:test-target "bug-tests"))
    (native-inputs
     (list autoconf automake which))
    (inputs
     `(("htslib" ,htslib)
       ("python" ,python-wrapper)
       ("zlib" ,zlib)))
    (home-page "https://csb5.github.io/lofreq/")
    (synopsis "Sensitive variant calling from sequencing data")
    (description "LoFreq is a fast and sensitive variant-caller for inferring
SNVs and indels from next-generation sequencing data. It makes full use of
base-call qualities and other sources of errors inherent in
sequencing (e.g. mapping or base/indel alignment uncertainty), which are
usually ignored by other methods or only used for filtering.")
    (license license:expat)))
15437
(define-public ivar
  (package
    (name "ivar")
    (version "1.3.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/andersen-lab/ivar")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "044xa0hm3b8fga64csrdx05ih8w7kwmvcdrdrhkg8j11ml4bi4xv"))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-tests? #false))       ; not supported
    (native-inputs
     (list autoconf automake))
    (inputs
     (list htslib zlib))
    (home-page "https://andersen-lab.github.io/ivar/html/")
    (synopsis "Tools for amplicon-based sequencing")
    (description "iVar is a computational package that contains functions
broadly useful for viral amplicon-based sequencing.")
    (license license:gpl3+)))
15462
(define-public python-pyliftover
  (package
    (name "python-pyliftover")
    (version "0.4")
    ;; Fetch from git: the PyPI release does not include test data.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/konstantint/pyliftover")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1j8jp9iynv2l3jv5pr0pn0p3azlama1bqg233piglzm6bqh3m2m3"))))
    (build-system python-build-system)
    (arguments
     '(#:tests? #false))                ; the tests access the web
    (native-inputs
     (list python-pytest))
    (home-page "https://github.com/konstantint/pyliftover")
    (synopsis "Python implementation of UCSC liftOver genome coordinate conversion")
    (description
     "PyLiftover is a library for quick and easy conversion of genomic (point)
coordinates between different assemblies.")
    (license license:expat)))
15487
(define-public python-cgatcore
  (package
    (name "python-cgatcore")
    (version "0.6.7")
    ;; Fetch from git: the PyPI release does not include test data.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/cgat-developers/cgat-core")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "17vk88v1bx7x02ibzkc9i7ir4b5p1hcjr38jpsfzyzxr68352d5k"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace references to bash and time with the file names found
         ;; on PATH.
         (add-after 'unpack 'fix-references
           (lambda _
             (let ((bash (which "bash")))
               (substitute* "cgatcore/pipeline/execution.py"
                 (("#!/bin/bash") (string-append "#!" bash))
                 (("executable=\"/bin/bash\"")
                  (string-append "executable=\"" bash "\""))
                 (("\\\\time") (which "time"))))))
         ;; Move the check phase after the install phase.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             (when tests?
               (add-installed-pythonpath inputs outputs)
               ;; Requires network access
               (delete-file "tests/test_pipeline_execution.py")
               (invoke "python" "-m" "pytest" "-v")))))))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("lsof" ,lsof)
       ("hostname" ,inetutils)
       ("openssl" ,openssl)))
    (inputs
     (list time))
    (propagated-inputs
     (list python-apsw
           python-gevent
           python-pandas
           python-paramiko
           python-pyyaml
           python-ruffus
           python-sqlalchemy))
    (home-page "https://github.com/cgat-developers/cgat-core")
    (synopsis "Computational genomics analysis toolkit")
    (description
     "CGAT-core is a set of libraries and helper functions used to enable
researchers to design and build computational workflows for the analysis of
large-scale data-analysis.")
    (license license:expat)))
15543
(define-public perl-cworld-dekker
  (package
    (name "perl-cworld-dekker")
    (version "1.01")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dekkerlab/cworld-dekker.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1dvh23fx52m59y6304xi2j2pl2hiqadlqg8jyv2pm14j1hy71ych"))))
    (build-system perl-build-system)
    (arguments
     `(#:modules ((guix build perl-build-system)
                  (guix build utils)
                  (srfi srfi-26))
       #:phases
       (modify-phases %standard-phases
         ;; Replace invocations of bedtools and Rscript with the absolute
         ;; file names from the corresponding inputs.
         (add-after 'unpack 'hardcode-references
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bedtools-dir (assoc-ref inputs "bedtools"))
                   (r-dir (assoc-ref inputs "r-minimal")))
               (substitute* '("scripts/python/getEigenVectors.py"
                              "scripts/python/matrix2EigenVectors.py")
                 (("bedtools intersect")
                  (string-append bedtools-dir "/bin/bedtools intersect")))
               (substitute* "lib/cworld/dekker.pm"
                 (("bedtools --version")
                  (string-append bedtools-dir "/bin/bedtools --version")))
               (substitute* '("scripts/perl/correlateMatrices.pl"
                              "scripts/perl/matrix2scaling.pl"
                              "scripts/perl/matrix2distance.pl"
                              "scripts/perl/coverageCorrect.pl"
                              "scripts/perl/matrix2anchorPlot.pl"
                              "scripts/python/matrix2EigenVectors.py"
                              "scripts/python/matrix2insulation-lite.py"
                              "scripts/perl/matrix2compartment.pl"
                              "scripts/perl/anchorPurge.pl"
                              "scripts/perl/applyCorrection.pl"
                              "scripts/perl/compareInsulation.pl"
                              "scripts/perl/fillMissingData.pl"
                              "scripts/perl/matrix2loess.pl"
                              "scripts/python/getEigenVectors.py"
                              "scripts/perl/aggregateBED.pl"
                              "scripts/perl/collapseMatrix.pl"
                              "scripts/perl/matrix2direction.pl"
                              "scripts/perl/singletonRemoval.pl"
                              "lib/cworld/dekker.pm"
                              "scripts/perl/matrix2insulation.pl")
                 ;; Keep the backtick or double quote that precedes the
                 ;; command name.
                 (("(`|\")Rscript" _ pre)
                  (string-append pre r-dir "/bin/Rscript"))))))
         ;; Copy the scripts directory to share/cworld-dekker and wrap the
         ;; Perl and Python scripts.
         (add-after 'install 'install-scripts
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (share (string-append out "/share/cworld-dekker")))
               (mkdir-p share)
               (copy-recursively "scripts" share)

               ;; Make all scripts executable and wrap them.
               (let ((r-scripts (find-files share "\\.R$"))
                     (python-scripts (find-files share "\\.py$"))
                     (perl-scripts (find-files share "\\.pl$"))
                     ;; Prefix VAR in SCRIPT's wrapper with the current
                     ;; value of VAR followed by EXTRA.
                     (wrap-with (lambda* (script var #:optional (extra ""))
                                  (let ((value (string-append (getenv var)
                                                              extra)))
                                    (wrap-program script
                                      (list var ":" 'prefix (list value)))))))
                 (for-each (cut chmod <> #o555)
                           (append r-scripts python-scripts perl-scripts))
                 (for-each (cut wrap-with <> "PERL5LIB"
                                (string-append ":" out
                                               "/lib/perl5/site_perl"))
                           perl-scripts)
                 (for-each (cut wrap-with <> "GUIX_PYTHONPATH")
                           python-scripts))))))))
    (native-inputs
     (list perl-module-build))
    (inputs
     `(("libgd" ,gd)
       ("perl-gd" ,perl-gd)
       ("bedtools" ,bedtools)
       ("python" ,python-wrapper)
       ("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)
       ("python-h5py" ,python-h5py)
       ("python-scikit-learn" ,python-scikit-learn)
       ("r-minimal" ,r-minimal)))
    (home-page "https://github.com/dekkerlab/cworld-dekker")
    (synopsis "Utility and analysis scripts for 3C, 4C, 5C, and Hi-C data")
    (description "This package is a collection of Perl, Python, and R
scripts for manipulating 3C/4C/5C/Hi-C data.")
    (license license:asl2.0)))
15637
(define-public ensembl-vep
  (let* ((api-version "103")
         ;; Return an origin for the Ensembl API repository NAME at the
         ;; "release/<api-version>" commit, with content hash HASH.
         (api-module
          (lambda (name hash)
            (origin
              (method git-fetch)
              (uri (git-reference
                    (url (string-append "https://github.com/Ensembl/"
                                        name ".git"))
                    (commit (string-append "release/" api-version))))
              (file-name (string-append name "-" api-version "-checkout"))
              (sha256 (base32 hash))))))
    (package
      (name "ensembl-vep")
      (version (string-append api-version ".1"))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/Ensembl/ensembl-vep.git")
               (commit (string-append "release/" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1iq7p72cv9b38jz2v8a4slzy2n8y0md487943180ym9xc8qvw09c"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (ice-9 match))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           ;; Tests need to run after installation
           (delete 'check)
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin"))
                      (site-perl (string-append out "/lib/perl5/site_perl"))
                      ;; API module name and the sub-directory of
                      ;; Bio/EnsEMBL that is copied from it.
                      (api-modules '(("ensembl" "/")
                                     ("ensembl-variation" "/Variation")
                                     ("ensembl-funcgen" "/Funcgen")
                                     ("ensembl-io" "/")))
                      ;; Script and the name to install it under; #f means
                      ;; keep the original name.
                      (programs '(("convert_cache.pl" "vep_convert_cache.pl")
                                  ("INSTALL.pl" "vep_install.pl")
                                  ("haplo" #f)
                                  ("variant_recoder" #f)
                                  ("filter_vep" #f)
                                  ("vep" #f))))
                 (for-each
                  (match-lambda
                    ((module sub-directory)
                     (let ((target (string-append site-perl "/Bio/EnsEMBL"
                                                  sub-directory)))
                       (mkdir-p target)
                       (copy-recursively
                        (string-append
                         (assoc-ref inputs
                                    (string-append "api-module-" module))
                         "/modules/Bio/EnsEMBL" sub-directory)
                        target))))
                  api-modules)
                 (copy-recursively "modules/" site-perl)
                 (mkdir-p bindir)
                 ;; Install each program and wrap it so that PERL5LIB
                 ;; includes the current PERL5LIB and the installed modules.
                 (for-each
                  (match-lambda
                    ((program new-name)
                     (let ((installed
                            (string-append bindir "/"
                                           (or new-name
                                               (basename program)))))
                       (copy-file program installed)
                       (chmod installed #o555)
                       (wrap-program installed
                         (list "PERL5LIB" ":" 'prefix
                               (list (getenv "PERL5LIB") site-perl))))))
                  programs)

                 ;; Fix path to tools
                 (with-directory-excursion
                     (string-append site-perl "/Bio/EnsEMBL")
                   (substitute* '("Funcgen/RunnableDB/ProbeMapping/PrePipelineChecks.pm"
                                  "VEP/BaseRunner.pm"
                                  "VEP/Utils.pm"
                                  "VEP/AnnotationSource/Cache/VariationTabix.pm"
                                  "VEP/AnnotationSource/Cache/BaseSerialized.pm"
                                  "Variation/Utils/BaseVepTabixPlugin.pm"
                                  "Variation/Utils/VEP.pm"
                                  "Variation/Pipeline/ReleaseDataDumps/PreRunChecks.pm")
                     (("`which")
                      (string-append "`"
                                     (assoc-ref inputs "which")
                                     "/bin/which")))))))
           (add-after 'install 'check
             (lambda* (#:key tests? inputs outputs #:allow-other-keys)
               (when tests?
                 ;; Let the tests find the installed modules.
                 (setenv "PERL5LIB"
                         (string-append (getenv "PERL5LIB")
                                        ":"
                                        (assoc-ref outputs "out")
                                        "/lib/perl5/site_perl"))
                 ;; Run the tests from a writable copy in /tmp/t.
                 (copy-recursively
                  (string-append (assoc-ref inputs "source") "/t")
                  "/tmp/t")
                 (for-each make-file-writable (find-files "/tmp/t"))
                 ;; TODO: haplo needs Set/IntervalTree.pm
                 (invoke "perl" "-e" "
use Test::Harness; use Test::Exception;
my $dirname = \"/tmp\";
opendir TEST, \"$dirname\\/t\";
my @test_files = map {\"$dirname\\/t\\/\".$_} grep {!/^\\./ && /\\.t$/} readdir TEST; closedir TEST;
@test_files = grep {!/Haplo/} @test_files;
runtests(@test_files);
")))))))
      (inputs
       (list bioperl-minimal
             perl-bio-db-hts
             perl-dbi
             perl-dbd-mysql
             perl-libwww
             perl-http-tiny
             perl-json
             which))
      (propagated-inputs
       (list kentutils))
      (native-inputs
       `(("unzip" ,unzip)
         ("perl" ,perl)
         ("api-module-ensembl"
          ,(api-module "ensembl"
                       "0s59rj905g72hljzfpvnx5nxwz925b917y4jp912i23f5gwxh14v"))
         ("api-module-ensembl-variation"
          ,(api-module "ensembl-variation"
                       "1dvwdzzfjhzymq02b6n4p6j3a9q4jgq0g89hs7hj1apd7zhirgkq"))
         ("api-module-ensembl-funcgen"
          ,(api-module "ensembl-funcgen"
                       "1x23pv38dmv0w0gby6rv3wds50qghb4v3v1mf43vk55msfxzry8n"))
         ("api-module-ensembl-io"
          ,(api-module "ensembl-io"
                       "14adb2x934lzsq20035mazdkhrkcw0qzb0xhz6zps9vk4wixwaix"))
         ("perl-test-harness" ,perl-test-harness)
         ("perl-test-exception" ,perl-test-exception)))
      (home-page "http://www.ensembl.org/vep")
      (synopsis "Predict functional effects of genomic variants")
      (description
       "This package provides a Variant Effect Predictor, which predicts
the functional effects of genomic variants. It also provides
Haplosaurus, which uses phased genotype data to predict
whole-transcript haplotype sequences, and Variant Recoder, which
translates between different variant encodings.")
      (license license:asl2.0))))
15783
(define-public r-signac
  ;; Build from a fixed upstream commit rather than a release tag.
  (let ((signac-commit "458e647b503c3472b0b98c0aeca934f452e039ee")
        (signac-revision "2"))
    (package
      (name "r-signac")
      (version (git-version "1.6.0" signac-revision signac-commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/timoast/signac/")
               (commit signac-commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1hgwpgighkvfkai80n4d2252s4sdpa4faag4ncdiylicl5wa7lbj"))))
      (properties '((upstream-name . "Signac")))
      (build-system r-build-system)
      (inputs
       (list zlib))
      (propagated-inputs
       (list r-biocgenerics
             r-data-table
             r-dplyr
             r-fastmatch
             r-future
             r-future-apply
             r-genomeinfodb
             r-genomicranges
             r-ggforce
             r-ggplot2
             r-ggrepel
             r-ggseqlogo
             r-iranges
             r-irlba
             r-lsa
             r-matrix
             r-patchwork
             r-pbapply
             r-qlcmatrix
             r-rcpp
             r-rcpproll
             r-rsamtools
             r-s4vectors
             r-scales
             r-seurat
             r-seuratobject
             r-stringi
             r-tidyr
             r-tidyselect))
      (home-page "https://github.com/timoast/signac/")
      (synopsis "Analysis of single-cell chromatin data")
      (description
       "This package provides a framework for the analysis and exploration of
single-cell chromatin data. The Signac package contains functions for
quantifying single-cell chromatin data, computing per-cell quality control
metrics, dimension reduction and normalization, visualization, and DNA
sequence motif analysis.")
      (license license:expat))))
15841
(define-public tombo
  (package
    (name "tombo")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "ont-tombo" version))
       (sha256
        (base32 "1023hadgcsgi53kz53ql45207hfizf9sw57z0qij3ay1bx68zbpm"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #false))                ;no tests
    (native-inputs
     (list python-cython python-nose2))
    ;; Besides the command-line tool there is also a Python API, so these
    ;; must be propagated.
    (propagated-inputs
     (list python-future
           python-h5py
           python-mappy
           python-numpy
           python-scipy
           python-tqdm
           python-rpy2))
    (home-page "https://github.com/nanoporetech/tombo")
    (synopsis "Analysis of raw nanopore sequencing data")
    (description "Tombo is a suite of tools primarily for the identification of
modified nucleotides from nanopore sequencing data. Tombo also provides tools
for the analysis and visualization of raw nanopore signal.")
    ;; Some parts may be BSD-3-licensed.
    (license license:mpl2.0)))
15875
(define-public python-pyvcf
  ;; Latest release is not tagged, so refer to the commit directly.
  (let ((pyvcf-commit "476169cd457ba0caa6b998b301a4d91e975251d9")
        (pyvcf-revision "0"))
    (package
      (name "python-pyvcf")
      (version (git-version "0.6.8" pyvcf-revision pyvcf-commit))
      ;; Use git, because the PyPI tarballs lack test data.
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/jamescasbon/PyVCF.git")
               (commit pyvcf-commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0qf9lwj7r2hjjp4bd4vc7nayrhblfm4qcqs4dbd43a6p4bj2jv5p"))))
      (build-system python-build-system)
      (arguments
       '(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'patch-sample-script
             (lambda _
               ;; Add Python 3 compatibility to this sample script by
               ;; turning print statements into function calls.
               (substitute* "scripts/vcf_sample_filter.py"
                 (("print (.*)\n" _ printed)
                  (string-append "print(" printed ")\n")))))
           (add-after 'install 'remove-installed-tests
             ;; Do not install test files.
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let ((installed-tests
                      (string-append (site-packages inputs outputs)
                                     "/vcf/test")))
                 (delete-file-recursively installed-tests)))))))
      (native-inputs
       ;; Older setuptools is needed for use_2to3.
       (list python-cython python-setuptools-for-tensorflow))
      (propagated-inputs
       (list python-pysam python-rpy2))
      (home-page "https://github.com/jamescasbon/PyVCF")
      (synopsis "Variant Call Format parser for Python")
      (description "This package provides a @acronym{VCF,Variant Call Format}
parser for Python.")
      (license license:expat))))
15920
(define-public nanosv
  (package
    (name "nanosv")
    (version "1.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "NanoSV" version))
       (sha256
        (base32 "1wl2daj0bwrl8fx5xi8j8hfs3mp3vg3qycy66538n032v1qkc6xg"))))
    (build-system python-build-system)
    (inputs
     (list python-configparser
           python-pysam
           python-pyvcf))
    (home-page "https://github.com/mroosmalen/nanosv")
    (synopsis "Structural variation detection tool for Oxford Nanopore data")
    (description "NanoSV is a software package that can be used to identify
structural genomic variations in long-read sequencing data, such as data
produced by Oxford Nanopore Technologies’ MinION, GridION or PromethION
instruments, or Pacific Biosciences RSII or Sequel sequencers.")
    (license license:expat)))
15941
(define-public python-strawc
  (package
    (name "python-strawc")
    (version "0.0.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "strawC" version))
       (sha256
        (base32
         "1z1gy8n56lhriy6hdkh9r82ndikndipq2cy2wh8q185qig4rimr6"))))
    (build-system python-build-system)
    (inputs
     (list curl zlib))
    (propagated-inputs
     (list pybind11))
    (home-page "https://github.com/aidenlab/straw")
    (synopsis "Stream data from .hic files")
    (description "Straw is a library which allows rapid streaming of contact
data from @file{.hic} files. This package provides Python bindings.")
    (license license:expat)))
15963
(define-public python-pybbi
  (package
    (name "python-pybbi")
    (version "0.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pybbi" version))
       (sha256
        (base32
         "1hvy2f28i2b41l1pq15vciqbj538n0lichp8yr6413jmgg06xdsk"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #false                  ; tests require network access
       #:phases
       (modify-phases %standard-phases
         ;; Set the CC environment variable to gcc for the build.
         (add-after 'unpack 'set-cc
           (lambda _ (setenv "CC" "gcc")))
         ;; When tests are enabled, run pytest on a copy of the test
         ;; directory under /tmp, with the installed package on the Python
         ;; path.
         (replace 'check
           (lambda* (#:key inputs outputs tests? #:allow-other-keys)
             (when tests?
               (add-installed-pythonpath inputs outputs)
               (copy-recursively "tests" "/tmp/tests")
               (with-directory-excursion "/tmp/tests"
                 (invoke "python" "-m" "pytest" "-v"))))))))
    (native-inputs
     (list pkg-config python-pkgconfig python-pytest))
    (inputs
     (list libpng openssl zlib))
    (propagated-inputs
     (list python-cython python-numpy python-pandas python-six))
    (home-page "https://github.com/nvictus/pybbi")
    (synopsis "Python bindings to UCSC Big Binary file library")
    (description
     "This package provides Python bindings to the UCSC Big
Binary (bigWig/bigBed) file library. This provides read-level access to local
and remote bigWig and bigBed files but no write capabilities. The main
feature is fast retrieval of range queries into numpy arrays.")
    (license license:expat)))
16003
(define-public python-dna-features-viewer
  (package
    (name "python-dna-features-viewer")
    (version "3.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "dna_features_viewer" version))
       (sha256
        (base32 "0vci6kg2id6r6rh3cifq7ccnh7j0mb8iqg3hji6rva0ayrdqzafc"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #false))                ; there are none
    (propagated-inputs
     (list python-biopython
           python-matplotlib))
    (home-page
     "https://github.com/Edinburgh-Genome-Foundry/DnaFeaturesViewer")
    (synopsis "Plot features from DNA sequences")
    (description
     "DNA Features Viewer is a Python library to visualize DNA features,
e.g. from GenBank or Gff files, or Biopython SeqRecords.")
    (license license:expat)))
16026
(define-public python-coolbox
  (package
    (name "python-coolbox")
    (version "0.3.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "coolbox" version))
       (sha256
        (base32
         "0gqp76285w9klswr47y6kxbzwhv033b26jfa179kccfhiaq5p2xa"))))
    (build-system python-build-system)
    (arguments '(#:tests? #false))      ; there are none
    (inputs
     (list pybind11))
    (propagated-inputs
     (list python-cooler
           python-dna-features-viewer
           python-fire
           python-h5py
           python-intervaltree
           python-ipywidgets
           jupyter
           python-matplotlib
           python-nbformat
           python-numpy
           python-numpydoc
           python-pandas
           python-pybbi
           python-pytest
           python-scipy
           python-statsmodels
           python-strawc
           python-svgutils
           python-termcolor
           python-voila))
    (home-page "https://github.com/GangCaoLab/CoolBox")
    (synopsis "Genomic data visualization toolkit")
    (description
     "CoolBox is a toolkit for visual analysis of genomics data. It aims to
be highly compatible with the Python ecosystem, easy to use and highly
customizable with a well-designed user interface. It can be used in various
visualization situations, for example, to produce high-quality genome track
plots or fetch commonly used genomic data files with a Python script or command
line, interactively explore genomic data within a Jupyter environment or web
browser.")
    (license license:gpl3+)))
16074
;; Python bindings for spoa.  The git checkout is recursive, and libspoa is
;; built with CMake in "src/build" before the regular Python build runs.
(define-public python-pyspoa
  (package
    (name "python-pyspoa")
    (version "0.0.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nanoporetech/pyspoa")
             (commit (string-append "v" version))
             (recursive? #true)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1lgf2shzhxkcsircd6vy46h27pjljd5q95fyz1cm3lkk702qbnzx"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Build the spoa library out of tree, in "src/build", before the
         ;; Python extension is compiled.
         (add-before 'build 'build-libspoa
           (lambda _
             (mkdir-p "src/build")
             (with-directory-excursion "src/build"
               ;; NOTE(review): the escaped quotes below are passed to cmake
               ;; literally because `invoke' does not go through a shell;
               ;; confirm that the cereal include directory is honoured.
               (invoke "cmake"
                       "-Dspoa_optimize_for_portability=ON"
                       "-DCMAKE_BUILD_TYPE=Release"
                       "-DCMAKE_CXX_FLAGS=\"-I ../vendor/cereal/include/\" -fPIC"
                       "..")
               (invoke "make"))))
         ;; Run the upstream test script against the installed modules.
         (replace 'check
           (lambda* (#:key inputs outputs tests? #:allow-other-keys)
             (when tests?
               (add-installed-pythonpath inputs outputs)
               (invoke "python" "tests/test_pyspoa.py")))))))
    (propagated-inputs
     (list pybind11))
    (native-inputs
     (list cmake-minimal))
    (home-page "https://github.com/nanoporetech/pyspoa")
    (synopsis "Python bindings for the SIMD partial order alignment library")
    (description
     "This package provides Python bindings for spoa, a C++ implementation of
the @dfn{partial order alignment} (POA) algorithm (as described in
10.1093/bioinformatics/18.3.452) which is used to generate consensus
sequences.")
    (license license:expat)))
16121
;; Python bindings to bwa, fetched from PyPI.  Files matching "\.o$" found in
;; the tarball are removed by the source snippet.
(define-public python-bwapy
  (package
    (name "python-bwapy")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "bwapy" version))
       (sha256
        (base32 "090qwx3vl729zn3a7sksbviyg04kc71gpbm3nd8dalqp673x1npw"))
       (modules '((guix build utils)))
       (snippet
        '(for-each delete-file (find-files "." "\\.o$")))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Lower the minimum wheel version requested by setup.py.
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               (("wheel>=0.34") "wheel>=0.30"))))
         ;; TODO: it's possible that the import error points to a real
         ;; problem with the C sources.
         (delete 'sanity-check))))
    (propagated-inputs
     (list python-cffi python-setuptools python-wheel))
    (inputs
     (list zlib))
    (home-page "https://github.com/ACEnglish/bwapy")
    (synopsis "Python bindings to the bwa aligner")
    (description "This package provides Python bindings to the bwa mem
aligner.")
    ;; These Python bindings are licensed under Mozilla Public License 2.0,
    ;; bwa itself is licenced under GNU General Public License v3.0.
    (license license:mpl2.0)))
16157
;; scVelo, fetched from PyPI.  The test suite is run from the installed copy
;; of the package rather than from the build directory.
(define-public scvelo
  (package
    (name "scvelo")
    (version "0.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "scvelo" version))
       (sha256
        (base32 "0h5ha1459ljs0qgpnlfsw592i8dxqn6p9bl08l1ikpwk36baxb7z"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Numba needs a writable dir to cache functions.
         (add-before 'check 'set-numba-cache-dir
           (lambda _
             (setenv "NUMBA_CACHE_DIR" "/tmp")))
         (replace 'check
           (lambda* (#:key inputs outputs tests? #:allow-other-keys)
             (when tests?
               ;; The discovered test file names must match the names of the
               ;; compiled files, so we cannot run the tests from
               ;; /tmp/guix-build-*.
               ;;
               ;; Ask the build system for the site-packages directory
               ;; instead of hard-coding the Python version, so that this
               ;; phase keeps working when the Python input is upgraded.
               (with-directory-excursion
                   (string-append (site-packages inputs outputs)
                                  "/scvelo/core/tests/")
                 (invoke "pytest" "-v"))))))))
    (propagated-inputs
     (list python-anndata
           python-hnswlib
           python-isort
           python-igraph
           python-loompy
           python-louvain
           python-matplotlib
           python-numba
           python-numpy
           python-pandas
           python-scanpy
           python-scikit-learn
           python-scipy
           python-umap-learn
           pybind11))
    (native-inputs
     (list python-black
           python-flake8
           python-hypothesis
           python-pre-commit
           python-pytest
           python-setuptools-scm
           python-wheel))
    (home-page "https://scvelo.org")
    (synopsis "RNA velocity generalized through dynamical modeling")
    (description "ScVelo is a scalable toolkit for RNA velocity analysis in
single cells. RNA velocity enables the recovery of directed dynamic
information by leveraging splicing kinetics. scVelo generalizes the concept of
RNA velocity by relaxing previously made assumptions with a stochastic and a
dynamical model that solves the full transcriptional dynamics. It thereby
adapts RNA velocity to widely varying specifications such as non-stationary
populations.")
    (license license:bsd-3)))
16220
;; Scregseg, built from the upstream git tag.  The source snippet deletes
;; "src/scregseg/_utils.c"; python-cython is available as a native input.
(define-public scregseg
  (package
    (name "scregseg")
    (version "0.1.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BIMSBbioinfo/scregseg")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1k8hllr5if6k2mm2zj391fv40sfc008cjm04l9vgfsdppb80i112"))
       (snippet
        #~(begin
            (use-modules (guix build utils))
            (delete-file "src/scregseg/_utils.c")))))
    (build-system python-build-system)
    (arguments
     (list
      #:tests? #false                   ; tests require network access
      #:phases
      #~(modify-phases %standard-phases
          ;; Drop the 'sklearn' entry from setup.py.
          (add-after 'unpack 'do-not-fail-to-find-sklearn
            (lambda _
              ;; XXX: I have no idea why it cannot seem to find sklearn.
              (substitute* "setup.py"
                (("'sklearn',") "")))))))
    (native-inputs
     (list python-cython))
    (propagated-inputs
     (list python-scikit-learn
           python-scipy
           python-numpy
           python-hmmlearn
           python-pandas
           python-numba
           python-anndata
           python-scanpy
           python-pybedtools
           python-pysam
           python-matplotlib
           python-seaborn
           python-coolbox))
    (home-page "https://github.com/BIMSBbioinfo/scregseg")
    (synopsis "Single-cell regulatory landscape segmentation")
    (description "Scregseg (Single-Cell REGulatory landscape SEGmentation) is a
tool that facilitates the analysis of single cell ATAC-seq data by an
HMM-based segmentation algorithm. Scregseg uses an HMM with
Dirichlet-Multinomial emission probabilities to segment the genome either
according to distinct relative cross-cell accessibility profiles or (after
collapsing the single-cell tracks to pseudo-bulk tracks) to capture distinct
cross-cluster accessibility profiles.")
    (license license:gpl3+)))
16274
;; Megadepth, built with CMake from "CMakeLists.txt.ci".  The static targets
;; are commented out, and the dynamically linked binary is installed as
;; "megadepth".
(define-public megadepth
  (package
    (name "megadepth")
    (version "1.1.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ChristopherWilks/megadepth")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0hj69d2dgmk2zwgazik7xzc04fxxlk93p888kpgc52fmhd95qph7"))))
    (build-system cmake-build-system)
    (arguments
     (list
      #:tests? #false       ; some tests seem to require connection to
                            ; www.ebi.ac.uk; this may be caused by htslib.
      #:phases
      #~(modify-phases %standard-phases
          (add-after 'unpack 'prepare-CMakeLists.txt
            (lambda _
              ;; Use the CI variant of the build description.
              (rename-file "CMakeLists.txt.ci" "CMakeLists.txt")
              ;; Hard-code the version and comment out the static targets.
              (substitute* "CMakeLists.txt"
                (("`cat ../VERSION`") #$version)
                (("target_link_libraries\\(megadepth_static") "#")
                (("target_link_libraries\\(megadepth_statlib") "#")
                (("add_executable\\(megadepth_static") "#")
                (("add_executable\\(megadepth_statlib") "#"))

              (substitute* "tests/test.sh"
                ;; Disable remote test
                (("./megadepth http://stingray.cs.jhu.edu/data/temp/test.bam") "#")
                ;; Prior to installation the binary's name differs from what
                ;; the test script assumes.
                (("./megadepth") "../build/megadepth_dynamic"))))
          (replace 'check
            (lambda* (#:key tests? #:allow-other-keys)
              (when tests?
                (with-directory-excursion "../source"
                  (invoke "bash" "tests/test.sh" "use-local-test-data")))))
          ;; Install only the dynamically linked executable, renamed.
          (replace 'install
            (lambda _
              (let ((bin (string-append #$output "/bin")))
                (mkdir-p bin)
                (copy-file "megadepth_dynamic"
                           (string-append bin "/megadepth"))))))))
    (native-inputs
     (list diffutils perl grep))
    (inputs
     (list curl htslib libdeflate libbigwig zlib))
    (home-page "https://github.com/ChristopherWilks/megadepth")
    (synopsis "BigWig and BAM/CRAM related utilities")
    (description "Megadepth is an efficient tool for extracting coverage
related information from RNA and DNA-seq BAM and BigWig files. It supports
reading whole-genome coverage from BAM files and writing either indexed TSV or
BigWig files, as well as efficient region coverage summary over intervals from
both types of files.")
    (license license:expat)))
16333
;; Mudskipper, built from the crates.io release with the Cargo build system.
(define-public mudskipper
  (package
    (name "mudskipper")
    (version "0.1.0")
    (source
     (origin
       (method url-fetch)
       (uri (crate-uri "mudskipper" version))
       (file-name (string-append name "-" version ".tar.gz"))
       (sha256
        (base32
         "1y7fnlz6irmxdmv6bxzm95w4ws4vzldlrh8npvgxmdnrz9pgb1dv"))))
    (build-system cargo-build-system)
    (arguments
     `(#:tests? #false  ;fail because the "mudskipper" crate cannot be found
       #:cargo-inputs
       (("rust-bio" ,rust-bio-0.39)
        ("rust-bio-types" ,rust-bio-types-0.12)
        ("rust-clap" ,rust-clap-2)
        ("rust-coitrees" ,rust-coitrees-0.2)
        ("rust-env-logger" ,rust-env-logger-0.9)
        ("rust-fnv" ,rust-fnv-1)
        ("rust-indicatif" ,rust-indicatif-0.16)
        ("rust-libradicl" ,rust-libradicl-0.4)
        ("rust-linecount" ,rust-linecount-0.1)
        ("rust-log" ,rust-log-0.4)
        ("rust-num-cpus" ,rust-num-cpus-1)
        ("rust-rust-htslib" ,rust-rust-htslib-0.38))))
    (native-inputs
     (list cmake pkg-config))
    (inputs
     (list zlib xz))
    (home-page "https://github.com/OceanGenomics/mudskipper")
    ;; A synopsis must not end with a period.
    (synopsis "Convert genomic alignments to transcriptomic BAM/RAD files")
    (description "Mudskipper is a tool for projecting genomic alignments to
transcriptomic coordinates.")
    (license license:bsd-3)))
16370
;; ASCAT, built from the upstream git tag.  The build enters the "ASCAT"
;; sub-directory of the checkout before the R build phases run.
(define-public r-ascat
  (package
    (name "r-ascat")
    (version "2.5.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Crick-CancerGenomics/ascat.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0cc0y3as6cb64iwnx0pgbajiig7m4z723mns9d5i4j09ccid3ccm"))))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Switch into the "ASCAT" directory right after unpacking.
         (add-after 'unpack 'move-to-ascat-dir
           (lambda _ (chdir "ASCAT"))))))
    (propagated-inputs (list r-rcolorbrewer))
    (home-page "https://github.com/VanLoo-lab/ascat/")
    (synopsis "Allele-Specific Copy Number Analysis of Tumors in R")
    (description "This package provides the @acronym{ASCAT,Allele-Specific Copy
Number Analysis of Tumors} R package that can be used to infer tumour purity,
ploidy and allele-specific copy number profiles.")
    (license license:gpl3)))
16399
;; Battenberg, built from the upstream git tag with the R build system.
(define-public r-battenberg
  (package
    (name "r-battenberg")
    (version "2.2.9")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Wedge-lab/battenberg.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0nmcq4c7y5g8h8lxsq9vadz9bj4qgqn118alip520ny6czaxki4h"))))
    (build-system r-build-system)
    ;; Run-time dependencies, in alphabetical order.
    (propagated-inputs
     (list r-ascat
           r-devtools
           r-doparallel
           r-ggplot2
           r-gridextra
           r-gtools
           r-rcolorbrewer
           r-readr))
    (home-page "https://github.com/Wedge-lab/battenberg")
    (synopsis "Subclonal copy number estimation in R")
    (description "This package contains the Battenberg R package for subclonal
copy number estimation, as described by
@url{doi:10.1016/j.cell.2012.04.023,Nik-Zainal et al.}")
    (license license:gpl3)))
16429
;; CaTCH, pinned to a git commit.  The build enters the "CaTCH" sub-directory
;; of the checkout before the R build phases run.
(define-public r-catch
  (let* ((revision "1")
         (commit "196ddd5a51b1a5f5daa01de53fdaad9b7505e084"))
    (package
      (name "r-catch")
      (version (git-version "1.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/zhanyinx/CaTCH")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "11c7f1fc8f57wnwk1hrgr5y814m80zj8gkz5021vxyxy2v02cqgd"))))
      (build-system r-build-system)
      (arguments
       '(#:phases
         (modify-phases %standard-phases
           ;; Switch into the "CaTCH" directory right after unpacking.
           (add-after 'unpack 'chdir
             (lambda _
               (chdir "CaTCH"))))))
      (home-page "https://github.com/zhanyinx/CaTCH_R")
      (synopsis "Call a hierarchy of domains based on Hi-C data")
      (description "This package allows building the hierarchy of domains
starting from Hi-C data. Each hierarchical level is identified by a minimum
value of physical insulation between neighboring domains.")
      (license license:gpl2+))))
16457
;; Spectre, pinned to a git commit and built with the R build system.
(define-public r-spectre
  (let ((commit "f6648ab3eb9499300d86502b5d60ec370ae9b61a")
        (revision "1"))
    (package
      (name "r-spectre")
      (version (git-version "0.5.5" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ImmuneDynamics/Spectre")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0g38grrhbqqa4bmcilvdyawbkcnax6k4vffx2giywp18mbirmj0x"))))
      (properties `((upstream-name . "Spectre")))
      (build-system r-build-system)
      ;; Each dependency is listed exactly once; the duplicated entries for
      ;; r-gridextra and r-tidyr have been removed.
      (propagated-inputs
       (list r-biobase
             r-biocmanager
             r-caret
             r-class
             r-colorramps
             r-data-table
             r-devtools
             r-dplyr
             r-exactextractr
             r-factoextra
             r-flowcore
             r-flowsom
             r-flowviz
             r-fnn
             r-ggplot2
             r-ggpointdensity
             r-ggpubr
             r-ggraph
             r-ggthemes
             r-gridextra
             r-gtools
             r-hdf5array
             r-irlba
             r-pheatmap
             r-plyr
             r-qs
             r-raster
             r-rcolorbrewer
             r-rgeos
             r-rhdf5
             r-rstudioapi
             r-rsvd
             r-rtsne
             r-s2
             r-scales
             r-sf
             r-sp
             r-stars
             r-stringr
             r-tidygraph
             r-tidyr
             r-tiff
             r-umap))
      (home-page "https://github.com/ImmuneDynamics/Spectre")
      (synopsis "High-dimensional cytometry and imaging analysis")
      (description
       "This package provides a computational toolkit in R for the
integration, exploration, and analysis of high-dimensional single-cell
cytometry and imaging data.")
      (license license:expat))))
16529
;; compGenomRData, pinned to a git commit and built with the R build system.
(define-public r-compgenomrdata
  (let* ((revision "1")
         (commit "24484cb77631e1123ead6c329b9d62c160e600c6"))
    (package
      (name "r-compgenomrdata")
      (version (git-version "0.1.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/compgenomr/compGenomRData")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "06gdvz4m4qlb1ylv10qfx09zv4c95cm7nps4y2s67m946kv8czv8"))))
      ;; Upstream spells the package name in mixed case.
      (properties '((upstream-name . "compGenomRData")))
      (build-system r-build-system)
      (home-page "https://github.com/compgenomr/compGenomRData")
      (synopsis "Data for Computational Genomics with R book")
      (description "This package provides data for the book \"Computational
Genomics with R\".")
      (license license:gpl3))))
16552
;; CytoNorm, pinned to a git commit and built with the R build system.
(define-public r-cytonorm
  (let* ((revision "1")
         (commit "e4b9d343ee65db3c422800f1db3e77c25abde987"))
    (package
      (name "r-cytonorm")
      (version (git-version "0.0.7" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/saeyslab/CytoNorm")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0h2rdy15i4zymd4dv60n5w0frbsdbmzpv99dgm0l2dn041qv7fah"))))
      ;; Upstream spells the package name in mixed case.
      (properties '((upstream-name . "CytoNorm")))
      (build-system r-build-system)
      (propagated-inputs
       (list r-cytoml r-dplyr r-emdist r-flowcore r-flowsom
             r-flowworkspace r-ggplot2 r-gridextra r-pheatmap r-stringr))
      (home-page "https://github.com/saeyslab/CytoNorm")
      (synopsis "Normalize cytometry data measured across multiple batches")
      (description
       "This package can be used to normalize cytometry samples when a control
sample is taken along in each of the batches. This is done by first
identifying multiple clusters/cell types, learning the batch effects from the
control samples and applying quantile normalization on all markers of
interest.")
      (license license:gpl2+))))
16591
;; kBET, pinned to a git commit and built with the R build system.
(define-public r-kbet
  (let ((commit "f35171dfb04c7951b8a09ac778faf7424c4b6bc0")
        (revision "1"))
    (package
      (name "r-kbet")
      (version (git-version "0.99.6" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/theislab/kBET")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1r91prl2kki3zk694vhlmxdlqh0ixlhs8jfcqw6wc7cdsa0nv67k"))))
      (properties `((upstream-name . "kBET")))
      (build-system r-build-system)
      (propagated-inputs (list r-cluster r-fnn r-ggplot2 r-mass r-rcolorbrewer))
      (native-inputs (list r-knitr))
      (home-page "https://github.com/theislab/kBET")
      ;; A synopsis has to start with an upper-case letter or a digit.
      (synopsis "K-nearest neighbour batch effect test")
      (description
       "This tool detects batch effects in high-dimensional data based on chi^2-test.")
      ;; Any version of the GPL
      (license license:gpl3+))))
16617
;; ccwl, built from the release tarball.  After installation the "ccwl"
;; executable is wrapped so that GUILE_LOAD_PATH and GUILE_LOAD_COMPILED_PATH
;; include the package's own Guile directories.
(define-public ccwl
  (package
    (name "ccwl")
    (version "0.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://ccwl.systemreboot.net/releases/ccwl-"
                           version ".tar.lz"))
       (sha256
        (base32
         "1ar8rfz3zrksgygrv67zv77y8gfvvz54zcs546jn6j28y20basla"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags '("GUILE_AUTO_COMPILE=0") ; to prevent guild warnings
       #:modules (((guix build guile-build-system)
                   #:select (target-guile-effective-version))
                  ,@%gnu-build-system-modules)
       #:imported-modules ((guix build guile-build-system)
                           ,@%gnu-build-system-modules)
       #:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (effective-version (target-guile-effective-version))
                    ;; Source modules installed by this package.
                    (site-dir
                     (string-append out "/share/guile/site/"
                                    effective-version))
                    ;; Compiled modules installed by this package.
                    (ccache-dir
                     (string-append out "/lib/guile/" effective-version
                                    "/site-ccache")))
               ;; Prefix both search paths with our own directories followed
               ;; by the values found in the build environment.
               (wrap-program (string-append out "/bin/ccwl")
                 (list "GUILE_LOAD_PATH" 'prefix
                       (list site-dir
                             (getenv "GUILE_LOAD_PATH")))
                 (list "GUILE_LOAD_COMPILED_PATH" 'prefix
                       (list ccache-dir
                             (getenv "GUILE_LOAD_COMPILED_PATH"))))))))))
    (inputs
     `(("bash" ,bash-minimal)
       ("guile" ,guile-3.0)
       ("guile-libyaml" ,guile-libyaml)))
    (native-inputs
     (list pkg-config
           lzip
           ;; To build documentation
           cwltool
           graphviz
           skribilo))
    (home-page "https://ccwl.systemreboot.net")
    (synopsis "Concise common workflow language")
    (description "The @acronym{ccwl, Concise Common Workflow Language} is a
concise syntax to express CWL workflows. ccwl is a compiler to generate CWL
workflows from concise descriptions in ccwl. It is implemented as an
@acronym{EDSL, Embedded Domain Specific Language} in the Scheme programming
language.")
    (license license:gpl3+)))
16670
;; HH-suite, built with CMake from the upstream git tag.  The "lib/simde"
;; directory is deleted from the source, and simde is provided as an input.
(define-public hh-suite
  (package
    (name "hh-suite")
    (version "3.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/soedinglab/hh-suite")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1bcmzg0ii6nkda2xm5jdddbwkgsag7k38j20af0c9chr2mbxwx4d"))
       (modules '((guix build utils)))
       (snippet
        '(delete-file-recursively "lib/simde"))))
    (build-system cmake-build-system)
    (arguments
     (list #:tests? #false))            ;no test target
    (inputs (list openmpi simde))
    (native-inputs (list perl pkg-config xxd))
    (home-page "https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-019-3019-7")
    (synopsis "Remote protein homology detection suite")
    (description "The HH-suite is a software package for sensitive protein sequence searching
based on the pairwise alignment of hidden Markov models (HMMs).")
    (license license:gpl3+)))
16699
;; wfmash, built with CMake from the upstream release tarball.  The check
;; phase replays the commands of the upstream CI workflow; the three tests on
;; the LPA subset are left out when building for riscv64.
(define-public wfmash
  (package
    (name "wfmash")
    (version "0.8.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/ekg/wfmash/releases/download/v"
                           version "/wfmash-v" version ".tar.gz"))
       (sha256
        (base32
         "031cm1arpfckvihb28vlk69mirpnmlag81zcscfba1bac58wvr7c"))
       (snippet
        #~(begin
            (use-modules (guix build utils))
            ;; Unbundle atomic-queue.
            (delete-file-recursively "src/common/atomic_queue")
            (substitute* "src/align/include/computeAlignments.hpp"
              (("\"common/atomic_queue/atomic_queue.h\"")
               "<atomic_queue/atomic_queue.h>"))
            ;; Remove compiler optimizations.
            (substitute* (find-files "." "CMakeLists\\.txt")
              (("-mcx16 ") "")
              (("-march=native ") ""))
            ;; Allow building on architectures other than x86_64.
            (substitute* "src/common/dset64.hpp"
              (("!__x86_64__") "0"))))))
    (build-system cmake-build-system)
    (arguments
     (list
      #:phases
      #~(modify-phases %standard-phases
          (replace 'check
            ;; Adapted from .github/workflows/test_on_push.yml
            (lambda* (#:key tests? inputs #:allow-other-keys)
              (when tests?
                (let* ((samtools (search-input-file inputs "/bin/samtools"))
                       ;; Run COMMAND with its standard output redirected to
                       ;; FILE.  There should be an easier way to do this
                       ;; with pipes.
                       (capture
                        (lambda (file . command)
                          (with-output-to-file file
                            (lambda _
                              (apply invoke command))))))
                  ;; This is the easiest way to access the data
                  ;; needed for the test suite.
                  (symlink (string-append "../wfmash-v" #$version "/data")
                           "data")
                  ;; Every test below runs with the same sanitizer settings.
                  (setenv "ASAN_OPTIONS" "detect_leaks=1:symbolize=1")
                  (setenv "LSAN_OPTIONS" "verbosity=0:log_threads=1")
                  (and
                   ;; Test with a subset of the LPA dataset (PAF output).
                   ;; This test takes 60 minutes on riscv64-linux.
                   #$@(if (target-riscv64?)
                          #~()
                          #~((begin
                               (capture "LPA.subset.paf"
                                        "bin/wfmash"
                                        "data/LPA.subset.fa.gz"
                                        "data/LPA.subset.fa.gz"
                                        "-X" "-n" "10" "-T" "wflign_info."
                                        "-u" "./")
                               (invoke "head" "LPA.subset.paf"))))
                   ;; Test with a subset of the LPA dataset (SAM output).
                   ;; This test takes about 5 hours on riscv64-linux.
                   #$@(if (target-riscv64?)
                          #~()
                          #~((begin
                               (capture "LPA.subset.sam"
                                        "bin/wfmash"
                                        "data/LPA.subset.fa.gz"
                                        "data/LPA.subset.fa.gz"
                                        "-X" "-N" "-a" "-T" "wflign_info.")
                               (capture "LPA.subset.sam-view"
                                        samtools "view" "LPA.subset.sam" "-bS")
                               (capture "LPA.subset.bam"
                                        samtools "sort" "LPA.subset.sam-view")
                               (invoke samtools "index" "LPA.subset.bam")
                               ;; samtools view LPA.subset.bam | head | cut -f 1-9
                               (capture "LPA.subset.bam-incr1"
                                        samtools "view" "LPA.subset.bam")
                               (capture "LPA.subset.bam-incr2"
                                        "head" "LPA.subset.bam-incr1")
                               (invoke "cut" "-f" "1-9"
                                       "LPA.subset.bam-incr2"))))
                   ;; Test with a subset of the LPA dataset, setting a lower
                   ;; identity threshold (PAF output).
                   ;; This test takes 60 minutes on riscv64-linux.
                   #$@(if (target-riscv64?)
                          #~()
                          #~((begin
                               (capture "LPA.subset.p90.paf"
                                        "bin/wfmash"
                                        "data/LPA.subset.fa.gz"
                                        "data/LPA.subset.fa.gz"
                                        "-X" "-p" "90" "-n" "10"
                                        "-T" "wflign_info.")
                               (invoke "head" "LPA.subset.p90.paf"))))
                   ;; Test aligning short reads (500 bps) to a reference
                   ;; (SAM output)
                   (begin
                     (capture "reads.500bps.sam"
                              "bin/wfmash"
                              "data/reference.fa.gz"
                              "data/reads.500bps.fa.gz"
                              "-s" "0.5k" "-N" "-a")
                     (capture "reads.500bps.sam-view"
                              samtools "view" "reads.500bps.sam" "-bS")
                     (capture "reads.500bps.bam"
                              samtools "sort" "reads.500bps.sam-view")
                     (invoke samtools "index" "reads.500bps.bam")
                     (capture "reads.500bps.bam-view"
                              samtools "view" "reads.500bps.bam")
                     (invoke "head" "reads.500bps.bam-view"))
                   ;; Test with few very short reads (255bps) (PAF output)
                   (begin
                     (capture "reads.255bps.paf"
                              "bin/wfmash"
                              "data/reads.255bps.fa.gz"
                              "data/reads.255bps.fa.gz"
                              "-X" "-w" "16")
                     (invoke "head" "reads.255bps.paf"))))))))))
    (inputs
     (list atomic-queue
           gsl
           htslib
           jemalloc
           zlib))
    (native-inputs
     (list samtools))
    (synopsis "Base-accurate DNA sequence aligner")
    (description "@code{wfmash} is a DNA sequence read mapper based on mash
distances and the wavefront alignment algorithm. It is a fork of MashMap that
implements base-level alignment via the wflign tiled wavefront global
alignment algorithm. It completes MashMap with a high-performance alignment
module capable of computing base-level alignments for very large sequences.")
    (home-page "https://github.com/ekg/wfmash")
    (license license:expat)))
16851
;; FLAIR, built from the upstream git tag.  The build and install phases are
;; replaced: a wheel is produced with "python -m build" and installed with pip.
(define-public flair
  (package
    (name "flair")
    (version "1.6.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BrooksLabUCSC/flair")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "106swb2q7l20ki58fca1hg95q5f79bgp9gjb0clr2243ycrzyxf8"))))
    (build-system python-build-system)
    (arguments
     (list
      #:tests? #false                   ;there are none
      #:phases
      #~(modify-phases %standard-phases
          ;; TODO: implement as a feature of python-build-system (PEP-621,
          ;; PEP-631, PEP-660)
          (replace 'build
            (lambda _
              (setenv "SETUPTOOLS_SCM_PRETEND_VERSION" #$version)
              ;; ZIP does not support timestamps before 1980.
              (setenv "SOURCE_DATE_EPOCH" "315532800")
              (invoke "python" "-m" "build" "--wheel" "--no-isolation" ".")))
          (replace 'install
            (lambda _
              ;; Install every wheel found in "dist" with one pip call.
              (let ((wheels (find-files "dist" "\\.whl$")))
                (apply invoke "pip" "--no-cache-dir" "--no-input"
                       "install" "--no-deps" "--prefix" #$output
                       wheels)))))))
    (propagated-inputs
     (list python-mappy python-ncls python-pybedtools python-pysam
           python-tqdm))
    (native-inputs
     (list python-pypa-build python-setuptools))
    (home-page "https://flair.readthedocs.io/en/latest/")
    (synopsis "Full-length alternative isoform analysis of RNA")
    (description "This package implements FLAIR (Full-Length Alternative
Isoform analysis of RNA) for the correction, isoform definition, and
alternative splicing analysis of noisy reads. FLAIR has primarily been used
for nanopore cDNA, native RNA, and PacBio sequencing reads.")
    (license license:bsd-3)))
16900
;; biogo graph package, pinned to a Go pseudo-version.
(define-public go-github-com-biogo-graph
  (package
    (name "go-github-com-biogo-graph")
    (version "0.0.0-20150317020928-057c1989faed")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biogo/graph")
             ;; Derive the git reference from the pseudo-version string.
             (commit (go-version->git-ref version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1kpzs5dfd5dsk4mg1g2qjz1prqd84ixhrcxxnf90hq25vxcnk7lh"))))
    (build-system go-build-system)
    (arguments
     (list #:import-path "github.com/biogo/graph"
           ;; TODO: one of 13 tests fails for unknown reasons
           #:tests? #false))
    (propagated-inputs (list go-gopkg-in-check-v1))
    (home-page "https://github.com/biogo/graph")
    (synopsis "Undirected graph analysis for biogo")
    (description "The package @code{graph} implements graph manipulation
functions.")
    (license license:bsd-3)))
16925
;; The "interval" package of the biogo store repository, pinned to a Go
;; pseudo-version.
(define-public go-github-com-biogo-store-interval
  (package
    (name "go-github-com-biogo-store-interval")
    (version "0.0.0-20201120204734-aad293a2328f")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biogo/store")
             ;; Derive the git reference from the pseudo-version string.
             (commit (go-version->git-ref version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0skizrp1j6vgbl0g1kmh73picagqlvwckaqs0gkl6rai5lckxj8a"))))
    (build-system go-build-system)
    (arguments
     ;; The import path names one sub-package; the unpack path names the
     ;; whole repository.
     (list #:import-path "github.com/biogo/store/interval"
           #:unpack-path "github.com/biogo/store"))
    (propagated-inputs
     (list go-gopkg-in-check-v1 go-github-com-kr-pretty))
    (home-page "https://github.com/biogo/store")
    (synopsis "Interval store type for biogo")
    (description
     "The @code{store} package provides a number of data store types that are
useful for bioinformatic analysis.")
    (license license:bsd-3)))
16952
;; The "kdtree" package of the biogo store repository.  Every field not
;; given here, including the propagated inputs (go-gopkg-in-check-v1 and
;; go-github-com-kr-pretty), is inherited from the interval package.
(define-public go-github-com-biogo-store-kdtree
  (package
    (inherit go-github-com-biogo-store-interval)
    (name "go-github-com-biogo-store-kdtree")
    (arguments
     (list #:import-path "github.com/biogo/store/kdtree"
           #:unpack-path "github.com/biogo/store"))
    (synopsis "kdtree store type for biogo")))
16964
;; LLRB store from the biogo "store" repository.  Everything except the
;; name, import path and synopsis comes from
;; go-github-com-biogo-store-interval.
(define-public go-github-com-biogo-store-llrb
  (package
    (inherit go-github-com-biogo-store-interval)
    (name "go-github-com-biogo-store-llrb")
    (arguments
     (list #:import-path "github.com/biogo/store/llrb"
           #:unpack-path "github.com/biogo/store"))
    ;; The propagated inputs are identical to the parent's, so they are
    ;; inherited instead of being listed again.
    (synopsis "LLRB store for biogo")))
16976
;; Step store from the biogo "store" repository.  Everything except the
;; name, import path and synopsis comes from
;; go-github-com-biogo-store-interval.
(define-public go-github-com-biogo-store-step
  (package
    (inherit go-github-com-biogo-store-interval)
    (name "go-github-com-biogo-store-step")
    (arguments
     (list #:import-path "github.com/biogo/store/step"
           #:unpack-path "github.com/biogo/store"))
    ;; The propagated inputs are identical to the parent's, so they are
    ;; inherited instead of being listed again.
    (synopsis "Step store for biogo")))
16988
;; BAM package from the biogo "hts" repository.  The other biogo hts
;; packages in this file (sam, tabix, bgzf, cram, csi, fai) inherit from
;; this definition.
(define-public go-github-com-biogo-hts-bam
  (package
    (name "go-github-com-biogo-hts-bam")
    (version "1.4.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biogo/hts")
             ;; The tag fetched is the version prefixed with "v".
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "013ga6ilc4m3hyfr3yyiva9g4vs81afhj73v2sy7r75b5zxw7lx1"))))
    (build-system go-build-system)
    (arguments
     ;; The checkout is unpacked at the repository's import path; the "bam"
     ;; sub-package is the one selected by #:import-path.
     (list #:import-path "github.com/biogo/hts/bam"
           #:unpack-path "github.com/biogo/hts"))
    (propagated-inputs
     (list go-gopkg-in-check-v1))
    (home-page "https://github.com/biogo/hts")
    (synopsis "HTS BAM module for biogo")
    (description
     "This package provides tools for handling BAM files.")
    (license license:bsd-3)))
17012
;; SAM package from the biogo "hts" repository.  Source, version, home page,
;; license and propagated inputs come from go-github-com-biogo-hts-bam.
(define-public go-github-com-biogo-hts-sam
  (package
    (inherit go-github-com-biogo-hts-bam)
    (name "go-github-com-biogo-hts-sam")
    (arguments
     (list #:import-path "github.com/biogo/hts/sam"
           #:unpack-path "github.com/biogo/hts"))
    ;; The propagated inputs are identical to the parent's, so they are
    ;; inherited instead of being listed again.
    (synopsis "HTS SAM module for biogo")
    (description
     "This package provides tools for handling SAM files.")))
17024
;; Tabix package from the biogo "hts" repository.  Source, version, home
;; page, license and propagated inputs come from
;; go-github-com-biogo-hts-bam.
(define-public go-github-com-biogo-hts-tabix
  (package
    (inherit go-github-com-biogo-hts-bam)
    (name "go-github-com-biogo-hts-tabix")
    (arguments
     (list #:import-path "github.com/biogo/hts/tabix"
           #:unpack-path "github.com/biogo/hts"))
    ;; The propagated inputs are identical to the parent's, so they are
    ;; inherited instead of being listed again.
    (synopsis "HTS Tabix module for biogo")
    (description
     "This package provides tools for handling Tabix files.")))
17036
;; bgzf package from the biogo "hts" repository.  Source, version, home
;; page, license and propagated inputs come from
;; go-github-com-biogo-hts-bam.
(define-public go-github-com-biogo-hts-bgzf
  (package
    (inherit go-github-com-biogo-hts-bam)
    (name "go-github-com-biogo-hts-bgzf")
    (arguments
     (list #:import-path "github.com/biogo/hts/bgzf"
           #:unpack-path "github.com/biogo/hts"))
    ;; The propagated inputs are identical to the parent's, so they are
    ;; inherited instead of being listed again.
    (synopsis "HTS bgzf module for biogo")
    (description
     "This package provides tools for handling bgzf files.")))
17048
;; CRAM package from the biogo "hts" repository.  Source, version, home page
;; and license come from go-github-com-biogo-hts-bam; the propagated inputs
;; are overridden because this package lists two additional libraries.
(define-public go-github-com-biogo-hts-cram
  (package
    (inherit go-github-com-biogo-hts-bam)
    (name "go-github-com-biogo-hts-cram")
    (arguments
     (list #:import-path "github.com/biogo/hts/cram"
           #:unpack-path "github.com/biogo/hts"
           ;; The tests require network access, so they are disabled.
           #:tests? #f))
    (propagated-inputs
     (list go-gopkg-in-check-v1
           go-github.com-ulikunitz-xz
           go-github-com-kortschak-utter))
    (synopsis "HTS CRAM module for biogo")
    (description
     "This package provides tools for handling CRAM files.")))
17063
;; CSI package from the biogo "hts" repository.  Source, version, home page,
;; license and propagated inputs come from go-github-com-biogo-hts-bam.
(define-public go-github-com-biogo-hts-csi
  (package
    (inherit go-github-com-biogo-hts-bam)
    (name "go-github-com-biogo-hts-csi")
    (arguments
     (list #:import-path "github.com/biogo/hts/csi"
           #:unpack-path "github.com/biogo/hts"))
    ;; The propagated inputs are identical to the parent's, so they are
    ;; inherited instead of being listed again.
    (synopsis "Coordinate sorted indexing for biogo")
    (description
     "This package implements CSIv1 and CSIv2 coordinate sorted
indexing.")))
17076
;; FAI package from the biogo "hts" repository.  Source, version, home page,
;; license and propagated inputs come from go-github-com-biogo-hts-bam.
(define-public go-github-com-biogo-hts-fai
  (package
    (inherit go-github-com-biogo-hts-bam)
    (name "go-github-com-biogo-hts-fai")
    (arguments
     (list #:import-path "github.com/biogo/hts/fai"
           #:unpack-path "github.com/biogo/hts"))
    ;; The propagated inputs are identical to the parent's, so they are
    ;; inherited instead of being listed again.
    (synopsis "Fasta sequence file index handling for biogo")
    (description
     "This package implements FAI fasta sequence file index
handling.")))
17089
;; The biogo library itself.  It propagates the biogo store, hts-bam and
;; graph packages defined earlier in this file.
(define-public go-github-com-biogo-biogo
  (package
    (name "go-github-com-biogo-biogo")
    (version "1.0.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biogo/biogo")
             ;; The tag fetched is the version prefixed with "v".
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0ali1mqf3dc26myv6l7wmqfr8i25461rbq3qdad8s0wi29622199"))))
    (build-system go-build-system)
    (arguments
     (list #:import-path "github.com/biogo/biogo"))
    (propagated-inputs
     (list go-gopkg-in-check-v1
           ;; biogo store packages
           go-github-com-biogo-store-interval
           go-github-com-biogo-store-kdtree
           go-github-com-biogo-store-llrb
           go-github-com-biogo-store-step
           ;; biogo hts and graph packages
           go-github-com-biogo-hts-bam
           go-github-com-biogo-graph))
    (home-page "https://github.com/biogo/biogo")
    (synopsis "Bioinformatics library for Go")
    (description
     "Bíogo is a bioinformatics library for the Go language.")
    (license license:bsd-3)))
17119
;;;
;;; Avoid adding new packages to the end of this file.  To reduce the chances
;;; of a merge conflict, place them above, next to existing packages with
;;; similar functionality or similar names.
;;;