gnu: emacs-consult: Fix grammar.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
4 ;;; Copyright © 2015, 2016, 2018, 2019, 2020 Pjotr Prins <pjotr.guix@thebird.nl>
5 ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
6 ;;; Copyright © 2016, 2020 Roel Janssen <roel@gnu.org>
7 ;;; Copyright © 2016, 2017, 2018, 2019, 2020, 2021 Efraim Flashner <efraim@flashner.co.il>
8 ;;; Copyright © 2016, 2020 Marius Bakke <mbakke@fastmail.com>
9 ;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
10 ;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
11 ;;; Copyright © 2017, 2021 Arun Isaac <arunisaac@systemreboot.net>
12 ;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
13 ;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
14 ;;; Copyright © 2018, 2019, 2020, 2021 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
15 ;;; Copyright © 2019, 2020 Maxim Cournoyer <maxim.cournoyer@gmail.com>
16 ;;; Copyright © 2019 Brian Leung <bkleung89@gmail.com>
17 ;;; Copyright © 2019 Brett Gilio <brettg@gnu.org>
18 ;;; Copyright © 2020 Björn Höfling <bjoern.hoefling@bjoernhoefling.de>
19 ;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
20 ;;; Copyright © 2020 Pierre Langlois <pierre.langlois@gmx.com>
21 ;;; Copyright © 2020 Bonface Munyoki Kilyungi <bonfacemunyoki@gmail.com>
22 ;;; Copyright © 2021 Tim Howes <timhowes@lavabit.com>
23 ;;; Copyright © 2021 Hong Li <hli@mdc-berlin.de>
24 ;;;
25 ;;; This file is part of GNU Guix.
26 ;;;
27 ;;; GNU Guix is free software; you can redistribute it and/or modify it
28 ;;; under the terms of the GNU General Public License as published by
29 ;;; the Free Software Foundation; either version 3 of the License, or (at
30 ;;; your option) any later version.
31 ;;;
32 ;;; GNU Guix is distributed in the hope that it will be useful, but
33 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
34 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 ;;; GNU General Public License for more details.
36 ;;;
37 ;;; You should have received a copy of the GNU General Public License
38 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
39
40 (define-module (gnu packages bioinformatics)
41 #:use-module ((guix licenses) #:prefix license:)
42 #:use-module (guix packages)
43 #:use-module (guix utils)
44 #:use-module (guix download)
45 #:use-module (guix git-download)
46 #:use-module (guix hg-download)
47 #:use-module (guix build-system ant)
48 #:use-module (guix build-system gnu)
49 #:use-module (guix build-system cmake)
50 #:use-module (guix build-system go)
51 #:use-module (guix build-system haskell)
52 #:use-module (guix build-system meson)
53 #:use-module (guix build-system ocaml)
54 #:use-module (guix build-system perl)
55 #:use-module (guix build-system python)
56 #:use-module (guix build-system qt)
57 #:use-module (guix build-system r)
58 #:use-module (guix build-system ruby)
59 #:use-module (guix build-system scons)
60 #:use-module (guix build-system trivial)
61 #:use-module (guix deprecation)
62 #:use-module (gnu packages)
63 #:use-module (gnu packages assembly)
64 #:use-module (gnu packages autotools)
65 #:use-module (gnu packages algebra)
66 #:use-module (gnu packages base)
67 #:use-module (gnu packages bash)
68 #:use-module (gnu packages bison)
69 #:use-module (gnu packages bioconductor)
70 #:use-module (gnu packages boost)
71 #:use-module (gnu packages check)
72 #:use-module (gnu packages code)
73 #:use-module (gnu packages commencement)
74 #:use-module (gnu packages cmake)
75 #:use-module (gnu packages compression)
76 #:use-module (gnu packages cpio)
77 #:use-module (gnu packages cran)
78 #:use-module (gnu packages curl)
79 #:use-module (gnu packages documentation)
80 #:use-module (gnu packages databases)
81 #:use-module (gnu packages datastructures)
82 #:use-module (gnu packages dlang)
83 #:use-module (gnu packages file)
84 #:use-module (gnu packages flex)
85 #:use-module (gnu packages gawk)
86 #:use-module (gnu packages gcc)
87 #:use-module (gnu packages gd)
88 #:use-module (gnu packages golang)
89 #:use-module (gnu packages glib)
90 #:use-module (gnu packages graph)
91 #:use-module (gnu packages graphics)
92 #:use-module (gnu packages graphviz)
93 #:use-module (gnu packages groff)
94 #:use-module (gnu packages gtk)
95 #:use-module (gnu packages guile)
96 #:use-module (gnu packages guile-xyz)
97 #:use-module (gnu packages haskell-check)
98 #:use-module (gnu packages haskell-web)
99 #:use-module (gnu packages haskell-xyz)
100 #:use-module (gnu packages image)
101 #:use-module (gnu packages image-processing)
102 #:use-module (gnu packages imagemagick)
103 #:use-module (gnu packages java)
104 #:use-module (gnu packages java-compression)
105 #:use-module (gnu packages jemalloc)
106 #:use-module (gnu packages linux)
107 #:use-module (gnu packages lisp-xyz)
108 #:use-module (gnu packages logging)
109 #:use-module (gnu packages machine-learning)
110 #:use-module (gnu packages man)
111 #:use-module (gnu packages maths)
112 #:use-module (gnu packages mpi)
113 #:use-module (gnu packages ncurses)
114 #:use-module (gnu packages node)
115 #:use-module (gnu packages ocaml)
116 #:use-module (gnu packages pcre)
117 #:use-module (gnu packages parallel)
118 #:use-module (gnu packages pdf)
119 #:use-module (gnu packages perl)
120 #:use-module (gnu packages perl-check)
121 #:use-module (gnu packages pkg-config)
122 #:use-module (gnu packages popt)
123 #:use-module (gnu packages protobuf)
124 #:use-module (gnu packages python)
125 #:use-module (gnu packages python-check)
126 #:use-module (gnu packages python-compression)
127 #:use-module (gnu packages python-science)
128 #:use-module (gnu packages python-web)
129 #:use-module (gnu packages python-xyz)
130 #:use-module (gnu packages qt)
131 #:use-module (gnu packages rdf)
132 #:use-module (gnu packages readline)
133 #:use-module (gnu packages ruby)
134 #:use-module (gnu packages serialization)
135 #:use-module (gnu packages shells)
136 #:use-module (gnu packages sphinx)
137 #:use-module (gnu packages statistics)
138 #:use-module (gnu packages swig)
139 #:use-module (gnu packages tbb)
140 #:use-module (gnu packages tex)
141 #:use-module (gnu packages texinfo)
142 #:use-module (gnu packages textutils)
143 #:use-module (gnu packages time)
144 #:use-module (gnu packages tls)
145 #:use-module (gnu packages vim)
146 #:use-module (gnu packages web)
147 #:use-module (gnu packages xml)
148 #:use-module (gnu packages xorg)
149 #:use-module (srfi srfi-1)
150 #:use-module (srfi srfi-26)
151 #:use-module (ice-9 match))
152
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
                    version ".tgz"))
              (sha256
               (base32
                "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         ;; No 'configure' step is run; the single C source file named
         ;; after the version is compiled directly in the 'build' phase.
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; Ask Guix for the compiler name instead of hard-coding
             ;; "gcc", so that a cross build invokes the target compiler.
             (invoke ,(cc-for-target)
                     "-O3"
                     "-ffast-math"
                     "-finline-functions"
                     "-o"
                     "aragorn"
                     (string-append "aragorn" ,version ".c"))
             #t))
         ;; Install the executable and its manual page by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               (install-file "aragorn" bin)
               (install-file "aragorn.1" man))
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
197
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source (origin
              (method git-fetch)
              ;; BamM is not available on pypi.
              (uri (git-reference
                    (url "https://github.com/Ecogenomics/BamM")
                    (commit version)
                    (recursive? #t)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1p83ahi984ipslxlg4yqy1gdnya9rkn1v71z8djgxkm9d2chw4c5"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "c/htslib-1.3.1")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; BamM is Python 2 only.
       ;; Do not use bundled libhts.  Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         ;; Regenerate the autotools files of the C library in "c/".
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((sh (which "sh")))
                 (for-each make-file-writable (find-files "." ".*"))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") sh))
                 (setenv "CONFIG_SHELL" sh)
                 (invoke "./autogen.sh")))
             #t))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         ;; Make the build-time PATH available to the installed "bamm"
         ;; script at run time.
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         ;; This phase stands in for the deleted 'check' phase, so it has to
         ;; honor #:tests? itself.
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key inputs outputs tests? #:allow-other-keys)
             (when tests?
               (setenv "PATH"
                       (string-append (assoc-ref outputs "out")
                                      "/bin:"
                                      (getenv "PATH")))
               ;; Let the build system compute the site-packages directory.
               ;; Slicing the interpreter's store file name by hand breaks
               ;; for versions with a two-digit patch level such as 2.7.18.
               (add-installed-pythonpath inputs outputs)
               ;; There are 2 errors printed, but they are safe to ignore:
               ;; 1) [E::hts_open_format] fail to open file ...
               ;; 2) samtools view: failed to open ...
               (invoke "nosetests"))
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib-1.3) ; At least one test fails on htslib-1.4+.
       ;; zlib is a library for the target, not a build-time tool, so it
       ;; belongs here rather than in native-inputs.
       ("zlib" ,zlib)
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "https://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
292
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.5.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pezmaster31/bamtools")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0nfb2ypcx9959xnbz6wxh6py3xfizgmg8nrknxl95c507m9hmq8b"))))
    (build-system cmake-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Export LDFLAGS before 'configure' so that the linker records
         ;; <out>/lib/bamtools as a run-time search path.
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (rpath-flag (string-append "-Wl,-rpath="
                                               prefix "/lib/bamtools")))
               (setenv "LDFLAGS" rpath-flag))
             #t)))))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
326
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.11")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/samtools/bcftools/"
                           "releases/download/"
                           version "/bcftools-" version ".tar.bz2"))
       (sha256
        (base32 "0r508mp15pqzf8r1269kb4v5naw9zsvbwd3cz8s1yj7carsf9viw"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.11")
           #t))))
    (build-system gnu-build-system)
    (native-inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (arguments
     `(#:test-target "test"
       #:configure-flags '("--enable-libgsl")
       #:phases
       (modify-phases %standard-phases
         ;; The test driver refers to /bin/bash; point it at the bash found
         ;; in PATH instead.
         (add-before 'check 'patch-tests
           (lambda _
             (let ((bash (which "bash")))
               (substitute* "test/test.pl"
                 (("/bin/bash") bash)))
             #t)))))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but become GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
370
;; Older bcftools release, built against the matching htslib-1.9.  Every
;; field not listed here (build system, arguments, inputs, metadata) is
;; inherited from 'bcftools'.
(define-public bcftools-1.9
  (package
    (inherit bcftools)
    (name "bcftools")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/samtools/bcftools/"
                           "releases/download/"
                           version "/bcftools-" version ".tar.bz2"))
       (sha256
        (base32 "1j3h638i8kgihzyrlnpj82xg1b23sijibys9hvwari3fy7kd0dkg"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.9")
           #t))))
    (native-inputs
     `(("htslib" ,htslib-1.9)
       ("perl" ,perl)))))
392
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.35")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bedops/bedops")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0mmgsgwz5r9w76hzgxkxc9s9lkdhhaf7vr6i02b09vbswvs1fyqx"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops ships tarballs of minimally patched copies of
             ;; jansson, zlib, and bzip2.  Stock libraries cannot be used
             ;; because at least zlib is patched to add a C++ function
             ;; definition (deflateInit2cpp).  Until upstream offers a way
             ;; to link against system libraries, the three in-tree copies
             ;; have to be built.
             ;;
             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Extract the archives now so that their scripts get their
             ;; shebangs patched.
             (with-directory-excursion "third-party"
               (for-each (lambda (tarball)
                           (invoke "tar" "xvf" tarball))
                         '("jansson-2.6.tar.bz2"
                           "zlib-1.2.7.tar.bz2"
                           "bzip2-1.0.6.tar.bz2")))
             ;; The Makefile must not extract the archives a second time.
             (substitute* "system.mk/Makefile.linux"
               (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
               (("\\./configure") "CONFIG_SHELL=bash ./configure"))
             (substitute* "third-party/zlib-1.2.7/Makefile.in"
               (("^SHELL=.*$") "SHELL=bash\n"))
             #t)))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
452
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.29.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                           "download/v" version "/"
                           "bedtools-" version ".tar.gz"))
       (sha256
        (base32 "0m3hk6548846w83a9s5drsczvy67n2azx41kj71n03klb2gbzwg3"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("python" ,python-wrapper)))
    (inputs
     `(("samtools" ,samtools-1.9)
       ("zlib" ,zlib)))
    (arguments
     '(#:test-target "test"
       ;; The installation prefix is handed to make directly because the
       ;; 'configure' phase is removed below.
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "prefix=" out)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:expat)))
488
;; Ribotaper expects the column layout produced by this old release; later
;; releases of bedtools produce files with more columns.
(define-public bedtools-2.18
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/"
                           "releases/download/v" version
                           "/bedtools-" version ".tar.gz"))
       (sha256
        (base32 "11rvca19ncg03kxd0wzlfx5ws7r3nisd0z8s9j9n182d8ksp2pxz"))))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy every file found under the build tree's "bin" directory
         ;; into <out>/bin/.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (target (string-append out "/bin/"))
                    (programs (find-files "bin" ".*")))
               (for-each (lambda (program)
                           (install-file program target))
                         programs))
             #t)))))))
515
(define-public pbbam
  (package
    (name "pbbam")
    (version "0.23.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/pbbam")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0h9gkrpf2lrxklxp72xfl5bi3h5zcm5hprrya9gf0hr3xwlbpp0x"))))
    (build-system meson-build-system)
    (arguments
     `(;; TODO: tests/pbbam_test cannot be linked
       ;; ld: tests/59830eb@@pbbam_test@exe/src_test_Accuracy.cpp.o:
       ;; undefined reference to symbol '_ZTIN7testing4TestE'
       ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
       ;; error adding symbols: DSO missing from command line
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'find-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; There is no pkg-config file for gtest_main, so the
             ;; dependency lookup is replaced with a direct library search
             ;; in the googletest input.
             (let ((googletest (assoc-ref inputs "googletest")))
               (substitute* "tests/meson.build"
                 (("pbbam_gtest_dep = dependency\\('gtest_main'.*")
                  (format #f "cpp = meson.get_compiler('cpp')
pbbam_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                          googletest))))
             #t)))))
    (native-inputs
     `(("googletest" ,googletest)
       ("pkg-config" ,pkg-config)
       ("python" ,python-wrapper)))     ; for tests
    (inputs
     `(("boost" ,boost)
       ("samtools" ,samtools)))
    ;; These libraries are listed as "Required" in the pkg-config file.
    (propagated-inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/PacificBiosciences/pbbam")
    (synopsis "Work with PacBio BAM files")
    (description
     "The pbbam software package provides components to create, query, and
edit PacBio BAM files and associated indices. These components include a core
C++ library, bindings for additional languages, and command-line utilities.
This library is not intended to be used as a general-purpose BAM utility - all
input and output BAMs must adhere to the PacBio BAM format specification.
Non-PacBio BAMs will cause exceptions to be thrown.")
    (license license:bsd-3)))
571
(define-public blasr-libcpp
  (package
    (name "blasr-libcpp")
    (version "5.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/blasr_libcpp")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0cn5l42zyq67sj0g2imqkhayz2iqvv0a1pgpbmlq0qynjmsrbfd2"))))
    (build-system meson-build-system)
    (arguments
     `(;; TODO: unittest/libblasr_unittest cannot be linked
       ;; ld: unittest/df08227@@libblasr_unittest@exe/alignment_utils_FileUtils_gtest.cpp.o:
       ;; undefined reference to symbol
       ;; '_ZN7testing8internal9DeathTest6CreateEPKcPKNS0_2REES3_iPPS1_'
       ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
       ;; error adding symbols: DSO missing from command line
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         ;; Prepend explicit lookups of the hdf5 and hdf5_cpp libraries to
         ;; the dependency list in meson.build.
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             (let* ((hdf5-dir (assoc-ref inputs "hdf5"))
                    (extra-deps
                     (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                             hdf5-dir hdf5-dir)))
               (substitute* "meson.build"
                 (("libblasr_deps = \\[" opening)
                  (string-append opening extra-deps))))
             #t))
         (add-after 'unpack 'find-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; There is no pkg-config file for gtest_main, so the
             ;; dependency lookup is replaced with a direct library search
             ;; in the googletest input.
             (let ((googletest (assoc-ref inputs "googletest")))
               (substitute* "unittest/meson.build"
                 (("libblasr_gtest_dep = dependency\\('gtest_main'.*")
                  (format #f "cpp = meson.get_compiler('cpp')
libblasr_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                          googletest))))
             #t)))))
    (native-inputs
     `(("googletest" ,googletest)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("boost" ,boost)
       ("hdf5" ,hdf5)
       ("pbbam" ,pbbam)
       ("zlib" ,zlib)))
    (home-page "https://github.com/PacificBiosciences/blasr_libcpp")
    (synopsis "Library for analyzing PacBio genomic sequences")
    (description
     "This package provides three libraries used by applications for analyzing
PacBio genomic sequences. This library contains three sub-libraries: pbdata,
hdf and alignment.")
    (license license:bsd-3)))
633
(define-public blasr
  (package
    (name "blasr")
    (version "5.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/blasr")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1skgy2mvz8gsgfh1gc2nfgwvpyzb1hpmp2cf2773h5wsj8nw22kl"))))
    (build-system meson-build-system)
    (arguments
     `(;; Tests require "cram" executable, which is not packaged.
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         ;; Prepend explicit lookups of the hdf5 and hdf5_cpp libraries to
         ;; the dependency list in meson.build.
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             (let* ((hdf5-dir (assoc-ref inputs "hdf5"))
                    (extra-deps
                     (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                             hdf5-dir hdf5-dir)))
               (substitute* "meson.build"
                 (("blasr_deps = \\[" opening)
                  (string-append opening extra-deps))))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("boost" ,boost)
       ("blasr-libcpp" ,blasr-libcpp)
       ("hdf5" ,hdf5)
       ("pbbam" ,pbbam)
       ("zlib" ,zlib)))
    (home-page "https://github.com/PacificBiosciences/blasr")
    (synopsis "PacBio long read aligner")
    (description
     "Blasr is a genomic sequence aligner for processing PacBio long reads.")
    (license license:bsd-3)))
678
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                           "files/RiboTaper/RiboTaper_Version_"
                           version ".tar.gz"))
       (sha256
        (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (inputs
     `(("bedtools" ,bedtools-2.18)
       ("samtools" ,samtools-0.1)
       ("r-minimal" ,r-minimal)
       ("r-foreach" ,r-foreach)
       ("r-xnomial" ,r-xnomial)
       ("r-domc" ,r-domc)
       ("r-multitaper" ,r-multitaper)
       ("r-seqinr" ,r-seqinr)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Wrap each installed script so that it runs with R_LIBS_SITE set
         ;; to the value seen at build time.
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin/"))
                   (r-libs (getenv "R_LIBS_SITE")))
               (for-each
                (lambda (script)
                  (wrap-program (string-append bindir script)
                    (list "R_LIBS_SITE" ":" '= (list r-libs))))
                '("create_annotations_files.bash"
                  "create_metaplots.bash"
                  "Ribotaper_ORF_find.sh"
                  "Ribotaper.sh")))
             #t)))))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
723
(define-public ribodiff
  (package
    (name "ribodiff")
    (version "0.2.2")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ratschlab/RiboDiff")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0x75nlp7qnmm64jasbi6l21f2cy99r2cjyl6b4hr8zf2bq22drnz"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-scipy" ,python2-scipy)
       ("python-statsmodels" ,python2-statsmodels)))
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Add a "scripts=" entry right after the "packages=" line of
         ;; setup.py, reusing that line's leading text as indentation, so
         ;; that scripts/TE.py gets installed as an executable.
         (add-after 'unpack 'patch-setup.py
           (lambda _
             (substitute* "setup.py"
               (("^(.*)packages=.*" whole indent)
                (string-append whole "\n"
                               indent "scripts=['scripts/TE.py'],\n")))
             #t)))))
    (home-page "https://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
    (synopsis "Detect translation efficiency changes from ribosome footprints")
    (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
    (license license:gpl3+)))
768
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lh3/bioawk")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1pxc3zdnirxbf9a0az698hd8xdik7qkhypm7v6hn922x8y9qmspm"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         ;; Install by hand: the manual page "awk.1" is copied under the
         ;; name "bioawk.1", and the executable goes to <out>/bin.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (man1dir (string-append prefix "/share/man/man1"))
                    (manpage (string-append man1dir "/bioawk.1")))
               (mkdir-p man1dir)
               (copy-file "awk.1" manpage)
               (install-file "bioawk" bindir))
             #t)))))
    (native-inputs
     `(("bison" ,bison)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
812
(define-public python-pybedtools
  (package
    (name "python-pybedtools")
    (version "0.8.1")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "14w5i40gi25clrr7h4wa2pcpnyipya8hrqi7nq77553zc5wf0df0"))))
    (build-system python-build-system)
    (arguments
     `(;; (ice-9 ftw) provides 'scandir'; (srfi srfi-1) provides 'find' and
       ;; 'filter'; (srfi srfi-26) provides 'cut'.  All are used by the
       ;; phases below.
       #:modules ((ice-9 ftw)
                  (srfi srfi-1)
                  (srfi srfi-26)
                  (guix build utils)
                  (guix build python-build-system))
       ;; See https://github.com/daler/pybedtools/issues/192
       #:phases
       (modify-phases %standard-phases
         ;; Rename the problematic test functions so that pytest no longer
         ;; collects them.
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             (substitute* "pybedtools/test/test_scripts.py"
               ;; This test freezes.
               (("def test_intron_exon_reads")
                "def _do_not_test_intron_exon_reads")
               ;; This test fails in the Python 2 build.
               (("def test_venn_mpl")
                "def _do_not_test_venn_mpl"))
             (substitute* "pybedtools/test/test_helpers.py"
               ;; Requires internet access.
               (("def test_chromsizes")
                "def _do_not_test_chromsizes")
               ;; Broken as a result of the workaround used in the check phase
               ;; (see: https://github.com/daler/pybedtools/issues/192).
               (("def test_getting_example_beds")
                "def _do_not_test_getting_example_beds"))
             ;; This issue still occurs on python2
             (substitute* "pybedtools/test/test_issues.py"
               (("def test_issue_303")
                "def _test_issue_303"))
             #t))
         ;; TODO: Remove phase after it's part of PYTHON-BUILD-SYSTEM.
         ;; Force the Cythonization of C++ files to guard against compilation
         ;; problems.
         (add-after 'unpack 'remove-cython-generated-files
           (lambda _
             ;; CYTHON-SOURCES holds the ".pyx" file names without their
             ;; extension; any C/C++ file with the same stem is deleted.
             (let ((cython-sources (map (cut string-drop-right <> 4)
                                        (find-files "." "\\.pyx$")))
                   (c/c++-files (find-files "." "\\.(c|cpp|cxx)$")))
               (define (strip-extension filename)
                 (string-take filename (string-index-right filename #\.)))
               (define (cythonized? c/c++-file)
                 (member (strip-extension c/c++-file) cython-sources))
               (for-each delete-file (filter cythonized? c/c++-files))
               #t)))
         (add-after 'remove-cython-generated-files 'generate-cython-extensions
           (lambda _
             (invoke "python" "setup.py" "cythonize")))
         ;; Run the test suite against the freshly built tree, but only when
         ;; tests are requested (#:tests?).
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; Locate the "lib*" and "scripts*" sub-directories that the
               ;; build created under ./build/.
               (let* ((cwd (getcwd))
                      (build-root-directory (string-append cwd "/build/"))
                      (build (string-append
                              build-root-directory
                              (find (cut string-prefix? "lib" <>)
                                    (scandir build-root-directory))))
                      (scripts (string-append
                                build-root-directory
                                (find (cut string-prefix? "scripts" <>)
                                      (scandir build-root-directory)))))
                 (setenv "PYTHONPATH"
                         (string-append build ":" (getenv "PYTHONPATH")))
                 ;; Executable scripts such as 'intron_exon_reads.py' must be
                 ;; available in the PATH.
                 (setenv "PATH"
                         (string-append scripts ":" (getenv "PATH"))))
               ;; The tests need to be run from elsewhere...
               (mkdir-p "/tmp/test")
               (copy-recursively "pybedtools/test" "/tmp/test")
               (with-directory-excursion "/tmp/test"
                 (invoke "pytest" "-v" "--doctest-modules")))
             #t)))))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)
       ("python-pyyaml" ,python-pyyaml)))
    (native-inputs
     `(("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-cython" ,python-cython)
       ("kentutils" ,kentutils) ; for bedGraphToBigWig
       ("python-six" ,python-six)
       ;; For the test suite.
       ("python-pytest" ,python-pytest)
       ("python-psutil" ,python-psutil)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from
within Python.")
    (license license:gpl2+)))
920
;; Python 2 variant of python-pybedtools.  It inherits everything from the
;; converted package and puts python2-pathlib in front of the native inputs.
(define-public python2-pybedtools
  (let ((base (package-with-python2 python-pybedtools)))
    (package
      (inherit base)
      (native-inputs
       (cons `("python2-pathlib" ,python2-pathlib)
             (package-native-inputs base))))))
928
;; Utilities for the Biological Observation Matrix (BIOM) file format.
;; The source snippet removes the C files shipped in the repository and the
;; 'use-cython' phase sets USE_CYTHON for the build.
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.7")
    (source
     (origin
       (method git-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (git-reference
             (url "https://github.com/biocore/biom-format")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1rna16lyk5aqhnv0dp77wwaplias93f1vw28ad3jmyw6hwkai05v"))
       (modules '((guix build utils)))
       (snippet '(begin
                   ;; Delete generated C files.  The regexp is anchored at the
                   ;; end of the file name so that only names ending in ".c"
                   ;; match; without the anchor any name merely containing
                   ;; ".c" (e.g. "*.cfg" or "*.csv") would be deleted too.
                   (for-each delete-file (find-files "." "\\.c$"))
                   #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-cython
           (lambda _ (setenv "USE_CYTHON" "1") #t))
         ;; Prepend a skip decorator to two tests, keeping the indentation of
         ;; the original "def" line.
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             (substitute* "biom/tests/test_cli/test_validate_table.py"
               (("^(.+)def test_invalid_hdf5" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m)))
             (substitute* "biom/tests/test_table.py"
               (("^(.+)def test_from_hdf5_issue_731" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m)))
             #t))
         ;; Make the installed gzip files writable before the
         ;; 'reset-gzip-timestamps' phase runs.  The regexp is anchored so
         ;; that only names ending in ".gz" are touched.
         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (for-each (lambda (file) (chmod file #o644))
                         (find-files out "\\.gz$"))
               #t))))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-flake8" ,python-flake8)
       ("python-future" ,python-future)
       ("python-click" ,python-click)
       ("python-h5py" ,python-h5py)
       ;; FIXME: Upgrade to pandas 1.0 when
       ;; https://github.com/biocore/biom-format/issues/837 is resolved.
       ("python-pandas" ,python-pandas-0.25)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-nose" ,python-nose)))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    (properties `((python2-variant . ,(delay python2-biom-format))))))
999
;; Python 2 variant of python-biom-format.  On top of the inherited phases it
;; edits setup.py so that "pyqi" is no longer appended to the requirements.
(define-public python2-biom-format
  (let ((py2-base (package-with-python2
                   (strip-python2-variant python-biom-format))))
    (package
      (inherit py2-base)
      (arguments
       (substitute-keyword-arguments (package-arguments py2-base)
         ((#:phases inherited-phases)
          `(modify-phases ,inherited-phases
             ;; Do not require the unmaintained pyqi library.
             (add-after 'unpack 'remove-pyqi
               (lambda _
                 (substitute* "setup.py"
                   (("install_requires.append\\(\"pyqi\"\\)") "pass"))
                 #t)))))))))
1014
;; Command-line framework for processing Hi-C sequencing data.
(define-public python-pairtools
  (package
    (name "python-pairtools")
    (version "0.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/mirnylab/pairtools")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0gr8y13q7sd6yai6df4aavl2470n1f9s3cib6r473z4hr8hcbwmc"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace the hard-coded /bin/bash with the bash found in PATH.
         (add-after 'unpack 'fix-references
           (lambda _
             (substitute* (list "pairtools/pairtools_merge.py"
                                "pairtools/pairtools_sort.py")
               (("/bin/bash") (which "bash")))
             #t))
         ;; Run pytest from /tmp with the installed modules on the Python
         ;; path.
         (replace 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (add-installed-pythonpath inputs outputs)
             (with-directory-excursion "/tmp"
               (invoke "pytest" "-v")))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)
       ("python-pytest" ,python-pytest)))
    (inputs
     `(("python" ,python-wrapper)))
    (propagated-inputs
     `(("htslib" ,htslib)               ; for bgzip, looked up in PATH
       ("samtools" ,samtools)           ; looked up in PATH
       ("lz4" ,lz4)                     ; for lz4c
       ("python-click" ,python-click)
       ("python-numpy" ,python-numpy)))
    (home-page "https://github.com/mirnylab/pairtools")
    (synopsis "Process mapped Hi-C data")
    (description "Pairtools is a simple and fast command-line framework to
process sequencing data from a Hi-C experiment. Process pair-end sequence
alignments and perform the following operations:

@itemize
@item detect ligation junctions (a.k.a. Hi-C pairs) in aligned paired-end
sequences of Hi-C DNA molecules
@item sort @code{.pairs} files for downstream analyses
@item detect, tag and remove PCR/optical duplicates
@item generate extensive statistics of Hi-C datasets
@item select Hi-C pairs given flexibly defined criteria
@item restore @code{.sam} alignments from Hi-C pairs.
@end itemize
")
    (license license:expat)))
1072
;; Minimal BioPerl package.  The names of all transitive target inputs are
;; computed on the host side and spliced into the 'wrap-programs' phase so
;; that the installed *.pl scripts get a suitable PERL5LIB.
(define-public bioperl-minimal
  (let* ((runtime-inputs `(("perl-module-build" ,perl-module-build)
                           ("perl-data-stag" ,perl-data-stag)
                           ("perl-libwww" ,perl-libwww)
                           ("perl-uri" ,perl-uri)))
         (transitive-input-names
          (map (compose package-name cadr)
               (delete-duplicates
                (concatenate
                 (map (compose package-transitive-target-inputs cadr)
                      runtime-inputs))))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/bioperl/bioperl-live")
               ;; The tag is the version with dots turned into dashes,
               ;; prefixed with "release-".
               (commit (string-append "release-"
                                      (string-map (lambda (ch)
                                                    (if (char=? ch #\.)
                                                        #\- ch))
                                                  version)))))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0wl8yvzcls59pwwk6m8ahy87pwg6nnibzy5cldbvmcwg2x2w7783"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after
            'install 'wrap-programs
            (lambda* (#:key outputs #:allow-other-keys)
              ;; Make sure all executables in "bin" find the required Perl
              ;; modules at runtime.  As the PERL5LIB variable contains also
              ;; the paths of native inputs, we pick the transitive target
              ;; inputs from %build-inputs.
              (let* ((out (assoc-ref outputs "out"))
                     (bin (string-append out "/bin/"))
                     (path (string-join
                            (cons (string-append out "/lib/perl5/site_perl")
                                  (map (lambda (input-name)
                                         (assoc-ref %build-inputs input-name))
                                       ',transitive-input-names))
                            ":")))
                (for-each (lambda (script)
                            (wrap-program script
                              `("PERL5LIB" ":" prefix (,path))))
                          (find-files bin "\\.pl$"))
                #t))))))
      (inputs runtime-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "https://metacpan.org/release/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license license:perl-license))))
1138
;; Biopython, fetched from PyPI.  HOME is pointed at /tmp before the tests.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.70")
    (source
     (origin
       (method url-fetch)
       ;; use PyPi rather than biopython.org to ease updating
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32
         "0nz4n9d2y2dg849gn1z0vjlkwcpzzkzy3fij7x94a6ixy2c54z2a"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-home
           ;; Some tests require a home directory to be set.
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
1169
;; Python 2 variant of python-biopython, derived with package-with-python2.
(define-public python2-biopython
  (package-with-python2
   python-biopython))
1172
;; Small FASTA/FASTQ parser fetched from PyPI; its tests are disabled.
(define-public python-fastalite
  (package
    (name "python-fastalite")
    (version "0.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "fastalite" version))
       (sha256
        (base32
         "1qli6pxp77i9xn2wfciq2zaxhl82bdxb33cpzqzj1z25yd036wqj"))))
    (build-system python-build-system)
    (arguments
     ;; Test data is not distributed.
     `(#:tests? #f))
    (home-page "https://github.com/nhoffman/fastalite")
    (synopsis "Simplest possible FASTA parser")
    (description "This library implements a FASTA and a FASTQ parser without
relying on a complex dependency tree.")
    (license license:expat)))
1192
;; Python 2 variant of python-fastalite, derived with package-with-python2.
(define-public python2-fastalite
  (package-with-python2
   python-fastalite))
1195
;; Bio++ core library, pinned to a Git commit.  The version string embeds the
;; first seven characters of that commit.
(define-public bpp-core
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582"))
    (package
      (name "bpp-core")
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit commit)))
         ;; Yields "NAME-VERSION-checkout".
         (file-name (git-file-name name version))
         (sha256
          (base32
           "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
1225
;; Bio++ phylogenetics library, pinned to a Git commit and built in the
;; source tree.
(define-public bpp-phyl
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2"))
    (package
      (name "bpp-phyl")
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
               (commit commit)))
         ;; Yields "NAME-VERSION-checkout".
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
1259
;; Bio++ population genetics library, pinned to a Git commit.
(define-public bpp-popgen
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f"))
    (package
      (name "bpp-popgen")
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
               (commit commit)))
         ;; Yields "NAME-VERSION-checkout".
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
      (license license:cecill-c))))
1291
;; Bio++ sequence library, pinned to a Git commit and built in the source
;; tree.
(define-public bpp-seq
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33"))
    (package
      (name "bpp-seq")
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-seq")
               (commit commit)))
         ;; Yields "NAME-VERSION-checkout".
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         ;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
      (license license:cecill-c))))
1324
;; Command-line tools built on the Bio++ libraries, pinned to a Git commit.
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bppsuite")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      (inputs
       ;; Every input needs its own label: a label that repeats an earlier
       ;; one can never be reached by a label lookup such as `assoc-ref'.
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ("bpp-popgen" ,bpp-popgen)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
1362
;; NCBI BLAST+.  The source snippet drops the bundled bzip2, zlib and pcre
;; copies; the build happens inside the "c++" directory with a hand-written
;; 'configure' phase and installs into three outputs.
(define-public blast+
  (package
    (name "blast+")
    (version "2.11.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32
         "0m0r9vkw631ky1za1wilsfk9k9spwqh22nkrb9a57rbwmrc1i3nq"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled bzip2, zlib and pcre.
           (delete-file-recursively "c++/src/util/compress/bzip2")
           (delete-file-recursively "c++/src/util/compress/zlib")
           (delete-file-recursively "c++/src/util/regexp")
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           ;; Remove useless msbuild directory
           (delete-file-recursively
            "c++/src/build-system/project_tree_builder/msbuild")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There are two(!) tests for this massive library, and both fail with
       ;; "unparsable timing stats".
       ;; ERR [127] --   [serial/datatool] datatool.sh     (unparsable timing stats)
       ;; ERR [127] --   [serial/datatool] datatool_xml.sh (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-HOME
           ;; $HOME needs to be set at some point during the configure phase
           (lambda _ (setenv "HOME" "/tmp") #t))
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "c++") #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Like `which', except that "date" maps to a fixed invocation
             ;; and an unknown command produces a warning and #f.
             (define (which* cmd)
               (cond ((string=? cmd "date")
                      ;; make call to "date" deterministic
                      "date -d @0")
                     ((which cmd)
                      => identity)
                     (else
                      (format (current-error-port)
                              "WARNING: Unable to find absolute path for ~s~%"
                              cmd)
                      #f)))

             ;; Rewrite hardcoded paths to various tools
             (substitute* (append '("src/build-system/configure.ac"
                                    "src/build-system/configure"
                                    "src/build-system/helpers/run_with_lock.c"
                                    "scripts/common/impl/if_diff.sh"
                                    "scripts/common/impl/run_with_lock.sh"
                                    "src/build-system/Makefile.configurables.real"
                                    "src/build-system/Makefile.in.top"
                                    "src/build-system/Makefile.meta.gmake=no"
                                    "src/build-system/Makefile.meta.in"
                                    "src/build-system/Makefile.meta_l"
                                    "src/build-system/Makefile.meta_p"
                                    "src/build-system/Makefile.meta_r"
                                    "src/build-system/Makefile.mk.in"
                                    "src/build-system/Makefile.requirements"
                                    "src/build-system/Makefile.rules_with_autodep.in")
                                  (find-files "scripts/common/check" "\\.sh$"))
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
                (or (which* cmd) all)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; rewrite "/var/tmp" in check script
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; do not reset PATH
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out        (assoc-ref outputs "out"))
                   (libdir     (string-append (assoc-ref outputs "lib")
                                              "/lib"))
                   (includedir (string-append (assoc-ref outputs "include")
                                              "/include/ncbi-tools++")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure.orig"
                       (string-append "--with-build-root=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       (string-append "--libdir=" libdir)
                       (string-append "--includedir=" includedir)
                       (string-append "--with-bz2="
                                      (assoc-ref inputs "bzip2"))
                       (string-append "--with-z="
                                      (assoc-ref inputs "zlib"))
                       (string-append "--with-pcre="
                                      (assoc-ref inputs "pcre"))
                       ;; Each library is built twice by default, once
                       ;; with "-static" in its name, and again
                       ;; without.
                       "--without-static"
                       "--with-dll")
               #t))))))
    (outputs '("out"                    ; 21 MB
               "lib"                    ; 226 MB
               "include"))              ; 33 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("lmdb" ,lmdb)
       ("zlib" ,zlib)
       ("pcre" ,pcre)
       ("perl" ,perl)
       ("python" ,python-wrapper)))
    (native-inputs
     `(("cpio" ,cpio)))
    (home-page "https://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
1513
;; BLESS, an error correction tool for NGS reads.  The bundled boost, pigz,
;; google-sparsehash and zlib directories are removed from the source; the
;; package has no configure script and installs "bless" and "kmc" by hand.
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, zlib, and .git directory
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib:static")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"))))
       #:phases
       (modify-phases %standard-phases
         ;; The pigz directory was removed by the source snippet, so drop the
         ;; Makefile command that would enter it.
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         ;; Hard-code the locations of the kmc binary (installed in this
         ;; package's own output) and of pigz (taken from the inputs).
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib:static" ,zlib "static")
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    ;; The first sentence used to run two clauses together ("uses a single
    ;; minimum-sized bloom filter is a correction tool"); it is split into
    ;; two sentences here.
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS). It uses a single minimum-sized bloom
filter. BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
1598
;; Bowtie 2 read aligner.  The Makefile is patched in the source snippet so
;; that BUILD_HOST and BUILD_TIME are constants; the test suite is the
;; upstream Perl script run against the freshly built binaries.
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.3.4.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BenLangmead/bowtie2")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1zl3cf327y2p7p03cavymbh7b00djc7lncfaqih33n96iy9q8ibp"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "Makefile"
             ;; replace BUILD_HOST and BUILD_TIME for deterministic build
             (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
             (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:make-flags
       (list "allall"
             "WITH_TBB=1"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script.
         (delete 'configure)
         (replace 'check
           (lambda _
             (invoke "perl"
                     "scripts/test/simple_tests.pl"
                     "--bowtie2=./bowtie2"
                     "--bowtie2-build=./bowtie2-build")
             #t)))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)
       ("python" ,python-wrapper)))
    (native-inputs
     `(("perl" ,perl)
       ("perl-clone" ,perl-clone)
       ("perl-test-deep" ,perl-test-deep)
       ("perl-test-simple" ,perl-test-simple)))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)))
1657
;; Bowtie (version 1) short read aligner.  As for bowtie, BUILD_HOST and
;; BUILD_TIME are replaced with constants in the Makefile.
(define-public bowtie1
  (package
    (name "bowtie1")
    (version "1.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/bowtie-bio/bowtie/"
                           version "/bowtie-" version "-src.zip"))
       (sha256
        (base32
         "11dbihdnrizc6qhx9xsw77w3q5ssx642alaqzvhxx32ak9glvq04"))
       (modules '((guix build utils)))
       (snippet
        '(substitute* "Makefile"
           ;; replace BUILD_HOST and BUILD_TIME for deterministic build
           (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
           (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\"")))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:make-flags
       (list "CC=gcc" "all"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script.
         (delete 'configure))))
    (inputs
     `(("python-wrapper" ,python-wrapper)
       ("tbb" ,tbb)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://bowtie-bio.sourceforge.net/index.shtml")
    (synopsis "Fast aligner for short nucleotide sequence reads")
    (description
     "Bowtie is a fast, memory-efficient short read aligner. It aligns short
DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp
reads per hour. Bowtie indexes the genome with a Burrows-Wheeler index to
keep its memory footprint small: typically about 2.2 GB for the human
genome (2.9 GB for paired-end).")
    (license license:artistic2.0)))
1698
;; TopHat spliced read mapper.  The bundled SeqAn and samtools sources are
;; removed and the build is redirected to the samtools provided as an input;
;; GCC 5 is used as the compiler.
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
             version ".tar.gz"))
       (sha256
        (base32
         "19add02kv2xhd6ihd779dr7x35ggym3jqr0m5c4315i1yfb0p11p"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled SeqAn and samtools
           (delete-file-recursively "src/SeqAn-1.4.2")
           (delete-file-recursively "src/samtools-0.1.18")
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'set-paths 'hide-default-gcc
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((default-gcc (assoc-ref inputs "gcc")))
               ;; Remove the default GCC from CPLUS_INCLUDE_PATH to prevent
               ;; conflicts with the GCC 5 input.
               (setenv "CPLUS_INCLUDE_PATH"
                       (string-join
                        (delete (string-append default-gcc "/include/c++")
                                (string-split (getenv "CPLUS_INCLUDE_PATH")
                                              #\:))
                        ":"))
               #t)))
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Drop every reference to the bundled samtools from the
             ;; Makefile template.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ prefix) prefix)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Call the samtools found in PATH instead of the bundled one.
             (substitute* '("src/common.cpp"
                            "src/tophat.py")
               (("samtools_0.1.18") (which "samtools")))
             ;; Include the samtools headers from the "samtools/" directory.
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (native-inputs
     `(("gcc@5" ,gcc-5)))               ;; doesn't build with later versions
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python-2)
       ("samtools" ,samtools-0.1)
       ("seqan" ,seqan-1)
       ("zlib" ,zlib)))
    (home-page "https://ccb.jhu.edu/software/tophat/index.shtml")
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
1777
;; BWA, the Burrows-Wheeler aligner.  Upstream ships a plain Makefile with
;; neither a "configure" script nor "check"/"install" targets, so the
;; corresponding phases are removed or replaced below.
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.17")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/lh3/bwa/releases/download/v"
                    version "/bwa-" version ".tar.bz2"))
              (sha256
               (base32
                "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install each build product into its own sub-directory of
             ;; the output.
             (let ((prefix (assoc-ref outputs "out")))
               (for-each (lambda (file+subdir)
                           (install-file (car file+subdir)
                                         (string-append prefix
                                                        (cdr file+subdir))))
                         '(("bwa" . "/bin")
                           ("libbwa.a" . "/lib")
                           ("README.md" . "/share/doc/bwa")
                           ("bwa.1" . "/share/man/man1"))))
             #t)))))
    (inputs `(("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
1826
;; BWA-PSSM, a modification of BWA that aligns reads using position
;; specific scoring matrices.  Fields not overridden here (build arguments,
;; supported systems) are inherited from the "bwa" package.
(define-public bwa-pssm
  (package (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pkerpedjiev/bwa-pssm")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "076c4q0cdqz8jgylb067y9zmvxglppnzi3qiscn0xiypgc6lgb5r"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
1856
;; BWA-Meth, an aligner for bisulfite sequencing reads that drives BWA.
;; The script is patched so that it calls "bwa" by absolute file name.
(define-public bwa-meth
  (package
    (name "bwa-meth")
    (version "0.2.2")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/brentp/bwa-meth")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "17j31i7zws5j7mhsq9x3qgkxly6mlmrgwhfq0qbflgxrmx04yaiz"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'keep-references-to-bwa
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "bwameth.py"
               ;; Replace "bwa mem" and "bwa index" invocations with the
               ;; full file name of the bwa executable.
               (("bwa (mem|index)" _ command)
                (string-append (which "bwa") " " command))
               ;; There's an ill-advised check for "samtools" on PATH.
               (("^checkX.*") ""))
             #t)))))
    (inputs
     `(("bwa" ,bwa)))
    (native-inputs
     `(("python-toolshed" ,python-toolshed)))
    (home-page "https://github.com/brentp/bwa-meth")
    (synopsis "Fast and accurate alignment of BS-Seq reads")
    (description
     "BWA-Meth works for single-end reads and for paired-end reads from the
directional protocol (most common). It uses the method employed by
methylcoder and Bismark of in silico conversion of all C's to T's in both
reference and reads. It recovers the original read (needed to tabulate
methylation) by attaching it as a comment which BWA appends as a tag to the
read. It performs favorably to existing aligners gauged by number of on and
off-target reads for a capture method that targets CpG-rich region.")
    (license license:expat)))
1897
;; bx-python: Python tools for biological data, fetched from PyPI.
(define-public python-bx-python
  (package
    (name "python-bx-python")
    (version "0.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "bx-python" version))
       (sha256
        (base32 "11kksg2rbzihpmcid823xvg42xi88m7sz58rzk29abybkxy0rszs"))))
    (build-system python-build-system)
    ;; The test suite cannot run: the PyPI tarball lacks the test data.
    (arguments '(#:tests? #f))
    (native-inputs
     `(("python-lzo" ,python-lzo)
       ("python-nose" ,python-nose)
       ("python-cython" ,python-cython)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-six" ,python-six)))
    (home-page "https://github.com/bxlab/bx-python")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
1926
;; Python 2 variant of python-bx-python.
(define-public python2-bx-python (package-with-python2 python-bx-python))
1929
;; Pysam: Python wrapper around the SAMtools C API.  The bundled htslib is
;; deleted from the source and the build links against the "htslib" input.
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.15.1")
    (source
     (origin
       (method git-fetch)
       ;; Test data is missing on PyPi.
       (uri (git-reference
             (url "https://github.com/pysam-developers/pysam")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1vj367w6xbn9bpmksm162l1aipf7cj97h1q83y7jcpm33ihwpf7x"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop bundled htslib. TODO: Also remove samtools
           ;; and bcftools.
           (delete-file-recursively "htslib")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (srfi srfi-26)
                  (guix build python-build-system)
                  (guix build utils))
       #:phases
       (modify-phases %standard-phases
         (add-before 'build 'set-flags
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Point the build at the external htslib and set the
             ;; ncurses-related compiler and linker flags.
             (let ((htslib (assoc-ref inputs "htslib")))
               (setenv "HTSLIB_MODE" "external")
               (setenv "HTSLIB_LIBRARY_DIR" (string-append htslib "/lib"))
               (setenv "HTSLIB_INCLUDE_DIR" (string-append htslib "/include"))
               (setenv "LDFLAGS" "-lncurses")
               (setenv "CFLAGS" "-D_CURSES_LIB=1")
               #t)))
         (replace 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; This file contains tests that require a connection to the
             ;; internet.
             (delete-file "tests/tabix_test.py")
             ;; FIXME: This test fails
             (delete-file "tests/AlignmentFile_test.py")
             ;; Add first subdirectory of "build" directory to PYTHONPATH.
             (let ((build-subdir
                    (car (scandir "build"
                                  (negate (cut string-prefix? "." <>))))))
               (setenv "PYTHONPATH"
                       (string-append (getenv "PYTHONPATH")
                                      ":" (getcwd) "/build/" build-subdir)))
             ;; Step out of source dir so python does not import from CWD.
             (with-directory-excursion "tests"
               (setenv "HOME" "/tmp")
               (invoke "make" "-C" "pysam_data")
               (invoke "make" "-C" "cbcf_data")
               ;; Running nosetests without explicitly asking for a single
               ;; process leads to a crash.  Running with multiple processes
               ;; fails because the tests are not designed to run in parallel.

               ;; FIXME: tests keep timing out on some systems.
               (invoke "nosetests" "-v" "--processes" "1")))))))
    (propagated-inputs
     `(("htslib" ,htslib-1.9)))    ; Included from installed header files.
    (inputs
     `(("ncurses" ,ncurses)
       ("curl" ,curl)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ;; The remaining dependencies are needed for the tests only.
       ("samtools" ,samtools-1.9)
       ("bcftools" ,bcftools-1.9)
       ("python-nose" ,python-nose)))
    (home-page "https://github.com/pysam-developers/pysam")
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (license license:expat)))
2012
;; Python 2 variant of python-pysam.
(define-public python2-pysam (package-with-python2 python-pysam))
2015
;; twobitreader: reader for .2bit files, built from the upstream Git tag.
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/benjschiller/twobitreader")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1qbxvv1h58cismbk1anpjrkpghsaiy64a11ir3lhy6qch6xf8n62"))))
    (build-system python-build-system)
    ;; There is nothing to run: upstream does not ship tests.
    (arguments '(#:tests? #f))
    (native-inputs
     `(("python-sphinx" ,python-sphinx)))
    (home-page "https://github.com/benjschiller/twobitreader")
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (license license:artistic2.0)))
2040
;; Python 2 variant of python-twobitreader.
(define-public python2-twobitreader (package-with-python2 python-twobitreader))
2043
;; plastid: Python library for genomic analysis, fetched from PyPI.
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.4.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plastid" version))
       (sha256
        (base32 "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
    (build-system python-build-system)
    ;; Tests are disabled because some of the files they need are not
    ;; part of the tarball.
    (arguments `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)
       ("python-biopython" ,python-biopython)
       ("python-twobitreader" ,python-twobitreader)
       ("python-termcolor" ,python-termcolor)))
    (home-page "https://github.com/joshuagryphon/plastid")
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (license license:bsd-3)))
2076
;; Python 2 variant of python-plastid.
(define-public python2-plastid (package-with-python2 python-plastid))
2079
;; TEToolkit (TEtranscripts and TEcount).  The scripts are patched to call
;; external programs by absolute file name and are wrapped so that they
;; see the R_LIBS_SITE value that was in effect at build time.
(define-public tetoolkit
  (package
    (name "tetoolkit")
    (version "2.0.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/mhammell-laboratory/tetoolkit")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1yzi0kfpzip8zpjb82x1ik6h22yzfyjiz2dv85v6as2awwqvk807"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; not guaranteed to work with Python 3
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-writable
           (lambda _
             (for-each make-file-writable (find-files "."))
             #t))
         (add-after 'unpack 'patch-invocations
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Replace bare command names with the absolute file names
             ;; found on PATH at build time.
             (substitute* '("bin/TEtranscripts"
                            "bin/TEcount")
               (("'sort ")
                (string-append "'" (which "sort") " "))
               (("'rm -f ")
                (string-append "'" (which "rm") " -f "))
               (("'Rscript'") (string-append "'" (which "Rscript") "'")))
             (substitute* "TEToolkit/IO/ReadInputs.py"
               (("BamToBED") (which "bamToBed")))
             (substitute* "TEToolkit/Normalization.py"
               (("\"Rscript\"")
                (string-append "\"" (which "Rscript") "\"")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure the executables find R packages.
             (let ((out (assoc-ref outputs "out")))
               (for-each
                (lambda (script)
                  (wrap-program (string-append out "/bin/" script)
                    `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
                '("TEtranscripts"
                  "TEcount")))
             #t)))))
    (inputs
     `(("coreutils" ,coreutils)
       ("bedtools" ,bedtools)
       ("python-argparse" ,python2-argparse)
       ("python-pysam" ,python2-pysam)
       ("r-minimal" ,r-minimal)
       ("r-deseq2" ,r-deseq2)))
    (home-page "https://github.com/mhammell-laboratory/tetoolkit")
    (synopsis "Transposable elements in differential enrichment analysis")
    (description
     "This is a package for including transposable elements in differential
enrichment analysis of sequencing datasets. TEtranscripts and TEcount take
RNA-seq (and similar data) and annotate reads to both genes and transposable
elements. TEtranscripts then performs differential analysis using DESeq2.
Note that TEtranscripts and TEcount rely on specially curated GTF files, which
are not included due to their size.")
    (license license:gpl3+)))
2145
;; CD-HIT: sequence clustering.  Built straight from the upstream Makefile,
;; with the build date and time stripped from the sources.
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.6.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/weizhongli/cdhit"
                           "/releases/download/V" version
                           "/cd-hit-v" version
                           "-2017-0621-source.tar.gz"))
       (sha256
        (base32 "1b4mwm2520ixjbw57sil20f9iixzw4bkdqqwgg1fc3pzm6rz4zmn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags
       ;; Executables are copied directly to the PREFIX.
       (list (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin")
             ;; Support longer sequences (e.g. Pacbio sequences)
             "MAX_SEQ=60000000")
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script
         (delete 'configure)
         ;; Remove sources of non-determinism
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
             #t))
         ;; The "install" target does not create the target directory.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bin)
               #t))))))
    (inputs
     `(("perl" ,perl)))
    (home-page "http://weizhongli-lab.org/cd-hit/")
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    ;; The manual says: "It can be copied under the GNU General Public License
    ;; version 2 (GPLv2)."
    (license license:gpl2)))
2196
;; CLIPper: peak definition for CLIP-seq data, built from the upstream Git
;; tag.
(define-public clipper
  (package
    (name "clipper")
    (version "2.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/YeoLab/clipper")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1bcag4lb5bkzsj2vg7lrq24aw6yfgq275ifrbhd82l7kqgbbjbkv"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; The 'reset-gzip-timestamps' phase rewrites .gz files in
             ;; place, so every installed .gz file must be writable first.
             (for-each make-file-writable
                       (find-files (assoc-ref outputs "out") "\\.gz$"))
             #t)))))
    (native-inputs
     `(("python-setuptools-git" ,python-setuptools-git)
       ("python-mock" ,python-mock)     ; for tests
       ("python-nose" ,python-nose)     ; for tests
       ("python-pytz" ,python-pytz)))   ; for tests
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python-pybedtools)
       ("python-cython" ,python-cython)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
2242
;; CodingQuarry: fungal gene predictor.  There is no "configure" script and
;; no install target, so files are copied into the output by hand.
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://sourceforge/codingquarry/CodingQuarry_v"
             version ".tar.gz"))
       (sha256
        (base32 "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin")))
               ;; Manual first, then the data directory that QUARRY_PATH
               ;; refers to, then the two executables.
               (install-file "INSTRUCTIONS.pdf"
                             (string-append prefix "/share/doc/codingquarry"))
               (copy-recursively "QuarryFiles"
                                 (string-append prefix "/QuarryFiles"))
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("CodingQuarry"
                           "CufflinksGTF_to_CodingQuarryGFF3.py")))
             #t)))))
    (native-inputs `(("python" ,python-2))) ; Only Python 2 is supported
    (inputs `(("openmpi" ,openmpi)))
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (license license:gpl3+)))
2283
;; COUGER: co-factor identification.  Nothing is compiled; the "src" tree
;; and the "couger" launcher are copied into the output and the launcher
;; is wrapped with the build-time PYTHONPATH.
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32 "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin")))
               (copy-recursively "src" (string-append prefix "/src"))
               (mkdir bindir)
               ;; Add "src" directory to module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 prefix "\")\nfrom argparse")))
               (install-file "couger" bindir))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'couger' runs with the correct PYTHONPATH.
             (let ((launcher (string-append (assoc-ref outputs "out")
                                            "/bin/couger"))
                   (python-path (getenv "PYTHONPATH")))
               (wrap-program launcher
                 (list "PYTHONPATH" ":" 'prefix (list python-path))))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r-minimal" ,r-minimal)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
2354
;; Clustal-Omega: multiple sequence aligner, built with the default phases
;; of the GNU build system.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32 "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
    (build-system gnu-build-system)
    (inputs `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
2377
;; CrossMap: genome coordinate conversion between assemblies, from PyPI.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.3.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "CrossMap" version))
       (sha256
        (base32 "1sb2f2qbxya4fzw3yjl09vbrs8vfmw22zrygrvz004sf9gb1vkan"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (inputs
     `(("python-bx-python" ,python-bx-python)
       ("python-numpy" ,python-numpy)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("zlib" ,zlib)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
2405
;; dnaio: FASTA/FASTQ parsing library, fetched from PyPI.
(define-public python-dnaio
  (package
    (name "python-dnaio")
    (version "0.3")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "dnaio" version))
              (sha256
               (base32
                "0f16m7hdlm0fz1n7y5asy0v9ghyrq17ni1p9iybq22ddzyd49r27"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-xopen" ,python-xopen)))
    (home-page "https://github.com/marcelm/dnaio/")
    (synopsis "Read FASTA and FASTQ files efficiently")
    (description
     "dnaio is a Python library for fast parsing of FASTQ and also FASTA
files. The code was previously part of the cutadapt tool.")
    (license license:expat)))
2428
;; deeptoolsintervals: GTF-based interval trees, fetched from PyPI.
(define-public python-deeptoolsintervals
  (package
    (name "python-deeptoolsintervals")
    (version "0.1.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "deeptoolsintervals" version))
       (sha256
        (base32 "1xnl80nblysj6dylj4683wgrfa425rkx4dp5k65hvwdns9pw753x"))))
    (build-system python-build-system)
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/deeptools/deeptools_intervals")
    (synopsis "Create GTF-based interval trees with associated meta-data")
    (description
     "This package provides a Python module creating/accessing GTF-based
interval trees with associated meta-data. It is primarily used by the
@code{deeptools} package.")
    (license license:expat)))
2449
;; deepTools: tools for exploring deep sequencing data, built from the
;; upstream Git tag.
(define-public python-deeptools
  (package
    (name "python-deeptools")
    (version "3.4.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/deepTools")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0l09vyynz6s6w7fnyd94rpys4a6aja6kp4gli64pngdxdz3md1nl"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-numpydoc" ,python-numpydoc)
       ("python-py2bit" ,python-py2bit)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)
       ("python-deeptoolsintervals" ,python-deeptoolsintervals)
       ("python-plotly" ,python-plotly-2.4.1)))
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-nose" ,python-nose)))
    (home-page "https://pypi.org/project/deepTools/")
    (synopsis "Useful tools for exploring deep sequencing data")
    (description "This package addresses the challenge of handling large amounts
of data that are now routinely generated from DNA sequencing centers.
@code{deepTools} contains useful modules to process the mapped reads data for
multiple quality checks, creating normalized coverage files in standard bedGraph
and bigWig file formats, that allow comparison between different files. Finally,
using such normalized and standardized files, deepTools can create many
publication-ready visualizations to identify enrichments and for functional
annotations of the genome.")
    ;; Dual licensing: deeptools/cm.py is under the BSD license, everything
    ;; else is under the MIT license.
    (license (list license:bsd-3 license:expat))))
2490
;; Deprecated alias: "deeptools" now refers to python-deeptools.
(define-deprecated deeptools python-deeptools)
2492
;; Cutadapt: adapter trimming for sequencing reads, fetched from PyPI.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "2.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "cutadapt" version))
       (sha256
        (base32 "1vqmsfkm6llxzmsz9wcfcvzx9a9f8iabvwik2rbyn7nc4wm25z89"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-setuptools-scm" ,python-setuptools-scm)))
    (inputs
     `(("python-dnaio" ,python-dnaio)
       ("python-xopen" ,python-xopen)))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
2517
;; libBigWig: C library for bigWig files.  Built directly with make (no
;; "configure" script); the installation prefix is passed as a make flag.
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.4.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dpryan79/libBigWig")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "09693dmf1scdac5pyq6qyn8b4mcipvnmc370k9a5z41z81m3dcsj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; tests require access to the web
       #:test-target "test"
       #:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (native-inputs
     `(("doxygen" ,doxygen)
       ;; Needed by the test suite.
       ("python" ,python-2)))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
2554
;; pyBigWig: Python bindings to libBigWig.  The bundled libBigWig sources
;; are deleted and setup.py is patched to link against the "BigWig"
;; library instead.
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.3.17")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32 "157x6v48y299zm382krf1dw08fdxg95im8lnabhp5vc94s04zxj1"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled libBigWig sources
           (delete-file-recursively "libBigWig")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-libBigWig
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Prepend "BigWig" to the list of libraries in setup.py.
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
2592
;; Python 2 variant of python-pybigwig.
(define-public python2-pybigwig (package-with-python2 python-pybigwig))
2595
;; schema-salad: schema language for linked data documents, from PyPI.
;; Two groups of tests are marked as skipped before the 'check phase.
(define-public python-schema-salad
  (package
    (name "python-schema-salad")
    (version "7.1.20210316164414")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "schema-salad" version))
              (sha256
               (base32
                "04jaykdpgfnkrghvli5swxzqp7yba842am4bz42hcfljsmkrxvrk"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'check 'skip-failing-tests
           (lambda _
             ;; The matched tests need network access; put a pytest skip
             ;; marker in front of each of their definitions.
             (substitute* "schema_salad/tests/test_cwl11.py"
               (("^def test_(secondaryFiles|outputBinding)" definition)
                (string-append
                 "@pytest.mark.skip(reason=\"test requires network access\")\n"
                 definition)))
             #t)))))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("python-pytest-runner" ,python-pytest-runner)))
    (propagated-inputs
     `(("python-cachecontrol" ,python-cachecontrol-0.11)
       ("python-lockfile" ,python-lockfile)
       ("python-mistune" ,python-mistune)
       ("python-rdflib" ,python-rdflib)
       ("python-rdflib-jsonld" ,python-rdflib-jsonld)
       ("python-requests" ,python-requests)
       ("python-ruamel.yaml" ,python-ruamel.yaml)
       ("python-typing-extensions" ,python-typing-extensions)))
    (home-page "https://github.com/common-workflow-language/schema_salad")
    (synopsis "Schema Annotations for Linked Avro Data (SALAD)")
    (description
     "Salad is a schema language for describing JSON or YAML structured linked
data documents. Salad schema describes rules for preprocessing, structural
validation, and hyperlink checking for documents described by a Salad schema.
Salad supports rich data modeling with inheritance, template specialization,
object identifiers, object references, documentation generation, code
generation, and transformation to RDF. Salad provides a bridge between document
and record oriented data modeling and the Semantic Web.")
    (license license:asl2.0)))
2643
;; Reference implementation of the Common Workflow Language, built from a git
;; checkout.  Three phases run after 'unpack: one relaxes version pins in
;; setup.py, one replaces the git-based timestamp lookup in gittaggers.py, and
;; one removes or skips tests.
(define-public cwltool
  (package
    (name "cwltool")
    (version "3.0.20210319143721")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/common-workflow-language/cwltool")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1sgs9ckyxb9f9169mc3wm9lnjg4080ai42xqsrwpw9l8apy4c9m5"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'loosen-version-restrictions
           (lambda _
             (substitute* "setup.py"
               (("== 1.5.1") ">=1.5.1")          ; prov
               ((", < 3.5") "")                  ; shellescape
               ((" >= 6.0.2, < 6.2") ""))        ; pytest
             #t))
         (add-after 'unpack 'dont-use-git
           (lambda _
             ;; The version string with its first four characters dropped is
             ;; spliced into the replacement expression.
             (let ((stamp (string-drop ,version 4)))
               (substitute* "gittaggers.py"
                 (("self.git_timestamp_tag\\(\\)")
                  (string-append
                   "time.strftime('.%Y%m%d%H%M%S', time.gmtime(int("
                   stamp ")))"))))
             #t))
         (add-after 'unpack 'modify-tests
           (lambda _
             (let ((skip "@pytest.mark.skip(reason=\"Disabled by Guix\")\n"))
               ;; Tries to connect to the internet.
               (for-each delete-file
                         '("tests/test_content_type.py"
                           "tests/test_udocker.py"
                           "tests/test_http_input.py"))
               (substitute* "tests/test_load_tool.py"
                 (("def test_load_graph_fragment_from_packed" definition)
                  (string-append skip definition)))
               (substitute* "tests/test_examples.py"
                 (("def test_env_filtering" definition)
                  (string-append skip definition))
                 ;; Tries to use cwl-runners.
                 (("def test_v1_0_arg_empty_prefix_separate_false" definition)
                  (string-append skip definition))))
             #t)))))
    (propagated-inputs
     `(("python-argcomplete" ,python-argcomplete)
       ("python-bagit" ,python-bagit)
       ("python-coloredlogs" ,python-coloredlogs)
       ("python-mypy-extensions" ,python-mypy-extensions)
       ("python-prov" ,python-prov)
       ("python-pydot" ,python-pydot)
       ("python-psutil" ,python-psutil)
       ("python-rdflib" ,python-rdflib)
       ("python-requests" ,python-requests)
       ("python-ruamel.yaml" ,python-ruamel.yaml)
       ("python-schema-salad" ,python-schema-salad)
       ("python-shellescape" ,python-shellescape)
       ("python-typing-extensions" ,python-typing-extensions)
       ;; Not listed as needed but still necessary:
       ("node" ,node)))
    (native-inputs
     `(("python-arcp" ,python-arcp)
       ("python-humanfriendly" ,python-humanfriendly)
       ("python-mock" ,python-mock)
       ("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-pytest-mock" ,python-pytest-mock)
       ("python-pytest-runner" ,python-pytest-runner)
       ("python-rdflib-jsonld" ,python-rdflib-jsonld)))
    (home-page
     "https://github.com/common-workflow-language/common-workflow-language")
    (synopsis "Common Workflow Language reference implementation")
    (description
     "This is the reference implementation of the @acronym{CWL, Common Workflow
Language} standards. The CWL open standards are for describing analysis
workflows and tools in a way that makes them portable and scalable across a
variety of software and hardware environments, from workstations to cluster,
cloud, and high performance computing (HPC) environments. CWL is designed to
meet the needs of data-intensive science, such as Bioinformatics, Medical
Imaging, Astronomy, Physics, and Chemistry. The @acronym{cwltool, CWL reference
implementation} is intended to be feature complete and to provide comprehensive
validation of CWL files as well as provide other tools related to working with
CWL descriptions.")
    (license license:asl2.0)))
2735
;; DendroPy, fetched from the upstream git repository at the tag "v" followed
;; by the version.
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.4.0")
    (source
     (origin
       (method git-fetch)
       ;; Source from GitHub so that tests are included.
       (uri (git-reference
             (url "https://github.com/jeetsukumaran/DendroPy")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "097hfyv2kaf4x92i4rjx0paw2cncxap48qivv8zxng4z7nhid0x9"))))
    (build-system python-build-system)
    (home-page "https://dendropy.org/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)))
2759
;; Python 2 variant of python-dendropy.  It inherits everything from the
;; package-with-python2 result and prepends a #:phases argument that renames
;; one test so it is no longer picked up under its original name.
(define-public python2-dendropy
  (let ((python2-variant (package-with-python2 python-dendropy)))
    (package/inherit python2-variant
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'remove-failing-test
             (lambda _
               ;; This test fails when the full test suite is run, as
               ;; documented at
               ;; https://github.com/jeetsukumaran/DendroPy/issues/74
               (substitute* "tests/test_dataio_nexml_reader_tree_list.py"
                 (("test_collection_comments_and_annotations")
                  "do_not_test_collection_comments_and_annotations"))
               #t)))
         ,@(package-arguments python2-variant))))))
2775
;; Python bindings for lib2bit, fetched from PyPI and built with the default
;; python-build-system phases.
(define-public python-py2bit
  (package
    (name "python-py2bit")
    (version "0.3.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "py2bit" version))
       (sha256
        (base32
         "1vw2nvw1yrl7ikkqsqs1pg239yr5nspvd969r1x9arms1k25a1a5"))))
    (build-system python-build-system)
    (home-page "https://github.com/dpryan79/py2bit")
    (synopsis "Access 2bit files using lib2bit")
    (description
     "This package provides Python bindings for lib2bit to access 2bit files
with Python.")
    (license license:expat)))
2794
;; Delly, built straight from its Makefile (there is no configure step).  The
;; bundled src/htslib directory is removed by the origin snippet, and the
;; "excludeTemplates" directory is copied to share/delly/templates after
;; installation.
(define-public delly
  (package
    (name "delly")
    (version "0.8.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dellytools/delly")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1ibnplgfzj96w8glkx17v7sld3pm402fr5ybmf3h0rlcryabxrqy"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file-recursively "src/htslib")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       #:make-flags
       (list "PARALLEL=1"  ; Allow parallel execution at run-time.
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (add-after 'install 'install-templates
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (target (string-append out "/share/delly/templates")))
               (mkdir-p target)
               (copy-recursively "excludeTemplates" target)
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("bzip2" ,bzip2)
       ("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/dellytools/delly")
    (synopsis "Integrated structural variant prediction method")
    (description "Delly is an integrated structural variant prediction method
that can discover and genotype deletions, tandem duplications, inversions and
translocations at single-nucleotide resolution in short-read massively parallel
sequencing data. It uses paired-ends and split-reads to sensitively and
accurately delineate genomic rearrangements throughout the genome.")
    (license license:gpl3+)))
2841
;; Tandem Repeats Finder, fetched from the upstream git repository and built
;; with the unmodified gnu-build-system phases.
(define-public trf
  (package
    (name "trf")
    (version "4.09.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Benson-Genomics-Lab/TRF")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0fhwr4s1mf8nw8fr5imwjvjr42b59p97zr961ifm8xl1bajz4wpg"))))
    (build-system gnu-build-system)
    (home-page "https://github.com/Benson-Genomics-Lab/TRF")
    (synopsis "Tandem Repeats Finder: a program to analyze DNA sequences")
    (description "A tandem repeat in DNA is two or more adjacent, approximate
copies of a pattern of nucleotides. Tandem Repeats Finder is a program to
locate and display tandem repeats in DNA sequences. In order to use the
program, the user submits a sequence in FASTA format. The output consists of
two files: a repeat table file and an alignment file. Submitted sequences may
be of arbitrary length. Repeats with pattern size in the range from 1 to 2000
bases are detected.")
    (license license:agpl3+)))
2865
;; RepeatMasker.  There is no conventional build: the 'build phase copies the
;; unpacked tree into share/RepeatMasker of the output and runs the bundled
;; Perl "configure" script there, and the 'install phase places the
;; RepeatMasker script in bin and wraps it with PERL5LIB.
(define-public repeat-masker
  (package
    (name "repeat-masker")
    (version "4.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.repeatmasker.org/"
                           "RepeatMasker/RepeatMasker-"
                           version ".tar.gz"))
       (sha256
        (base32 "03144sl9kh5ni2i33phi7x2pjndzbm5bjw3r4kqvmm6hxyb4k4x2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #false                  ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'build
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((share (string-append (assoc-ref outputs "out")
                                         "/share/RepeatMasker")))
               ;; Copy the whole source tree to its final location first, then
               ;; run the configure script from inside that directory.
               (mkdir-p share)
               (copy-recursively "." share)
               (with-directory-excursion share
                 ;; Pass the trf executable found on PATH and the bin
                 ;; directory of the hmmer input.
                 (invoke "perl" "configure"
                         "--trf_prgm" (which "trf")
                         "--hmmer_dir"
                         (string-append (assoc-ref inputs "hmmer")
                                        "/bin"))))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out   (assoc-ref outputs "out"))
                    (share (string-append out "/share/RepeatMasker"))
                    (bin   (string-append out "/bin"))
                    (path  (getenv "PERL5LIB")))
               (install-file (string-append share "/RepeatMasker") bin)
               ;; Prefix PERL5LIB with the build-time PERL5LIB value and the
               ;; share directory populated by the 'build phase.
               (wrap-program (string-append bin "/RepeatMasker")
                 `("PERL5LIB" ":" prefix (,path ,share)))))))))
    (inputs
     `(("perl" ,perl)
       ("perl-text-soundex" ,perl-text-soundex)
       ("python" ,python)
       ("python-h5py" ,python-h5py)
       ("hmmer" ,hmmer)
       ("trf" ,trf)))
    ;; The home page, synopsis and description used to be copies of the ones
    ;; from the trf package; they now describe RepeatMasker itself.
    (home-page "https://www.repeatmasker.org/")
    (synopsis "Screen DNA sequences for interspersed repeats")
    (description "RepeatMasker is a program that screens DNA sequences for
interspersed repeats and low complexity DNA sequences.  The output of the
program is a detailed annotation of the repeats that are present in the query
sequence as well as a modified version of the query sequence in which all the
annotated repeats have been masked.")
    (license license:osl2.1)))
2921
;; DIAMOND, built with CMake.  "-march=native" is stripped from
;; CMakeLists.txt after unpacking.
(define-public diamond
  (package
    (name "diamond")
    (version "0.9.30")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bbuchfink/diamond")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0k6f3kb6cniw11xw6763kkbs1sl0yack7xsy7q5fl5v170ssphq4"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bbuchfink/diamond")
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (license license:agpl3+)))
2955
;; Discrover, built with CMake.  Its manual is typeset with LaTeX during the
;; build, which is why several sources are patched after unpacking and HOME
;; is pointed at /tmp before the build starts.
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/maaskola/discrover")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "173fwi2vb6a5kp406hm3jj6j7v4whww796f2qcygp4rpvamh307y"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-latex-errors
           (lambda _
             ;; The bibliography is edited with the default port encoding
             ;; unset.
             (with-fluids ((%default-port-encoding #f))
               (substitute* "doc/references.bib"
                 (("\\{S\\}illanp[^,]+,")
                  "{S}illanp{\\\"a}{\\\"a},")))
             ;; XXX: I just can't get pdflatex to not complain about these
             ;; characters.  They end up in the manual via the generated
             ;; discrover-cli-help.txt.
             (substitute* "src/hmm/cli.cpp"
               (("µ") "mu")
               (("η") "eta")
               (("≤") "<="))
             ;; This seems to be a syntax error.
             (substitute* "doc/discrover-manual.tex"
               (("theverbbox\\[t\\]") "theverbbox"))
             #t))
         (add-after 'unpack 'add-missing-includes
           (lambda _
             ;; Insert "#include <random>" right after the include guard of
             ;; both headers.
             (substitute* "src/executioninformation.hpp"
               (("#define EXECUTIONINFORMATION_HPP" guard)
                (string-append guard "\n#include <random>")))
             (substitute* "src/plasma/fasta.hpp"
               (("#define FASTA_HPP" guard)
                (string-append guard "\n#include <random>")))
             #t))
         ;; FIXME: this is needed because we're using texlive-union, which
         ;; doesn't handle fonts correctly.  It expects to be able to generate
         ;; fonts in the home directory.
         (add-before 'build 'setenv-HOME
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("cairo" ,cairo)
       ("rmath-standalone" ,rmath-standalone)))
    (native-inputs
     `(("texlive" ,(texlive-union
                    (list texlive-fonts-cm
                          texlive-fonts-amsfonts

                          texlive-latex-doi
                          texlive-latex-examplep
                          texlive-latex-hyperref
                          texlive-latex-ms
                          texlive-latex-natbib
                          texlive-bibtex ; style files used by natbib
                          texlive-latex-pgf ; tikz
                          texlive-latex-verbatimbox)))
       ("imagemagick" ,imagemagick)))
    (home-page "https://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (license license:gpl3+)))
3028
;; EIGENSOFT.  The "bin" directory from the checkout is emptied by the origin
;; snippet, the build runs from the "src" sub-directory, and an extra phase
;; copies the resulting programs into the output.
(define-public eigensoft
  (package
    (name "eigensoft")
    (version "7.2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/DReichLab/EIG")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1c141fqvhnzibmnf22sv23vbmzm20kjjyrib44cfh75wyndp2d9k"))
       (modules '((guix build utils)))
       ;; Remove pre-built binaries.
       (snippet
        '(begin
           (delete-file-recursively "bin")
           (mkdir "bin")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure phase, but the Makefile is in a
         ;; sub-directory.
         (replace 'configure
           (lambda _
             (chdir "src")
             #t))
         ;; The provided install target only copies executables to
         ;; the "bin" directory in the build root.
         (add-after 'install 'actually-install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (programs (find-files "../bin" ".*")))
               (for-each (lambda (program)
                           (install-file program bin))
                         programs)
               #t))))))
    (inputs
     `(("gsl" ,gsl)
       ("lapack" ,lapack)
       ("openblas" ,openblas)
       ("perl" ,perl)
       ("gfortran" ,gfortran "lib")))
    (home-page "https://github.com/DReichLab/EIG")
    (synopsis "Tools for population genetics")
    (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
    ;; The license of the eigensoft tools is Expat, but since it's
    ;; linking with the GNU Scientific Library (GSL) the effective
    ;; license is the GPL.
    (license license:gpl3+)))
3087
;; Entrez Direct.  Nothing is compiled here: the scripts are copied to bin,
;; "xtract" and "rchive" are symlinks into the edirect-go-programs input,
;; every installed file is wrapped with PERL5LIB and PATH, and a small check
;; runs after wrapping.
(define-public edirect
  (package
    (name "edirect")
    (version "13.3.20200128")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect"
                           "/versions/" version
                           "/edirect-" version ".tar.gz"))
       (sha256
        (base32
         "093zp7klv81ph0y8mm8d78a9hnpfxbv2kdym70gzdf3vz176rw33"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file "Mozilla-CA.tar.gz")
           (substitute* "rchive.go"
             ;; This go library does not have any license.
             (("github.com/fiam/gounidecode/unidecode")
              "golang.org/rainycape/unidecode"))
           #t))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (delete 'check)                ; simple check after install
         (add-after 'unpack 'patch-programs
           (lambda _
             ;; Ignore errors about missing xtract.Linux and rchive.Linux.
             (substitute* "pm-refresh"
               (("cat \\\"\\$target")
                "grep ^[[:digit:]] \"$target"))
             #t))
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (go-programs (assoc-ref inputs "edirect-go-programs"))
                    (scripts
                     '("archive-pubmed" "asp-cp" "asp-ls" "download-ncbi-data"
                       "download-pubmed" "edirect.pl" "efetch" "epost" "esearch"
                       "fetch-pubmed" "ftp-cp" "ftp-ls" "has-asp" "index-pubmed"
                       "pm-prepare" "pm-refresh" "pm-stash" "pm-collect"
                       "pm-index" "pm-invert" "pm-merge" "pm-promote")))
               (for-each (lambda (script)
                           (install-file script bin))
                         scripts)
               ;; The Go programs come from a separate package; expose them
               ;; under the names "xtract" and "rchive".
               (symlink (string-append go-programs "/bin/xtract.Linux")
                        (string-append bin "/xtract"))
               (symlink (string-append go-programs "/bin/rchive.Linux")
                        (string-append bin "/rchive")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure everything can run in a pure environment.
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    ;; Directories prepended to PATH in every wrapper.
                    (search-dirs (list (string-append out "/bin")
                                       (dirname (which "sed"))
                                       (dirname (which "gzip"))
                                       (dirname (which "grep"))
                                       (dirname (which "perl"))
                                       (dirname (which "uname")))))
               (for-each
                (lambda (program)
                  (wrap-program program
                    (list "PERL5LIB" ":" 'prefix (list perl5lib)))
                  (wrap-program program
                    (list "PATH" ":" 'prefix search-dirs)))
                (find-files out ".")))
             #t))
         (add-after 'wrap-program 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (invoke (string-append out "/bin/edirect.pl")
                       "-filter" "-help"))
             #t)))))
    (inputs
     `(("edirect-go-programs" ,edirect-go-programs)
       ("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl-xml-simple" ,perl-xml-simple)
       ("perl" ,perl)))
    (home-page "https://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (native-search-paths
     ;; Ideally this should be set for LWP somewhere.
     (list (search-path-specification
            (variable "PERL_LWP_SSL_CA_FILE")
            (file-type 'regular)
            (separator #f)
            (files '("/etc/ssl/certs/ca-certificates.crt")))))
    (license license:public-domain)))
3203
;; The Go programs that ship in the edirect tarball.  This package inherits
;; from edirect (same source and metadata) but uses the Go build system with
;; custom 'build and 'install phases.
(define-public edirect-go-programs
  (package
    (inherit edirect)
    (name "edirect-go-programs")
    (build-system go-build-system)
    (arguments
     `(#:install-source? #f
       #:tests? #f                      ; No tests.
       #:import-path "ncbi.nlm.nih.gov/entrez/edirect"
       #:phases
       (modify-phases %standard-phases
         (replace 'build
           (lambda* (#:key import-path #:allow-other-keys)
             ;; Every program is compiled by a separate "go build -v -x"
             ;; invocation from within the import-path directory.
             (let ((go-build (lambda args
                               (apply invoke "go" "build" "-v" "-x" args))))
               (with-directory-excursion (string-append "src/" import-path)
                 (go-build "j2x.go")
                 (go-build "t2x.go")
                 (go-build "-o" "xtract.Linux" "xtract.go" "common.go")
                 (go-build "-o" "rchive.Linux" "rchive.go" "common.go")
                 (go-build "-o" "symbols.Linux" "s2p.go")))))
         (replace 'install
           (lambda* (#:key outputs import-path #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (dest (string-append out "/bin"))
                    (build-dir (string-append "src/" import-path "/"))
                    (programs '("j2x" "t2x" "symbols.Linux"
                                "xtract.Linux" "rchive.Linux")))
               (for-each (lambda (program)
                           (format #t "installing ~a~%" program)
                           (install-file (string-append build-dir program)
                                         dest))
                         programs)
               #t))))))
    (native-inputs '())
    (propagated-inputs '())
    (inputs
     `(("go-github-com-fatih-color" ,go-github-com-fatih-color)
       ("go-github-com-fogleman-gg" ,go-github-com-fogleman-gg)
       ("go-github-com-gedex-inflector" ,go-github-com-gedex-inflector)
       ("go-github-com-golang-freetype" ,go-github-com-golang-freetype)
       ("go-github-com-klauspost-cpuid" ,go-github-com-klauspost-cpuid)
       ("go-github-com-pbnjay-memory" ,go-github-com-pbnjay-memory)
       ("go-github-com-surgebase-porter2" ,go-github-com-surgebase-porter2)
       ("go-golang-org-rainycape-unidecode" ,go-golang-org-rainycape-unidecode)
       ("go-golang-org-x-image" ,go-golang-org-x-image)
       ("go-golang-org-x-text" ,go-golang-org-x-text)))))
3247
;; Exonerate, built from the release tarball with the standard
;; gnu-build-system phases; only parallel building is turned off.
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("glib" ,glib)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    ;; Grammar fix: the description used to read "using a many alignment
    ;; models".
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
3277
;; eXpress, built with CMake.  The CMake files are patched to use shared
;; Boost and protobuf libraries and the bamtools input, and the update check
;; is removed from src/main.cpp.
(define-public express
  (package
    (name "express")
    (version "1.5.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/adarob/eXpress")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "18nb22n7x820fzjngf4qgyb3mspqkw7xyk7v7s5ps6wfrd8qwscb"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 ;; Replace the in-tree bamtools include directory.
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               (substitute* "src/CMakeLists.txt"
                 ;; Replace the in-tree bamtools library directory.
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib"))
                 (("libprotobuf.a") "libprotobuf.so")))
             #t))
         (add-after 'unpack 'remove-update-check
           (lambda _
             (substitute* "src/main.cpp"
               (("#include \"update_check.h\"") "")
               (("check_version\\(PACKAGE_VERSION\\);") ""))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (license license:artistic2.0)))
3328
;; Express Beta Diversity.  The build runs from the "source" sub-directory
;; without a configure step; the check phase runs the built program with
;; "-u", and the install phase copies one script and the program to bin.
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dparks1134/ExpressBetaDiversity")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0s0yzg5c21349rh7x4w9266jsvnp7j1hp9cf8sk32hz8nvrj745x"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         (replace 'check
           (lambda _
             (invoke "../bin/ExpressBetaDiversity" "-u")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "../scripts/convertToEBD.py" bin)
               (install-file "../bin/ExpressBetaDiversity" bin)
               #t))))))
    (inputs
     `(("python" ,python-2)))
    (home-page "https://github.com/dparks1134/ExpressBetaDiversity")
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (license license:gpl3+)))
3366
;; FastTree.  The source is a single C file, so the 'unpack and 'configure
;; phases are removed and the 'build phase compiles that file twice with gcc:
;; once as "FastTree" and once with OpenMP enabled as "FastTreeMP".
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://www.microbesonline.org/fasttree/FastTree-"
             version ".c"))
       (sha256
        (base32
         "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Flags shared by both compiler invocations.
             (let ((common-flags '("-O3"
                                   "-finline-functions"
                                   "-funroll-loops"
                                   "-Wall")))
               ;; Plain binary.
               (apply invoke "gcc"
                      (append common-flags
                              (list "-o" "FastTree" source "-lm")))
               ;; OpenMP-enabled binary.
               (apply invoke "gcc" "-DOPENMP" "-fopenmp"
                      (append common-flags
                              (list "-o" "FastTreeMP" source "-lm")))
               #t)))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "FastTree" bin)
               (install-file "FastTreeMP" bin)
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    ;; Grammar fix: the description used to read "a million of sequences".
    (description
     "FastTree can handle alignments with up to a million sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
3422
;; FASTX-Toolkit, built from the release tarball with the standard
;; gnu-build-system phases and gcc-6 as a native input.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "https://github.com/agordon/fastx_toolkit/releases/download/"
         version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32
         "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (native-inputs
     `(("gcc" ,gcc-6)                   ;; doesn't build with later versions
       ("pkg-config" ,pkg-config)))
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (license license:agpl3+)))
3455
;; Flexbar: built with CMake; upstream provides no install target, so the
;; single executable is installed by hand.
(define-public flexbar
  (package
    (name "flexbar")
    (version "3.4.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/seqan/flexbar")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1pq9sxvdnldl14libk234m72dqhwgzs3acgl943wchwdqlcsi5r2"))))
    (build-system cmake-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Strip the " -march=native" flag from the CMake file so the
         ;; build is not tuned to the CPU of the build machine.
         (add-after 'unpack 'do-not-tune-to-CPU
           (lambda _
             (substitute* "src/CMakeLists.txt"
               ((" -march=native") ""))
             #t))
         ;; Run the upstream shell test script.  The phase honours
         ;; #:tests? so that the test suite can be disabled like for any
         ;; other package.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; Put the build directory first on PATH; presumably the
               ;; test script looks up the freshly built "flexbar" there.
               (setenv "PATH" (string-append (getcwd) ":" (getenv "PATH")))
               (with-directory-excursion "../source/test"
                 (invoke "bash" "flexbar_test.sh")))
             #t))
         ;; Copy the executable into the "bin" directory of the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (install-file "flexbar" bin))
             #t)))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (home-page "https://github.com/seqan/flexbar")
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (license license:bsd-3)))
3506
;; FragGeneScan: plain Makefile project (no configure script, no install
;; target, no test target).  Paths are patched before the build, files are
;; installed by hand, and the installed driver script is exercised on the
;; bundled examples in place of 'make check'.
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.30")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/fraggenescan/"
                       "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    ;; Note the trailing slash: path components appended
                    ;; below must not start with another one.
                    (share (string-append out "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 ;; Replace bare tool names with the absolute file names
                 ;; found on PATH at build time.
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 (("\\\"awk") (string-append "\"" (which "awk")))
                 ;; This script and other programs expect the training files
                 ;; to be in the non-standard location bin/train/XXX. Change
                 ;; this to be share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 share
                                 "train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  (string-append " strcpy(train_dir, \"" share "train/\");"))))
             #t))
         (replace 'build
           (lambda _
             (invoke "make" "clean")
             (invoke "make" "fgs")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (share (string-append out "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bin)
               (install-file "FragGeneScan" bin)
               (copy-recursively "train" share))
             #t))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run the examples and check that the
           ;; output files get created.  Since this phase stands in for
           ;; 'check', it is skipped when #:tests? is false.
           (lambda* (#:key outputs tests? #:allow-other-keys)
             (when tests?
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin/"))
                      (frag (string-append bin "run_FragGeneScan.pl")))
                 ;; Test complete genome.
                 (invoke frag
                         "-genome=./example/NC_000913.fna"
                         "-out=./test2"
                         "-complete=1"
                         "-train=complete")
                 (unless (and (file-exists? "test2.faa")
                              (file-exists? "test2.ffn")
                              (file-exists? "test2.gff")
                              (file-exists? "test2.out"))
                   (error "Expected files do not exist."))
                 ;; Test incomplete sequences.  Only the exit status is
                 ;; checked here ('invoke' raises on failure).
                 (invoke frag
                         "-genome=./example/NC_000913-fgs.ffn"
                         "-out=out"
                         "-complete=0"
                         "-train=454_30")))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2))) ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
3596
;; fxtract: built from a Git checkout together with a pinned commit of the
;; author's "util" repository, which is copied into the source tree before
;; the build.
(define-public fxtract
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ctSkennerton/fxtract")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0hab3gpwf4w9s87qlbswq6ws1qqybh4dcqk79q1ahyldzai5fgp5"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Replace the "util" directory of the checkout (removed with
           ;; 'rmdir', so it is expected to be empty) with the separately
           ;; fetched sources.
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util")
                   (commit util-commit)))
             ;; The checkout is named after the input label and the
             ;; upstream author.
             (file-name (string-append
                         "ctskennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
3661
;; GEMMA: Makefile-only project.  The bundled "contrib" directory is removed
;; from the source, and the binary is installed by hand from "bin/gemma".
(define-public gemma
  (package
    (name "gemma")
    (version "0.98.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/genetics-statistics/GEMMA")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1p8a7kkfn1mmrg017aziy544aha8i9h6wd1x2dk3w2794wl33qb7"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file-recursively "contrib")
                  #t))))
    (build-system gnu-build-system)
    (inputs
     `(("gsl" ,gsl)
       ("openblas" ,openblas)
       ("zlib" ,zlib)))
    (native-inputs
     `(("catch" ,catch-framework2-1)
       ("perl" ,perl)
       ("shunit2" ,shunit2)
       ("which" ,which)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Create the "bin" directory and point the Makefile at our
         ;; OpenBLAS instead of /usr/local/opt/openblas.
         (add-after 'unpack 'prepare-build
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "bin")
             (substitute* "Makefile"
               (("/usr/local/opt/openblas")
                (assoc-ref inputs "openblas")))
             #t))
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; 'make slow-check' expects shunit2-2.0.3.
               (with-directory-excursion "test"
                 (invoke "./test_suite.sh")))
             ;; Return #t whether or not the tests ran, like every other
             ;; phase in this package.
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "bin/gemma"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (home-page "https://github.com/genetics-statistics/GEMMA")
    (synopsis "Tool for genome-wide efficient mixed model association")
    (description
     "@acronym{GEMMA, Genome-wide Efficient Mixed Model Association} provides a
standard linear mixed model resolver with application in @acronym{GWAS,
genome-wide association studies}.")
    (license license:gpl3)))
3720
;; GRIT: Python 2 package whose Cython-generated C files are regenerated
;; from the .pyx sources at build time.
(define-public grit
  (package
    (name "grit")
    (version "2.0.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nboley/grit")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1l5v8vfvfbrpmgnrvbrbv40d0arhxcnmxgv2f1mlcqfa3q6bkqm9"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python2-cython)))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda _
             ;; Removing the shipped C files forces them to be generated
             ;; afresh from the pyx sources.
             (for-each delete-file
                       '("grit/sparsify_support_fns.c"
                         "grit/call_peaks_support_fns.c"))
             (substitute* "setup.py"
               (("Cython.Setup") "Cython.Build"))
             #t)))))
    (license license:gpl3+)
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    ;; The canonical <http://grit-bio.org> home page times out as of 2020-01-21.
    (home-page "https://github.com/nboley/grit")))
3765
;; HISAT (first generation): zip source archive, Makefile build without a
;; configure script, test target, or install target.
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
             version "-beta-source.zip"))
       (sha256
        (base32 "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; replace BUILD_HOST and BUILD_TIME for deterministic build
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             ;; Use the absolute file name of "env" in the wrapper scripts.
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install every file whose name matches the hisat program
             ;; pattern into the "bin" directory of the output.
             (let ((target (string-append (assoc-ref outputs "out") "/bin/"))
                   (programs
                    (find-files
                     "."
                     "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (lambda (program)
                           (install-file program target))
                         programs))
             #t)))))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (license license:gpl3+)
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (home-page "https://ccb.jhu.edu/software/hisat/index.shtml")))
3832
;; HISAT2: Makefile build from a Git checkout; the HTML manual is generated
;; with "make doc" and installed next to the programs.
(define-public hisat2
  (package
    (name "hisat2")
    (version "2.2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/DaehwanKimLab/hisat2/")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0lmzdhzjkvxw7n5w40pbv5fgzd4cz0f9pxczswn3d4cr0k10k754"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)
       ("pandoc" ,pandoc)))             ; for documentation
    (inputs
     `(("python" ,python-wrapper)))
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags (list "CC=gcc" "CXX=g++" "allall")
       #:modules ((guix build gnu-build-system)
                  (guix build utils)
                  (srfi srfi-26))
       #:phases
       (modify-phases %standard-phases
         ;; Replace the `date` invocation in the Makefile by a constant.
         (add-after 'unpack 'make-deterministic
           (lambda _
             (substitute* "Makefile"
               (("`date`") "0"))))
         (delete 'configure)
         (add-before 'build 'build-manual
           (lambda _
             (mkdir-p "doc")
             (invoke "make" "doc")))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin/"))
                    (docdir (string-append prefix "/share/doc/hisat2/"))
                    (programs
                     (find-files
                      "."
                      "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         programs)
               (mkdir-p docdir)
               (install-file "doc/manual.inc.html" docdir)))))))
    ;; HISAT2 contains files from Bowtie2, which is released under
    ;; GPLv2 or later. The HISAT2 source files are released under
    ;; GPLv3 or later.
    (license license:gpl3+)
    (synopsis "Graph-based alignment of genomic sequencing reads")
    (description "HISAT2 is a fast and sensitive alignment program for mapping
next-generation sequencing reads (both DNA and RNA) to a population of human
genomes (as well as to a single reference genome). In addition to using one
global @dfn{graph FM} (GFM) index that represents a population of human
genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
the whole genome. These small indexes, combined with several alignment
strategies, enable rapid and accurate alignment of sequencing reads. This new
indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
    (home-page "https://daehwankimlab.github.io/hisat2/")))
3895
;; HMMER: standard GNU build from the upstream release tarball.
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.3.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://eddylab.org/software/hmmer/hmmer-" version ".tar.gz"))
       (sha256
        (base32 "0s9wf6n0qanbx8qs6igfl3vyjikwbrvh4d9d6mv54yp3xysykzlj"))))
    (build-system gnu-build-system)
    ;; Perl and Python are only needed for the tests.
    (native-inputs
     (list (list "perl" perl)
           (list "python" python)))
    ;; hmmer uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:bsd-3)
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (home-page "http://hmmer.org/")))
3922
;; HTSeq: fetched from PyPI and built with the Python build system.
(define-public htseq
  (package
    (name "htseq")
    (version "0.9.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "HTSeq" version))
       (sha256
        (base32 "11flgb1381xdhk43bzbfm3vhnszkpqg6jk76rpa5xd1zbrvvlnxg"))))
    (build-system python-build-system)
    (native-inputs
     (list (list "python-cython" python-cython)))
    (inputs
     (list (list "python-pysam" python-pysam)
           (list "python-matplotlib" python-matplotlib)))
    ;; Numpy needs to be propagated when htseq is used as a Python library.
    (propagated-inputs
     (list (list "python-numpy" python-numpy)))
    (license license:gpl3+)
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (home-page "https://htseq.readthedocs.io/")))
3948
;; Variant of htseq obtained by passing it through 'package-with-python2'.
(define-public python2-htseq (package-with-python2 htseq))
3951
;; HTSJDK 2.3.0, built with Ant.  The "all" build target writes the jars
;; straight into the directory passed as -Ddist, so no separate install
;; phase is needed.
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "2.3.0") ; last version without build dependency on gradle
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1b178ixcabanm834ydjl3jiakpyxdmki32hqfv2abrzn3rcwa28i"))
              (modules '((guix build utils)))
              (snippet
               ;; Delete pre-built binaries, but keep an empty "lib"
               ;; directory in place.
               '(begin
                  (delete-file-recursively "lib")
                  (mkdir-p "lib")
                  #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; tests require Internet access
       #:jdk ,icedtea-8
       #:make-flags
       (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:build-target "all"
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
4001
;; HTSJDK 2.14.3.  The upstream build.xml and the test sources are deleted
;; after unpacking and the jar is built via #:jar-name instead.
(define-public java-htsjdk-latest
  (package
    (name "java-htsjdk")
    (version "2.14.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk")
                    (commit version)))
              ;; Same "NAME-VERSION-checkout" file name as before, spelled
              ;; with the helper used by the other Git origins here.
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1lmya1fdjy03mz6zmdmd86j9v9vfhqb3952mqq075navx1i6g4bc"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; tests require Scala
       #:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _ (delete-file "build.xml") #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _ (delete-file-recursively "src/test") #t)))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-junit" ,java-junit)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
4044
4045 ;; This is needed for picard 2.10.3
;; HTSJDK 2.10.1, derived from java-htsjdk-latest with its own source,
;; build system and build arguments.
(define-public java-htsjdk-2.10.1
  (package
    (inherit java-htsjdk-latest)
    (name "java-htsjdk")
    (version "2.10.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/samtools/htsjdk")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1kxh7slm2pm3x9p6jxa1wqsq9a31dhiiflhxnxqcisan4k3rwia2"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Scala
       #:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:phases
       (modify-phases %standard-phases
         ;; Drop the upstream build.xml right after unpacking.
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _
             (delete-file "build.xml")
             #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _
             (delete-file-recursively "src/test")
             #t)))))))
4071
4072 ;; This version matches java-htsjdk 2.3.0. Later versions also require a more
4073 ;; recent version of java-htsjdk, which depends on gradle.
;; Picard 2.3.0, built with the upstream Ant build file after patching it
;; to use the separately packaged htsjdk.
(define-public java-picard
  (package
    (name "java-picard")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1ll7mf4r3by92w2nhlmpa591xd1f46xlkwh59mq6fvbb5pdwzvx6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete pre-built binaries.
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           (substitute* "build.xml"
             ;; Remove build-time dependency on git.
             (("failifexecutionfails=\"true\"")
              "failifexecutionfails=\"false\"")
             ;; Use our htsjdk.
             (("depends=\"compile-htsjdk, ")
              "depends=\"")
             (("depends=\"compile-htsjdk-tests, ")
              "depends=\"")
             ;; Build picard-lib.jar before building picard.jar
             (("name=\"picard-jar\" depends=\"" matched)
              (string-append matched "picard-lib-jar, ")))
           #t))))
    (build-system ant-build-system)
    (inputs
     `(("java-htsjdk" ,java-htsjdk)
       ("java-guava" ,java-guava)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (arguments
     `(#:build-target "picard-jar"
       #:test-target "test"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:make-flags
       (list (string-append "-Dhtsjdk_lib_dir="
                            (assoc-ref %build-inputs "java-htsjdk")
                            "/share/java/htsjdk/")
             "-Dhtsjdk-classes=dist/tmp"
             (string-append "-Dhtsjdk-version="
                            ,(package-version java-htsjdk)))
       #:jdk ,icedtea-8
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         ;; Point ${htsjdk}/lib in build.xml at the jar directory of the
         ;; "java-htsjdk" input.
         (add-after 'unpack 'use-our-htsjdk
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htsjdk-jars (string-append
                                 (assoc-ref inputs "java-htsjdk")
                                 "/share/java/htsjdk/")))
               (substitute* "build.xml"
                 (("\\$\\{htsjdk\\}/lib") htsjdk-jars)))
             #t))
         ;; Remove "compile, " from the front of the "test" target's
         ;; dependency list.
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\""))
             #t))
         (replace 'install (install-jars "dist")))))
    (license license:expat)
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (home-page "http://broadinstitute.github.io/picard/")))
4151
4152 ;; This is needed for dropseq-tools
;; Picard 2.10.3.  The upstream build.xml is deleted after unpacking; the
;; build.xml present before the 'build phase is then rewritten so that the
;; jar manifest receives a Class-Path entry pointing at htsjdk.
(define-public java-picard-2.10.3
  (package
    (name "java-picard")
    (version "2.10.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1ajlx31l6i1k3y2rhnmgq07sz99g2czqfqgkr9mihmdjp3gwjhvi"))))
    (build-system ant-build-system)
    (propagated-inputs
     `(("java-htsjdk" ,java-htsjdk-2.10.1)))
    (native-inputs
     `(("java-testng" ,java-testng)
       ("java-guava" ,java-guava)))
    (arguments
     `(#:jar-name "picard.jar"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:jdk ,icedtea-8
       #:main-class "picard.cmdline.PicardCommandLine"
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform)
                  (sxml xpath))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _
             (delete-file "build.xml")
             #t))
         ;; This is necessary to ensure that htsjdk is found when using
         ;; picard.jar as an executable.
         (add-before 'build 'edit-classpath-in-manifest
           (lambda* (#:key inputs #:allow-other-keys)
             (chmod "build.xml" #o664)
             ;; Parse build.xml as SXML, extend the matching <target>
             ;; element, and write the result to a new file that then
             ;; replaces the original.
             (call-with-output-file "build.xml.new"
               (lambda (port)
                 (sxml->xml
                  (pre-post-order
                   (with-input-from-file "build.xml"
                     (lambda _ (xml->sxml #:trim-whitespace? #t)))
                   `((target . ,(lambda (node . children)
                                  (let ((target-names
                                         ((sxpath '(name *text*))
                                          (car children)))
                                        ;; FIXME: We're breaking the line
                                        ;; early with a dummy path to
                                        ;; ensure that the store reference
                                        ;; isn't broken apart and can still
                                        ;; be found by the reference
                                        ;; scanner.
                                        (class-path-entry
                                         (format #f
                                                 "\
Class-Path: /~a \
~a/share/java/htsjdk.jar${line.separator}${line.separator}"
                                                 ;; maximum line length is 70
                                                 (string-tabulate (const #\b) 57)
                                                 (assoc-ref inputs "java-htsjdk"))))
                                    (if (member "manifest" target-names)
                                        ;; Append two extra tasks to the
                                        ;; "manifest" target.
                                        `(,node ,@children
                                                (replaceregexp
                                                 (@ (file "${manifest.file}")
                                                    (match "\\r\\n\\r\\n")
                                                    (replace "${line.separator}")))
                                                (echo
                                                 (@ (message ,class-path-entry)
                                                    (file "${manifest.file}")
                                                    (append "true"))))
                                        ;; Any other target is kept as is.
                                        `(,node ,@children)))))
                     (*default* . ,(lambda (node . children)
                                     `(,node ,@children)))
                     (*text* . ,(lambda (_ text) text))))
                  port)))
             (rename-file "build.xml.new" "build.xml")
             #t)))))
    (license license:expat)
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (home-page "http://broadinstitute.github.io/picard/")))
4241
4242 ;; This is the last version of Picard to provide net.sf.samtools
;; Picard 1.113, derived from java-picard.  Besides the jars, this builds
;; and installs the libIntelDeflater.so JNI library, whose pre-built copy
;; is removed together with the rest of "lib" in the source snippet.
(define-public java-picard-1.113
  (package
    (inherit java-picard)
    (name "java-picard")
    (version "1.113")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0lkpvin2fz3hhly4l02kk56fqy8lmlgyzr9kmvljk6ry6l1hw973"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete pre-built binaries.
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:build-target "picard-jar"
       #:test-target "test"
       ;; FIXME: the class path at test time is wrong.
       ;; [testng] Error: A JNI error has occurred, please check your installation and try again
       ;; [testng] Exception in thread "main" java.lang.NoClassDefFoundError: com/beust/jcommander/ParameterException
       #:tests? #f
       #:jdk ,icedtea-8
       #:ant ,ant/java8
       ;; This is only used for tests.
       #:make-flags
       (list "-Dsamjdk.intel_deflater_so_path=lib/jni/libIntelDeflater.so")
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: This phase fails.
         (delete 'generate-jar-indices)
         ;; Do not use bundled ant bzip2.
         (add-after 'unpack 'use-ant-bzip
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((ant-jar (string-append (assoc-ref inputs "ant")
                                           "/lib/ant.jar")))
               (substitute* "build.xml"
                 (("\\$\\{lib\\}/apache-ant-1.8.2-bzip2.jar") ant-jar)))
             #t))
         ;; Rewire the Ant targets: "test" depends on "compile-tests"
         ;; instead of "compile", and "compile" no longer depends on
         ;; "compile-tests".
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\"compile-tests, ")
               (("name=\"compile\" depends=\"compile-src, compile-tests\"")
                "name=\"compile\" depends=\"compile-src\""))
             #t))
         ;; Make the default deflater path refer to the library installed
         ;; in this package's output instead of null.
         (add-after 'unpack 'fix-deflater-path
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((deflater (string-append (assoc-ref outputs "out")
                                            "/lib/jni/libIntelDeflater.so")))
               (substitute* "src/java/net/sf/samtools/Defaults.java"
                 (("getStringProperty\\(\"intel_deflater_so_path\", null\\)")
                  (string-append
                   "getStringProperty(\"intel_deflater_so_path\", \""
                   deflater
                   "\")"))))
             #t))
         ;; Build the deflater library, because we've previously deleted the
         ;; pre-built one. This can only be built with access to the JDK
         ;; sources.
         (add-after 'build 'build-jni
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((jdk-sources (assoc-ref inputs "jdk-src"))
                   (jdk-linux-headers
                    (string-append "-I" (assoc-ref inputs "jdk")
                                   "/include/linux")))
               (mkdir-p "lib/jni")
               (mkdir-p "jdk-src")
               (invoke "tar" "--strip-components=1" "-C" "jdk-src"
                       "-xf" jdk-sources)
               ;; Generate the JNI header for the deflater class.
               (invoke "javah" "-jni"
                       "-classpath" "classes"
                       "-d" "lib/"
                       "net.sf.samtools.util.zip.IntelDeflater")
               ;; Compile, then link the shared library.
               (with-directory-excursion "src/c/inteldeflater"
                 (invoke "gcc" "-I../../../lib" "-I."
                         jdk-linux-headers
                         "-I../../../jdk-src/src/share/native/common/"
                         "-I../../../jdk-src/src/solaris/native/common/"
                         "-c" "-O3" "-fPIC" "IntelDeflater.c")
                 (invoke "gcc" "-shared"
                         "-o" "../../../lib/jni/libIntelDeflater.so"
                         "IntelDeflater.o" "-lz" "-lstdc++")))
             #t))
         ;; We can only build everything else after building the JNI library.
         (add-after 'build-jni 'build-rest
           (lambda* (#:key make-flags #:allow-other-keys)
             (apply invoke "ant" "all" make-flags)
             #t))
         ;; Export JAVA6_HOME with the same value as JAVA_HOME.
         (add-before 'build 'set-JAVA6_HOME
           (lambda _
             (setenv "JAVA6_HOME" (getenv "JAVA_HOME"))
             #t))
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-jni-lib
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((jni-dir (string-append (assoc-ref outputs "out")
                                           "/lib/jni")))
               (mkdir-p jni-dir)
               (install-file "lib/jni/libIntelDeflater.so" jni-dir)
               #t))))))
    (inputs
     `(("java-snappy-1" ,java-snappy-1)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-cofoja" ,java-cofoja)
       ("ant" ,ant/java8)               ; for bzip2 support at runtime
       ("zlib" ,zlib)))
    (native-inputs
     `(("ant-apache-bcel" ,ant-apache-bcel)
       ("ant-junit" ,ant-junit)
       ("java-testng" ,java-testng)
       ("java-commons-bcel" ,java-commons-bcel)
       ("java-jcommander" ,java-jcommander)
       ("jdk" ,icedtea-8 "jdk")
       ;; The JDK source origin is taken from icedtea-8's own "jdk-drop"
       ;; native input.
       ("jdk-src" ,(car (assoc-ref (package-native-inputs icedtea-8)
                                   "jdk-drop")))))))
4360
(define-public fastqc
  (package
    (name "fastqc")
    (version "0.11.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
                           "projects/fastqc/fastqc_v"
                           version "_source.zip"))
       (sha256
        (base32
         "18rrlkhcrxvvvlapch4dpj6xc6mpayzys8qfppybi8jrpgx5cc5f"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; no test suite is available
       #:build-target "build"
       #:phases
       (modify-phases %standard-phases
         ;; Rewrite the jar names mentioned in build.xml so that they refer
         ;; to the jar files provided by our inputs.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((jbzip2 (assoc-ref inputs "java-jbzip2"))
                   (picard (assoc-ref inputs "java-picard-1.113"))
                   (jhdf5  (assoc-ref inputs "java-cisd-jhdf5")))
               (substitute* "build.xml"
                 (("jbzip2-0.9.jar")
                  (string-append jbzip2 "/share/java/jbzip2.jar"))
                 (("sam-1.103.jar")
                  (string-append picard "/share/java/sam-1.112.jar"))
                 (("cisd-jhdf5.jar")
                  (string-append jhdf5 "/share/java/sis-jhdf5.jar"))))
             #t))
         ;; The build file offers no installation target, so copy the build
         ;; results by hand and expose the launcher script in bin/.
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((prefix    (assoc-ref outputs "out"))
                    (bin-dir   (string-append prefix "/bin"))
                    (share-dir (string-append prefix "/share/fastqc/"))
                    (launcher  (string-append share-dir "/fastqc")))
               (mkdir-p bin-dir)
               (mkdir-p share-dir)
               (copy-recursively "bin" share-dir)
               ;; Hard-code the absolute location of the java executable
               ;; in the Perl launcher.
               (substitute* launcher
                 (("my \\$java_bin = 'java';")
                  (string-append "my $java_bin = '"
                                 (assoc-ref inputs "java")
                                 "/bin/java';")))
               (chmod launcher #o555)
               (symlink launcher (string-append bin-dir "/fastqc"))
               #t))))))
    (inputs
     `(("java" ,icedtea)
       ("perl" ,perl)                   ; the launcher is a Perl script
       ("java-cisd-jhdf5" ,java-cisd-jhdf5)
       ("java-picard-1.113" ,java-picard-1.113)
       ("java-jbzip2" ,java-jbzip2)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/")
    (synopsis "Quality control tool for high throughput sequence data")
    (description
     "FastQC aims to provide a simple way to do some quality control
checks on raw sequence data coming from high throughput sequencing
pipelines. It provides a modular set of analyses which you can use to
give a quick impression of whether your data has any problems of which
you should be aware before doing any further analysis.

The main functions of FastQC are:

@itemize
@item Import of data from BAM, SAM or FastQ files (any variant);
@item Providing a quick overview to tell you in which areas there may
be problems;
@item Summary graphs and tables to quickly assess your data;
@item Export of results to an HTML based permanent report;
@item Offline operation to allow automated generation of reports
without running the interactive application.
@end itemize\n")
    (license license:gpl3+)))
4439
(define-public fastp
  (package
    (name "fastp")
    (version "0.14.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/OpenGene/fastp")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1r6ms5zbf5rps4rgp4z73nczadl00b5rqylw8f684isfz27dp0xh"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; upstream has no tests
       ;; Tell the Makefile to install executables into our output.
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "BINDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Create the bin directory before the install phase runs.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin-dir (string-append (assoc-ref outputs "out")
                                           "/bin")))
               (mkdir-p bin-dir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/OpenGene/fastp/")
    (synopsis "All-in-one FastQ preprocessor")
    (description
     "Fastp is a tool designed to provide fast all-in-one preprocessing for
FastQ files. This tool has multi-threading support to afford high
performance.")
    (license license:expat)))
4475
(define-public htslib
  (package
    (name "htslib")
    (version "1.11")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "1mrq4mihzx37yqhj3sfz6da6mw49niia808bzsw2gkkgmadxvyng"))))
    (build-system gnu-build-system)
    ;; "--enable-gcs" and "--enable-s3" make htslib translate "gs://" and
    ;; "s3://" URLs to regular https links.  Both options only work when
    ;; "--enable-libcurl" is given as well.
    (arguments
     `(#:configure-flags
       (list "--enable-gcs"
             "--enable-libcurl"
             "--enable-s3")))
    (inputs
     `(("curl" ,curl)
       ("openssl" ,openssl)))
    ;; The pkg-config file lists zlib as a required library, so it has to
    ;; be propagated.
    (propagated-inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("perl" ,perl)))
    (home-page "https://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the @command{bgzip}, @command{htsfile}, and
@command{tabix} utilities.")
    ;; The Expat license covers everything except the files under cram/,
    ;; which are released under the modified BSD license.
    (license (list license:expat license:bsd-3))))
4513
;; Variant of htslib pinned to release 1.9; every field other than the
;; version and the source is inherited from htslib.
(define-public htslib-1.9
  (package
    (inherit htslib)
    (name "htslib")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "16ljv43sc3fxmv63w7b2ff8m1s7h89xhazwmbm1bicz8axq8fjz0"))))))
4526
;; Private (not exported) variant of htslib at release 1.3.1.  Remove this
;; package as soon as no other package relies upon it.
(define htslib-1.3
  (package
    (inherit htslib)
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))))
4540
(define-public idr
  (package
    (name "idr")
    (version "2.0.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nboley/idr")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "04j876h6z444v2q79drxx283d3k5snd72kj895wbalnl42206x9g"))
       ;; inv_cdf.c is generated C code; remove it from the source.
       (snippet
        '(begin
           (delete-file "idr/inv_cdf.c")
           #t))))
    (build-system python-build-system)
    ;; The only test ("test_inv_cdf.py") exercises features that are no
    ;; longer part of this package, and it asserts False, so it can never
    ;; pass.  Tests are disabled for that reason.
    (arguments `(#:tests? #f))
    (propagated-inputs
     `(("python-scipy" ,python-scipy)
       ("python-sympy" ,python-sympy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (home-page "https://github.com/nboley/idr")
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (license license:gpl2+)))
4576
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32
         "1k4pc3fvv6w1km2yph4m5sd78fbxp21d6xyzgmy0gjihzc6mb249"))))
    (build-system gnu-build-system)
    ;; The library goes to "out"; the language bindings get one output each.
    (outputs '("out"                    ;for library
               "ruby"                   ;for Ruby bindings
               "python"))               ;for Python bindings
    (arguments
     `(#:configure-flags
       ;; Install each set of bindings into its dedicated output.
       (let ((ruby-out   (assoc-ref %outputs "ruby"))
             (python-out (assoc-ref %outputs "python")))
         (list (string-append "--enable-ruby-binding=" ruby-out)
               (string-append "--enable-python-binding=" python-out)))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; To run tests, generator_manager.hpp uses $SHELL or falls
             ;; back to /bin/sh.
             (setenv "SHELL" (which "bash"))
             #t)))))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; JELLYFISH seems to be 64-bit only.
    (supported-systems '("x86_64-linux" "aarch64-linux" "mips64el-linux"))
    ;; As a whole the work is under the GPLv3 or later; some individual
    ;; files, such as lib/jsoncpp.cpp, are under the Expat license.
    (license (list license:gpl3+ license:expat))))
4630
(define-public khmer
  (package
    (name "khmer")
    (version "3.0.0a3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dib-lab/khmer")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "01l4jczglkl7yfhgvzx8j0df7k54bk1r8sli9ll16i1mis0d8f37"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the bundled copies of zlib, bzip2 and seqan.
           ;; NOTE(review): the previous comment stated that the bundled
           ;; seqan is kept because it is a modified subset of the old
           ;; version 1.4.1, yet the directory is deleted below -- confirm
           ;; which one is intended.
           ;;
           ;; The bundled MurmurHash is kept: its canonical repository,
           ;; 'SMHasher', is unsuitable for providing a library.  See
           ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
           (for-each delete-file-recursively
                     '("third-party/zlib"
                       "third-party/bzip2"
                       "third-party/seqan"))
           ;; Link against z and bz2 and drop the bundled include paths.
           (substitute* "setup.cfg"
             (("# libraries = z,bz2")
              "libraries = z,bz2")
             (("include:third-party/zlib:third-party/bzip2")
              "include:"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-cc
           (lambda _
             (setenv "CC" "gcc")
             #t))
         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; The 'reset-gzip-timestamps' phase can only do its work on
             ;; .gz files that are writable.
             (let ((archives (find-files (assoc-ref outputs "out")
                                         "\\.gz$")))
               (for-each make-file-writable archives)
               #t))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-pytest-runner" ,python-pytest-runner)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("seqan" ,seqan-1)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)))
    (home-page "https://khmer.readthedocs.org/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    ;; Builds on i686, armhf and mips64el fail with this error:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux" "aarch64-linux"))
    (license license:bsd-3)))
4700
(define-public kaiju
  (package
    (name "kaiju")
    (version "1.6.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bioinformatics-centre/kaiju")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "119pzi0ddzv9mjg4wwa6han0cwr3k3ssn7kirvsjfcq05mi5ka0x"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests are available
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The build is run from within the "src" directory.
         (add-before 'build 'move-to-src-dir
           (lambda _
             (chdir "src")
             #t))
         ;; Go back to the top-level directory and copy the contents of
         ;; its "bin" directory to the output.
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (mkdir-p target)
               (chdir "..")
               (copy-recursively "bin" target))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "http://kaiju.binf.ku.dk/")
    (synopsis "Fast and sensitive taxonomic classification for metagenomics")
    (description "Kaiju is a program for sensitive taxonomic classification
of high-throughput sequencing reads from metagenomic whole genome sequencing
experiments.")
    (license license:gpl3+)))
4738
(define-public macs
  (package
    (name "macs")
    (version "2.2.6")
    (source
     (origin
       ;; Fetch from git because the PyPi tarball does not contain tests.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/macs3-project/MACS")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1c5gxr0mk6hkd4vclf0k00wvyvzw2vrmk52c85338p7aqjwg6n15"))
       (modules '((guix build utils)))
       ;; Remove files generated by Cython.
       (snippet
        '(begin
           ;; For every "foo.pyx" delete the sibling "foo.c", if present.
           (for-each (lambda (pyx)
                       (let ((c-file (string-append
                                      (string-drop-right pyx 3) "c")))
                         (when (file-exists? c-file)
                           (delete-file c-file))))
                     (find-files "." "\\.pyx$"))
           (delete-file "MACS2/IO/CallPeakUnitPrecompiled.c")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run pytest with the installed modules on the Python path.
         (replace 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             (when tests?
               (add-installed-pythonpath inputs outputs)
               (invoke "pytest" "-v"))
             #t)))))
    (inputs
     `(("python-numpy" ,python-numpy)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)))
    (home-page "https://github.com/macs3-project/MACS")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
4790
(define-public mafft
  (package
    (name "mafft")
    (version "7.475")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "0i2i2m3blh2xkbkdk48hxfssks30ny0v381gdl7zwhcvp0axs26r"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       ;; Install below our output; executables go to its bin directory.
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR="
                                           (string-append out "/bin"))))
       #:phases
       (modify-phases %standard-phases
         ;; All following phases operate inside the "core" directory.
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern must name
               ;; "f2cl" exactly like the replacement does; with the "f"
               ;; missing it never matches and mafft-profile stays in PROGS.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               (("^rm -f mafft-distance mafft-distance.exe") ")#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         ;; Replace references to perl, awk and grep with the absolute file
         ;; names found on PATH at build time.
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         ;; Wrap every installed file in bin so that the coreutils bin
         ;; directory is prepended to PATH.
         (add-after 'install 'wrap-programs
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (path (string-append
                           (assoc-ref %build-inputs "coreutils") "/bin:")))
               (for-each (lambda (file)
                           (wrap-program file
                             `("PATH" ":" prefix (,path))))
                         (find-files bin)))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("ruby" ,ruby)
       ("gawk" ,gawk)
       ("grep" ,grep)
       ("coreutils" ,coreutils)))
    (home-page "https://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "https://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
4876
(define-public mash
  (package
    (name "mash")
    (version "2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/marbl/mash")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "049hwcc059p2fd9vwndn63laifvvsi0wmv84i6y1fr79k15dxwy6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the bundled copy of kseq.
           ;; TODO: Also delete bundled murmurhash and open bloom filter.
           (delete-file "src/mash/kseq.h")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:configure-flags
       ;; Tell configure where capnproto and gsl are installed.
       (let ((capnp (assoc-ref %build-inputs "capnproto"))
             (gsl   (assoc-ref %build-inputs "gsl")))
         (list (string-append "--with-capnp=" capnp)
               (string-append "--with-gsl=" gsl)))
       #:make-flags (list "CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; Include kseq.h from htslib in place of the deleted bundled copy.
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* '("src/mash/Sketch.cpp"
                            "src/mash/CommandFind.cpp"
                            "src/mash/CommandScreen.cpp")
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'fix-includes 'use-c++14
           (lambda _
             ;; capnproto 0.7 requires c++14 to build
             (substitute* '("configure.ac" "Makefile.in")
               (("c\\+\\+11") "c++14"))
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ;; Capnproto and htslib are statically embedded in the final
       ;; application, which is why their licenses are listed below too.
       ("capnproto" ,capnproto)
       ("htslib" ,htslib)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://mash.readthedocs.io")
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
4942
(define-public metabat
  (package
    (name "metabat")
    (version "2.12.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://bitbucket.org/berkeleylab/metabat.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0hyg2smw1nz69mfvjpk45xyyychmda92c80a0cv7baji84ri4iyn"))
       (patches (search-patches "metabat-fix-compilation.patch"))))
    (build-system scons-build-system)
    (arguments
     `(#:scons ,scons-python2
       #:scons-flags
       (let ((out   (assoc-ref %outputs "out"))
             (boost (assoc-ref %build-inputs "boost")))
         (list (string-append "PREFIX=" out)
               (string-append "BOOST_ROOT=" boost)))
       #:tests? #f                      ; the build phase already runs them
       #:phases
       (modify-phases %standard-phases
         ;; Include the bam, sam and kseq headers from the samtools and
         ;; htslib include directories instead of "bam/".
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* "src/BamUtils.h"
               (("^#include \"bam/bam\\.h\"")
                "#include \"samtools/bam.h\"")
               (("^#include \"bam/sam\\.h\"")
                "#include \"samtools/sam.h\""))
             (substitute* "src/KseqReader.h"
               (("^#include \"bam/kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         ;; Make SConstruct use the htslib and samtools inputs.
         (add-after 'unpack 'fix-scons
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib   (assoc-ref inputs "htslib"))
                   (samtools (assoc-ref inputs "samtools")))
               (substitute* "SConstruct"
                 (("^htslib_dir += 'samtools'")
                  (string-append "htslib_dir = '" htslib "'"))
                 (("^samtools_dir = 'samtools'")
                  (string-append "samtools_dir = '" samtools "'"))
                 (("^findStaticOrShared\\('bam', hts_lib")
                  (string-append "findStaticOrShared('bam', '"
                                 samtools
                                 "/lib'"))
                 ;; Do not distribute README.
                 (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") "")))
             #t)))))
    (inputs
     `(("zlib" ,zlib)
       ("perl" ,perl)
       ("samtools" ,samtools)
       ("htslib" ,htslib)
       ("boost" ,boost)))
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    ;; Restricted to x86 because the source code contains inline assembly.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license (license:non-copyleft "file://license.txt"
                                   "See license.txt in the distribution."))))
5015
(define-public minced
  (package
    (name "minced")
    (version "0.3.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ctSkennerton/minced")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1f5h9him0gd355cnx7p6pnxpknhckd4g0v62mg8zyhfbx9as25fv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; Adjust the version string in the expected test output.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         (replace 'install              ; there is no install target
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((bin-dir  (string-append (assoc-ref outputs "out")
                                             "/bin"))
                    (launcher (string-append bin-dir "/minced")))
               ;; Upstream's wrapper script tries to figure out where it is
               ;; located before running the JAR.  We know these locations,
               ;; so we write our own wrapper and thereby avoid a
               ;; dependency on coreutils.
               (install-file "minced.jar" bin-dir)
               (call-with-output-file launcher
                 (lambda (port)
                   (display
                    (string-append
                     "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
                     (assoc-ref inputs "jre") "/bin/java -jar "
                     bin-dir "/minced.jar \"$@\"\n")
                    port)))
               (chmod launcher #o555))
             #t)))))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (home-page "https://github.com/ctSkennerton/minced")
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (license license:gpl3+)))
5072
(define-public miso
  (package
    (name "miso")
    (version "0.5.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "misopy" version))
       (sha256
        (base32
         "1z3x0vd8ma7pdrnywj7i3kgwl89sdkwrrn62zl7r5calqaq2hyip"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "setup.py"
             ;; Without setuptools the executables are not installed.
             (("distutils.core") "setuptools")
             ;; Compile and link with "gcc" rather than "cc".
             (("^defines")
              "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; Python 2 is the only supported version
       #:tests? #f))                    ; there is no "test" target
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-pytz" ,python2-pytz)))  ; for tests
    (home-page "https://www.genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
5120
(define-public muscle
  (package
    (name "muscle")
    (version "3.8.1551")
    (source
     (origin
       (method url-fetch/tarbomb)
       (uri (string-append
             "http://www.drive5.com/muscle/muscle_src_"
             version ".tar.gz"))
       (sha256
        (base32
         "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags (list "LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; In the absence of tests, merely check that the program runs.
           (lambda _
             (invoke "./muscle" "-version")
             #t))
         ;; Copy the executable to the bin directory of the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "muscle"
                           (string-append (assoc-ref outputs "out")
                                          "/bin"))
             #t)))))
    (home-page "http://www.drive5.com/muscle")
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    ;; The license is stated in the output of 'muscle -h' and in usage.cpp.
    (license license:public-domain)))
5155
(define-public newick-utils
  ;; We package a git commit because there are no recent releases.
  (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87"))
    (package
      (name "newick-utils")
      ;; The version ends with the first eight characters of the commit.
      (version (string-append "1.6-1." (string-take commit 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tjunier/newick_utils")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
      (build-system gnu-build-system)
      (inputs
       ;; XXX: TODO: Enable Lua and Guile bindings.
       ;; https://github.com/tjunier/newick_utils/issues/13
       `(("libxml2" ,libxml2)
         ("flex" ,flex)
         ("bison" ,bison)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)))
      (home-page "https://github.com/tjunier/newick_utils")
      (synopsis "Programs for working with newick format phylogenetic trees")
      (description
       "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
      (license license:bsd-3))))
5189
(define-public orfm
  (package
    (name "orfm")
    (version "0.7.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32
         "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
5214
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32
         "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ;pbcore < 2.0 requires Python 2.7
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-sphinx-dependency
           (lambda _
             ;; Only the documentation tests need Sphinx, and we do not
             ;; run those.  Moreover, Sphinx would pull in python2-sphinx,
             ;; which is no longer maintained.
             (substitute* "requirements-dev.txt"
               (("^sphinx") ""))
             #t)))))
    (propagated-inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-pyxb" ,python2-pyxb)))
    (home-page "https://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
5251
(define-public python2-warpedlmm
  ;; Python 2 build of WarpedLMM, fetched as a zip archive from PyPI (hence
  ;; the unzip native input).
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "WarpedLMM" version ".zip"))
       (sha256
        (base32 "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; requires Python 2.7
       #:tests? #f                      ; test data are not included
       #:phases
       (modify-phases %standard-phases
         ;; Import weave as a stand-alone module instead of taking it from
         ;; the scipy namespace.
         (add-after 'unpack 'use-weave
           (lambda _
             (substitute* "warpedlmm/util/linalg.py"
               (("from scipy import linalg, weave")
                "from scipy import linalg\nimport weave"))
             #t)))))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)
       ("python-weave" ,python2-weave)))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (home-page "https://github.com/PMBio/warpedLMM")
    (license license:asl2.0)))
5294
(define-public pbtranscript-tofu
  ;; Built from a pinned upstream commit; the version string embeds the
  ;; first seven characters of that commit.
  (let ((commit "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "2.2.3." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; remove bundled Cython sources
             (delete-file "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; FIXME: Tests fail with "No such file or directory:
         ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
         #:tests? #f
         #:phases
         (modify-phases %standard-phases
           ;; All later phases run from the package's sub-directory.
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; With setuptools version 18.0 and later this setup.py hack causes
           ;; a build error, so we disable it.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (license license:bsd-3))))
5351
(define-public prank
  ;; PRANK is built from the "src" sub-directory of the release tarball.
  ;; There is no configure script and no install target, so the install
  ;; phase copies the binary and man page by hand and wraps the binary so
  ;; that it finds its helper programs.
  (package
    (name "prank")
    (version "170427")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://wasabiapp.org/download/prank/prank.source."
             version ".tgz"))
       (sha256
        (base32 "0nc8g9c5rkdxcir46s0in9ci1sxwzbjibxrvkksf22ybnplvagk2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         ;; This phase is also inserted directly after 'unpack, so it runs
         ;; before 'enter-src-dir; that is why the Makefile is addressed as
         ;; "src/Makefile".
         (add-after 'unpack 'remove-m64-flag
           ;; Prank will build with the correct 'bit-ness' without this flag
           ;; and this allows building on 32-bit machines.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (delete 'configure)
         (replace 'install
           ;; Take the inputs from the phase's keyword arguments rather than
           ;; from the global %build-inputs.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1"))
                    ;; Directories of the helper programs, prepended to PATH
                    ;; by the wrapper below.
                    (path (string-append
                           (assoc-ref inputs "mafft") "/bin:"
                           (assoc-ref inputs "exonerate") "/bin:"
                           (assoc-ref inputs "bppsuite") "/bin")))
               (install-file "prank" bin)
               (wrap-program (string-append bin "/prank")
                 `("PATH" ":" prefix (,path)))
               (install-file "prank.1" man))
             #t)))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (home-page "http://wasabiapp.org/software/prank/")
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (license license:gpl2+)))
5409
(define-public proteinortho
  ;; Proteinortho ships a plain Makefile: the install directory is patched
  ;; into it, created by hand, and the installed scripts are wrapped with
  ;; the build-time PATH.
  (package
    (name "proteinortho")
    (version "6.0.14")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://gitlab.com/paulklemm_PHD/proteinortho.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0pmy617zy2z2w6hjqxjhf3rzikf5n3mpia80ysq8233vfr7wrzff"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove pre-built scripts
           (delete-file-recursively "src/BUILD/")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           ;; There is no configure script, so we modify the Makefile directly.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (substitute* "Makefile"
                 (("INSTALLDIR=.*")
                  (string-append "INSTALLDIR=" bindir "\n"))))
             #t))
         (add-before 'install 'make-install-directory
           ;; The install directory is not created during 'make install'.
           (lambda* (#:key outputs #:allow-other-keys)
             (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
             #t))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PATH"))
                    ;; The main program plus every installed Perl/Python
                    ;; script.
                    (scripts (cons (string-append out "/bin/proteinortho")
                                   (find-files out "\\.(pl|py)$"))))
               (for-each (lambda (script)
                           (wrap-script script `("PATH" ":" prefix (,path))))
                         scripts))
             #t)))))
    (inputs
     `(("guile" ,guile-3.0)             ; for wrap-script
       ("diamond" ,diamond)
       ("perl" ,perl)
       ("python" ,python-wrapper)
       ("blast+" ,blast+)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (native-inputs
     `(("which" ,which)))
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (license license:gpl3+)))
5475
(define-public pyicoteo
  ;; Pyicoteo, fetched from the upstream Git repository at the release tag
  ;; and built with Python 2.
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://bitbucket.org/regulatorygenomicsupf/pyicoteo.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0hz5g8d25lbjy1wpscr490l0lmyvaix893hhax4fxnh1h9w34w8p"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; does not work with Python 3
       #:tests? #f))      ; there are no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    ;; The enumeration below has seven entries; the count in the
    ;; introductory sentence must match it.
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently seven
different command-line tools:

@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
5514
(define-public prodigal
  ;; Prodigal has no configure script and no check target; the install
  ;; location is handed to make through INSTALLDIR.
  (package
    (name "prodigal")
    ;; Check for a new home page when updating this package:
    ;; https://github.com/hyattpd/Prodigal/issues/36#issuecomment-536617588
    (version "2.6.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/hyattpd/Prodigal")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1fs1hqk83qjbjhrvhw6ni75zakx5ki1ayy3v6wwkn3xvahc9hi5s"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "INSTALLDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (home-page "https://github.com/hyattpd/Prodigal")
    (license license:gpl3+)))
5547
(define-public roary
  ;; Roary is distributed on CPAN, but nothing is configured or built here:
  ;; the test files are run one by one, the "bin" and "lib" trees are copied
  ;; into the output, and every installed program is wrapped with the
  ;; environment variables it needs at run time.
  (package
    (name "roary")
    (version "3.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "0qxrds9wx7cfhlkihrp6697kx0flhhxymap9fwan0b3rbdhcnmff"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             (for-each (lambda (file)
                         (display file)(display "\n")
                         (invoke "perl" file))
                       (find-files "t" ".*\\.t$"))
             #t))
         (replace 'install
           ;; There is no 'install' target in the Makefile.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; Everything in "bin" whose name does not end in "R" gets
               ;; both the Perl library path and PATH in a single wrapper.
               ;; The names returned by find-files are relative
               ;; ("bin/..."), so they are re-rooted under OUT.
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/" prog)
                             `("PERL5LIB" ":" prefix
                               (,(string-append perl5lib ":" out
                                                "/lib/perl5/site_perl")))
                             `("PATH" ":" prefix
                               (,(string-append path ":" out "/bin")))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script needs the R site library and
               ;; coreutils instead.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/")))
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-digest-md5-file" ,perl-digest-md5-file)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r-minimal" ,r-minimal)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "https://sanger-pathogens.github.io/Roary/")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
5664
(define-public raxml
  ;; RAxML is built with one of the Makefiles shipped in the repository
  ;; (selected via make flags).  There is no configure script and no install
  ;; target, so the resulting executable is installed by hand together with
  ;; a "raxml" alias.
  (package
    (name "raxml")
    (version "8.2.12")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/stamatak/standard-RAxML")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1jqjzhch0rips0vp04prvb8vmc20c5pdmsqn8knadcf91yy859fh"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; There are no tests.
       ;; Use 'standard' Makefile rather than SSE or AVX ones.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; Create the "raxml" alias inside BIN.  A bare relative
               ;; link name would be created in the build directory and
               ;; never reach the output.
               (symlink (string-append bin "/" executable)
                        (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "https://cme.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:gpl2+)))
5705
(define-public rsem
  ;; RSEM is built against the packaged Boost and htslib instead of the
  ;; copies bundled in the repository.  Installation is done by hand, and
  ;; the listed programs are wrapped so that they find rsem_perl_utils.pm.
  (package
    (name "rsem")
    (version "1.3.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deweylab/RSEM")
             (commit (string-append "v" version))))
       (sha256
        (base32 "1jlq11d1p8qp64w75yj8cnbbd1a93viq10pzsbwal7vdn8fg13j1"))
       (file-name (git-file-name name version))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove bundled copy of boost and samtools
           (delete-file-recursively "boost")
           (delete-file-recursively "samtools-1.3")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags
       (let ((boost (assoc-ref %build-inputs "boost"))
             (htslib (assoc-ref %build-inputs "htslib")))
         (list (string-append "BOOST=" boost "/include/")
               (string-append "SAMHEADERS=" htslib "/include/htslib/sam.h")
               (string-append "SAMLIBS=" htslib "/lib/libhts.so")))
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script.
         ;; Do not build bundled samtools library.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : \\$\\(PROGRAMS\\).*") "all: $(PROGRAMS)\n")
               (("^\\$\\(SAMLIBS\\).*") ""))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               ;; Every file matching "rsem-.*" in the build tree goes to
               ;; BIN.
               (for-each (lambda (program)
                           (install-file program bin))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perl))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (site-perl (string-append out "/lib/perl5/site_perl")))
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/bin/" prog)
                             `("PERL5LIB" ":" prefix (,site-perl))))
                         '("rsem-calculate-expression"
                           "rsem-control-fdr"
                           "rsem-generate-data-matrix"
                           "rsem-generate-ngvector"
                           "rsem-plot-transcript-wiggles"
                           "rsem-prepare-reference"
                           "rsem-run-ebseq"
                           "rsem-run-prsem-testing-procedure")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("r-minimal" ,r-minimal)
       ("perl" ,perl)
       ("htslib" ,htslib-1.3)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
5794
(define-public rseqc
  ;; RSeQC, fetched from SourceForge and built with the default Python
  ;; build system.
  (package
    (name "rseqc")
    (version "3.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rseqc/"
                           "RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "0gbb9iyb7swiv5455fm5rg98r7l6qn27v564yllqjd574hncpx6m"))))
    (build-system python-build-system)
    (inputs
     `(("python-cython" ,python-cython)
       ("python-bx-python" ,python-bx-python)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-numpy" ,python-numpy)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-nose" ,python-nose)))
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (home-page "http://rseqc.sourceforge.net/")
    (license license:gpl3+)))
5827
(define-public seek
  ;; There are no release tarballs. According to the installation
  ;; instructions at http://seek.princeton.edu/installation.jsp, the latest
  ;; stable release is identified by this changeset ID.
  (let ((changeset "2329130")
        (revision "1"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." changeset))
      (source
       (origin
         (method hg-fetch)
         (uri (hg-reference
               (url "https://bitbucket.org/libsleipnir/sleipnir")
               (changeset changeset)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((srfi srfi-1)
                    (guix build gnu-build-system)
                    (guix build utils))
         #:phases
         ;; Sub-directories of "tools/" that get an extra "make" and
         ;; "make install" after the corresponding standard phases.
         (let ((tool-dirs '("SeekMiner"
                            "SeekEvaluator"
                            "SeekPrep"
                            "Distancer"
                            "Data2DB"
                            "PCL2Bin")))
           (modify-phases %standard-phases
             (replace 'bootstrap
               (lambda _
                 ;; Point the script at the perl found in PATH, then
                 ;; generate the build files with upstream's script.
                 (substitute* "gen_tools_am"
                   (("/usr/bin/env.*") (which "perl")))
                 (invoke "bash" "gen_auto")
                 #t))
             (add-after 'build 'build-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (for-each
                  (lambda (tool)
                    (with-directory-excursion (string-append "tools/" tool)
                      (apply invoke "make" make-flags)))
                  tool-dirs)
                 #t))
             (add-after 'install 'install-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (for-each
                  (lambda (tool)
                    (with-directory-excursion (string-append "tools/" tool)
                      (apply invoke "make"
                             (append make-flags '("install")))))
                  tool-dirs)
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (home-page "http://seek.princeton.edu")
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (license license:cc-by3.0))))
5900
(define-public samtools
  ;; Samtools is built against the packaged htslib (the bundled copy is
  ;; deleted from the source).  Besides the standard installation, the
  ;; static libbam library and the top-level headers are installed too.
  (package
    (name "samtools")
    (version "1.11")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1dp5wknak4arnw5ghhif9mmljlfnw5bgm91wib7z0j8wdjywx0z2"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.11")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:configure-flags (list "--with-ncurses")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             (substitute* "test/test.pl"
               ;; The test script calls out to /bin/bash
               (("/bin/bash") (which "bash")))
             #t))
         (add-after 'install 'install-library
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (libdir (string-append out "/lib")))
               (install-file "libbam.a" libdir)
               #t)))
         (add-after 'install 'install-headers
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (include-dir (string-append out "/include/samtools/"))
                    ;; Header files sitting directly in the build
                    ;; directory.
                    (headers (scandir "." (lambda (entry)
                                            (string-match "\\.h$" entry)))))
               (for-each (lambda (header)
                           (install-file header include-dir))
                         headers)
               #t))))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
5962
(define-public samtools-1.9
  ;; Variant of samtools at version 1.9, built against the matching
  ;; htslib-1.9.  Everything not overridden here is inherited.
  (package
    (inherit samtools)
    (name "samtools")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "10ilqbmm7ri8z431sn90lvbjwizd0hhkf9rcqw8j823hf26nhgq8"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.9")
           #t))))
    (inputs
     `(("htslib" ,htslib-1.9)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))))
5987
(define-public samtools-0.1
  ;; This is the most recent version of the 0.1 line of samtools. The input
  ;; and output formats differ greatly from that used and produced by samtools
  ;; 1.x and is still used in many bioinformatics pipelines.
  (package
    (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags
       (list "LIBCURSES=-lncurses")
       ;; Reuse the arguments of the main samtools package, adjusting only
       ;; its phases: install the binary by hand and drop the phases that
       ;; do not apply to this release.
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ((#:phases phases)
            `(modify-phases ,phases
               (replace 'install
                 (lambda* (#:key outputs #:allow-other-keys)
                   (let* ((out (assoc-ref outputs "out"))
                          (bindir (string-append out "/bin")))
                     (mkdir-p bindir)
                     (install-file "samtools" bindir)
                     #t)))
               (delete 'patch-tests)
               (delete 'configure))))))))
6018
(define-public mosaik
  ;; Built from a pinned commit.  The 'configure phase only enters "src";
  ;; the 'install phase then copies the sibling "bin" directory into the
  ;; output.
  (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source
       (origin
         ;; There are no release tarballs nor tags.
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/wanpinglee/MOSAIK")
               (commit commit)))
         (file-name (string-append name "-" version))
         (sha256
          (base32 "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           (replace 'configure
             (lambda _
               (chdir "src")
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin")))
                 (mkdir-p bindir)
                 (copy-recursively "../bin" bindir)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib:static" ,zlib "static")
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://github.com/wanpinglee/MOSAIK")
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
      ;; code released into the public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
6066
(define-public ngs-sdk
  ;; The SDK lives in the "ngs-sdk" sub-directory of the repository and is
  ;; configured by a custom script that is invoked by hand.
  (package
    (name "ngs-sdk")
    (version "2.10.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/ngs")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1ix51c25hjn57w93qmwzw80xh2i34wx8j2hn7szh8p6w8i3az5qa"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "ngs-sdk")
             #t))
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (build-prefix-flag
                     (string-append "--build-prefix=" (getcwd) "/build"))
                    (prefix-flag (string-append "--prefix=" out)))
               ;; Allow 'konfigure.perl' to find 'package.prl'.
               (setenv "PERL5LIB"
                       (string-append ".:" (getenv "PERL5LIB")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure" build-prefix-flag prefix-flag)
               #t))))))
    (native-inputs
     `(("perl" ,perl)))
    ;; According to the test
    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
6114
(define-public java-ngs
  ;; Java bindings from the same repository as ngs-sdk.  The package
  ;; inherits from ngs-sdk and only redirects the 'enter-dir phase to the
  ;; "ngs-java" sub-directory.
  (package
    (inherit ngs-sdk)
    (name "java-ngs")
    (arguments
     (substitute-keyword-arguments
         ;; Prepend an explicit #:modules list to the inherited arguments.
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases phases)
        `(modify-phases ,phases
           (replace 'enter-dir
             (lambda _
               (chdir "ngs-java")
               #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
6132
;; NCBI's VDB database engine.  Besides the regular installation, this
;; package also installs the "ilib", "include" and "kfg" directories, which
;; 'sra-tools' looks for.
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.10.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/ncbi-vdb")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0m8hlxscidsfqm9x9fyi62q6lpf1dv5115kgjjgnrkl49q9c27m6"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;there is no "check" target
       #:parallel-build? #f             ;parallel builds are not supported
       #:make-flags '("HAVE_HDF5=1")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-files-writable
           (lambda _
             (for-each make-file-writable
                       (find-files "." ".*"))
             #t))
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Perl no longer has "." in @INC; add the "setup" directory
             ;; explicitly to avoid the "dotless @INC" build failure.
             (let ((setup-directory (string-append (getcwd) "/setup")))
               (setenv "PERL5LIB"
                       (string-append setup-directory ":"
                                      (getenv "PERL5LIB"))))
             #t))
         ;; Drop the assembler flags discussed at
         ;; https://github.com/ncbi/ncbi-vdb/issues/14
         (add-after 'unpack 'patch-krypto-flags
           (lambda _
             (substitute* "libs/krypto/Makefile"
               (("-Wa,-march=generic64\\+aes") "")
               (("-Wa,-march=generic64\\+sse4") ""))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Point the libmagic include path at our "libmagic" input.
             (substitute* "setup/package.prl"
               (("name => 'magic', Include => '/usr/include'")
                (string-append "name=> 'magic', Include => '"
                               (assoc-ref inputs "libmagic")
                               "/include'")))

             ;; Also install the kdf5 library, which sra-tools needs.
             (substitute* "build/Makefile.install"
               (("LIBRARIES_TO_INSTALL =")
                "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

             ;; Always compile with SSE2 enabled.
             (substitute* "build/Makefile.env"
               (("CFLAGS =" assignment)
                (string-append assignment "-msse2 ")))

             ;; Look for ngs-java in our "java-ngs" input.
             (substitute* "setup/package.prl"
               (("/usr/local/ngs/ngs-java")
                (assoc-ref inputs "java-ngs")))

             ;; Run the script by hand with only the flags it understands;
             ;; it does not recognize things like '--enable-fast-install'.
             (let ((option (lambda (flag value)
                             (string-append flag value))))
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (option "--prefix=" (assoc-ref outputs "out"))
                       "--debug"
                       (option "--with-xml2-prefix="
                               (assoc-ref inputs "libxml2"))
                       (option "--with-ngs-sdk-prefix="
                               (assoc-ref inputs "ngs-sdk"))
                       (option "--with-hdf5-prefix="
                               (assoc-ref inputs "hdf5"))))
             #t))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out  (assoc-ref outputs "out"))
                    (ilib (string-append out "/ilib")))
               ;; Interface libraries.  The build tree uses the Linux
               ;; architecture name (e.g. "linux/gcc/i386" on i686), not the
               ;; target system prefix ("i686"), hence the conversion below.
               (mkdir ilib)
               (copy-recursively
                (string-append "build/ncbi-vdb/linux/gcc/"
                               ,(system->linux-architecture
                                 (or (%current-target-system)
                                     (%current-system)))
                               "/rel/ilib")
                ilib)
               ;; Interface headers.
               (copy-recursively "interfaces"
                                 (string-append out "/include")))
             #t))
         ;; sra-tools needs these configuration files.
         (add-after 'install 'install-configuration-files
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((kfg (string-append (assoc-ref outputs "out") "/kfg")))
               (mkdir kfg)
               (for-each (lambda (file)
                           (install-file file kfg))
                         '("libs/kfg/default.kfg"
                           "libs/kfg/certs.kfg")))
             #t)))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs
     `(("perl" ,perl)))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
6250
;; PLINK 1.07, built from the upstream zip archive with two local patches.
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;there is no "check" target
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; An empty value disables phoning home.
               "WITH_WEBCHECK="))
       #:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.
         (delete 'configure)
         ;; Install the "plink" executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "plink"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; The code is under GPLv2, with the exception of fisher.h, which is
    ;; under LGPLv2.1+.
    (license (list license:gpl2 license:lgpl2.1+))))
6303
;; plink-ng from the chrchang/plink-ng repository, built from its "1.9"
;; sub-directory.  Fields not listed here are inherited from 'plink'.
(define-public plink-ng
  (package
    (inherit plink)
    (name "plink-ng")
    (version "2.00a2.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/chrchang/plink-ng")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1p88lz9agzjlspjhciz61qjc36cfniv4nkxszyy0njqyc5rzc0cd"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;there is no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'chdir
           (lambda _
             (chdir "1.9")
             #t))
         ;; There is no "configure" script.
         (delete 'configure))
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "BLASFLAGS=-llapack -lopenblas"
               "CFLAGS=-Wall -O2 -DDYNAMIC_ZLIB=1"
               "ZLIB=-lz"
               "BIN=plink prettify"
               (string-append "CC=" ,(cc-for-target))
               (string-append "PREFIX=" out)
               "DESTDIR="))))
    (inputs
     `(("lapack" ,lapack)
       ("openblas" ,openblas)
       ("zlib" ,zlib)))
    (home-page "https://www.cog-genomics.org/plink/")
    (license license:gpl3+)))
6338
;; Snapshot of the Smith lab helper library.  The 'install phase copies the
;; object files to "lib" and the headers to "include/smithlab-cpp".
(define-public smithlab-cpp
  (let ((revision "1")
        (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/smithlab_cpp")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:tests? #f                    ;there is no "check" target
         #:phases
         (modify-phases %standard-phases
           ;; Include the samtools headers through their "samtools/"
           ;; prefix.
           (add-after 'unpack 'use-samtools-headers
             (lambda _
               (substitute* '("SAM.cpp" "SAM.hpp")
                 (("sam.h") "samtools/sam.h"))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out     (assoc-ref outputs "out"))
                      (libdir  (string-append out "/lib"))
                      (headers (string-append out "/include/smithlab-cpp"))
                      ;; Copy every file matching PATTERN to DIRECTORY.
                      (install-matching
                       (lambda (pattern directory)
                         (for-each (cut install-file <> directory)
                                   (find-files "." pattern)))))
                 (mkdir-p libdir)
                 (mkdir-p headers)
                 (install-matching "\\.o$" libdir)
                 (install-matching "\\.hpp$" headers))
               #t))
           ;; There is no "configure" script.
           (delete 'configure))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
6391
;; preseq, built against the packaged samtools 0.1 and smithlab-cpp rather
;; than the samtools copy shipped in the tarball.
(define-public preseq
  (package
    (name "preseq")
    (version "2.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/smithlabcode/preseq/"
                           "releases/download/v" version
                           "/preseq_v" version ".tar.bz2"))
       (sha256
        (base32 "149x9xmk1wy1gff85325yfzqc0qk4sgp1w6gbyj9cnji4x1dszbl"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copy of samtools.
           (delete-file-recursively "samtools")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;there is no "check" target
       #:make-flags
       (let ((out          (assoc-ref %outputs "out"))
             (samtools     (assoc-ref %build-inputs "samtools"))
             (smithlab-cpp (assoc-ref %build-inputs "smithlab-cpp")))
         (list (string-append "PREFIX=" out)
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab-cpp "/lib")
               "PROGS=preseq"
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))
       #:phases
       (modify-phases %standard-phases
         ;; The 'configure phase is not used for this package.
         (delete 'configure))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
6441
;; Screed: FASTA/FASTQ parsing and database utilities.  The test suite is run
;; after installation because it needs the installed "screed" command.
(define-public python-screed
  (package
    (name "python-screed")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32
         "148vcb7w2wr6a4w6vs2bsxanbqibxfk490zbcbg4m61s8669zdjx"))))
    (build-system python-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Tests must be run after installation, as the "screed" command
         ;; does not exist right after building.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             ;; Honour '#:tests? #f' instead of running the suite
             ;; unconditionally.
             (when tests?
               ;; Let the build system compute the site-packages directory
               ;; of the installed package.  Slicing the version out of the
               ;; interpreter's store file name (as was done before) only
               ;; works while the version string has exactly five
               ;; characters, e.g. "3.8.2" but not "3.10.1".
               (add-installed-pythonpath inputs outputs)
               (setenv "PATH"
                       (string-append (assoc-ref outputs "out") "/bin:"
                                      (getenv "PATH")))
               (invoke "python" "setup.py" "test"))
             #t)))))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-pytest-runner" ,python-pytest-runner)))
    (inputs
     `(("python-bz2file" ,python-bz2file)))
    (home-page "https://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
6485
;; Python 2 variant, derived mechanically from 'python-screed'.
(define-public python2-screed
  (package-with-python2
   python-screed))
6488
;; The SRA Toolkit.  It is built against the installed 'ncbi-vdb' package, so
;; the 'configure phase patches the build scripts accordingly.
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.10.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/sra-tools")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1cr2mijkfs5sm35ffjs6861qsd1qkgnhnbavdv65zg5d655abbjf"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;there is no "check" target
       #:parallel-build? #f             ;parallel builds are not supported
       #:make-flags
       (let ((vdb (assoc-ref %build-inputs "ncbi-vdb")))
         (list (string-append "DEFAULT_CRT=" vdb "/kfg/certs.kfg")
               (string-append "DEFAULT_KFG=" vdb "/kfg/default.kfg")
               ;; "lib64" when building for x86_64, "lib32" otherwise.
               (string-append "VDB_LIBDIR=" vdb
                              ,(if (string-prefix? "x86_64"
                                                   (or (%current-target-system)
                                                       (%current-system)))
                                   "/lib64"
                                   "/lib32"))))
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Perl no longer has "." in @INC; add the "setup" directory
             ;; explicitly to avoid the "dotless @INC" build failure.
             (let ((setup-directory (string-append (getcwd) "/setup")))
               (setenv "PERL5LIB"
                       (string-append setup-directory ":"
                                      (getenv "PERL5LIB"))))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((vdb (assoc-ref inputs "ncbi-vdb")))
               ;; The build system expects a directory holding the sources
               ;; and raw build output of ncbi-vdb, including files that are
               ;; not installed.  We build against an installed ncbi-vdb
               ;; instead, which requires the modifications below.
               (substitute* "setup/konfigure.perl"
                 ;; Have the script look for the "ilib" directory of
                 ;; "ncbi-vdb" without first checking that a matching
                 ;; library exists in its "lib" directory.
                 (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                  "my $f = File::Spec->catdir($ilibdir, $ilib);")
                 ;; Find interface libraries in ncbi-vdb's "ilib" directory.
                 (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                  "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

               ;; Link dynamically.
               (substitute* "tools/copycat/Makefile"
                 (("smagic-static") "lmagic"))
               (substitute* "tools/driver-tool/utf8proc/Makefile"
                 (("CC\\?=gcc") "myCC=gcc")
                 (("\\(CC\\)") "(myCC)"))

               ;; Run the script by hand with only the flags it understands;
               ;; it does not recognize things like '--enable-fast-install'.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" (assoc-ref outputs "out"))
                       "--debug"
                       (string-append "--with-fuse-prefix="
                                      (assoc-ref inputs "fuse"))
                       (string-append "--with-magic-prefix="
                                      (assoc-ref inputs "libmagic"))
                       ;; TODO: building with libxml2 fails with linker
                       ;; errors, so this flag stays disabled:
                       ;; (string-append "--with-xml2-prefix="
                       ;;                (assoc-ref inputs "libxml2"))
                       (string-append "--with-ncbi-vdb-sources=" vdb)
                       (string-append "--with-ncbi-vdb-build=" vdb)
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5"))))
             #t)))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5-1.10)
       ("zlib" ,zlib)
       ("python" ,python-wrapper)))
    (home-page
     "https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
6593
;; SeqAn 2: nothing is compiled.  The builder unpacks the release tarball and
;; copies "include" to the "out" output and "share" to the "doc" output.
(define-public seqan
  (package
    (name "seqan")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/seqan/seqan/releases/"
                           "download/seqan-v" version
                           "/seqan-library-" version ".tar.xz"))
       (sha256
        (base32
         "19a1rlxx03qy1i1iriicly68w64yjxbv24g9gdywnfmq998v35yx"))))
    ;; Split the outputs: the documentation weighs 7.8MB and the includes
    ;; 3.6MB.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let ((tarball  (assoc-ref %build-inputs "source"))
               (includes (string-append (assoc-ref %outputs "out")
                                        "/include"))
               (docs     (string-append (assoc-ref %outputs "doc")
                                        "/share")))
           ;; Only "tar" and "xz" are needed to unpack the sources.
           (setenv "PATH"
                   (string-append (assoc-ref %build-inputs "tar") "/bin:"
                                  (assoc-ref %build-inputs "xz") "/bin"))
           (invoke "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           (copy-recursively "include" includes)
           (copy-recursively "share" docs)
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("xz" ,xz)))
    (home-page "https://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
6638
;; SeqAn 1.4.2.  Same layout as 'seqan', but the tarball is bzip2-compressed,
;; so the builder puts "bzip2" on PATH instead of "xz".
(define-public seqan-1
  (package
    (inherit seqan)
    (name "seqan")
    (version "1.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://packages.seqan.de/seqan-library/"
                           "seqan-library-" version ".tar.bz2"))
       (sha256
        (base32
         "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; Split the outputs: the documentation weighs 7.8MB and the includes
    ;; 3.6MB.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let ((tarball  (assoc-ref %build-inputs "source"))
               (includes (string-append (assoc-ref %outputs "out")
                                        "/include"))
               (docs     (string-append (assoc-ref %outputs "doc")
                                        "/share")))
           ;; Only "tar" and "bzip2" are needed to unpack the sources.
           (setenv "PATH"
                   (string-append (assoc-ref %build-inputs "tar") "/bin:"
                                  (assoc-ref %build-inputs "bzip2") "/bin"))
           (invoke "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           (copy-recursively "include" includes)
           (copy-recursively "share" docs)
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))))
6673
;; seqmagick from PyPI, using the stock phases of the Python build system.
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.8.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "seqmagick" version))
       (sha256
        (base32 "0pf98da7i59q47gwrbx0wjk6xlvbybiwphw80w7h4ydjj0579a2b"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-biopython" ,python-biopython)))
    (home-page "https://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
6699
;; seqtk: a single executable, installed by hand.
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lh3/seqtk")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1bfzlqa84b5s1qi22blmmw2s8xdyp9h9ydcq22pfjhh5gab3yz6l"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Upstream provides no tests; as a sanity check, merely run the
         ;; freshly built program.
         (replace 'check
           (lambda _
             (invoke "./seqtk" "seq")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "seqtk"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
6735
;; SNAP short-read aligner.  The build produces the "snap-aligner" and
;; "SNAPCommand" executables, which are installed by hand, plus a
;; "unit_tests" program that serves as the test suite.
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/amplab/snap")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "01w3qq4wm07z73vky0cfwlmrbf50n3w722cxrlzxfi99mnb808d8"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; There is no "configure" script.
         (delete 'configure)
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             ;; Honour '#:tests? #f' instead of running the unit tests
             ;; unconditionally.
             (when tests?
               (invoke "./unit_tests"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (for-each (lambda (program)
                           (install-file program bin))
                         '("snap-aligner" "SNAPCommand")))
             #t)))))
    ;; zlib is a library for the target, not a tool run during the build,
    ;; so it is declared as a regular input rather than a native input.
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; 32-bit systems are not supported by the unpatched code.
    ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
6776
;; SortMeRNA.  Executables go to "out"; the FASTA files found under
;; "rRNA_databases" go to the separate "db" output.
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1b")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biocore/sortmerna")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0j3mbz4n25738yijmjbr5r4fyvkgm8v5vn3sshyfvmyqf5q9byqf"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;executables
               "db"))                   ;sequence databases
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
                   (databases
                    (string-append (assoc-ref outputs "db")
                                   "/share/sortmerna/rRNA_databases")))
               ;; Programs first ...
               (for-each (lambda (program)
                           (install-file program bin))
                         '("sortmerna" "indexdb_rna"))
               ;; ... then the bundled databases.
               (for-each (lambda (database)
                           (install-file database databases))
                         (find-files "rRNA_databases" ".*fasta")))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioinfo.lifl.fr/RNA/sortmerna/")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and @acronym{OTU, operational taxonomic unit} picking of @acronym{NGS, next
generation sequencing} reads. The core algorithm is based on approximate seeds
and allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    ;; The sources contain x86-specific code.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:lgpl3)))
6823
;; STAR RNA-seq aligner, built from the "source" sub-directory against the
;; packaged htslib.  'star-for-pigx' and 'starlong' inherit from this package.
(define-public star
  (package
    (name "star")
    (version "2.7.8a")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/alexdobin/STAR")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0zc5biymja9zml9yizcj1h68fq9c6sxfcav8a0lbgvgsm44rvans"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Call "rm" by name rather than through "/bin/rm".
           (substitute* "source/Makefile"
             (("/bin/rm") "rm"))
           ;; Drop the pre-built binaries and the bundled htslib sources.
           (for-each delete-file-recursively
                     '("bin/MacOSX_x86_64"
                       "bin/Linux_x86_64"
                       "bin/Linux_x86_64_static"
                       "source/htslib"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;there is no "check" target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-source-dir
           (lambda _
             (chdir "source")
             #t))
         ;; Replace the value of COMPILATION_TIME_PLACE with a fixed string.
         (add-after 'enter-source-dir 'make-reproducible
           (lambda _
             (substitute* "Makefile"
               (("(COMPILATION_TIME_PLACE=\")(.*)(\")"
                 _ opening original closing)
                (string-append opening "Built with Guix" closing)))
             #t))
         ;; See https://github.com/alexdobin/STAR/pull/562
         (add-after 'enter-source-dir 'add-missing-header
           (lambda _
             (substitute* "SoloReadFeature_inputRecords.cpp"
               (("#include \"binarySearch2.h\"" include-line)
                (string-append include-line "\n#include <math.h>")))
             #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Remove "htslib" from the prerequisites of Depend.list.
             (substitute* "Makefile"
               (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ rule)
                rule))
             ;; Turn the quoted "htslib/..." includes into <...> includes.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "STAR"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t))
         ;; The 'configure phase is not used for this package.
         (delete 'configure))))
    (native-inputs
     `(("xxd" ,xxd)))
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; According to the README only 64-bit systems are supported.
    (supported-systems '("x86_64-linux" "mips64el-linux"))
    ;; STAR itself is under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
6911
;; STAR 2.7.3a.  Only the version and the source differ from 'star'.
(define-public star-for-pigx
  (package
    (inherit star)
    (name "star")
    (version "2.7.3a")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/alexdobin/STAR")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1hgiqw5qhs0pc1xazzihcfd92na02xyq2kb469z04y1v51kpvvjq"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Call "rm" by name rather than through "/bin/rm".
           (substitute* "source/Makefile"
             (("/bin/rm") "rm"))
           ;; Drop the pre-built binaries and the bundled htslib sources.
           (for-each delete-file-recursively
                     '("bin/MacOSX_x86_64"
                       "bin/Linux_x86_64"
                       "bin/Linux_x86_64_static"
                       "source/htslib"))
           #t))))))
6937
;; Variant of 'star' that builds and installs the "STARlong" executable, with
;; DEF_readNameLengthMax set to 900000.
(define-public starlong
  (package
    (inherit star)
    (name "starlong")
    (arguments
     (substitute-keyword-arguments (package-arguments star)
       ;; Build the "STARlong" make target instead of "STAR".
       ((#:make-flags _)
        '(list "STARlong"))
       ((#:phases star-phases)
        `(modify-phases ,star-phases
           ;; Allow extra long sequence reads.  The file is addressed as
           ;; "source/IncludeDefine.h" because this phase is inserted
           ;; directly after 'unpack.
           (add-after 'unpack 'make-extra-long
             (lambda _
               (substitute* "source/IncludeDefine.h"
                 (("(#define DEF_readNameLengthMax ).*" _ definition)
                  (string-append definition "900000\n")))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (install-file "STARlong"
                             (string-append (assoc-ref outputs "out")
                                            "/bin/"))
               #t))))))))
6959
;; Subread, built in "src" with Makefile.Linux; the 'install phase copies the
;; contents of "../bin" to the output's "bin" directory.
(define-public subread
  (package
    (name "subread")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/subread/subread-"
                           version "/subread-" version "-source.tar.gz"))
       (sha256
        (base32
         "0ah0n4jx6ksk2m2j7xk385x2qzmk1y4rfc6a4mfrdqrlq721w99i"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;there is no "check" target
       ;; By default CC and CCFLAGS carry many x86_64 optimizations.  They
       ;; are overridden here so that the x86_64-only flags are added solely
       ;; when the build target is an x86_64 system.
       #:make-flags
       (let* ((system ,(or (%current-target-system)
                           (%current-system)))
              (generic-flags
               '("-ggdb" "-fomit-frame-pointer"
                 "-ffast-math" "-funroll-loops"
                 "-fmessage-length=0"
                 "-O9" "-Wall" "-DMAKE_FOR_EXON"
                 "-DMAKE_STANDALONE"
                 "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
              (x86_64-flags '("-mmmx" "-msse" "-msse2" "-msse3"))
              (selected-flags
               (if (string-prefix? "x86_64" system)
                   (append generic-flags x86_64-flags)
                   generic-flags)))
         (list (string-append "CCFLAGS=" (string-join selected-flags))
               "-f" "Makefile.Linux"
               "CC=gcc ${CCFLAGS}"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((destination (string-append (assoc-ref outputs "out")
                                               "/bin/")))
               (mkdir-p destination)
               (copy-recursively "../bin" destination))
             #t))
         ;; There is no "configure" script.
         (delete 'configure))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
7014
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                                  "stringtie-" version ".tar.gz"))
              (sha256
               (base32
                "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
              (modules '((guix build utils)))
              ;; Drop the bundled samtools; the "samtools" input is used
              ;; instead (see the 'use-system-samtools phase).
              (snippet
               '(begin
                  (delete-file-recursively "samtools-0.1.18")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no test suite
       #:phases
       (modify-phases %standard-phases
         ;; no configure script
         (delete 'configure)
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; Do not make the "stringtie" target depend on the bundled
             ;; libbam.a, which the source snippet removed.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Include the samtools headers from the system include path.
             ;; The dot before "h" is escaped so that only a literal ".h"
             ;; suffix matches.
             (substitute* '("gclib/GBam.h"
                            "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring)\\.h\"" _ header)
                (string-append "#include <samtools/" header ".h>")))
             #t))
         (add-after 'unpack 'remove-duplicate-typedef
           (lambda _
             ;; This typedef conflicts with the typedef in
             ;; glibc-2.25/include/bits/types.h
             (substitute* "gclib/GThreads.h"
               (("typedef long long __intmax_t;") ""))
             #t))
         ;; Install only the "stringtie" executable.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "stringtie" bin)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (license license:artistic2.0)))
7076
(define-public taxtastic
  (package
    (name "taxtastic")
    (version "0.8.11")
    (source (origin
              ;; The Pypi version does not include tests.
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/fhcrc/taxtastic")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1sv8mkg64jn7zdwf1jj71c16686yrwxk0apb1l8sjszy9p166g0p"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'prepare-directory
           (lambda _
             ;; The git checkout must be writable for tests.
             (for-each make-file-writable (find-files "."))
             ;; This test fails, but the error is not caught by the test
             ;; framework, so the tests fail...
             (substitute* "tests/test_taxit.py"
               (("self.cmd_fails\\(''\\)")
                "self.cmd_fails('nothing')"))
             ;; This version file is expected to be created with git describe.
             (mkdir-p "taxtastic/data")
             (with-output-to-file "taxtastic/data/ver"
               (lambda () (display ,version)))
             #t))
         (add-after 'unpack 'python37-compatibility
           (lambda _
             ;; Import "errno" directly and use it in place of "os.errno".
             (substitute* "taxtastic/utils.py"
               (("import csv") "import csv, errno")
               (("os.errno") "errno"))
             #t))
         (replace 'check
           ;; Note, this fails to run with "-v" as it tries to write to a
           ;; closed output stream.
           ;;
           ;; Honor #:tests? so that the test suite can be switched off.
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "python" "-m" "unittest"))
             #t)))))
    (propagated-inputs
     `(("python-sqlalchemy" ,python-sqlalchemy)
       ("python-decorator" ,python-decorator)
       ("python-biopython" ,python-biopython)
       ("python-pandas" ,python-pandas)
       ("python-psycopg2" ,python-psycopg2)
       ("python-fastalite" ,python-fastalite)
       ("python-pyyaml" ,python-pyyaml)
       ("python-six" ,python-six)
       ("python-jinja2" ,python-jinja2)
       ("python-dendropy" ,python-dendropy)))
    (home-page "https://github.com/fhcrc/taxtastic")
    (synopsis "Tools for taxonomic naming and annotation")
    (description
     "Taxtastic is software written in python used to build and maintain
reference packages i.e. collections of reference trees, reference alignments,
profiles, and associated taxonomic information.")
    (license license:gpl3+)))
7137
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.16")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/vcftools/vcftools/releases/download/v"
             version "/vcftools-" version ".tar.gz"))
       (sha256
        (base32 "1qqlx7flfv7axrjwkaz6njkscsl1d0jw98ns8d8bh1n1hd1pgz6v"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; there is no "check" target
       #:make-flags
       ;; Point both PREFIX and MANDIR into the "out" output.
       (let ((out (assoc-ref %outputs "out")))
         (list "CFLAGS=-O2"             ; override the "-m64" flag
               (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))))
    (home-page "https://vcftools.github.io/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; Both the README and https://vcftools.github.io/license.html declare
    ;; the license to be LGPLv3.
    (license license:lgpl3)))
7173
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/infernal/"
                           "infernal-" version ".tar.gz"))
       (sha256
        (base32 "1z4mgwqg1j4n5ika08ai8mg9yjyjhf4821jp83v2bgwzxrykqjgr"))))
    (build-system gnu-build-system)
    (native-inputs
     ;; Both interpreters are only needed to run the test suite.
     `(("perl" ,perl)
       ("python" ,python)))
    ;; Infernal requires VMX or SSE capability for parallel instructions,
    ;; hence the restricted list of systems.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "http://eddylab.org/infernal/")
    (synopsis "Inference of RNA alignments")
    (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    (license license:bsd-3)))
7202
(define-public r-snapatac
  (package
    (name "r-snapatac")
    (version "2.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/r3fang/SnapATAC")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "037jzlbl436fi7lkpq7d83i2vd1crnrik3vac2x6xj75dbikb2av"))))
    ;; The upstream package name differs from the Guix package name.
    (properties `((upstream-name . "SnapATAC")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bigmemory" ,r-bigmemory)
       ("r-doparallel" ,r-doparallel)
       ("r-dosnow" ,r-dosnow)
       ("r-edger" ,r-edger)
       ("r-foreach" ,r-foreach)
       ("r-genomicranges" ,r-genomicranges)
       ("r-igraph" ,r-igraph)
       ("r-iranges" ,r-iranges)
       ("r-irlba" ,r-irlba)
       ("r-matrix" ,r-matrix)
       ("r-plyr" ,r-plyr)
       ("r-plot3d" ,r-plot3d)
       ("r-rann" ,r-rann)
       ("r-raster" ,r-raster)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rhdf5" ,r-rhdf5)
       ("r-rtsne" ,r-rtsne)
       ("r-scales" ,r-scales)
       ("r-viridis" ,r-viridis)))
    (home-page "https://github.com/r3fang/SnapATAC")
    (synopsis "Single nucleus analysis package for ATAC-Seq")
    ;; "large-scale" is kept on one line: a line break after the hyphen is
    ;; rendered as "large- scale".
    (description
     "This package provides a fast and accurate analysis toolkit for single
cell ATAC-seq (Assay for transposase-accessible chromatin using sequencing).
Single cell ATAC-seq can resolve the heterogeneity of a complex tissue and
reveal cell-type specific regulatory landscapes. However, the exceeding data
sparsity has posed unique challenges for the data analysis. This package
@code{r-snapatac} is an end-to-end bioinformatics pipeline for analyzing
large-scale single cell ATAC-seq data which includes quality control,
normalization, clustering analysis, differential analysis, motif inference and
exploration of single cell ATAC-seq sequencing data.")
    (license license:gpl3)))
7251
(define-public r-archr
  ;; Built from a fixed commit; the version string is derived from it with
  ;; 'git-version'.
  (let ((revision "1")
        (commit "46b519ffb6f73edf132497ac31650d19ef055dc1"))
    (package
      (name "r-archr")
      (version (git-version "1.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/GreenleafLab/ArchR")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1zj3sdfhgn2q2256fmz61a92vw1wylyck632d7842d6knd0v92v8"))))
      (build-system r-build-system)
      (properties `((upstream-name . "ArchR")))
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-biostrings" ,r-biostrings)
         ("r-chromvar" ,r-chromvar)
         ("r-complexheatmap" ,r-complexheatmap)
         ("r-data-table" ,r-data-table)
         ("r-genomicranges" ,r-genomicranges)
         ("r-ggplot2" ,r-ggplot2)
         ("r-ggrepel" ,r-ggrepel)
         ("r-gridextra" ,r-gridextra)
         ("r-gtable" ,r-gtable)
         ("r-gtools" ,r-gtools)
         ("r-magrittr" ,r-magrittr)
         ("r-matrix" ,r-matrix)
         ("r-matrixstats" ,r-matrixstats)
         ("r-motifmatchr" ,r-motifmatchr)
         ("r-nabor" ,r-nabor)
         ("r-plyr" ,r-plyr)
         ("r-rcpp" ,r-rcpp)
         ("r-rhdf5" ,r-rhdf5)
         ("r-rsamtools" ,r-rsamtools)
         ("r-s4vectors" ,r-s4vectors)
         ("r-stringr" ,r-stringr)
         ("r-summarizedexperiment" ,r-summarizedexperiment)
         ("r-uwot" ,r-uwot)))
      (home-page "https://github.com/GreenleafLab/ArchR")
      (synopsis "Analyze single-cell regulatory chromatin in R")
      (description
       "This package is designed to streamline scATAC analyses in R.")
      (license license:gpl2+))))
7300
(define-public r-scde
  (package
    (name "r-scde")
    (version "1.99.2")
    (source
     (origin
       (method git-fetch)
       ;; The commit is referenced by the bare version string.
       (uri (git-reference
             (url "https://github.com/hms-dbmi/scde")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "10na2gyka24mszdxf92wz9h2c13hdf1ww30c68gfsw53lvvhhhxb"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-mgcv" ,r-mgcv)
       ("r-rook" ,r-rook)
       ("r-rjson" ,r-rjson)
       ("r-cairo" ,r-cairo)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-edger" ,r-edger)
       ("r-quantreg" ,r-quantreg)
       ("r-nnet" ,r-nnet)
       ("r-rmtstat" ,r-rmtstat)
       ("r-extremes" ,r-extremes)
       ("r-pcamethods" ,r-pcamethods)
       ("r-biocparallel" ,r-biocparallel)
       ("r-flexmix" ,r-flexmix)))
    (home-page "https://hms-dbmi.github.io/scde/")
    (synopsis "R package for analyzing single-cell RNA-seq data")
    (description "The SCDE package implements a set of statistical methods for
analyzing single-cell RNA-seq data. SCDE fits individual error models for
single-cell RNA-seq measurements. These models can then be used for
assessment of differential expression between groups of cells, as well as
other types of analysis. The SCDE package also contains the pagoda framework
which applies pathway and gene set overdispersion analysis to identify aspects
of transcriptional heterogeneity among single cells.")
    ;; For the license, see https://github.com/hms-dbmi/scde/issues/38
    (license license:gpl2)))
7342
(define-public r-centipede
  (package
    (name "r-centipede")
    (version "1.2")
    (source
     (origin
       (method url-fetch)
       ;; The tarball is fetched from R-Forge.
       (uri (string-append "http://download.r-forge.r-project.org/"
                           "src/contrib/CENTIPEDE_" version ".tar.gz"))
       (sha256
        (base32 "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
    (build-system r-build-system)
    (home-page "http://centipede.uchicago.edu/")
    (synopsis "Predict transcription factor binding sites")
    (description
     "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
    (license (list license:gpl2+
                   license:gpl3+))))
7366
(define-public r-demultiplex
  ;; Built from a fixed commit; the version string is derived from it with
  ;; 'git-version'.
  (let ((revision "1")
        (commit "6e2a1422c8e6f418cfb271997eebc91f9195f299"))
    (package
      (name "r-demultiplex")
      (version (git-version "1.0.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/chris-mcginnis-ucsf/MULTI-seq")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "01kv88wp8vdaq07sjk0d3d1cb553mq1xqg0war81pgmg63bgi38w"))))
      (build-system r-build-system)
      (properties `((upstream-name . "deMULTIplex")))
      (propagated-inputs
       `(("r-kernsmooth" ,r-kernsmooth)
         ("r-reshape2" ,r-reshape2)
         ("r-rtsne" ,r-rtsne)
         ("r-shortread" ,r-shortread)
         ("r-stringdist" ,r-stringdist)))
      (home-page "https://github.com/chris-mcginnis-ucsf/MULTI-seq")
      (synopsis "MULTI-seq pre-processing and classification tools")
      (description
       "deMULTIplex is an R package for analyzing single-cell RNA sequencing
data generated with the MULTI-seq sample multiplexing method. The package
includes software to

@enumerate
@item Convert raw MULTI-seq sample barcode library FASTQs into a sample
barcode UMI count matrix, and
@item Classify cell barcodes into sample barcode groups.
@end enumerate
")
      (license license:cc0))))
7405
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.9.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/torognes/vsearch")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0vhrpjfdf75ba04b24xknp41790cvcgwl0vgpy7qbzj5xh2521ss"))
       (patches (search-patches "vsearch-unbundle-cityhash.patch"))
       (snippet
        '(begin
           ;; Delete the bundled cityhash sources; the patch above adjusts
           ;; the vsearch sources accordingly.
           (for-each delete-file
                     '("src/city.h"
                       "src/citycrc.h"
                       "src/city.cc"))
           #t))))
    (build-system gnu-build-system)
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    ;; vsearch relies on non-portable SSE intrinsics, so the build fails on
    ;; other platforms.
    (supported-systems '("x86_64-linux"))
    (home-page "https://github.com/torognes/vsearch")
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3
                   license:bsd-2))))
7451
(define-public pardre
  (package
    (name "pardre")
    ;; The 1.1.5 source tarball was modified in place upstream, hence the
    ;; "-1" suffix on our version.
    (version "1.1.5-1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                           "1.1.5" ".tar.gz"))
       (sha256
        (base32 "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
    (build-system gnu-build-system)
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install only the "ParDRe" executable.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (install-file "ParDRe" bindir)
               #t))))))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description
     "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
7492
(define-public ngshmmalign
  (package
    (name "ngshmmalign")
    (version "0.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/cbg-ethz/ngshmmalign/"
                           "releases/download/" version
                           "/ngshmmalign-" version ".tar.bz2"))
       (sha256
        (base32 "0jryvlssi2r2ii1dxnx39yk6bh4yqgq010fnxrgfgbaj3ykijlzv"))))
    (build-system cmake-build-system)
    (inputs
     `(("boost" ,boost)))
    ;; Upstream ships no tests.
    (arguments
     '(#:tests? #false))
    (home-page "https://github.com/cbg-ethz/ngshmmalign/")
    (synopsis "Profile HMM aligner for NGS reads")
    (description
     "ngshmmalign is a profile HMM aligner for NGS reads designed particularly
for small genomes (such as those of RNA viruses like HIV-1 and HCV) that
experience substantial biological insertions and deletions.")
    (license license:gpl2+)))
7517
(define-public prinseq
  (package
    (name "prinseq")
    (version "0.20.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/prinseq/standalone/"
                           "prinseq-lite-" version ".tar.gz"))
       (sha256
        (base32
         "0vxmzvmm67whxrqdaaamwgjk7cf0fzfs5s673jgg00kz7g70splv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #false                  ; no check target
       #:phases
       (modify-phases %standard-phases
         ;; Only the install phase does any work: it copies the Perl
         ;; scripts and wraps them.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               ;; Make each script executable, install it, and wrap it so
               ;; that it runs with the build-time PERL5LIB.
               (for-each (lambda (file)
                           (chmod file #o555)
                           (install-file file bin)
                           (wrap-script (string-append bin "/" (basename file))
                             `("PERL5LIB" ":" prefix
                               (,(getenv "PERL5LIB")))))
                         ;; Escape the dot and anchor the pattern so that
                         ;; only file names ending in ".pl" are selected.
                         (find-files "." "prinseq.*\\.pl$"))))))))
    (inputs
     `(("guile" ,guile-3.0)             ; for wrapper scripts
       ("perl" ,perl)
       ("perl-cairo" ,perl-cairo)
       ("perl-data-dumper" ,perl-data-dumper)
       ("perl-digest-md5" ,perl-digest-md5)
       ("perl-getopt-long" ,perl-getopt-long)
       ("perl-json" ,perl-json)
       ("perl-statistics-pca" ,perl-statistics-pca)))
    (home-page "http://prinseq.sourceforge.net/")
    (synopsis "Preprocess sequence data in FASTA or FASTQ formats")
    (description
     "PRINSEQ is a bioinformatics tool to help you preprocess your genomic or
metagenomic sequence data in FASTA or FASTQ formats. The tool is written in
Perl and can be helpful if you want to filter, reformat, or trim your sequence
data. It also generates basic statistics for your sequences.")
    (license license:gpl3+)))
7565
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-kseq" version))
       (sha256
        (base32 "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    ;; The test suite is run through the "spec" target.
    (arguments
     `(#:test-target "spec"))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description
     "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
7593
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-locus" version))
       (sha256
        (base32 "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
7617
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    ;; RSpec is a test framework, so it is declared as a native input, as
    ;; the other Ruby packages in this file do.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
7641
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.2")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "1d56amdsjv1mag7m6gv2w0xij8hqx1v5xbdjsix8sp3yp36m7938"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which)))               ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'build 'patch-test-command
           (lambda _
             ;; Replace the command names used by the tests with the
             ;; absolute file names found on PATH.  The clauses are applied
             ;; to each line in the order written.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; The file list must not contain a comma: inside this
             ;; quasiquoted form a comma is an unquote, not a separator.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
7684
(define-public bio-vcf
  (package
    (name "bio-vcf")
    (version "0.9.5")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio-vcf" version))
       (sha256
        (base32 "1glw5pn9s8z13spxk6yyfqaz80n9lga67f33w35nkpq9dwi2vg6g"))))
    (build-system ruby-build-system)
    (native-inputs
     `(("ruby-cucumber" ,ruby-cucumber)))
    (home-page "https://github.com/vcflib/bio-vcf")
    (synopsis "Smart VCF parser DSL")
    (description
     "Bio-vcf provides a @acronym{DSL, domain specific language} for processing
the VCF format. Record named fields can be queried with regular expressions.
Bio-vcf is a new generation VCF parser, filter and converter. Bio-vcf is not
only very fast for genome-wide (WGS) data, it also comes with a filtering,
evaluation and rewrite language and can output any type of textual data,
including VCF header and contents in RDF and JSON.")
    (license license:expat)))
7709
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (sha256
        (base32 "04x6d4mcsnvz6glkmf6k2cv3fs8zk03i9rvv0ahpl793n8l411ps"))))
    (build-system r-build-system)
    ;; The upstream package name differs from the Guix package name.
    (properties `((upstream-name . "SummarizedExperiment")))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-matrixgenerics" ,r-matrixgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/SummarizedExperiment")
    (synopsis "Container for representing genomic ranges by sample")
    (description
     "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (license license:artistic2.0)))
7743
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicAlignments" version))
       (sha256
        (base32 "1q95px6s6snsax4ax955zzpdlrwp5liwf70wqq0lrk9mp6lq0hbr"))))
    (build-system r-build-system)
    ;; The upstream package name differs from the Guix package name.
    (properties `((upstream-name . "GenomicAlignments")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/GenomicAlignments")
    (synopsis "Representation and manipulation of short genomic alignments")
    (description
     "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
7776
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.50.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rtracklayer" version))
       (sha256
        (base32 "12zimhpdzjyzd81wrzz5hdbzvlgzcs22x1nnaf2jq4cba3ch5px8"))))
    (build-system r-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("zlib" ,zlib)))
    ;; NOTE(review): "r-zlibbioc" is still propagated although the
    ;; 'use-system-zlib phase below strips its import -- confirm whether it
    ;; is still needed.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove the references to zlibbioc from the package metadata.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               ((" zlibbioc,") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (home-page "https://bioconductor.org/packages/rtracklayer")
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description
     "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
7824
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.42.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFeatures" version))
       (sha256
        (base32 "168cf261vmcqffbzassavkjyz9a2af0l6zbv9cagkx6b1qrk3siz"))))
    (build-system r-build-system)
    ;; The upstream package name differs from the Guix package name.
    (properties `((upstream-name . "GenomicFeatures")))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/GenomicFeatures")
    (synopsis "Tools for working with transcript centric annotations")
    (description
     "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
7867
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.7.0")
    (source
     (origin
       (method url-fetch)
       ;; GO.db is an annotation package, so its tarball is published under
       ;; "data/annotation/" rather than "bioc/".  Ask bioconductor-uri for
       ;; the annotation location instead of assembling the URL by hand.
       (uri (bioconductor-uri "GO.db" version 'annotation))
       (sha256
        (base32
         "0i3wcf5h3n0dawzc1hy0kv74f06j80c47n4p3g3fmrcxlhi3jpa5"))))
    ;; The upstream name contains a dot and capitals, unlike the Guix name.
    (properties
     `((upstream-name . "GO.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/GO.db")
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description
     "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
7891
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.42.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "topGO" version))
       (sha256
        (base32 "0vr3l9gvd3dhy446k3fkj6rm7z1abxi56rbnrs64297yzxaz1ngl"))))
    ;; Upstream spells the name with capitals.
    (properties '((upstream-name . "topGO")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-dbi" ,r-dbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-lattice" ,r-lattice)
       ("r-matrixstats" ,r-matrixstats)
       ("r-sparsem" ,r-sparsem)))
    (home-page "https://bioconductor.org/packages/topGO")
    (synopsis "Enrichment analysis for gene ontology")
    (description
     "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Upstream permits any version of the LGPL.
    (license license:lgpl2.1+)))
7924
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.58.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256
        (base32 "1gbvmxr6r57smgvhqgwspbcnwyk4hsfkxkpzzcs6470q03zfb4wq"))))
    ;; Upstream spells the name with capitals.
    (properties '((upstream-name . "BSgenome")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/BSgenome")
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (license license:artistic2.0)))
7955
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.64.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "impute" version))
       (sha256
        (base32 "1pnjasw9i19nmxwjzrd9jbln31yc5jilfvwk414ya5zbqfsazvxa"))))
    (build-system r-build-system)
    ;; A Fortran compiler is the only extra build-time requirement.
    (native-inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
7975
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256
        (base32 "0j68n6fwycxjpl2va5fw7ajb123n758s2pq997d76dysxghmrlzq"))))
    ;; Upstream spells the name with a capital P.
    (properties '((upstream-name . "seqPattern")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    (home-page "https://bioconductor.org/packages/seqPattern")
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (license license:gpl3+)))
8002
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genomation" version))
       (sha256
        (base32 "1ana06irlpdgnmk8mb329nws9sm8n6max4qargf1xdcdf3rnk45g"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-runit" ,r-runit)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqpattern" ,r-seqpattern)))
    ;; The project page is hosted outside of bioconductor.org.
    (home-page "https://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Summary, annotation and visualization of genomic data")
    (description
     "This package provides a package for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (license license:artistic2.0)))
8050
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       ;; This is an experiment data package: its tarball is located under
       ;; "data/experiment/" instead of "bioc/", so request the experiment
       ;; location from bioconductor-uri.
       (uri (bioconductor-uri "genomationData" version 'experiment))
       (sha256
        (base32
         "0igjsvfnws3498j65ifniw0kbxfqpfr59rcjddqvq4zsj453fx1g"))))
    ;; Upstream spells the name with a capital D.
    (properties
     `((upstream-name . "genomationData")))
    (build-system r-build-system)
    ;; As this package provides little more than large data files, it doesn't
    ;; make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
    (license license:gpl3+)))
8078
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.56.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "seqLogo" version))
              (sha256
               (base32
                "02rpzjjfg5chlwwfbvv72cm78cg2vfmdwzars0cin9hz1hd7rnq1"))))
    ;; Upstream spells the name with a capital L.
    (properties
     '((upstream-name . "seqLogo")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/seqLogo")
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (license license:lgpl2.0+)))
8101
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.31.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "motifRG" version))
              (sha256
               (base32
                "1ml6zyzlk8yjbnfhga2qnw8nl43rankvka0kc1yljxr2b66aqbhn"))))
    ;; Upstream spells the name with capitals.
    (properties
     '((upstream-name . "motifRG")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/motifRG")
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (license license:artistic2.0)))
8128
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.36.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256
        (base32 "0m36ddss0znvm19dhnxcclxjhgjplw8ajk8v419h20ab8an6khxg"))))
    (properties '((upstream-name . "zlibbioc")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description
     "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (license license:artistic2.0)))
8147
(define-public r-r4rna
  (package
    (name "r-r4rna")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       ;; Not a Bioconductor or CRAN tarball: it is fetched from the
       ;; project's own site, over HTTPS like the home page below.
       (uri (string-append "https://www.e-rna.org/r-chie/files/R4RNA_"
                           version ".tar.gz"))
       (sha256
        (base32
         "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-optparse" ,r-optparse)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://www.e-rna.org/r-chie/index.cgi")
    (synopsis "Analysis framework for RNA secondary structure")
    (description
     "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
    (license license:gpl3+)))
8170
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhtslib" version))
              (sha256
               (base32
                "18wag2jnpda6078xjkpfdvar1gkb2myhw83gg03l39sabh35qya4"))))
    (properties
     '((upstream-name . "Rhtslib")))
    (build-system r-build-system)
    ;; A staged install would leave a temporary directory name inside the
    ;; Rhtslib.so binary, and R aborts the build when it finds one.
    (arguments
     '(#:configure-flags '("--no-staged-install")))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("curl" ,curl)
       ;; Packages built against rhtslib have to link with zlib themselves.
       ("zlib" ,zlib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/nhayden/Rhtslib")
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (license license:lgpl2.0+)))
8201
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "bamsignals" version))
              (sha256
               (base32
                "0p3r9z9z5sfkd0b951cgr751k4z0yviyn1jfw9d4fcnyld7g1jxv"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/bamsignals")
    (synopsis "Extract read count signals from bam files")
    (description
     "This package efficiently obtains count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (license license:gpl2+)))
8231
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "RCAS" version))
       (sha256
        (base32 "0vdxml618vqvf8xyh0zxs307p9zby0cj9dqyiiz625ilyq1hkw2m"))))
    ;; Upstream spells the name in capitals.
    (properties '((upstream-name . "RCAS")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggseqlogo" ,r-ggseqlogo)
       ("r-gprofiler2" ,r-gprofiler2)
       ("r-iranges" ,r-iranges)
       ("r-pbapply" ,r-pbapply)
       ("r-pheatmap" ,r-pheatmap)
       ("r-plotly" ,r-plotly)
       ("r-plotrix" ,r-plotrix)
       ("r-proxy" ,r-proxy)
       ("r-ranger" ,r-ranger)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-s4vectors" ,r-s4vectors)
       ;; The only non-R entry in this list.
       ("pandoc" ,pandoc)))
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (license license:artistic2.0)))
8280
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                                  "releases/download/v" version
                                  "/rcas-web-" version ".tar.gz"))
              (sha256
               (base32
                "0wq951aj45gqki1bickg876i993lmawkp8x24agg264br5x716db"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The version test in the configure script is an R expression, and
         ;; in R ‘1.10.1 >= 1.3.4’ is false, so RCAS releases outside of the
         ;; 1.3.x series are rejected.  Lower the required version to 0.0.0.
         (add-before 'configure 'find-RCAS
           (lambda _
             (substitute* "configure"
               (("1\\.3\\.4") "0.0.0"))
             #t))
         ;; Wrap the installed program so that it sees the Guile modules of
         ;; guile-json and guile-redis and the R library path of the build
         ;; environment.
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((json-dir  (assoc-ref inputs "guile-json"))
                    (redis-dir (assoc-ref inputs "guile-redis"))
                    (load-path (string-append
                                json-dir "/share/guile/site/2.2:"
                                redis-dir "/share/guile/site/2.2"))
                    (program   (string-append (assoc-ref outputs "out")
                                              "/bin/rcas-web")))
               (wrap-program program
                 `("GUILE_LOAD_PATH" ":" = (,load-path))
                 `("GUILE_LOAD_COMPILED_PATH" ":" = (,load-path))
                 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("r-minimal" ,r-minimal)
       ("r-rcas" ,r-rcas)
       ("guile" ,guile-2.2)
       ("guile-json" ,guile-json-1)
       ("guile-redis" ,guile2.2-redis)))
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description
     "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (license license:agpl3+)))
8331
(define-public r-mutationalpatterns
  (package
    (name "r-mutationalpatterns")
    (version "3.0.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MutationalPatterns" version))
              (sha256
               (base32
                "1988kjjgq8af0hj7chhpxi88717wwmzs9qgrwapjh0hm2hjwhn35"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ;; Upstream merely suggests the following two genome data packages.
       ("r-bsgenome-hsapiens-1000g" ,r-bsgenome-hsapiens-1000genomes-hs37d5)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-cowplot" ,r-cowplot)
       ("r-dplyr" ,r-dplyr)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggalluvial" ,r-ggalluvial)
       ("r-ggdendro" ,r-ggdendro)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iranges" ,r-iranges)
       ("r-magrittr" ,r-magrittr)
       ("r-nmf" ,r-nmf)
       ("r-pracma" ,r-pracma)
       ("r-purrr" ,r-purrr)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-tibble" ,r-tibble)
       ("r-tidyr" ,r-tidyr)
       ("r-variantannotation" ,r-variantannotation)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/MutationalPatterns/")
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description
     "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (license license:expat)))
8376
(define-public r-chipkernels
  ;; Built from a pinned Git commit; REVISION and a prefix of COMMIT are
  ;; appended to the base version "1.1".
  (let ((commit "c9cfcacb626b1221094fb3490ea7bac0fd625372")
        (revision "1"))
    (package
      (name "r-chipkernels")
      (version (string-append "1.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ManuSetty/ChIPKernels")
               (commit commit)))
         ;; Name the checkout with the same helper the other Git-based
         ;; packages in this file use.
         (file-name (git-file-name name version))
         (sha256
          (base32
           "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-iranges" ,r-iranges)
         ("r-xvector" ,r-xvector)
         ("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ("r-gtools" ,r-gtools)
         ("r-genomicranges" ,r-genomicranges)
         ("r-sfsmisc" ,r-sfsmisc)
         ("r-kernlab" ,r-kernlab)
         ("r-s4vectors" ,r-s4vectors)
         ("r-biocgenerics" ,r-biocgenerics)))
      (home-page "https://github.com/ManuSetty/ChIPKernels")
      (synopsis "Build string kernels for DNA Sequence analysis")
      (description "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
      (license license:gpl2+))))
8412
(define-public r-seqgl
  (package
    (name "r-seqgl")
    (version "1.1.4")
    (source (origin
              (method git-fetch)
              ;; The Git reference to check out is the plain version string.
              (uri (git-reference
                    (url "https://github.com/ManuSetty/SeqGL")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1r6ywvhxl3ffv48lgj7sbd582mcc6dha3ksgc2qjlvjrnkbj3799"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-chipkernels" ,r-chipkernels)
       ("r-genomicranges" ,r-genomicranges)
       ("r-spams" ,r-spams)
       ("r-wgcna" ,r-wgcna)
       ("r-fastcluster" ,r-fastcluster)))
    (home-page "https://github.com/ManuSetty/SeqGL")
    (synopsis "Group lasso for Dnase/ChIP-seq data")
    (description
     "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
    (license license:gpl2+)))
8443
(define-public r-tximport
  (package
    (name "r-tximport")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "tximport" version))
       (sha256
        (base32 "1nxnlvl4iv2392xa72j0lzy2xnb3vrvyhfrdj9l54znwkrryyq34"))))
    (build-system r-build-system)
    ;; knitr is the only declared input.
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/tximport")
    (synopsis
     "Import and summarize transcript-level estimates for gene-level analysis")
    (description
     "This package provides tools to import transcript-level abundance,
estimated counts and transcript lengths, and to summarize them into matrices
for use with downstream gene-level analysis packages. Average transcript
length, weighted by sample-specific transcript abundance estimates, is
provided as a matrix which can be used as an offset for different expression
of gene-level counts.")
    (license license:gpl2+)))
8467
(define-public r-rhdf5filters
  (package
    (name "r-rhdf5filters")
    (version "1.2.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "rhdf5filters" version))
              (sha256
               (base32
                "1jvnss44liapbc6hk93yg1gknv0ahd5x86dydqiwq9l65jd03psq"))))
    (properties
     '((upstream-name . "rhdf5filters")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-rhdf5lib" ,r-rhdf5lib)))
    (home-page "https://github.com/grimbough/rhdf5filters")
    (synopsis "HDF5 compression filters")
    (description
     "This package provides a collection of compression filters for use with
HDF5 datasets.")
    (license license:bsd-2)))
8493
(define-public r-rhdf5
  (package
    (name "r-rhdf5")
    (version "2.34.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "rhdf5" version))
              (sha256
               (base32
                "0almr1vscrgj5g4dyrags131wia2pmdbdidlpskbgm44ha6hpmqi"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Patch src/Makevars right after unpacking so that every
         ;; "(RHDF5_LIBS)" reference is followed by "/libhdf5.a".
         (add-after 'unpack 'fix-linking
           (lambda _
             (substitute* "src/Makevars"
               ;; This is to avoid having a plain directory on the list of
               ;; libraries to link.
               (("\\(RHDF5_LIBS\\)" match)
                (string-append match "/libhdf5.a")))
             #t)))))
    (propagated-inputs
     `(("r-rhdf5filters" ,r-rhdf5filters)
       ("r-rhdf5lib" ,r-rhdf5lib)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/rhdf5")
    (synopsis "HDF5 interface to R")
    (description
     "This R/Bioconductor package provides an interface between HDF5 and R.
HDF5's main features are the ability to store and access very large and/or
complex datasets and a wide variety of metadata on mass storage (disk) through
a completely portable file format. The rhdf5 package is thus suited for the
exchange of large and/or complex datasets between R and other software
packages, and for letting R applications work on datasets that are larger than
the available RAM.")
    (license license:artistic2.0)))
8532
(define-public r-annotationfilter
  (package
    (name "r-annotationfilter")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationFilter" version))
       (sha256
        (base32 "0npk0laa2rc93rsh6yikj886zf2fl53a050j07fj9w67j0q0h3s9"))))
    ;; Upstream spells the name with capitals.
    (properties '((upstream-name . "AnnotationFilter")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-genomicranges" ,r-genomicranges)
       ("r-lazyeval" ,r-lazyeval)))
    (home-page "https://github.com/Bioconductor/AnnotationFilter")
    (synopsis "Facilities for filtering Bioconductor annotation resources")
    (description
     "This package provides classes and other infrastructure to implement
filters for manipulating Bioconductor annotation resources. The filters are
used by @code{ensembldb}, @code{Organism.dplyr}, and other packages.")
    (license license:artistic2.0)))
8558
(define-public emboss
  (package
    (name "emboss")
    (version "6.5.7")
    (source
     (origin
       (method url-fetch)
       ;; The tarball sits in a directory named after the major.minor
       ;; version with ".0" appended.
       (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                           (version-major+minor version) ".0/"
                           "EMBOSS-" version ".tar.gz"))
       (sha256
        (base32 "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-checks
           (lambda _
             ;; The PNGDRIVER checks look for libgd, libpng and zlib but
             ;; expect all three under one prefix.  Replace the check with
             ;; the settings it would produce on success.
             (substitute* "configure.in"
               (("CHECK_PNGDRIVER")
                "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))
             #t))
         (add-after 'fix-checks 'disable-update-check
           (lambda _
             ;; The build has no Internet access, so an update check cannot
             ;; succeed; remove the embossupdate call.
             (substitute* "Makefile.am"
               (("\\$\\(bindir\\)/embossupdate") ""))
             #t))
         ;; Regenerate the build system after the two edits above.
         (add-after 'disable-update-check 'autogen
           (lambda _
             (invoke "autoreconf" "-vif")
             #t)))
       #:configure-flags
       (list (string-append "--with-hpdf="
                            (assoc-ref %build-inputs "libharu")))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("libpng" ,libpng)
       ("gd" ,gd)
       ("libx11" ,libx11)
       ("libharu" ,libharu)
       ("zlib" ,zlib)))
    (home-page "http://emboss.sourceforge.net")
    (synopsis "Molecular biology analysis suite")
    (description
     "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (license license:gpl2+)))
8621
(define-public bits
  (let ((revision "1")
        (commit "3cc4567896d9d6442923da944beb704750a08d2d"))
    (package
      (name "bits")
      ;; The version is 2.13.0 even though no release archives have been
      ;; published as yet.
      (version (string-append "2.13.0-" revision "." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/arq5x/bits")
                      (commit commit)))
                ;; Name the checkout with the same helper the other
                ;; Git-based packages in this file use.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ;no tests included
         #:phases
         (modify-phases %standard-phases
           ;; The configure phase is dropped; the Makefile is edited and
           ;; used directly.
           (delete 'configure)
           (add-after 'unpack 'remove-cuda
             (lambda _
               (substitute* "Makefile"
                 ;; Blank out everything up to and including "_cuda" ...
                 ((".*_cuda") "")
                 ;; ... and strip the trailing backslash that follows
                 ;; "bits_test_intersections".
                 (("(bits_test_intersections) \\\\" _ match) match))
               #t))
           ;; Installation copies the "bin" directory into the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (copy-recursively
                "bin" (string-append (assoc-ref outputs "out") "/bin"))
               #t)))))
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (home-page "https://github.com/arq5x/bits")
      (synopsis "Implementation of binary interval search algorithm")
      (description "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (license license:gpl2))))
8667
(define-public piranha
  ;; The newest version has no release tarball, so a Git commit is pinned.
  ;; When this was written that commit was already more than a year old.
  (let ((commit "0466d364b71117d01e4471b74c514436cc281233")
        (revision "1"))
    (package
      (name "piranha")
      (version (string-append "1.2.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/piranha")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (arguments
       `(#:test-target "test"
         #:configure-flags
         (let ((bamtools (assoc-ref %build-inputs "bamtools")))
           (list (string-append "--with-bam_tools_headers="
                                bamtools "/include/bamtools")
                 (string-append "--with-bam_tools_library="
                                bamtools "/lib/bamtools")))
         #:phases
         (modify-phases %standard-phases
           ;; Copy every file of the "smithlab-cpp" input into
           ;; src/smithlab_cpp/ of the unpacked tree.
           (add-after 'unpack 'copy-smithlab-cpp
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((sources (find-files (assoc-ref inputs "smithlab-cpp"))))
                 (for-each (lambda (source-file)
                             (install-file source-file "./src/smithlab_cpp/"))
                           sources))
               #t))
           ;; After the regular install phase, copy whatever is in the local
           ;; "bin" directory into the output's bin directory.
           (add-after 'install 'install-to-store
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((target (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (for-each (lambda (program)
                             (install-file program target))
                           (find-files "bin" ".*")))
               #t)))))
      (native-inputs
       `(("python" ,python-2)))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ;; Source checkout of smithlab_cpp, pinned to a commit of its own.
         ("smithlab-cpp"
          ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp")
                     (commit commit)))
               (file-name (string-append "smithlab_cpp-" commit "-checkout"))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (home-page "https://github.com/smithlabcode/piranha")
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (license license:gpl3+))))
8734
;; PePr is fetched from PyPI and built with Python 2; its runtime libraries
;; are propagated.
(define-public pepr
  (package
    (name "pepr")
    (version "1.0.9")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "PePr" version))
              (sha256
               (base32
                "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; python2 only
       #:tests? #f))                    ; no tests included
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://github.com/shawnzhangyx/PePr")
    (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
    ;; Grammar: the subject of "looks" is "It", hence the third person form.
    (description
     "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and looks for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
    (license license:gpl3+)))
8762
;; filevercmp has no visible upstream release here, so it is built from a
;; pinned commit.
(define-public filevercmp
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
    (package
      (name "filevercmp")
      ;; Yields "0-1." followed by the first seven characters of COMMIT, the
      ;; same string the previous hand-written string-append produced.
      (version (git-version "0" "1" commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/filevercmp")
                      (commit commit)))
                ;; Name the checkout after the package version, like the other
                ;; git-fetch origins in this file, not after the raw commit.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1j9vxsy0y050v59h0q1d6501fcw1kjvj0d18l1xk2zyg0jzj247c"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           ;; Install the program and its header by hand in place of the
           ;; standard 'install phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((out (assoc-ref outputs "out")))
                 (install-file "filevercmp" (string-append out "/bin"))
                 (install-file "filevercmp.h" (string-append out "/include"))
                 #t))))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
8794
;; MultiQC is fetched from PyPI; its Python dependencies are propagated.
(define-public multiqc
  (package
    (name "multiqc")
    (version "1.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "multiqc" version))
       (sha256
        (base32
         "02iihfl0w0hpnr4pa0sbd1y9qxrg3ycyhjp5lidkcrqh1lmzs3zy"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-jinja2" ,python-jinja2)
       ("python-simplejson" ,python-simplejson)
       ("python-pyyaml" ,python-pyyaml)
       ("python-click" ,python-click)
       ("python-spectra" ,python-spectra)
       ("python-requests" ,python-requests)
       ("python-markdown" ,python-markdown)
       ("python-lzstring" ,python-lzstring)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ;; MultiQC checks for the presence of nose at runtime.
       ("python-nose" ,python-nose)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               ;; MultiQC 1.5 ‘requires’ a version of python-matplotlib older
               ;; than the one in Guix, but should work fine with 2.2.2.
               ;; See <https://github.com/ewels/MultiQC/issues/725> and
               ;; <https://github.com/ewels/MultiQC/issues/732> for details.
               ;;
               ;; POSIX regexps have no non-greedy ".*?", so stop at the
               ;; closing quote with a negated character class.  This keeps
               ;; the match within the matplotlib requirement string.
               (("['\"]matplotlib[^'\"]*['\"]")
                "'matplotlib'"))
             #t)))))
    (home-page "https://multiqc.info")
    (synopsis "Aggregate bioinformatics analysis reports")
    (description
     "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
    (license license:gpl3+)))
8840
;; Variant tools is built from a git checkout with the Python build system.
(define-public variant-tools
  (package
    (name "variant-tools")
    (version "3.1.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/vatlab/varianttools")
             ;; Upstream has no tag for version 3.1.2, so the commit is
             ;; referenced directly.
             (commit "813ae4a90d25b69abc8a40f4f70441fe09015249")))
       (file-name (git-file-name name version))
       (sha256
        (base32 "12ibdmksj7icyqhks4xyvd61bygk4pjmxn618kp6vgk1af01y34g"))))
    (build-system python-build-system)
    ;; Python libraries that are propagated to users of the package.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-pycurl" ,python-pycurl)
       ("python-pyzmq" ,python-pyzmq)
       ("python-scipy" ,python-scipy)
       ("python-tables" ,python-tables)))
    (inputs
     `(("boost" ,boost)
       ("c-blosc" ,c-blosc)
       ("gsl" ,gsl)
       ("hdf5" ,hdf5)
       ("hdf5-blosc" ,hdf5-blosc)
       ("python-cython" ,python-cython)
       ("zlib" ,zlib)))
    (home-page "https://vatlab.github.io/vat-docs/")
    (synopsis "Analyze genetic variants from Next-Gen sequencing studies")
    (description
     "Variant tools is a tool for the manipulation, annotation,
selection, simulation, and analysis of variants in the context of next-gen
sequencing analysis. Unlike some other tools used for next-gen sequencing
analysis, variant tools is project based and provides a whole set of tools to
manipulate and analyze genetic variants.")
    (license license:gpl3+)))
8880
;; Bioconductor "chipseq"; the tarball location comes from bioconductor-uri.
(define-public r-chipseq
  (package
    (name "r-chipseq")
    (version "1.40.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "chipseq" version))
       (sha256
        (base32 "12pzq24aarvgxfmhcad0l5g951xqdvvi7bspgbsvlvmfkqd74j2v"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shortread" ,r-shortread)))
    (home-page "https://bioconductor.org/packages/chipseq")
    (synopsis "Package for analyzing ChIPseq data")
    (description
     "This package provides tools for processing short read data from ChIPseq
experiments.")
    (license license:artistic2.0)))
8906
;; Helper data package for CopywriteR.  The tarball URL is spelled out by
;; hand and points at Bioconductor's "data/experiment" area.
(define-public r-copyhelper
  (package
    (name "r-copyhelper")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://bioconductor.org/packages/release/"
                           "data/experiment/src/contrib/CopyhelpeR_"
                           version ".tar.gz"))
       (sha256
        (base32 "0x7cyynjmxls9as2gg0iyp9x5fpalxmdjq914ss7i84i9zyk5bhq"))))
    ;; The upstream name uses mixed case.
    (properties `((upstream-name . "CopyhelpeR")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/CopyhelpeR/")
    (synopsis "Helper files for CopywriteR")
    (description
     "This package contains the helper files that are required to run the
Bioconductor package CopywriteR. It contains pre-assembled 1kb bin GC-content
and mappability files for the reference genomes hg18, hg19, hg38, mm9 and
mm10. In addition, it contains a blacklist filter to remove regions that
display copy number variation. Files are stored as GRanges objects from the
GenomicRanges Bioconductor package.")
    (license license:gpl2)))
8932
;; Bioconductor "CopywriteR"; r-copyhelper is among its propagated inputs.
(define-public r-copywriter
  (package
    (name "r-copywriter")
    (version "2.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "CopywriteR" version))
       (sha256
        (base32 "060p6l6l8i6b15hyyz5v5kkxih3h4wcciixii51m9mn82z23xr2f"))))
    ;; The upstream name uses mixed case.
    (properties `((upstream-name . "CopywriteR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-chipseq" ,r-chipseq)
       ("r-copyhelper" ,r-copyhelper)
       ("r-data-table" ,r-data-table)
       ("r-dnacopy" ,r-dnacopy)
       ("r-futile-logger" ,r-futile-logger)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeeperLab/CopywriteR")
    (synopsis "Copy number information from targeted sequencing")
    (description
     "CopywriteR extracts DNA copy number information from targeted sequencing
by utilizing off-target reads. It allows for extracting uniformly distributed
copy number information, can be used without reference, and can be applied to
sequencing data obtained from various techniques including chromatin
immunoprecipitation and target enrichment on small gene panels. Thereby,
CopywriteR constitutes a widely applicable alternative to available copy
number detection tools.")
    (license license:gpl2)))
8972
;; Bioconductor "methylKit"; knitr is a native input for the vignettes.
(define-public r-methylkit
  (package
    (name "r-methylkit")
    (version "1.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "methylKit" version))
       (sha256
        (base32 "1c9b11gfh3cc37iwym9rgsba3mh2xkp78a1gvnjqhzlkiz667mn3"))))
    ;; The upstream name uses mixed case.
    (properties `((upstream-name . "methylKit")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))           ; for vignettes
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-emdbook" ,r-emdbook)
       ("r-fastseg" ,r-fastseg)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-limma" ,r-limma)
       ("r-mclust" ,r-mclust)
       ("r-mgcv" ,r-mgcv)
       ("r-qvalue" ,r-qvalue)
       ("r-r-utils" ,r-r-utils)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/al2na/methylKit")
    (synopsis
     "DNA methylation analysis from high-throughput bisulfite sequencing results")
    (description
     "MethylKit is an R package for DNA methylation analysis and annotation
from high-throughput bisulfite sequencing. The package is designed to deal
with sequencing data from @dfn{Reduced representation bisulfite
sequencing} (RRBS) and its variants, but also target-capture methods and whole
genome bisulfite sequencing. It also has functions to analyze base-pair
resolution 5hmC data from experimental protocols such as oxBS-Seq and
TAB-Seq.")
    (license license:artistic2.0)))
9019
;; Bioconductor "sva"; the tarball location comes from bioconductor-uri.
(define-public r-sva
  (package
    (name "r-sva")
    (version "3.38.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "sva" version))
       (sha256
        (base32
         "1hpzzg3qrgkd8kwg1m5gq94cikjgk9j4l1wk58fxl49s6fmd13zy"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-edger" ,r-edger)
       ("r-genefilter" ,r-genefilter)
       ("r-mgcv" ,r-mgcv)
       ("r-biocparallel" ,r-biocparallel)
       ("r-matrixstats" ,r-matrixstats)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/sva")
    (synopsis "Surrogate variable analysis")
    ;; Grammar: "experiments" is plural, as there is no article before
    ;; "high-throughput".
    (description
     "This package contains functions for removing batch effects and other
unwanted variation in high-throughput experiments. It also contains functions
for identifying and building surrogate variables for high-dimensional data
sets. Surrogate variables are covariates constructed directly from
high-dimensional data like gene expression/RNA sequencing/methylation/brain
imaging data that can be used in subsequent analyses to adjust for unknown,
unmodeled, or latent sources of noise.")
    (license license:artistic2.0)))
9050
;; RareMETALS2 is downloaded from genome.sph.umich.edu, not from a package
;; repository helper such as bioconductor-uri.
(define-public r-raremetals2
  (package
    (name "r-raremetals2")
    (version "0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://genome.sph.umich.edu/w/images/"
                           "b/b7/RareMETALS2_" version ".tar.gz"))
       (sha256
        (base32 "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
    ;; The upstream name uses mixed case.
    (properties `((upstream-name . "RareMETALS2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-seqminer" ,r-seqminer)
       ("r-mvtnorm" ,r-mvtnorm)
       ("r-mass" ,r-mass)
       ("r-compquadform" ,r-compquadform)
       ("r-getopt" ,r-getopt)))
    (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
    (synopsis "Analyze gene-level association tests for binary trait")
    (description
     "The R package rareMETALS2 is an extension of the R package rareMETALS.
It was designed to meta-analyze gene-level association tests for binary trait.
While rareMETALS offers a near-complete solution for meta-analysis of
gene-level tests for quantitative trait, it does not offer the optimal
solution for binary trait. The package rareMETALS2 offers improved features
for analyzing gene-level association tests in meta-analyses for binary
trait.")
    (license license:gpl3)))
9082
;; Bioconductor "ProtGenerics"; this definition declares no inputs.
(define-public r-protgenerics
  (package
    (name "r-protgenerics")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ProtGenerics" version))
       (sha256
        (base32 "0yihxphgkshvfv1sn67wc4zvr2zlzws2j7ki3zabm6vyfkfdkfiz"))))
    ;; The upstream name uses mixed case.
    (properties `((upstream-name . "ProtGenerics")))
    (build-system r-build-system)
    (home-page "https://github.com/lgatto/ProtGenerics")
    (synopsis "S4 generic functions for proteomics infrastructure")
    (description
     "This package provides S4 generic functions needed by Bioconductor
proteomics packages.")
    (license license:artistic2.0)))
9102
;; Bioconductor "mzR".  The bundled Boost sources are deleted from the tarball
;; and the build links against the "boost" input instead.
(define-public r-mzr
  (package
    (name "r-mzr")
    (version "2.24.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "mzR" version))
       (sha256
        (base32
         "0ik0yrjhvk8r5pm990chn2aadp0gqzzkkm0027682ky34xp142sg"))
       (modules '((guix build utils)))
       ;; Remove the bundled copy of Boost.
       (snippet
        '(begin
           (delete-file-recursively "src/boost")
           #t))))
    (properties `((upstream-name . "mzR")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-boost
           (lambda _
             (substitute* "src/Makevars"
               ;; Drop references to the bundled Boost library sources.
               (("\\./boost/libs.*") "")
               ;; This is to avoid having a plain directory on the list of
               ;; libraries to link.
               (("\\(RHDF5_LIBS\\)" match)
                (string-append match "/libhdf5.a"))
               (("PKG_LIBS=") "PKG_LIBS=$(BOOST_LIBS) ")
               ;; Define BOOST_LIBS on the line after the ARCH_OBJS
               ;; assignment.  The pattern used to be written "\\ARCH_OBJS=";
               ;; "\A" is an undefined escape in POSIX regexps, so the stray
               ;; backslash is dropped.
               (("ARCH_OBJS=" line)
                (string-append line
                               "\nBOOST_LIBS=-lboost_system -lboost_regex \
-lboost_iostreams -lboost_thread -lboost_filesystem -lboost_chrono\n")))
             #t)))))
    (inputs
     `(;; Our default boost package won't work here, unfortunately, even with
       ;; mzR version 2.24.1.
       ("boost" ,boost-for-mysql) ; use this instead of the bundled boost sources
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-ncdf4" ,r-ncdf4)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/sneumann/mzR/")
    (synopsis "Parser for mass spectrometry data files")
    (description
     "The mzR package provides a unified API to the common file formats and
parsers available for mass spectrometry data. It comes with a wrapper for the
ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
The package contains the original code written by the ISB, and a subset of the
proteowizard library for mzML and mzIdentML. The netCDF reading code has
previously been used in XCMS.")
    (license license:artistic2.0)))
9163
;; Bioconductor "affyio"; zlib is a plain input, r-zlibbioc is propagated.
(define-public r-affyio
  (package
    (name "r-affyio")
    (version "1.60.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "affyio" version))
       (sha256
        (base32 "14xnzrxrvgxgixjhq5a9fdgcmrxam2j74hwidkc9if92ffv6s83h"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/bmbolstad/affyio")
    (synopsis "Tools for parsing Affymetrix data files")
    (description
     "This package provides routines for parsing Affymetrix data files based
upon file format information. The primary focus is on accessing the CEL and
CDF file formats.")
    (license license:lgpl2.0+)))
9187
;; Bioconductor "affy"; r-affyio is among its propagated inputs.
(define-public r-affy
  (package
    (name "r-affy")
    (version "1.68.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "affy" version))
       (sha256
        (base32 "0ywz548cbzk2k1njnxhlk5ydzvz2dk78ka8kx53gwrmdc4sc2b06"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-affyio" ,r-affyio)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/affy")
    (synopsis "Methods for affymetrix oligonucleotide arrays")
    (description
     "This package contains functions for exploratory oligonucleotide array
analysis.")
    (license license:lgpl2.0+)))
9215
;; Bioconductor "vsn"; knitr is a native input for the vignettes.
(define-public r-vsn
  (package
    (name "r-vsn")
    (version "3.58.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "vsn" version))
       (sha256
        (base32 "0dfrfflidpnphwyqzmmfiz9blfqv6qa09xlwgfabhpfsf3ml2rlb"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))           ; for vignettes
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-ggplot2" ,r-ggplot2)
       ("r-lattice" ,r-lattice)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/release/bioc/html/vsn.html")
    (synopsis "Variance stabilization and calibration for microarray data")
    (description
     "The package implements a method for normalising microarray intensities,
and works for single- and multiple-color arrays. It can also be used for data
from other technologies, as long as they have similar format. The method uses
a robust variant of the maximum-likelihood estimator for an
additive-multiplicative error model and affine calibration. The model
incorporates data calibration step (a.k.a. normalization), a model for the
dependence of the variance on the mean intensity and a variance stabilizing
data transformation. Differences between transformed intensities are
analogous to \"normalized log-ratios\". However, in contrast to the latter,
their variance is independent of the mean, and they are usually more sensitive
and specific in detecting differential transcription.")
    (license license:artistic2.0)))
9251
;; Bioconductor "mzID"; knitr is a native input.
(define-public r-mzid
  (package
    (name "r-mzid")
    (version "1.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "mzID" version))
       (sha256
        (base32 "0s7d6cz1li7v3ni6n6hrdspl93yiyr283kmbbd3hhkfgzgx6kpq2"))))
    ;; The upstream name uses mixed case.
    (properties `((upstream-name . "mzID")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-doparallel" ,r-doparallel)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-plyr" ,r-plyr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/mzID")
    (synopsis "Parser for mzIdentML files")
    (description
     "This package provides a parser for mzIdentML files implemented using the
XML package. The parser tries to be general and able to handle all types of
mzIdentML files with the drawback of having less pretty output than a vendor
specific parser.")
    (license license:gpl2+)))
9282
;; Bioconductor "pcaMethods"; the tarball location comes from
;; bioconductor-uri.
(define-public r-pcamethods
  (package
    (name "r-pcamethods")
    (version "1.82.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "pcaMethods" version))
       (sha256
        (base32 "04xb4vjky6hq58l30i1iq9rv5gzjdxnidjxpnzg7pvg67vz8pgf0"))))
    ;; The upstream name uses mixed case.
    (properties `((upstream-name . "pcaMethods")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-mass" ,r-mass)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/hredestig/pcamethods")
    (synopsis "Collection of PCA methods")
    (description
     "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
for missing value estimation is included for comparison. BPCA, PPCA and
NipalsPCA may be used to perform PCA on incomplete data as well as for
accurate missing value estimation. A set of methods for printing and plotting
the results is also provided. All PCA methods make use of the same data
structure (pcaRes) to provide a common interface to the PCA results.")
    (license license:gpl3+)))
9312
;; Bioconductor "MSnbase"; knitr is a native input.
(define-public r-msnbase
  (package
    (name "r-msnbase")
    (version "2.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnbase" version))
       (sha256
        (base32 "0hxzs9zzljywqxr7q388hshpy1pdryhl0zkwffqbxpf5pcf92d3h"))))
    ;; The upstream name uses mixed case.
    (properties `((upstream-name . "MSnbase")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-digest" ,r-digest)
       ("r-ggplot2" ,r-ggplot2)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-maldiquant" ,r-maldiquant)
       ("r-mass" ,r-mass)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-pcamethods" ,r-pcamethods)
       ("r-plyr" ,r-plyr)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-vsn" ,r-vsn)
       ("r-xml" ,r-xml)))
    (home-page "https://github.com/lgatto/MSnbase")
    (synopsis "Base functions and classes for MS-based proteomics")
    (description
     "This package provides basic plotting, data manipulation and processing
of mass spectrometry based proteomics data.")
    (license license:artistic2.0)))
9357
;; Bioconductor "MSnID"; the tarball location comes from bioconductor-uri.
(define-public r-msnid
  (package
    (name "r-msnid")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnID" version))
       (sha256
        (base32
         "05bncy7lw2a3h8xgnavjiz56pc6mk8q7l6qdd81197nawxs3j02d"))))
    (properties `((upstream-name . "MSnID")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocstyle" ,r-biocstyle)
       ("r-biostrings" ,r-biostrings)
       ("r-data-table" ,r-data-table)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-foreach" ,r-foreach)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iterators" ,r-iterators)
       ("r-msnbase" ,r-msnbase)
       ("r-msmstests" ,r-msmstests)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-purrr" ,r-purrr)
       ("r-r-cache" ,r-r-cache)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)
       ("r-rlang" ,r-rlang)
       ("r-runit" ,r-runit)
       ("r-stringr" ,r-stringr)
       ("r-tibble" ,r-tibble)
       ("r-xtable" ,r-xtable)))
    (home-page "https://bioconductor.org/packages/MSnID")
    (synopsis "Utilities for LC-MSn proteomics identifications")
    ;; Grammar: "optimizes" agrees with the subject "it", like "assesses".
    (description
     "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
from mzIdentML (leveraging the mzID package) or text files. After collating
the search results from multiple datasets it assesses their identification
quality and optimizes filtering criteria to achieve the maximum number of
identifications while not exceeding a specified false discovery rate. It also
contains a number of utilities to explore the MS/MS results and assess missed
and irregular enzymatic cleavages, mass measurement accuracy, etc.")
    (license license:artistic2.0)))
9409
;; Bioconductor "aroma.light"; the Guix package name uses a hyphen where the
;; upstream name has a dot.
(define-public r-aroma-light
  (package
    (name "r-aroma-light")
    (version "3.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "aroma.light" version))
       (sha256
        (base32 "0pi37rlfqh24p9wd7l1xb3f7c7938xdscgcc5agp8c9qhajq25a0"))))
    (properties `((upstream-name . "aroma.light")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-matrixstats" ,r-matrixstats)
       ("r-r-methodss3" ,r-r-methodss3)
       ("r-r-oo" ,r-r-oo)
       ("r-r-utils" ,r-r-utils)))
    (home-page "https://github.com/HenrikBengtsson/aroma.light")
    (synopsis "Methods for normalization and visualization of microarray data")
    (description
     "This package provides methods for microarray analysis that take basic
data types such as matrices and lists of vectors. These methods can be used
standalone, be utilized in other packages, or be wrapped up in higher-level
classes.")
    (license license:gpl2+)))
9436
;; Bioconductor "DESeq"; the tarball location comes from bioconductor-uri.
(define-public r-deseq
  (package
    (name "r-deseq")
    (version "1.39.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DESeq" version))
       (sha256
        (base32 "047hph5aqmjnz1aqprziw0smdn5lf96hmwpnvqrxv1j2yfvcf3h1"))))
    ;; The upstream name uses mixed case.
    (properties `((upstream-name . "DESeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-lattice" ,r-lattice)
       ("r-locfit" ,r-locfit)
       ("r-mass" ,r-mass)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://www-huber.embl.de/users/anders/DESeq/")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides tools for estimating variance-mean dependence in
count data from high-throughput genetic sequencing assays and for testing for
differential expression based on a model using the negative binomial
distribution.")
    (license license:gpl3+)))
9467
;; Bioconductor "EDASeq"; knitr is a native input.
(define-public r-edaseq
  (package
    (name "r-edaseq")
    (version "2.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "EDASeq" version))
       (sha256
        (base32 "0fznj7lsgkss1svv4rq8g87s1gmnbd7hccim41dv1c2w2nl0n2ip"))))
    ;; The upstream name uses mixed case.
    (properties `((upstream-name . "EDASeq")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-aroma-light" ,r-aroma-light)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)))
    (home-page "https://github.com/drisso/EDASeq")
    (synopsis "Exploratory data analysis and normalization for RNA-Seq")
    (description
     "This package provides support for numerical and graphical summaries of
RNA-Seq genomic read data. Provided within-lane normalization procedures to
adjust for GC-content effect (or other gene-level effects) on read counts:
loess robust local regression, global-scaling, and full-quantile
normalization. Between-lane normalization procedures to adjust for
distributional differences between lanes (e.g., sequencing depth):
global-scaling and full-quantile normalization.")
    (license license:artistic2.0)))
9507
;; Bioconductor "interactiveDisplayBase"; knitr is a native input.
(define-public r-interactivedisplaybase
  (package
    (name "r-interactivedisplaybase")
    (version "1.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "interactiveDisplayBase" version))
       (sha256
        (base32 "08id2hkx4ssxj34dildx00a4j3z0nv171b7b0wl6xjks7wk6lv01"))))
    ;; The upstream name uses mixed case.
    (properties `((upstream-name . "interactiveDisplayBase")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-dt" ,r-dt)
       ("r-shiny" ,r-shiny)))
    (home-page "https://bioconductor.org/packages/interactiveDisplayBase")
    (synopsis "Base package for web displays of Bioconductor objects")
    (description
     "This package contains the basic methods needed to generate interactive
Shiny-based display methods for Bioconductor objects.")
    (license license:artistic2.0)))
9534
;; Bioconductor "AnnotationHub"; r-interactivedisplaybase is among its
;; propagated inputs.
(define-public r-annotationhub
  (package
    (name "r-annotationhub")
    (version "2.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationHub" version))
       (sha256
        (base32 "1950x654ffqx53b154kbph808zdh2xm5vmj9vzmc5nxc28fi2z5g"))))
    ;; The upstream name uses mixed case.
    (properties `((upstream-name . "AnnotationHub")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocfilecache" ,r-biocfilecache)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biocversion" ,r-biocversion)
       ("r-curl" ,r-curl)
       ("r-dplyr" ,r-dplyr)
       ("r-httr" ,r-httr)
       ("r-interactivedisplaybase" ,r-interactivedisplaybase)
       ("r-rappdirs" ,r-rappdirs)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioconductor.org/packages/AnnotationHub")
    (synopsis "Client to access AnnotationHub resources")
    (description
     "This package provides a client for the Bioconductor AnnotationHub web
resource. The AnnotationHub web resource provides a central location where
genomic files (e.g. VCF, bed, wig) and other resources from standard
locations (e.g. UCSC, Ensembl) can be discovered. The resource includes
metadata about each resource, e.g., a textual description, tags, and date of
modification. The client creates and manages a local cache of files retrieved
by the user, helping with quick and reproducible access.")
    (license license:artistic2.0)))
9575
;; Bioconductor "fastseg"; the tarball location comes from bioconductor-uri.
(define-public r-fastseg
  (package
    (name "r-fastseg")
    (version "1.36.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "fastseg" version))
       (sha256
        (base32 "1ln6w93ag4wanp0nrm0pqngbfc88w95zq2kcj583hbxy885dkg4f"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://www.bioinf.jku.at/software/fastseg/index.html")
    (synopsis "Fast segmentation algorithm for genetic sequencing data")
    (description
     "Fastseg implements a very fast and efficient segmentation algorithm.
It can segment data from DNA microarrays and data from next generation
sequencing for example to detect copy number segments. Further it can segment
data from RNA microarrays like tiling arrays to identify transcripts. Most
generally, it can segment data given as a matrix or as a vector. Various data
formats can be used as input to fastseg like expression set objects for
microarrays or GRanges for sequencing data.")
    (license license:lgpl2.0+)))
9605
;; Bioconductor package "KEGGREST"; the upstream name differs in case from
;; the Guix package name, hence the `upstream-name' property.
(define-public r-keggrest
  (package
    (name "r-keggrest")
    (version "1.30.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "KEGGREST" version))
       (sha256
        (base32
         "0k9z85xf9la2y98xqmdmjb8mci9fh2fdybkl77x1yl26hyalip0s"))))
    (properties `((upstream-name . "KEGGREST")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-httr" ,r-httr)
       ("r-png" ,r-png)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/KEGGREST")
    (synopsis "Client-side REST access to KEGG")
    ;; The description previously read "provides a package that provides";
    ;; the redundant clause has been removed.
    (description
     "This package provides a client interface to the
@dfn{Kyoto Encyclopedia of Genes and Genomes} (KEGG) REST server.")
    (license license:artistic2.0)))
9631
;; Bioconductor package "gage".  The home page points at the publication
;; rather than at the Bioconductor landing page.
(define-public r-gage
  (package
    (name "r-gage")
    (version "2.40.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gage" version))
       (sha256
        (base32 "1iawa03dy4bl333my69d4sk7d74cjzfg5dpcxga6q5dglan4sp8r"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-keggrest" ,r-keggrest)))
    (home-page
     (string-append "https://bmcbioinformatics.biomedcentral.com/"
                    "articles/10.1186/1471-2105-10-161"))
    (synopsis "Generally applicable gene-set enrichment for pathway analysis")
    (description
     "GAGE is a published method for gene set (enrichment or GSEA) or pathway
analysis. GAGE is generally applicable independent of microarray or RNA-Seq
data attributes including sample sizes, experimental designs, assay platforms,
and other types of heterogeneity. The gage package provides functions for
basic GAGE analysis, result processing and presentation. In addition, it
provides demo microarray data and commonly used gene set data based on KEGG
pathways and GO terms. These functions and data are also useful for gene set
analysis using other methods.")
    (license license:gpl2+)))
9662
;; Bioconductor package "GenomicFiles".
(define-public r-genomicfiles
  (package
    (name "r-genomicfiles")
    (version "1.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFiles" version))
       (sha256
        (base32 "0awnf0m1pz7cw9wvh9cfxz9k7xm6wnvjm7xbxf139lrhd4nlyqjz"))))
    ;; The alist contains no unquoted parts, so a plain quote suffices.
    (properties '((upstream-name . "GenomicFiles")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrixgenerics" ,r-matrixgenerics)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/GenomicFiles")
    (synopsis "Distributed computing by file or by range")
    (description
     "This package provides infrastructure for parallel computations
distributed by file or by range. User defined mapper and reducer functions
provide added flexibility for data combination and manipulation.")
    (license license:artistic2.0)))
9696
;; Bioconductor package "ComplexHeatmap".
(define-public r-complexheatmap
  (package
    (name "r-complexheatmap")
    (version "2.6.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ComplexHeatmap" version))
       (sha256
        (base32 "1nx1xxpq8zrvi990v9fmvx3msl85pdz5dp1gp6m78q6i4s2alg5x"))))
    ;; The alist contains no unquoted parts, so a plain quote suffices.
    (properties '((upstream-name . "ComplexHeatmap")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-cairo" ,r-cairo)
       ("r-circlize" ,r-circlize)
       ("r-clue" ,r-clue)
       ("r-colorspace" ,r-colorspace)
       ("r-digest" ,r-digest)
       ("r-getoptlong" ,r-getoptlong)
       ("r-globaloptions" ,r-globaloptions)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-png" ,r-png)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/jokergoo/ComplexHeatmap")
    (synopsis "Making Complex Heatmaps")
    (description
     "Complex heatmaps are efficient to visualize associations between
different sources of data sets and reveal potential structures. This package
provides a highly flexible way to arrange multiple heatmaps and supports
self-defined annotation graphics.")
    (license license:gpl2+)))
9735
;; Bioconductor package "DirichletMultinomial".  Besides its R dependencies
;; it takes the `gsl' package as a regular input.
(define-public r-dirichletmultinomial
  (package
    (name "r-dirichletmultinomial")
    (version "1.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DirichletMultinomial" version))
       (sha256
        (base32 "098zql6ryd1b0gkq4cjybblyh0x8xidxxfygqq5a5x9asl8y4vsk"))))
    ;; The alist contains no unquoted parts, so a plain quote suffices.
    (properties '((upstream-name . "DirichletMultinomial")))
    (build-system r-build-system)
    (inputs
     `(("gsl" ,gsl)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/DirichletMultinomial")
    (synopsis "Dirichlet-Multinomial mixture models for microbiome data")
    (description
     "Dirichlet-multinomial mixture models can be used to describe variability
in microbial metagenomic data. This package is an interface to code
originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2):
1-15.")
    (license license:lgpl3)))
9764
;; Bioconductor package "ensembldb".
(define-public r-ensembldb
  (package
    (name "r-ensembldb")
    (version "2.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ensembldb" version))
       (sha256
        (base32
         "04il99gcrqzakvc0bxchdp9gghkn1sp9lpiian0iz4y7r67z3wpy"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-curl" ,r-curl)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/jotsetung/ensembldb")
    (synopsis "Utilities to create and use Ensembl-based annotation databases")
    ;; Grammar of the upstream text corrected (subject/verb agreement and
    ;; "to retrieve" -> "retrieving"); the meaning is unchanged.
    (description
     "The package provides functions to create and use transcript-centric
annotation databases/packages. The annotation for the databases is directly
fetched from Ensembl using their Perl API. The functionality and data are
similar to that of the TxDb packages from the @code{GenomicFeatures} package,
but, in addition to retrieving all gene/transcript models and annotations from
the database, the @code{ensembldb} package also provides a filter framework
that allows retrieving annotations for specific entries like genes encoded on a
chromosome region or transcript models of lincRNA genes.")
    ;; No version specified
    (license license:lgpl3+)))
9809
;; Bioconductor package "OrganismDbi".
(define-public r-organismdbi
  (package
    (name "r-organismdbi")
    (version "1.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "OrganismDbi" version))
       (sha256
        (base32
         "1mklnzs0d0ygcdibwfnk5xqr8ln6wpa00qcaw9c68m342kql0jqw"))))
    (properties `((upstream-name . "OrganismDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-graph" ,r-graph)
       ("r-iranges" ,r-iranges)
       ("r-rbgl" ,r-rbgl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/OrganismDbi")
    (synopsis "Software to enable the smooth interfacing of database packages")
    ;; "implements a select methods" corrected to the singular.
    (description "The package enables a simple unified interface to several
annotation packages each of which has its own schema by taking advantage of
the fact that each of these packages implements a select method.")
    (license license:artistic2.0)))
9841
;; Bioconductor package "biovizBase".
(define-public r-biovizbase
  (package
    (name "r-biovizbase")
    (version "1.38.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biovizBase" version))
       (sha256
        (base32 "10jflvadfcgxq2jnfxkpn417xd8ibh3zllz9rsqnq5w3wgfr4fhq"))))
    ;; The alist contains no unquoted parts, so a plain quote suffices.
    (properties '((upstream-name . "biovizBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-dichromat" ,r-dichromat)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rlang" ,r-rlang)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/biovizBase")
    (synopsis "Basic graphic utilities for visualization of genomic data")
    (description
     "The biovizBase package is designed to provide a set of utilities, color
schemes and conventions for genomic data. It serves as the base for various
high-level packages for biological data visualization. This saves development
effort and encourages consistency.")
    (license license:artistic2.0)))
9883
;; dropbead is packaged from a pinned Git commit of the upstream repository.
(define-public r-dropbead
  (let ((commit "d746c6f3b32110428ea56d6a0001ce52a251c247")
        (revision "2"))
    (package
      (name "r-dropbead")
      ;; `git-version' builds "<version>-<revision>.<first 7 chars of commit>",
      ;; i.e. the same string the hand-written `string-append' produced, and
      ;; matches how r-cellchat in this file computes its version.
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rajewsky-lab/dropbead")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0sbzma49aiiyw8b0jpr7fnhzys9nsqmp4hy4hdz1gzyg1lhnca26"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-ggplot2" ,r-ggplot2)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-gridextra" ,r-gridextra)
         ("r-gplots" ,r-gplots)
         ("r-plyr" ,r-plyr)))
      (home-page "https://github.com/rajewsky-lab/dropbead")
      (synopsis "Basic exploration and analysis of Drop-seq data")
      (description "This package offers a quick and straight-forward way to
explore and perform basic analysis of single cell sequencing data coming from
droplet sequencing. It has been particularly tailored for Drop-seq.")
      (license license:gpl3))))
9913
;; CellChat is packaged from a pinned Git commit of the upstream repository.
(define-public r-cellchat
  (let ((commit "21edd226ca408e4c413408f98562d71ee0b54e5d")
        (revision "1"))
    (package
      (name "r-cellchat")
      (version (git-version "1.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/sqjin/CellChat")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0cvzl9mi8jjznpql2gv67swnk1dndn3a2h22z5l84h7lwpwjmh53"))
         ;; Remove the listed shared object and object files from the
         ;; checkout.
         (snippet
          '(for-each delete-file
                     '("src/CellChat.so"
                       "src/CellChat_Rcpp.o"
                       "src/RcppExports.o")))))
      ;; The alist contains no unquoted parts, so a plain quote suffices.
      (properties '((upstream-name . "CellChat")))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-biocgenerics" ,r-biocgenerics)
         ("r-circlize" ,r-circlize)
         ("r-colorspace" ,r-colorspace)
         ("r-complexheatmap" ,r-complexheatmap)
         ("r-cowplot" ,r-cowplot)
         ("r-dplyr" ,r-dplyr)
         ("r-expm" ,r-expm)
         ("r-fnn" ,r-fnn)
         ("r-forcats" ,r-forcats)
         ("r-future" ,r-future)
         ("r-future-apply" ,r-future-apply)
         ("r-gg-gap" ,r-gg-gap)
         ("r-ggalluvial" ,r-ggalluvial)
         ("r-ggplot2" ,r-ggplot2)
         ("r-ggrepel" ,r-ggrepel)
         ("r-igraph" ,r-igraph)
         ("r-irlba" ,r-irlba)
         ("r-magrittr" ,r-magrittr)
         ("r-matrix" ,r-matrix)
         ("r-nmf" ,r-nmf)
         ("r-patchwork" ,r-patchwork)
         ("r-pbapply" ,r-pbapply)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-rcpp" ,r-rcpp)
         ("r-rcppeigen" ,r-rcppeigen)
         ("r-reshape2" ,r-reshape2)
         ("r-reticulate" ,r-reticulate)
         ("r-rspectra" ,r-rspectra)
         ("r-rtsne" ,r-rtsne)
         ("r-scales" ,r-scales)
         ("r-shape" ,r-shape)
         ("r-sna" ,r-sna)
         ("r-stringr" ,r-stringr)
         ("r-svglite" ,r-svglite)))
      (native-inputs
       `(("r-knitr" ,r-knitr)))
      (home-page "https://github.com/sqjin/CellChat")
      (synopsis "Analysis of cell-cell communication from single-cell transcriptomics data")
      (description
       "This package infers, visualizes and analyzes the cell-cell
communication networks from scRNA-seq data.")
      (license license:gpl3))))
9979
;; sambamba is built from a tagged Git checkout with the GNU build system,
;; minus the `configure' phase and with a hand-written `install' phase.
(define-public sambamba
  (package
    (name "sambamba")
    (version "0.8.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biod/sambamba")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "07dznzl6m8k7sw84jxw2kx6i3ymrapbmcmyh0fxz8wrybhw8fmwc"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test target
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'fix-ldc-version
           (lambda _
             ;; We use ldc2 instead of ldmd2 to compile sambamba.
             (substitute* "Makefile"
               (("\\$\\(shell which ldmd2\\)")
                (which "ldc2")))
             #t))
         (add-after 'unpack 'unbundle-prerequisites
           (lambda _
             ;; Link against lz4 via a linker flag instead of the static
             ;; archive under lz4/lib, and drop the lz4-static prerequisite.
             (substitute* "Makefile"
               (("= lz4/lib/liblz4.a")
                "= -L-llz4")
               (("ldc_version_info lz4-static")
                "ldc_version_info"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir)
               ;; The built binary carries the version in its file name;
               ;; install it under the plain name.
               (copy-file (string-append "bin/sambamba-" ,version)
                          (string-append bindir "/sambamba"))
               #t))))))
    (native-inputs
     `(("python" ,python)))
    (inputs
     `(("ldc" ,ldc)
       ("lz4" ,lz4)
       ("zlib" ,zlib)))
    (home-page "https://github.com/biod/sambamba")
    (synopsis "Tools for working with SAM/BAM data")
    (description "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
working with SAM and BAM files. Current parallelised functionality is
an important subset of samtools functionality, including view, index,
sort, markdup, and depth.")
    (license license:gpl2+)))
10034
;; Ritornello is built from a tagged Git checkout; there is no `configure'
;; step and the single binary is installed by hand.
(define-public ritornello
  (package
    (name "ritornello")
    (version "2.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/KlugerLab/Ritornello")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1xahvq215qld7x1w8vpa5zbrsj6p9crb9shqa2x89sb0aaxa02jk"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-samtools-references
           (lambda _
             ;; Include sam.h through the "samtools/" prefix.
             (substitute* '("src/SamStream.h"
                            "src/FLD.cpp")
               (("<sam.h>")
                "<samtools/sam.h>"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out")
                                          "/bin/")))
               (mkdir-p bindir)
               (install-file "bin/Ritornello" bindir)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("fftw" ,fftw)
       ("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/KlugerLab/Ritornello")
    (synopsis "Control-free peak caller for ChIP-seq data")
    (description "Ritornello is a ChIP-seq peak calling algorithm based on
signal processing that can accurately call binding events without the need to
do a pair total DNA input or IgG control sample. It has been tested for use
with narrow binding events such as transcription factor ChIP-seq.")
    (license license:gpl3+)))
10079
;; Trim Galore is a single script: nothing is compiled, the script is patched
;; to reference its tools by absolute file name and then copied to bin/.
(define-public trim-galore
  (package
    (name "trim-galore")
    (version "0.6.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/TrimGalore")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0yrwg6325j4sb9vnplvl3jplzab0qdhp92wl480qjinpfq88j4rs"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda _
             ;; Trim Galore tries to figure out what version of Python
             ;; cutadapt is using by looking at the shebang.  Of course that
             ;; doesn't work, because cutadapt is wrapped in a shell script.
             (substitute* "trim_galore"
               (("my \\$python_return.*")
                "my $python_return = \"Python 3.999\";\n"))
             #t))
         (delete 'build)
         (add-after 'unpack 'hardcode-tool-references
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Look up each tool's directory once, then splice it into the
             ;; script wherever the bare command name is used.
             (let ((cutadapt-dir (assoc-ref inputs "cutadapt"))
                   (gzip-dir (assoc-ref inputs "gzip"))
                   (pigz-dir (assoc-ref inputs "pigz")))
               (substitute* "trim_galore"
                 (("\\$path_to_cutadapt = 'cutadapt'")
                  (string-append "$path_to_cutadapt = '"
                                 cutadapt-dir "/bin/cutadapt'"))
                 (("\\$compression_path = \"gzip\"")
                  (string-append "$compression_path = \""
                                 gzip-dir "/bin/gzip\""))
                 (("\"gunzip")
                  (string-append "\"" gzip-dir "/bin/gunzip"))
                 (("\"pigz")
                  (string-append "\"" pigz-dir "/bin/pigz")))
               #t)))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir)
               (install-file "trim_galore" bindir)
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl" ,perl)
       ("pigz" ,pigz)
       ("cutadapt" ,cutadapt)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
    (synopsis "Wrapper around Cutadapt and FastQC")
    (description "Trim Galore! is a wrapper script to automate quality and
adapter trimming as well as quality control, with some added functionality to
remove biased methylation positions for RRBS sequence files.")
    (license license:gpl3+)))
10149
;; GESS is a set of Python 2 scripts.  Nothing is configured or compiled: the
;; whole source tree is copied below site-packages, GESS.py is made
;; executable, patched and wrapped, and a symlink to it is placed in bin/.
(define-public gess
  (package
    (name "gess")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://compbio.uthscsa.edu/"
                                  "GESS_Web/files/"
                                  "gess-" version ".src.tar.gz"))
              (sha256
               (base32
                "0hyk403kxscclzfs24pvdgiv0wm03kjcziqdrp5w46cb049gz0d7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (target (string-append
                             out "/lib/python"
                             ;; The interpreter in the inputs is python-2, so
                             ;; derive the directory name from its version
                             ;; rather than from the `python' package.
                             ,(version-major+minor
                               (package-version python-2))
                             "/site-packages/gess/")))
               (mkdir-p target)
               (copy-recursively "." target)
               ;; Make GESS.py executable
               (chmod (string-append target "GESS.py") #o555)
               ;; Add Python shebang to the top and make Matplotlib
               ;; usable.
               (substitute* (string-append target "GESS.py")
                 (("\"\"\"Description:" line)
                  (string-append "#!" (which "python") "
import matplotlib
matplotlib.use('Agg')
" line)))
               ;; Make sure GESS has all modules in its path
               (wrap-script (string-append target "GESS.py")
                 `("PYTHONPATH" ":" = (,target ,(getenv "PYTHONPATH"))))
               (mkdir-p bin)
               (symlink (string-append target "GESS.py")
                        (string-append bin "GESS.py"))
               #t))))))
    (inputs
     `(("python" ,python-2)
       ("python2-pysam" ,python2-pysam)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-networkx" ,python2-networkx)
       ("python2-biopython" ,python2-biopython)
       ("guile" ,guile-3.0)))           ; for the script wrapper
    (home-page "https://compbio.uthscsa.edu/GESS_Web/")
    (synopsis "Detect exon-skipping events from raw RNA-seq data")
    (description
     "GESS is an implementation of a novel computational method to detect de
novo exon-skipping events directly from raw RNA-seq data without the prior
knowledge of gene annotation information. GESS stands for the graph-based
exon-skipping scanner detection scheme.")
    (license license:bsd-3)))
10214
;; PHYLIP is built from its "src" directory using Makefile.unx; the resulting
;; files under "exe" are then copied to bin/.
(define-public phylip
  (package
    (name "phylip")
    (version "3.696")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://evolution.gs.washington.edu/phylip/"
                           "download/phylip-" version ".tar.gz"))
       (sha256
        (base32 "01jar1rayhr2gba2pgbw49m56rc5z4p5wn3ds0m188hrlln4a2nd"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags (list "-f" "Makefile.unx" "install")
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "src")
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir)
               ;; We are inside "src", so the "exe" directory is one level
               ;; up.
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "../exe" ".*")))
             #t)))))
    (home-page "http://evolution.genetics.washington.edu/phylip/")
    (synopsis "Tools for inferring phylogenies")
    (description "PHYLIP (the PHYLogeny Inference Package) is a package of
programs for inferring phylogenies (evolutionary trees).")
    (license license:bsd-2)))
10251
;; IMP is built with CMake.  The only configure flag assembles the argument
;; list handed to ctest, which selects tests by label and excludes a few by
;; name.
(define-public imp
  (package
    (name "imp")
    (version "2.13.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://integrativemodeling.org/"
                           version "/download/imp-" version ".tar.gz"))
       (sha256
        (base32 "1z1vcpwbylixk0zywngg5iw0jv083jj1bqphi817jpg3fb9fx2jj"))))
    (build-system cmake-build-system)
    (arguments
     `(;; CMake 3.17 or newer is required for the CMAKE_CTEST_ARGUMENTS used
       ;; below to have an effect.
       #:cmake ,cmake
       #:configure-flags
       (let* ((disabled-tests
               '("expensive"            ;exclude expensive tests
                 "IMP.modeller"         ;fail to import its own modules
                 "IMP.parallel-test_sge.py" ;fail in build container
                 ;; The following test fails non-reproducibly on
                 ;; an inexact numbers assertion.
                 "IMP.em-medium_test_local_fitting.py"))
              ;; Single-quoted alternation of everything listed above.
              (exclude-regexp
               (format #f "'(~a)'" (string-join disabled-tests "|")))
              (ctest-arguments
               (list "-L" "-tests?-"    ;select only tests
                     "-E" exclude-regexp)))
         (list (string-append "-DCMAKE_CTEST_ARGUMENTS="
                              (string-join ctest-arguments ";"))))))
    (native-inputs
     `(("python" ,python-wrapper)
       ("swig" ,swig)))
    (inputs
     `(("boost" ,boost)
       ("cgal" ,cgal)
       ("gsl" ,gsl)
       ("hdf5" ,hdf5)
       ("fftw" ,fftw)
       ("eigen" ,eigen)
       ;; Enabling MPI causes the build to use all the available memory and
       ;; fail (tested on a machine with 32 GiB of RAM).
       ;;("mpi" ,openmpi)
       ("opencv" ,opencv)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-networkx" ,python-networkx)))
    (home-page "https://integrativemodeling.org")
    (synopsis "Integrative modeling platform")
    (description "IMP's broad goal is to contribute to a comprehensive
structural characterization of biomolecules ranging in size and complexity
from small peptides to large macromolecular assemblies, by integrating data
from diverse biochemical and biophysical experiments. IMP provides a C++ and
Python toolbox for solving complex modeling problems, and a number of
applications for tackling some common problems in a user-friendly way.")
    ;; IMP is largely available under the GNU Lesser GPL; see the file
    ;; COPYING.LGPL for the full text of this license.  Some IMP modules are
    ;; available under the GNU GPL (see the file COPYING.GPL).
    (license (list license:lgpl2.1+
                   license:gpl3+))))
10317
;; TADbit is built with the Python build system.  setup.py is patched before
;; the build, and the test suite is run through the project's own test
;; driver.
(define-public tadbit
  (package
    (name "tadbit")
    (version "1.0.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/3DGenomes/TADbit")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0hqrlymh2a2bimcfdvlssy1x5h1lp3h1c5a7jj11hmcqczzqn3ni"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-problems-with-setup.py
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Don't attempt to install the bash completions to
             ;; the home directory.
             (rename-file "extras/.bash_completion"
                          "extras/tadbit")
             (substitute* "setup.py"
               (("\\(path.expanduser\\('~'\\)")
                (string-append "(\""
                               (assoc-ref outputs "out")
                               "/etc/bash_completion.d\""))
               (("extras/\\.bash_completion")
                "extras/tadbit"))
             #t))
         (replace 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             ;; Honour #:tests? so that the test suite can be switched off;
             ;; with the default (#t) this behaves exactly as before.
             (when tests?
               (add-installed-pythonpath inputs outputs)
               (invoke "python3" "test/test_all.py"))
             #t)))))
    (native-inputs
     `(("glib" ,glib "bin")             ;for gtester
       ("pkg-config" ,pkg-config)))
    (inputs
     ;; TODO: add Chimera for visualization
     `(("imp" ,imp)
       ("mcl" ,mcl)
       ("python-future" ,python-future)
       ("python-h5py" ,python-h5py)
       ("python-scipy" ,python-scipy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)))
    (home-page "https://3dgenomes.github.io/TADbit/")
    (synopsis "Analyze, model, and explore 3C-based data")
    (description
     "TADbit is a complete Python library to deal with all steps to analyze,
model, and explore 3C-based data. With TADbit the user can map FASTQ files to
obtain raw interaction binned matrices (Hi-C like matrices), normalize and
correct interaction matrices, identify and compare the so-called
@dfn{Topologically Associating Domains} (TADs), build 3D models from the
interaction matrices, and finally, extract structural properties from the
models. TADbit is complemented by TADkit for visualizing 3D models.")
    (license license:gpl3+)))
10378
;; kentUtils is built from a tagged Git checkout.  A source snippet prunes
;; the tree down to the freely licensed directories and adjusts the makefiles
;; accordingly; the build then needs a copy of the "samtabix" sources.
(define-public kentutils
  (package
    (name "kentutils")
    ;; 302.1.0 is out, but the only difference is the inclusion of
    ;; pre-built binaries.
    (version "302.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ENCODE-DCC/kentUtils")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0n1wbyjpzii2b9qhyp9r1q76j623cggpg3y8fmw78ld3z4y7ivha"))
       (modules '((guix build utils)
                  (srfi srfi-26)
                  (ice-9 ftw)))
       (snippet
        '(begin
           ;; Only the contents of the specified directories are free
           ;; for all uses, so we remove the rest.  "hg/autoSql" and
           ;; "hg/autoXml" are nominally free, but they depend on a
           ;; library that is built from the sources in "hg/lib",
           ;; which is nonfree.
           (let* ((keep '("." ".."
                          "utils" "lib" "inc" "tagStorm"
                          "parasol" "htslib"))
                  (directory? (lambda (path)
                                (eq? 'directory (stat:type (stat path)))))
                  ;; Entries of "src" that are not on the keep list.
                  (candidates (scandir "src"
                                       (lambda (entry)
                                         (not (member entry keep))))))
             (for-each (lambda (entry)
                         (let ((path (string-append "src/" entry)))
                           ;; Plain files are left alone; only directories
                           ;; are removed.
                           (when (directory? path)
                             (delete-file-recursively path))))
                       candidates))
           ;; Only make the utils target, not the userApps target,
           ;; because that requires libraries we won't build.
           (substitute* "Makefile"
             ((" userApps")
              " utils"))
           ;; Only build libraries that are free.
           (substitute* "src/makefile"
             (("DIRS =.*")
              "DIRS =\n")
             (("cd jkOwnLib.*")
              "")
             ((" hgLib")
              "")
             (("cd hg.*")
              ""))
           (substitute* "src/utils/makefile"
             ;; These tools depend on "jkhgap.a", which is part of the
             ;; nonfree "src/hg/lib" directory.
             (("raSqlQuery")
              "")
             (("pslLiftSubrangeBlat")
              "")

             ;; Do not build UCSC tools, which may require nonfree
             ;; components.
             (("ALL_APPS =.*")
              "ALL_APPS = $(UTILS_APPLIST)\n"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There is no global test target and the test target for
       ;; individual tools depends on input files that are not
       ;; included.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-permissions
           (lambda _
             (make-file-writable "src/inc/localEnvironment.mk")
             #t))
         (add-after 'unpack 'fix-paths
           (lambda _
             (substitute* "Makefile"
               (("/bin/echo")
                (which "echo")))
             #t))
         (add-after 'unpack 'prepare-samtabix
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Copy the samtabix checkout into the build tree.
             (let ((samtabix-source (assoc-ref inputs "samtabix")))
               (copy-recursively samtabix-source "samtabix"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (copy-recursively "bin" bindir))
             #t)))))
    (native-inputs
     `(("samtabix"
        ,(let ((commit "10fd107909c1ac4d679299908be4262a012965ba"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "http://genome-source.cse.ucsc.edu/samtabix.git")
                   (commit commit)))
             (file-name (git-file-name "samtabix" (string-take commit 7)))
             (sha256
              (base32
               "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma")))))))
    (inputs
     `(("zlib" ,zlib)
       ("tcsh" ,tcsh)
       ("perl" ,perl)
       ("libpng" ,libpng)
       ("mariadb-dev" ,mariadb "dev")
       ("openssl" ,openssl)))
    (home-page "https://genome.cse.ucsc.edu/index.html")
    (synopsis "Assorted bioinformatics utilities")
    (description "This package provides the kentUtils, a selection of
bioinformatics utilities used in combination with the UCSC genome
browser.")
    ;; Only a subset of the sources are released under a non-copyleft
    ;; free software license.  All other sources are removed in a
    ;; snippet.  See this bug report for an explanation of how the
    ;; license statements apply:
    ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
    (license (license:non-copyleft
              "http://genome.ucsc.edu/license/"
              "The contents of this package are free for all uses."))))
10496
(define-public f-seq
  (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
        (revision "1"))
    (package
      (name "f-seq")
      ;; Yields "1.1-<revision>.<first seven characters of the commit>".
      (version (git-version "1.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/aboyle/F-seq")
               (commit commit)))
         (file-name (string-append name "-" version))
         (sha256
          (base32
           "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
         (modules '((guix build utils)))
         ;; The "lib" directory only holds bundled Java library archives;
         ;; delete every file found below it.
         (snippet
          '(begin
             (for-each delete-file (find-files "lib" ".*"))
             #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; no tests included
         #:phases
         (modify-phases %standard-phases
           ;; Install the launcher script, the jar and the README by hand.
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((out    (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin"))
                      (docdir (string-append out "/share/doc/f-seq"))
                      (libdir (string-append out "/lib")))
                 (mkdir-p out)
                 (mkdir-p docdir)
                 ;; Point the launcher at the absolute "java" executable,
                 ;; at the commons-cli jar from the inputs, and at the
                 ;; final "bin" directory.
                 (substitute* "bin/linux/fseq"
                   (("java") (which "java"))
                   (("\\$REALDIR/../lib/commons-cli-1.1.jar")
                    (string-append (assoc-ref inputs "java-commons-cli")
                                   "/share/java/commons-cli.jar"))
                   (("REALDIR=.*")
                    (string-append "REALDIR=" bindir "\n")))
                 (install-file "README.txt" docdir)
                 (install-file "bin/linux/fseq" bindir)
                 (install-file "build~/fseq.jar" libdir)
                 (copy-recursively "lib" libdir)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("java-commons-cli" ,java-commons-cli)))
      (home-page "https://fureylab.web.unc.edu/software/fseq/")
      (synopsis "Feature density estimator for high-throughput sequence tags")
      (description
       "F-Seq is a software package that generates a continuous tag sequence
density estimation allowing identification of biologically meaningful sites
such as transcription factor binding sites (ChIP-seq) or regions of open
chromatin (DNase-seq). Output can be displayed directly in the UCSC Genome
Browser.")
      (license license:gpl3+))))
10555
(define-public bismark
  (package
    (name "bismark")
    (version "0.20.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/Bismark")
             (commit version)))
       ;; Same result as the former hand-written "NAME-VERSION-checkout".
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0xchm3rgilj6vfjnyzfzzymfd7djr64sbrmrvs3njbwi66jqbzw9"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:modules ((guix build utils)
                  (ice-9 popen)
                  (srfi srfi-26)
                  (guix build perl-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled plotly.js is minified.  Overwrite it with the
         ;; output of running "uglify-js" on the "plotly.js" input.
         (add-after 'unpack 'replace-plotly.js
           (lambda* (#:key inputs #:allow-other-keys)
             (let* ((file (assoc-ref inputs "plotly.js"))
                    (installed "plotly/plotly.js")
                    (minified (open-pipe* OPEN_READ "uglify-js" file)))
               (call-with-output-file installed
                 (cut dump-port minified <>))
               ;; Close the pipe so the child process is reaped, and fail
               ;; the phase if the minifier did not exit successfully;
               ;; otherwise an empty or truncated file would be installed
               ;; without notice.
               (let ((status (close-pipe minified)))
                 (unless (eqv? 0 (status:exit-val status))
                   (error "uglify-js failed to minify plotly.js; status:"
                          status))))
             #t))
         (delete 'configure)
         (delete 'build)
         ;; Install the scripts, the user guide and the plotly directory
         ;; by hand.
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (share (string-append out "/share/bismark"))
                    (docdir (string-append out "/share/doc/bismark"))
                    (docs '("Docs/Bismark_User_Guide.html"))
                    (scripts '("bismark"
                               "bismark_genome_preparation"
                               "bismark_methylation_extractor"
                               "bismark2bedGraph"
                               "bismark2report"
                               "coverage2cytosine"
                               "deduplicate_bismark"
                               "filter_non_conversion"
                               "bam2nuc"
                               "bismark2summary"
                               "NOMe_filtering")))
               ;; The report script must look for plotly in the share
               ;; directory of the output; patch it before it is installed.
               (substitute* "bismark2report"
                 (("\\$RealBin/plotly")
                  (string-append share "/plotly")))
               (mkdir-p share)
               (mkdir-p docdir)
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin))
                         scripts)
               (for-each (lambda (file) (install-file file docdir))
                         docs)
               (copy-recursively "Docs/Images" (string-append docdir "/Images"))
               (copy-recursively "plotly"
                                 (string-append share "/plotly"))

               ;; Fix references to gunzip
               (substitute* (map (lambda (file)
                                   (string-append bin "/" file))
                                 scripts)
                 (("\"gunzip -c")
                  (string-append "\"" (assoc-ref inputs "gzip")
                                 "/bin/gunzip -c")))
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl-carp" ,perl-carp)
       ("perl-getopt-long" ,perl-getopt-long)))
    (native-inputs
     `(("plotly.js"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/plotly/plotly.js/"
                               "v1.39.4/dist/plotly.js"))
           (sha256
            (base32 "138mwsr4nf5qif4mrxx286mpnagxd1xwl6k8aidrjgknaqg88zyr"))))
       ("uglify-js" ,uglify-js)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/bismark/")
    (synopsis "Map bisulfite treated sequence reads and analyze methylation")
    (description "Bismark is a program to map bisulfite treated sequencing
reads to a genome of interest and perform methylation calls in a single step.
The output can be easily imported into a genome viewer, such as SeqMonk, and
enables a researcher to analyse the methylation levels of their samples
straight away. Its main features are:

@itemize
@item Bisulfite mapping and methylation calling in one single step
@item Supports single-end and paired-end read alignments
@item Supports ungapped and gapped alignments
@item Alignment seed length, number of mismatches etc are adjustable
@item Output discriminates between cytosine methylation in CpG, CHG
and CHH context
@end itemize\n")
    (license license:gpl3+)))
10660
(define-public paml
  (package
    (name "paml")
    (version "4.9e")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://abacus.gene.ucl.ac.uk/software/"
                                  "paml" version ".tgz"))
              (sha256
               (base32
                "13zf6h9fiqghwhch2h06x1zdr6s42plsnqahflp5g7myr3han3s6"))
              (modules '((guix build utils)))
              ;; Remove Windows binaries
              (snippet
               '(begin
                  (for-each delete-file (find-files "." "\\.exe$"))
                  ;; Some files in the original tarball have restrictive
                  ;; permissions, which makes repackaging fail
                  (for-each (lambda (file) (chmod file #o644)) (find-files "."))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script.  Instead, patch the hard-coded
         ;; shell location and enter "src", from where the remaining
         ;; phases run.
         (replace 'configure
           (lambda _
             (substitute* "src/BFdriver.c"
               (("/bin/bash") (which "bash")))
             (chdir "src")
             #t))
         ;; Install the listed tools and the documentation by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (docdir (string-append out "/share/doc/paml"))
                    (tools '("baseml" "basemlg" "codeml"
                             "pamp" "evolver" "yn00" "chi2")))
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin)) tools)
               ;; The current directory is "src" at this point, so the
               ;; documentation is one level up.
               (copy-recursively "../doc" docdir)
               #t))))))
    (home-page "http://abacus.gene.ucl.ac.uk/software/paml.html")
    (synopsis "Phylogenetic analysis by maximum likelihood")
    (description "PAML (for Phylogenetic Analysis by Maximum Likelihood)
contains a few programs for model fitting and phylogenetic tree reconstruction
using nucleotide or amino-acid sequence data.")
    ;; GPLv3 only
    (license license:gpl3)))
10711
(define-public kallisto
  (package
    (name "kallisto")
    (version "0.46.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pachterlab/kallisto")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0ij5n7v3m90jdfi7sn8nvglfyf58abp1f5xq42r4k73l0lfds6xi"))
       (modules '((guix build utils)))
       ;; Drop the bundled copy of htslib; the "htslib" input is used
       ;; instead.
       (snippet
        '(delete-file-recursively "ext/htslib/"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-htslib
           (lambda _
             ;; Wrap the ExternalProject_Add form in a conditional on the
             ;; name NEVER, and comment out the include directory of the
             ;; bundled htslib.
             (substitute* "CMakeLists.txt"
               (("^ExternalProject_Add" all)
                (string-append "if (NEVER)\n" all))
               (("^\\)")
                ")\nendif(NEVER)")
               (("include_directories\\(\\$\\{htslib_PREFIX.*" all)
                (string-append "# " all)))
             ;; Link against "hts" rather than the bundled static archive.
             (substitute* "src/CMakeLists.txt"
               (("target_link_libraries\\(kallisto kallisto_core pthread \
\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/../ext/htslib/libhts.a\\)")
                "target_link_libraries(kallisto kallisto_core pthread hts)")
               (("include_directories\\(\\.\\./ext/htslib\\)") ""))
             #t)))))
    (inputs
     `(("hdf5" ,hdf5)
       ("htslib" ,htslib-1.9)
       ("zlib" ,zlib)))
    (home-page "https://pachterlab.github.io/kallisto/")
    (synopsis "Near-optimal RNA-Seq quantification")
    (description
     "Kallisto is a program for quantifying abundances of transcripts from
RNA-Seq data, or more generally of target sequences using high-throughput
sequencing reads. It is based on the novel idea of pseudoalignment for
rapidly determining the compatibility of reads with targets, without the need
for alignment. Pseudoalignment of reads preserves the key information needed
for quantification, and kallisto is therefore not only fast, but also as
accurate as existing quantification tools.")
    (license license:bsd-2)))
10763
(define-public libgff
  (package
    (name "libgff")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Kingsford-Group/libgff")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0n6vfjnq7a2mianipscbshrvbncss8z4zkgkbjw754p9043nfkps"))))
    (build-system cmake-build-system)
    ;; No tests are included, so the check phase is disabled.
    (arguments
     `(#:tests? #f))
    (home-page "https://github.com/Kingsford-Group/libgff")
    (synopsis "Parser library for reading/writing GFF files")
    (description "This is a simple \"libraryfication\" of the GFF/GTF parsing
code that is used in the Cufflinks codebase. The goal of this library is to
provide this functionality without the necessity of drawing in a heavy-weight
dependency like SeqAn.")
    (license (license:x11-style "https://www.boost.org/LICENSE_1_0.txt"))))
10786
(define-public sailfish
  (package
    (name "sailfish")
    (version "0.10.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/kingsfordgroup/sailfish")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1amcc5hqvsl42hg4x19bi9vy47cl874s0lw1fmi0hwsdk9i8c03v"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled headers for eigen3.
           (delete-file-recursively "include/eigen3/")
           #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (let ((boost (assoc-ref %build-inputs "boost")))
         (list (string-append "-DBOOST_INCLUDEDIR=" boost "/include/")
               (string-append "-DBOOST_LIBRARYDIR=" boost "/lib/")
               (string-append "-DBoost_LIBRARIES="
                              "-lboost_iostreams "
                              "-lboost_filesystem "
                              "-lboost_system "
                              "-lboost_thread "
                              "-lboost_timer "
                              "-lboost_chrono "
                              "-lboost_program_options")
               "-DBoost_FOUND=TRUE"
               ;; Don't download RapMap---we already have it!
               "-DFETCHED_RAPMAP=1"))
       ;; Tests must be run after installation and the location of the test
       ;; data file must be overridden.  But the tests fail.  It looks like
       ;; they are not really meant to be run.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda _
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.53\\.0") "#"))
             #t))
         (add-after 'unpack 'do-not-assign-to-macro
           (lambda _
             (substitute* "include/spdlog/details/format.cc"
               (("const unsigned CHAR_WIDTH = 1;") ""))
             #t))
         ;; Unpack the "rapmap" input into a scratch directory and copy its
         ;; sources and headers to the locations below "external/install".
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((src "external/install/src/rapmap/")
                   (include "external/install/include/rapmap/")
                   (rapmap (assoc-ref inputs "rapmap"))
                   (unpacked "/tmp/rapmap"))
               (mkdir-p unpacked)
               (invoke "tar" "xf" rapmap
                       "-C" unpacked
                       "--strip-components=1")
               (mkdir-p src)
               (mkdir-p include)
               (for-each (lambda (file)
                           (install-file file src))
                         (find-files (string-append unpacked "/src")
                                     "\\.(c|cpp)"))
               (copy-recursively (string-append unpacked "/include")
                                 include))
             #t))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((jellyfish (assoc-ref inputs "jellyfish"))
                   (libdivsufsort (assoc-ref inputs "libdivsufsort")))
               ;; Remove the includes of "jellyfish/config.h".
               (substitute* '("src/SailfishIndexer.cpp"
                              "src/SailfishUtils.cpp"
                              "src/SailfishQuantify.cpp"
                              "src/FASTAParser.cpp"
                              "include/PCA.hpp"
                              "include/SailfishUtils.hpp"
                              "include/SailfishIndex.hpp"
                              "include/CollapsedEMOptimizer.hpp"
                              "src/CollapsedEMOptimizer.cpp")
                 (("#include \"jellyfish/config.h\"") ""))
               ;; Refer to the jellyfish and libdivsufsort inputs instead of
               ;; the copies expected below "external/install".
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
                  (string-append jellyfish
                                 "/include/jellyfish-" ,(package-version jellyfish)))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
                  (string-append jellyfish
                                 "/lib/libjellyfish-2.0.a"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                  (string-append libdivsufsort
                                 "/lib/libdivsufsort.so"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                  (string-append libdivsufsort
                                 "/lib/libdivsufsort64.so"))))
             (substitute* "CMakeLists.txt"
               ;; Don't prefer static libs
               (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
               (("find_package\\(Jellyfish.*") "")
               (("ExternalProject_Add\\(libjellyfish") "message(")
               (("ExternalProject_Add\\(libgff") "message(")
               (("ExternalProject_Add\\(libsparsehash") "message(")
               (("ExternalProject_Add\\(libdivsufsort") "message("))

             ;; Ensure that Eigen headers can be found
             (setenv "CPLUS_INCLUDE_PATH"
                     (string-append (assoc-ref inputs "eigen")
                                    "/include/eigen3:"
                                    (or (getenv "CPLUS_INCLUDE_PATH") "")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("eigen" ,eigen)
       ("jemalloc" ,jemalloc)
       ("jellyfish" ,jellyfish)
       ("sparsehash" ,sparsehash)
       ("rapmap"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/COMBINE-lab/RapMap")
                 (commit (string-append "sf-v" version))))
           (file-name (string-append "rapmap-sf-v" version "-checkout"))
           (sha256
            (base32
             "1hv79l5i576ykv5a1srj2p0q36yvyl5966m0fcy2lbi169ipjakf"))
           (modules '((guix build utils)))
           ;; These files are expected to be excluded.
           (snippet
            '(begin (delete-file-recursively "include/spdlog")
                    (for-each delete-file '("include/xxhash.h"
                                            "src/xxhash.c"))
                    #t))))
       ("libdivsufsort" ,libdivsufsort)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://www.cs.cmu.edu/~ckingsf/software/sailfish/")
    (synopsis "Mapping-based isoform quantification from RNA-Seq reads")
    (description "Sailfish is a tool for genomic transcript quantification
from RNA-seq data. It requires a set of target transcripts (either from a
reference or de-novo assembly) to quantify. All you need to run sailfish is a
fasta file containing your reference transcripts and a (set of) fasta/fastq
file(s) containing your reads.")
    (license license:gpl3+)))
10936
(define libstadenio-for-salmon
  (package
    (name "libstadenio")
    (version "1.14.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/staden-io_lib")
             (commit (string-append "v" version))))
       ;; Produces "NAME-VERSION-checkout".
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1x8kxxqxl892vwfbprlbyfwkkv7c34ggkc94892x9x0g37x5nbwx"))))
    (build-system gnu-build-system)
    ;; Running the tests in parallel is not supported.
    (arguments
     '(#:parallel-tests? #f))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("perl" ,perl)))                 ; for tests
    (home-page "https://github.com/COMBINE-lab/staden-io_lib")
    (synopsis "General purpose trace and experiment file library")
    (description "This package provides a library of file reading and writing
code to provide a general purpose Trace file (and Experiment File) reading
interface.

The following file formats are supported:

@enumerate
@item SCF trace files
@item ABI trace files
@item ALF trace files
@item ZTR trace files
@item SFF trace archives
@item SRF trace archives
@item Experiment files
@item Plain text files
@item SAM/BAM sequence files
@item CRAM sequence files
@end enumerate\n")
    (license license:bsd-3)))
10977
(define-public salmon
  (package
    (name "salmon")
    (version "0.13.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/salmon")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1i2z4aivicmiixdz9bxalp7vmfzi3k92fxa63iqa8kgvfw5a4aq5"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled headers for eigen3.
           (delete-file-recursively "include/eigen3/")
           #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (let ((boost (assoc-ref %build-inputs "boost")))
         (list (string-append "-DBOOST_INCLUDEDIR=" boost "/include/")
               (string-append "-DBOOST_LIBRARYDIR=" boost "/lib/")
               (string-append "-DBoost_LIBRARIES="
                              "-lboost_iostreams "
                              "-lboost_filesystem "
                              "-lboost_system "
                              "-lboost_thread "
                              "-lboost_timer "
                              "-lboost_chrono "
                              "-lboost_program_options")
               "-DBoost_FOUND=TRUE"
               "-DTBB_LIBRARIES=tbb tbbmalloc"
               ;; Don't download RapMap---we already have it!
               "-DFETCHED_RAPMAP=1"))
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda _
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.59\\.0") "#"))
             #t))
         ;; Replace the calls to getVersionMessage() with an empty string.
         (add-after 'unpack 'do-not-phone-home
           (lambda _
             (substitute* "src/Salmon.cpp"
               (("getVersionMessage\\(\\)") "\"\""))
             #t))
         ;; Copy the sources and headers of the "rapmap" input below
         ;; "external/install", then remove the listed files and the spdlog
         ;; directory from the copy.
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((src "external/install/src/rapmap/")
                   (include "external/install/include/rapmap/")
                   (rapmap (assoc-ref inputs "rapmap")))
               (mkdir-p src)
               (mkdir-p include)
               (copy-recursively (string-append rapmap "/src") src)
               (copy-recursively (string-append rapmap "/include") include)
               (for-each (lambda (file)
                           (delete-file
                            (string-append "external/install/" file)))
                         '("include/rapmap/xxhash.h"
                           "include/rapmap/FastxParser.hpp"
                           "include/rapmap/concurrentqueue.h"
                           "include/rapmap/FastxParserThreadUtils.hpp"
                           "src/rapmap/FastxParser.cpp"
                           "src/rapmap/xxhash.c"))
               (delete-file-recursively
                "external/install/include/rapmap/spdlog"))
             #t))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "CMakeLists.txt"
               ;; Don't prefer static libs
               (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
               (("set\\(TBB_LIBRARIES") "message(")
               ;; Don't download anything
               (("DOWNLOAD_COMMAND") "DOWNLOAD_COMMAND echo")
               (("externalproject_add\\(libcereal") "message(")
               (("externalproject_add\\(libgff") "message(")
               (("externalproject_add\\(libtbb") "message(")
               (("externalproject_add\\(libdivsufsort") "message(")
               (("externalproject_add\\(libstadenio") "message(")
               (("externalproject_add_step\\(") "message("))
             ;; Refer to the shared libraries of the inputs instead of the
             ;; static archives expected below "external/install".
             (let ((stadenio (assoc-ref inputs "libstadenio-for-salmon"))
                   (divsufsort (assoc-ref inputs "libdivsufsort")))
               (substitute* "src/CMakeLists.txt"
                 (("add_dependencies") "#")
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libstaden-read.a")
                  (string-append stadenio "/lib/libstaden-read.so"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                  (string-append divsufsort "/lib/libdivsufsort.so"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                  (string-append divsufsort "/lib/libdivsufsort64.so"))
                 (("lib/libdivsufsort.a") "/lib/libdivsufsort.so")))

             ;; Ensure that all headers can be found
             (setenv "CPLUS_INCLUDE_PATH"
                     (string-append (or (getenv "CPLUS_INCLUDE_PATH") "")
                                    ":"
                                    (assoc-ref inputs "eigen")
                                    "/include/eigen3"))
             #t))
         ;; CMAKE_INSTALL_PREFIX does not exist when the tests are
         ;; run.  It only exists after the install phase.
         (add-after 'unpack 'fix-tests
           (lambda _
             (substitute* "src/CMakeLists.txt"
               (("DTOPLEVEL_DIR=\\$\\{CMAKE_INSTALL_PREFIX")
                "DTOPLEVEL_DIR=${GAT_SOURCE_DIR"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bzip2" ,bzip2)
       ("cereal" ,cereal)
       ("eigen" ,eigen)
       ("rapmap"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/COMBINE-lab/RapMap")
                 (commit (string-append "salmon-v" version))))
           (file-name (string-append "rapmap-salmon-v" version "-checkout"))
           (sha256
            (base32
             "1biplxf0csc7a8h1wf219b0vmjkvw6wk2zylhdklb577kgmihdms"))))
       ("jemalloc" ,jemalloc)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("libdivsufsort" ,libdivsufsort)
       ("libstadenio-for-salmon" ,libstadenio-for-salmon)
       ("xz" ,xz)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/COMBINE-lab/salmon")
    (synopsis "Quantification from RNA-seq reads using lightweight alignments")
    (description "Salmon is a program to produce highly-accurate,
transcript-level quantification estimates from RNA-seq data. Salmon achieves
its accuracy and speed via a number of different innovations, including the
use of lightweight alignments (accurate but fast-to-compute proxies for
traditional read alignments) and massively-parallel stochastic collapsed
variational inference.")
    (license license:gpl3+)))
11122
(define-public python-loompy
  (package
    (name "python-loompy")
    (version "2.0.17")
    ;; The tarball on Pypi does not include the tests.
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/linnarsson-lab/loompy")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "12a5kjgiikapv93wahfw0frszx1lblnppyz3vs5gy8fgmgngra07"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run the test suite with pytest, with the current directory
         ;; added to the front of PYTHONPATH.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             ;; Honour #:tests? so that the tests can be switched off.
             (when tests?
               ;; 'getenv' returns #f when PYTHONPATH is unset, which
               ;; 'string-append' rejects; only append the previous value
               ;; when there is one.
               (let ((previous (getenv "PYTHONPATH")))
                 (setenv "PYTHONPATH"
                         (if previous
                             (string-append (getcwd) ":" previous)
                             (getcwd))))
               (invoke "pytest" "tests"))
             #t)))))
    (propagated-inputs
     `(("python-h5py" ,python-h5py)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-scipy" ,python-scipy)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/linnarsson-lab/loompy")
    (synopsis "Work with .loom files for single-cell RNA-seq data")
    (description "The loom file format is an efficient format for very large
omics datasets, consisting of a main matrix, optional additional layers, a
variable number of row and column annotations. Loom also supports sparse
graphs. This library makes it easy to work with @file{.loom} files for
single-cell RNA-seq data.")
    (license license:bsd-3)))
11163
11164 ;; We cannot use the latest commit because it requires Java 9.
(define-public java-forester
  (let ((commit "86b07efe302d5094b42deed9260f719a4c4ac2e6")
        (revision "1"))
    (package
      (name "java-forester")
      ;; Yields "0-<revision>.<first seven characters of the commit>".
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/cmzmasek/forester")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "0vxavc1yrf84yrnf20dq26hi0lglidk8d382xrxsy4qmlbjd276z"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Delete bundled jars and pre-built classes
             (delete-file-recursively "forester/java/resources")
             (delete-file-recursively "forester/java/classes")
             (for-each delete-file (find-files "forester/java/" "\\.jar$"))
             ;; Delete bundled applications
             (delete-file-recursively "forester_applications")
             #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:jdk ,icedtea-8
         #:modules ((guix build ant-build-system)
                    (guix build utils)
                    (guix build java-utils)
                    (sxml simple)
                    (sxml transform))
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'chdir
             (lambda _ (chdir "forester/java") #t))
           ;; Rewrite build.xml through SXML with every <unjar> element
           ;; dropped.
           (add-after 'chdir 'fix-dependencies
             (lambda _
               (chmod "build.xml" #o664)
               (let ((tree (call-with-input-file "build.xml"
                             (lambda (port)
                               (xml->sxml port #:trim-whitespace? #t))))
                     (rules
                      (list
                       ;; Remove all unjar tags to avoid repacking classes.
                       (cons 'unjar (lambda _ '()))
                       ;; Every other element and all text is kept as is.
                       (cons '*default* (lambda (tag . kids) (cons tag kids)))
                       (cons '*text* (lambda (_ txt) txt)))))
                 (call-with-output-file "build.xml.new"
                   (lambda (port)
                     (sxml->xml (pre-post-order tree rules) port))))
               (rename-file "build.xml.new" "build.xml")
               #t))
           ;; FIXME: itext is difficult to package as it depends on a few
           ;; unpackaged libraries.
           (add-after 'chdir 'remove-dependency-on-unpackaged-itext
             (lambda _
               (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
               (substitute* "src/org/forester/archaeopteryx/MainFrame.java"
                 (("pdf_written_to = PdfExporter.*")
                  "throw new IOException(\"PDF export is not available.\");"))
               #t))
           ;; There is no install target
           (replace 'install (install-jars ".")))))
      (propagated-inputs
       `(("java-commons-codec" ,java-commons-codec)
         ("java-openchart2" ,java-openchart2)))
      (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
      (synopsis "Phylogenomics libraries for Java")
      (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
      (license license:lgpl2.1+))))
11239
(define-public java-forester-1.005
  (package
    (name "java-forester")
    (version "1.005")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://repo1.maven.org/maven2/"
                           "org/biojava/thirdparty/forester/"
                           version "/forester-" version "-sources.jar"))
       (file-name (string-append name "-" version ".jar"))
       (sha256
        (base32
         "04r8qv4rk3p71z4ajrvp11py1z46qrx0047j3zzs79s6lnsm3lcv"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:jdk ,icedtea-8
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform))
       #:phases
       (modify-phases %standard-phases
         ;; Generate a top-level build.xml from src/build.xml with every
         ;; <unjar> element dropped, copy the two resource inputs next to
         ;; it and make build.xml refer to those copies.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((tree (call-with-input-file "src/build.xml"
                           (lambda (port)
                             (xml->sxml port #:trim-whitespace? #t))))
                   (rules
                    (list
                     ;; Remove all unjar tags to avoid repacking classes.
                     (cons 'unjar (lambda _ '()))
                     ;; Every other element and all text is kept as is.
                     (cons '*default* (lambda (tag . kids) (cons tag kids)))
                     (cons '*text* (lambda (_ txt) txt)))))
               (call-with-output-file "build.xml"
                 (lambda (port)
                   (sxml->xml (pre-post-order tree rules) port))))
             (for-each (lambda (resource)
                         (copy-file (assoc-ref inputs resource) resource))
                       '("synth_look_and_feel_1.xml"
                         "phyloxml.xsd"))
             (substitute* "build.xml"
               (("../resources/synth_laf/synth_look_and_feel_1.xml")
                "synth_look_and_feel_1.xml")
               (("../resources/phyloxml_schema/1.10/phyloxml.xsd")
                "phyloxml.xsd"))
             #t))
         ;; FIXME: itext is difficult to package as it depends on a few
         ;; unpackaged libraries.
         (add-after 'unpack 'remove-dependency-on-unpackaged-itext
           (lambda _
             (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
             ;; The first replacement ends by opening a Java block comment;
             ;; the two that follow insert the closing "*/".
             (substitute* '("src/org/forester/archaeopteryx/MainFrame.java"
                            "src/org/forester/archaeopteryx/MainFrameApplication.java")
               (("pdf_written_to = PdfExporter.*")
                "throw new IOException(\"PDF export is not available.\"); /*")
               ((".getPrintSizeX\\(\\), getOptions\\(\\).getPrintSizeY\\(\\) \\);") "*/")
               (("getCurrentTreePanel\\(\\).getHeight\\(\\) \\);") "*/"))
             #t))
         (add-after 'unpack 'delete-pre-built-classes
           (lambda _ (delete-file-recursively "src/classes") #t))
         ;; There is no install target
         (replace 'install (install-jars ".")))))
    (propagated-inputs
     `(("java-commons-codec" ,java-commons-codec)
       ("java-openchart2" ,java-openchart2)))
    ;; The source archive does not contain the resources.
    (native-inputs
     `(("phyloxml.xsd"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "b61cc2dcede0bede317db362472333115756b8c6/"
                               "forester/resources/phyloxml_schema/1.10/phyloxml.xsd"))
           (file-name (string-append name "-phyloxml-" version ".xsd"))
           (sha256
            (base32
             "1zxc4m8sn4n389nqdnpxa8d0k17qnr3pm2y5y6g6vh4k0zm52npv"))))
       ("synth_look_and_feel_1.xml"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "29e04321615da6b35c1e15c60e52caf3f21d8e6a/"
                               "forester/java/classes/resources/"
                               "synth_look_and_feel_1.xml"))
           (file-name (string-append name "-synth-look-and-feel-" version ".xml"))
           (sha256
            (base32
             "1gv5602gv4k7y7713y75a4jvj7i9s7nildsbdl7n9q10sc2ikg8h"))))))
    (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
    (synopsis "Phylogenomics libraries for Java")
    (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
    (license license:lgpl2.1+)))
11335
(define-public java-biojava-core
  (package
    (name "java-biojava-core")
    (version "4.2.11")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             (commit (string-append "biojava-" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1bvryh2bpsvash8ln79cmc9sqm8qw72hz4xzwqxcrjm8ssxszhqk"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-core.jar"
       #:source-dir "biojava-core/src/main/java/"
       #:test-dir "biojava-core/src/test"
       ;; These tests seem to require internet access.
       #:test-exclude
       (list "**/SearchIOTest.java"
             "**/BlastXMLParserTest.java"
             "**/GenbankCookbookTest.java"
             "**/GenbankProxySequenceReaderTest.java")
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource files into build/classes ahead of
         ;; the 'build phase.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-core/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise copy the test resources into build/test-classes ahead
         ;; of the 'check phase.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-core/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "https://biojava.org")
    (synopsis "Core libraries of Java framework for processing biological data")
    (description
     "BioJava is a project dedicated to providing a Java framework
for processing biological data. It provides analytical and statistical
routines, parsers for common file formats, reference implementations of
popular algorithms, and allows the manipulation of sequences and 3D
structures. The goal of the biojava project is to facilitate rapid
application development for bioinformatics.

This package provides the core libraries.")
    (license license:lgpl2.1+)))
11391
(define-public java-biojava-phylo
  (package
    (inherit java-biojava-core)
    (name "java-biojava-phylo")
    ;; The build system and home page are inherited unchanged from
    ;; java-biojava-core, so they are not repeated here.
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource files into build/classes ahead of
         ;; the 'build phase.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise copy the test resources into build/test-classes ahead
         ;; of the 'check phase.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-forester" ,java-forester)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description
     "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
11427
(define-public java-biojava-alignment
  (package
    (inherit java-biojava-core)
    (name "java-biojava-alignment")
    ;; The build system and home page are inherited unchanged from
    ;; java-biojava-core, so they are not repeated here.
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource files into build/classes ahead of
         ;; the 'build phase.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise copy the test resources into build/test-classes ahead
         ;; of the 'check phase.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-biojava-phylo" ,java-biojava-phylo)
       ("java-forester" ,java-forester)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (synopsis "Biojava API for genetic sequence alignment")
    (description
     "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
11471
;; Older release of java-biojava-core; only the version and the source
;; checkout differ from the inherited package.
(define-public java-biojava-core-4.0
  (package
    (inherit java-biojava-core)
    (name "java-biojava-core")
    (version "4.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             (commit (string-append "biojava-" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "13675f6y9aqi7bi2lk3s1z7a22ynccjiqwa8izh7p97xi9wsfmd8"))))))
11485
(define-public java-biojava-phylo-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-phylo")
    ;; The build system and home page come unchanged from
    ;; java-biojava-core-4.0 (which itself inherits them), so they are not
    ;; repeated here.
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource files into build/classes ahead of
         ;; the 'build phase.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise copy the test resources into build/test-classes ahead
         ;; of the 'check phase.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-forester" ,java-forester-1.005)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description
     "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
11521
(define-public java-biojava-alignment-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-alignment")
    ;; The build system and home page come unchanged from
    ;; java-biojava-core-4.0 (which itself inherits them), so they are not
    ;; repeated here.
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource files into build/classes ahead of
         ;; the 'build phase.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise copy the test resources into build/test-classes ahead
         ;; of the 'check phase.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-phylo" ,java-biojava-phylo-4.0)
       ("java-forester" ,java-forester-1.005)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (synopsis "Biojava API for genetic sequence alignment")
    (description
     "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
11565
(define-public dropseq-tools
  (package
    (name "dropseq-tools")
    (version "1.13")
    (source
     (origin
       (method url-fetch)
       (uri "http://mccarrolllab.com/download/1276/")
       (file-name (string-append "dropseq-tools-" version ".zip"))
       (sha256
        (base32
         "0yrffckxqk5l8b5xb6z4laq157zd9mdypr2p4b4vq2bhjzi1sj0s"))
       ;; Delete bundled libraries
       (modules '((guix build utils)))
       (snippet
        '(begin
           (for-each delete-file (find-files "jar/lib" "\\.jar$"))
           (delete-file-recursively "3rdParty")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; test data are not included
       #:test-target "test"
       #:build-target "all"
       #:source-dir "public/src/"
       #:jdk ,icedtea-8
       #:make-flags
       (list (string-append "-Dpicard.executable.dir="
                            (assoc-ref %build-inputs "java-picard")
                            "/share/java/"))
       #:modules ((ice-9 match)
                  (srfi srfi-1)
                  (guix build utils)
                  (guix build java-utils)
                  (guix build ant-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: fails with "java.io.FileNotFoundException:
         ;; /gnu/store/…-dropseq-tools-1.13/share/java/lib/biojava-alignment.jar"
         (delete 'generate-jar-indices)
         ;; All dependencies must be linked to "lib", because that's where
         ;; they will be searched for when the Class-Path property of the
         ;; manifest is computed.
         (add-after 'unpack 'record-references
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "jar/lib")
             ;; Keep the directories of every "java-" input except
             ;; java-testng, then link each jar found in them.
             (let ((dirs (filter-map (match-lambda
                                       ((name . dir)
                                        (if (and (string-prefix? "java-" name)
                                                 (not (string=? name "java-testng")))
                                            dir #f)))
                                     inputs)))
               (for-each (lambda (jar)
                           (symlink jar (string-append "jar/lib/" (basename jar))))
                         (append-map (lambda (dir) (find-files dir "\\.jar$"))
                                     dirs)))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (bin     (string-append out "/bin"))
                    (share   (string-append out "/share/java/"))
                    ;; SHARE already ends with a slash; do not add another.
                    (lib     (string-append share "lib/"))
                    (scripts (list "BAMTagHistogram"
                                   "BAMTagofTagCounts"
                                   "BaseDistributionAtReadPosition"
                                   "CollapseBarcodesInPlace"
                                   "CollapseTagWithContext"
                                   "ConvertToRefFlat"
                                   "CreateIntervalsFiles"
                                   "DetectBeadSynthesisErrors"
                                   "DigitalExpression"
                                   "Drop-seq_alignment.sh"
                                   "FilterBAM"
                                   "FilterBAMByTag"
                                   "GatherGeneGCLength"
                                   "GatherMolecularBarcodeDistributionByGene"
                                   "GatherReadQualityMetrics"
                                   "PolyATrimmer"
                                   "ReduceGTF"
                                   "SelectCellsByNumTranscripts"
                                   "SingleCellRnaSeqMetricsCollector"
                                   "TagBamWithReadSequenceExtended"
                                   "TagReadWithGeneExon"
                                   "TagReadWithInterval"
                                   "TrimStartingSequence"
                                   "ValidateReference")))
               (for-each mkdir-p (list bin share lib))
               (install-file "dist/dropseq.jar" share)
               (for-each (lambda (script)
                           (chmod script #o555)
                           (install-file script bin))
                         scripts)
               ;; Point the installed scripts at the java found by `which'
               ;; and at the installed jar directory.
               (substitute* (map (lambda (script)
                                   (string-append bin "/" script))
                                 scripts)
                 (("^java") (which "java"))
                 (("jar_deploy_dir=.*")
                  (string-append "jar_deploy_dir=" share "\n"))))
             #t))
         ;; FIXME: We do this after stripping jars because we don't want it to
         ;; copy all these jars and strip them.  We only want to install
         ;; links.  Arguably, this is a problem with the ant-build-system.
         (add-after 'strip-jar-timestamps 'install-links
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out   (assoc-ref outputs "out"))
                    (share (string-append out "/share/java/"))
                    ;; SHARE already ends with a slash; do not add another.
                    (lib   (string-append share "lib/")))
               (for-each (lambda (jar)
                           (symlink (readlink jar)
                                    (string-append lib (basename jar))))
                         (find-files "jar/lib" "\\.jar$")))
             #t)))))
    (inputs
     `(("jdk" ,icedtea-8)
       ("java-picard" ,java-picard-2.10.3)
       ("java-log4j-1.2-api" ,java-log4j-1.2-api)
       ("java-commons-math3" ,java-commons-math3)
       ("java-commons-jexl2" ,java-commons-jexl-2)
       ("java-commons-collections4" ,java-commons-collections4)
       ("java-commons-lang2" ,java-commons-lang)
       ("java-commons-io" ,java-commons-io)
       ("java-snappy-1.0.3-rc3" ,java-snappy-1)
       ("java-guava" ,java-guava)
       ("java-la4j" ,java-la4j)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-alignment" ,java-biojava-alignment-4.0)
       ("java-jdistlib" ,java-jdistlib)
       ("java-simple-xml" ,java-simple-xml)
       ("java-snakeyaml" ,java-snakeyaml)))
    (native-inputs
     `(("unzip" ,unzip)
       ("java-testng" ,java-testng)))
    (home-page "http://mccarrolllab.com/dropseq/")
    (synopsis "Tools for Drop-seq analyses")
    (description "Drop-seq is a technology to enable biologists to
analyze RNA expression genome-wide in thousands of individual cells at
once. This package provides tools to perform Drop-seq analyses.")
    (license license:expat)))
11706
(define-public pigx-rnaseq
  (package
    (name "pigx-rnaseq")
    (version "0.0.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_rnaseq/"
                           "releases/download/v" version
                           "/pigx_rnaseq-" version ".tar.gz"))
       (sha256
        (base32 "0z3hr120wk2vrlmlpz1vp3n9wy3rq4y2mnzh2vf08qgqn2xfdwcw"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-tests? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; "test.sh" runs STAR, which requires excessive amounts of memory.
         (add-after 'unpack 'disable-resource-intensive-test
           (lambda _
             ;; Keep the trim_galore entry (minus whatever follows it on the
             ;; line) and blank out the multiqc and top-level test.sh entries.
             (substitute* "Makefile.in"
               (("(^ tests/test_trim_galore/test.sh).*" _ m) m)
               (("^ tests/test_multiqc/test.sh") "")
               (("^ test.sh") ""))
             #t)))))
    (inputs
     `(("coreutils" ,coreutils)
       ("sed" ,sed)
       ("gzip" ,gzip)
       ("snakemake" ,snakemake)
       ("fastqc" ,fastqc)
       ("multiqc" ,multiqc)
       ("star" ,star-for-pigx)
       ("trim-galore" ,trim-galore)
       ("htseq" ,htseq)
       ("samtools" ,samtools)
       ("r-minimal" ,r-minimal)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-gprofiler" ,r-gprofiler)
       ("r-deseq2" ,r-deseq2)
       ("r-dt" ,r-dt)
       ("r-knitr" ,r-knitr)
       ("r-pheatmap" ,r-pheatmap)
       ("r-corrplot" ,r-corrplot)
       ("r-reshape2" ,r-reshape2)
       ("r-plotly" ,r-plotly)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-crosstalk" ,r-crosstalk)
       ("r-tximport" ,r-tximport)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rjson" ,r-rjson)
       ("salmon" ,salmon)
       ("pandoc" ,pandoc)
       ("pandoc-citeproc" ,pandoc-citeproc)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for RNA sequencing experiments")
    (description
     "PiGX RNAseq is an analysis pipeline for preprocessing and
reporting for RNA sequencing experiments. It is easy to use and produces high
quality reports. The inputs are reads files from the sequencing experiment,
and a configuration file which describes the experiment. In addition to
quality control of the experiment, the pipeline produces a differential
expression report comparing samples in an easily configurable manner.")
    (license license:gpl3+)))
11775
(define-public pigx-chipseq
  (package
    (name "pigx-chipseq")
    (version "0.0.51")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/"
                           "releases/download/v" version
                           "/pigx_chipseq-" version ".tar.gz"))
       (sha256
        (base32 "0bb6hzpl0qq0jd57pgd1m5ns547rfipr6071a4m12vxlm4nlpi5q"))))
    (build-system gnu-build-system)
    (arguments
     ;; parts of the tests rely on access to the network
     '(#:tests? #f))
    (inputs
     `(("grep" ,grep)
       ("coreutils" ,coreutils)
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-chipseq" ,r-chipseq)
       ("r-corrplot" ,r-corrplot)
       ("r-data-table" ,r-data-table)
       ("r-deseq2" ,r-deseq2)
       ("r-dplyr" ,r-dplyr)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-gprofiler2" ,r-gprofiler2)
       ("r-heatmaply" ,r-heatmaply)
       ("r-htmlwidgets" ,r-htmlwidgets)
       ("r-jsonlite" ,r-jsonlite)
       ("r-pheatmap" ,r-pheatmap)
       ("r-plotly" ,r-plotly)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rsubread" ,r-rsubread)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-tibble" ,r-tibble)
       ("r-tidyr" ,r-tidyr)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-magic" ,python-magic)
       ("python-xlrd" ,python-xlrd)
       ("trim-galore" ,trim-galore)
       ("macs" ,macs)
       ("multiqc" ,multiqc)
       ("perl" ,perl)
       ("pandoc" ,pandoc)
       ("pandoc-citeproc" ,pandoc-citeproc)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("idr" ,idr)
       ("snakemake" ,snakemake)
       ("samtools" ,samtools)
       ("bedtools" ,bedtools)
       ("kentutils" ,kentutils)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for ChIP sequencing experiments")
    (description
     "PiGX ChIPseq is an analysis pipeline for preprocessing, peak
calling and reporting for ChIP sequencing experiments. It is easy to use and
produces high quality reports. The inputs are reads files from the sequencing
experiment, and a configuration file which describes the experiment. In
addition to quality control of the experiment, the pipeline enables to set up
multiple peak calling analysis and allows the generation of a UCSC track hub
in an easily configurable manner.")
    (license license:gpl3+)))
11852
(define-public pigx-bsseq
  (package
    (name "pigx-bsseq")
    (version "0.1.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/"
                           "releases/download/v" version
                           "/pigx_bsseq-" version ".tar.gz"))
       (sha256
        (base32 "0mpzlay2d5cjpmrcp7knff6rg1c2mqszd638n7lw0mc0cycbp9f8"))))
    (build-system gnu-build-system)
    (arguments
     `(;; TODO: tests currently require 12+GB of RAM.  See
       ;; https://github.com/BIMSBbioinfo/pigx_bsseq/issues/164
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-timezone
           ;; The readr package is picky about timezones.
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((zoneinfo (string-append (assoc-ref inputs "tzdata")
                                            "/share/zoneinfo")))
               (setenv "TZ" "UTC+1")
               (setenv "TZDIR" zoneinfo))
             #t)))))
    (native-inputs
     `(("tzdata" ,tzdata)))
    (inputs
     `(("coreutils" ,coreutils)
       ("sed" ,sed)
       ("grep" ,grep)
       ("r-minimal" ,r-minimal)
       ("r-annotationhub" ,r-annotationhub)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-ggrepel" ,r-ggrepel)
       ("r-methylkit" ,r-methylkit)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-bookdown" ,r-bookdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggbio" ,r-ggbio)
       ("pandoc" ,pandoc)
       ("pandoc-citeproc" ,pandoc-citeproc)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("snakemake" ,snakemake)
       ("bismark" ,bismark)
       ("bowtie" ,bowtie)
       ("bwa-meth" ,bwa-meth)
       ("fastqc" ,fastqc)
       ("methyldackel" ,methyldackel)
       ("multiqc" ,multiqc)
       ("trim-galore" ,trim-galore)
       ("cutadapt" ,cutadapt)
       ("samblaster" ,samblaster)
       ("samtools" ,samtools)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Bisulfite sequencing pipeline from fastq to methylation reports")
    (description
     "PiGx BSseq is a data processing pipeline for raw fastq read
data of bisulfite experiments; it produces reports on aggregate methylation
and coverage and can be used to produce information on differential
methylation and segmentation.")
    (license license:gpl3+)))
11919
(define-public pigx-scrnaseq
  (package
    (name "pigx-scrnaseq")
    (version "1.1.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_scrnaseq/"
                           "releases/download/v" version
                           "/pigx_scrnaseq-" version ".tar.gz"))
       (sha256
        (base32 "1h5mcxzwj3cidlkvy9ly5wmi48vwfsjf8dxjfirknqxr9a92hwlx"))))
    (build-system gnu-build-system)
    (inputs
     `(("coreutils" ,coreutils)
       ("perl" ,perl)
       ("fastqc" ,fastqc)
       ("flexbar" ,flexbar)
       ("java" ,icedtea-8)
       ("jellyfish" ,jellyfish)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-pandas" ,python-pandas)
       ("python-magic" ,python-magic)
       ("python-numpy" ,python-numpy)
       ("python-loompy" ,python-loompy)
       ("pandoc" ,pandoc)
       ("pandoc-citeproc" ,pandoc-citeproc)
       ("samtools" ,samtools)
       ("snakemake" ,snakemake)
       ("star" ,star-for-pigx)
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dplyr" ,r-dplyr)
       ("r-dropbead" ,r-dropbead)
       ("r-dt" ,r-dt)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hdf5array" ,r-hdf5array)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rtsne" ,r-rtsne)
       ("r-scater" ,r-scater)
       ("r-scran" ,r-scran)
       ("r-seurat" ,r-seurat)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-stringr" ,r-stringr)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for single-cell RNA sequencing experiments")
    (description
     "PiGX scRNAseq is an analysis pipeline for preprocessing and
quality control for single cell RNA sequencing experiments. The inputs are
read files from the sequencing experiment, and a configuration file which
describes the experiment. It produces processed files for downstream analysis
and interactive quality reports. The pipeline is designed to work with UMI
based methods.")
    (license license:gpl3+)))
11985
(define-public pigx
  (package
    (name "pigx")
    (version "0.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx/"
                           "releases/download/v" version
                           "/pigx-" version ".tar.gz"))
       (sha256
        (base32 "1i5njdy1clj5ncw45d16p7mwmqvb1ilikl9n797pxklc3f4s7mq7"))))
    (build-system gnu-build-system)
    ;; Python plus the four individual PiGx pipeline packages.
    (inputs
     `(("python" ,python)
       ("pigx-bsseq" ,pigx-bsseq)
       ("pigx-chipseq" ,pigx-chipseq)
       ("pigx-rnaseq" ,pigx-rnaseq)
       ("pigx-scrnaseq" ,pigx-scrnaseq)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipelines for genomics")
    (description
     "PiGx is a collection of genomics pipelines. It includes the
following pipelines:

@itemize
@item PiGx BSseq for raw fastq read data of bisulfite experiments
@item PiGx RNAseq for RNAseq samples
@item PiGx scRNAseq for single cell dropseq analysis
@item PiGx ChIPseq for reads from ChIPseq experiments
@end itemize

All pipelines are easily configured with a simple sample sheet and a
descriptive settings file. The result is a set of comprehensive, interactive
HTML reports with interesting findings about your samples.")
    (license license:gpl3+)))
12021
(define-public genrich
  (package
    (name "genrich")
    (version "0.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/jsh58/Genrich")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0x0q6z0208n3cxzqjla4rgjqpyqgwpmz27852lcvzkzaigymq4zp"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the "Genrich" file into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "Genrich" bin))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/jsh58/Genrich")
    (synopsis "Detecting sites of genomic enrichment")
    (description
     "Genrich is a peak-caller for genomic enrichment
assays (e.g. ChIP-seq, ATAC-seq). It analyzes alignment files generated
following the assay and produces a file detailing peaks of significant
enrichment.")
    (license license:expat)))
12054
(define-public mantis
  ;; The package is built from a fixed commit; the version string is derived
  ;; from it with git-version.
  (let ((commit "4ffd171632c2cb0056a86d709dfd2bf21bc69b84")
        (revision "1"))
    (package
      (name "mantis")
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/splatlab/mantis")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0iqbr0dhmlc8mzpirmm2s4pkzkwdgrcx50yx6cv3wlr2qi064p55"))))
      (build-system cmake-build-system)
      (arguments
       '(#:tests? #f))                  ; there are none
      (inputs
       `(("sdsl-lite" ,sdsl-lite)
         ("openssl" ,openssl)
         ("zlib" ,zlib)))
      (home-page "https://github.com/splatlab/mantis")
      (synopsis "Large-scale sequence-search index data structure")
      (description
       "Mantis is a space-efficient data structure that can be
used to index thousands of raw-read genomics experiments and facilitate
large-scale sequence searches on those experiments. Mantis uses counting
quotient filters instead of Bloom filters, enabling rapid index builds and
queries, small indexes, and exact results, i.e., no false positives or
negatives. Furthermore, Mantis is also a colored de Bruijn graph
representation, so it supports fast graph traversal and other topological
analyses in addition to large-scale sequence-level searches.")
      ;; uses __uint128_t and inline assembly
      (supported-systems '("x86_64-linux"))
      (license license:bsd-3))))
12089
(define-public sjcount
  ;; There is no tag for version 3.2, nor is there a release archive.
  (let ((commit "292d3917cadb3f6834c81e509c30e61cd7ead6e5")
        (revision "1"))
    (package
      (name "sjcount")
      (version (git-version "3.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/pervouchine/sjcount-full")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0gdgj35j249f04rqgq8ymcc1xg1vi9kzbajnjqpaq2wpbh8bl234"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; requires a 1.4G test file
         #:make-flags
         (list (string-append "SAMTOOLS_DIR="
                              (assoc-ref %build-inputs "samtools")
                              "/lib/"))
         #:phases
         (modify-phases %standard-phases
           ;; Instead of running a configure script, patch the makefile: use
           ;; the samtools input's header directory and add -lpthread after
           ;; -lz.
           (replace 'configure
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((samtools (assoc-ref inputs "samtools")))
                 (substitute* "makefile"
                   (("-I \\$\\{SAMTOOLS_DIR\\}")
                    (string-append "-I" samtools "/include/samtools"))
                   (("-lz ") "-lz -lpthread ")))
               #t))
           ;; Install the three tools into the output's bin directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (for-each (lambda (tool) (install-file tool bin))
                           '("j_count" "b_count" "sjcount")))
               #t)))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/pervouchine/sjcount-full/")
      (synopsis "Annotation-agnostic splice junction counting pipeline")
      (description
       "Sjcount is a utility for fast quantification of splice
junctions in RNA-seq data. It is annotation-agnostic and offset-aware. This
version does count multisplits.")
      (license license:gpl3+))))
12140
(define-public minimap2
  (package
    (name "minimap2")
    (version "2.17")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/lh3/minimap2/"
                           "releases/download/v" version "/"
                           "minimap2-" version ".tar.bz2"))
       (sha256
        (base32
         "0hi7i9pzxhvjj44khzzzj1lrn5gb5837arr4wgln7k1k5n4ci2mn"))
       (patches (search-patches "minimap2-aarch64-support.patch"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:make-flags
       (let ((system ,(or (%current-target-system)
                          (%current-system))))
         (cons (string-append "CC=" ,(cc-for-target))
               ;; Architecture-specific make argument, chosen from the
               ;; prefix of the system (or cross-compilation target) string.
               (cond
                ((string-prefix? "x86_64" system)
                 '("all"))
                ((or (string-prefix? "i586" system)
                     (string-prefix? "i686" system))
                 '("sse2only=1"))
                ((string-prefix? "armhf" system)
                 '("arm_neon=1"))
                ((string-prefix? "aarch64" system)
                 '("aarch64=1"))
                ;; Unknown architecture: add no argument at all.  An empty
                ;; string would be handed to make as a (nameless) goal.
                (else '()))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the executable and its manual page by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               ;; install-file creates the target directory itself.
               (install-file "minimap2" bin)
               (install-file "minimap2.1" man))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://lh3.github.io/minimap2/")
    (synopsis "Pairwise aligner for genomic and spliced nucleotide sequences")
    (description "Minimap2 is a versatile sequence alignment program that
aligns DNA or mRNA sequences against a large reference database. Typical use
cases include:

@enumerate
@item mapping PacBio or Oxford Nanopore genomic reads to the human genome;
@item finding overlaps between long reads with error rate up to ~15%;
@item splice-aware alignment of PacBio Iso-Seq or Nanopore cDNA or Direct RNA
reads against a reference genome;
@item aligning Illumina single- or paired-end reads;
@item assembly-to-assembly alignment;
@item full-genome alignment between two closely related species with
divergence below ~15%.
@end enumerate\n")
    (license license:expat)))
12204
(define-public miniasm
  (package
    (name "miniasm")
    (version "0.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lh3/miniasm")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "04dv5wv8bhsw1imxwyd438bnn9kby7svp44nbcz8lsadzjjci5gs"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy both executables into the "bin" directory of the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out")
                                          "/bin")))
               (for-each (lambda (program)
                           (install-file program target))
                         '("miniasm" "minidot"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/miniasm")
    (synopsis "Ultrafast de novo assembly for long noisy reads")
    (description "Miniasm is a very fast OLC-based de novo assembler for noisy
long reads. It takes all-vs-all read self-mappings (typically by minimap) as
input and outputs an assembly graph in the GFA format. Different from
mainstream assemblers, miniasm does not have a consensus step. It simply
concatenates pieces of read sequences to generate the final unitig sequences.
Thus the per-base error rate is similar to the raw input reads.")
    (license license:expat)))
12241
(define-public bandage
  (package
    (name "bandage")
    (version "0.8.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/rrwick/Bandage")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1bbsn5f5x8wlspg4pbibqz6m5vin8c19nl224f3z3km0pkc97rwv"))))
    (build-system qt-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The project is configured with qmake.
         (replace 'configure
           (lambda _
             (invoke "qmake" "Bandage.pro")))
         ;; Run the command line test script against the executable that
         ;; was just built in the top-level build directory.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (let ((executable (string-append (getcwd) "/Bandage")))
                 (substitute* "tests/bandage_command_line_tests.sh"
                   (("^bandagepath=.*")
                    (string-append "bandagepath=" executable "\n"))))
               (with-directory-excursion "tests"
                 (setenv "XDG_RUNTIME_DIR" (getcwd))
                 (invoke "./bandage_command_line_tests.sh")))
             #t))
         ;; Only the "Bandage" executable is installed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "Bandage"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (inputs
     `(("qtbase" ,qtbase)
       ("qtsvg" ,qtsvg)))
    (native-inputs
     `(("imagemagick" ,imagemagick)))
    (home-page "https://rrwick.github.io/Bandage/")
    (synopsis
     "Bioinformatics Application for Navigating De novo Assembly Graphs Easily")
    (description "Bandage is a program for visualising de novo assembly graphs.
It allows users to interact with the assembly graphs made by de novo assemblers
such as Velvet, SPAdes, MEGAHIT and others. De novo assembly graphs contain not
only assembled contigs but also the connections between those contigs, which
were previously not easily accessible. Bandage visualises assembly graphs, with
connections, using graph layout algorithms. Nodes in the drawn graph, which
represent contigs, can be automatically labelled with their ID, length or depth.
Users can interact with the graph by moving, labelling and colouring nodes.
Sequence information can also be extracted directly from the graph viewer. By
displaying connections between contigs, Bandage opens up new possibilities for
analysing and improving de novo assemblies that are not possible by looking at
contigs alone.")
    (license (list license:gpl2+        ; bundled ogdf
                   license:gpl3+))))
12299
(define-public r-circus
  (package
    (name "r-circus")
    (version "0.1.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BIMSBbioinfo/ciRcus")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0jhjn3ilb057hbf6yzrihj13ifxxs32y7nkby8l3lkm28dg4p97h"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biomart" ,r-biomart)
       ("r-data-table" ,r-data-table)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hash" ,r-hash)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rmysql" ,r-rmysql)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/BIMSBbioinfo/ciRcus")
    (synopsis "Annotation, analysis and visualization of circRNA data")
    (description "Circus is an R package for annotation, analysis and
visualization of circRNA data. Users can annotate their circRNA candidates
with host genes, gene features they are spliced from, and discriminate between
known and yet unknown splice junctions. Circular-to-linear ratios of circRNAs
can be calculated, and a number of descriptive plots easily generated.")
    (license license:artistic2.0)))
12341
(define-public gffread
  ;; We cannot use the tagged release because it is not in sync with gclib.
  ;; See https://github.com/gpertea/gffread/issues/26
  (let ((commit "ba7535fcb3cea55a6e5a491d916e93b454e87fd0")
        (revision "1"))
    (package
      (name "gffread")
      (version (git-version "0.9.12" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffread")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1dl2nbcg96lxpd0drg48ssa8343nf7pw9s9mkrc4mjjmfwsin3ki"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:make-flags
         ;; Point the makefile at the gclib sources copied in below.
         (list "GCLDIR=gclib")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Place the gclib sources in a "gclib" sub-directory of the
           ;; unpacked source tree.
           (add-after 'unpack 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir-p "gclib")
               (copy-recursively (assoc-ref inputs "gclib-source") "gclib")
               #t))
           ;; There is no install target
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (install-file "gffread"
                             (string-append (assoc-ref outputs "out")
                                            "/bin"))
               #t)))))
      (native-inputs
       `(("gclib-source"
          ;; A specific commit of gclib is fetched, so name the checkout
          ;; with a 'git-version' string that records this commit.  'let*'
          ;; is required: the version refers to the bindings before it.
          ,(let* ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
                  (revision "1")
                  (version (git-version "0.10.3" revision commit)))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib")
                     (commit commit)))
               (file-name (git-file-name "gclib" version))
               (sha256
                (base32
                 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffread/")
      (synopsis "Parse and convert GFF/GTF files")
      (description
       "This package provides a GFF/GTF file parsing utility providing format
conversions, region filtering, FASTA sequence extraction and more.")
      ;; gffread is under Expat, but gclib is under Artistic 2.0
      (license (list license:expat
                     license:artistic2.0)))))
12402
(define-public find-circ
  ;; The last release was in 2015. The license was clarified in 2017, so we
  ;; take the latest commit.
  (let ((commit "8655dca54970fcf7e92e22fbf57e1188724dda7d")
        (revision "1"))
    (package
      (name "find-circ")
      (version (git-version "1.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/marvin-jens/find_circ")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0p77pbqbclqr4srms34y1b9b4njybfpjiknc11ki84f3p8skb3cg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:phases
         ;; There is no actual build system.
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           ;; Install every script and wrap it so that the PYTHONPATH in
           ;; effect at build time is prepended when the script runs.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
                     (python-path (getenv "PYTHONPATH")))
                 (define (install-script script)
                   (install-file script bin)
                   (wrap-program (string-append bin "/" script)
                     (list "PYTHONPATH" ":" 'prefix (list python-path))))
                 (for-each install-script
                           '("cmp_bed.py"
                             "find_circ.py"
                             "maxlength.py"
                             "merge_bed.py"
                             "unmapped2anchors.py"))
                 #t))))))
      (inputs
       `(("python2" ,python-2)
         ("python2-pysam" ,python2-pysam)
         ("python2-numpy" ,python2-numpy)))
      (home-page "https://github.com/marvin-jens/find_circ")
      (synopsis "circRNA detection from RNA-seq reads")
      (description "This package provides tools to detect head-to-tail
spliced (back-spliced) sequencing reads, indicative of circular RNA (circRNA)
in RNA-seq data.")
      (license license:gpl3))))
12454
(define-public python-scanpy
  (package
    (name "python-scanpy")
    (version "1.4.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "scanpy" version))
       (sha256
        (base32
         "0s2b6cvaigx4wzw3850qb93sjwwxbzh22kpbp498zklc5rjpbz4l"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run the test suite with pytest after removing the tests that
         ;; cannot work in the build environment.  Nothing is done when
         ;; tests are disabled with #:tests? #f.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; These tests require Internet access.
               (delete-file-recursively "scanpy/tests/notebooks")
               (delete-file "scanpy/tests/test_clustering.py")
               (delete-file "scanpy/tests/test_datasets.py")

               ;; TODO: I can't get the plotting tests to work, even with Xvfb.
               (delete-file "scanpy/tests/test_plotting.py")
               (delete-file "scanpy/tests/test_preprocessing.py")
               (delete-file "scanpy/tests/test_read_10x.py")

               ;; Put the source directory first on the module search path.
               (setenv "PYTHONPATH"
                       (string-append (getcwd) ":"
                                      (getenv "PYTHONPATH")))
               (invoke "pytest"))
             #t)))))
    (propagated-inputs
     `(("python-anndata" ,python-anndata)
       ("python-h5py" ,python-h5py)
       ("python-igraph" ,python-igraph)
       ("python-joblib" ,python-joblib)
       ("python-legacy-api-wrap" ,python-legacy-api-wrap)
       ("python-louvain" ,python-louvain)
       ("python-matplotlib" ,python-matplotlib)
       ("python-natsort" ,python-natsort)
       ("python-networkx" ,python-networkx)
       ("python-numba" ,python-numba)
       ("python-packaging" ,python-packaging)
       ("python-pandas" ,python-pandas)
       ("python-patsy" ,python-patsy)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-scipy" ,python-scipy)
       ("python-seaborn" ,python-seaborn)
       ("python-statsmodels" ,python-statsmodels)
       ("python-tables" ,python-tables)
       ("python-tqdm" ,python-tqdm)
       ("python-umap-learn" ,python-umap-learn)))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("python-setuptools-scm" ,python-setuptools-scm)))
    (home-page "https://github.com/theislab/scanpy")
    (synopsis "Single-Cell Analysis in Python")
    (description "Scanpy is a scalable toolkit for analyzing single-cell gene
expression data. It includes preprocessing, visualization, clustering,
pseudotime and trajectory inference and differential expression testing. The
Python-based implementation efficiently deals with datasets of more than one
million cells.")
    (license license:bsd-3)))
12519
(define-public python-bbknn
  (package
    (name "python-bbknn")
    (version "1.3.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "bbknn" version))
       (sha256
        (base32
         "1jbsh01f57zj4bhvjr3jh4532zznqd6nccmgrl3qi9gnhkf7c4y0"))))
    (build-system python-build-system)
    ;; TODO: Enable after migration to scikit-learn.
    (arguments '(#:tests? #f))
    (propagated-inputs
     `(("python-annoy" ,python-annoy)
       ("python-cython" ,python-cython)
       ("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-umap-learn" ,python-umap-learn)))
    (home-page "https://github.com/Teichlab/bbknn")
    (synopsis "Batch balanced KNN")
    (description "BBKNN is a batch effect removal tool that can be directly
used in the Scanpy workflow. It serves as an alternative to
@code{scanpy.api.pp.neighbors()}, with both functions creating a neighbour
graph for subsequent use in clustering, pseudotime and UMAP visualisation. If
technical artifacts are present in the data, they will make it challenging to
link corresponding cell types across different batches. BBKNN actively
combats this effect by splitting your data into batches and finding a smaller
number of neighbours for each cell within each of the groups. This helps
create connections between analogous cells in different batches without
altering the counts or PCA space.")
    (license license:expat)))
12553
(define-public python-drep
  (package
    (name "python-drep")
    (version "3.2.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "drep" version))
       (sha256
        (base32
         "08vk0x6v5c5n7afgd5pcjhsvb424absypxy22hw1cm1n9kirbi77"))))
    (build-system python-build-system)
    ;; NOTE(review): pytest is propagated here rather than being a native
    ;; input; presumably upstream lists it as an install requirement --
    ;; confirm before moving it.
    (propagated-inputs
     `(("python-biopython" ,python-biopython)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pytest" ,python-pytest)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-seaborn" ,python-seaborn)
       ("python-tqdm" ,python-tqdm)))
    (home-page "https://github.com/MrOlm/drep")
    (synopsis
     "De-replication of microbial genomes assembled from multiple samples")
    (description
     "dRep is a Python program for rapidly comparing large numbers of genomes.
dRep can also \"de-replicate\" a genome set by identifying groups of highly
similar genomes and choosing the best representative genome for each genome
set.")
    (license license:expat)))
12583
(define-public instrain
  (package
    (name "instrain")
    (version "1.5.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "inStrain" version))
       (sha256
        (base32
         "0ykqlpf6yz4caihsaz3ys00cyvlr7wdj4s9a8rh56q5r8xf80ic0"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Rewrite the imports of the "s3_utils" and "job_utils" modules in
         ;; the docker helper script as package-relative imports.
         (add-after 'unpack 'patch-relative-imports
           (lambda _
             (substitute* "docker/run_instrain.py"
               (("from s3_utils")
                "from .s3_utils")
               (("from job_utils")
                "from .job_utils"))
             ;; Return #t explicitly, like the other phases in this file.
             #t)))))
    (inputs
     `(("python-biopython" ,python-biopython)
       ("python-boto3" ,python-boto3)
       ("python-h5py" ,python-h5py)
       ("python-lmfit" ,python-lmfit)
       ("python-matplotlib" ,python-matplotlib)
       ("python-networkx" ,python-networkx)
       ("python-numba" ,python-numba)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-psutil" ,python-psutil)
       ("python-pysam" ,python-pysam)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-seaborn" ,python-seaborn)
       ("python-tqdm" ,python-tqdm)
       ;; drep is needed for deprecated plot utilities
       ("python-drep" ,python-drep)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/MrOlm/inStrain")
    (synopsis "Calculation of strain-level metrics")
    (description
     "inStrain is a Python program for analysis of co-occurring genome
populations from metagenomes that allows highly accurate genome comparisons,
analysis of coverage, microdiversity, and linkage, and sensitive SNP detection
with gene localization and synonymous non-synonymous identification.")
    ;; The tool itself says that the license is "MIT", but the repository
    ;; contains a LICENSE file with the GPLv3.
    ;; See https://github.com/MrOlm/inStrain/issues/51
    (license license:expat)))
12636
(define-public gffcompare
  (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41")
        (revision "1"))
    (package
      (name "gffcompare")
      (version (git-version "0.10.15" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffcompare/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0cp5qpxdhw4mxpya5dld8wi3jk00zyklm6rcri426wydinrnfmkg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Place the gclib sources in a "gclib" directory next to the
           ;; gffcompare source tree.
           (add-before 'build 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir "../gclib")
               (copy-recursively
                (assoc-ref inputs "gclib-source") "../gclib")
               #t))
           ;; Install the "gffcompare" executable by hand.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "gffcompare" bin)
                 #t))))))
      (native-inputs
       `(("gclib-source"                ; see 'README.md' of gffcompare
          ;; 'let*' is required here: with a plain 'let' the 'commit' and
          ;; 'revision' used to compute 'version' would be those of
          ;; gffcompare itself, and the gclib checkout would be named after
          ;; the wrong commit.
          ,(let* ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
                  (revision "1")
                  (name "gclib")
                  (version (git-version "0.10.3" revision commit)))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib/")
                     (commit commit)))
               (file-name (git-file-name name version))
               (sha256
                (base32 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffcompare/")
      (synopsis "Tool for comparing or classifying transcripts of RNA-Seq")
      (description
       "@code{gffcompare} is a tool that can:
@enumerate
@item compare and evaluate the accuracy of RNA-Seq transcript assemblers
(Cufflinks, Stringtie);
@item collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g.
resulted from assembly of different samples);
@item classify transcripts from one or multiple GTF/GFF3 files as they relate to
reference transcripts provided in an annotation file (also in GTF/GFF3 format).
@end enumerate")
      (license
       (list
        license:expat                   ;license for gffcompare
        license:artistic2.0)))))        ;license for gclib
12699
(define-public intervaltree
  (let ((commit "b90527f9e6d51cd36ecbb50429e4524d3a418ea5")
        (revision "1"))
    (package
      (name "intervaltree")
      (version (git-version "0.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/intervaltree/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0rgv6q5fl4x5d74n6p5wvdna6zmbdbqpb4jqqh6vq3670gn08xad"))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f                    ; No tests.
         ;; Install below the package output.
         #:make-flags
         (let ((out (assoc-ref %outputs "out")))
           (list (string-append "PREFIX=" out)
                 "DESTDIR=\"\""))
         #:phases
         (modify-phases %standard-phases
           ;; There is no configure phase.
           (delete 'configure))))
      (home-page "https://github.com/ekg/intervaltree")
      (synopsis "Minimal C++ interval tree implementation")
      (description "An interval tree can be used to efficiently find a set of
numeric intervals overlapping or containing another interval. This library
provides a basic implementation of an interval tree using C++ templates,
allowing the insertion of arbitrary types into the tree.")
      (license license:expat))))
12729
(define-public python-intervaltree
  (package
    (name "python-intervaltree")
    (version "3.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "intervaltree" version))
       (sha256
        (base32
         "0wz234g6irlm4hivs2qzmnywk0ss06ckagwh15nflkyb3p462kyb"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; pytest appears to verify that the code under test comes from
         ;; the checkout rather than from an installed, possibly stale
         ;; copy.  The package has just been installed, so the mismatch is
         ;; harmless; the check is switched off so that the tests are not
         ;; skipped altogether.
         (add-before 'check 'import-mismatch-error-workaround
           (lambda _
             (setenv "PY_IGNORE_IMPORTMISMATCH" "1")
             #t)))))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (propagated-inputs
     `(("python-sortedcontainers" ,python-sortedcontainers)))
    (home-page "https://github.com/chaimleib/intervaltree")
    (synopsis "Editable interval tree data structure")
    (description
     "This package provides a mutable, self-balancing interval tree
implementation for Python. Queries may be by point, by range overlap, or by
range envelopment. This library was designed to allow tagging text and time
intervals, where the intervals include the lower bound but not the upper
bound.")
    (license license:asl2.0)))
12767
(define-public python-pypairix
  (package
    (name "python-pypairix")
    (version "0.3.7")
    ;; The makefile needed to build the programs is missing from the
    ;; tarball on pypi, so fetch the sources from the git repository.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/4dn-dcic/pairix")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1snr3lrmsld8sy77ng6ba6wcmd33xjccf1l2f3m6pi29xis9nd6p"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Build the programs with make before the Python build runs.
         (add-before 'build 'build-programs
           (lambda _
             (invoke "make")))
         ;; Copy the contents of the "bin" directory to the output.
         (add-after 'install 'install-programs
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (copy-recursively "bin" (string-append out "/bin"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/4dn-dcic/pairix")
    (synopsis "Support for querying pairix-indexed bgzipped text files")
    (description
     "Pypairix is a Python module for fast querying on a pairix-indexed
bgzipped text file that contains a pair of genomic coordinates per line.")
    (license license:expat)))
12804
(define-public python-pyfaidx
  (package
    (name "python-pyfaidx")
    (version "0.5.8")
    ;; Release tarball from PyPI.
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pyfaidx" version))
              (sha256
               (base32
                "038xi3a6zvrxbyyfpp64ka8pcjgsdq4fgw9cl5lpxbvmm1bzzw2q"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-six" ,python-six)))
    (home-page "http://mattshirley.com")
    (synopsis "Random access to fasta subsequences")
    (description
     "This package provides procedures for efficient pythonic random access to
fasta subsequences.")
    (license license:bsd-3)))
12825
;; Python 2 variant, derived from 'python-pyfaidx'.
(define-public python2-pyfaidx
  (package-with-python2 python-pyfaidx))
12828
(define-public python-cooler
  (package
    (name "python-cooler")
    (version "0.8.7")
    ;; Release tarball from PyPI.
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "cooler" version))
              (sha256
               (base32
                "01g6gqix9ba27sappz6nfyiwabzrlf8i5fn8kwcz8ra356cq9crp"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-pytest" ,python-pytest)))
    (propagated-inputs
     `(("python-asciitree" ,python-asciitree)
       ("python-biopython" ,python-biopython)
       ("python-click" ,python-click)
       ("python-cytoolz" ,python-cytoolz)
       ("python-dask" ,python-dask)
       ("python-h5py" ,python-h5py)
       ("python-multiprocess" ,python-multiprocess)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pyfaidx" ,python-pyfaidx)
       ("python-pypairix" ,python-pypairix)
       ("python-pysam" ,python-pysam)
       ("python-pyyaml" ,python-pyyaml)
       ("python-scipy" ,python-scipy)
       ("python-simplejson" ,python-simplejson)))
    (home-page "https://github.com/mirnylab/cooler")
    (synopsis "Sparse binary format for genomic interaction matrices")
    (description
     "Cooler is a support library for a sparse, compressed, binary persistent
storage format, called @code{cool}, used to store genomic interaction data,
such as Hi-C contact matrices.")
    (license license:bsd-3)))
12867
(define-public python-hicmatrix
  (package
    (name "python-hicmatrix")
    (version "12")
    (source
     (origin
       ;; Version 12 is not available on pypi.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/HiCMatrix")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1xhdyx16f3brgxgxybixdi64ki8nbbkq5vk4h9ahi11pzpjfn1pj"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Accept any 0.8.x release of cooler in place of the pinned one.
         (add-after 'unpack 'relax-requirements
           (lambda _
             (let ((requirement-files '("requirements.txt"
                                        "setup.py")))
               (substitute* requirement-files
                 (("cooler *=+ *0.8.5")
                  "cooler==0.8.*"))
               #t))))))
    (propagated-inputs
     `(("python-cooler" ,python-cooler)
       ("python-intervaltree" ,python-intervaltree)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-scipy" ,python-scipy)
       ("python-tables" ,python-tables)))
    (home-page "https://github.com/deeptools/HiCMatrix/")
    (synopsis "HiCMatrix class for HiCExplorer and pyGenomeTracks")
    (description
     "This helper package implements the @code{HiCMatrix} class for
the HiCExplorer and pyGenomeTracks packages.")
    (license license:gpl3+)))
12907
(define-public python-hicexplorer
  (package
    (name "python-hicexplorer")
    (version "2.1.4")
    (source
     (origin
       ;; The latest version is not available on Pypi.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/HiCExplorer")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0q5gpbzmrkvygqgw524q36b4nrivcmyi5v194vsx0qw7b3gcmq08"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Turn every exact version pin in setup.py into a lower bound.
         (add-after 'unpack 'loosen-up-requirements
           (lambda _
             (substitute* "setup.py"
               (("==") ">="))
             #t)))))
    (propagated-inputs
     `(("python-biopython" ,python-biopython)
       ("python-configparser" ,python-configparser)
       ("python-cooler" ,python-cooler)
       ("python-future" ,python-future)
       ("python-intervaltree" ,python-intervaltree)
       ("python-jinja2" ,python-jinja2)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)
       ("python-six" ,python-six)
       ("python-tables" ,python-tables)
       ("python-unidecode" ,python-unidecode)))
    (home-page "https://hicexplorer.readthedocs.io")
    (synopsis "Process, analyze and visualize Hi-C data")
    (description
     "HiCExplorer is a powerful and easy to use set of tools to process,
normalize and visualize Hi-C data. HiCExplorer facilitates the creation of
contact matrices, correction of contacts, TAD detection, A/B compartments,
merging, reordering of chromosomes, conversion from different formats
including cooler and detection of long-range contacts. Moreover, it allows
the visualization of multiple contact matrices along with other types of data
like genes, compartments, ChIP-seq coverage tracks (and in general any type of
genomic scores), long range contacts and the visualization of viewpoints.")
    (license license:gpl3)))
12960
(define-public python-pygenometracks
  (package
    (name "python-pygenometracks")
    (version "3.3")
    ;; Release tarball from PyPI.
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pyGenomeTracks" version))
              (sha256
               (base32
                "16laa0wnf4qn9fb9ych4w1vqhqwjss70v0y0f6wp4gwqfrlgac0f"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         ;; Replace the exact matplotlib version pin with a lower bound.
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               (("matplotlib ==3.1.1")
                "matplotlib >=3.1.1"))
             #t)))))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (propagated-inputs
     `(("python-future" ,python-future)
       ("python-gffutils" ,python-gffutils)
       ("python-hicmatrix" ,python-hicmatrix)
       ("python-intervaltree" ,python-intervaltree)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-tqdm" ,python-tqdm)))
    (home-page "https://pygenometracks.readthedocs.io")
    (synopsis "Program and library to plot beautiful genome browser tracks")
    (description
     "This package aims to produce high-quality genome browser tracks that
are highly customizable. Currently, it is possible to plot: bigwig, bed (many
options), bedgraph, links (represented as arcs), and Hi-C matrices.
pyGenomeTracks can make plots with or without Hi-C data.")
    (license license:gpl3+)))
13003
;; hic2cool: conversion of .hic files into .cool files, fetched from PyPI.
(define-public python-hic2cool
  (package
    (name "python-hic2cool")
    (version "0.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "hic2cool" version))
       (sha256
        (base32 "0xy6mhfns2lzib1kcr6419jjp6pmh0qx8z8na55lmiwn0ds8q9cl"))))
    (build-system python-build-system)
    ;; No tests are included, so the check phase is switched off.
    (arguments
     '(#:tests? #f))
    (propagated-inputs
     `(("python-cooler" ,python-cooler)))
    (home-page "https://github.com/4dn-dcic/hic2cool")
    (synopsis "Converter for .hic and .cool files")
    (description
     "This package provides a converter between @code{.hic} files (from
juicer) and single-resolution or multi-resolution @code{.cool} files (for
cooler). Both @code{hic} and @code{cool} files describe Hi-C contact
matrices.")
    (license license:expat)))
13027
;; poRe: R package with graphical front ends for Nanopore sequencing data.
(define-public r-pore
  (package
    (name "r-pore")
    (version "0.24")
    (source
     (origin
       (method url-fetch)
       ;; The tarball lives in a per-version directory on SourceForge.
       (uri (string-append "mirror://sourceforge/rpore/" version
                           "/poRe_" version ".tar.gz"))
       (sha256
        (base32 "0pih9nljbv8g4x8rkk29i7aqq681b782r5s5ynp4nw9yzqnmmksv"))))
    ;; The upstream name differs from the Guix package name.
    (properties `((upstream-name . "poRe")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bit64" ,r-bit64)
       ("r-data-table" ,r-data-table)
       ("r-rhdf5" ,r-rhdf5)
       ("r-shiny" ,r-shiny)
       ("r-svdialogs" ,r-svdialogs)))
    (home-page "https://sourceforge.net/projects/rpore/")
    (synopsis "Visualize Nanopore sequencing data")
    (description
     "This package provides graphical user interfaces to organize and visualize Nanopore
sequencing data.")
    ;; This is free software but the license variant is unclear:
    ;; <https://github.com/mw55309/poRe_docs/issues/10>.
    (license license:bsd-3)))
13056
;; xbioc: extra utility functions for Bioconductor, built from a pinned
;; git commit.
(define-public r-xbioc
  (let ((commit "6ff0670a37ab3036aaf1d94aa4b208310946b0b5")
        (revision "1"))
    (package
      (name "r-xbioc")
      (version (git-version "0.1.16" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/renozao/xbioc")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0w8bsq5myiwkfhh83nm6is5ichiyvwa1axx2szvxnzq39x6knf66"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)
         ("r-assertthat" ,r-assertthat)
         ("r-biobase" ,r-biobase)
         ("r-biocmanager" ,r-biocmanager)
         ("r-digest" ,r-digest)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-reshape2" ,r-reshape2)
         ("r-stringr" ,r-stringr)))
      (home-page "https://github.com/renozao/xbioc/")
      (synopsis "Extra base functions for Bioconductor")
      (description
       "This package provides extra utility functions to perform
common tasks in the analysis of omics data, leveraging and enhancing features
provided by Bioconductor packages.")
      (license license:gpl3+))))
13089
;; csSAM: cell-specific differential expression analysis, built from a
;; pinned git commit.
(define-public r-cssam
  (let ((commit "9ec58c982fa551af0d80b1a266890d92954833f2")
        (revision "1"))
    (package
      (name "r-cssam")
      (version (git-version "1.4" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/csSAM")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "128syf9v39gk0z3ip000qpsjbg6l1siyq6c8b0hz41dzg5achyb3"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-formula" ,r-formula)
         ("r-ggplot2" ,r-ggplot2)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-rngtools" ,r-rngtools)
         ("r-scales" ,r-scales)))
      (home-page "https://github.com/shenorrLab/csSAM/")
      (synopsis "Cell type-specific statistical analysis of microarray")
      (description
       "This package implements the method csSAM that computes
cell-specific differential expression from measured cell proportions using
SAM.")
      ;; Any version
      (license license:lgpl2.1+))))
13120
;; BSeq-sc: deconvolution of bulk sequencing data with single-cell data,
;; built from a pinned git commit.
(define-public r-bseqsc
  (let ((commit "fef3f3e38dcf3df37103348b5780937982b43b98")
        (revision "1"))
    (package
      (name "r-bseqsc")
      (version (git-version "1.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/bseqsc")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1prw13wa20f7wlc3gkkls66n1kxz8d28qrb8icfqdwdnnv8w5qg8"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-abind" ,r-abind)
         ("r-annotationdbi" ,r-annotationdbi)
         ("r-biobase" ,r-biobase)
         ("r-cssam" ,r-cssam)
         ("r-dplyr" ,r-dplyr)
         ("r-e1071" ,r-e1071)
         ("r-edger" ,r-edger)
         ("r-ggplot2" ,r-ggplot2)
         ("r-nmf" ,r-nmf)
         ("r-openxlsx" ,r-openxlsx)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-preprocesscore" ,r-preprocesscore)
         ("r-rngtools" ,r-rngtools)
         ("r-scales" ,r-scales)
         ("r-stringr" ,r-stringr)
         ("r-xbioc" ,r-xbioc)))
      (home-page "https://github.com/shenorrLab/bseqsc")
      (synopsis "Deconvolution of bulk sequencing experiments using single cell data")
      (description
       "BSeq-sc is a bioinformatics analysis pipeline that
leverages single-cell sequencing data to estimate cell type proportion and
cell type-specific gene expression differences from RNA-seq data from bulk
tissue samples. This is a companion package to the publication \"A
single-cell transcriptomic map of the human and mouse pancreas reveals inter-
and intra-cell population structure.\" Baron et al. Cell Systems (2016)
@url{https://www.ncbi.nlm.nih.gov/pubmed/27667365}.")
      (license license:gpl2+))))
13165
(define-public porechop
  ;; Upstream recommends installing from a clone of the git repository
  ;; (https://github.com/rrwick/Porechop#installation), hence the pinned
  ;; commit rather than a release tarball.
  (let ((revision "1")
        (commit "289d5dca4a5fc327f97b3f8cecb68ecaf1014861"))
    (package
      (name "porechop")
      (version (git-version "0.2.3" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Porechop")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "05ps43gig0d3ia9x5lj84lb00hbsl6ba9n7y7jz927npxbr2ym23"))))
      (build-system python-build-system)
      (home-page "https://github.com/rrwick/porechop")
      (synopsis "Finding, trimming or splitting adapters, in Oxford Nanopore reads")
      (description
       "The porechop package is a tool for finding and removing adapters from Oxford
Nanopore reads. Adapters on the ends of reads are trimmed off, and when a read
has an adapter in its middle, it is treated as chimeric and chopped into
separate reads. Porechop performs thorough alignments to effectively find
adapters, even at low sequence identity. Porechop also supports demultiplexing
of Nanopore reads that were barcoded with the Native Barcoding Kit, PCR
Barcoding Kit or Rapid Barcoding Kit.")
      (license license:gpl3+))))
13195
(define-public poretools
  ;; The last release dates from 2016 and the last commit from 2017.
  ;; Upstream recommends installing from a clone of the git repository:
  ;; https://poretools.readthedocs.io/en/latest/content/installation.html
  (let ((revision "1")
        (commit "e426b1f09e86ac259a00c261c79df91510777407"))
    (package
      (name "poretools")
      (version (git-version "0.6.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/poretools")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0bglj833wxpp3cq430p1d3xp085ls221js2y90w7ir2x5ay8l7am"))))
      (build-system python-build-system)
      ;; Python >=2.7, <3.0 is required, so the build uses Python 2 and all
      ;; Python dependencies below are the python2 variants.
      (arguments
       `(#:python ,python-2))
      (inputs
       `(("hdf5" ,hdf5)))
      (propagated-inputs
       `(("python-dateutil" ,python2-dateutil)
         ("python-h5py" ,python2-h5py)
         ("python-matplotlib" ,python2-matplotlib)
         ("python-pandas" ,python2-pandas)
         ("python-seaborn" ,python2-seaborn)))
      (home-page "https://poretools.readthedocs.io")
      (synopsis "Toolkit for working with nanopore sequencing data")
      (description
       "The MinION from Oxford Nanopore Technologies is a nanopore sequencer.
This @code{poretools} package is a flexible toolkit for exploring datasets
generated by nanopore sequencing devices for the purposes of quality control and
downstream analysis. Poretools operates directly on the native FAST5, a variant
of the Hierarchical Data Format (HDF5) standard.")
      (license license:expat))))
13234
;; JAMM: peak finder for NGS data.  Nothing is compiled; the install phase
;; copies the helper scripts to libexec and wraps the two launcher scripts.
(define-public jamm
  (package
    (name "jamm")
    (version "1.0.7.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/mahmoudibrahim/JAMM")
             (commit (string-append "JAMMv" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0bsa5mf9n9q5jz7mmacrra41l7r8rac5vgsn6wv1fb52ya58b970"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((store-dir  (assoc-ref outputs "out"))
                    (helper-dir (string-append store-dir "/libexec/jamm"))
                    (bin-dir    (string-append store-dir "/bin"))
                    (launchers  '("JAMM.sh" "SignalGenerator.sh")))
               ;; Point the launchers at the directory that will hold the
               ;; helper scripts.
               (substitute* launchers
                 (("^sPath=.*")
                  (string-append "sPath=\"" helper-dir "\"\n")))
               (for-each (lambda (helper)
                           (install-file helper helper-dir))
                         '("bincalculator.r"
                           "peakfinder.r"
                           "peakhelper.r"
                           "signalmaker.r"
                           "xcorr.r"
                           "xcorrhelper.r"
                           ;; Perl scripts
                           "peakfilter.pl"
                           "readshifter.pl"))

               ;; Make each launcher executable, install it, and wrap it so
               ;; that the tools and libraries it needs are found at run time.
               (for-each
                (lambda (launcher)
                  (chmod launcher #o555)
                  (install-file launcher bin-dir)
                  (wrap-program (string-append bin-dir "/" launcher)
                    `("PATH" ":" prefix
                      ,(map (lambda (input)
                              (string-append (assoc-ref inputs input) "/bin"))
                            '("coreutils" "gawk" "perl" "r-minimal")))
                    `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")))
                    `("R_LIBS_SITE" ":" prefix (,(getenv "R_LIBS_SITE")))))
                launchers))
             #t)))))
    (inputs
     `(("bash" ,bash)
       ("coreutils" ,coreutils)
       ("gawk" ,gawk)
       ("perl" ,perl)
       ("r-minimal" ,r-minimal)
       ;;("r-parallel" ,r-parallel)
       ("r-signal" ,r-signal)
       ("r-mclust" ,r-mclust)))
    (home-page "https://github.com/mahmoudibrahim/JAMM")
    (synopsis "Peak finder for NGS datasets")
    (description
     "JAMM is a peak finder for next generation sequencing datasets (ChIP-Seq,
ATAC-Seq, DNase-Seq, etc.) that can integrate replicates and assign peak
boundaries accurately. JAMM is applicable to both broad and narrow
datasets.")
    (license license:gpl3+)))
13308
;; NGLess: DSL for NGS data processing, built with the Haskell build system.
;; The versions of the external tools are written into the Makefile before
;; Versions.hs is generated, and the tools are symlinked next to the ngless
;; binary after installation.
(define-public ngless
  (package
    (name "ngless")
    (version "1.1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://gitlab.com/ngless/ngless.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1wim8wpqyff080dfcazynrmjwqas38m24m0v350w245mmhrapdma"))))
    (build-system haskell-build-system)
    (arguments
     `(;; The haddock phase fails with: NGLess/CmdArgs.hs:20:1:
       ;;   error: parse error on input import
       ;;   import Options.Applicative
       #:haddock? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'create-Versions.hs
           (lambda _
             (substitute* "Makefile"
               (("BWA_VERSION = .*")
                (string-append "BWA_VERSION = "
                               ,(package-version bwa) "\n"))
               (("SAM_VERSION = .*")
                (string-append "SAM_VERSION = "
                               ,(package-version samtools) "\n"))
               (("PRODIGAL_VERSION = .*")
                (string-append "PRODIGAL_VERSION = "
                               ,(package-version prodigal) "\n"))
               (("MINIMAP2_VERSION = .*")
                (string-append "MINIMAP2_VERSION = "
                               ,(package-version minimap2) "\n")))
             (invoke "make" "NGLess/Dependencies/Versions.hs")
             #t))
         (add-after 'create-Versions.hs 'create-cabal-file
           (lambda _
             (invoke "hpack")
             #t))
         ;; These tools are expected to be installed alongside ngless, under
         ;; the name "ngless-VERSION-TOOL".
         (add-after 'install 'link-tools
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each
                (lambda (tool)
                  (symlink (string-append (assoc-ref inputs tool)
                                          "/bin/" tool)
                           (string-append bindir "ngless-" ,version
                                          "-" tool)))
                '("prodigal" "minimap2" "samtools" "bwa"))
               #t))))))
    (inputs
     `(("prodigal" ,prodigal)
       ("bwa" ,bwa)
       ("samtools" ,samtools)
       ("minimap2" ,minimap2)
       ("ghc-aeson" ,ghc-aeson)
       ("ghc-ansi-terminal" ,ghc-ansi-terminal)
       ("ghc-async" ,ghc-async)
       ("ghc-atomic-write" ,ghc-atomic-write)
       ("ghc-bytestring-lexing" ,ghc-bytestring-lexing)
       ("ghc-conduit" ,ghc-conduit)
       ("ghc-conduit-algorithms" ,ghc-conduit-algorithms)
       ("ghc-conduit-extra" ,ghc-conduit-extra)
       ("ghc-configurator" ,ghc-configurator)
       ("ghc-convertible" ,ghc-convertible)
       ("ghc-data-default" ,ghc-data-default)
       ("ghc-diagrams-core" ,ghc-diagrams-core)
       ("ghc-diagrams-lib" ,ghc-diagrams-lib)
       ("ghc-diagrams-svg" ,ghc-diagrams-svg)
       ("ghc-double-conversion" ,ghc-double-conversion)
       ("ghc-edit-distance" ,ghc-edit-distance)
       ("ghc-either" ,ghc-either)
       ("ghc-errors" ,ghc-errors)
       ("ghc-extra" ,ghc-extra)
       ("ghc-filemanip" ,ghc-filemanip)
       ("ghc-file-embed" ,ghc-file-embed)
       ("ghc-gitrev" ,ghc-gitrev)
       ("ghc-hashtables" ,ghc-hashtables)
       ("ghc-http-conduit" ,ghc-http-conduit)
       ("ghc-inline-c" ,ghc-inline-c)
       ("ghc-inline-c-cpp" ,ghc-inline-c-cpp)
       ("ghc-intervalmap" ,ghc-intervalmap)
       ("ghc-missingh" ,ghc-missingh)
       ("ghc-optparse-applicative" ,ghc-optparse-applicative)
       ("ghc-regex" ,ghc-regex)
       ("ghc-safe" ,ghc-safe)
       ("ghc-safeio" ,ghc-safeio)
       ("ghc-strict" ,ghc-strict)
       ("ghc-tar" ,ghc-tar)
       ("ghc-tar-conduit" ,ghc-tar-conduit)
       ("ghc-unliftio" ,ghc-unliftio)
       ("ghc-unliftio-core" ,ghc-unliftio-core)
       ("ghc-vector" ,ghc-vector)
       ("ghc-yaml" ,ghc-yaml)
       ("ghc-zlib" ,ghc-zlib)))
    (propagated-inputs
     `(("r-r6" ,r-r6)
       ("r-hdf5r" ,r-hdf5r)
       ("r-iterators" ,r-iterators)
       ("r-itertools" ,r-itertools)
       ("r-matrix" ,r-matrix)))
    (native-inputs
     `(("ghc-hpack" ,ghc-hpack)
       ("ghc-quickcheck" ,ghc-quickcheck)
       ("ghc-test-framework" ,ghc-test-framework)
       ("ghc-test-framework-hunit" ,ghc-test-framework-hunit)
       ("ghc-test-framework-quickcheck2" ,ghc-test-framework-quickcheck2)
       ("ghc-test-framework-th" ,ghc-test-framework-th)))
    (home-page "https://gitlab.com/ngless/ngless")
    (synopsis "DSL for processing next-generation sequencing data")
    (description
     "Ngless is a domain-specific language for
@dfn{next-generation sequencing} (NGS) data processing.")
    (license license:expat)))
13429
(define-public filtlong
  ;; Upstream recommends installing from a clone of the git repository
  ;; (https://github.com/rrwick/Filtlong#installation), and the latest
  ;; release is more than nine months old.
  (let ((revision "1")
        (commit "d1bb46dfe8bc7efe6257b5ce222c04bfe8aedaab"))
    (package
      (name "filtlong")
      (version (git-version "0.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Filtlong")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1xr92r820x8qlkcr3b57iw223yq8vjgyi42jr79w2xgw47qzr575"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Copy the binary and the two helper scripts into the output by
           ;; hand.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((store-dir  (assoc-ref outputs "out"))
                      (bin-dir    (string-append store-dir "/bin"))
                      (script-dir (string-append store-dir
                                                 "/share/filtlong/scripts")))
                 (install-file "bin/filtlong" bin-dir)
                 (for-each (lambda (script)
                             (install-file script script-dir))
                           '("scripts/histogram.py"
                             "scripts/read_info_histograms.sh")))
               #t))
           ;; Wrap histogram.py with the PYTHONPATH of the build environment.
           (add-after 'install 'wrap-program
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((histogram (string-append
                                 (assoc-ref outputs "out")
                                 "/share/filtlong/scripts/histogram.py"))
                     (pythonpath (getenv "PYTHONPATH")))
                 (wrap-program histogram
                   `("PYTHONPATH" ":" prefix (,pythonpath))))
               #t))
           ;; Replace "awk" in the shell script with the absolute file name
           ;; of gawk.
           (add-before 'check 'patch-tests
             (lambda _
               (substitute* "scripts/read_info_histograms.sh"
                 (("awk") (which "gawk")))
               #t)))))
      (inputs
       `(("gawk" ,gawk)                 ;for read_info_histograms.sh
         ("python" ,python-2)           ;required for histogram.py
         ("zlib" ,zlib)))
      (home-page "https://github.com/rrwick/Filtlong/")
      (synopsis "Tool for quality filtering of Nanopore and PacBio data")
      (description
       "The Filtlong package is a tool for filtering long reads by quality.
It can take a set of long reads and produce a smaller, better subset. It uses
both read length (longer is better) and read identity (higher is better) when
choosing which reads pass the filter.")
      (license (list license:gpl3       ;filtlong
                     license:asl2.0))))) ;histogram.py
13489
(define-public nanopolish
  ;; The recommended way to install is to clone the git repository
  ;; <https://github.com/jts/nanopolish#installing-a-particular-release>.
  ;; Also, the differences between release and current version seem to be
  ;; significant.
  (let ((commit "6331dc4f15b9dfabb954ba3fae9d76b6c3ca6377")
        (revision "1"))
    (package
      (name "nanopolish")
      (version (git-version "0.11.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/jts/nanopolish")
               (commit commit)
               (recursive? #t)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "15ikl3d37y49pwd7vx36xksgsqajhf24q7qqsnpl15dqqyy5qgbc"))
         (modules '((guix build utils)))
         ;; Remove the htslib directory from the checkout; htslib is taken
         ;; from the inputs instead.
         (snippet
          '(begin
             (delete-file-recursively "htslib")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags
         `("HDF5=noinstall" "EIGEN=noinstall" "HTS=noinstall" "CC=gcc")
         #:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           ;; Prepend the eigen3 header directory to CPATH.
           (add-after 'unpack 'find-eigen
             (lambda* (#:key inputs #:allow-other-keys)
               (setenv "CPATH"
                       (string-append (assoc-ref inputs "eigen")
                                      "/include/eigen3:"
                                      (or (getenv "CPATH") "")))
               #t))
           (delete 'configure)
           ;; Install the binary and every file found under "scripts".
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (scripts (string-append out "/share/nanopolish/scripts")))

                 (install-file "nanopolish" bin)
                 (for-each (lambda (file) (install-file file scripts))
                           (find-files "scripts" ".*"))
                 #t)))
           ;; Wrap the installed Python and Perl scripts with the search
           ;; paths of the build environment.  The patterns are anchored
           ;; with "$" so that only file names ending in ".py" or ".pl" are
           ;; wrapped, not every name that merely contains those characters.
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((pythonpath (getenv "PYTHONPATH"))
                     (perl5lib (getenv "PERL5LIB"))
                     (scripts (string-append (assoc-ref outputs "out")
                                             "/share/nanopolish/scripts")))
                 (for-each (lambda (file)
                             (wrap-program file
                               `("PYTHONPATH" ":" prefix (,pythonpath))))
                           (find-files scripts "\\.py$"))
                 (for-each (lambda (file)
                             (wrap-script file
                               `("PERL5LIB" ":" prefix (,perl5lib))))
                           (find-files scripts "\\.pl$"))
                 #t))))))
      (inputs
       `(("guile" ,guile-3.0)           ; for wrappers
         ("eigen" ,eigen)
         ("hdf5" ,hdf5)
         ("htslib" ,htslib)
         ("perl" ,perl)
         ("bioperl" ,bioperl-minimal)
         ("perl-getopt-long" ,perl-getopt-long)
         ("python" ,python-wrapper)
         ("python-biopython" ,python-biopython)
         ("python-numpy" ,python-numpy)
         ("python-pysam" ,python-pysam)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-scipy" ,python-scipy)
         ("zlib" ,zlib)))
      (home-page "https://github.com/jts/nanopolish")
      (synopsis "Signal-level analysis of Oxford Nanopore sequencing data")
      (description
       "This package analyses the Oxford Nanopore sequencing data at signal-level.
Nanopolish can calculate an improved consensus sequence for a draft genome
assembly, detect base modifications, call SNPs (Single nucleotide
polymorphisms) and indels with respect to a reference genome and more.")
      (license license:expat))))
13575
;; CNVkit: copy number inference from DNA sequencing data, built from the
;; upstream release tag.
(define-public cnvkit
  (package
    (name "cnvkit")
    (version "0.9.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/etal/cnvkit")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0g2f78k68yglmj4fsfmgs8idqv3di9aj53fg0ld0hqljg8chhh82"))))
    (build-system python-build-system)
    (propagated-inputs
     `(;; Python packages
       ("python-biopython" ,python-biopython)
       ("python-future" ,python-future)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-reportlab" ,python-reportlab)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-pyfaidx" ,python-pyfaidx)
       ("python-scipy" ,python-scipy)
       ;; R packages
       ("r-dnacopy" ,r-dnacopy)))
    (home-page "https://cnvkit.readthedocs.org/")
    (synopsis "Copy number variant detection from targeted DNA sequencing")
    (description
     "CNVkit is a Python library and command-line software toolkit to infer
and visualize copy number from high-throughput DNA sequencing data. It is
designed for use with hybrid capture, including both whole-exome and custom
target panels, and short-read sequencing platforms such as Illumina and Ion
Torrent.")
    (license license:asl2.0)))
13611
;; pyFIt-SNE: Cython wrapper for FIt-SNE, built from the upstream git tag.
(define-public python-pyfit-sne
  (package
    (name "python-pyfit-sne")
    (version "1.0.1")
    (source
     (origin
       (method git-fetch)
       ;; The tag name is the bare version string.
       (uri (git-reference
             (url "https://github.com/KlugerLab/pyFIt-SNE")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "13wh3qkzs56azmmgnxib6xfr29g7xh09sxylzjpni5j0pp0rc5qw"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (inputs
     `(("fftw" ,fftw)))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (home-page "https://github.com/KlugerLab/pyFIt-SNE")
    (synopsis "FFT-accelerated Interpolation-based t-SNE")
    (description
     "t-Stochastic Neighborhood Embedding (t-SNE) is a highly successful
method for dimensionality reduction and visualization of high dimensional
datasets. A popular implementation of t-SNE uses the Barnes-Hut algorithm to
approximate the gradient at each iteration of gradient descent. This package
is a Cython wrapper for FIt-SNE.")
    (license license:bsd-4)))
13641
;; BBMap: Java tool suite built with Ant, plus a JNI library built with make.
;; There is no install target, so jars, scripts, the JNI library and the
;; documentation are all installed by custom phases.
(define-public bbmap
  (package
    (name "bbmap")
    (version "38.90")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://sourceforge/bbmap/BBMap_" version ".tar.gz"))
       (sha256
        (base32
         "1wb94bcc006qq86x77z2rz0lc8m9f1kpnw6gdhjfg9bdaqf56rm3"))))
    (build-system ant-build-system)
    (arguments
     `(#:build-target "dist"
       #:tests? #f                      ; there are none
       #:make-flags
       (list (string-append "-Dmpijar="
                            (assoc-ref %build-inputs "java-openmpi")
                            "/lib/mpi.jar"))
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils))
       #:phases
       (modify-phases %standard-phases
         (add-after 'build 'build-jni-library
           (lambda _
             (with-directory-excursion "jni"
               (invoke "make" "-f" "makefile.linux"))))
         ;; There is no install target
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-scripts-and-documentation
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Use the absolute file name of awk in calcmem.sh.
             (substitute* "calcmem.sh"
               (("\\| awk ") (string-append "| " (which "awk") " ")))
             (let* ((shell-scripts (find-files "." "\\.sh$"))
                    (store-dir (assoc-ref outputs "out"))
                    (bin-dir (string-append store-dir "/bin"))
                    (doc-dir (string-append store-dir "/share/doc/bbmap"))
                    (jni-dir (string-append store-dir "/lib/jni")))
               ;; Rewrite the documentation, class path, native library and
               ;; java references in every shell script to store locations.
               (substitute* shell-scripts
                 (("\\$DIR\"\"docs") doc-dir)
                 (("^CP=.*")
                  (string-append "CP=" store-dir "/share/java/BBTools.jar\n"))
                 (("^NATIVELIBDIR.*")
                  (string-append "NATIVELIBDIR=" jni-dir "\n"))
                 (("CMD=\"java")
                  (string-append "CMD=\"" (which "java"))))
               (for-each (lambda (script)
                           (install-file script bin-dir))
                         shell-scripts)

               ;; Install JNI library
               (install-file "jni/libbbtoolsjni.so" jni-dir)

               ;; Install documentation
               (install-file "docs/readme.txt" doc-dir)
               (copy-recursively "docs/guides" doc-dir))
             #t)))
       #:jdk ,openjdk11))
    (inputs
     `(("gawk" ,gawk)
       ("java-eclipse-jdt-core" ,java-eclipse-jdt-core)
       ("java-eclipse-jdt-compiler-apt" ,java-eclipse-jdt-compiler-apt)
       ("java-openmpi" ,java-openmpi)))
    (home-page "https://sourceforge.net/projects/bbmap/")
    (synopsis "Aligner and other tools for short sequencing reads")
    (description
     "This package provides bioinformatic tools to align, deduplicate,
reformat, filter and normalize DNA and RNA-seq data. It includes the
following tools: BBMap, a short read aligner for DNA and RNA-seq data; BBNorm,
a kmer-based error-correction and normalization tool; Dedupe, a tool to
simplify assemblies by removing duplicate or contained subsequences that share
a target percent identity; Reformat, to convert reads between
fasta/fastq/scarf/fasta+qual/sam, interleaved/paired, and ASCII-33/64, at over
500 MB/s; and BBDuk, a tool to filter, trim, or mask reads with kmer matches
to an artifact/contaminant file.")
    (license license:bsd-3)))
13717
;; Velvet: de novo assembler for short reads.  The source snippet removes the
;; shipped Manual.pdf and the third-party directory.
(define-public velvet
  (package
    (name "velvet")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://www.ebi.ac.uk/~zerbino/velvet/"
                           "velvet_" version ".tgz"))
       (sha256
        (base32
         "0h3njwy66p6bx14r3ar1byb0ccaxmxka4c65rn4iybyiqa4d8kc8"))
       ;; Delete bundled libraries
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file "Manual.pdf")
           (delete-file-recursively "third-party")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags '("OPENMP=t")
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Include plain "zlib.h" instead of the header from the deleted
         ;; third-party directory.
         (add-after 'unpack 'fix-zlib-include
           (lambda _
             (substitute* "src/binarySequences.c"
               (("../third-party/zlib-1.2.3/zlib.h") "zlib.h"))
             #t))
         ;; Install the two programs and the two manuals by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((store-dir (assoc-ref outputs "out"))
                    (bin-dir (string-append store-dir "/bin"))
                    (doc-dir (string-append store-dir "/share/doc/velvet")))
               (for-each mkdir-p (list bin-dir doc-dir))
               (for-each (lambda (program)
                           (install-file program bin-dir))
                         '("velveth" "velvetg"))
               (for-each (lambda (manual)
                           (install-file manual doc-dir))
                         '("Manual.pdf" "Columbus_manual.pdf"))
               #t))))))
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (native-inputs
     `(("texlive" ,(texlive-union (list texlive-latex-graphics
                                        texlive-latex-hyperref)))))
    (home-page "https://www.ebi.ac.uk/~zerbino/velvet/")
    (synopsis "Nucleic acid sequence assembler for very short reads")
    (description
     "Velvet is a de novo genomic assembler specially designed for short read
sequencing technologies, such as Solexa or 454. Velvet currently takes in
short read sequences, removes errors then produces high quality unique
contigs. It then uses paired read information, if available, to retrieve the
repeated areas between contigs.")
    (license license:gpl2+)))
13775
;; Velocyto: RNA velocity analysis library and command line tool, fetched
;; from PyPI.
(define-public python-velocyto
  (package
    (name "python-velocyto")
    (version "0.17.17")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "velocyto" version))
       (sha256
        (base32 "0fgygyzqgrq32dv6a00biq1p1cwi6kbl5iqblxq1kklj6b2mzmhs"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-joblib" ,python-joblib)))
    (propagated-inputs
     `(("python-click" ,python-click)
       ("python-cython" ,python-cython)
       ("python-h5py" ,python-h5py)
       ("python-loompy" ,python-loompy)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numba" ,python-numba)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-scipy" ,python-scipy)))
    (home-page "https://github.com/velocyto-team/velocyto.py")
    (synopsis "RNA velocity analysis for single cell RNA-seq data")
    (description
     "Velocyto is a library for the analysis of RNA velocity. Velocyto
includes a command line tool and an analysis pipeline.")
    (license license:bsd-2)))
13808
;; Arriba: gene fusion detection from RNA-Seq data.  The configure phase is
;; replaced by edits to the Makefile and to run_arriba.sh; the install phase
;; copies three files into bin and wraps the R script.
(define-public arriba
  (package
    (name "arriba")
    (version "1.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/suhrig/arriba/releases/"
                           "download/v" version "/arriba_v" version ".tar.gz"))
       (sha256
        (base32
         "0jx9656ry766vb8z08m1c3im87b0c82qpnjby9wz4kcz8vn87dx2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Use the headers and the shared library of the htslib input
             ;; in place of $(HTSLIB)/htslib and $(HTSLIB)/libhts.a.
             (let ((htslib (assoc-ref inputs "htslib")))
               (substitute* "Makefile"
                 (("-I\\$\\(HTSLIB\\)/htslib")
                  (string-append "-I" htslib "/include/htslib"))
                 ((" \\$\\(HTSLIB\\)/libhts.a")
                  (string-append " " htslib "/lib/libhts.so"))))
             ;; Refer to STAR and samtools by absolute file name in the
             ;; launcher script.
             (substitute* "run_arriba.sh"
               (("^STAR ") (string-append (which "STAR") " "))
               (("samtools --version-only")
                (string-append (which "samtools") " --version-only"))
               (("samtools index")
                (string-append (which "samtools") " index"))
               (("samtools sort")
                (string-append (which "samtools") " sort")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "arriba" bin)
               (install-file "run_arriba.sh" bin)
               (install-file "draw_fusions.R" bin)
               ;; Wrap the R script with the R_LIBS_SITE of the build
               ;; environment.
               (wrap-program (string-append bin "/draw_fusions.R")
                 `("R_LIBS_SITE" ":" prefix (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (inputs
     `(("htslib" ,htslib)
       ("r-minimal" ,r-minimal)
       ("r-circlize" ,r-circlize)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("samtools" ,samtools)
       ("star" ,star)
       ("zlib" ,zlib)))
    (home-page "https://github.com/suhrig/arriba")
    (synopsis "Gene fusion detection from RNA-Seq data")
    (description
     "Arriba is a command-line tool for the detection of gene fusions from
RNA-Seq data. It was developed for the use in a clinical research setting.
Therefore, short runtimes and high sensitivity were important design criteria.
It is based on the fast STAR aligner and the post-alignment runtime is
typically just around two minutes. In contrast to many other fusion detection
tools which build on STAR, Arriba does not require to reduce the
@code{alignIntronMax} parameter of STAR to detect small deletions.")
    ;; All code is under the Expat license with the exception of
    ;; "draw_fusions.R", which is under GPLv3.
    (license (list license:expat license:gpl3))))
13874
;; AdapterRemoval: adapter trimming for sequencing reads.  The 'configure'
;; phase is deleted and the installation prefix is passed as a make flag.
(define-public adapterremoval
  (package
    (name "adapterremoval")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/MikkelSchubert/adapterremoval")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1nf3ki5pfzalhrx2fr1y6pfqfi133yj2m7q4fj9irf5fb94bapwr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "COLOR_BUILD=no"
               (string-append "PREFIX=" out)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://adapterremoval.readthedocs.io/")
    (synopsis "Rapid sequence adapter trimming, identification, and read merging")
    (description
     "This program searches for and removes remnant adapter sequences from
@dfn{High-Throughput Sequencing} (HTS) data and (optionally) trims low quality
bases from the 3' end of reads following adapter removal. AdapterRemoval can
analyze both single end and paired end data, and can be used to merge
overlapping paired-ended reads into (longer) consensus sequences.
Additionally, the AdapterRemoval may be used to recover a consensus adapter
sequence for paired-ended data, for which this information is not available.")
    (license license:gpl3+)))
13911
;; pplacer: phylogenetic placement, built with OCaml 4.07.  The build patches
;; "myocamlbuild.ml" and the Makefile, and overwrites the files in "cdd_src"
;; with sources taken from the cddlib package.
(define-public pplacer
  (let ((commit "807f6f3"))
    (package
      (name "pplacer")
      ;; The commit should be updated with each version change.
      (version "1.1.alpha19")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/matsen/pplacer")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "11ppbbbx20p2g9wj3ff64dhnarb12q79v7qh4rk0gj6lkbz4n7cn"))))
      (build-system ocaml-build-system)
      (arguments
       `(#:modules ((guix build ocaml-build-system)
                    (guix build utils)
                    (ice-9 ftw))        ; for 'scandir'
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'fix-build-with-latest-ocaml
             (lambda _
               (substitute* "myocamlbuild.ml"
                 ;; Compile with "-unsafe-string".
                 (("dep \\[\"c_pam\"\\]" m)
                  (string-append "flag [\"ocaml\"; \"compile\"] (A \"-unsafe-string\");\n"
                                 m))
                 ;; Insert string-splitting helpers ahead of 'run_and_read';
                 ;; they are used by the replacement below.
                 (("let run_and_read" m)
                  (string-append "
let split s ch =
let x = ref [] in
let rec go s =
let pos = String.index s ch in
x := (String.before s pos)::!x;
go (String.after s (pos + 1))
in
try go s
with Not_found -> !x
let split_nl s = split s '\\n'
let before_space s =
try String.before s (String.index s ' ')
with Not_found -> s

" m))
                 (("run_and_read \"ocamlfind list \\| cut -d' ' -f1\"" m)
                  (string-append "List.map before_space (split_nl & " m ")"))
                 ((" blank_sep_strings &") "")
                 ((" Lexing.from_string &") ""))
               #t))
           (add-after 'unpack 'replace-bundled-cddlib
             (lambda* (#:key inputs #:allow-other-keys)
               (let* ((cddlib-src (assoc-ref inputs "cddlib-src"))
                      (local-dir "cddlib_guix"))
                 (mkdir local-dir)
                 (with-directory-excursion local-dir
                   (invoke "tar" "xvf" cddlib-src))
                 (let ((cddlib-src-folder
                        (string-append
                         local-dir "/"
                         ;; Take the first real entry of LOCAL-DIR.  Filter
                         ;; out "." and ".." explicitly instead of assuming
                         ;; that they are the first two entries returned.
                         (car (scandir local-dir
                                       (lambda (entry)
                                         (not (member entry '("." ".."))))))
                         "/lib-src")))
                   (for-each make-file-writable (find-files "cdd_src" ".*"))
                   ;; Copy every matching file over its counterpart in
                   ;; "cdd_src".
                   (for-each
                    (lambda (file)
                      (copy-file file
                                 (string-append "cdd_src/" (basename file))))
                    (find-files cddlib-src-folder ".*[ch]$")))
                 #t)))
           (add-after 'unpack 'fix-makefile
             (lambda _
               ;; Remove system calls to 'git'.
               (substitute* "Makefile"
                 (("^DESCRIPT:=pplacer-.*")
                  (string-append
                   "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n")))
               (substitute* "myocamlbuild.ml"
                 (("git describe --tags --long .*\\\" with")
                  (string-append
                   "echo -n v" ,version "-" ,commit "\" with")))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (copy-recursively "bin" bin))
               #t)))
         #:ocaml ,ocaml-4.07
         #:findlib ,ocaml4.07-findlib))
      (inputs
       `(("zlib" ,zlib "static")
         ("gsl" ,gsl)
         ("ocaml-ounit" ,(package-with-ocaml4.07 ocaml-ounit))
         ("ocaml-batteries" ,(package-with-ocaml4.07 ocaml-batteries))
         ("ocaml-camlzip" ,(package-with-ocaml4.07 camlzip))
         ("ocaml-csv" ,(package-with-ocaml4.07 ocaml-csv))
         ("ocaml-sqlite3" ,ocaml4.07-sqlite3)
         ("ocaml-xmlm" ,(package-with-ocaml4.07 ocaml-xmlm))
         ("ocaml-mcl" ,(package-with-ocaml4.07 ocaml-mcl))
         ("ocaml-gsl" ,ocaml4.07-gsl-1)))
      (native-inputs
       `(("cddlib-src" ,(package-source cddlib))
         ("ocamlbuild" ,(package-with-ocaml4.07 ocamlbuild))
         ("pkg-config" ,pkg-config)))
      (propagated-inputs
       `(("pplacer-scripts" ,pplacer-scripts)))
      (synopsis "Phylogenetic placement of biological sequences")
      (description
       "Pplacer places query sequences on a fixed reference phylogenetic tree
to maximize phylogenetic likelihood or posterior probability according to a
reference alignment. Pplacer is designed to be fast, to give useful
information about uncertainty, and to offer advanced visualization and
downstream analysis.")
      (home-page "https://matsen.fhcrc.org/pplacer/")
      (license license:gpl3))))
14027
;; This package is installed alongside 'pplacer'.  It is a separate package so
;; that it can use the python-build-system for the scripts that are
;; distributed alongside the main OCaml binaries.
(define pplacer-scripts
  (package
    (inherit pplacer)
    (name "pplacer-scripts")
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; The Python scripts live in the "scripts" sub-directory.
         (add-after 'unpack 'enter-scripts-dir
           (lambda _ (chdir "scripts") #t))
         (replace 'check
           (lambda _ (invoke "python" "-m" "unittest" "discover" "-v") #t))
         ;; Put the "bin" directories of the external tools on PATH for the
         ;; two wrapped scripts.
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (hmmer-bin
                     (string-append (assoc-ref inputs "hmmer") "/bin"))
                    (infernal-bin
                     (string-append (assoc-ref inputs "infernal") "/bin")))
               ;; "refpkg_align.py" gets both hmmer and infernal.
               (wrap-program (string-append bin "/refpkg_align.py")
                 `("PATH" ":" prefix
                   (,(string-append hmmer-bin ":" infernal-bin))))
               ;; "hrefpkg_query.py" gets hmmer only.
               (wrap-program (string-append bin "/hrefpkg_query.py")
                 `("PATH" ":" prefix (,hmmer-bin))))
             #t)))))
    (inputs
     `(("infernal" ,infernal)
       ("hmmer" ,hmmer)))
    (propagated-inputs
     `(("python-biopython" ,python2-biopython)
       ("taxtastic" ,taxtastic)))
    (synopsis "Pplacer Python scripts")))
14066
;; CheckM, fetched from PyPI and built with Python 2; its test suite is
;; disabled.
(define-public python2-checkm-genome
  (package
    (name "python2-checkm-genome")
    (version "1.0.13")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "checkm-genome" version))
       (sha256
        (base32 "0bm8gpxjmzxsxxl8lzwqhgx8g1dlnmp6znz7wv3hgb0gdjbf9dzz"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; some tests are interactive
       #:python ,python-2))
    (propagated-inputs
     `(("python-dendropy" ,python2-dendropy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://pypi.org/project/Checkm/")
    (synopsis "Assess the quality of putative genome bins")
    (description
     "CheckM provides a set of tools for assessing the quality of genomes
recovered from isolates, single cells, or metagenomes. It provides robust
estimates of genome completeness and contamination by using collocated sets of
genes that are ubiquitous and single-copy within a phylogenetic lineage.
Assessment of genome quality can also be examined using plots depicting key
genomic characteristics (e.g., GC, coding density) which highlight sequences
outside the expected distributions of a typical genome. CheckM also provides
tools for identifying genome bins that are likely candidates for merging based
on marker set compatibility, similarity in genomic characteristics, and
proximity within a reference genome.")
    (license license:gpl3+)))
14102
;; UMI-tools: command-line tools for Unique Molecular Identifiers, fetched
;; from PyPI.
(define-public umi-tools
  (package
    (name "umi-tools")
    (version "1.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "umi_tools" version))
       (sha256
        (base32
         "08y3vz1vcx09whmbsn722lcs6jl9wyrh9i4p3k8j4cb1i32bij4a"))))
    (build-system python-build-system)
    (inputs
     `(("python-pandas" ,python-pandas)
       ("python-future" ,python-future)
       ("python-scipy" ,python-scipy)
       ("python-matplotlib" ,python-matplotlib)
       ("python-regex" ,python-regex)
       ("python-pysam" ,python-pysam)))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (home-page "https://github.com/CGATOxford/UMI-tools")
    ;; UMI stands for "unique molecular identifier", as in the description.
    (synopsis "Tools for analyzing unique molecular identifiers")
    (description "This package provides tools for dealing with @dfn{Unique
Molecular Identifiers} (UMIs) and @dfn{Random Molecular Tags} (RMTs) in
genetic sequences. There are six tools: the @code{extract} and
@code{whitelist} commands are used to prepare a fastq containing UMIs @code{+/-}
cell barcodes for alignment. The remaining commands, @code{group},
@code{dedup}, and @code{count}/@code{count_tab}, are used to identify PCR
duplicates using the UMIs and perform different levels of analysis depending
on the needs of the user.")
    (license license:expat)))
14135
;; ataqv: ATAC-seq quality control toolkit.  There is no 'configure' phase;
;; the install prefix and the Boost and htslib locations are passed as make
;; variables.
(define-public ataqv
  (package
    (name "ataqv")
    (version "1.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ParkerLab/ataqv")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "031xr6jx1aprh26y5b1lv3gzrlmzg4alfl73vvshymx8cq8asrqi"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       (let ((input-directory
              (lambda (label)
                (assoc-ref %build-inputs label))))
         (list (string-append "prefix=" (assoc-ref %outputs "out"))
               (string-append "BOOST_ROOT=" (input-directory "boost"))
               (string-append "HTSLIB_ROOT=" (input-directory "htslib"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("boost" ,boost)
       ("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("zlib" ,zlib)))
    (native-inputs
     `(("lcov" ,lcov)))
    (home-page "https://github.com/ParkerLab/ataqv")
    (synopsis "Toolkit for quality control and visualization of ATAC-seq data")
    (description "This package provides a toolkit for measuring and comparing
ATAC-seq results. It was written to make it easier to spot differences that
might be caused by ATAC-seq library prep or sequencing. The main program,
@code{ataqv}, examines aligned reads and reports some basic metrics.")
    (license license:gpl3+)))
14176
;; PSIplot R package, fetched from the upstream Git tag "v<version>".
(define-public r-psiplot
  (package
    (name "r-psiplot")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/kcha/psiplot")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "08438h16cfry5kqh3y9hs8q1b1a8bxhblsm75knviz5r6q0n1jxh"))))
    (build-system r-build-system)
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-mass" ,r-mass)
       ("r-dplyr" ,r-dplyr)
       ("r-tidyr" ,r-tidyr)
       ("r-purrr" ,r-purrr)
       ("r-readr" ,r-readr)
       ("r-magrittr" ,r-magrittr)
       ("r-ggplot2" ,r-ggplot2)))
    (home-page "https://github.com/kcha/psiplot")
    (synopsis "Plot percent spliced-in values of alternatively-spliced exons")
    (description
     "PSIplot is an R package for generating plots of @dfn{percent
spliced-in} (PSI) values of alternatively-spliced exons that were computed by
vast-tools, an RNA-Seq pipeline for alternative splicing analysis. The plots
are generated using @code{ggplot2}.")
    (license license:expat)))
14207
;; Oxford Nanopore fast5 API, fetched from the upstream Git tag
;; "release_<version>".
(define-public python-ont-fast5-api
  (package
    (name "python-ont-fast5-api")
    (version "1.4.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nanoporetech/ont_fast5_api")
             (commit (string-append "release_" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "03cbq4zbbwhll8ml2m9k8sa31mirsvcbjkrq1yna0kkzz9fad5fm"))))
    (build-system python-build-system)
    ;; Python libraries propagated to users of this package.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-six" ,python-six)
       ("python-h5py" ,python-h5py)
       ("python-progressbar33" ,python-progressbar33)))
    (home-page "https://github.com/nanoporetech/ont_fast5_api")
    (synopsis "Interface to HDF5 files of the Oxford Nanopore fast5 file format")
    (description
     "This package provides a concrete implementation of the fast5 file schema
using the generic @code{h5py} library, plain-named methods to interact with
and reflect the fast5 file schema, and tools to convert between
@code{multi_read} and @code{single_read} formats.")
    (license license:mpl2.0)))
14236
;; tbsp, built from a pinned Git commit; the version string is derived from
;; that commit and the revision number.
(define-public tbsp
  (let* ((commit "ec8fff4410cfb13a677dbbb95cbbc60217e64907")
         (revision "1"))
    (package
      (name "tbsp")
      (version (git-version "1.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/phoenixding/tbsp")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "025ym14x8gbd6hb55lsinqj6f5qzw36i10klgs7ldzxxd7s39ki1"))))
      (build-system python-build-system)
      (arguments
       `(#:tests? #f))                  ; no tests included
      (inputs
       `(("python-matplotlib" ,python-matplotlib)
         ("python-networkx" ,python-networkx)
         ("python-numpy" ,python-numpy)
         ("python-pybigwig" ,python-pybigwig)
         ("python-biopython" ,python-biopython)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-scipy" ,python-scipy)))
      (home-page "https://github.com/phoenixding/tbsp/")
      (synopsis "SNP-based trajectory inference")
      (description
       "Several studies focus on the inference of developmental and response
trajectories from single cell RNA-Seq (scRNA-Seq) data. A number of
computational methods, often referred to as pseudo-time ordering, have been
developed for this task. CRISPR has also been used to reconstruct lineage
trees by inserting random mutations. The tbsp package implements an
alternative method to detect significant, cell type specific sequence
mutations from scRNA-Seq data.")
      (license license:expat))))
14274
;; tabixpp: C++ wrapper around tabix.  The bundled "htslib" directory is
;; deleted from the source and the htslib input is used instead.  Besides the
;; "tabix++" program, a shared library, a static library, the header and a
;; pkg-config file are installed.
(define-public tabixpp
  (package
    (name "tabixpp")
    (version "1.1.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ekg/tabixpp")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "1k2a3vbq96ic4lw72iwp5s3mwwc4xhdffjj584yn6l9637q9j1yd"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file-recursively "htslib") #t))))
    (build-system gnu-build-system)
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f ; There are no tests to run.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         ;; The build phase needs overriding the location of htslib.
         (replace 'build
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib-ref (assoc-ref inputs "htslib")))
               (invoke "make"
                       (string-append "HTS_LIB=" htslib-ref "/lib/libhts.a")
                       (string-append "INCLUDES= -I" htslib-ref "/include/htslib")
                       "HTS_HEADERS=" ; No need to check for headers here.
                       ;; The linker search path must name the "lib"
                       ;; directory of htslib, not its "include" directory.
                       (string-append "LIBPATH=-L. -L" htslib-ref "/lib"))
               ;; Additionally build a shared and a static library from
               ;; "tabix.o".
               (invoke "g++" "-shared" "-o" "libtabixpp.so" "tabix.o" "-lhts")
               (invoke "ar" "rcs" "libtabixpp.a" "tabix.o"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (lib (string-append out "/lib"))
                    (bin (string-append out "/bin")))
               (install-file "tabix++" bin)
               (install-file "libtabixpp.so" lib)
               (install-file "libtabixpp.a" lib)
               (install-file "tabix.hpp" (string-append out "/include"))
               (mkdir-p (string-append lib "/pkgconfig"))
               ;; Write a pkg-config file; the two ~a directives receive the
               ;; output directory and the package version.
               (with-output-to-file (string-append lib "/pkgconfig/tabixpp.pc")
                 (lambda _
                   (format #t "prefix=~a~@
exec_prefix=${prefix}~@
libdir=${exec_prefix}/lib~@
includedir=${prefix}/include~@
~@
~@
Name: libtabixpp~@
Version: ~a~@
Description: C++ wrapper around tabix project~@
Libs: -L${libdir} -ltabixpp~@
Cflags: -I${includedir}~%"
                           out ,version)))
               #t))))))
    (home-page "https://github.com/ekg/tabixpp")
    (synopsis "C++ wrapper around tabix project")
    (description "This is a C++ wrapper around the Tabix project which abstracts
some of the details of opening and jumping in tabix-indexed files.")
    (license license:expat)))
14341
;; smithwaterman, built from a pinned Git commit.  The Makefile is patched to
;; compile with -fPIC so that a shared library can be linked from the object
;; files after the regular build.
(define-public smithwaterman
  (let ((commit "2610e259611ae4cde8f03c72499d28f03f6d38a7"))
    (package
      (name "smithwaterman")
      (version (git-version "0.0.0" "2" commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/smithwaterman/")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "0i9d8zrxpiracw3mxzd9siybpy62p06rqz9mc2w93arajgbk45bs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; There are no tests to run.
         #:make-flags '("libsw.a" "all")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure) ; There is no configure phase.
           (add-after 'unpack 'patch-source
             (lambda _
               (substitute* "Makefile"
                 (("-c ") "-c -fPIC "))
               #t))
           (add-after 'build 'build-dynamic
             (lambda _
               (apply invoke "g++" "-shared" "-o" "libsmithwaterman.so"
                      '("smithwaterman.o" "SmithWatermanGotoh.o"
                        "disorder.o" "BandedSmithWaterman.o"
                        "LeftAlign.o" "Repeats.o" "IndelAllele.o"))))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((output (assoc-ref outputs "out"))
                      (libdir (string-append output "/lib"))
                      (headers (string-append output "/include/smithwaterman"))
                      (pc-dir (string-append libdir "/pkgconfig")))
                 (install-file "smithwaterman" (string-append output "/bin"))
                 ;; Every header found in the build tree goes to its own
                 ;; "smithwaterman" include sub-directory.
                 (for-each (lambda (header)
                             (install-file header headers))
                           (find-files "." "\\.h$"))
                 (for-each (lambda (library)
                             (install-file library libdir))
                           '("libsmithwaterman.so" "libsw.a"))
                 (mkdir-p pc-dir)
                 ;; Write a pkg-config file; the two ~a directives receive the
                 ;; output directory and the package version.
                 (with-output-to-file (string-append pc-dir "/smithwaterman.pc")
                   (lambda _
                     (format #t "prefix=~a~@
exec_prefix=${prefix}~@
libdir=${exec_prefix}/lib~@
includedir=${prefix}/include/smithwaterman~@
~@
~@
Name: smithwaterman~@
Version: ~a~@
Description: smith-waterman-gotoh alignment algorithm~@
Libs: -L${libdir} -lsmithwaterman~@
Cflags: -I${includedir}~%"
                             output ,version))))
               #t)))))
      (home-page "https://github.com/ekg/smithwaterman")
      (synopsis "Implementation of the Smith-Waterman algorithm")
      (description "Implementation of the Smith-Waterman algorithm.")
      ;; The licensing terms are unclear: https://github.com/ekg/smithwaterman/issues/9.
      (license (list license:gpl2 license:expat)))))
14407
;; multichoose: installs the two programs and their headers by hand, since
;; the 'install' phase is replaced.
(define-public multichoose
  (package
    (name "multichoose")
    (version "1.0.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ekg/multichoose/")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "0ci5fqvmpamwgxvmyd79ygj6n3bnbl3vc7b6h1sxz58186sm3pfs"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; Tests require node.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; TODO: There are Python modules for these programs too.
               (for-each (lambda (program)
                           (install-file program (string-append out "/bin")))
                         '("multichoose" "multipermute"))
               (for-each (lambda (header)
                           (install-file header
                                         (string-append out "/include")))
                         '("multichoose.h" "multipermute.h")))
             #t)))))
    (home-page "https://github.com/ekg/multichoose")
    (synopsis "Efficient loopless multiset combination generation algorithm")
    (description "This library implements an efficient loopless multiset
combination generation algorithm which is (approximately) described in
\"Loopless algorithms for generating permutations, combinations, and other
combinatorial configurations.\", G. Ehrlich - Journal of the ACM (JACM),
1973. (Algorithm 7.)")
    (license license:expat)))
14445
;; fsom, built from a pinned Git commit; only the "fsom" file is installed.
(define-public fsom
  (let ((commit "a6ef318fbd347c53189384aef7f670c0e6ce89a3"))
    (package
      (name "fsom")
      (version (git-version "0.0.0" "1" commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/fsom/")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "0gw1lpvr812pywg9y546x0h1hhj261xwls41r6kqhddjlrcjc0pi"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure) ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (install-file "fsom"
                             (string-append (assoc-ref outputs "out")
                                            "/bin"))
               #t)))))
      (home-page "https://github.com/ekg/fsom")
      (synopsis "Manage SOM (Self-Organizing Maps) neural networks")
      (description "A tiny C library for managing SOM (Self-Organizing Maps)
neural networks.")
      (license license:gpl3))))
14475
;; fastahack: the Makefile is patched to compile with -fPIC so that a shared
;; library can be linked from the object files after the regular build.  The
;; program, the library, the headers and a pkg-config file are installed.
(define-public fastahack
  (package
    (name "fastahack")
    (version "1.0.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ekg/fastahack/")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "0rp1blskhzxf7vbh253ibpxbgl9wwgyzf1wbkxndi08d3j4vcss9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; Unclear how to run tests: https://github.com/ekg/fastahack/issues/15
       #:phases
       (modify-phases %standard-phases
         (delete 'configure) ; There is no configure phase.
         (add-after 'unpack 'patch-source
           (lambda _
             (substitute* "Makefile"
               (("-c ") "-c -fPIC "))
             #t))
         (add-after 'build 'build-dynamic
           (lambda _
             (apply invoke "g++" "-shared" "-o" "libfastahack.so"
                    '("Fasta.o" "FastaHack.o" "split.o" "disorder.o"))))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((output (assoc-ref outputs "out"))
                    (libdir (string-append output "/lib"))
                    (headers (string-append output "/include/fastahack"))
                    (pc-dir (string-append libdir "/pkgconfig")))
               (mkdir-p headers)
               (for-each (lambda (header)
                           (install-file header headers))
                         (find-files "." "\\.h$"))
               (install-file "fastahack" (string-append output "/bin"))
               (install-file "libfastahack.so" libdir)
               (mkdir-p pc-dir)
               ;; Write a pkg-config file; the two ~a directives receive the
               ;; output directory and the package version.
               (with-output-to-file (string-append pc-dir "/fastahack.pc")
                 (lambda _
                   (format #t "prefix=~a~@
exec_prefix=${prefix}~@
libdir=${exec_prefix}/lib~@
includedir=${prefix}/include/fastahack~@
~@
~@
Name: fastahack~@
Version: ~a~@
Description: Indexing and sequence extraction from FASTA files~@
Libs: -L${libdir} -lfastahack~@
Cflags: -I${includedir}~%"
                           output ,version))))
             #t)))))
    (home-page "https://github.com/ekg/fastahack")
    (synopsis "Indexing and sequence extraction from FASTA files")
    (description "Fastahack is a small application for indexing and
extracting sequences and subsequences from FASTA files. The included library
provides a FASTA reader and indexer that can be embedded into applications
which would benefit from directly reading subsequences from FASTA files. The
library automatically handles index file generation and use.")
    (license (list license:expat license:gpl2))))
14540
;; vcflib: the source snippet deletes the bundled sub-projects and rewrites
;; the build files and #include lines to use the packaged fastahack and
;; smithwaterman.  Four of the deleted sub-projects are unpacked again from
;; their package sources at build time.
(define-public vcflib
  (package
    (name "vcflib")
    (version "1.0.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/vcflib/vcflib")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1k1z3876kbzifj1sqfzsf3lgb4rw779hvkg6ryxbyq5bc2paj9kh"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "CMakeLists.txt"
             ;; Drop every line mentioning the bundled copies ...
             ((".*fastahack.*") "")
             ((".*smithwaterman.*") "")
             ;; ... and look both libraries up with pkg-config instead.
             (("(pkg_check_modules\\(TABIXPP)" all)
              (string-append
               "pkg_check_modules(FASTAHACK REQUIRED fastahack)\n"
               "pkg_check_modules(SMITHWATERMAN REQUIRED smithwaterman)\n"
               all))
             (("\\$\\{TABIXPP_LIBRARIES\\}" all)
              (string-append "${FASTAHACK_LIBRARIES} "
                             "${SMITHWATERMAN_LIBRARIES} "
                             all)))
           ;; Include the headers from their installed sub-directories.
           (substitute* (find-files "." "\\.(h|c)(pp)?$")
             (("\"SmithWatermanGotoh.h\"") "<smithwaterman/SmithWatermanGotoh.h>")
             (("\"convert.h\"") "<smithwaterman/convert.h>")
             (("\"disorder.h\"") "<smithwaterman/disorder.h>")
             (("Fasta.h") "fastahack/Fasta.h"))
           (for-each delete-file-recursively
                     '("fastahack" "filevercmp" "fsom" "googletest" "intervaltree"
                       "libVCFH" "multichoose" "smithwaterman"))
           #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'build-shared-library
           (lambda _
             (substitute* "CMakeLists.txt"
               (("vcflib STATIC") "vcflib SHARED"))
             (substitute* "test/Makefile"
               (("libvcflib.a") "libvcflib.so"))
             #t))
         (add-after 'unpack 'unpack-submodule-sources
           (lambda* (#:key inputs #:allow-other-keys)
             ;; EXTRACT creates DIRECTORY and fills it from the input named
             ;; LABEL, which is either a directory or a tarball.
             (let ((extract
                    (lambda (label directory)
                      (let ((location (assoc-ref inputs label)))
                        (mkdir directory)
                        (with-directory-excursion directory
                          (if (file-is-directory? location)
                              (copy-recursively location ".")
                              (invoke "tar" "xvf" location
                                      "--strip-components=1")))))))
               (for-each extract
                         '("filevercmp-src" "fsom-src"
                           "intervaltree-src" "multichoose-src")
                         '("filevercmp" "fsom"
                           "intervaltree" "multichoose")))
             #t))
         ;; This pkg-config file is provided by other distributions.
         (add-after 'install 'install-pkg-config-file
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((output (assoc-ref outputs "out"))
                    (pc-dir (string-append output "/lib/pkgconfig")))
               (mkdir-p pc-dir)
               ;; The two ~a directives receive the output directory and the
               ;; package version.
               (with-output-to-file (string-append pc-dir "/vcflib.pc")
                 (lambda _
                   (format #t "prefix=~a~@
exec_prefix=${prefix}~@
libdir=${exec_prefix}/lib~@
includedir=${prefix}/include~@
~@
Name: vcflib~@
Version: ~a~@
Requires: smithwaterman, fastahack, tabixpp~@
Description: C++ library for parsing and manipulating VCF files~@
Libs: -L${libdir} -lvcflib~@
Cflags: -I${includedir}~%"
                           output ,version)))
               #t))))))
    (inputs
     `(("bzip2" ,bzip2)
       ("htslib" ,htslib)
       ("fastahack" ,fastahack)
       ("perl" ,perl)
       ("python" ,python)
       ("smithwaterman" ,smithwaterman)
       ("tabixpp" ,tabixpp)
       ("xz" ,xz)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ;; Submodules.
       ;; This package builds against the .o files so we need to extract the source.
       ("filevercmp-src" ,(package-source filevercmp))
       ("fsom-src" ,(package-source fsom))
       ("intervaltree-src" ,(package-source intervaltree))
       ("multichoose-src" ,(package-source multichoose))))
    (home-page "https://github.com/vcflib/vcflib/")
    (synopsis "Library for parsing and manipulating VCF files")
    (description "Vcflib provides methods to manipulate and interpret
sequence variation as it can be described by VCF. It is both an API for parsing
and operating on records of genomic variation as it can be described by the VCF
format, and a collection of command-line utilities for executing complex
manipulations on VCF files.")
    (license license:expat)))
14653
(define-public freebayes
  (package
    (name "freebayes")
    (version "1.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/freebayes/freebayes")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0myz3giad7jqp6ricdfnig9ymlcps2h67mlivadvx97ngagm85z8"))
       (patches (search-patches "freebayes-devendor-deps.patch"))
       (modules '((guix build utils)))
       ;; Remove the contrib/htslib directory from the checkout; htslib is
       ;; listed among the inputs below.
       (snippet
        '(begin
           (delete-file-recursively "contrib/htslib")
           #t))))
    (build-system meson-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-source
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Point the test scripts at the bash-tap input.
             (let* ((tap-bin (string-append (assoc-ref inputs "bash-tap")
                                            "/bin"))
                    (bootstrap (string-append tap-bin
                                              "/bash-tap-bootstrap")))
               (substitute* (find-files "test/t")
                 (("BASH_TAP_ROOT=bash-tap")
                  (string-append "BASH_TAP_ROOT=" tap-bin))
                 (("bash-tap/bash-tap-bootstrap")
                  bootstrap)
                 (("source.*bash-tap-bootstrap")
                  (string-append "source " bootstrap))))
             ;; Some inputs aren't actually needed, so the meson.build lines
             ;; mentioning them are blanked out.
             (substitute* "meson.build"
               ((".*bamtools/src.*") "")
               ((".*multichoose.*") ""))
             ;; Include IntervalTree.h by its bare name rather than through
             ;; a relative path.
             (substitute* '("src/BedReader.cpp"
                            "src/BedReader.h")
               (("../intervaltree/IntervalTree.h") "IntervalTree.h"))
             #t))
         ;; Copy the test-simple-bash checkout to the place where the test
         ;; suite looks for it.
         (add-after 'unpack 'unpack-submodule-sources
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((target "test/test-simple-bash"))
               (mkdir-p target)
               (copy-recursively (assoc-ref inputs "test-simple-bash-src")
                                 target))
             #t))
         ;; The slow tests take longer than the specified timeout on ARM
         ;; systems, so the timeout is multiplied there.
         ,@(if (member (%current-system)
                       '("armhf-linux" "aarch64-linux"))
               '((replace 'check
                   (lambda* (#:key tests? #:allow-other-keys)
                     (when tests?
                       (invoke "meson" "test" "--timeout-multiplier" "5"))
                     #t)))
               '()))))
    (inputs
     `(("fastahack" ,fastahack)
       ("htslib" ,htslib)
       ("smithwaterman" ,smithwaterman)
       ("tabixpp" ,tabixpp)
       ("vcflib" ,vcflib)
       ("zlib" ,zlib)))
    (native-inputs
     `(("bash-tap" ,bash-tap)
       ("bc" ,bc)
       ("grep" ,grep)                   ; Built with perl support.
       ("parallel" ,parallel)
       ("perl" ,perl)
       ("pkg-config" ,pkg-config)
       ("samtools" ,samtools)
       ("simde" ,simde)
       ;; This submodule is needed to run the tests.
       ("test-simple-bash-src"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "https://github.com/ingydotnet/test-simple-bash/")
                 (commit "124673ff204b01c8e96b7fc9f9b32ee35d898acc")))
           (file-name "test-simple-bash-src-checkout")
           (sha256
            (base32
             "043plp6z0x9yf7mdpky1fw7zcpwn1p47px95w9mh16603zqqqpga"))))))
    (home-page "https://github.com/freebayes/freebayes")
    (synopsis "Haplotype-based variant detector")
    (description
     "FreeBayes is a Bayesian genetic variant detector designed to
find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms),
indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and
complex events (composite insertion and substitution events) smaller than the
length of a short-read sequencing alignment.")
    (license license:expat)))
14743
(define-public samblaster
  (package
    (name "samblaster")
    (version "0.1.24")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/GregoryFaust/samblaster")
             ;; The commit reference is the version prefixed with "v.".
             (commit (string-append "v." version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0iv2ddfw8363vb2x8gr3p8g88whb6mb9m0pf71i2cqsbv6jghap7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         ;; Install the "samblaster" executable into the output's bin
         ;; directory by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "samblaster" bin))
             #t)))))
    (home-page "https://github.com/GregoryFaust/samblaster")
    (synopsis "Mark duplicates in paired-end SAM files")
    (description
     "Samblaster is a fast and flexible program for marking
duplicates in read-id grouped paired-end SAM files. It can also optionally
output discordant read pairs and/or split read mappings to separate SAM files,
and/or unmapped/clipped reads to a separate FASTQ file. When marking
duplicates, samblaster will require approximately 20MB of memory per 1M read
pairs.")
    (license license:expat)))
14777
(define-public r-velocyto
  ;; Built from a pinned upstream commit; the version string is derived from
  ;; the base version, the revision and the commit via git-version.
  (let ((commit "d7790346cb99f49ab9c2b23ba70dcf9d2c9fc350")
        (revision "1"))
    (package
      (name "r-velocyto")
      (version (git-version "0.6" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/velocyto-team/velocyto.R")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "16wqf70j7rd7pay2q513iyz12i8n9vrpg1bisah4lddbcpx5dz1n"))))
      (build-system r-build-system)
      (inputs
       `(("boost" ,boost)))
      (propagated-inputs
       `(("r-hdf5r" ,r-hdf5r)
         ("r-mass" ,r-mass)
         ("r-mgcv" ,r-mgcv)
         ("r-pcamethods" ,r-pcamethods)
         ("r-rcpp" ,r-rcpp)
         ("r-rcpparmadillo" ,r-rcpparmadillo)
         ;; Suggested packages
         ("r-rtsne" ,r-rtsne)
         ("r-cluster" ,r-cluster)
         ("r-abind" ,r-abind)
         ("r-h5" ,r-h5)
         ("r-biocgenerics" ,r-biocgenerics)
         ("r-genomicalignments" ,r-genomicalignments)
         ("r-rsamtools" ,r-rsamtools)
         ("r-edger" ,r-edger)
         ("r-igraph" ,r-igraph)))
      (home-page "https://velocyto.org")
      (synopsis "RNA velocity estimation in R")
      (description
       "This package provides basic routines for estimation of gene-specific
transcriptional derivatives and visualization of the resulting velocity
patterns.")
      (license license:gpl3))))
14821
(define-public methyldackel
  (package
    (name "methyldackel")
    (version "0.5.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dpryan79/MethylDackel")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1sfhf2ap75qxpnmy1ifgmxqs18rq8mah9mcgkby73vc6h0sw99ws"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       ;; The Makefile's "prefix" variable is set to the output's bin
       ;; directory.
       (let ((out (assoc-ref %outputs "out")))
         (list "CC=gcc"
               (string-append "prefix=" out "/bin/")))
       #:phases
       (modify-phases %standard-phases
         ;; The configure phase is replaced by direct edits to the Makefile.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               ;; Add -lBigWig after -lhts on the link line.
               (("-lhts ") "-lhts -lBigWig ")
               ;; Create $(prefix) before the install command copies the
               ;; executable into it.
               (("install MethylDackel \\$\\(prefix\\)" match)
                (string-append "install -d $(prefix); " match)))
             #t)))))
    (inputs
     `(("curl" ,curl)                   ; XXX: needed by libbigwig
       ("htslib" ,htslib-1.9)
       ("libbigwig" ,libbigwig)
       ("zlib" ,zlib)))
    ;; Needed for tests
    (native-inputs
     `(("python" ,python-wrapper)))
    (home-page "https://github.com/dpryan79/MethylDackel")
    (synopsis "Universal methylation extractor for BS-seq experiments")
    (description
     "MethylDackel will process a coordinate-sorted and indexed BAM or CRAM
file containing some form of BS-seq alignments and extract per-base
methylation metrics from them. MethylDackel requires an indexed fasta file
containing the reference genome as well.")
    ;; See https://github.com/dpryan79/MethylDackel/issues/85
    (license license:expat)))
14868
;; This package bundles PCRE 8.02 and cannot be built with the current
;; version.
(define-public phast
  (package
    (name "phast")
    (version "1.5")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/CshlSiepelLab/phast")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "10lpbllvny923jjbbyrpxahhd1m5h7sbj9gx7rd123rg10mlidki"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       (list "CC=gcc"
             (string-append "DESTDIR=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script to run; the Makefiles are edited
         ;; in place instead.
         (replace 'configure
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Fix syntax: turn eight-space indentation into the tab that
             ;; make requires in front of recipe lines.  The whitespace is
             ;; spelled out as a counted regexp and a "\t" escape so that it
             ;; stays visible and survives reformatting.
             ;; NOTE(review): the pattern and replacement were
             ;; indistinguishable single spaces in the copy under review;
             ;; confirm the eight-space width against test/Makefile.
             (substitute* "test/Makefile"
               ((" {8}") "\t"))
             ;; Look for CLAPACK in the "clapack" input instead of /usr/lib.
             (substitute* "Makefile"
               (("CLAPACKPATH=/usr/lib")
                (string-append "CLAPACKPATH="
                               (assoc-ref inputs "clapack") "/lib")))
             ;; Renaming the libraries is not necessary with our version of
             ;; CLAPACK.
             (substitute* "src/lib/Makefile"
               (("ifdef CLAPACKPATH") "ifdef UNNECESSARY"))
             ;; Adjust library names and link against the shared objects
             ;; rather than the static archives.
             (substitute* "src/make-include.mk"
               (("-lblaswr") "-lblas")
               (("-ltmg") "-ltmglib")
               (("liblapack.a") "liblapack.so")
               (("libblas.a") "libblas.so")
               (("libf2c.a") "libf2c.so"))
             ;; Rewrite the installation paths used below DESTDIR.
             (substitute* "src/Makefile"
               (("/opt") "/share")
               (("/usr/") "/"))
             #t))
         (replace 'check
           ;; Honor #:tests? so that the test suite can be switched off,
           ;; like the other custom check phases in this file.
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               ;; Put the bin directory of the build tree on PATH for the
               ;; test Makefile.
               (setenv "PATH"
                       (string-append (getcwd) "/bin:" (getenv "PATH")))
               ;; Disable broken test
               (substitute* "test/Makefile"
                 ((".*if.*hmrc_summary" m) (string-append "#" m)))
               ;; Only run the msa_view tests because the others fail for
               ;; unknown reasons.
               (invoke "make" "-C" "test" "msa_view"))
             #t)))))
    (inputs
     `(("clapack" ,clapack)))
    (native-inputs
     `(("perl" ,perl)))
    (home-page "http://compgen.cshl.edu/phast/")
    (synopsis "Phylogenetic analysis with space/time models")
    (description
     "Phylogenetic Analysis with Space/Time models (PHAST) is a collection of
command-line programs and supporting libraries for comparative and
evolutionary genomics. Best known as the search engine behind the
Conservation tracks in the University of California, Santa Cruz (UCSC) Genome
Browser, PHAST also includes several tools for phylogenetic modeling,
functional element identification, as well as utilities for manipulating
alignments, trees and genomic annotations.")
    (license license:bsd-3)))
14939
(define-public python-gffutils
  ;; The latest release is more than a year older than the latest commit, so
  ;; a pinned commit is packaged instead.
  (let ((commit "4034c54600813b1402945e12faa91b3a53162cf1")
        (revision "1"))
    (package
      (name "python-gffutils")
      (version (git-version "0.9" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/daler/gffutils")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1rwafjdnbir5wnk0ap06ww4lra3p5frhy7mfs03rlldgfnwxymsn"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (replace 'check
             ;; Honor #:tests? so that the test suite can be switched off,
             ;; like the other custom check phases in this file.
             (lambda* (#:key tests? #:allow-other-keys)
               (when tests?
                 ;; Tests need to access the HOME directory
                 (setenv "HOME" "/tmp")
                 ;; Run everything except the tests tagged "slow".
                 (invoke "nosetests" "-a" "!slow"))
               #t))
           ;; Make every file whose name matches "\.gz" writable after
           ;; unpacking.
           (add-after 'unpack 'make-gz-files-writable
             (lambda _
               (for-each make-file-writable
                         (find-files "." "\\.gz"))
               #t)))))
      (propagated-inputs
       `(("python-argcomplete" ,python-argcomplete)
         ("python-argh" ,python-argh)
         ("python-biopython" ,python-biopython)
         ("python-pybedtools" ,python-pybedtools)
         ("python-pyfaidx" ,python-pyfaidx)
         ("python-simplejson" ,python-simplejson)
         ("python-six" ,python-six)))
      (native-inputs
       `(("python-nose" ,python-nose)))
      (home-page "https://github.com/daler/gffutils")
      (synopsis "Tool for manipulation of GFF and GTF files")
      (description
       "python-gffutils is a Python package for working with and manipulating
the GFF and GTF format files typically used for genomic annotations. The
files are loaded into a SQLite database, allowing much more complex
manipulation of hierarchical features (e.g., genes, transcripts, and exons)
than is possible with plain-text methods alone.")
      (license license:expat))))
14990
(define-public indelfixer
  (package
    (name "indelfixer")
    (version "1.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/cbg-ethz/InDelFixer/")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "10ak05x8i1bx2p7rriv2rglqg1wr7c8wrhjrqlq1wm7ka99w8i79"))))
    (build-system ant-build-system)
    ;; Name of the jar to produce, and the directories holding the Java
    ;; sources and the tests.
    (arguments
     `(#:jar-name "InDelFixer.jar"
       #:source-dir "src/main/java"
       #:test-dir "src/test"))
    (inputs
     `(("java-commons-lang2" ,java-commons-lang)
       ("java-args4j" ,java-args4j)))
    (native-inputs
     `(("java-junit" ,java-junit)))
    (home-page "https://github.com/cbg-ethz/InDelFixer/")
    (synopsis "Iterative and sensitive NGS sequence aligner")
    (description
     "InDelFixer is a sensitive aligner for 454, Illumina and
PacBio data, employing a full Smith-Waterman alignment against a reference.
This Java command line application aligns Next-Generation Sequencing (NGS) and
third-generation reads to a set of reference sequences, by a prior fast k-mer
matching and removes indels, causing frame shifts. In addition, only a
specific region can be considered. An iterative refinement of the alignment
can be performed, by alignment against the consensus sequence with wobbles.
The output is in SAM format.")
    (license license:gpl3+)))
15025
(define-public libsbml
  (package
    (name "libsbml")
    (version "5.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/sbml/libsbml/"
                           version "/stable/libSBML-"
                           version "-core-src.tar.gz"))
       (sha256
        (base32 "0slkagrk3nfi2qsksv6b1brj6zhx4bj4bkib2sdycvrcd10ql2lh"))))
    (build-system cmake-build-system)
    (arguments
     `(#:test-target "test"
       #:configure-flags
       ;; Tell CMake explicitly where the libxml2 input keeps its shared
       ;; library and its headers.
       (let ((libxml2 (assoc-ref %build-inputs "libxml2")))
         (list "-DWITH_CHECK=ON"
               (string-append "-DLIBXML_LIBRARY="
                              libxml2 "/lib/libxml2.so")
               (string-append "-DLIBXML_INCLUDE_DIR="
                              libxml2 "/include/libxml2")))))
    (propagated-inputs
     `(("libxml2" ,libxml2)))
    (native-inputs
     `(("check" ,check-0.14)
       ("swig" ,swig)))
    (home-page "http://sbml.org/Software/libSBML")
    (synopsis "Process SBML files and data streams")
    (description
     "LibSBML is a library to help you read, write, manipulate,
translate, and validate SBML files and data streams. The @dfn{Systems Biology
Markup Language} (SBML) is an interchange format for computer models of
biological processes. SBML is useful for models of metabolism, cell
signaling, and more. It continues to be evolved and expanded by an
international community.")
    (license license:lgpl2.1+)))
15063
(define-public r-signac
  ;; Built from a pinned upstream commit; the version string is derived from
  ;; the base version, the revision and the commit via git-version.
  (let ((commit "e0512d348adeda4a3f23a2e8f56d1fe09840e03c")
        (revision "1"))
    (package
      (name "r-signac")
      (version (git-version "1.1.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/timoast/signac/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1yihhrv7zs87ax61la1nb4y12lg3knraw4b20k5digbcwm8488lb"))))
      ;; The upstream name is capitalized, unlike the Guix package name.
      (properties
       `((upstream-name . "Signac")))
      (build-system r-build-system)
      (inputs
       `(("zlib" ,zlib)))
      (propagated-inputs
       `(("r-annotationfilter" ,r-annotationfilter)
         ("r-biocgenerics" ,r-biocgenerics)
         ("r-biostrings" ,r-biostrings)
         ("r-biovizbase" ,r-biovizbase)
         ("r-data-table" ,r-data-table)
         ("r-dplyr" ,r-dplyr)
         ("r-fastmatch" ,r-fastmatch)
         ("r-future" ,r-future)
         ("r-future-apply" ,r-future-apply)
         ("r-genomeinfodb" ,r-genomeinfodb)
         ("r-genomicranges" ,r-genomicranges)
         ("r-ggbio" ,r-ggbio)
         ("r-ggforce" ,r-ggforce)
         ("r-ggplot2" ,r-ggplot2)
         ("r-ggrepel" ,r-ggrepel)
         ("r-ggseqlogo" ,r-ggseqlogo)
         ("r-iranges" ,r-iranges)
         ("r-irlba" ,r-irlba)
         ("r-lsa" ,r-lsa)
         ("r-matrix" ,r-matrix)
         ("r-patchwork" ,r-patchwork)
         ("r-pbapply" ,r-pbapply)
         ("r-rcpp" ,r-rcpp)
         ("r-rcpproll" ,r-rcpproll)
         ("r-rsamtools" ,r-rsamtools)
         ("r-s4vectors" ,r-s4vectors)
         ("r-scales" ,r-scales)
         ("r-seurat" ,r-seurat)
         ("r-seuratobject" ,r-seuratobject)
         ("r-stringi" ,r-stringi)
         ("r-tidyr" ,r-tidyr)))
      (home-page "https://github.com/timoast/signac/")
      (synopsis "Analysis of single-cell chromatin data")
      (description
       "This package provides a framework for the analysis and exploration of
single-cell chromatin data. The Signac package contains functions for
quantifying single-cell chromatin data, computing per-cell quality control
metrics, dimension reduction and normalization, visualization, and DNA
sequence motif analysis.")
      (license license:expat))))