gnu: Remove ".git" from "https://github/…/….git".
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
4 ;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
5 ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
6 ;;; Copyright © 2016, 2020 Roel Janssen <roel@gnu.org>
7 ;;; Copyright © 2016, 2017, 2018, 2019, 2020 Efraim Flashner <efraim@flashner.co.il>
8 ;;; Copyright © 2016, 2020 Marius Bakke <mbakke@fastmail.com>
9 ;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
10 ;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
11 ;;; Copyright © 2017 Arun Isaac <arunisaac@systemreboot.net>
12 ;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
13 ;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
14 ;;; Copyright © 2018, 2019 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
15 ;;; Copyright © 2019 Maxim Cournoyer <maxim.cournoyer@gmail.com>
16 ;;; Copyright © 2019 Brian Leung <bkleung89@gmail.com>
17 ;;; Copyright © 2019 Brett Gilio <brettg@gnu.org>
18 ;;; Copyright © 2020 Björn Höfling <bjoern.hoefling@bjoernhoefling.de>
19 ;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
20 ;;; Copyright © 2020 Pierre Langlois <pierre.langlois@gmx.com>
21 ;;;
22 ;;; This file is part of GNU Guix.
23 ;;;
24 ;;; GNU Guix is free software; you can redistribute it and/or modify it
25 ;;; under the terms of the GNU General Public License as published by
26 ;;; the Free Software Foundation; either version 3 of the License, or (at
27 ;;; your option) any later version.
28 ;;;
29 ;;; GNU Guix is distributed in the hope that it will be useful, but
30 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
31 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32 ;;; GNU General Public License for more details.
33 ;;;
34 ;;; You should have received a copy of the GNU General Public License
35 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
36
37 (define-module (gnu packages bioinformatics)
38 #:use-module ((guix licenses) #:prefix license:)
39 #:use-module (guix packages)
40 #:use-module (guix utils)
41 #:use-module (guix download)
42 #:use-module (guix git-download)
43 #:use-module (guix hg-download)
44 #:use-module (guix build-system ant)
45 #:use-module (guix build-system gnu)
46 #:use-module (guix build-system cmake)
47 #:use-module (guix build-system go)
48 #:use-module (guix build-system haskell)
49 #:use-module (guix build-system meson)
50 #:use-module (guix build-system ocaml)
51 #:use-module (guix build-system perl)
52 #:use-module (guix build-system python)
53 #:use-module (guix build-system r)
54 #:use-module (guix build-system ruby)
55 #:use-module (guix build-system scons)
56 #:use-module (guix build-system trivial)
57 #:use-module (guix deprecation)
58 #:use-module (gnu packages)
59 #:use-module (gnu packages autotools)
60 #:use-module (gnu packages algebra)
61 #:use-module (gnu packages base)
62 #:use-module (gnu packages bash)
63 #:use-module (gnu packages bison)
64 #:use-module (gnu packages bioconductor)
65 #:use-module (gnu packages boost)
66 #:use-module (gnu packages check)
67 #:use-module (gnu packages code)
68 #:use-module (gnu packages compression)
69 #:use-module (gnu packages cpio)
70 #:use-module (gnu packages cran)
71 #:use-module (gnu packages curl)
72 #:use-module (gnu packages documentation)
73 #:use-module (gnu packages databases)
74 #:use-module (gnu packages datastructures)
75 #:use-module (gnu packages dlang)
76 #:use-module (gnu packages file)
77 #:use-module (gnu packages flex)
78 #:use-module (gnu packages gawk)
79 #:use-module (gnu packages gcc)
80 #:use-module (gnu packages gd)
81 #:use-module (gnu packages golang)
82 #:use-module (gnu packages glib)
83 #:use-module (gnu packages graph)
84 #:use-module (gnu packages graphviz)
85 #:use-module (gnu packages groff)
86 #:use-module (gnu packages gtk)
87 #:use-module (gnu packages guile)
88 #:use-module (gnu packages guile-xyz)
89 #:use-module (gnu packages haskell-check)
90 #:use-module (gnu packages haskell-web)
91 #:use-module (gnu packages haskell-xyz)
92 #:use-module (gnu packages image)
93 #:use-module (gnu packages imagemagick)
94 #:use-module (gnu packages java)
95 #:use-module (gnu packages java-compression)
96 #:use-module (gnu packages jemalloc)
97 #:use-module (gnu packages linux)
98 #:use-module (gnu packages lisp-xyz)
99 #:use-module (gnu packages logging)
100 #:use-module (gnu packages machine-learning)
101 #:use-module (gnu packages man)
102 #:use-module (gnu packages maths)
103 #:use-module (gnu packages mpi)
104 #:use-module (gnu packages ncurses)
105 #:use-module (gnu packages ocaml)
106 #:use-module (gnu packages pcre)
107 #:use-module (gnu packages parallel)
108 #:use-module (gnu packages pdf)
109 #:use-module (gnu packages perl)
110 #:use-module (gnu packages perl-check)
111 #:use-module (gnu packages pkg-config)
112 #:use-module (gnu packages popt)
113 #:use-module (gnu packages protobuf)
114 #:use-module (gnu packages python)
115 #:use-module (gnu packages python-compression)
116 #:use-module (gnu packages python-science)
117 #:use-module (gnu packages python-web)
118 #:use-module (gnu packages python-xyz)
119 #:use-module (gnu packages readline)
120 #:use-module (gnu packages ruby)
121 #:use-module (gnu packages serialization)
122 #:use-module (gnu packages shells)
123 #:use-module (gnu packages sphinx)
124 #:use-module (gnu packages statistics)
125 #:use-module (gnu packages swig)
126 #:use-module (gnu packages tbb)
127 #:use-module (gnu packages tex)
128 #:use-module (gnu packages texinfo)
129 #:use-module (gnu packages textutils)
130 #:use-module (gnu packages time)
131 #:use-module (gnu packages tls)
132 #:use-module (gnu packages vim)
133 #:use-module (gnu packages web)
134 #:use-module (gnu packages xml)
135 #:use-module (gnu packages xorg)
136 #:use-module (srfi srfi-1)
137 #:use-module (ice-9 match))
138
;; ARAGORN: the 'configure' phase is dropped and the 'build' and 'install'
;; phases are replaced, so the package is produced by one direct compiler
;; invocation followed by manual installation of the binary and man page.
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
             version ".tgz"))
       (sha256
        (base32 "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; The C source file name embeds the package version.
             (let ((c-file (string-append "aragorn" ,version ".c")))
               (invoke "gcc"
                       "-O3" "-ffast-math" "-finline-functions"
                       "-o" "aragorn"
                       c-file))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               ;; Executable goes to bin/, manual page to share/man/man1/.
               (install-file "aragorn"
                             (string-append prefix "/bin"))
               (install-file "aragorn.1"
                             (string-append prefix "/share/man/man1")))
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
183
;; BamM: a Python 2 package whose C part (in the "c" sub-directory) needs its
;; autotools files regenerated before the Python build.  The regular 'build'
;; and 'check' phases are removed; the test suite is run once, against the
;; installed package, in the 'post-install-check' phase.
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source (origin
              (method git-fetch)
              ;; BamM is not available on pypi.
              (uri (git-reference
                    (url "https://github.com/Ecogenomics/BamM")
                    (commit version)
                    (recursive? #t)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1p83ahi984ipslxlg4yqy1gdnya9rkn1v71z8djgxkm9d2chw4c5"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "c/htslib-1.3.1")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; BamM is Python 2 only.
       ;; Do not use bundled libhts.  Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             (with-directory-excursion "c"
               (let ((sh (which "sh")))
                 (for-each make-file-writable (find-files "." ".*"))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") sh))
                 (setenv "CONFIG_SHELL" sh)
                 (invoke "./autogen.sh")))
             #t))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Freeze the build-time PATH into the installed 'bamm' script.
             (let* ((out  (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (setenv "PATH"
                     (string-append (assoc-ref outputs "out")
                                    "/bin:"
                                    (getenv "PATH")))
             ;; Prepend the installed site-packages directory to PYTHONPATH.
             ;; Use the build system's helper rather than slicing a fixed
             ;; number of characters off the interpreter's store file name:
             ;; the slicing only yields "2.7" when the patch level has a
             ;; single digit, and it fails when PYTHONPATH is unset.
             (add-installed-pythonpath inputs outputs)
             ;; There are 2 errors printed, but they are safe to ignore:
             ;; 1) [E::hts_open_format] fail to open file ...
             ;; 2) samtools view: failed to open ...
             (invoke "nosetests")
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib-1.3)    ; At least one test fails on htslib-1.4+.
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "https://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
278
;; BamTools: built with CMake.  Before 'configure', LDFLAGS is set so that the
;; linker records <out>/lib/bamtools as a run-time search path.
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.5.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pezmaster31/bamtools")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0nfb2ypcx9959xnbz6wxh6py3xfizgmg8nrknxl95c507m9hmq8b"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((libdir (string-append (assoc-ref outputs "out")
                                          "/lib/bamtools")))
               (setenv "LDFLAGS"
                       (string-append "-Wl,-rpath=" libdir)))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
312
;; BCFtools: the bundled htslib copy is removed from the source, GSL support
;; is enabled at configure time, and the test driver is patched to use the
;; 'bash' found in PATH instead of /bin/bash.
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/samtools/bcftools/"
                           "releases/download/"
                           version "/bcftools-" version ".tar.bz2"))
       (sha256
        (base32 "1j3h638i8kgihzyrlnpj82xg1b23sijibys9hvwari3fy7kd0dkg"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.9")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags '("--enable-libgsl")
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'patch-tests
           (lambda _
             ;; The Perl test driver refers to /bin/bash.
             (let ((bash (which "bash")))
               (substitute* "test/test.pl"
                 (("/bin/bash") bash)))
             #t)))))
    (native-inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
356
;; BEDOPS: there is no 'configure' phase.  The third-party tarballs shipped
;; in the source tree are unpacked up front and the Makefile is patched so
;; that it does not unpack them again.
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.35")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bedops/bedops")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0mmgsgwz5r9w76hzgxkxc9s9lkdhhaf7vr6i02b09vbswvs1fyqx"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops includes tarballs of minimally patched upstream
             ;; libraries jansson, zlib, and bzip2.  We cannot just use stock
             ;; libraries because at least one of the libraries (zlib) is
             ;; patched to add a C++ function definition (deflateInit2cpp).
             ;; Until the Bedops developers offer a way to link against system
             ;; libraries we have to build the in-tree copies of these three
             ;; libraries.

             ;; See upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Unpack the tarballs to benefit from shebang patching.
             (with-directory-excursion "third-party"
               (for-each (lambda (tarball)
                           (invoke "tar" "xvf" tarball))
                         (list "jansson-2.6.tar.bz2"
                               "zlib-1.2.7.tar.bz2"
                               "bzip2-1.0.6.tar.bz2")))
             ;; Disable unpacking of tarballs in Makefile.
             (substitute* "system.mk/Makefile.linux"
               (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
               (("\\./configure") "CONFIG_SHELL=bash ./configure"))
             ;; Make the bundled zlib's Makefile use 'bash' as its shell.
             (substitute* "third-party/zlib-1.2.7/Makefile.in"
               (("^SHELL=.*$") "SHELL=bash\n"))
             #t))
         (delete 'configure))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
416
;; bedtools: plain Makefile build (no 'configure' phase); the installation
;; prefix is handed to make through the "prefix" variable.
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.29.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                           "download/v" version "/"
                           "bedtools-" version ".tar.gz"))
       (sha256
        (base32 "0m3hk6548846w83a9s5drsczvy67n2azx41kj71n03klb2gbzwg3"))))
    (build-system gnu-build-system)
    (arguments
     '(#:test-target "test"
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "prefix=" out)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (native-inputs
     `(("python" ,python-wrapper)))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:expat)))
452
;; Ribotaper expects the column layout of older bedtools output; later
;; releases of bedtools produce files with more columns.  This variant
;; inherits from 'bedtools' and installs by copying everything found under
;; the build tree's "bin" directory.
(define-public bedtools-2.18
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/"
                           "releases/download/v" version
                           "/bedtools-" version ".tar.gz"))
       (sha256
        (base32 "11rvca19ncg03kxd0wzlfx5ws7r3nisd0z8s9j9n182d8ksp2pxz"))))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (bin-dir (string-append out "/bin/")))
               (for-each (lambda (program)
                           (install-file program bin-dir))
                         (find-files "bin" ".*")))
             #t)))))))
479
;; pbbam: Meson build with the test suite disabled.  The 'find-googletest'
;; phase rewrites tests/meson.build to locate gtest_main through the
;; compiler instead of through pkg-config.
(define-public pbbam
  (package
    (name "pbbam")
    (version "0.23.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/pbbam")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0h9gkrpf2lrxklxp72xfl5bi3h5zcm5hprrya9gf0hr3xwlbpp0x"))))
    (build-system meson-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'find-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; It doesn't find gtest_main because there's no pkg-config file
             ;; for it.  Find it another way.
             (let ((gtest (assoc-ref inputs "googletest")))
               (substitute* "tests/meson.build"
                 (("pbbam_gtest_dep = dependency\\('gtest_main'.*")
                  (format #f "cpp = meson.get_compiler('cpp')
pbbam_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                          gtest))))
             #t)))
       ;; TODO: tests/pbbam_test cannot be linked
       ;; ld: tests/59830eb@@pbbam_test@exe/src_test_Accuracy.cpp.o:
       ;;   undefined reference to symbol '_ZTIN7testing4TestE'
       ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
       ;;   error adding symbols: DSO missing from command line
       #:tests? #f
       #:configure-flags '("-Dtests=false")))
    ;; These libraries are listed as "Required" in the pkg-config file.
    (propagated-inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (inputs
     `(("boost" ,boost)
       ("samtools" ,samtools)))
    (native-inputs
     `(("googletest" ,googletest)
       ("pkg-config" ,pkg-config)
       ("python" ,python-wrapper)))     ; for tests
    (home-page "https://github.com/PacificBiosciences/pbbam")
    (synopsis "Work with PacBio BAM files")
    (description
     "The pbbam software package provides components to create, query, and
edit PacBio BAM files and associated indices. These components include a core
C++ library, bindings for additional languages, and command-line utilities.
This library is not intended to be used as a general-purpose BAM utility - all
input and output BAMs must adhere to the PacBio BAM format specification.
Non-PacBio BAMs will cause exceptions to be thrown.")
    (license license:bsd-3)))
535
;; blasr-libcpp: Meson build with the test suite disabled.  Two phases patch
;; the Meson files after unpacking: one adds the HDF5 libraries to the
;; dependency list, the other locates gtest_main through the compiler.
(define-public blasr-libcpp
  (package
    (name "blasr-libcpp")
    (version "5.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/blasr_libcpp")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0cn5l42zyq67sj0g2imqkhayz2iqvv0a1pgpbmlq0qynjmsrbfd2"))))
    (build-system meson-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Prepend explicit lookups of libhdf5 and libhdf5_cpp to the
             ;; contents of the "libblasr_deps" list.
             (let ((hdf5-dir (assoc-ref inputs "hdf5")))
               (substitute* "meson.build"
                 (("libblasr_deps = \\[" opening)
                  (string-append
                   opening
                   (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                           hdf5-dir hdf5-dir)))))
             #t))
         (add-after 'unpack 'find-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; It doesn't find gtest_main because there's no pkg-config file
             ;; for it.  Find it another way.
             (let ((gtest (assoc-ref inputs "googletest")))
               (substitute* "unittest/meson.build"
                 (("libblasr_gtest_dep = dependency\\('gtest_main'.*")
                  (format #f "cpp = meson.get_compiler('cpp')
libblasr_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                          gtest))))
             #t)))
       ;; TODO: unittest/libblasr_unittest cannot be linked
       ;; ld: unittest/df08227@@libblasr_unittest@exe/alignment_utils_FileUtils_gtest.cpp.o:
       ;;   undefined reference to symbol
       ;;   '_ZN7testing8internal9DeathTest6CreateEPKcPKNS0_2REES3_iPPS1_'
       ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
       ;;   error adding symbols: DSO missing from command line
       #:tests? #f
       #:configure-flags '("-Dtests=false")))
    (inputs
     `(("boost" ,boost)
       ("hdf5" ,hdf5)
       ("pbbam" ,pbbam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("googletest" ,googletest)
       ("pkg-config" ,pkg-config)))
    (home-page "https://github.com/PacificBiosciences/blasr_libcpp")
    (synopsis "Library for analyzing PacBio genomic sequences")
    (description
     "This package provides three libraries used by applications for analyzing
PacBio genomic sequences. This library contains three sub-libraries: pbdata,
hdf and alignment.")
    (license license:bsd-3)))
597
;; blasr: Meson build with the test suite disabled.  After unpacking,
;; meson.build is patched so that the HDF5 libraries are part of the
;; "blasr_deps" dependency list.
(define-public blasr
  (package
    (name "blasr")
    (version "5.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/blasr")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1skgy2mvz8gsgfh1gc2nfgwvpyzb1hpmp2cf2773h5wsj8nw22kl"))))
    (build-system meson-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((hdf5-dir (assoc-ref inputs "hdf5")))
               (substitute* "meson.build"
                 (("blasr_deps = \\[" opening)
                  (string-append
                   opening
                   (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                           hdf5-dir hdf5-dir)))))
             #t)))
       ;; Tests require "cram" executable, which is not packaged.
       #:tests? #f
       #:configure-flags '("-Dtests=false")))
    (inputs
     `(("boost" ,boost)
       ("blasr-libcpp" ,blasr-libcpp)
       ("hdf5" ,hdf5)
       ("pbbam" ,pbbam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/PacificBiosciences/blasr")
    (synopsis "PacBio long read aligner")
    (description
     "Blasr is a genomic sequence aligner for processing PacBio long reads.")
    (license license:bsd-3)))
642
;; Ribotaper: after installation, each of the four pipeline scripts is
;; wrapped so that R_LIBS_SITE is set to the value it has at build time.
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                           "files/RiboTaper/RiboTaper_Version_"
                           version ".tar.gz"))
       (sha256
        (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bin-dir (string-append (assoc-ref outputs "out")
                                           "/bin/")))
               (for-each
                (lambda (script)
                  ;; "=" makes the wrapper set the variable outright rather
                  ;; than extend an existing value.
                  (wrap-program (string-append bin-dir script)
                    `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
                (list "create_annotations_files.bash"
                      "create_metaplots.bash"
                      "Ribotaper_ORF_find.sh"
                      "Ribotaper.sh")))
             #t)))))
    (inputs
     `(("bedtools" ,bedtools-2.18)
       ("samtools" ,samtools-0.1)
       ("r-minimal" ,r-minimal)
       ("r-foreach" ,r-foreach)
       ("r-xnomial" ,r-xnomial)
       ("r-domc" ,r-domc)
       ("r-multitaper" ,r-multitaper)
       ("r-seqinr" ,r-seqinr)))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
687
;; RiboDiff: Python 2 package.  setup.py is patched after unpacking so that
;; scripts/TE.py is declared as a script to install.
(define-public ribodiff
  (package
    (name "ribodiff")
    (version "0.2.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ratschlab/RiboDiff")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0x75nlp7qnmm64jasbi6l21f2cy99r2cjyl6b4hr8zf2bq22drnz"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; Generate an installable executable script wrapper.
         (add-after 'unpack 'patch-setup.py
           (lambda _
             (substitute* "setup.py"
               ;; Keep the matched "packages=" line and follow it with a
               ;; "scripts=" line that reuses the same leading text.
               (("^(.*)packages=.*" whole-line leading)
                (string-append whole-line "\n"
                               leading "scripts=['scripts/TE.py'],\n")))
             #t)))))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-scipy" ,python2-scipy)
       ("python-statsmodels" ,python2-statsmodels)))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)))
    (home-page "https://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
    (synopsis "Detect translation efficiency changes from ribosome footprints")
    (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
    (license license:gpl3+)))
732
;; Bioawk: plain Makefile build without 'configure' or tests.  The custom
;; 'install' phase copies the "bioawk" binary to bin/ and installs "awk.1"
;; under the name "bioawk.1".
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lh3/bioawk")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1pxc3zdnirxbf9a0az698hd8xdik7qkhypm7v6hn922x8y9qmspm"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bison" ,bison)))
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       ;; Bison must generate files, before other targets can build.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix  (assoc-ref outputs "out"))
                    (man-dir (string-append prefix "/share/man/man1")))
               ;; The manual page is renamed while being copied, so the
               ;; target directory has to be created by hand.
               (mkdir-p man-dir)
               (copy-file "awk.1" (string-append man-dir "/bioawk.1"))
               (install-file "bioawk" (string-append prefix "/bin")))
             #t)))))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
776
;; pybedtools: the pre-generated Cython output is deleted and regenerated
;; before the build, a handful of tests are renamed so that they are not
;; collected, and the test suite is run from a copy under /tmp/test with the
;; freshly built library and scripts on PYTHONPATH and PATH.
(define-public python-pybedtools
  (package
    (name "python-pybedtools")
    (version "0.8.1")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "14w5i40gi25clrr7h4wa2pcpnyipya8hrqi7nq77553zc5wf0df0"))))
    (build-system python-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (srfi srfi-1)
                  (srfi srfi-26)
                  (guix build utils)
                  (guix build python-build-system))
       ;; See https://github.com/daler/pybedtools/issues/192
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             (substitute* "pybedtools/test/test_scripts.py"
               ;; This test freezes.
               (("def test_intron_exon_reads")
                "def _do_not_test_intron_exon_reads")
               ;; This test fails in the Python 2 build.
               (("def test_venn_mpl")
                "def _do_not_test_venn_mpl"))
             (substitute* "pybedtools/test/test_helpers.py"
               ;; Requires internet access.
               (("def test_chromsizes")
                "def _do_not_test_chromsizes")
               ;; Broken as a result of the workaround used in the check phase
               ;; (see: https://github.com/daler/pybedtools/issues/192).
               (("def test_getting_example_beds")
                "def _do_not_test_getting_example_beds"))
             ;; This issue still occurs on python2
             (substitute* "pybedtools/test/test_issues.py"
               (("def test_issue_303")
                "def _test_issue_303"))
             #t))
         ;; TODO: Remove this phase once it is part of PYTHON-BUILD-SYSTEM.
         ;; Force the Cythonization of C++ files to guard against compilation
         ;; problems.
         (add-after 'unpack 'remove-cython-generated-files
           (lambda _
             ;; CYTHON-SOURCES holds every ".pyx" file name with its extension
             ;; removed; a C/C++ file counts as generated when its own name
             ;; minus extension is in that list.
             (let ((cython-sources (map (cut string-drop-right <> 4)
                                        (find-files "." "\\.pyx$")))
                   (c/c++-files (find-files "." "\\.(c|cpp|cxx)$")))
               (define (strip-extension filename)
                 (string-take filename (string-index-right filename #\.)))
               (define (cythonized? c/c++-file)
                 (member (strip-extension c/c++-file) cython-sources))
               (for-each delete-file (filter cythonized? c/c++-files))
               #t)))
         (add-after 'remove-cython-generated-files 'generate-cython-extensions
           (lambda _
             (invoke "python" "setup.py" "cythonize")))
         (replace 'check
           (lambda _
             ;; Locate the "lib*" and "scripts*" sub-directories of ./build.
             (let* ((cwd (getcwd))
                    (build-root-directory (string-append cwd "/build/"))
                    (build (string-append
                            build-root-directory
                            (find (cut string-prefix? "lib" <>)
                                  (scandir (string-append
                                            build-root-directory)))))
                    (scripts (string-append
                              build-root-directory
                              (find (cut string-prefix? "scripts" <>)
                                    (scandir build-root-directory)))))
               (setenv "PYTHONPATH"
                       (string-append build ":" (getenv "PYTHONPATH")))
               ;; Executable scripts such as 'intron_exon_reads.py' must be
               ;; available in the PATH.
               (setenv "PATH"
                       (string-append scripts ":" (getenv "PATH"))))
             ;; The tests need to be run from elsewhere...
             (mkdir-p "/tmp/test")
             (copy-recursively "pybedtools/test" "/tmp/test")
             (with-directory-excursion "/tmp/test"
               (invoke "pytest" "-v" "--doctest-modules")))))))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)
       ("python-pyyaml" ,python-pyyaml)))
    (native-inputs
     `(("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-cython" ,python-cython)
       ("kentutils" ,kentutils)         ; for bedGraphToBigWig
       ("python-six" ,python-six)
       ;; For the test suite.
       ("python-pytest" ,python-pytest)
       ("python-psutil" ,python-psutil)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
884
;; Python 2 variant of python-pybedtools: the same package, with
;; python2-pathlib prepended to the inherited native inputs.
(define-public python2-pybedtools
  (let ((base (package-with-python2 python-pybedtools)))
    (package
      (inherit base)
      (native-inputs
       (cons `("python2-pathlib" ,python2-pathlib)
             (package-native-inputs base))))))
892
;; Utilities for the Biological Observation Matrix (BIOM) file format.
;; Generated C files are stripped from the checkout by the source snippet and
;; USE_CYTHON=1 is exported before the build (presumably so that setup.py
;; regenerates them with Cython -- confirm against upstream's setup.py).
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.7")
    (source
     (origin
       (method git-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (git-reference
             (url "https://github.com/biocore/biom-format")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1rna16lyk5aqhnv0dp77wwaplias93f1vw28ad3jmyw6hwkai05v"))
       (modules '((guix build utils)))
       (snippet '(begin
                   ;; Delete generated C files.  The regexp is anchored so
                   ;; that only file names *ending* in ".c" are removed; an
                   ;; unanchored "\\.c" also matches any name that merely
                   ;; contains ".c", such as "setup.cfg".
                   (for-each delete-file (find-files "." "\\.c$"))
                   #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-cython
           (lambda _ (setenv "USE_CYTHON" "1") #t))
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             ;; Put a "skip" decorator, with the same indentation as the
             ;; matched "def" line, in front of each test to be disabled.
             (substitute* "biom/tests/test_cli/test_validate_table.py"
               (("^(.+)def test_invalid_hdf5" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m)))
             (substitute* "biom/tests/test_table.py"
               (("^(.+)def test_from_hdf5_issue_731" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m)))
             #t))
         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make the installed gzip files writable before the
             ;; 'reset-gzip-timestamps' phase runs.
             (let ((out (assoc-ref outputs "out")))
               (for-each (lambda (file) (chmod file #o644))
                         (find-files out "\\.gz"))
               #t))))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-flake8" ,python-flake8)
       ("python-future" ,python-future)
       ("python-click" ,python-click)
       ("python-h5py" ,python-h5py)
       ;; FIXME: Upgrade to pandas 1.0 when
       ;; https://github.com/biocore/biom-format/issues/837 is resolved.
       ("python-pandas" ,python-pandas-0.25)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-nose" ,python-nose)))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    (properties `((python2-variant . ,(delay python2-biom-format))))))
963
;; Python 2 variant of python-biom-format, with one extra phase that patches
;; setup.py so that pyqi is no longer appended to the install requirements.
(define-public python2-biom-format
  (let ((py2-base (package-with-python2
                   (strip-python2-variant python-biom-format))))
    (package
      (inherit py2-base)
      (arguments
       (substitute-keyword-arguments (package-arguments py2-base)
         ((#:phases inherited-phases)
          `(modify-phases ,inherited-phases
             (add-after 'unpack 'remove-pyqi
               (lambda _
                 ;; Do not require the unmaintained pyqi library.
                 (substitute* "setup.py"
                   (("install_requires.append\\(\"pyqi\"\\)") "pass"))
                 #t)))))))))
978
;; Command-line tools for processing Hi-C sequencing data.  htslib, samtools
;; and lz4 are propagated because their programs are looked up in PATH.
(define-public python-pairtools
  (package
    (name "python-pairtools")
    (version "0.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/mirnylab/pairtools")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0gr8y13q7sd6yai6df4aavl2470n1f9s3cib6r473z4hr8hcbwmc"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-references
           (lambda _
             ;; Point the hard-coded /bin/bash references at the bash found
             ;; in PATH.
             (let ((bash (which "bash")))
               (substitute* '("pairtools/pairtools_merge.py"
                              "pairtools/pairtools_sort.py")
                 (("/bin/bash") bash)))
             #t))
         (replace 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Run pytest from /tmp, with the installed package added to
             ;; the Python path.
             (add-installed-pythonpath inputs outputs)
             (with-directory-excursion "/tmp"
               (invoke "pytest" "-v")))))))
    (inputs
     `(("python" ,python-wrapper)))
    (propagated-inputs
     `(("htslib" ,htslib)               ; for bgzip, looked up in PATH
       ("samtools" ,samtools)           ; looked up in PATH
       ("lz4" ,lz4)                     ; for lz4c
       ("python-click" ,python-click)
       ("python-numpy" ,python-numpy)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)
       ("python-pytest" ,python-pytest)))
    (home-page "https://github.com/mirnylab/pairtools")
    (synopsis "Process mapped Hi-C data")
    (description "Pairtools is a simple and fast command-line framework to
process sequencing data from a Hi-C experiment. Process pair-end sequence
alignments and perform the following operations:

@itemize
@item detect ligation junctions (a.k.a. Hi-C pairs) in aligned paired-end
sequences of Hi-C DNA molecules
@item sort @code{.pairs} files for downstream analyses
@item detect, tag and remove PCR/optical duplicates
@item generate extensive statistics of Hi-C datasets
@item select Hi-C pairs given flexibly defined criteria
@item restore @code{.sam} alignments from Hi-C pairs.
@end itemize
")
    (license license:expat)))
1036
;; BioPerl with a minimal set of inputs.  The names of the transitive target
;; inputs are computed on the host side and spliced into the build code, where
;; they are used to build the PERL5LIB value the installed ".pl" scripts are
;; wrapped with.
(define-public bioperl-minimal
  (let* ((perl-inputs `(("perl-module-build" ,perl-module-build)
                        ("perl-data-stag" ,perl-data-stag)
                        ("perl-libwww" ,perl-libwww)
                        ("perl-uri" ,perl-uri)))
         ;; Package names of everything reachable from PERL-INPUTS.
         (transitive-input-names
          (map (compose package-name second)
               (delete-duplicates
                (append-map (compose package-transitive-target-inputs second)
                            perl-inputs)))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/bioperl/bioperl-live")
               ;; The tag is the version with dots turned into dashes,
               ;; prefixed with "release-".
               (commit (string-append
                        "release-"
                        (string-join (string-split version #\.) "-")))))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0wl8yvzcls59pwwk6m8ahy87pwg6nnibzy5cldbvmcwg2x2w7783"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((out        (assoc-ref outputs "out"))
                      (site-perl  (string-append out "/lib/perl5/site_perl"))
                      (input-dirs (map (lambda (label)
                                         (assoc-ref %build-inputs label))
                                       ',transitive-input-names))
                      (perl5lib   (string-join (cons site-perl input-dirs)
                                               ":"))
                      (scripts    (find-files (string-append out "/bin/")
                                              "\\.pl$")))
                 (for-each (lambda (script)
                             (wrap-program script
                               `("PERL5LIB" ":" prefix (,perl5lib))))
                           scripts)
                 #t))))))
      (inputs perl-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "https://metacpan.org/release/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license license:perl-license))))
1102
;; Biopython, fetched from PyPI.  HOME is pointed at /tmp before the 'check
;; phase because some tests require a home directory.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.70")
    (source
     (origin
       (method url-fetch)
       ;; use PyPi rather than biopython.org to ease updating
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "0nz4n9d2y2dg849gn1z0vjlkwcpzzkzy3fij7x94a6ixy2c54z2a"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Some tests require a home directory to be set.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
1133
;; Python 2 variant of python-biopython.
(define-public python2-biopython
  (package-with-python2 python-biopython))
1136
;; Small FASTA/FASTQ parser, fetched from PyPI.  Tests are disabled because
;; the test data is not part of the distribution.
(define-public python-fastalite
  (package
    (name "python-fastalite")
    (version "0.3")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "fastalite" version))
              (sha256
               (base32
                "1qli6pxp77i9xn2wfciq2zaxhl82bdxb33cpzqzj1z25yd036wqj"))))
    (build-system python-build-system)
    (arguments
     ;; Test data is not distributed.
     `(#:tests? #f))
    (home-page "https://github.com/nhoffman/fastalite")
    (synopsis "Simplest possible FASTA parser")
    (description "This library implements a FASTA and a FASTQ parser without
relying on a complex dependency tree.")
    (license license:expat)))
1156
;; Python 2 variant of python-fastalite.
(define-public python2-fastalite
  (package-with-python2 python-fastalite))
1159
(define-public bpp-core
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582")
        (revision "1"))
    (package
      (name "bpp-core")
      ;; Yields "2.2.0-1." followed by the first seven characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit commit)))
         ;; Yields NAME-VERSION-checkout.
         (file-name (git-file-name name version))
         (sha256
          (base32 "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
1189
(define-public bpp-phyl
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "0c07167b629f68b569bf274d1ad0c4af83276ae2")
        (revision "1"))
    (package
      (name "bpp-phyl")
      ;; Yields "2.2.0-1." followed by the first seven characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
               (commit commit)))
         ;; Yields NAME-VERSION-checkout.
         (file-name (git-file-name name version))
         (sha256
          (base32 "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       `(;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f
         #:parallel-build? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
1223
(define-public bpp-popgen
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "e472bac9b1a148803895d747cd6d0c5904f85d9f")
        (revision "1"))
    (package
      (name "bpp-popgen")
      ;; Yields "2.2.0-1." followed by the first seven characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
               (commit commit)))
         ;; Yields NAME-VERSION-checkout.
         (file-name (git-file-name name version))
         (sha256
          (base32 "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests.
         #:parallel-build? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
      (license license:cecill-c))))
1255
(define-public bpp-seq
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "6cfa07965ce152e5598a89df2fa80a75973bfa33")
        (revision "1"))
    (package
      (name "bpp-seq")
      ;; Yields "2.2.0-1." followed by the first seven characters of COMMIT.
      (version (git-version "2.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-seq")
               (commit commit)))
         ;; Yields NAME-VERSION-checkout.
         (file-name (git-file-name name version))
         (sha256
          (base32 "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (arguments
       `(;; If out-of-source, test data is not copied into the build directory
         ;; so the tests fail.
         #:out-of-source? #f
         #:parallel-build? #f))
      (inputs
       `(("bpp-core" ,bpp-core)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
      (license license:cecill-c))))
1288
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      (inputs
       ;; Every input carries its own label.  bpp-popgen used to be labelled
       ;; "bpp-phyl" as well, so two inputs shared one label and a lookup by
       ;; label could only ever find the first of them.
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ("bpp-popgen" ,bpp-popgen)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
1326
;; NCBI BLAST+.  The bundled bzip2, zlib and pcre are removed from the source
;; and the corresponding inputs are passed to the configure script instead.
;; The package is split into three outputs: "out", "lib" and "include".
(define-public blast+
  (package
    (name "blast+")
    (version "2.10.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32 "11kvrrl0mcwww6530r55hccpg3x3msmhr3051fwnjbq8rzg2j1qi"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled bzip2, zlib and pcre.
           (for-each delete-file-recursively
                     '("c++/src/util/compress/bzip2"
                       "c++/src/util/compress/zlib"
                       "c++/src/util/regexp"))
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           ;; Remove useless msbuild directory
           (delete-file-recursively
            "c++/src/build-system/project_tree_builder/msbuild")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There are two(!) tests for this massive library, and both fail with
       ;; "unparsable timing stats".
       ;; ERR [127] --  [serial/datatool] datatool.sh (unparsable timing stats)
       ;; ERR [127] --  [serial/datatool] datatool_xml.sh (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "c++")
             #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the replacement text for the tool named TOOL, or #f
             ;; (after printing a warning) when it is not found in PATH.
             (define (tool-replacement tool)
               (if (string=? tool "date")
                   ;; make call to "date" deterministic
                   "date -d @0"
                   (or (which tool)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 tool)
                         #f))))

             ;; Rewrite hardcoded paths to various tools
             (let ((fixed-files
                    '("src/build-system/configure.ac"
                      "src/build-system/configure"
                      "src/build-system/helpers/run_with_lock.c"
                      "scripts/common/impl/if_diff.sh"
                      "scripts/common/impl/run_with_lock.sh"
                      "src/build-system/Makefile.configurables.real"
                      "src/build-system/Makefile.in.top"
                      "src/build-system/Makefile.meta.gmake=no"
                      "src/build-system/Makefile.meta.in"
                      "src/build-system/Makefile.meta_l"
                      "src/build-system/Makefile.meta_p"
                      "src/build-system/Makefile.meta_r"
                      "src/build-system/Makefile.mk.in"
                      "src/build-system/Makefile.requirements"
                      "src/build-system/Makefile.rules_with_autodep.in"))
                   (check-scripts
                    (find-files "scripts/common/check" "\\.sh$")))
               (substitute* (append fixed-files check-scripts)
                 ;; Keep the original text when no replacement is available.
                 (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" whole prefix tool)
                  (or (tool-replacement tool) whole))))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; rewrite "/var/tmp" in check script
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; do not reset PATH
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (add-before 'configure 'set-HOME
           ;; $HOME needs to be set at some point during the configure phase
           (lambda _
             (setenv "HOME" "/tmp")
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Build a "--with-X=DIR" style flag from the input named LABEL.
             (define (with-input flag label)
               (string-append flag (assoc-ref inputs label)))

             (let ((prefix     (assoc-ref outputs "out"))
                   (libdir     (string-append (assoc-ref outputs "lib")
                                              "/lib"))
                   (includedir (string-append (assoc-ref outputs "include")
                                              "/include/ncbi-tools++")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure.orig"
                       (string-append "--with-build-root=" (getcwd) "/build")
                       (string-append "--prefix=" prefix)
                       (string-append "--libdir=" libdir)
                       (string-append "--includedir=" includedir)
                       (with-input "--with-bz2=" "bzip2")
                       (with-input "--with-z=" "zlib")
                       (with-input "--with-pcre=" "pcre")
                       ;; Each library is built twice by default, once
                       ;; with "-static" in its name, and again
                       ;; without.
                       "--without-static"
                       "--with-dll")
               #t))))))
    (outputs '("out"                    ;  21 MB
               "lib"                    ; 226 MB
               "include"))              ;  33 MB
    (native-inputs
     `(("cpio" ,cpio)))
    (inputs
     `(("bzip2" ,bzip2)
       ("lmdb" ,lmdb)
       ("zlib" ,zlib)
       ("pcre" ,pcre)
       ("perl" ,perl)
       ("python" ,python-wrapper)))
    (home-page "https://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;;   * Expat:
    ;;     * ./c++/include/util/bitset/
    ;;     * ./c++/src/html/ncbi_menu*.js
    ;;   * Boost license:
    ;;     * ./c++/include/util/impl/floating_point_comparison.hpp
    ;;   * LGPL 2+:
    ;;     * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;;   * ASL 2.0:
    ;;     * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
1477
;; BLESS, an error correction tool for NGS reads.  Several bundled libraries
;; are removed by the source snippet; the Makefile is told where to find the
;; static zlib, and the paths of the "kmc" and "pigz" executables are patched
;; into parse_args.cpp.  There is no configure script and no "check" target.
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, google-sparsehash, zlib,
                  ;; and the .git directory.
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib:static")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"))))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-build-bundled-pigz
           ;; The bundled pigz was deleted by the source snippet, so drop the
           ;; Makefile command that would try to build it.  This phase needs
           ;; none of the phase keyword arguments.
           (lambda _
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         (add-after 'unpack 'patch-paths-to-executables
           ;; Hard-code the locations of kmc (installed into this package's
           ;; own "out") and of pigz (taken from the inputs).
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         (replace 'install
           ;; Install the two executables by hand.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib:static" ,zlib "static")
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized Bloom
filter. BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
1562
;; Bowtie 2.  BUILD_HOST and BUILD_TIME are replaced with fixed values in the
;; Makefile for a deterministic build; there is no configure script, and the
;; test suite is the upstream "simple_tests.pl" Perl script.
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.3.4.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BenLangmead/bowtie2")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1zl3cf327y2p7p03cavymbh7b00djc7lncfaqih33n96iy9q8ibp"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; replace BUILD_HOST and BUILD_TIME for deterministic build
           (substitute* "Makefile"
             (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
             (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:make-flags
       (list "allall"
             "WITH_TBB=1"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           (lambda _
             ;; Exercise the freshly built binaries from the build tree.
             (invoke "perl"
                     "scripts/test/simple_tests.pl"
                     "--bowtie2=./bowtie2"
                     "--bowtie2-build=./bowtie2-build")
             #t)))))
    (native-inputs
     `(("perl" ,perl)
       ("perl-clone" ,perl-clone)
       ("perl-test-deep" ,perl-test-deep)
       ("perl-test-simple" ,perl-test-simple)))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)
       ("python" ,python-wrapper)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (license license:gpl3+)))
1621
;; Bowtie 1, built from the upstream source zip.  BUILD_HOST and BUILD_TIME
;; are replaced with fixed values in the Makefile for a deterministic build.
;; There is no configure script and no "check" target.
(define-public bowtie1
  (package
    (name "bowtie1")
    (version "1.2.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bowtie-bio/bowtie/"
                                  version "/bowtie-src-x86_64.zip"))
              (sha256
               (base32
                "0vmiqdhc9dzyfy9sh6vgi7k9xy2hiw8g87vbamnc6cgpm179zsa4"))
              (modules '((guix build utils)))
              (snippet
               ;; Wrap the substitution in (begin ... #t), like the other
               ;; snippets in this file, so that the snippet's value is an
               ;; explicit #t instead of whatever 'substitute*' returns.
               '(begin
                  (substitute* "Makefile"
                    ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                    (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                    (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:make-flags
       (list "all"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://bowtie-bio.sourceforge.net/index.shtml")
    (synopsis "Fast aligner for short nucleotide sequence reads")
    (description
     "Bowtie is a fast, memory-efficient short read aligner. It aligns short
DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp
reads per hour. Bowtie indexes the genome with a Burrows-Wheeler index to
keep its memory footprint small: typically about 2.2 GB for the human
genome (2.9 GB for paired-end).")
    (license license:artistic2.0)))
1661
;; TopHat.  The bundled SeqAn and samtools are removed from the tarball; the
;; 'use-system-samtools phase then edits the build files and sources so that
;; the samtools found in PATH and its <samtools/...> headers are used.
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
             version ".tar.gz"))
       (sha256
        (base32 "19add02kv2xhd6ihd779dr7x35ggym3jqr0m5c4315i1yfb0p11p"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled SeqAn and samtools
           (for-each delete-file-recursively
                     '("src/SeqAn-1.4.2"
                       "src/samtools-0.1.18"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-samtools
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((samtools (which "samtools")))
               ;; Drop every reference to the bundled samtools from the
               ;; build rules.
               (substitute* "src/Makefile.in"
                 (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ kept) kept)
                 (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
                 (("SAMPROG = samtools_0\\.1\\.18") "")
                 (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
                 (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
               ;; Refer to the samtools executable by its absolute file name.
               (substitute* '("src/common.cpp"
                              "src/tophat.py")
                 (("samtools_0.1.18") samtools)))
             ;; Headers included with double quotes ...
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             ;; ... and headers included with angle brackets.
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (native-inputs
     `(("gcc" ,gcc-5)))                 ;; doesn't build with later versions
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python-2)
       ("samtools" ,samtools-0.1)
       ("seqan" ,seqan-1)
       ("zlib" ,zlib)))
    (home-page "https://ccb.jhu.edu/software/tophat/index.shtml")
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
1729
;; BWA.  There is neither a "configure" script nor a "check" target, and the
;; 'install phase is replaced by one that copies the executable, the static
;; library, the README and the manual page into the output by hand.
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.17")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/lh3/bwa/releases/download/v"
             version "/bwa-" version ".tar.bz2"))
       (sha256
        (base32 "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Each entry pairs a built file with its target directory,
               ;; relative to the output.
               (for-each (lambda (entry)
                           (install-file (car entry)
                                         (string-append out (cdr entry))))
                         '(("bwa"       . "/bin")
                           ("libbwa.a"  . "/lib")
                           ("README.md" . "/share/doc/bwa")
                           ("bwa.1"     . "/share/man/man1"))))
             #t)))))
    (inputs `(("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
1778
;; BWA-PSSM, a PSSM-aware modification of BWA.  Every field that is not
;; overridden below is inherited from the `bwa' package.
(define-public bwa-pssm
  (package (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pkerpedjiev/bwa-pssm")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "076c4q0cdqz8jgylb067y9zmvxglppnzi3qiscn0xiypgc6lgb5r"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    ;; Spelling fixed: "adaptible" -> "adaptable".
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
1808
;; bwa-meth, an aligner for bisulfite sequencing (BS-Seq) reads that
;; invokes BWA.
(define-public bwa-meth
  (package
    (name "bwa-meth")
    (version "0.2.2")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/brentp/bwa-meth")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "17j31i7zws5j7mhsq9x3qgkxly6mlmrgwhfq0qbflgxrmx04yaiz"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'keep-references-to-bwa
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "bwameth.py"
               ;; Replace "bwa mem" and "bwa index" with the file name that
               ;; `which' reports for "bwa", followed by the sub-command.
               (("bwa (mem|index)" _ command)
                (string-append (which "bwa") " " command))
               ;; There's an ill-advised check for "samtools" on PATH.
               (("^checkX.*") ""))
             #t)))))
    (inputs
     `(("bwa" ,bwa)))
    (native-inputs
     `(("python-toolshed" ,python-toolshed)))
    (home-page "https://github.com/brentp/bwa-meth")
    ;; Spelling fixed: "accurante" -> "accurate".
    (synopsis "Fast and accurate alignment of BS-Seq reads")
    (description
     "BWA-Meth works for single-end reads and for paired-end reads from the
directional protocol (most common). It uses the method employed by
methylcoder and Bismark of in silico conversion of all C's to T's in both
reference and reads. It recovers the original read (needed to tabulate
methylation) by attaching it as a comment which BWA appends as a tag to the
read. It performs favorably to existing aligners gauged by number of on and
off-target reads for a capture method that targets CpG-rich region.")
    (license license:expat)))
1849
;; bx-python, fetched from PyPI and built with the Python build system.
(define-public python-bx-python
  (package
    (name "python-bx-python")
    (version "0.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "bx-python" version))
       (sha256
        (base32
         "11kksg2rbzihpmcid823xvg42xi88m7sz58rzk29abybkxy0rszs"))))
    (build-system python-build-system)
    ;; The test data are not included, so the tests would fail.
    (arguments
     '(#:tests? #f))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-six" ,python-six)))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("python-lzo" ,python-lzo)
       ("python-nose" ,python-nose)
       ("python-cython" ,python-cython)))
    (home-page "https://github.com/bxlab/bx-python")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
1878
;; Variant of python-bx-python produced by `package-with-python2'.
(define-public python2-bx-python
  (package-with-python2 python-bx-python))
1881
;; Pysam, built from the Git repository against the external htslib input
;; instead of the bundled copy.
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.15.1")
    (source (origin
              (method git-fetch)
              ;; Test data is missing on PyPi.
              (uri (git-reference
                    (url "https://github.com/pysam-developers/pysam")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1vj367w6xbn9bpmksm162l1aipf7cj97h1q83y7jcpm33ihwpf7x"))
              (modules '((guix build utils)))
              (snippet '(begin
                          ;; Drop bundled htslib. TODO: Also remove samtools
                          ;; and bcftools.
                          (delete-file-recursively "htslib")
                          #t))))
    (build-system python-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (srfi srfi-26)
                  (guix build python-build-system)
                  (guix build utils))
       #:phases
       (modify-phases %standard-phases
         ;; Point the build at the "htslib" input and set the ncurses
         ;; related compiler and linker flags.
         (add-before 'build 'set-flags
           (lambda* (#:key inputs #:allow-other-keys)
             (setenv "HTSLIB_MODE" "external")
             (setenv "HTSLIB_LIBRARY_DIR"
                     (string-append (assoc-ref inputs "htslib") "/lib"))
             (setenv "HTSLIB_INCLUDE_DIR"
                     (string-append (assoc-ref inputs "htslib") "/include"))
             (setenv "LDFLAGS" "-lncurses")
             (setenv "CFLAGS" "-D_CURSES_LIB=1")
             #t))
         (replace 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             ;; Honor #:tests? so that the test suite can be disabled
             ;; without having to override this phase.
             (when tests?
               ;; This file contains tests that require a connection to the
               ;; internet.
               (delete-file "tests/tabix_test.py")
               ;; FIXME: This test fails
               (delete-file "tests/AlignmentFile_test.py")
               ;; Add first subdirectory of "build" directory to PYTHONPATH.
               (setenv "PYTHONPATH"
                       (string-append
                        (getenv "PYTHONPATH")
                        ":" (getcwd) "/build/"
                        (car (scandir "build"
                                      (negate (cut string-prefix? "." <>))))))
               ;; Step out of source dir so python does not import from CWD.
               (with-directory-excursion "tests"
                 (setenv "HOME" "/tmp")
                 (invoke "make" "-C" "pysam_data")
                 (invoke "make" "-C" "cbcf_data")
                 ;; Running nosetests without explicitly asking for a single
                 ;; process leads to a crash.  Running with multiple processes
                 ;; fails because the tests are not designed to run in parallel.

                 ;; FIXME: tests keep timing out on some systems.
                 (invoke "nosetests" "-v" "--processes" "1")))
             #t)))))
    (propagated-inputs
     `(("htslib" ,htslib)))    ; Included from installed header files.
    (inputs
     `(("ncurses" ,ncurses)
       ("curl" ,curl)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ;; Dependencies below are for tests only.
       ("samtools" ,samtools)
       ("bcftools" ,bcftools)
       ("python-nose" ,python-nose)))
    (home-page "https://github.com/pysam-developers/pysam")
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (license license:expat)))
1964
;; Variant of python-pysam produced by `package-with-python2'.
(define-public python2-pysam
  (package-with-python2 python-pysam))
1967
;; twobitreader, fetched from the upstream Git repository at the tag named
;; after the version.
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/benjschiller/twobitreader")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1qbxvv1h58cismbk1anpjrkpghsaiy64a11ir3lhy6qch6xf8n62"))))
    (build-system python-build-system)
    ;; Tests are not included
    (arguments
     '(#:tests? #f))
    (native-inputs
     `(("python-sphinx" ,python-sphinx)))
    (home-page "https://github.com/benjschiller/twobitreader")
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (license license:artistic2.0)))
1992
;; Variant of python-twobitreader produced by `package-with-python2'.
(define-public python2-twobitreader
  (package-with-python2 python-twobitreader))
1995
;; plastid, fetched from PyPI; its Python dependencies are propagated.
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.4.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plastid" version))
       (sha256
        (base32
         "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
    (build-system python-build-system)
    (arguments
     ;; Some test files are not included.
     `(#:tests? #f))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)
       ("python-biopython" ,python-biopython)
       ("python-twobitreader" ,python-twobitreader)
       ("python-termcolor" ,python-termcolor)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (home-page "https://github.com/joshuagryphon/plastid")
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (license license:bsd-3)))
2028
;; Variant of python-plastid produced by `package-with-python2'.
(define-public python2-plastid
  (package-with-python2 python-plastid))
2031
;; TEToolkit (TEtranscripts and TEcount), built with Python 2.  The scripts
;; are patched to call external tools by the file names `which' reports, and
;; the installed executables are wrapped so that they find the R packages.
(define-public tetoolkit
  (package
    (name "tetoolkit")
    (version "2.0.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/mhammell-laboratory/tetoolkit")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1yzi0kfpzip8zpjb82x1ik6h22yzfyjiz2dv85v6as2awwqvk807"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; not guaranteed to work with Python 3
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-writable
           (lambda _
             (for-each make-file-writable (find-files "."))
             #t))
         (add-after 'unpack 'patch-invocations
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Commands embedded in the two executables.
             (substitute* '("bin/TEtranscripts"
                            "bin/TEcount")
               (("'sort ")
                (string-append "'" (which "sort") " "))
               (("'rm -f ")
                (string-append "'" (which "rm") " -f "))
               (("'Rscript'") (string-append "'" (which "Rscript") "'")))
             (substitute* "TEToolkit/IO/ReadInputs.py"
               (("BamToBED") (which "bamToBed")))
             (substitute* "TEToolkit/Normalization.py"
               (("\"Rscript\"")
                (string-append "\"" (which "Rscript") "\"")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure the executables find R packages.
             (let ((out (assoc-ref outputs "out")))
               (for-each
                (lambda (script)
                  (wrap-program (string-append out "/bin/" script)
                    `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
                '("TEtranscripts"
                  "TEcount")))
             #t)))))
    (inputs
     `(("coreutils" ,coreutils)
       ("bedtools" ,bedtools)
       ("python-argparse" ,python2-argparse)
       ("python-pysam" ,python2-pysam)
       ("r-minimal" ,r-minimal)
       ("r-deseq2" ,r-deseq2)))
    (home-page "https://github.com/mhammell-laboratory/tetoolkit")
    (synopsis "Transposable elements in differential enrichment analysis")
    ;; Grammar fixed: "This is package" -> "This is a package", and
    ;; "take ... and annotates" -> "take ... and annotate".
    (description
     "This is a package for including transposable elements in differential
enrichment analysis of sequencing datasets. TEtranscripts and TEcount take
RNA-seq (and similar data) and annotate reads to both genes and transposable
elements. TEtranscripts then performs differential analysis using DESeq2.
Note that TEtranscripts and TEcount rely on specially curated GTF files, which
are not included due to their size.")
    (license license:gpl3+)))
2097
;; CD-HIT, built from the upstream release tarball with plain "make".
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.6.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/weizhongli/cdhit"
                           "/releases/download/V" version
                           "/cd-hit-v" version
                           "-2017-0621-source.tar.gz"))
       (sha256
        (base32
         "1b4mwm2520ixjbw57sil20f9iixzw4bkdqqwgg1fc3pzm6rz4zmn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags
       (list
        ;; Executables are copied directly to the PREFIX.
        (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin")
        ;; Support longer sequences (e.g. Pacbio sequences)
        "MAX_SEQ=60000000")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; No "configure" script
         ;; Remove sources of non-determinism: the uses of __DATE__ and
         ;; __TIME__ in the two source files below.
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
             #t))
         ;; The "install" target does not create the target directory.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out")))
               (mkdir-p (string-append prefix "/bin")))
             #t)))))
    (inputs
     `(("perl" ,perl)))
    (home-page "http://weizhongli-lab.org/cd-hit/")
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    ;; The manual says: "It can be copied under the GNU General Public License
    ;; version 2 (GPLv2)."
    (license license:gpl2)))
2148
;; CLIPper, built with Python 2 from the upstream Git repository.
(define-public clipper
  (package
    (name "clipper")
    (version "1.2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/YeoLab/clipper")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0fja1rj84wp9vpj8rxpj3n8zqzcqq454m904yp9as1w4phccirjb"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove unnecessary setup dependency
           (substitute* "setup.py"
             (("setup_requires = .*") ""))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only Python 2 is supported
       #:phases
       (modify-phases %standard-phases
         ;; This is fixed in upstream commit
         ;; f6c2990198f906bf97730d95695b4bd5a6d01ddb.
         (add-after 'unpack 'fix-typo
           (lambda _
             ;; Blank out every line that begins with "sc".
             (substitute* "clipper/src/readsToWiggle.pyx"
               (("^sc.*") ""))
             #t)))))
    (inputs
     `(("htseq" ,python2-htseq)
       ("python-pybedtools" ,python2-pybedtools)
       ("python-cython" ,python2-cython)
       ("python-scikit-learn" ,python2-scikit-learn)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-pandas" ,python2-pandas)
       ("python-pysam" ,python2-pysam)
       ("python-numpy" ,python2-numpy)
       ("python-scipy" ,python2-scipy)))
    (native-inputs
     ;; All of these are needed for the tests only.
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("python-pytz" ,python2-pytz)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
2200
;; CodingQuarry, installed by hand: the documentation, the "QuarryFiles"
;; directory, the executable and a helper script are copied into the output.
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://sourceforge/codingquarry/CodingQuarry_v"
             version ".tar.gz"))
       (sha256
        (base32
         "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (docdir (string-append prefix "/share/doc/codingquarry")))
               (install-file "INSTRUCTIONS.pdf" docdir)
               (copy-recursively "QuarryFiles"
                                 (string-append prefix "/QuarryFiles"))
               (install-file "CodingQuarry" bindir)
               (install-file "CufflinksGTF_to_CodingQuarryGFF3.py" bindir))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (native-inputs
     `(("python" ,python-2)))           ; Only Python 2 is supported
    ;; QUARRY_PATH is searched for "QuarryFiles" directories.
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (license license:gpl3+)))
2241
;; COUGER: nothing is compiled; the "src" directory and the "couger" script
;; are copied into the output and the script is wrapped with PYTHONPATH.
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32
         "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin")))
               (copy-recursively "src" (string-append prefix "/src"))
               (mkdir bindir)
               ;; Add "src" directory to module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 prefix "\")\nfrom argparse")))
               (install-file "couger" bindir))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure 'couger' runs with the correct PYTHONPATH.
             (let ((script (string-append (assoc-ref outputs "out")
                                          "/bin/couger"))
                   (python-path (getenv "PYTHONPATH")))
               (wrap-program script
                 `("PYTHONPATH" ":" prefix (,python-path))))
             #t)))))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r-minimal" ,r-minimal)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
2312
;; Clustal-Omega, built from the upstream tarball with the GNU build system
;; and its default arguments.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32
         "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
    (build-system gnu-build-system)
    (inputs
     `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
2335
;; CrossMap, fetched from PyPI and built with the Python build system.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.3.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "CrossMap" version))
       (sha256
        (base32
         "1sb2f2qbxya4fzw3yjl09vbrs8vfmw22zrygrvz004sf9gb1vkan"))))
    (build-system python-build-system)
    (inputs
     `(("python-bx-python" ,python-bx-python)
       ("python-numpy" ,python-numpy)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
2363
;; dnaio, a FASTA/FASTQ parsing library fetched from PyPI.
(define-public python-dnaio
  (package
    (name "python-dnaio")
    (version "0.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "dnaio" version))
       (sha256
        (base32
         "0f16m7hdlm0fz1n7y5asy0v9ghyrq17ni1p9iybq22ddzyd49r27"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)))
    ;; NOTE(review): dnaio is understood to import xopen at run time, so it
    ;; is propagated rather than listed as a build-only native input --
    ;; confirm against upstream's "install_requires".
    (propagated-inputs
     `(("python-xopen" ,python-xopen)))
    (home-page "https://github.com/marcelm/dnaio/")
    (synopsis "Read FASTA and FASTQ files efficiently")
    (description
     "dnaio is a Python library for fast parsing of FASTQ and also FASTA
files. The code was previously part of the cutadapt tool.")
    (license license:expat)))
2386
;; deeptoolsintervals, fetched from PyPI; zlib is its only declared input.
(define-public python-deeptoolsintervals
  (package
    (name "python-deeptoolsintervals")
    (version "0.1.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "deeptoolsintervals" version))
       (sha256
        (base32
         "1xnl80nblysj6dylj4683wgrfa425rkx4dp5k65hvwdns9pw753x"))))
    (build-system python-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/deeptools/deeptools_intervals")
    (synopsis "Create GTF-based interval trees with associated meta-data")
    (description
     "This package provides a Python module creating/accessing GTF-based
interval trees with associated meta-data. It is primarily used by the
@code{deeptools} package.")
    (license license:expat)))
2407
;; deepTools, fetched from the upstream Git repository at the tag named
;; after the version.
(define-public python-deeptools
  (package
    (name "python-deeptools")
    (version "3.4.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/deepTools")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0l09vyynz6s6w7fnyd94rpys4a6aja6kp4gli64pngdxdz3md1nl"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-numpydoc" ,python-numpydoc)
       ("python-py2bit" ,python-py2bit)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)
       ("python-deeptoolsintervals" ,python-deeptoolsintervals)
       ("python-plotly" ,python-plotly-2.4.1)))
    (home-page "https://pypi.org/project/deepTools/")
    (synopsis "Useful tools for exploring deep sequencing data")
    (description "This package addresses the challenge of handling large amounts
of data that are now routinely generated from DNA sequencing centers.
@code{deepTools} contains useful modules to process the mapped reads data for
multiple quality checks, creating normalized coverage files in standard bedGraph
and bigWig file formats, that allow comparison between different files. Finally,
using such normalized and standardized files, deepTools can create many
publication-ready visualizations to identify enrichments and for functional
annotations of the genome.")
    ;; The file deeptools/cm.py is licensed under the BSD license.  The
    ;; remainder of the code is licensed under the MIT license.
    (license (list license:bsd-3 license:expat))))
2448
;; Keep the former variable name `deeptools' available as a deprecated
;; alias of `python-deeptools'.
2449 (define-deprecated deeptools python-deeptools)
2450
;; Cutadapt, fetched from PyPI and built with the Python build system.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "2.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "cutadapt" version))
       (sha256
        (base32
         "1vqmsfkm6llxzmsz9wcfcvzx9a9f8iabvwik2rbyn7nc4wm25z89"))))
    (build-system python-build-system)
    (inputs
     `(("python-dnaio" ,python-dnaio)
       ("python-xopen" ,python-xopen)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-setuptools-scm" ,python-setuptools-scm)))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
2475
;; libBigWig, built with plain "make": the compiler and the installation
;; prefix are passed as make flags, and the configure phase is removed.
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.4.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dpryan79/libBigWig")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "09693dmf1scdac5pyq6qyn8b4mcipvnmc370k9a5z41z81m3dcsj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:tests? #f                      ; tests require access to the web
       #:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (native-inputs
     `(("doxygen" ,doxygen)
       ;; Needed for tests.
       ("python" ,python-2)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
2512
;; pyBigWig, built against the packaged libBigWig: the bundled copy is
;; deleted from the source and "BigWig" is added to the libraries that
;; setup.py links.
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.3.17")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32
         "157x6v48y299zm382krf1dw08fdxg95im8lnabhp5vc94s04zxj1"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled libBigWig sources
           (delete-file-recursively "libBigWig")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-libBigWig
           (lambda _
             ;; Put "BigWig" at the front of the "libs" list.
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
2550
;; Variant of python-pybigwig produced by `package-with-python2'.
(define-public python2-pybigwig
  (package-with-python2 python-pybigwig))
2553
;; DendroPy, fetched from the upstream Git repository.
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.4.0")
    (source
     (origin
       (method git-fetch)
       ;; Source from GitHub so that tests are included.
       (uri (git-reference
             (url "https://github.com/jeetsukumaran/DendroPy")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "097hfyv2kaf4x92i4rjx0paw2cncxap48qivv8zxng4z7nhid0x9"))))
    (build-system python-build-system)
    (home-page "https://dendropy.org/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)))
2577
;; Python 2 variant of python-dendropy.  It adds a phase that renames one
;; failing test and appends the arguments of the generated variant.
(define-public python2-dendropy
  (let ((py2-base (package-with-python2 python-dendropy)))
    (package
      (inherit py2-base)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'remove-failing-test
             (lambda _
               ;; This test fails when the full test suite is run, as documented
               ;; at https://github.com/jeetsukumaran/DendroPy/issues/74
               (substitute* "tests/test_dataio_nexml_reader_tree_list.py"
                 (("test_collection_comments_and_annotations")
                  "do_not_test_collection_comments_and_annotations"))
               #t)))
         ,@(package-arguments py2-base))))))
2594
;; py2bit, fetched from PyPI and built with the default Python build
;; system settings.
(define-public python-py2bit
  (package
    (name "python-py2bit")
    (version "0.3.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "py2bit" version))
              (sha256
               (base32
                "1vw2nvw1yrl7ikkqsqs1pg239yr5nspvd969r1x9arms1k25a1a5"))))
    (build-system python-build-system)
    (home-page "https://github.com/dpryan79/py2bit")
    (synopsis "Access 2bit files using lib2bit")
    (description
     "This package provides Python bindings for lib2bit to access 2bit files
with Python.")
    (license license:expat)))
2613
;; Delly, built with plain "make" against the packaged htslib; the bundled
;; "src/htslib" directory is deleted from the source.
(define-public delly
  (package
    (name "delly")
    (version "0.7.9")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dellytools/delly")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "034jqsxswy9gqdh2zkgc1js99qkv75ks4xvzgmh0284sraagv61z"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file-recursively "src/htslib")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       #:make-flags
       (list "PARALLEL=1"               ; Allow parallel execution at run-time.
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         ;; Copy the "excludeTemplates" directory into the output.
         (add-after 'install 'install-templates
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((destination (string-append (assoc-ref outputs "out")
                                               "/share/delly/templates")))
               (mkdir-p destination)
               (copy-recursively "excludeTemplates" destination)
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("htslib" ,htslib)
       ("zlib" ,zlib)
       ("bzip2" ,bzip2)))
    (home-page "https://github.com/dellytools/delly")
    (synopsis "Integrated structural variant prediction method")
    (description "Delly is an integrated structural variant prediction method
that can discover and genotype deletions, tandem duplications, inversions and
translocations at single-nucleotide resolution in short-read massively parallel
sequencing data. It uses paired-ends and split-reads to sensitively and
accurately delineate genomic rearrangements throughout the genome.")
    (license license:gpl3+)))
2660
;; DIAMOND: BLAST-compatible protein aligner, built with CMake.  The only
;; source change is stripping "-march=native" from CMakeLists.txt.
(define-public diamond
  (package
    (name "diamond")
    (version "0.9.30")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bbuchfink/diamond")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0k6f3kb6cniw11xw6763kkbs1sl0yack7xsy7q5fl5v170ssphq4"))))
    (build-system cmake-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Remove the compiler flag that tunes the code to the build
         ;; machine's CPU.
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (home-page "https://github.com/bbuchfink/diamond")
    (license license:agpl3+)))
2694
;; Discrover: motif discovery tool, built with CMake.  Three extra phases
;; patch the documentation sources and headers and set HOME before the build.
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/maaskola/discrover")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "173fwi2vb6a5kp406hm3jj6j7v4whww796f2qcygp4rpvamh307y"))))
    (build-system cmake-build-system)
    (native-inputs
     `(("texlive" ,(texlive-union
                    (list texlive-fonts-cm
                          texlive-fonts-amsfonts
                          texlive-latex-doi
                          texlive-latex-examplep
                          texlive-latex-hyperref
                          texlive-latex-ms
                          texlive-latex-natbib
                          texlive-bibtex   ; style files used by natbib
                          texlive-latex-pgf ; tikz
                          texlive-latex-verbatimbox)))
       ("imagemagick" ,imagemagick)))
    (inputs
     `(("boost" ,boost)
       ("cairo" ,cairo)
       ("rmath-standalone" ,rmath-standalone)))
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-latex-errors
           (lambda _
             ;; The bibliography is edited with the default port encoding
             ;; unset for the duration of the substitution.
             (with-fluids ((%default-port-encoding #f))
               (substitute* "doc/references.bib"
                 (("\\{S\\}illanp[^,]+,")
                  "{S}illanp{\\\"a}{\\\"a},")))
             ;; XXX: pdflatex keeps complaining about these characters, which
             ;; reach the manual through the generated
             ;; discrover-cli-help.txt, so spell them out in ASCII.
             (substitute* "src/hmm/cli.cpp"
               (("µ") "mu")
               (("η") "eta")
               (("≤") "<="))
             ;; This seems to be a syntax error.
             (substitute* "doc/discrover-manual.tex"
               (("theverbbox\\[t\\]") "theverbbox"))
             #t))
         ;; Add "#include <random>" right after the include guard of two
         ;; headers.
         (add-after 'unpack 'add-missing-includes
           (lambda _
             (substitute* "src/executioninformation.hpp"
               (("#define EXECUTIONINFORMATION_HPP" guard)
                (string-append guard "\n#include <random>")))
             (substitute* "src/plasma/fasta.hpp"
               (("#define FASTA_HPP" guard)
                (string-append guard "\n#include <random>")))
             #t))
         ;; FIXME: this is needed because we're using texlive-union, which
         ;; doesn't handle fonts correctly.  It expects to be able to generate
         ;; fonts in the home directory.
         (add-before 'build 'setenv-HOME
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (home-page "https://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (license license:gpl3+)))
2767
;; EIGENSOFT: population genetics tools.  The pre-built binaries shipped in
;; the checkout are deleted and everything is rebuilt from the Makefile in the
;; "src" sub-directory.
(define-public eigensoft
  (package
    (name "eigensoft")
    (version "7.2.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/DReichLab/EIG")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (modules '((guix build utils)))
              ;; Remove pre-built binaries, leaving an empty "bin" directory.
              (snippet
               '(begin
                  (delete-file-recursively "bin")
                  (mkdir "bin")
                  #t))
              (sha256
               (base32
                "1c141fqvhnzibmnf22sv23vbmzm20kjjyrib44cfh75wyndp2d9k"))))
    (build-system gnu-build-system)
    (inputs
     `(("gsl" ,gsl)
       ("lapack" ,lapack)
       ("openblas" ,openblas)
       ("perl" ,perl)
       ("gfortran" ,gfortran "lib")))
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure phase, but the Makefile is in a
         ;; sub-directory, so enter it here instead.
         (replace 'configure
           (lambda _
             (chdir "src")
             #t))
         ;; The provided install target only copies executables to the "bin"
         ;; directory in the build root; copy them to the output from there.
         (add-after 'install 'actually-install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/bin"))
                   (built-files (find-files "../bin" ".*")))
               (for-each (lambda (program)
                           (install-file program target))
                         built-files)
               #t))))))
    (synopsis "Tools for population genetics")
    (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
    (home-page "https://github.com/DReichLab/EIG")
    ;; The license of the eigensoft tools is Expat, but since it's
    ;; linking with the GNU Scientific Library (GSL) the effective
    ;; license is the GPL.
    (license license:gpl3+)))
2826
;; Entrez Direct: scripts for querying the NCBI databases.  Nothing is
;; compiled here: the scripts are copied to bin/, the Go helpers are symlinked
;; from the "edirect-go-programs" input, and every installed file is wrapped.
(define-public edirect
  (package
    (name "edirect")
    (version "13.3.20200128")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect"
                           "/versions/" version
                           "/edirect-" version ".tar.gz"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file "Mozilla-CA.tar.gz")
           ;; This go library does not have any license, so import a
           ;; different unidecode module in its place.
           (substitute* "rchive.go"
             (("github.com/fiam/gounidecode/unidecode")
              "golang.org/rainycape/unidecode"))
           #t))
       (sha256
        (base32
         "093zp7klv81ph0y8mm8d78a9hnpfxbv2kdym70gzdf3vz176rw33"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (delete 'check)                ; simple check after install
         (add-after 'unpack 'patch-programs
           (lambda _
             ;; Ignore errors about missing xtract.Linux and rchive.Linux.
             (substitute* "pm-refresh"
               (("cat \\\"\\$target")
                "grep ^[[:digit:]] \"$target"))
             #t))
         ;; Copy the scripts to bin/ and link the two Go programs under the
         ;; names "xtract" and "rchive".
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin"))
                   (go-programs (assoc-ref inputs "edirect-go-programs")))
               (for-each
                (lambda (script)
                  (install-file script bindir))
                '("archive-pubmed" "asp-cp" "asp-ls" "download-ncbi-data"
                  "download-pubmed" "edirect.pl" "efetch" "epost" "esearch"
                  "fetch-pubmed" "ftp-cp" "ftp-ls" "has-asp" "index-pubmed"
                  "pm-prepare" "pm-refresh" "pm-stash" "pm-collect"
                  "pm-index" "pm-invert" "pm-merge" "pm-promote"))
               (symlink (string-append go-programs "/bin/xtract.Linux")
                        (string-append bindir "/xtract"))
               (symlink (string-append go-programs "/bin/rchive.Linux")
                        (string-append bindir "/rchive")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure everything can run in a pure environment.
             (let ((out (assoc-ref outputs "out"))
                   (perl5lib (getenv "PERL5LIB")))
               (for-each
                (lambda (program)
                  ;; First wrapper: the Perl module search path of the build.
                  (wrap-program program
                    (list "PERL5LIB" ":" 'prefix (list perl5lib)))
                  ;; Second wrapper: the package's own bin/ followed by the
                  ;; directories of the external tools.
                  (wrap-program program
                    (list "PATH" ":" 'prefix
                          (cons (string-append out "/bin")
                                (map (lambda (tool)
                                       (dirname (which tool)))
                                     '("sed" "gzip" "grep" "perl"
                                       "uname"))))))
                (find-files out ".")))
             #t))
         ;; Stand-in for the deleted 'check phase: run the installed
         ;; edirect.pl once.
         (add-after 'wrap-program 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((edirect.pl (string-append (assoc-ref outputs "out")
                                              "/bin/edirect.pl")))
               (invoke edirect.pl "-filter" "-help"))
             #t)))))
    (inputs
     `(("edirect-go-programs" ,edirect-go-programs)
       ("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl-xml-simple" ,perl-xml-simple)
       ("perl" ,perl)))
    (native-search-paths
     ;; Ideally this should be set for LWP somewhere.
     (list (search-path-specification
            (variable "PERL_LWP_SSL_CA_FILE")
            (file-type 'regular)
            (separator #f)
            (files '("/etc/ssl/certs/ca-certificates.crt")))))
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (home-page "https://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (license license:public-domain)))
2942
;; The Go programs from the edirect source tree.  Every field not overridden
;; below is inherited from edirect; native and propagated inputs are cleared.
(define-public edirect-go-programs
  (package
    (inherit edirect)
    (name "edirect-go-programs")
    (build-system go-build-system)
    (native-inputs '())
    (propagated-inputs '())
    (inputs
     `(("go-github-com-fatih-color" ,go-github-com-fatih-color)
       ("go-github-com-fogleman-gg" ,go-github-com-fogleman-gg)
       ("go-github-com-gedex-inflector" ,go-github-com-gedex-inflector)
       ("go-github-com-golang-freetype" ,go-github-com-golang-freetype)
       ("go-github-com-klauspost-cpuid" ,go-github-com-klauspost-cpuid)
       ("go-github-com-pbnjay-memory" ,go-github-com-pbnjay-memory)
       ("go-github-com-surgebase-porter2" ,go-github-com-surgebase-porter2)
       ("go-golang-org-rainycape-unidecode" ,go-golang-org-rainycape-unidecode)
       ("go-golang-org-x-image" ,go-golang-org-x-image)
       ("go-golang-org-x-text" ,go-golang-org-x-text)))
    (arguments
     `(#:install-source? #f
       #:tests? #f                      ; No tests.
       #:import-path "ncbi.nlm.nih.gov/entrez/edirect"
       #:phases
       (modify-phases %standard-phases
         ;; Run "go build -v -x" once per program from inside the import
         ;; path directory.  Each sub-list holds the arguments that follow
         ;; the common prefix; 'invoke' raises an error on the first failure.
         (replace 'build
           (lambda* (#:key import-path #:allow-other-keys)
             (with-directory-excursion (string-append "src/" import-path)
               (for-each
                (lambda (build-args)
                  (apply invoke "go" "build" "-v" "-x" build-args))
                '(("j2x.go")
                  ("t2x.go")
                  ("-o" "xtract.Linux" "xtract.go" "common.go")
                  ("-o" "rchive.Linux" "rchive.go" "common.go")
                  ("-o" "symbols.Linux" "s2p.go")))
               #t)))
         ;; Copy the five resulting executables to bin/, logging each one.
         (replace 'install
           (lambda* (#:key outputs import-path #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin"))
                   (build-dir (string-append "src/" import-path "/")))
               (for-each
                (lambda (program)
                  (format #t "installing ~a~%" program)
                  (install-file (string-append build-dir program) bindir))
                '("j2x" "t2x" "symbols.Linux" "xtract.Linux" "rchive.Linux"))
               #t))))))))
2986
;; Exonerate: pairwise sequence alignment tool.  Plain gnu-build-system
;; package; the only deviation from the defaults is a sequential build.
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("glib" ,glib)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    ;; The description used to read "using a many alignment models".
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
3016
;; eXpress: streaming quantification tool, built with CMake.  The build files
;; are patched to use the bamtools input and shared libraries, and the
;; update-check code is removed from main.cpp.
(define-public express
  (package
    (name "express")
    (version "1.5.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/adarob/eXpress")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "18nb22n7x820fzjngf4qgyb3mspqkw7xyk7v7s5ps6wfrd8qwscb"))))
    (build-system cmake-build-system)
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               ;; Top-level build file: turn off static Boost and replace
               ;; the in-tree bamtools include directory.
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               ;; src/ build file: replace the in-tree bamtools library
               ;; directory and name the shared protobuf library.
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib"))
                 (("libprotobuf.a") "libprotobuf.so")))
             #t))
         ;; Drop both the include and the call of the version check.
         (add-after 'unpack 'remove-update-check
           (lambda _
             (substitute* "src/main.cpp"
               (("#include \"update_check.h\"") "")
               (("check_version\\(PACKAGE_VERSION\\);") ""))
             #t)))))
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (license license:artistic2.0)))
3067
;; Express Beta Diversity: built with the Makefile in the "source"
;; sub-directory; the binary and one helper script are installed by hand.
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dparks1134/ExpressBetaDiversity")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0s0yzg5c21349rh7x4w9266jsvnp7j1hp9cf8sk32hz8nvrj745x"))))
    (build-system gnu-build-system)
    (inputs
     `(("python" ,python-2)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The remaining phases run from inside "source", hence the "../"
         ;; prefixes below.
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         ;; Run the freshly built program with its "-u" option.
         (replace 'check
           (lambda _
             (invoke "../bin/ExpressBetaDiversity" "-u")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (install-file "../scripts/convertToEBD.py" bindir)
               (install-file "../bin/ExpressBetaDiversity" bindir)
               #t))))))
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (home-page "https://github.com/dparks1134/ExpressBetaDiversity")
    (license license:gpl3+)))
3105
;; FastTree: the source is a single C file downloaded as-is, so there is
;; nothing to unpack or configure.  Two executables are compiled from it:
;; "FastTree" and the OpenMP-enabled "FastTreeMP".
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.10")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://www.microbesonline.org/fasttree/FastTree-"
                    version ".c"))
              (sha256
               (base32
                "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Compile SOURCE into the executable PROGRAM.  EXTRA-FLAGS are
             ;; placed in front of the flags shared by both variants;
             ;; 'invoke' raises an error if gcc fails.
             (define (build-variant program . extra-flags)
               (apply invoke "gcc"
                      (append extra-flags
                              (list "-O3"
                                    "-finline-functions"
                                    "-funroll-loops"
                                    "-Wall"
                                    "-o" program
                                    source
                                    "-lm"))))
             (build-variant "FastTree")
             (build-variant "FastTreeMP" "-DOPENMP" "-fopenmp")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "FastTree" bin)
               (install-file "FastTreeMP" bin)
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    ;; The description used to read "a million of sequences".
    (description
     "FastTree can handle alignments with up to a million sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
3161
;; FASTX-Toolkit: built from the release tarball with the default
;; gnu-build-system phases, using GCC 6 as the compiler.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32
         "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("gcc" ,gcc-6)                   ; doesn't build with later versions
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (license license:agpl3+)))
3194
;; Flexbar: barcode and adapter removal tool, built with CMake.  The check
;; and install phases are replaced: tests are a shell script in the source
;; tree, and only the "flexbar" executable is installed.
(define-public flexbar
  (package
    (name "flexbar")
    (version "3.4.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/seqan/flexbar")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1pq9sxvdnldl14libk234m72dqhwgzs3acgl943wchwdqlcsi5r2"))))
    (build-system cmake-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Strip the flag that tunes the code to the build machine's CPU.
         (add-after 'unpack 'do-not-tune-to-CPU
           (lambda _
             (substitute* "src/CMakeLists.txt"
               ((" -march=native") ""))
             #t))
         ;; Put the current directory first on PATH, then run the test
         ;; script from the source tree.
         (replace 'check
           (lambda _
             (setenv "PATH" (string-append (getcwd) ":" (getenv "PATH")))
             (with-directory-excursion "../source/test"
               (invoke "bash" "flexbar_test.sh"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "flexbar" bindir))
             #t)))))
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (home-page "https://github.com/seqan/flexbar")
    (license license:bsd-3)))
3245
;; FragGeneScan: gene prediction for short reads.  There is no configure
;; script; the sources are patched to use absolute tool paths and a training
;; directory under share/, and the installed driver script is exercised in
;; place of a test suite.
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.30")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/fraggenescan/"
                       "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             ;; SHARE ends with a slash, so path components appended to it
             ;; below must not start with one.
             (let* ((out (assoc-ref outputs "out"))
                    (share (string-append out "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 ;; Refer to rm, mv and awk by absolute file name.
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 (("\\\"awk") (string-append "\"" (which "awk")))
                 ;; This script and other programs expect the training files
                 ;; to be in the non-standard location bin/train/XXX.  Change
                 ;; this to be share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 share
                                 "train/\".$FGS_train_file;")))
               ;; Same relocation for the C program.  No leading slash
               ;; before "train/", which would give ".../fraggenescan//train/".
               (substitute* "run_hmm.c"
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  (string-append " strcpy(train_dir, \"" share "train/\");"))))
             #t))
         (replace 'build
           (lambda _
             (invoke "make" "clean")
             (invoke "make" "fgs")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (share (string-append out "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bin)
               (install-file "FragGeneScan" bin)
               (copy-recursively "train" share))
             #t))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run one of the examples and check that
           ;; the output files get created.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (frag (string-append bin "run_FragGeneScan.pl")))
               ;; Test complete genome.
               (invoke frag
                       "-genome=./example/NC_000913.fna"
                       "-out=./test2"
                       "-complete=1"
                       "-train=complete")
               (unless (and (file-exists? "test2.faa")
                            (file-exists? "test2.ffn")
                            (file-exists? "test2.gff")
                            (file-exists? "test2.out"))
                 (error "Expected files do not exist."))
               ;; Test incomplete sequences.  Only the exit status is
               ;; checked here; 'invoke' raises an error on failure.
               (invoke frag
                       "-genome=./example/NC_000913-fgs.ffn"
                       "-out=out"
                       "-complete=0"
                       "-train=454_30")
               #t))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2)))           ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
3335
;; Fxtract: extract sequences from FASTA/FASTQ files.  The "util" directory
;; of the checkout is replaced before the build by a separate checkout of
;; ctSkennerton/util, pinned to UTIL-COMMIT.
(define-public fxtract
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ctSkennerton/fxtract")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0hab3gpwf4w9s87qlbswq6ws1qqybh4dcqk79q1ahyldzai5fgp5"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Remove the "util" directory (rmdir only succeeds if it is
           ;; empty) and populate it from the "ctskennerton-util" input.
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util")
                   (commit util-commit)))
             ;; Named with git-file-name like the other checkouts in this
             ;; file; the former hand-built name was misspelled
             ;; "ctstennerton-util-".
             (file-name (git-file-name "ctskennerton-util" util-commit))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
3400
;; GEMMA: linear mixed model solver for GWAS.  Built from the upstream
;; Makefile (no configure script) with make flags chosen per system.
(define-public gemma
  (package
    (name "gemma")
    (version "0.98")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/xiangzhou/GEMMA")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1s3ncnbn45r2hh1cvrqky1kbqq6546biypr4f5mkw1kqlrgyh0yg"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests included yet
       ;; FORCE_DYNAMIC=1 is passed on every system.  i686-linux adds
       ;; FORCE_32BIT=1, and everything other than x86_64-linux and
       ;; i686-linux adds NO_INTEL_COMPAT=1.
       #:make-flags
       '("FORCE_DYNAMIC=1"
         ,@(match (%current-system)
             ("x86_64-linux" '())
             ("i686-linux" '("FORCE_32BIT=1"))
             (_ '("NO_INTEL_COMPAT=1"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'find-eigen
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Ensure that Eigen headers can be found
             (let ((eigen (assoc-ref inputs "eigen")))
               (setenv "CPLUS_INCLUDE_PATH"
                       (string-append eigen "/include/eigen3")))
             #t))
         ;; Create the "bin" directory ahead of the build; the install
         ;; phase below copies "bin/gemma" from it.
         (add-before 'build 'bin-mkdir
           (lambda _
             (mkdir-p "bin")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "bin/gemma" bindir))
             #t)))))
    (inputs
     `(("eigen" ,eigen)
       ("gfortran" ,gfortran "lib")
       ("gsl" ,gsl)
       ("lapack" ,lapack)
       ("openblas" ,openblas)
       ("zlib" ,zlib)))
    (synopsis "Tool for genome-wide efficient mixed model association")
    (description
     "Genome-wide Efficient Mixed Model Association (GEMMA) provides a
standard linear mixed model resolver with application in genome-wide
association studies (GWAS).")
    (home-page "https://github.com/xiangzhou/GEMMA")
    (license license:gpl3)))
3460
;; GRIT: RNA-seq analysis tool, built with the python-build-system under
;; Python 2.  Two generated C files are deleted before the build.
(define-public grit
  (package
    (name "grit")
    (version "2.0.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nboley/grit")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1l5v8vfvfbrpmgnrvbrbv40d0arhxcnmxgv2f1mlcqfa3q6bkqm9"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python2-cython)))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda _
             ;; Delete these C files to force fresh generation from pyx
             ;; sources.
             (for-each delete-file
                       '("grit/sparsify_support_fns.c"
                         "grit/call_peaks_support_fns.c"))
             ;; setup.py names the module "Cython.Setup"; rewrite it to
             ;; "Cython.Build".
             (substitute* "setup.py"
               (("Cython.Setup") "Cython.Build"))
             #t)))))
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    ;; The canonical <http://grit-bio.org> home page times out as of 2020-01-21.
    (home-page "https://github.com/nboley/grit")
    (license license:gpl3+)))
3505
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
             version "-beta-source.zip"))
       (sha256
        (base32
         "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         ;; The configure phase is removed; the Makefile is patched and
         ;; used directly.
         (delete 'configure)
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; replace BUILD_HOST and BUILD_TIME for deterministic build
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             ;; Replace "/usr/bin/env" in the wrapper scripts with the env
             ;; found on PATH in the build environment.
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy every file whose name matches the hisat program
             ;; pattern into the output's bin directory.
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files
                          "."
                          "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "https://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads.  In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome.  These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
3572
(define-public hisat2
  (package
    (name "hisat2")
    (version "2.0.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
                           "/downloads/hisat2-" version "-source.zip"))
       (sha256
        (base32
         "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags (list "CC=gcc" "CXX=g++" "allall")
       ;; srfi-26 provides `cut', used by the install phase.
       #:modules ((guix build gnu-build-system)
                  (guix build utils)
                  (srfi srfi-26))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'make-deterministic
           (lambda _
             ;; Replace the `date` invocation in the Makefile with a
             ;; constant.
             (substitute* "Makefile"
               (("`date`") "0"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin/"))
                    (docdir (string-append prefix "/share/doc/hisat2/"))
                    (programs
                     (find-files
                      "."
                      "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (cut install-file <> bindir) programs)
               (mkdir-p docdir)
               (install-file "doc/manual.inc.html" docdir))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)                 ; needed for archive from ftp
       ("perl" ,perl)
       ("pandoc" ,ghc-pandoc)))         ; for documentation
    (home-page "https://ccb.jhu.edu/software/hisat2/index.shtml")
    (synopsis "Graph-based alignment of genomic sequencing reads")
    (description "HISAT2 is a fast and sensitive alignment program for mapping
next-generation sequencing reads (both DNA and RNA) to a population of human
genomes (as well as to a single reference genome).  In addition to using one
global @dfn{graph FM} (GFM) index that represents a population of human
genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
the whole genome.  These small indexes, combined with several alignment
strategies, enable rapid and accurate alignment of sequencing reads.  This new
indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
    ;; HISAT2 contains files from Bowtie2, which is released under
    ;; GPLv2 or later.  The HISAT2 source files are released under
    ;; GPLv3 or later.
    (license license:gpl3+)))
3630
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/hmmer/hmmer-"
                           version ".tar.gz"))
       (sha256
        (base32
         "171bivy6xhgjsz5nv53n81pc3frnwz29ylblawk2bv46szwjjqd5"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    ;; hmmer uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (home-page "http://hmmer.org/")
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments.  It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (license license:bsd-3)))
3656
(define-public htseq
  (package
    (name "htseq")
    (version "0.9.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "HTSeq" version))
       (sha256
        (base32
         "11flgb1381xdhk43bzbfm3vhnszkpqg6jk76rpa5xd1zbrvvlnxg"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)))
    (inputs
     `(("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)))
    ;; Numpy needs to be propagated when htseq is used as a Python library.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://htseq.readthedocs.io/")
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
3682
;; Variant of htseq obtained by passing it through `package-with-python2'.
(define-public python2-htseq (package-with-python2 htseq))
3685
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "2.3.0") ; last version without build dependency on gradle
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1b178ixcabanm834ydjl3jiakpyxdmki32hqfv2abrzn3rcwa28i"))
              (modules '((guix build utils)))
              (snippet
               ;; Delete pre-built binaries.  The empty "lib" directory is
               ;; recreated after the deletion.
               '(begin
                  (delete-file-recursively "lib")
                  (mkdir-p "lib")
                  #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Internet access
       #:jdk ,icedtea-8
       ;; Tell ant to place its "dist" directory inside the output.
       #:make-flags
       (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:build-target "all"
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "https://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data.  There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3735
(define-public java-htsjdk-latest
  (package
    (name "java-htsjdk")
    (version "2.14.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk")
                    (commit version)))
              ;; Same "NAME-VERSION-checkout" file name as before, now
              ;; produced by the helper the other git checkouts use.
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1lmya1fdjy03mz6zmdmd86j9v9vfhqb3952mqq075navx1i6g4bc"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Scala
       #:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _ (delete-file "build.xml") #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _ (delete-file-recursively "src/test") #t)))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-junit" ,java-junit)))
    (home-page "https://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data.  There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3778
3779 ;; This is needed for picard 2.10.3
(define-public java-htsjdk-2.10.1
  (package
    (inherit java-htsjdk-latest)
    (name "java-htsjdk")
    (version "2.10.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/samtools/htsjdk")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1kxh7slm2pm3x9p6jxa1wqsq9a31dhiiflhxnxqcisan4k3rwia2"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; tests require Scala
       #:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:phases
       (modify-phases %standard-phases
         ;; Both extra phases run after 'unpack: one deletes the shipped
         ;; build.xml, the other the test sources.
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _
             (delete-file "build.xml")
             #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _
             (delete-file-recursively "src/test")
             #t)))))))
3805
3806 ;; This version matches java-htsjdk 2.3.0. Later versions also require a more
3807 ;; recent version of java-htsjdk, which depends on gradle.
(define-public java-picard
  (package
    (name "java-picard")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1ll7mf4r3by92w2nhlmpa591xd1f46xlkwh59mq6fvbb5pdwzvx6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete pre-built binaries.
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           (substitute* "build.xml"
             ;; Remove build-time dependency on git.
             (("failifexecutionfails=\"true\"")
              "failifexecutionfails=\"false\"")
             ;; Use our htsjdk.
             (("depends=\"compile-htsjdk, ")
              "depends=\"")
             (("depends=\"compile-htsjdk-tests, ")
              "depends=\"")
             ;; Build picard-lib.jar before building picard.jar
             (("name=\"picard-jar\" depends=\"" all)
              (string-append all "picard-lib-jar, ")))
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:build-target "picard-jar"
       #:test-target "test"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:make-flags
       (list (string-append "-Dhtsjdk_lib_dir="
                            (assoc-ref %build-inputs "java-htsjdk")
                            "/share/java/htsjdk/")
             "-Dhtsjdk-classes=dist/tmp"
             (string-append "-Dhtsjdk-version="
                            ,(package-version java-htsjdk)))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         (add-after 'unpack 'use-our-htsjdk
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Make build.xml look for the htsjdk jars in the
             ;; "java-htsjdk" input.
             (let ((htsjdk (assoc-ref inputs "java-htsjdk")))
               (substitute* "build.xml"
                 (("\\$\\{htsjdk\\}/lib")
                  (string-append htsjdk "/share/java/htsjdk/"))))
             #t))
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             ;; Drop "compile" from the prerequisites of the "test" target.
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\""))
             #t))
         (replace 'install (install-jars "dist")))))
    (inputs
     `(("java-htsjdk" ,java-htsjdk)
       ("java-guava" ,java-guava)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats.  Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
3885
3886 ;; This is needed for dropseq-tools
(define-public java-picard-2.10.3
  (package
    (name "java-picard")
    (version "2.10.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/broadinstitute/picard")
                    (commit version)))
              (file-name (string-append "java-picard-" version "-checkout"))
              (sha256
               (base32
                "1ajlx31l6i1k3y2rhnmgq07sz99g2czqfqgkr9mihmdjp3gwjhvi"))))
    (build-system ant-build-system)
    (arguments
     `(#:jar-name "picard.jar"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:jdk ,icedtea-8
       #:main-class "picard.cmdline.PicardCommandLine"
       ;; The sxml modules are used by the 'edit-classpath-in-manifest
       ;; phase below to read, transform and write build.xml.
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform)
                  (sxml xpath))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _ (delete-file "build.xml") #t))
         ;; This is necessary to ensure that htsjdk is found when using
         ;; picard.jar as an executable.
         (add-before 'build 'edit-classpath-in-manifest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; The build.xml edited here is presumably the one generated by
             ;; the build system, since the shipped one was deleted after
             ;; 'unpack -- TODO confirm.
             (chmod "build.xml" #o664)
             ;; Write the transformed document to a temporary file, then
             ;; move it over the original once it is complete.
             (call-with-output-file "build.xml.new"
               (lambda (port)
                 (sxml->xml
                  (pre-post-order
                   (with-input-from-file "build.xml"
                     (lambda _ (xml->sxml #:trim-whitespace? #t)))
                   ;; Handler for every <target> element: the one whose name
                   ;; is "manifest" gets two extra child tasks appended; all
                   ;; other targets are rebuilt unchanged.
                   `((target . ,(lambda (tag . kids)
                                  (let ((name ((sxpath '(name *text*))
                                               (car kids)))
                                        ;; FIXME: We're breaking the line
                                        ;; early with a dummy path to
                                        ;; ensure that the store reference
                                        ;; isn't broken apart and can still
                                        ;; be found by the reference
                                        ;; scanner.
                                        ;; NOTE(review): whitespace inside this
                                        ;; string literal is significant;
                                        ;; confirm the start of the second line
                                        ;; against the upstream file.
                                        (msg (format #f
                                                     "\
Class-Path: /~a \
~a/share/java/htsjdk.jar${line.separator}${line.separator}"
                                                     ;; maximum line length is 70
                                                     (string-tabulate (const #\b) 57)
                                                     (assoc-ref inputs "java-htsjdk"))))
                                    (if (member "manifest" name)
                                        ;; Append a <replaceregexp> task and
                                        ;; an <echo> task that appends MSG to
                                        ;; ${manifest.file}.
                                        `(,tag ,@kids
                                               (replaceregexp
                                                (@ (file "${manifest.file}")
                                                   (match "\\r\\n\\r\\n")
                                                   (replace "${line.separator}")))
                                               (echo
                                                (@ (message ,msg)
                                                   (file "${manifest.file}")
                                                   (append "true"))))
                                        `(,tag ,@kids)))))
                     ;; Every other element and all text nodes pass through
                     ;; untouched.
                     (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                     (*text* . ,(lambda (_ txt) txt))))
                  port)))
             (rename-file "build.xml.new" "build.xml")
             #t)))))
    ;; htsjdk is propagated; the manifest Class-Path written above refers to
    ;; this same input.
    (propagated-inputs
     `(("java-htsjdk" ,java-htsjdk-2.10.1)))
    (native-inputs
     `(("java-testng" ,java-testng)
       ("java-guava" ,java-guava)))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats.  Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
3975
3976 ;; This is the last version of Picard to provide net.sf.samtools
(define-public java-picard-1.113
  (package (inherit java-picard)
    (name "java-picard")
    (version "1.113")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/broadinstitute/picard")
                    (commit version)))
              (file-name (string-append "java-picard-" version "-checkout"))
              (sha256
               (base32
                "0lkpvin2fz3hhly4l02kk56fqy8lmlgyzr9kmvljk6ry6l1hw973"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete pre-built binaries.
                  (delete-file-recursively "lib")
                  (mkdir-p "lib")
                  #t))))
    (build-system ant-build-system)
    (arguments
     `(#:build-target "picard-jar"
       #:test-target "test"
       ;; FIXME: the class path at test time is wrong.
       ;; [testng] Error: A JNI error has occurred, please check your installation and try again
       ;; [testng] Exception in thread "main" java.lang.NoClassDefFoundError: com/beust/jcommander/ParameterException
       #:tests? #f
       #:jdk ,icedtea-8
       ;; This is only used for tests.
       #:make-flags
       (list "-Dsamjdk.intel_deflater_so_path=lib/jni/libIntelDeflater.so")
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: This phase fails.
         (delete 'generate-jar-indices)
         ;; Do not use bundled ant bzip2.
         (add-after 'unpack 'use-ant-bzip
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("\\$\\{lib\\}/apache-ant-1.8.2-bzip2.jar")
                (string-append (assoc-ref inputs "ant")
                               "/lib/ant.jar")))
             #t))
         ;; Make the "test" target depend on "compile-tests" instead of
         ;; "compile", and drop "compile-tests" from the prerequisites of
         ;; "compile".
         (add-after 'unpack 'make-test-target-independent
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\"compile-tests, ")
               (("name=\"compile\" depends=\"compile-src, compile-tests\"")
                "name=\"compile\" depends=\"compile-src\""))
             #t))
         ;; Replace the null default of "intel_deflater_so_path" in
         ;; Defaults.java with the location where the 'install-jni-lib
         ;; phase puts libIntelDeflater.so.
         (add-after 'unpack 'fix-deflater-path
           (lambda* (#:key outputs #:allow-other-keys)
             (substitute* "src/java/net/sf/samtools/Defaults.java"
               (("getStringProperty\\(\"intel_deflater_so_path\", null\\)")
                (string-append "getStringProperty(\"intel_deflater_so_path\", \""
                               (assoc-ref outputs "out")
                               "/lib/jni/libIntelDeflater.so"
                               "\")")))
             #t))
         ;; Build the deflater library, because we've previously deleted the
         ;; pre-built one.  This can only be built with access to the JDK
         ;; sources.
         (add-after 'build 'build-jni
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "lib/jni")
             (mkdir-p "jdk-src")
             ;; Unpack the "jdk-src" input into ./jdk-src.
             (invoke "tar" "--strip-components=1" "-C" "jdk-src"
                     "-xf" (assoc-ref inputs "jdk-src"))
             ;; Generate the JNI header for IntelDeflater into lib/.
             (invoke "javah" "-jni"
                     "-classpath" "classes"
                     "-d" "lib/"
                     "net.sf.samtools.util.zip.IntelDeflater")
             ;; Compile IntelDeflater.c, then link it into
             ;; lib/jni/libIntelDeflater.so.
             (with-directory-excursion "src/c/inteldeflater"
               (invoke "gcc" "-I../../../lib" "-I."
                       (string-append "-I" (assoc-ref inputs "jdk")
                                      "/include/linux")
                       "-I../../../jdk-src/src/share/native/common/"
                       "-I../../../jdk-src/src/solaris/native/common/"
                       "-c" "-O3" "-fPIC" "IntelDeflater.c")
               (invoke "gcc" "-shared"
                       "-o" "../../../lib/jni/libIntelDeflater.so"
                       "IntelDeflater.o" "-lz" "-lstdc++"))
             #t))
         ;; We can only build everything else after building the JNI library.
         (add-after 'build-jni 'build-rest
           (lambda* (#:key make-flags #:allow-other-keys)
             (apply invoke `("ant" "all" ,@make-flags))
             #t))
         ;; Set JAVA6_HOME to the value of JAVA_HOME before 'build runs.
         (add-before 'build 'set-JAVA6_HOME
           (lambda _
             (setenv "JAVA6_HOME" (getenv "JAVA_HOME"))
             #t))
         (replace 'install (install-jars "dist"))
         ;; Install the shared library at the path written into
         ;; Defaults.java by 'fix-deflater-path.
         (add-after 'install 'install-jni-lib
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((jni (string-append (assoc-ref outputs "out")
                                       "/lib/jni")))
               (mkdir-p jni)
               (install-file "lib/jni/libIntelDeflater.so" jni)
               #t))))))
    (inputs
     `(("java-snappy-1" ,java-snappy-1)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-cofoja" ,java-cofoja)
       ("ant" ,ant) ; for bzip2 support at runtime
       ("zlib" ,zlib)))
    (native-inputs
     `(("ant-apache-bcel" ,ant-apache-bcel)
       ("ant-junit" ,ant-junit)
       ("java-testng" ,java-testng)
       ("java-commons-bcel" ,java-commons-bcel)
       ("java-jcommander" ,java-jcommander)
       ("jdk" ,icedtea-8 "jdk")
       ;; The "jdk-src" input is taken from the "jdk-drop" native input of
       ;; icedtea-8; the 'build-jni phase unpacks it.
       ("jdk-src" ,(car (assoc-ref (package-native-inputs icedtea-8) "jdk-drop")))))))
4093
(define-public fastqc
  (package
    (name "fastqc")
    (version "0.11.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
                           "projects/fastqc/fastqc_v"
                           version "_source.zip"))
       (sha256
        (base32
         "18rrlkhcrxvvvlapch4dpj6xc6mpayzys8qfppybi8jrpgx5cc5f"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:build-target "build"
       #:phases
       (modify-phases %standard-phases
         ;; Point build.xml at the jars from our inputs instead of the jar
         ;; file names it refers to by default.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("jbzip2-0.9.jar")
                (string-append (assoc-ref inputs "java-jbzip2")
                               "/share/java/jbzip2.jar"))
               ;; NOTE(review): the jar is named sam-1.112.jar although the
               ;; input is picard 1.113 -- presumably that is the name the
               ;; picard build produces; confirm.
               (("sam-1.103.jar")
                (string-append (assoc-ref inputs "java-picard-1.113")
                               "/share/java/sam-1.112.jar"))
               (("cisd-jhdf5.jar")
                (string-append (assoc-ref inputs "java-cisd-jhdf5")
                               "/share/java/sis-jhdf5.jar")))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; SHARE has no trailing slash so that EXE, and the symlink
             ;; pointing to it, do not contain a doubled "//".
             (let* ((out   (assoc-ref outputs "out"))
                    (bin   (string-append out "/bin"))
                    (share (string-append out "/share/fastqc"))
                    (exe   (string-append share "/fastqc")))
               (for-each mkdir-p (list bin share))
               (copy-recursively "bin" share)
               ;; Hard-code the absolute path of the java executable in the
               ;; wrapper script.
               (substitute* exe
                 (("my \\$java_bin = 'java';")
                  (string-append "my $java_bin = '"
                                 (assoc-ref inputs "java")
                                 "/bin/java';")))
               (chmod exe #o555)
               (symlink exe (string-append bin "/fastqc"))
               #t))))))
    (inputs
     `(("java" ,icedtea)
       ("perl" ,perl)                   ; needed for the wrapper script
       ("java-cisd-jhdf5" ,java-cisd-jhdf5)
       ("java-picard-1.113" ,java-picard-1.113)
       ("java-jbzip2" ,java-jbzip2)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/")
    (synopsis "Quality control tool for high throughput sequence data")
    (description
     "FastQC aims to provide a simple way to do some quality control
checks on raw sequence data coming from high throughput sequencing
pipelines.  It provides a modular set of analyses which you can use to
give a quick impression of whether your data has any problems of which
you should be aware before doing any further analysis.

The main functions of FastQC are:

@itemize
@item Import of data from BAM, SAM or FastQ files (any variant);
@item Providing a quick overview to tell you in which areas there may
  be problems;
@item Summary graphs and tables to quickly assess your data;
@item Export of results to an HTML based permanent report;
@item Offline operation to allow automated generation of reports
  without running the interactive application.
@end itemize\n")
    (license license:gpl3+)))
4172
(define-public fastp
  (package
    (name "fastp")
    (version "0.14.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/OpenGene/fastp")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1r6ms5zbf5rps4rgp4z73nczadl00b5rqylw8f684isfz27dp0xh"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       ;; BINDIR is set to the output's bin directory.
       #:make-flags
       (list (string-append "BINDIR=" (assoc-ref %outputs "out") "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Create the bin directory before the install phase runs.
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (mkdir-p bindir))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/OpenGene/fastp/")
    (synopsis "All-in-one FastQ preprocessor")
    (description
     "Fastp is a tool designed to provide fast all-in-one preprocessing for
FastQ files.  This tool has multi-threading support to afford high
performance.")
    (license license:expat)))
4208
(define-public htslib
  (package
    (name "htslib")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "16ljv43sc3fxmv63w7b2ff8m1s7h89xhazwmbm1bicz8axq8fjz0"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("curl" ,curl)
       ("openssl" ,openssl)))
    ;; This is referred to in the pkg-config file as a required library.
    (propagated-inputs
     `(("zlib" ,zlib)))
    (home-page "https://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data.  It also provides the @command{bgzip}, @command{htsfile}, and
@command{tabix} utilities.")
    ;; Files under cram/ are released under the modified BSD license;
    ;; the rest is released under the Expat license
    (license (list license:expat license:bsd-3))))
4239
4240 ;; This package should be removed once no packages rely upon it.
(define htslib-1.3
  ;; Same as htslib, with only the version and the source overridden.
  (package
    (inherit htslib)
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))))
4253
(define-public idr
  (package
    (name "idr")
    (version "2.0.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nboley/idr")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "04j876h6z444v2q79drxx283d3k5snd72kj895wbalnl42206x9g"))
       ;; Delete generated C code.
       (snippet
        '(begin
           (delete-file "idr/inv_cdf.c")
           #t))))
    (build-system python-build-system)
    ;; There is only one test ("test_inv_cdf.py") and it tests features that
    ;; are no longer part of this package.  It also asserts False, which
    ;; causes the tests to always fail.
    (arguments
     `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (propagated-inputs
     `(("python-scipy" ,python-scipy)
       ("python-sympy" ,python-sympy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (home-page "https://github.com/nboley/idr")
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (license license:gpl2+)))
4289
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32
         "1k4pc3fvv6w1km2yph4m5sd78fbxp21d6xyzgmy0gjihzc6mb249"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for library
               "ruby"                   ;for Ruby bindings
               "python"))               ;for Python bindings
    (arguments
     `(#:configure-flags
       ;; Each language binding is enabled with its own output as the
       ;; flag's value.
       (list (string-append "--enable-ruby-binding="
                            (assoc-ref %outputs "ruby"))
             (string-append "--enable-python-binding="
                            (assoc-ref %outputs "python")))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; generator_manager.hpp either uses /bin/sh or $SHELL
             ;; to run tests.
             (let ((bash (which "bash")))
               (setenv "SHELL" bash))
             #t)))))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA.  A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence.  Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences.  It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; JELLYFISH seems to be 64-bit only.
    (supported-systems '("x86_64-linux" "aarch64-linux" "mips64el-linux"))
    ;; The combined work is published under the GPLv3 or later.  Individual
    ;; files such as lib/jsoncpp.cpp are released under the Expat license.
    (license (list license:gpl3+ license:expat))))
4343
(define-public khmer
  (package
    (name "khmer")
    (version "3.0.0a3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dib-lab/khmer")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "01l4jczglkl7yfhgvzx8j0df7k54bk1r8sli9ll16i1mis0d8f37"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete the bundled zlib, bzip2 and seqan; zlib, bzip2 and
           ;; seqan-1 are provided as package inputs instead.
           ;;
           ;; We do not replace the bundled MurmurHash as the canonical
           ;; repository for this code 'SMHasher' is unsuitable for providing
           ;; a library.  See
           ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
           (delete-file-recursively "third-party/zlib")
           (delete-file-recursively "third-party/bzip2")
           (delete-file-recursively "third-party/seqan")
           ;; Enable the commented-out "libraries = z,bz2" setting and drop
           ;; the deleted directories from the include path.
           (substitute* "setup.cfg"
             (("# libraries = z,bz2")
              "libraries = z,bz2")
             (("include:third-party/zlib:third-party/bzip2")
              "include:"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-cc
           (lambda _ (setenv "CC" "gcc") #t))

         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure .gz files are writable so that the
             ;; 'reset-gzip-timestamps' phase can do its work.
             (let ((out (assoc-ref outputs "out")))
               (for-each make-file-writable
                         (find-files out "\\.gz$"))
               #t))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-pytest-runner" ,python-pytest-runner)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("seqan" ,seqan-1)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)))
    (home-page "https://khmer.readthedocs.io/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells.  Khmer can make de novo assemblies faster, and
sometimes better.  Khmer can also identify and fix problems with shotgun
data.")
    ;; When building on i686, armhf and mips64el, we get the following error:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux" "aarch64-linux"))
    (license license:bsd-3)))
4413
(define-public kaiju
  (package
    (name "kaiju")
    (version "1.6.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bioinformatics-centre/kaiju")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "119pzi0ddzv9mjg4wwa6han0cwr3k3ssn7kirvsjfcq05mi5ka0x"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         ;; The 'configure' phase is not used for this package.
         (delete 'configure)
         ;; Run the build from the "src" sub-directory.
         (add-before 'build 'move-to-src-dir
           (lambda _
             (chdir "src")
             #t))
         ;; Install by hand: step back out of "src" and copy the top-level
         ;; "bin" directory into the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p target)
               (chdir "..")
               (copy-recursively "bin" target)
               #t))))))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "http://kaiju.binf.ku.dk/")
    (synopsis "Fast and sensitive taxonomic classification for metagenomics")
    (description "Kaiju is a program for sensitive taxonomic classification
of high-throughput sequencing reads from metagenomic whole genome sequencing
experiments.")
    (license license:gpl3+)))
4451
(define-public macs
  (package
    (name "macs")
    (version "2.2.6")
    (source (origin
              ;; The PyPi tarball does not contain tests.
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/taoliu/MACS")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1c5gxr0mk6hkd4vclf0k00wvyvzw2vrmk52c85338p7aqjwg6n15"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Move the 'check' phase after 'install' so that pytest runs
         ;; against the installed modules, which are made visible through
         ;; 'add-installed-pythonpath'.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             ;; Honor '#:tests? #f' (e.g. '--without-tests'); a replacement
             ;; 'check' phase does not get that behavior for free.
             (when tests?
               (add-installed-pythonpath inputs outputs)
               (invoke "pytest" "-v"))
             #t)))))
    (inputs
     `(("python-numpy" ,python-numpy)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/taoliu/MACS/")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcription factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
4489
(define-public mafft
  (package
    (name "mafft")
    (version "7.394")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "0bacjkxfg944p5khhyh5rd4y7wkjc9qk4v2jjj442sqlq0f8ar7b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR="
                                           (string-append out "/bin"))))
       #:phases
       (modify-phases %standard-phases
         ;; The Makefile and sources that get built are in "core".
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern must name
               ;; "f2cl" exactly like the replacement does; only
               ;; "mafft-profile " is meant to disappear from the line.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               ;; Comment out the clean-up commands for the programs that
               ;; are no longer built.
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               (("^rm -f mafft-distance mafft-distance.exe") "#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Replace bare tool names by the absolute file names found
             ;; on PATH at build time.
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Prefix PATH of every installed program with coreutils.  No
             ;; trailing ":" here: 'wrap-program' inserts the separator
             ;; itself, and an extra one would leave an empty PATH entry,
             ;; which shells treat as the current directory.
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (path (string-append
                           (assoc-ref inputs "coreutils") "/bin")))
               (for-each (lambda (file)
                           (wrap-program file
                             `("PATH" ":" prefix (,path))))
                         (find-files bin)))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("ruby" ,ruby)
       ("gawk" ,gawk)
       ("grep" ,grep)
       ("coreutils" ,coreutils)))
    (home-page "http://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
4575
(define-public mash
  (package
    (name "mash")
    (version "2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/marbl/mash")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "049hwcc059p2fd9vwndn63laifvvsi0wmv84i6y1fr79k15dxwy6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove the bundled copy of kseq.h.
           ;; TODO: Also delete bundled murmurhash and open bloom filter.
           (delete-file "src/mash/kseq.h")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; No tests.
       #:configure-flags
       ;; Tell configure where the capnproto and gsl inputs live.
       (map (lambda (flag label)
              (string-append flag (assoc-ref %build-inputs label)))
            '("--with-capnp=" "--with-gsl=")
            '("capnproto" "gsl"))
       #:make-flags (list "CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; The bundled kseq.h was deleted in the source snippet; include
         ;; it as "htslib/kseq.h" instead.
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* '("src/mash/Sketch.cpp"
                            "src/mash/CommandFind.cpp"
                            "src/mash/CommandScreen.cpp")
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'fix-includes 'use-c++14
           (lambda _
             ;; capnproto 0.7 requires c++14 to build
             (substitute* '("configure.ac" "Makefile.in")
               (("c\\+\\+11") "c++14"))
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ;; Capnproto and htslib are statically embedded in the final
       ;; application.  Therefore we also list their licenses, below.
       ("capnproto" ,capnproto)
       ("htslib" ,htslib)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://mash.readthedocs.io")
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
4641
(define-public metabat
  (package
    (name "metabat")
    (version "2.12.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://bitbucket.org/berkeleylab/metabat.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0hyg2smw1nz69mfvjpk45xyyychmda92c80a0cv7baji84ri4iyn"))
       (patches (search-patches "metabat-fix-compilation.patch"))))
    (build-system scons-build-system)
    (arguments
     `(#:scons ,scons-python2
       #:scons-flags
       (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
             (string-append "BOOST_ROOT=" (assoc-ref %build-inputs "boost")))
       #:tests? #f                ; Tests are run during the build phase.
       #:phases
       (modify-phases %standard-phases
         ;; Include the bam/sam headers from "samtools/" and kseq.h from
         ;; "htslib/" rather than from a "bam/" directory.
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* "src/BamUtils.h"
               (("^#include \"bam/bam\\.h\"")
                "#include \"samtools/bam.h\"")
               (("^#include \"bam/sam\\.h\"")
                "#include \"samtools/sam.h\""))
             (substitute* "src/KseqReader.h"
               (("^#include \"bam/kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         ;; Point the SConstruct file at the htslib and samtools inputs.
         (add-after 'unpack 'fix-scons
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib (assoc-ref inputs "htslib"))
                   (samtools (assoc-ref inputs "samtools")))
               (substitute* "SConstruct"
                 (("^htslib_dir += 'samtools'")
                  (string-append "htslib_dir = '" htslib "'"))
                 (("^samtools_dir = 'samtools'")
                  (string-append "samtools_dir = '" samtools "'"))
                 (("^findStaticOrShared\\('bam', hts_lib")
                  (string-append "findStaticOrShared('bam', '"
                                 samtools "/lib'"))
                 ;; Do not distribute README.
                 (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") ""))
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("perl" ,perl)
       ("samtools" ,samtools)
       ("htslib" ,htslib)
       ("boost" ,boost)))
    ;; The source code contains inline assembly.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    (license (license:non-copyleft "file://license.txt"
                                   "See license.txt in the distribution."))))
4714
(define-public minced
  (package
    (name "minced")
    (version "0.3.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ctSkennerton/minced")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1f5h9him0gd355cnx7p6pnxpknhckd4g0v62mg8zyhfbx9as25fv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; Update the version string recorded in the expected test
             ;; output.
             ;; NOTE(review): the replacement says 0.2.0 while the package
             ;; version is 0.3.2 -- confirm this is what the test expects.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         (replace 'install              ; No install target.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((bin (string-append (assoc-ref outputs "out") "/bin"))
                    (wrapper (string-append bin "/minced")))
               ;; Upstream ships a wrapper script that works out its own
               ;; location before starting the JAR.  All paths are known
               ;; here, so a minimal wrapper is generated instead, which
               ;; avoids a dependency on coreutils.
               (install-file "minced.jar" bin)
               (call-with-output-file wrapper
                 (lambda (port)
                   (display
                    (string-append
                     "#!" (assoc-ref inputs "bash") "/bin/sh\n\n"
                     (assoc-ref inputs "jre") "/bin/java -jar "
                     bin "/minced.jar \"$@\"\n")
                    port)))
               ;; Read and execute permission for everyone, no write bit.
               (chmod wrapper #o555))
             #t)))))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (home-page "https://github.com/ctSkennerton/minced")
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (license license:gpl3+)))
4771
(define-public miso
  (package
    (name "miso")
    (version "0.5.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "misopy" version))
       (sha256
        (base32
         "1z3x0vd8ma7pdrnywj7i3kgwl89sdkwrrn62zl7r5calqaq2hyip"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "setup.py"
             ;; Without setuptools the executables are not installed, so
             ;; switch away from distutils.
             (("distutils.core") "setuptools")
             ;; Compile and link with "gcc" rather than "cc".
             (("^defines")
              "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; no "test" target
       #:python ,python-2))             ; only Python 2 is supported
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-pytz" ,python2-pytz)))  ; for tests
    (home-page "https://www.genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
4819
(define-public muscle
  (package
    (name "muscle")
    (version "3.8.1551")
    (source
     (origin
       (method url-fetch/tarbomb)
       (uri (string-append "http://www.drive5.com/muscle/muscle_src_"
                           version ".tar.gz"))
       (sha256
        (base32
         "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags '("LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Upstream has no test suite; as a smoke test, make sure the
         ;; freshly built binary can at least print its version.
         (replace 'check
           (lambda _
             (invoke "./muscle" "-version")
             #t))
         ;; Install by hand: copy the single "muscle" executable to bin/.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "muscle"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (home-page "http://www.drive5.com/muscle")
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    ;; License information found in 'muscle -h' and usage.cpp.
    (license license:public-domain)))
4854
(define-public newick-utils
  ;; Upstream has not made a release recently, so a fixed git commit is
  ;; packaged instead.
  (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87")
        (revision "1"))
    (package
      (name "newick-utils")
      ;; Version string: "1.6-<revision>.<first 8 characters of commit>".
      (version (string-append "1.6-" revision "." (string-take commit 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tjunier/newick_utils")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
      (build-system gnu-build-system)
      (inputs
       ;; XXX: TODO: Enable Lua and Guile bindings.
       ;; https://github.com/tjunier/newick_utils/issues/13
       `(("libxml2" ,libxml2)
         ("flex" ,flex)
         ("bison" ,bison)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)))
      (home-page "https://github.com/tjunier/newick_utils")
      (synopsis "Programs for working with newick format phylogenetic trees")
      (description
       "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
      (license license:bsd-3))))
4888
(define-public orfm
  (package
    (name "orfm")
    (version "0.7.1")
    (source
     (origin
       (method url-fetch)
       ;; Release tarball attached to the "v<version>" GitHub release.
       (uri (string-append "https://github.com/wwood/OrfM/releases/download/v"
                           version "/orfm-" version ".tar.gz"))
       (sha256
        (base32
         "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
4913
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32
         "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ;pbcore < 2.0 requires Python 2.7
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-sphinx-dependency
           (lambda _
             ;; Only the documentation tests need Sphinx, and those are
             ;; not run.  It would also pull in python2-sphinx, which is
             ;; no longer maintained, so drop the requirement.
             (substitute* "requirements-dev.txt"
               (("^sphinx") ""))
             #t)))))
    (propagated-inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-pyxb" ,python2-pyxb)))
    (home-page "https://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
4950
(define-public python2-warpedlmm
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       ;; The PyPI release is fetched as a ".zip" archive, hence the
       ;; "unzip" native input below.
       (uri (pypi-uri "WarpedLMM" version ".zip"))
       (sha256
        (base32
         "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; test data are not included
       #:python ,python-2               ; requires Python 2.7
       #:phases
       (modify-phases %standard-phases
         ;; Import "weave" as a top-level module instead of from scipy;
         ;; it is provided by the python2-weave input.
         (add-after 'unpack 'use-weave
           (lambda _
             (substitute* "warpedlmm/util/linalg.py"
               (("from scipy import linalg, weave")
                "from scipy import linalg\nimport weave"))
             #t)))))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)
       ("python-weave" ,python2-weave)))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (home-page "https://github.com/PMBio/warpedLMM")
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
4993
(define-public pbtranscript-tofu
  ;; Packaged from a fixed git commit; the version embeds its first seven
  ;; characters.
  (let ((commit "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "2.2.3." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Drop the Cython source tarball bundled with the checkout.
             (delete-file "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(;; FIXME: Tests fail with "No such file or directory:
         ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
         #:tests? #f
         #:python ,python-2
         #:phases
         (modify-phases %standard-phases
           ;; Work from the sub-directory "pbtranscript-tofu/pbtranscript/".
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; The hack guarded by this condition in setup.py breaks the
           ;; build with setuptools 18.0 and later, so it is turned off.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
5050
(define-public prank
  (package
    (name "prank")
    (version "170427")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://wasabiapp.org/download/prank/prank.source."
                           version ".tgz"))
       (sha256
        (base32
         "0nc8g9c5rkdxcir46s0in9ci1sxwzbjibxrvkksf22ybnplvagk2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Build from the "src" sub-directory.
         (add-after 'unpack 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         ;; This phase is also added right after 'unpack, which places it
         ;; before 'enter-src-dir; hence the "src/" prefix on the file name.
         (add-after 'unpack 'remove-m64-flag
           ;; Without "-m64" prank still builds with the correct
           ;; 'bit-ness', and 32-bit machines can build it too.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1"))
                    ;; Colon-separated bin directories of the helper
                    ;; programs that the wrapper puts on PATH.
                    (path (string-join
                           (map (lambda (label)
                                  (string-append
                                   (assoc-ref %build-inputs label) "/bin"))
                                '("mafft" "exonerate" "bppsuite"))
                           ":")))
               (install-file "prank" bin)
               (wrap-program (string-append bin "/prank")
                 `("PATH" ":" prefix (,path)))
               (install-file "prank.1" man))
             #t)))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (home-page "http://wasabiapp.org/software/prank/")
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (license license:gpl2+)))
5108
(define-public proteinortho
  (package
    (name "proteinortho")
    (version "6.0.14")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://gitlab.com/paulklemm_PHD/proteinortho.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0pmy617zy2z2w6hjqxjhf3rzikf5n3mpia80ysq8233vfr7wrzff"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; The checkout ships pre-built scripts; delete them.
           (delete-file-recursively "src/BUILD/")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           ;; No configure script exists, so the install location is
           ;; written straight into the Makefile.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (substitute* "Makefile"
                 (("INSTALLDIR=.*")
                  (string-append "INSTALLDIR=" bin "\n"))))
             #t))
         (add-before 'install 'make-install-directory
           ;; 'make install' does not create the target directory itself.
           (lambda* (#:key outputs #:allow-other-keys)
             (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
             #t))
         (add-after 'install 'wrap-programs
           ;; Wrap the main program and every installed .pl/.py script so
           ;; that they run with the build-time PATH prepended.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (search-path (getenv "PATH"))
                    (scripts (cons (string-append out "/bin/proteinortho")
                                   (find-files out "\\.(pl|py)$"))))
               (for-each (lambda (script)
                           (wrap-script script
                             `("PATH" ":" prefix (,search-path))))
                         scripts))
             #t)))))
    (inputs
     `(("guile" ,guile-3.0)             ; for wrap-script
       ("diamond" ,diamond)
       ("perl" ,perl)
       ("python" ,python-wrapper)
       ("blast+" ,blast+)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (native-inputs
     `(("which" ,which)))
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (license license:gpl3+)))
5174
(define-public pyicoteo
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://bitbucket.org/regulatorygenomicsupf/pyicoteo.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0hz5g8d25lbjy1wpscr490l0lmyvaix893hhax4fxnh1h9w34w8p"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2 ; does not work with Python 3
       #:tests? #f)) ; there are no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    ;; Keep the tool count in the first paragraph in sync with the number of
    ;; @item entries in the enumeration below.
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently seven
different command-line tools:

@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
5213
(define-public prodigal
  (package
    (name "prodigal")
    ;; When updating, look for a new home page; see
    ;; https://github.com/hyattpd/Prodigal/issues/36#issuecomment-536617588
    (version "2.6.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/hyattpd/Prodigal")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1fs1hqk83qjbjhrvhw6ni75zakx5ki1ayy3v6wwkn3xvahc9hi5s"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       ;; Pass the output's bin directory to make as INSTALLDIR.
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "INSTALLDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "https://github.com/hyattpd/Prodigal")
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
5246
(define-public roary
  (package
    (name "roary")
    (version "3.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "0qxrds9wx7cfhlkihrp6697kx0flhhxymap9fwan0b3rbdhcnmff"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly.  The source tree's bin/ and lib/ are put in front
             ;; of PATH and PERL5LIB so the tests find the scripts and
             ;; modules.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             (for-each (lambda (file)
                         ;; Print the name of each test file before
                         ;; running it.
                         (display file)(display "\n")
                         (invoke "perl" file))
                       (find-files "t" ".*\\.t$"))
             #t))
         (replace 'install
           ;; There is no 'install' target in the Makefile, so copy the
           ;; scripts and the Perl modules by hand.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; NOTE(review): PERL5LIB and PATH are read from the current
             ;; environment; if the replaced 'check' phase ran earlier they
             ;; still contain the build-directory entries it added -- confirm
             ;; whether those should end up in the wrappers.
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; Wrap every script whose name does not end in "R" so that
               ;; it sees the Perl modules and the helper programs.  Both
               ;; variables are set up by a single 'wrap-program' call.
               (for-each (lambda (prog)
                           (wrap-program (string-append out "/" prog)
                             `("PERL5LIB" ":" prefix
                               (,(string-append perl5lib ":" out
                                                "/lib/perl5/site_perl")))
                             `("PATH" ":" prefix
                               (,(string-append path ":" out "/bin")))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script gets its own wrapper with the R
               ;; library path and coreutils on PATH.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/")))
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-digest-md5-file" ,perl-digest-md5-file)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r-minimal" ,r-minimal)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "https://sanger-pathogens.github.io/Roary/")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome.  Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results.  128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor.  Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
5363
(define-public raxml
  (package
    (name "raxml")
    (version "8.2.12")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/stamatak/standard-RAxML")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1jqjzhch0rips0vp04prvb8vmc20c5pdmsqn8knadcf91yy859fh"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; There are no tests.
       ;; Use 'standard' Makefile rather than SSE or AVX ones.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; Provide a short "raxml" alias next to the real executable.
               ;; The link itself must be created inside the output's bin/
               ;; directory; a bare "raxml" link name would be resolved
               ;; against the current working directory instead.
               (symlink (string-append bin "/" executable)
                        (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "https://cme.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:gpl2+)))
5404
(define-public rsem
  (package
    (name "rsem")
    (version "1.3.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deweylab/RSEM")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1jlq11d1p8qp64w75yj8cnbbd1a93viq10pzsbwal7vdn8fg13j1"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copies of boost and samtools.
           (for-each delete-file-recursively
                     '("boost" "samtools-1.3"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; Point the Makefile at the boost and htslib inputs.
       #:make-flags
       (let ((boost  (assoc-ref %build-inputs "boost"))
             (htslib (assoc-ref %build-inputs "htslib")))
         (list (string-append "BOOST=" boost "/include/")
               (string-append "SAMHEADERS=" htslib "/include/htslib/sam.h")
               (string-append "SAMLIBS=" htslib "/lib/libhts.a")))
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script.
         ;; Do not build bundled samtools library.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : \\$\\(PROGRAMS\\).*") "all: $(PROGRAMS)\n")
               (("^\\$\\(SAMLIBS\\).*") ""))
             #t))
         (replace 'install
           ;; Copy every file matching "rsem-.*" into bin/ and the Perl
           ;; helper module into the site_perl directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out       (assoc-ref outputs "out"))
                    (bin       (string-append out "/bin/"))
                    (site-perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p site-perl)
               (for-each (lambda (program)
                           (install-file program bin))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" site-perl))
             #t))
         (add-after 'install 'wrap-program
           ;; Let the listed scripts find the Perl module installed above.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out       (assoc-ref outputs "out"))
                    (site-perl (string-append out "/lib/perl5/site_perl")))
               (for-each (lambda (script)
                           (wrap-program (string-append out "/bin/" script)
                             `("PERL5LIB" ":" prefix (,site-perl))))
                         '("rsem-calculate-expression"
                           "rsem-control-fdr"
                           "rsem-generate-data-matrix"
                           "rsem-generate-ngvector"
                           "rsem-plot-transcript-wiggles"
                           "rsem-prepare-reference"
                           "rsem-run-ebseq"
                           "rsem-run-prsem-testing-procedure")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("r-minimal" ,r-minimal)
       ("perl" ,perl)
       ("htslib" ,htslib-1.3)
       ("zlib" ,zlib)))
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data.  The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation.  In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels.  For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (license license:gpl3+)))
5493
(define-public rseqc
  (package
    (name "rseqc")
    (version "3.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rseqc/"
                           "RSeQC-" version ".tar.gz"))
       (sha256
        (base32 "0gbb9iyb7swiv5455fm5rg98r7l6qn27v564yllqjd574hncpx6m"))))
    (build-system python-build-system)
    ;; Build-time only: nose.
    (native-inputs
     `(("python-nose" ,python-nose)))
    (inputs
     `(("python-cython" ,python-cython)
       ("python-bx-python" ,python-bx-python)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-numpy" ,python-numpy)
       ("zlib" ,zlib)))
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data.  Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (home-page "http://rseqc.sourceforge.net/")
    (license license:gpl3+)))
5526
(define-public seek
  ;; There are no release tarballs.  According to the installation
  ;; instructions at http://seek.princeton.edu/installation.jsp, the latest
  ;; stable release is identified by this changeset ID.
  (let ((revision "1")
        (changeset "2329130"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." changeset))
      (source
       (origin
         (method hg-fetch)
         (uri (hg-reference
               (url "https://bitbucket.org/libsleipnir/sleipnir")
               (changeset changeset)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((srfi srfi-1)
                    (guix build gnu-build-system)
                    (guix build utils))
         #:phases
         ;; Sub-directories of tools/ that are built and installed by
         ;; separate "make" runs in addition to the top-level build.
         (let ((extra-tools '("SeekMiner"
                              "SeekEvaluator"
                              "SeekPrep"
                              "Distancer"
                              "Data2DB"
                              "PCL2Bin")))
           (modify-phases %standard-phases
             (replace 'bootstrap
               (lambda _
                 ;; Replace the "/usr/bin/env ..." text in gen_tools_am with
                 ;; the perl found on PATH, then generate the build files.
                 (substitute* "gen_tools_am"
                   (("/usr/bin/env.*") (which "perl")))
                 (invoke "bash" "gen_auto")
                 #t))
             (add-after 'build 'build-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (for-each
                  (lambda (tool)
                    (with-directory-excursion (string-append "tools/" tool)
                      (apply invoke "make" make-flags)))
                  extra-tools)
                 #t))
             (add-after 'install 'install-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (for-each
                  (lambda (tool)
                    (with-directory-excursion (string-append "tools/" tool)
                      (apply invoke "make"
                             (append make-flags '("install")))))
                  extra-tools)
                 #t))))))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine.  SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets.  SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes.  It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (home-page "http://seek.princeton.edu")
      (license license:cc-by3.0))))
5599
(define-public samtools
  (package
    (name "samtools")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "10ilqbmm7ri8z431sn90lvbjwizd0hhkf9rcqw8j823hf26nhgq8"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.9")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:configure-flags (list "--with-ncurses")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; The test script calls out to /bin/bash
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t))
         (add-after 'install 'install-library
           ;; Also ship the static libbam.a archive in lib/.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (lib (string-append out "/lib")))
               (install-file "libbam.a" lib)
               #t)))
         (add-after 'install 'install-headers
           ;; Copy every top-level *.h file into include/samtools/.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out     (assoc-ref outputs "out"))
                    (include (string-append out "/include/samtools/"))
                    (header? (lambda (name) (string-match "\\.h$" name))))
               (for-each (lambda (header)
                           (install-file header include))
                         (scandir "." header?))
               #t))))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (home-page "http://samtools.sourceforge.net")
    (license license:expat)))
5661
(define-public samtools-0.1
  ;; This is the most recent version of the 0.1 line of samtools.  The input
  ;; and output formats differ greatly from that used and produced by samtools
  ;; 1.x and is still used in many bioinformatics pipelines.
  (package
    (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags
       (list "LIBCURSES=-lncurses")
       ;; Reuse the arguments of the current samtools package, adjusting
       ;; only its phases.
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ((#:phases phases)
            `(modify-phases ,phases
               (delete 'patch-tests)
               (delete 'configure)
               (replace 'install
                 ;; Only the "samtools" executable is copied to bin/.
                 (lambda* (#:key outputs #:allow-other-keys)
                   (let* ((out (assoc-ref outputs "out"))
                          (bin (string-append out "/bin")))
                     (mkdir-p bin)
                     (install-file "samtools" bin)
                     #t))))))))))
5692
(define-public mosaik
  (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source
       (origin
         ;; There are no release tarballs nor tags.
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/wanpinglee/MOSAIK")
               (commit commit)))
         (file-name (string-append name "-" version))
         (sha256
          (base32
           "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; Instead of configuring, enter the "src" directory so that the
           ;; following phases run from there.
           (replace 'configure
             (lambda _
               (chdir "src")
               #t))
           (replace 'install
             ;; Copy the contents of the sibling "bin" directory to the
             ;; output.
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (mkdir-p bin)
                 (copy-recursively "../bin" bin)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib:static" ,zlib "static")
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome.  MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      (home-page "https://github.com/wanpinglee/MOSAIK")
      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
      ;; code released into the public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
5740
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "2.10.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/ngs")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1ix51c25hjn57w93qmwzw80xh2i34wx8j2hn7szh8p6w8i3az5qa"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Work from the "ngs-sdk" sub-directory of the checkout.
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "ngs-sdk")
             #t))
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix-flag
                    (string-append "--prefix=" (assoc-ref outputs "out")))
                   (build-prefix-flag
                    (string-append "--build-prefix=" (getcwd) "/build")))
               ;; Allow 'konfigure.perl' to find 'package.prl'.
               (setenv "PERL5LIB"
                       (string-append ".:" (getenv "PERL5LIB")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure" build-prefix-flag prefix-flag)
               #t))))))
    (native-inputs `(("perl" ,perl)))
    ;; According to the test
    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing.  The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (home-page "https://github.com/ncbi/ngs")
    (license license:public-domain)))
5788
(define-public java-ngs
  (package
    (inherit ngs-sdk)
    (name "java-ngs")
    (arguments
     ;; Same arguments as ngs-sdk, with an explicit module list in front and
     ;; the 'enter-dir' phase pointed at the "ngs-java" sub-directory.
     (substitute-keyword-arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases phases)
        `(modify-phases ,phases
           (replace 'enter-dir
             (lambda _
               (chdir "ngs-java")
               #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
5806
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.10.6")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ncbi/ncbi-vdb")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0m8hlxscidsfqm9x9fyi62q6lpf1dv5115kgjjgnrkl49q9c27m6"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f ; not supported
       #:tests? #f ; no "check" target
       #:make-flags '("HAVE_HDF5=1")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-files-writable
           (lambda _ (for-each make-file-writable (find-files "." ".*")) #t))
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (setenv "PERL5LIB"
                     (string-append (getcwd) "/setup:"
                                    (getenv "PERL5LIB")))
             #t))
         ;; See https://github.com/ncbi/ncbi-vdb/issues/14
         (add-after 'unpack 'patch-krypto-flags
           (lambda _
             (substitute* "libs/krypto/Makefile"
               (("-Wa,-march=generic64\\+aes") "")
               (("-Wa,-march=generic64\\+sse4") ""))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Override include path for libmagic
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; Add "-msse2" to the CFLAGS assignment.
               (substitute* "build/Makefile.env"
                 (("CFLAGS =" prefix)
                  (string-append prefix "-msse2 ")))

               ;; Override search path for ngs-java
               (substitute* "setup/package.prl"
                 (("/usr/local/ngs/ngs-java")
                  (assoc-ref inputs "java-ngs")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5")))
               #t)))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (ilib (string-append out "/ilib")))
               ;; Install interface libraries.  On i686 the interface libraries
               ;; are installed to "linux/gcc/i386", so we need to use the Linux
               ;; architecture name ("i386") instead of the target system prefix
               ;; ("i686").
               (mkdir ilib)
               (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                                ,(system->linux-architecture
                                                  (or (%current-target-system)
                                                      (%current-system)))
                                                "/rel/ilib")
                                 ilib)
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include"))
               #t)))
         ;; These files are needed by sra-tools.
         (add-after 'install 'install-configuration-files
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/kfg")))
               (mkdir target)
               (install-file "libs/kfg/default.kfg" target)
               (install-file "libs/kfg/certs.kfg" target))
             #t)))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
5924
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           ;; Copy the "plink" executable into the output's bin/.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (install-file "plink" bin)
               #t))))
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; disable phoning home
               "WITH_WEBCHECK="))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner.  The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data).  Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
5977
(define-public plink-ng
  (package
    (inherit plink)
    (name "plink-ng")
    (version "1.90b4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/chrchang/plink-ng")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "02npdwgkpfkdnhw819rhj5kw02a5k5m90b14zq9zzya4hyg929c0"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Build from the "1.9" sub-directory of the checkout.
         (add-after 'unpack 'chdir
           (lambda _
             (chdir "1.9")
             #t))
         (delete 'configure)            ; no "configure" script
         (replace 'install
           ;; Copy the "plink" executable into the output's bin/.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (install-file "plink" bin)
               #t))))
       #:make-flags (list "BLASFLAGS=-llapack -lopenblas"
                          "CFLAGS=-Wall -O2 -DDYNAMIC_ZLIB=1"
                          "ZLIB=-lz"
                          "-f" "Makefile.std")))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (home-page "https://www.cog-genomics.org/plink/")
    (license license:gpl3+)))
6015
(define-public smithlab-cpp
  (let ((commit "728a097bec88c6f4b8528b685932049e660eff2e")
        (revision "1"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/smithlab_cpp")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:tests? #f                    ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'use-samtools-headers
             ;; Rewrite references to sam.h so they go through the
             ;; "samtools/" include sub-directory.
             (lambda _
               (substitute* '("SAM.cpp"
                              "SAM.hpp")
                 (("sam.h") "samtools/sam.h"))
               #t))
           (replace 'install
             ;; Object files go to lib/, C++ headers to
             ;; include/smithlab-cpp.
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out     (assoc-ref outputs "out"))
                      (lib     (string-append out "/lib"))
                      (include (string-append out "/include/smithlab-cpp")))
                 (mkdir-p lib)
                 (mkdir-p include)
                 (for-each (lambda (object)
                             (install-file object lib))
                           (find-files "." "\\.o$"))
                 (for-each (lambda (header)
                             (install-file header include))
                           (find-files "." "\\.hpp$")))
               #t)))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (license license:gpl3+))))
6068
(define-public preseq
  (package
    (name "preseq")
    (version "2.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/smithlabcode/preseq/"
                           "releases/download/v" version
                           "/preseq_v" version ".tar.bz2"))
       (sha256
        (base32 "149x9xmk1wy1gff85325yfzqc0qk4sgp1w6gbyj9cnji4x1dszbl"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled samtools.
           (delete-file-recursively "samtools")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; Tell the Makefile where to install and where to find the samtools
       ;; and smithlab-cpp inputs.
       #:make-flags
       (let ((out          (assoc-ref %outputs "out"))
             (samtools     (assoc-ref %build-inputs "samtools"))
             (smithlab-cpp (assoc-ref %build-inputs "smithlab-cpp")))
         (list (string-append "PREFIX=" out)
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab-cpp "/lib")
               "PROGS=preseq"
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (home-page "http://smithlabresearch.org/software/preseq/")
    (license license:gpl3+)))
6118
(define-public python-screed
  (package
    (name "python-screed")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32
         "148vcb7w2wr6a4w6vs2bsxanbqibxfk490zbcbg4m61s8669zdjx"))))
    (build-system python-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Tests must be run after installation, as the "screed" command does
         ;; not exist right after building.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Prepend the installed site-packages directory to PYTHONPATH.
             ;; The build system helper works out the interpreter's
             ;; major.minor version itself, rather than slicing a fixed
             ;; number of characters off the store file name, which only
             ;; works for versions of the form "X.Y.Z" with single digits.
             (add-installed-pythonpath inputs outputs)
             ;; Make the freshly installed "screed" command available.
             (setenv "PATH"
                     (string-append (assoc-ref outputs "out") "/bin:"
                                    (getenv "PATH")))
             (invoke "python" "setup.py" "test")
             #t)))))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-pytest-runner" ,python-pytest-runner)))
    (inputs
     `(("python-bz2file" ,python-bz2file)))
    (home-page "https://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
6162
6163 (define-public python2-screed
6164 (package-with-python2 python-screed))
6165
;; NCBI SRA Toolkit, built against the installed ncbi-vdb and ngs-sdk
;; packages rather than against their source/build trees.
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.10.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/sra-tools")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1cr2mijkfs5sm35ffjs6861qsd1qkgnhnbavdv65zg5d655abbjf"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:make-flags
       ;; All three flags point into the ncbi-vdb input; look it up once.
       (let ((vdb (assoc-ref %build-inputs "ncbi-vdb")))
         (list (string-append "DEFAULT_CRT=" vdb "/kfg/certs.kfg")
               (string-append "DEFAULT_KFG=" vdb "/kfg/default.kfg")
               (string-append "VDB_LIBDIR=" vdb
                              ;; The library directory name is chosen at
                              ;; package-evaluation time from the target
                              ;; (or current) system string.
                              ,(if (string-prefix? "x86_64"
                                                   (or (%current-target-system)
                                                       (%current-system)))
                                   "/lib64"
                                   "/lib32"))))
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.  PERL5LIB may be
             ;; unset, in which case 'getenv' returns #f and must not be
             ;; passed to 'string-append'.
             (let ((setup (string-append (getcwd) "/setup"))
                   (previous (getenv "PERL5LIB")))
               (setenv "PERL5LIB"
                       (if previous
                           (string-append setup ":" previous)
                           setup)))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; The build system expects a directory containing the sources and
             ;; raw build output of ncbi-vdb, including files that are not
             ;; installed.  Since we are building against an installed version of
             ;; ncbi-vdb, the following modifications are needed.
             (substitute* "setup/konfigure.perl"
               ;; Make the configure script look for the "ilib" directory of
               ;; "ncbi-vdb" without first checking for the existence of a
               ;; matching library in its "lib" directory.
               ;; NOTE(review): the whitespace following "^" must match the
               ;; indentation used in konfigure.perl -- confirm against the
               ;; upstream file.
               (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                "my $f = File::Spec->catdir($ilibdir, $ilib);")
               ;; Look for interface libraries in ncbi-vdb's "ilib" directory.
               (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

             ;; Dynamic linking
             (substitute* "tools/copycat/Makefile"
               (("smagic-static") "lmagic"))
             (substitute* "tools/driver-tool/utf8proc/Makefile"
               (("CC\\?=gcc") "myCC=gcc")
               (("\\(CC\\)") "(myCC)"))

             ;; The 'configure' script doesn't recognize things like
             ;; '--enable-fast-install'.
             (invoke "./configure"
                     (string-append "--build-prefix=" (getcwd) "/build")
                     (string-append "--prefix=" (assoc-ref outputs "out"))
                     "--debug"
                     (string-append "--with-fuse-prefix="
                                    (assoc-ref inputs "fuse"))
                     (string-append "--with-magic-prefix="
                                    (assoc-ref inputs "libmagic"))
                     ;; TODO: building with libxml2 fails with linker errors
                     #;
                     (string-append "--with-xml2-prefix="
                                    (assoc-ref inputs "libxml2"))
                     (string-append "--with-ncbi-vdb-sources="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ncbi-vdb-build="
                                    (assoc-ref inputs "ncbi-vdb"))
                     (string-append "--with-ngs-sdk-prefix="
                                    (assoc-ref inputs "ngs-sdk"))
                     (string-append "--with-hdf5-prefix="
                                    (assoc-ref inputs "hdf5")))
             #t)))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5-1.10)
       ("zlib" ,zlib)
       ("python" ,python-wrapper)))
    (home-page
     "https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
6270
;; SeqAn 2.x header-only library.  Nothing is compiled: the release tarball
;; is unpacked and its "include" and "share" trees are copied to the "out"
;; and "doc" outputs respectively.
(define-public seqan
  (package
    (name "seqan")
    (version "2.4.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/seqan/seqan/releases/"
                                  "download/seqan-v" version
                                  "/seqan-library-" version ".tar.xz"))
              (sha256
               (base32
                "19a1rlxx03qy1i1iriicly68w64yjxbv24g9gdywnfmq998v35yx"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let* ((tarball   (assoc-ref %build-inputs "source"))
                (tar-dir   (assoc-ref %build-inputs "tar"))
                (xz-dir    (assoc-ref %build-inputs "xz"))
                (headers   (string-append (assoc-ref %outputs "out")
                                          "/include"))
                (documents (string-append (assoc-ref %outputs "doc")
                                          "/share")))
           ;; The trivial build system provides no PATH; tar needs xz to
           ;; decompress the archive.
           (setenv "PATH" (string-append tar-dir "/bin:" xz-dir "/bin"))
           (invoke "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           (copy-recursively "include" headers)
           (copy-recursively "share" documents)
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("xz" ,xz)))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
6315
;; SeqAn 1.x series.  Name, outputs, build system and all metadata are
;; inherited unchanged from 'seqan'; only the version, the source (a bzip2
;; tarball) and the matching unpacking tools differ.
(define-public seqan-1
  (package
    (inherit seqan)
    (version "1.4.2")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://packages.seqan.de/seqan-library/"
                                  "seqan-library-" version ".tar.bz2"))
              (sha256
               (base32
                "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let* ((tarball   (assoc-ref %build-inputs "source"))
                (tar-dir   (assoc-ref %build-inputs "tar"))
                (bzip2-dir (assoc-ref %build-inputs "bzip2"))
                (headers   (string-append (assoc-ref %outputs "out")
                                          "/include"))
                (documents (string-append (assoc-ref %outputs "doc")
                                          "/share")))
           ;; The trivial build system provides no PATH; tar needs bzip2 to
           ;; decompress the archive.
           (setenv "PATH" (string-append tar-dir "/bin:" bzip2-dir "/bin"))
           (invoke "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           (copy-recursively "include" headers)
           (copy-recursively "share" documents)
           #t))))
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))))
6350
;; Seqmagick: command-line front end to BioPython's sequence file
;; conversion, built from the PyPI release.
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.7.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "seqmagick" version))
       (sha256
        (base32
         "12bfyp8nqi0hd36rmj450aygafp01qy3hkbvlwn3bk39pyjjkgg5"))))
    (build-system python-build-system)
    (inputs
     `(("python-biopython" ,python-biopython)))
    (native-inputs
     `(("python-nose" ,python-nose)))
    (home-page "https://github.com/fhcrc/seqmagick")
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (license license:gpl3)))
6376
;; Seqtk: FASTA/FASTQ processing tool.  Upstream ships a plain Makefile
;; without "configure", "check" or "install" support, so those phases are
;; removed or replaced below.
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/lh3/seqtk")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1bfzlqa84b5s1qi22blmmw2s8xdyp9h9ydcq22pfjhh5gab3yz6l"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; There are no tests, so we just run a sanity check.  The check
           ;; is skipped when tests are disabled through #:tests?.
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "./seqtk" "seq"))
             #t))
         (replace 'install
           ;; Copy the single "seqtk" executable into the output's bin
           ;; directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "seqtk" bin)
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
6412
;; SNAP short-read aligner.  The upstream Makefile has no "configure",
;; "check" or "install" support, so those phases are removed or replaced.
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/amplab/snap")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "01w3qq4wm07z73vky0cfwlmrbf50n3w722cxrlzxfi99mnb808d8"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'check
           ;; Run the "unit_tests" program, unless tests are disabled
           ;; through #:tests?.
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "./unit_tests"))
             #t))
         (replace 'install
           ;; Install the two executables into the output's bin directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "snap-aligner" bin)
               (install-file "SNAPCommand" bin)
               #t))))))
    ;; zlib is a library rather than a build-time tool, so it is declared as
    ;; a regular input instead of a native input.
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; 32-bit systems are not supported by the unpatched code.
    ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
6453
;; SortMeRNA: programs go to the "out" output, the bundled rRNA FASTA
;; databases to the separate "db" output.
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1b")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biocore/sortmerna")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0j3mbz4n25738yijmjbr5r4fyvkgm8v5vn3sshyfvmyqf5q9byqf"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for binaries
               "db"))                   ;for sequence databases
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((program-dir
                    (string-append (assoc-ref outputs "out") "/bin"))
                   (database-dir
                    (string-append (assoc-ref outputs "db")
                                   "/share/sortmerna/rRNA_databases")))
               ;; The two executables produced by the build.
               (for-each (lambda (program)
                           (install-file program program-dir))
                         '("sortmerna" "indexdb_rna"))
               ;; Every file under "rRNA_databases" whose name matches the
               ;; regular expression below.
               (for-each (lambda (database)
                           (install-file database database-dir))
                         (find-files "rRNA_databases" ".*fasta"))
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://bioinfo.lifl.fr/RNA/sortmerna/")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:lgpl3)))
6500
;; STAR RNA-seq aligner.  The source snippet strips the pre-built binaries
;; and the bundled htslib; the build phases then point the code at the
;; htslib input instead.  Note that 'starlong' reuses these arguments.
(define-public star
  (package
    (name "star")
    (version "2.7.3a")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/alexdobin/STAR")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1hgiqw5qhs0pc1xazzihcfd92na02xyq2kb469z04y1v51kpvvjq"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (substitute* "source/Makefile"
                    (("/bin/rm") "rm"))
                  ;; Remove pre-built binaries and bundled htslib sources.
                  (delete-file-recursively "bin/MacOSX_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64_static")
                  (delete-file-recursively "source/htslib")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         ;; The Makefile lives in the "source" subdirectory.
         (add-after 'unpack 'enter-source-dir
           (lambda _ (chdir "source") #t))
         (add-after 'enter-source-dir 'make-reproducible
           (lambda _
             ;; Replace whatever COMPILATION_TIME_PLACE is set to with a
             ;; fixed string.
             (substitute* "Makefile"
               (("(COMPILATION_TIME_PLACE=\")(.*)(\")" _ pre mid post)
                (string-append pre "Built with Guix" post)))
             #t))
         ;; See https://github.com/alexdobin/STAR/pull/562
         (add-after 'enter-source-dir 'add-missing-header
           (lambda _
             (substitute* "SoloReadFeature_inputRecords.cpp"
               (("#include \"binarySearch2.h\"" h)
                (string-append h "\n#include <math.h>")))
             #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop the "htslib" prerequisite from the Depend.list rule.
             (substitute* "Makefile"
               (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ prefix) prefix))
             ;; Turn quoted includes of the bundled headers into
             ;; angle-bracket includes.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             ;; The dot before "h" is escaped so that only real ".h" header
             ;; names match, as in the pattern above.
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+\\.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "STAR" bin))
             #t))
         (delete 'configure))))
    (native-inputs
     `(("xxd" ,xxd)))
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; Only 64-bit systems are supported according to the README.
    (supported-systems '("x86_64-linux" "mips64el-linux"))
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
6588
;; Variant of 'star' that builds and installs the "STARlong" executable,
;; with DEF_readNameLengthMax set to 900000.
(define-public starlong
  (package
    (inherit star)
    (name "starlong")
    (arguments
     (substitute-keyword-arguments (package-arguments star)
       ;; The inherited make flags are discarded; only the "STARlong"
       ;; target is built.
       ((#:make-flags inherited-flags)
        `(list "STARlong"))
       ((#:phases inherited-phases)
        `(modify-phases ,inherited-phases
           ;; Allow extra long sequence reads.
           (add-after 'unpack 'make-extra-long
             (lambda _
               (substitute* "source/IncludeDefine.h"
                 (("(#define DEF_readNameLengthMax ).*" _ definition)
                  (string-append definition "900000\n")))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin/")))
                 (install-file "STARlong" bin))
               #t))))))))
6610
;; Subread read aligner suite, built from the "src" subdirectory with
;; upstream's Makefile.Linux.
(define-public subread
  (package
    (name "subread")
    (version "1.6.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/subread/subread-"
                                  version "/subread-" version "-source.tar.gz"))
              (sha256
               (base32
                "0ah0n4jx6ksk2m2j7xk385x2qzmk1y4rfc6a4mfrdqrlq721w99i"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; Upstream's default CC and CCFLAGS carry many x86_64-specific
       ;; optimizations.  CCFLAGS is therefore overridden here, and the
       ;; x86_64-only options are appended solely when the system being
       ;; built for is x86_64.
       #:make-flags
       (list (let* ((x86-64? (string-prefix?
                              "x86_64"
                              ,(or (%current-target-system)
                                   (%current-system))))
                    (common '("-ggdb" "-fomit-frame-pointer"
                              "-ffast-math" "-funroll-loops"
                              "-fmessage-length=0"
                              "-O9" "-Wall" "-DMAKE_FOR_EXON"
                              "-DMAKE_STANDALONE"
                              "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
                    (simd '("-mmmx" "-msse" "-msse2" "-msse3"))
                    (selected (if x86-64?
                                  (append common simd)
                                  common)))
               (string-append "CCFLAGS=" (string-join selected)))
             "-f" "Makefile.Linux"
             "CC=gcc ${CCFLAGS}")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "src") #t))
         (replace 'install
           ;; Copy everything from "../bin" into the output's bin
           ;; directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (mkdir-p bin)
               (copy-recursively "../bin" bin))
             #t))
         ;; no "configure" script
         (delete 'configure))))
    (inputs `(("zlib" ,zlib)))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
6665
;; StringTie transcript assembler.  The bundled samtools copy is removed
;; from the source and the samtools-0.1 input is used in its place.
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                                  "stringtie-" version ".tar.gz"))
              (sha256
               (base32
                "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file-recursively "samtools-0.1.18")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no test suite
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-duplicate-typedef
           (lambda _
             ;; This typedef conflicts with the typedef in
             ;; glibc-2.25/include/bits/types.h
             (substitute* "gclib/GThreads.h"
               (("typedef long long __intmax_t;") ""))
             #t))
         ;; no configure script
         (delete 'configure)
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; Do not make the "stringtie" target depend on the bundled
             ;; libbam.a.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Include the samtools headers from the "samtools/" include
             ;; directory instead of the bundled copies.
             (substitute* '("gclib/GBam.h"
                            "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring).h\"" _ header)
                (string-append "#include <samtools/" header ".h>")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/")))
               (install-file "stringtie" bin)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (license license:artistic2.0)))
6727
;; Taxtastic: tools for building and maintaining taxonomic reference
;; packages.  Built from a git checkout so that the test suite is available.
(define-public taxtastic
  (package
    (name "taxtastic")
    (version "0.8.11")
    (source (origin
              ;; The Pypi version does not include tests.
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/fhcrc/taxtastic")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1sv8mkg64jn7zdwf1jj71c16686yrwxk0apb1l8sjszy9p166g0p"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'prepare-directory
           (lambda _
             ;; The git checkout must be writable for tests.
             (for-each make-file-writable (find-files "."))
             ;; This test fails, but the error is not caught by the test
             ;; framework, so the tests fail...
             (substitute* "tests/test_taxit.py"
               (("self.cmd_fails\\(''\\)")
                "self.cmd_fails('nothing')"))
             ;; This version file is expected to be created with git describe.
             (mkdir-p "taxtastic/data")
             (with-output-to-file "taxtastic/data/ver"
               (lambda () (display ,version)))
             #t))
         (add-after 'unpack 'python37-compatibility
           (lambda _
             ;; Import "errno" directly instead of reaching it through
             ;; "os.errno".
             (substitute* "taxtastic/utils.py"
               (("import csv") "import csv, errno")
               (("os.errno") "errno"))
             #t))
         (replace 'check
           ;; Note, this fails to run with "-v" as it tries to write to a
           ;; closed output stream.  The test run is skipped when tests are
           ;; disabled through #:tests?.
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (invoke "python" "-m" "unittest"))
             #t)))))
    (propagated-inputs
     `(("python-sqlalchemy" ,python-sqlalchemy)
       ("python-decorator" ,python-decorator)
       ("python-biopython" ,python-biopython)
       ("python-pandas" ,python-pandas)
       ("python-psycopg2" ,python-psycopg2)
       ("python-fastalite" ,python-fastalite)
       ("python-pyyaml" ,python-pyyaml)
       ("python-six" ,python-six)
       ("python-jinja2" ,python-jinja2)
       ("python-dendropy" ,python-dendropy)))
    (home-page "https://github.com/fhcrc/taxtastic")
    (synopsis "Tools for taxonomic naming and annotation")
    (description
     "Taxtastic is software written in python used to build and maintain
reference packages i.e. collections of reference trees, reference alignments,
profiles, and associated taxonomic information.")
    (license license:gpl3+)))
6788
;; VCFtools, built from the upstream release tarball with the GNU build
;; system.
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.16")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://github.com/vcftools/vcftools/releases/download/v"
                    version "/vcftools-" version ".tar.gz"))
              (sha256
               (base32
                "1qqlx7flfv7axrjwkaz6njkscsl1d0jw98ns8d8bh1n1hd1pgz6v"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:make-flags
       ;; Both installation variables are rooted in the "out" output.
       (let ((out (assoc-ref %outputs "out")))
         (list "CFLAGS=-O2"             ; override "-m64" flag
               (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "https://vcftools.github.io/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at https://vcftools.github.io/license.html
    (license license:lgpl3)))
6824
;; Infernal: RNA homology search with covariance models, built with the
;; unmodified GNU build system.
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.3")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://eddylab.org/software/infernal/infernal-"
                    version ".tar.gz"))
              (sha256
               (base32
                "0pm8bm3s6nfa0av4x6m6h27lsg12b3lz3jm0fyh1mc77l2isd61v"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)
       ("python" ,python)))             ; for tests
    (home-page "http://eddylab.org/infernal/")
    (synopsis "Inference of RNA alignments")
    (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    ;; Infernal 1.1.3 requires VMX or SSE capability for parallel instructions.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (license license:bsd-3)))
6853
;; SCDE, fetched from the upstream git repository at the tag named after
;; the version.
(define-public r-scde
  (package
    (name "r-scde")
    (version "1.99.2")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/hms-dbmi/scde")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "10na2gyka24mszdxf92wz9h2c13hdf1ww30c68gfsw53lvvhhhxb"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-mgcv" ,r-mgcv)
       ("r-rook" ,r-rook)
       ("r-rjson" ,r-rjson)
       ("r-cairo" ,r-cairo)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-edger" ,r-edger)
       ("r-quantreg" ,r-quantreg)
       ("r-nnet" ,r-nnet)
       ("r-rmtstat" ,r-rmtstat)
       ("r-extremes" ,r-extremes)
       ("r-pcamethods" ,r-pcamethods)
       ("r-biocparallel" ,r-biocparallel)
       ("r-flexmix" ,r-flexmix)))
    (home-page "https://hms-dbmi.github.io/scde/")
    (synopsis "R package for analyzing single-cell RNA-seq data")
    (description "The SCDE package implements a set of statistical methods for
analyzing single-cell RNA-seq data. SCDE fits individual error models for
single-cell RNA-seq measurements. These models can then be used for
assessment of differential expression between groups of cells, as well as
other types of analysis. The SCDE package also contains the pagoda framework
which applies pathway and gene set overdispersion analysis to identify aspects
of transcriptional heterogeneity among single cells.")
    ;; See https://github.com/hms-dbmi/scde/issues/38
    (license license:gpl2)))
6895
;; CENTIPEDE, fetched as a source tarball from R-Forge.
(define-public r-centipede
  (package
    (name "r-centipede")
    (version "1.2")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://download.r-forge.r-project.org/src/contrib/"
                    "CENTIPEDE_" version ".tar.gz"))
              (sha256
               (base32
                "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
    (build-system r-build-system)
    (home-page "http://centipede.uchicago.edu/")
    (synopsis "Predict transcription factor binding sites")
    (description
     "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
    ;; Two licenses are listed for this package.
    (license (list license:gpl2+ license:gpl3+))))
6919
;; genefilter, fetched from Bioconductor.
(define-public r-genefilter
  (package
    (name "r-genefilter")
    (version "1.70.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genefilter" version))
       (sha256
        (base32
         "1sbbrnq6p90fri0ik6aq2zw26kasw63nyiy7xkzrj6vgyq7x258g"))))
    (build-system r-build-system)
    ;; Needed at build time only.
    (native-inputs
     `(("gfortran" ,gfortran)
       ("r-knitr" ,r-knitr)))
    ;; R packages propagated to users of this package.
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-survival" ,r-survival)))
    (home-page "https://bioconductor.org/packages/genefilter")
    (synopsis "Filter genes from high-throughput experiments")
    (description
     "This package provides basic functions for filtering genes from
high-throughput sequencing experiments.")
    (license license:artistic2.0)))
6947
;; DESeq2, fetched from Bioconductor.  The upstream name is recorded in the
;; package properties because it differs in case from the Guix name.
(define-public r-deseq2
  (package
    (name "r-deseq2")
    (version "1.28.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DESeq2" version))
       (sha256
        (base32
         "0xh12c2skr0bbv893p05gvbismkcnqw8zwh7yz4wmycgajfzg2pp"))))
    (properties '((upstream-name . "DESeq2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iranges" ,r-iranges)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    ;; Needed at build time only.
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/DESeq2")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides functions to estimate variance-mean dependence in
count data from high-throughput nucleotide sequencing assays and test for
differential expression based on a model using the negative binomial
distribution.")
    (license license:lgpl3+)))
6985
;; DEXSeq, fetched from Bioconductor.  The upstream name is recorded in the
;; package properties because it differs in case from the Guix name.
(define-public r-dexseq
  (package
    (name "r-dexseq")
    (version "1.34.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DEXSeq" version))
       (sha256
        (base32
         "1m03awaw06mfv5gszq23k5apsqqzjqa5rcwp20y4xbpl7bywpsyl"))))
    (properties '((upstream-name . "DEXSeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biomart" ,r-biomart)
       ("r-deseq2" ,r-deseq2)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-statmod" ,r-statmod)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    ;; Needed at build time only.
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/DEXSeq")
    (synopsis "Inference of differential exon usage in RNA-Seq")
    (description
     "This package is focused on finding differential exon usage using RNA-seq
exon counts between samples with different experimental designs. It provides
functions that allows the user to make the necessary statistical tests based
on a model that uses the negative binomial distribution to estimate the
variance between biological replicates and generalized linear models for
testing. The package also provides functions for the visualization and
exploration of the results.")
    (license license:gpl3+)))
7030
;; Bioconductor package "AnnotationForge", built with the R build system.
(define-public r-annotationforge
  (package
    (name "r-annotationforge")
    (version "1.30.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationForge" version))
              (sha256
               (base32
                "1a2155jxbwc6qr3dcyvp850grhdr7czc7bs3s87ff4sgdl0jp3jw"))))
    (build-system r-build-system)
    ;; Records the mixed-case name used upstream.
    (properties `((upstream-name . "AnnotationForge")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)))
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/AnnotationForge")
    (synopsis "Code for building annotation database packages")
    (description "This package provides code for generating Annotation packages and their
databases. Packages produced are intended to be used with AnnotationDbi.")
    (license license:artistic2.0)))
7062
;; Bioconductor package "RBGL", built with the R build system.
(define-public r-rbgl
  (package
    (name "r-rbgl")
    (version "1.64.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "RBGL" version))
              (sha256
               (base32
                "079599a6xn2i7snfn2vgshkw0c00rrfhj44pvi03ap8id29bkayy"))))
    (build-system r-build-system)
    ;; Records the upper-case name used upstream.
    (properties `((upstream-name . "RBGL")))
    (propagated-inputs
     `(("r-bh" ,r-bh)
       ("r-graph" ,r-graph)))
    (home-page "https://www.bioconductor.org/packages/RBGL")
    (synopsis "Interface to the Boost graph library")
    (description "This package provides a fairly extensive and comprehensive interface to
the graph algorithms contained in the Boost library.")
    (license license:artistic2.0)))
7085
;; Bioconductor package "GSEABase", built with the R build system.
(define-public r-gseabase
  (package
    (name "r-gseabase")
    (version "1.50.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GSEABase" version))
              (sha256
               (base32
                "1k4faj53cwvqijad8cf7fcghzxcv9shlbpl8n73bsncc8k192y2j"))))
    (build-system r-build-system)
    ;; Records the mixed-case name used upstream.
    (properties `((upstream-name . "GSEABase")))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-graph" ,r-graph)
       ("r-xml" ,r-xml)))
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/GSEABase")
    (synopsis "Gene set enrichment data structures and methods")
    (description "This package provides classes and methods to support @dfn{Gene Set
Enrichment Analysis} (GSEA).")
    (license license:artistic2.0)))
7114
;; Bioconductor package "Category", built with the R build system.
(define-public r-category
  (package
    (name "r-category")
    (version "2.54.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Category" version))
              (sha256
               (base32
                "1grspdzk5a4vidnxwcd1jmy1vcn494aydsp3vydx235yv7iqac1b"))))
    (build-system r-build-system)
    ;; Records the capitalized name used upstream.
    (properties `((upstream-name . "Category")))
    ;; Input order is kept exactly as in the original definition.
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-graph" ,r-graph)
       ("r-gseabase" ,r-gseabase)
       ("r-matrix" ,r-matrix)
       ("r-rbgl" ,r-rbgl)
       ("r-dbi" ,r-dbi)))
    (home-page "https://bioconductor.org/packages/Category")
    (synopsis "Category analysis")
    (description "This package provides a collection of tools for performing category
analysis.")
    (license license:artistic2.0)))
7145
;; Bioconductor package "GOstats", built with the R build system.
(define-public r-gostats
  (package
    (name "r-gostats")
    (version "2.54.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GOstats" version))
              (sha256
               (base32
                "00q39cyv4l28r6s9gjnd0qhl7h80vmwp4lpmchisqzj44xzyics9"))))
    (build-system r-build-system)
    ;; Records the mixed-case name used upstream.
    (properties `((upstream-name . "GOstats")))
    ;; Input order is kept exactly as in the original definition.
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationforge" ,r-annotationforge)
       ("r-biobase" ,r-biobase)
       ("r-category" ,r-category)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-rgraphviz" ,r-rgraphviz)
       ("r-rbgl" ,r-rbgl)))
    (home-page "https://bioconductor.org/packages/GOstats")
    (synopsis "Tools for manipulating GO and microarrays")
    (description "This package provides a set of tools for interacting with GO and
microarray data. A variety of basic manipulation tools for graphs, hypothesis
testing and other simple calculations.")
    (license license:artistic2.0)))
7176
;; Bioconductor package "ShortRead", built with the R build system.
(define-public r-shortread
  (package
    (name "r-shortread")
    (version "1.46.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ShortRead" version))
              (sha256
               (base32
                "0l4kdln69y6yhln0xiv2jmpxg05fjcglln406p43a2bqvk2lr03d"))))
    (build-system r-build-system)
    ;; Records the mixed-case name used upstream.
    (properties `((upstream-name . "ShortRead")))
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/ShortRead")
    (synopsis "FASTQ input and manipulation tools")
    (description "This package implements sampling, iteration, and input of FASTQ files.
It includes functions for filtering and trimming reads, and for generating a
quality assessment report. Data are represented as
@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
purposes. The package also contains legacy support for early single-end,
ungapped alignment formats.")
    (license license:artistic2.0)))
7218
;; Bioconductor package "systemPipeR", built with the R build system.
(define-public r-systempiper
  (package
    (name "r-systempiper")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "systemPipeR" version))
              (sha256
               (base32
                "01ilhlrvy28jfdyxjria4024yryj5zgympgqznw17g3y3az78kk2"))))
    (build-system r-build-system)
    ;; Records the mixed-case name used upstream.
    (properties `((upstream-name . "systemPipeR")))
    ;; Input order is kept exactly as in the original definition.
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-assertthat" ,r-assertthat)
       ("r-batchtools" ,r-batchtools)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq2" ,r-deseq2)
       ("r-dot" ,r-dot)
       ("r-edger" ,r-edger)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-go-db" ,r-go-db)
       ("r-gostats" ,r-gostats)
       ("r-limma" ,r-limma)
       ("r-magrittr" ,r-magrittr)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rjson" ,r-rjson)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rsvg" ,r-rsvg)
       ("r-shortread" ,r-shortread)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-yaml" ,r-yaml)
       ("r-variantannotation" ,r-variantannotation)))
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/tgirke/systemPipeR")
    (synopsis "Next generation sequencing workflow and reporting environment")
    (description "This R package provides tools for building and running automated
end-to-end analysis workflows for a wide range of @dfn{next generation
sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
Important features include a uniform workflow interface across different NGS
applications, automated report generation, and support for running both R and
command-line software, such as NGS aligners or peak/variant callers, on local
computers or compute clusters. Efficient handling of complex sample sets and
experimental designs is facilitated by a consistently implemented sample
annotation infrastructure.")
    (license license:artistic2.0)))
7271
;; Bioconductor package "groHMM", built with the R build system.
(define-public r-grohmm
  (package
    (name "r-grohmm")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "groHMM" version))
              (sha256
               (base32
                "04z9qq1xwdsaxbqhyld37w0ybvzly9pc1hcyrnwdbyjwd7n1fncb"))))
    (build-system r-build-system)
    ;; Records the mixed-case name used upstream.
    (properties `((upstream-name . "groHMM")))
    (propagated-inputs
     `(("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-mass" ,r-mass)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/Kraus-Lab/groHMM")
    (synopsis "GRO-seq analysis pipeline")
    (description "This package provides a pipeline for the analysis of GRO-seq data.")
    (license license:gpl3+)))
7298
;; VSEARCH, fetched from its Git repository and built with the GNU build
;; system against an unbundled cityhash.
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.9.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/torognes/vsearch")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0vhrpjfdf75ba04b24xknp41790cvcgwl0vgpy7qbzj5xh2521ss"))
              (patches (search-patches "vsearch-unbundle-cityhash.patch"))
              (snippet
               '(begin
                  ;; Drop the bundled cityhash files; the patch above adapts
                  ;; the vsearch sources to their absence.
                  (for-each delete-file
                            '("src/city.h"
                              "src/citycrc.h"
                              "src/city.cc"))
                  #t))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("cityhash" ,cityhash)))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (home-page "https://github.com/torognes/vsearch")
    (synopsis "Sequence search tools for metagenomics")
    (description "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    ;; The build fails on other platforms because vsearch relies on
    ;; non-portable SSE intrinsics.
    (supported-systems '("x86_64-linux"))
    ;; Dual licensed; public domain source is included as well.
    (license (list license:gpl3 license:bsd-2))))
7344
;; ParDRe, fetched from SourceForge and built with the GNU build system
;; (no configure step, custom install).
(define-public pardre
  (package
    (name "pardre")
    ;; Upstream replaced the 1.1.5 tarball in place, hence the "-1" suffix.
    (version "1.1.5-1")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                                  "1.1.5" ".tar.gz"))
              (sha256
               (base32
                "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy the ParDRe executable into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "ParDRe"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
7385
;; The "bio-kseq" gem from RubyGems, built with the Ruby build system.
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-kseq" version))
              (sha256
               (base32
                "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    ;; The test suite is run through the "spec" target.
    (arguments `(#:test-target "spec"))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (inputs `(("zlib" ,zlib)))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
7413
;; The "bio-locus" gem from RubyGems, built with the Ruby build system.
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-locus" version))
              (sha256
               (base32
                "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    (native-inputs `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
7437
;; The "bio-blastxmlparser" gem from RubyGems, built with the Ruby build
;; system.
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    ;; ruby-rspec is declared as a native input here, matching how the
    ;; neighbouring Ruby packages in this file declare it.
    ;; NOTE(review): assumes rspec is only needed to run the test suite and
    ;; is not used by the installed program -- confirm against the gemspec.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
7461
;; The "bio" gem (BioRuby) from RubyGems, built with the Ruby build system.
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.2")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "1d56amdsjv1mag7m6gv2w0xij8hqx1v5xbdjsix8sp3yp36m7938"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which)))               ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace hard-coded command names in the test files with the
         ;; absolute file names found on PATH at build time.
         (add-before 'build 'patch-test-command
           (lambda _
             ;; The clauses are applied to each line in the order written,
             ;; which matches the three separate passes used previously.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; This list used to contain a stray "," between its elements.
             ;; The reader treats that as an unquote, which only evaluated
             ;; to the intended string because this whole form is
             ;; quasiquoted.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+))))
7504
;; Bioconductor package "biocViews", built with the R build system.
(define-public r-biocviews
  (package
    (name "r-biocviews")
    (version "1.56.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biocViews" version))
       (sha256
        (base32
         "0zcx8gha3x3jc0ra6ii6wwq2vfsmffrrnilknbq8h5xjrl55m6ci"))))
    (build-system r-build-system)
    ;; Records the mixed-case name used upstream.
    (properties `((upstream-name . "biocViews")))
    ;; Input order is kept exactly as in the original definition.
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocmanager" ,r-biocmanager)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-runit" ,r-runit)))
    (home-page "https://bioconductor.org/packages/biocViews")
    (synopsis "Bioconductor package categorization helper")
    (description
     "The purpose of biocViews is to create HTML pages that
categorize packages in a Bioconductor package repository according to keywords,
also known as views, in a controlled vocabulary.")
    (license license:artistic2.0)))
7532
;; Bioconductor package "BiocStyle", built with the R build system.
(define-public r-biocstyle
  (package
    (name "r-biocstyle")
    (version "2.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocStyle" version))
       (sha256
        (base32
         "07rjl2n4sazdg581zh7w3yykzphgr2gpz41ba4ryqs7347vh9nbf"))))
    (build-system r-build-system)
    ;; Records the mixed-case name used upstream.
    (properties `((upstream-name . "BiocStyle")))
    (propagated-inputs
     `(("r-biocmanager" ,r-biocmanager)
       ("r-bookdown" ,r-bookdown)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-yaml" ,r-yaml)))
    ;; r-knitr is listed here in addition to the propagated inputs above.
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/BiocStyle")
    (synopsis "Bioconductor formatting styles")
    (description
     "This package provides standard formatting styles for
Bioconductor PDF and HTML documents. Package vignettes illustrate use and
functionality.")
    (license license:artistic2.0)))
7560
;; Bioconductor package "BiocCheck", built with the R build system and
;; extended with a phase that links its scripts into bin/.
(define-public r-bioccheck
  (package
    (name "r-bioccheck")
    (version "1.24.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "BiocCheck" version))
              (sha256
               (base32
                "1p9ys18sn1crxw1iac2jdgqvwpb5hjd9nfxx0qn0ncrv0b550mny"))))
    (properties
     `((upstream-name . "BiocCheck")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; This package can be used by calling the BiocCheck function from
         ;; within R, or by running "R CMD BiocCheck" on a package.  This
         ;; phase makes sure the latter works.  For this to work, the
         ;; BiocCheck script must be somewhere on the PATH (not the R bin
         ;; directory).
         (add-after 'install 'install-bioccheck-subcommand
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (dest-dir (string-append out "/bin"))
                    ;; No trailing slash here: the separator is added when
                    ;; the file names are appended below, so the symlink
                    ;; targets no longer contain "//".
                    (script-dir
                     (string-append out "/site-library/BiocCheck/script")))
               (mkdir-p dest-dir)
               ;; Link each script under the same name in bin/.
               (for-each (lambda (script)
                           (symlink (string-append script-dir "/" script)
                                    (string-append dest-dir "/" script)))
                         '("checkBadDeps.R" "BiocCheck")))
             #t)))))
    (propagated-inputs
     `(("r-codetools" ,r-codetools)
       ("r-graph" ,r-graph)
       ("r-httr" ,r-httr)
       ("r-knitr" ,r-knitr)
       ("r-optparse" ,r-optparse)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biocviews" ,r-biocviews)
       ("r-stringdist" ,r-stringdist)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/BiocCheck")
    (synopsis "Executes Bioconductor-specific package checks")
    (description "This package contains tools to perform additional quality
checks on R packages that are to be submitted to the Bioconductor repository.")
    (license license:artistic2.0)))
7609
;; Bioconductor package "S4Vectors", built with the R build system.
(define-public r-s4vectors
  (package
    (name "r-s4vectors")
    (version "0.26.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "S4Vectors" version))
       (sha256
        (base32
         "1ddr3ngyczx332zw9ai1a6h7442lgrbfcj8vrhvbkdkzqwj14xmb"))))
    (build-system r-build-system)
    ;; Records the mixed-case name used upstream.
    (properties `((upstream-name . "S4Vectors")))
    (propagated-inputs `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/S4Vectors")
    (synopsis "S4 implementation of vectors and lists")
    (description "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
    (license license:artistic2.0)))
7636
;; Bioconductor package "IRanges", built with the R build system.
(define-public r-iranges
  (package
    (name "r-iranges")
    (version "2.22.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "IRanges" version))
       (sha256
        (base32
         "1y24jw62806wp2afiyj1x6n00gj7d3323klqdypra9q43pg1w49d"))))
    (build-system r-build-system)
    ;; Records the mixed-case name used upstream.
    (properties `((upstream-name . "IRanges")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/IRanges")
    (synopsis "Infrastructure for manipulating intervals on sequences")
    (description "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
    (license license:artistic2.0)))
7665
;; Bioconductor annotation data package "GenomeInfoDbData", built with the
;; R build system.
(define-public r-genomeinfodbdata
  (package
    (name "r-genomeinfodbdata")
    (version "1.2.0")
    (source
     (origin
       (method url-fetch)
       ;; bioconductor-uri is not usable here: the tarball lives under
       ;; "data/annotation/" rather than "bioc/".
       (uri (string-append "https://bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/GenomeInfoDbData_"
                           version ".tar.gz"))
       (sha256
        (base32
         "0di6nlqpsyqf693k2na65ayqldih563x3zfrczpqc5q2hl5kg35c"))))
    (build-system r-build-system)
    ;; Records the mixed-case name used upstream.
    (properties `((upstream-name . "GenomeInfoDbData")))
    (home-page "https://bioconductor.org/packages/GenomeInfoDbData")
    (synopsis "Species and taxonomy ID look up tables for GenomeInfoDb")
    (description
     "This package contains data for mapping between NCBI taxonomy
ID and species. It is used by functions in the GenomeInfoDb package.")
    (license license:artistic2.0)))
7688
;; Bioconductor package "GenomeInfoDb", built with the R build system.
(define-public r-genomeinfodb
  (package
    (name "r-genomeinfodb")
    (version "1.24.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomeInfoDb" version))
       (sha256
        (base32
         "1cqs53p4m5q1dr59war72bccphy01ilw4xra24fmngrv4x32rznd"))))
    (build-system r-build-system)
    ;; Records the mixed-case name used upstream.
    (properties `((upstream-name . "GenomeInfoDb")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodbdata" ,r-genomeinfodbdata)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/GenomeInfoDb")
    (synopsis "Utilities for manipulating chromosome identifiers")
    (description "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
    (license license:artistic2.0)))
7718
;; Bioconductor package "edgeR", built with the R build system.
(define-public r-edger
  (package
    (name "r-edger")
    (version "3.30.3")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "edgeR" version))
              (sha256
               (base32
                "1z9bkg08rgqn3jm2s4ndbj641w33wl8jd3j6m5if6h2nnw6011ic"))))
    ;; The upstream name uses mixed case, unlike the Guix package name.
    (properties `((upstream-name . "edgeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-limma" ,r-limma)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-statmod" ,r-statmod)))       ;for estimateDisp
    (home-page "http://bioinf.wehi.edu.au/edgeR")
    (synopsis "EdgeR does empirical analysis of digital gene expression data")
    ;; Wording fix: "It can be applied" (the verb was missing).
    (description "This package can do differential expression analysis of
RNA-seq expression profiles with biological replication. It implements a range
of statistical methodology based on the negative binomial distributions,
including empirical Bayes estimation, exact tests, generalized linear models
and quasi-likelihood tests. It can be applied to differential signal analysis
of other types of genomic data that produce counts, including ChIP-seq, SAGE
and CAGE.")
    (license license:gpl2+)))
7746
;; Bioconductor package "VariantAnnotation", built with the R build system.
(define-public r-variantannotation
  (package
    (name "r-variantannotation")
    (version "1.34.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "VariantAnnotation" version))
       (sha256
        (base32
         "09y6ymwky839nb0y7y93w810hk9mvwqn7595q1276c28dkddiqvw"))))
    (build-system r-build-system)
    ;; Records the mixed-case name used upstream.
    (properties `((upstream-name . "VariantAnnotation")))
    (inputs `(("zlib" ,zlib)))
    ;; Input order is kept exactly as in the original definition.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/VariantAnnotation")
    (synopsis "Package for annotation of genetic variants")
    (description
     "This R package can annotate variants, compute amino acid
coding changes and predict coding outcomes.")
    (license license:artistic2.0)))
7785
;; Bioconductor package "limma", built with the R build system.
(define-public r-limma
  (package
    (name "r-limma")
    (version "3.44.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "limma" version))
       (sha256
        (base32
         "09fnqxx4rzq5n447aqg2l6y0idfwgz2jxz99sifxsr2q8afzbcj6"))))
    (build-system r-build-system)
    (home-page "http://bioinf.wehi.edu.au/limma")
    (synopsis "Package for linear models for microarray and RNA-seq data")
    (description
     "This package can be used for the analysis of gene expression
studies, especially the use of linear models for analysing designed experiments
and the assessment of differential expression. The analysis methods apply to
different technologies, including microarrays, RNA-seq, and quantitative PCR.")
    (license license:gpl2+)))
7804
;; Bioconductor package "XVector", built with the R build system after
;; removing its references to zlibbioc.
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "XVector" version))
              (sha256
               (base32
                "11h1hszv4798q1gbx8r6zf8vlaqx4v9ql0lbh2xaxybp66a03pvc"))))
    (properties
     `((upstream-name . "XVector")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove the zlibbioc entries from DESCRIPTION and NAMESPACE; zlib
         ;; itself is provided through the inputs below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/XVector")
    ;; Spelling fix: "manipulation".
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
7840
;; Bioconductor package "GenomicRanges", built with the R build system.
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.40.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicRanges" version))
              (sha256
               (base32
                "0wn1zr2qq0rpv9z2wialgizn6xzdlcjg1w2kif67n53svz6vk2x1"))))
    (properties
     `((upstream-name . "GenomicRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    ;; The first sentence was reworded: it previously ran two clauses
    ;; together ("provides tools to ... is playing a central role").
    (description
     "This package provides tools to efficiently represent and manipulate
genomic annotations and alignments, a task that plays a central role in the
analysis of high-throughput sequencing data (a.k.a. NGS data). The
GenomicRanges package defines general purpose containers for storing and
manipulating genomic intervals and variables defined along a genome.")
    (license license:artistic2.0)))
7871
;; Bioconductor package "Biobase", built with the R build system.
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biobase" version))
       (sha256
        (base32
         "13p3kgnxm7hbn8cy289kbhaiyfa6rxx7l1pbvajwqzbay3cxznqp"))))
    (build-system r-build-system)
    ;; Records the capitalized name used upstream.
    (properties `((upstream-name . "Biobase")))
    (propagated-inputs `(("r-biocgenerics" ,r-biocgenerics)))
    (home-page "https://bioconductor.org/packages/Biobase")
    (synopsis "Base functions for Bioconductor")
    (description "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (license license:artistic2.0)))
7893
;; R package AnnotationDbi, fetched from Bioconductor.
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.50.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationDbi" version))
       (sha256
        (base32 "00pd8lsdppxlmx0l65phw0jhsm0qkwjc4wsdxpvgc31iiz9yslbj"))))
    ;; Upstream spells the name in mixed case.
    (properties `((upstream-name . "AnnotationDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/AnnotationDbi")
    (synopsis "Annotation database interface")
    (description
     "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (license license:artistic2.0)))
7922
;; R package biomaRt, fetched from Bioconductor.
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.44.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biomaRt" version))
       (sha256
        (base32 "0np4nh3gj60mgb6312z7x0z9fg5bhrhw872sp3dzgmqc8q8b84iz"))))
    ;; Upstream uses camel case for the name.
    (properties `((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocfilecache" ,r-biocfilecache)
       ("r-httr" ,r-httr)
       ("r-openssl" ,r-openssl)
       ("r-progress" ,r-progress)
       ("r-rappdirs" ,r-rappdirs)
       ("r-stringr" ,r-stringr)
       ("r-xml" ,r-xml)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL first and the link text second; the
    ;; arguments were previously swapped.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
7959
;; R package BiocParallel, fetched from Bioconductor.
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocParallel" version))
       (sha256
        (base32 "1lsg5xm5j3ly5k60fidzhkgqc9jgscyfkqngjvd9qp6yfzvsb82g"))))
    ;; Upstream spells the name in mixed case.
    (properties `((upstream-name . "BiocParallel")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)
       ("r-bh" ,r-bh)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/BiocParallel")
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description
     "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    ;; Two licenses are listed for this package.
    (license (list license:gpl2+ license:gpl3+))))
7986
;; R package Biostrings, fetched from Bioconductor.
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.56.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biostrings" version))
       (sha256
        (base32 "0imhfz7dg8b3l5qzipjranqqshdsg2x6zc49drlhn8sc7j40cvi8"))))
    ;; Upstream capitalises the name.
    (properties `((upstream-name . "Biostrings")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-crayon" ,r-crayon)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/Biostrings")
    (synopsis "String objects and algorithms for biological sequences")
    (description
     "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (license license:artistic2.0)))
8013
;; R package Rsamtools, fetched from Bioconductor.
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rsamtools" version))
       (sha256
        (base32 "0z01z0s71f941k7sns46nyabps28c69d6jxx6sppjpc6h4vrw0vq"))))
    ;; Upstream capitalises the name.
    (properties `((upstream-name . "Rsamtools")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove the references to zlibbioc from the package metadata;
         ;; zlib itself is provided through the inputs below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rhtslib" ,r-rhtslib)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    ;; Use the same short landing-page form as the other Bioconductor
    ;; packages in this file instead of the release-specific HTML page.
    (home-page "https://bioconductor.org/packages/Rsamtools")
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description
     "This package provides an interface to the @code{samtools},
@code{bcftools}, and @code{tabix} utilities for manipulating SAM (Sequence
Alignment / Map), FASTA, binary variant call (BCF) and compressed indexed
tab-delimited (tabix) files.")
    (license license:expat)))
8058
;; R package DelayedArray, fetched from Bioconductor.
(define-public r-delayedarray
  (package
    (name "r-delayedarray")
    (version "0.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DelayedArray" version))
       (sha256
        (base32 "1lz7a0rrlfv3w44n073mk8pw39z7lfs0njdxp5vpp0rdsmvdf1qk"))))
    ;; Upstream spells the name in mixed case.
    (properties `((upstream-name . "DelayedArray")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-matrixstats" ,r-matrixstats)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/DelayedArray")
    (synopsis "Delayed operations on array-like objects")
    (description
     "Wrapping an array-like object (typically an on-disk object) in a
@code{DelayedArray} object allows one to perform common array operations on it
without loading the object in memory. In order to reduce memory usage and
optimize performance, operations on the object are either delayed or executed
using a block processing mechanism. Note that this also works on in-memory
array-like objects like @code{DataFrame} objects (typically with Rle columns),
@code{Matrix} objects, and ordinary arrays and data frames.")
    (license license:artistic2.0)))
8091
;; R package SummarizedExperiment, fetched from Bioconductor.
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.18.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (sha256
        (base32 "07rc79k5dp0dnf4dvsxif995aa9cgfkf13yf84qnwl64k9pf3c2c"))))
    ;; Upstream spells the name in mixed case.
    (properties `((upstream-name . "SummarizedExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/SummarizedExperiment")
    (synopsis "Container for representing genomic ranges by sample")
    (description
     "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (license license:artistic2.0)))
8124
;; R package GenomicAlignments, fetched from Bioconductor.
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicAlignments" version))
       (sha256
        (base32 "0v8k6d7frm5p48cmk4zik78cw9abz4inx0zhl4zrmmx31ifyvk8d"))))
    ;; Upstream spells the name in mixed case.
    (properties `((upstream-name . "GenomicAlignments")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/GenomicAlignments")
    (synopsis "Representation and manipulation of short genomic alignments")
    (description
     "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
8157
;; R package rtracklayer, fetched from Bioconductor.
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rtracklayer" version))
       (sha256
        (base32 "1zcgk92sidhy4y7ws9ms4nkkh2hnccfhfh53qgna0kma9jy4v5xf"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove the references to zlibbioc from the package metadata;
         ;; zlib itself is provided through the inputs below.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               ((" zlibbioc,") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/rtracklayer")
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description
     "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
8204
;; R package GenomicFeatures, fetched from Bioconductor.
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.40.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFeatures" version))
       (sha256
        (base32 "09ffsflk2yhhj9ivm028y3qdkvv8idgxpm4il3y9rym7shc8b3f8"))))
    ;; Upstream spells the name in mixed case.
    (properties `((upstream-name . "GenomicFeatures")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/GenomicFeatures")
    (synopsis "Tools for working with transcript centric annotations")
    (description
     "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
8247
;; R annotation package GO.db.
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.7.0")
    (source
     (origin
       (method url-fetch)
       ;; The URL is spelled out by hand; it points into the
       ;; "release/data/annotation" area of bioconductor.org.
       (uri (string-append "https://www.bioconductor.org/packages/"
                           "release/data/annotation/src/contrib/GO.db_"
                           version ".tar.gz"))
       (sha256
        (base32 "0i3wcf5h3n0dawzc1hy0kv74f06j80c47n4p3g3fmrcxlhi3jpa5"))))
    ;; Upstream calls the package "GO.db".
    (properties `((upstream-name . "GO.db")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/GO.db")
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description
     "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
8271
;; R package topGO, fetched from Bioconductor.
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.40.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "topGO" version))
       (sha256
        (base32 "13rhbvn27sj75fklf1cnjaazacx8yyjlhqlnbp5zk157q6y5cwdr"))))
    ;; Upstream uses camel case for the name.
    (properties `((upstream-name . "topGO")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-dbi" ,r-dbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-lattice" ,r-lattice)
       ("r-matrixstats" ,r-matrixstats)
       ("r-sparsem" ,r-sparsem)))
    (home-page "https://bioconductor.org/packages/topGO")
    (synopsis "Enrichment analysis for gene ontology")
    (description
     "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Any version of the LGPL applies.
    (license license:lgpl2.1+)))
8304
;; R package BSgenome, fetched from Bioconductor.
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.56.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256
        (base32 "1jw8r1qm9fpg2s1cw2y4np243jjxm65j2xdy2785h8fc1b02msf6"))))
    ;; Upstream spells the name in mixed case.
    (properties `((upstream-name . "BSgenome")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/BSgenome")
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (license license:artistic2.0)))
8335
;; R package impute, fetched from Bioconductor.
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.62.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "impute" version))
       (sha256
        (base32 "161p6l1cp3wwdynkxwvg0yhrh6yv20brdlplw5w5mavn4hf1nm0h"))))
    (build-system r-build-system)
    ;; A Fortran compiler is the only extra build-time input.
    (native-inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
8355
;; R package seqPattern, fetched from Bioconductor.
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256
        (base32 "0f1yvx2ri1557rzjx08q5bgml1cvkm8hjl8xn1qi4rjs64sy6mci"))))
    ;; Upstream uses camel case for the name.
    (properties `((upstream-name . "seqPattern")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    (home-page "https://bioconductor.org/packages/seqPattern")
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (license license:gpl3+)))
8382
;; R package genomation, fetched from Bioconductor.
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "genomation" version))
       (sha256
        (base32 "1cy8kqwddiha5jy6nda1al956i4wncbgjkrxwijdb08cmka2sfwh"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-runit" ,r-runit)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqpattern" ,r-seqpattern)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    ;; The home page is the project's own site rather than a
    ;; bioconductor.org landing page.
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Summary, annotation and visualization of genomic data")
    (description
     "This package provides a package for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (license license:artistic2.0)))
8430
;; Experimental data set accompanying the genomation package.
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/experiment/" instead of "bioc/".
       (uri (string-append "https://bioconductor.org/packages/"
                           "release/data/experiment/src/contrib/"
                           "genomationData_" version ".tar.gz"))
       (sha256
        (base32 "10xyb8akjrhmak2i0mnv1agny2ipy364q9nlibyplpzc7vdb6bw7"))))
    (build-system r-build-system)
    ;; As this package provides little more than large data files, it doesn't
    ;; make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "http://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
    (license license:gpl3+)))
8458
;; R package seqLogo, fetched from Bioconductor.
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.54.3")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "seqLogo" version))
              (sha256
               (base32
                "09kkxir305fv8z2yyihvspkrzclxbw1cx5mvhfkrhl10rap6662j"))))
    ;; Upstream uses camel case for the name.
    (properties
     `((upstream-name . "seqLogo")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/seqLogo")
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (license license:lgpl2.0+)))
8479
;; R package motifRG, fetched from Bioconductor.
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.31.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "motifRG" version))
              (sha256
               (base32
                "1ml6zyzlk8yjbnfhga2qnw8nl43rankvka0kc1yljxr2b66aqbhn"))))
    ;; Upstream uses camel case for the name.
    (properties
     `((upstream-name . "motifRG")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/motifRG")
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (license license:artistic2.0)))
8506
;; R package qtl (R/qtl), fetched from CRAN.
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.46-2")
    (source
     (origin
       (method url-fetch)
       ;; Use the CRAN URI helper, as r-qtl2 does, instead of assembling the
       ;; mirror URL by hand.
       (uri (cran-uri "qtl" version))
       (sha256
        (base32
         "0rbwcnvyy96gq1dsgpxx03pv423qya26h6ws5y0blj3blfdmj83a"))))
    (build-system r-build-system)
    (home-page "https://rqtl.org/")
    (synopsis "R package for analyzing QTL experiments in genetics")
    ;; The second paragraph used to mix a finite verb with two infinitives
    ;; ("estimates ..., to identify ..., and to perform ..."); the three
    ;; items are now parallel.
    (description "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).

Using a hidden Markov model, R/qtl can estimate genetic maps,
identify genotyping errors, and perform single-QTL and two-QTL,
two-dimensional genome scans.")
    (license license:gpl3)))
8531
;; R package qtl2, fetched from CRAN.
(define-public r-qtl2
  (package
    (name "r-qtl2")
    (version "0.22-8")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "qtl2" version))
       (sha256
        (base32
         "07w74s7lyyrfa1l6q6v8sxnfaqwxb4w1x4wawcngf0hr1rwx9lda"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-jsonlite" ,r-jsonlite)
       ("r-rcpp" ,r-rcpp)
       ("r-rcppeigen" ,r-rcppeigen)
       ("r-rsqlite" ,r-rsqlite)
       ("r-yaml" ,r-yaml)))
    (home-page "https://kbroman.org/qtl2/")
    (synopsis "Quantitative Trait Locus Mapping in Experimental Crosses")
    (description
     "This package provides a set of tools to perform @dfn{Quantitative Trait
Locus} (QTL) analysis in experimental crosses. It is a reimplementation of the
@code{R/qtl} package to better handle high-dimensional data and complex cross
designs. Broman et al. (2018) <doi:10.1534/genetics.118.301595>.")
    (license license:gpl3)))
8557
;; R package zlibbioc, fetched from Bioconductor.
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.34.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256
        (base32 "0j1l052jb2cwc1nifxzwknc9csagf4f2d092zs0i95dz0rma89l0"))))
    (properties `((upstream-name . "zlibbioc")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (license license:artistic2.0)))
8576
;; R package R4RNA, fetched from the e-rna.org site.
(define-public r-r4rna
  (package
    (name "r-r4rna")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              ;; The tarball is downloaded straight from the project site.
              (uri (string-append "http://www.e-rna.org/r-chie/files/R4RNA_"
                                  version ".tar.gz"))
              (sha256
               (base32
                "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-optparse" ,r-optparse)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://www.e-rna.org/r-chie/index.cgi")
    (synopsis "Analysis framework for RNA secondary structure")
    (description
     "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
    (license license:gpl3+)))
8599
;; R package Rhtslib, fetched from Bioconductor.
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhtslib" version))
              (sha256
               (base32
                "186r7icrkzrni1c4n33ip7dlsfgys7hnqf0simvxrpl3yhh3ygdi"))))
    ;; Upstream capitalises the name.
    (properties
     `((upstream-name . "Rhtslib")))
    (build-system r-build-system)
    ;; Staged installation is turned off: with it, a temporary directory
    ;; ends up in the Rhtslib.so binary and R aborts the build.
    (arguments '(#:configure-flags '("--no-staged-install")))
    (propagated-inputs
     `(("curl" ,curl)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("r-knitr" ,r-knitr)))
    (home-page "https://github.com/nhayden/Rhtslib")
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (license license:lgpl2.0+)))
8631
;; R package bamsignals, fetched from Bioconductor.
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "bamsignals" version))
              (sha256
               (base32
                "0p858xxfv79yc8b3lq58zl9f00irvbn3czsd8wdi5040xg42m402"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    ;; zlib is a plain (non-propagated) input here.
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/bamsignals")
    (synopsis "Extract read count signals from bam files")
    (description
     "This package efficiently obtains count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (license license:gpl2+)))
8663
;; R package RCAS, fetched from Bioconductor.
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "RCAS" version))
       (sha256
        (base32 "0f812pgv3ys1zv4n9sqkgm01hj4cdd0i0h85dqbhkwd94zl6cavl"))))
    ;; Upstream writes the name in upper case.
    (properties
     `((upstream-name . "RCAS")))
    (build-system r-build-system)
    ;; Besides the R libraries, pandoc is propagated as well (last entry).
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggseqlogo" ,r-ggseqlogo)
       ("r-gprofiler2" ,r-gprofiler2)
       ("r-iranges" ,r-iranges)
       ("r-pbapply" ,r-pbapply)
       ("r-pheatmap" ,r-pheatmap)
       ("r-plotly" ,r-plotly)
       ("r-plotrix" ,r-plotrix)
       ("r-proxy" ,r-proxy)
       ("r-ranger" ,r-ranger)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-s4vectors" ,r-s4vectors)
       ("pandoc" ,ghc-pandoc)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (license license:artistic2.0)))
8712
;; Web front end for RCAS, built from a GitHub release tarball with the GNU
;; build system.
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                                  "releases/download/v" version
                                  "/rcas-web-" version ".tar.gz"))
              (sha256
               (base32
                "0wq951aj45gqki1bickg876i993lmawkp8x24agg264br5x716db"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The configure script can't find non-1.3.x versions of RCAS
         ;; because its R expression ‘1.10.1 >= 1.3.4’ evaluates to false.
         ;; The version it asks for is therefore rewritten to 0.0.0.
         (add-before 'configure 'find-RCAS
           (lambda _
             (substitute* "configure"
               (("1\\.3\\.4") "0.0.0"))
             #t))
         ;; Wrap the installed launcher so that it sees the Guile modules of
         ;; guile-json and guile-redis and the R library path that is set at
         ;; build time.
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((site-dir "/share/guile/site/2.2")
                    ;; "<guile-json>/share/guile/site/2.2:<guile-redis>/..."
                    (load-path
                     (string-join
                      (map (lambda (label)
                             (string-append (assoc-ref inputs label)
                                            site-dir))
                           '("guile-json" "guile-redis"))
                      ":"))
                    (launcher
                     (string-append (assoc-ref outputs "out")
                                    "/bin/rcas-web")))
               (wrap-program launcher
                 `("GUILE_LOAD_PATH" ":" = (,load-path))
                 `("GUILE_LOAD_COMPILED_PATH" ":" = (,load-path))
                 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (inputs
     `(("r-minimal" ,r-minimal)
       ("r-rcas" ,r-rcas)
       ("guile" ,guile-2.2)
       ("guile-json" ,guile-json-1)
       ("guile-redis" ,guile2.2-redis)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (license license:agpl3+)))
8763
(define-public r-mutationalpatterns
  ;; Bioconductor package "MutationalPatterns", built with the R build system.
  (package
    (name "r-mutationalpatterns")
    (version "2.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MutationalPatterns" version))
       (sha256
        (base32 "02lyjiabyhmifycksvpcx29a0pb7z9xjw0hgg8n0sd0dy3afqhcm"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ;; The two BSgenome data packages below are only suggested by
       ;; upstream, not strictly required.
       ("r-bsgenome-hsapiens-1000g" ,r-bsgenome-hsapiens-1000genomes-hs37d5)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-genomicranges" ,r-genomicranges)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iranges" ,r-iranges)
       ("r-nmf" ,r-nmf)
       ("r-plyr" ,r-plyr)
       ("r-pracma" ,r-pracma)
       ("r-reshape2" ,r-reshape2)
       ("r-cowplot" ,r-cowplot)
       ("r-ggdendro" ,r-ggdendro)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/MutationalPatterns/")
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (license license:expat)))
8801
(define-public r-chipkernels
  ;; Built from a pinned upstream Git commit; the package version embeds the
  ;; revision and the first nine characters of that commit.
  (let ((commit "c9cfcacb626b1221094fb3490ea7bac0fd625372")
        (revision "1"))
    (package
      (name "r-chipkernels")
      (version (string-append "1.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ManuSetty/ChIPKernels")
               (commit commit)))
         ;; Name the checkout with 'git-file-name', like the other Git-based
         ;; packages in this file, so that it carries the "-checkout" suffix.
         (file-name (git-file-name name version))
         (sha256
          (base32
           "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-iranges" ,r-iranges)
         ("r-xvector" ,r-xvector)
         ("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ("r-gtools" ,r-gtools)
         ("r-genomicranges" ,r-genomicranges)
         ("r-sfsmisc" ,r-sfsmisc)
         ("r-kernlab" ,r-kernlab)
         ("r-s4vectors" ,r-s4vectors)
         ("r-biocgenerics" ,r-biocgenerics)))
      (home-page "https://github.com/ManuSetty/ChIPKernels")
      (synopsis "Build string kernels for DNA Sequence analysis")
      (description "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
      (license license:gpl2+))))
8837
(define-public r-seqgl
  ;; Fetched from upstream Git; the version string is passed as the commit
  ;; reference.
  (package
    (name "r-seqgl")
    (version "1.1.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ManuSetty/SeqGL")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1r6ywvhxl3ffv48lgj7sbd582mcc6dha3ksgc2qjlvjrnkbj3799"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-chipkernels" ,r-chipkernels)
       ("r-genomicranges" ,r-genomicranges)
       ("r-spams" ,r-spams)
       ("r-wgcna" ,r-wgcna)
       ("r-fastcluster" ,r-fastcluster)))
    (home-page "https://github.com/ManuSetty/SeqGL")
    (synopsis "Group lasso for Dnase/ChIP-seq data")
    (description "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
    (license license:gpl2+)))
8868
(define-public r-tximport
  ;; Bioconductor package "tximport"; knitr is only a build-time input.
  (package
    (name "r-tximport")
    (version "1.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "tximport" version))
       (sha256
        (base32 "1x9959lkjl2h869rgd1b30q1idxzjkr1fyqbpndqk3kbi4q2gr40"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/tximport")
    (synopsis "Import and summarize transcript-level estimates for gene-level analysis")
    (description
     "This package provides tools to import transcript-level abundance,
estimated counts and transcript lengths, and to summarize them into matrices
for use with downstream gene-level analysis packages. Average transcript
length, weighted by sample-specific transcript abundance estimates, is
provided as a matrix which can be used as an offset for different expression
of gene-level counts.")
    (license license:gpl2+)))
8892
(define-public r-rhdf5
  ;; Bioconductor package "rhdf5".  r-rhdf5lib is propagated, zlib is a
  ;; plain input and knitr is needed at build time only.
  (package
    (name "r-rhdf5")
    (version "2.32.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rhdf5" version))
       (sha256
        (base32 "102zam2j43jwgaz9ch6y2jjbc3qf56ngmggikf99s8l3w9ggbskm"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-rhdf5lib" ,r-rhdf5lib)))
    (home-page "https://bioconductor.org/packages/rhdf5")
    (synopsis "HDF5 interface to R")
    (description
     "This R/Bioconductor package provides an interface between HDF5 and R.
HDF5's main features are the ability to store and access very large and/or
complex datasets and a wide variety of metadata on mass storage (disk) through
a completely portable file format. The rhdf5 package is thus suited for the
exchange of large and/or complex datasets between R and other software
package, and for letting R applications work on datasets that are larger than
the available RAM.")
    (license license:artistic2.0)))
8921
(define-public r-annotationfilter
  ;; Bioconductor package "AnnotationFilter"; the mixed-case upstream name is
  ;; recorded in the package properties.
  (package
    (name "r-annotationfilter")
    (version "1.12.0")
    (properties
     `((upstream-name . "AnnotationFilter")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationFilter" version))
       (sha256
        (base32 "18kh1xrhpwb48s1qj4f1v8af3jmw49pnbp5afi2myn9894hxg0cs"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-genomicranges" ,r-genomicranges)
       ("r-lazyeval" ,r-lazyeval)))
    (home-page "https://github.com/Bioconductor/AnnotationFilter")
    (synopsis "Facilities for filtering Bioconductor annotation resources")
    (description
     "This package provides classes and other infrastructure to implement
filters for manipulating Bioconductor annotation resources. The filters are
used by @code{ensembldb}, @code{Organism.dplyr}, and other packages.")
    (license license:artistic2.0)))
8947
(define-public emboss
  ;; EMBOSS is patched after unpacking and its build files are regenerated
  ;; with autoreconf before the standard configure phase runs.
  (package
    (name "emboss")
    (version "6.5.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                           (version-major+minor version) ".0/"
                           "EMBOSS-" version ".tar.gz"))
       (sha256
        (base32 "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("libpng" ,libpng)
       ("gd" ,gd)
       ("libx11" ,libx11)
       ("libharu" ,libharu)
       ("zlib" ,zlib)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; The PNGDRIVER tests look for libgd, libpng and zlib but assume
         ;; that all three live under the same prefix.  Replace the check
         ;; with unconditional linker flags and definitions.
         (add-after 'unpack 'fix-checks
           (lambda _
             (substitute* "configure.in"
               (("CHECK_PNGDRIVER")
                "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))
             #t))
         ;; There is no Internet connection at build time, so looking for
         ;; updates cannot work; drop the embossupdate invocation.
         (add-after 'fix-checks 'disable-update-check
           (lambda _
             (substitute* "Makefile.am"
               (("\\$\\(bindir\\)/embossupdate") ""))
             #t))
         ;; Regenerate the build system from the files edited above.
         (add-after 'disable-update-check 'autogen
           (lambda _
             (invoke "autoreconf" "-vif")
             #t)))
       #:configure-flags
       (list (string-append "--with-hpdf="
                            (assoc-ref %build-inputs "libharu")))))
    (home-page "http://emboss.sourceforge.net")
    (synopsis "Molecular biology analysis suite")
    (description "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (license license:gpl2+)))
9010
(define-public bits
  ;; Built from a pinned upstream Git commit.  There is no configure script,
  ;; and the install phase is replaced by a plain copy of the "bin" directory.
  (let ((revision "1")
        (commit "3cc4567896d9d6442923da944beb704750a08d2d"))
    (package
      (name "bits")
      ;; Upstream calls this version 2.13.0, although no release archives
      ;; have been published as yet.
      (version (string-append "2.13.0-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/bits")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (arguments
       `(#:tests? #f                    ;no tests included
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Drop every Makefile line mentioning a "_cuda" target and remove
           ;; the line continuation after "bits_test_intersections".
           (add-after 'unpack 'remove-cuda
             (lambda _
               (substitute* "Makefile"
                 ((".*_cuda") "")
                 (("(bits_test_intersections) \\\\" _ target) target))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bindir (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (copy-recursively "bin" bindir))
               #t)))))
      (home-page "https://github.com/arq5x/bits")
      (synopsis "Implementation of binary interval search algorithm")
      (description "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (license license:gpl2))))
9056
(define-public piranha
  ;; No release tarball exists for the latest version, so a specific commit
  ;; is packaged.  That commit was already more than a year old when this
  ;; definition was written.
  (let ((revision "1")
        (commit "0466d364b71117d01e4471b74c514436cc281233"))
    (package
      (name "piranha")
      (version (string-append "1.2.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/piranha")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("python" ,python-2)))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ;; Checkout of smithlab_cpp; its files are copied into the source
         ;; tree by the 'copy-smithlab-cpp' phase below.
         ("smithlab-cpp"
          ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp")
                     (commit commit)))
               (file-name (string-append "smithlab_cpp-" commit "-checkout"))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (arguments
       `(#:test-target "test"
         #:configure-flags
         (list (string-append "--with-bam_tools_headers="
                              (assoc-ref %build-inputs "bamtools")
                              "/include/bamtools")
               (string-append "--with-bam_tools_library="
                              (assoc-ref %build-inputs "bamtools")
                              "/lib/bamtools"))
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'copy-smithlab-cpp
             (lambda* (#:key inputs #:allow-other-keys)
               (for-each (lambda (source-file)
                           (install-file source-file "./src/smithlab_cpp/"))
                         (find-files (assoc-ref inputs "smithlab-cpp")))
               #t))
           ;; Copy everything found under "bin" into the output's bin
           ;; directory after the regular install phase.
           (add-after 'install 'install-to-store
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bindir (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (for-each (lambda (program)
                             (install-file program bindir))
                           (find-files "bin" ".*")))
               #t)))))
      (home-page "https://github.com/smithlabcode/piranha")
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (license license:gpl3+))))
9123
(define-public pepr
  ;; Fetched from PyPI as "PePr" and built with Python 2.
  (package
    (name "pepr")
    (version "1.0.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "PePr" version))
       (sha256
        (base32 "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ;no tests included
       #:python ,python-2))             ;python2 only
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://github.com/shawnzhangyx/PePr")
    (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
    (description
     "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and look for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
    (license license:gpl3+)))
9151
(define-public filevercmp
  ;; Built from a pinned upstream Git commit.  There is neither a configure
  ;; script nor a test suite, and the single "filevercmp" executable is
  ;; installed by hand.
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7")
        (revision "1"))
    (package
      (name "filevercmp")
      ;; 'git-version' yields "0-1." followed by the first seven characters
      ;; of the commit, which is the version string used so far.
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/filevercmp")
               (commit commit)))
         ;; Name the checkout after the package version rather than the full
         ;; commit hash, like the other Git-based packages in this file.
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1j9vxsy0y050v59h0q1d6501fcw1kjvj0d18l1xk2zyg0jzj247c"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "filevercmp" bin)
                 #t))))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
9182
(define-public multiqc
  ;; Fetched from PyPI.  The matplotlib version constraint in setup.py is
  ;; removed before the build.
  (package
    (name "multiqc")
    (version "1.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "multiqc" version))
       (sha256
        (base32
         "02iihfl0w0hpnr4pa0sbd1y9qxrg3ycyhjp5lidkcrqh1lmzs3zy"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-jinja2" ,python-jinja2)
       ("python-simplejson" ,python-simplejson)
       ("python-pyyaml" ,python-pyyaml)
       ("python-click" ,python-click)
       ("python-spectra" ,python-spectra)
       ("python-requests" ,python-requests)
       ("python-markdown" ,python-markdown)
       ("python-lzstring" ,python-lzstring)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ;; MultiQC checks for the presence of nose at runtime.
       ("python-nose" ,python-nose)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               ;; MultiQC 1.5 ‘requires’ a version of python-matplotlib older
               ;; than the one in Guix, but should work fine with 2.2.2.
               ;; See https://github.com/ewels/MultiQC/issues/725 and
               ;; https://github.com/ewels/MultiQC/issues/732 for details.
               ;;
               ;; POSIX regular expressions have no lazy quantifiers, so
               ;; match up to the closing quote with a negated character
               ;; class instead of ".*?".
               (("['\"]matplotlib[^'\"]*['\"]")
                "'matplotlib'"))
             #t)))))
    (home-page "https://multiqc.info")
    (synopsis "Aggregate bioinformatics analysis reports")
    (description
     "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
    (license license:gpl3+)))
9228
(define-public variant-tools
  ;; Fetched from upstream Git at a fixed commit and built with the Python
  ;; build system.
  (package
    (name "variant-tools")
    (version "3.1.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/vatlab/varianttools")
             ;; Upstream has no tag for version 3.1.2, hence the raw commit.
             (commit "813ae4a90d25b69abc8a40f4f70441fe09015249")))
       (file-name (git-file-name name version))
       (sha256
        (base32 "12ibdmksj7icyqhks4xyvd61bygk4pjmxn618kp6vgk1af01y34g"))))
    (build-system python-build-system)
    (inputs
     `(("boost" ,boost)
       ("c-blosc" ,c-blosc)
       ("gsl" ,gsl)
       ("hdf5" ,hdf5)
       ("hdf5-blosc" ,hdf5-blosc)
       ("python-cython" ,python-cython)
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-pycurl" ,python-pycurl)
       ("python-pyzmq" ,python-pyzmq)
       ("python-scipy" ,python-scipy)
       ("python-tables" ,python-tables)))
    (home-page "https://vatlab.github.io/vat-docs/")
    (synopsis "Analyze genetic variants from Next-Gen sequencing studies")
    (description
     "Variant tools is a tool for the manipulation, annotation,
selection, simulation, and analysis of variants in the context of next-gen
sequencing analysis. Unlike some other tools used for next-gen sequencing
analysis, variant tools is project based and provides a whole set of tools to
manipulate and analyze genetic variants.")
    (license license:gpl3+)))
9268
(define-public r-chipseq
  ;; Bioconductor package "chipseq", built with the R build system.
  (package
    (name "r-chipseq")
    (version "1.38.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "chipseq" version))
       (sha256
        (base32 "0lh859s0aq73vac1phcgagf6n000qgq2xsk0bmfr61n5swifml2a"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shortread" ,r-shortread)))
    (home-page "https://bioconductor.org/packages/chipseq")
    (synopsis "Package for analyzing ChIPseq data")
    (description
     "This package provides tools for processing short read data from ChIPseq
experiments.")
    (license license:artistic2.0)))
9294
(define-public r-copyhelper
  ;; Bioconductor experiment-data package "CopyhelpeR"; the mixed-case
  ;; upstream name is recorded in the package properties.
  (package
    (name "r-copyhelper")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       ;; Use 'bioconductor-uri' with the 'experiment type rather than a
       ;; hand-written "release" URL: it produces the same primary URL and
       ;; adds a fallback for the versioned Bioconductor release, so the
       ;; source stays reachable once "release" moves on.
       (uri (bioconductor-uri "CopyhelpeR" version 'experiment))
       (sha256
        (base32
         "0x7cyynjmxls9as2gg0iyp9x5fpalxmdjq914ss7i84i9zyk5bhq"))))
    (properties `((upstream-name . "CopyhelpeR")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/CopyhelpeR/")
    (synopsis "Helper files for CopywriteR")
    (description
     "This package contains the helper files that are required to run the
Bioconductor package CopywriteR. It contains pre-assembled 1kb bin GC-content
and mappability files for the reference genomes hg18, hg19, hg38, mm9 and
mm10. In addition, it contains a blacklist filter to remove regions that
display copy number variation. Files are stored as GRanges objects from the
GenomicRanges Bioconductor package.")
    (license license:gpl2)))
9320
(define-public r-copywriter
  ;; Bioconductor package "CopywriteR"; the mixed-case upstream name is
  ;; recorded in the package properties.
  (package
    (name "r-copywriter")
    (version "2.20.0")
    (properties `((upstream-name . "CopywriteR")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "CopywriteR" version))
       (sha256
        (base32 "0c36wpv0rygkbqpf3dwh5xmc3lr7p8lrdzsq2fbbpw04skl6i7m2"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-chipseq" ,r-chipseq)
       ("r-copyhelper" ,r-copyhelper)
       ("r-data-table" ,r-data-table)
       ("r-dnacopy" ,r-dnacopy)
       ("r-futile-logger" ,r-futile-logger)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeeperLab/CopywriteR")
    (synopsis "Copy number information from targeted sequencing")
    (description
     "CopywriteR extracts DNA copy number information from targeted sequencing
by utilizing off-target reads. It allows for extracting uniformly distributed
copy number information, can be used without reference, and can be applied to
sequencing data obtained from various techniques including chromatin
immunoprecipitation and target enrichment on small gene panels. Thereby,
CopywriteR constitutes a widely applicable alternative to available copy
number detection tools.")
    (license license:gpl2)))
9360
(define-public r-methylkit
  ;; Bioconductor package "methylKit"; the mixed-case upstream name is
  ;; recorded in the package properties.
  (package
    (name "r-methylkit")
    (version "1.14.2")
    (properties `((upstream-name . "methylKit")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "methylKit" version))
       (sha256
        (base32 "1qr13d2712ypbn96ijic2z5adr5dsd61kzscx7shw6vyj360rlm5"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))           ; for vignettes
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-emdbook" ,r-emdbook)
       ("r-fastseg" ,r-fastseg)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-limma" ,r-limma)
       ("r-mclust" ,r-mclust)
       ("r-mgcv" ,r-mgcv)
       ("r-qvalue" ,r-qvalue)
       ("r-r-utils" ,r-r-utils)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/al2na/methylKit")
    (synopsis
     "DNA methylation analysis from high-throughput bisulfite sequencing results")
    (description
     "MethylKit is an R package for DNA methylation analysis and annotation
from high-throughput bisulfite sequencing. The package is designed to deal
with sequencing data from @dfn{Reduced representation bisulfite
sequencing} (RRBS) and its variants, but also target-capture methods and whole
genome bisulfite sequencing. It also has functions to analyze base-pair
resolution 5hmC data from experimental protocols such as oxBS-Seq and
TAB-Seq.")
    (license license:artistic2.0)))
9409
(define-public r-sva
  ;; Bioconductor package "sva", built with the R build system.
  (package
    (name "r-sva")
    (version "3.36.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "sva" version))
       (sha256
        (base32 "0xa1lm0k1a6nig90mab6xh4gln88rbs5l1cdr6ik6agg7jhs7ji4"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-edger" ,r-edger)
       ("r-genefilter" ,r-genefilter)
       ("r-mgcv" ,r-mgcv)
       ("r-biocparallel" ,r-biocparallel)
       ("r-matrixstats" ,r-matrixstats)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/sva")
    (synopsis "Surrogate variable analysis")
    (description
     "This package contains functions for removing batch effects and other
unwanted variation in high-throughput experiment. It also contains functions
for identifying and building surrogate variables for high-dimensional data
sets. Surrogate variables are covariates constructed directly from
high-dimensional data like gene expression/RNA sequencing/methylation/brain
imaging data that can be used in subsequent analyses to adjust for unknown,
unmodeled, or latent sources of noise.")
    (license license:artistic2.0)))
9440
(define-public r-seqminer
  ;; CRAN package "seqminer"; zlib is its only non-R input.
  (package
    (name "r-seqminer")
    (version "8.0")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "seqminer" version))
       (sha256
        (base32 "00jzj8mwb0zaiwlifd41b26mrq9mzigj18nc29dydi0r42hxg16i"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://seqminer.genomic.codes")
    (synopsis "Read nucleotide sequence data (VCF, BCF, and METAL formats)")
    (description
     "This package provides tools to integrate nucleotide sequencing
data (variant call format, e.g. VCF or BCF) or meta-analysis results in R.")
    ;; Upstream accepts any version of the GPL.
    (license (list license:gpl2+ license:gpl3+))))
9462
(define-public r-raremetals2
  ;; Downloaded from the University of Michigan wiki; the mixed-case upstream
  ;; name is recorded in the package properties.
  (package
    (name "r-raremetals2")
    (version "0.1")
    (properties `((upstream-name . "RareMETALS2")))
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://genome.sph.umich.edu/w/images/"
                           "b/b7/RareMETALS2_" version ".tar.gz"))
       (sha256
        (base32 "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-seqminer" ,r-seqminer)
       ("r-mvtnorm" ,r-mvtnorm)
       ("r-mass" ,r-mass)
       ("r-compquadform" ,r-compquadform)
       ("r-getopt" ,r-getopt)))
    (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
    (synopsis "Analyze gene-level association tests for binary trait")
    (description
     "The R package rareMETALS2 is an extension of the R package rareMETALS.
It was designed to meta-analyze gene-level association tests for binary trait.
While rareMETALS offers a near-complete solution for meta-analysis of
gene-level tests for quantitative trait, it does not offer the optimal
solution for binary trait. The package rareMETALS2 offers improved features
for analyzing gene-level association tests in meta-analyses for binary
trait.")
    (license license:gpl3)))
9494
(define-public r-maldiquant
  ;; CRAN package "MALDIquant"; the mixed-case upstream name is recorded in
  ;; the package properties.
  (package
    (name "r-maldiquant")
    (version "1.19.3")
    (properties `((upstream-name . "MALDIquant")))
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "MALDIquant" version))
       (sha256
        (base32 "0b7kdz3x4sdq413h1q09l1qhcvdnnwv6fqsqwllks1cd3xy34c57"))))
    (build-system r-build-system)
    (home-page "https://cran.r-project.org/web/packages/MALDIquant")
    (synopsis "Quantitative analysis of mass spectrometry data")
    (description
     "This package provides a complete analysis pipeline for matrix-assisted
laser desorption/ionization-time-of-flight (MALDI-TOF) and other
two-dimensional mass spectrometry data. In addition to commonly used plotting
and processing methods it includes distinctive features, namely baseline
subtraction methods such as morphological filters (TopHat) or the
statistics-sensitive non-linear iterative peak-clipping algorithm (SNIP), peak
alignment using warping functions, handling of replicated measurements as well
as allowing spectra with different resolutions.")
    (license license:gpl3+)))
9520
(define-public r-protgenerics
  ;; Bioconductor package "ProtGenerics"; the mixed-case upstream name is
  ;; recorded in the package properties.
  (package
    (name "r-protgenerics")
    (version "1.20.0")
    (properties `((upstream-name . "ProtGenerics")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ProtGenerics" version))
       (sha256
        (base32 "14xzdh7vxss8vmrw91hcwrszdn3ikm71mah8875b2lkrkrfzbl73"))))
    (build-system r-build-system)
    (home-page "https://github.com/lgatto/ProtGenerics")
    (synopsis "S4 generic functions for proteomics infrastructure")
    (description
     "This package provides S4 generic functions needed by Bioconductor
proteomics packages.")
    (license license:artistic2.0)))
9540
(define-public r-mzr
  ;; Bioconductor package "mzR".  The bundled boost sources are deleted by a
  ;; source snippet and the build links against a packaged boost instead.
  (package
    (name "r-mzr")
    (version "2.22.0")
    (properties `((upstream-name . "mzR")))
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "mzR" version))
       (sha256
        (base32 "1r8j8yiz5lcan7j4h37sza2kwczl48dxvld3da3ghjjq67cdc2cm"))
       (modules '((guix build utils)))
       ;; Remove the bundled copy of boost.
       (snippet
        '(begin
           (delete-file-recursively "src/boost")
           #t))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Drop references to the bundled boost objects from src/Makevars
         ;; and add linker flags for the boost libraries right after the
         ;; ARCH_OBJS assignment.
         (add-after 'unpack 'use-system-boost
           (lambda _
             (substitute* "src/Makevars"
               (("\\./boost/libs.*") "")
               (("ARCH_OBJS=" assignment)
                (string-append assignment
                               "\nARCH_LIBS=-lboost_system -lboost_regex \
-lboost_iostreams -lboost_thread -lboost_filesystem -lboost_chrono\n")))
             #t)))))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (inputs
     `(;; Unfortunately the default boost package does not work here, not
       ;; even with mzR version 2.22.0.
       ("boost" ,boost-for-mysql) ; use this instead of the bundled boost sources
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-ncdf4" ,r-ncdf4)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/sneumann/mzR/")
    (synopsis "Parser for mass spectrometry data files")
    (description
     "The mzR package provides a unified API to the common file formats and
parsers available for mass spectrometry data. It comes with a wrapper for the
ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
The package contains the original code written by the ISB, and a subset of the
proteowizard library for mzML and mzIdentML. The netCDF reading code has
previously been used in XCMS.")
    (license license:artistic2.0)))
9596
(define-public r-affyio
  ;; Bioconductor package "affyio"; zlib is a plain input and r-zlibbioc is
  ;; propagated.
  (package
    (name "r-affyio")
    (version "1.58.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "affyio" version))
       (sha256
        (base32 "0j1f61409yq6hmkqrpzamfm7dx35rlq33ccs7wb1qcqx3d3nb75q"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://github.com/bmbolstad/affyio")
    (synopsis "Tools for parsing Affymetrix data files")
    (description
     "This package provides routines for parsing Affymetrix data files based
upon file format information. The primary focus is on accessing the CEL and
CDF file formats.")
    (license license:lgpl2.0+)))
9620
;; Bioconductor "affy": exploratory analysis of oligonucleotide arrays.
(define-public r-affy
  (package
    (name "r-affy")
    (version "1.66.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "affy" version))
       (sha256
        (base32
         "0m6hkyjxmsf80n3anhwh9k26csxczv6v92fkb7klnchdski61pyc"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-affyio" ,r-affyio)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/affy")
    ;; "Affymetrix" is a proper noun; keep it capitalized as in r-affyio.
    (synopsis "Methods for Affymetrix oligonucleotide arrays")
    (description
     "This package contains functions for exploratory oligonucleotide array
analysis.")
    (license license:lgpl2.0+)))
9648
;; Bioconductor "vsn": variance stabilization and calibration of microarray
;; intensities.
(define-public r-vsn
  (package
    (name "r-vsn")
    (version "3.56.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "vsn" version))
       (sha256
        (base32
         "1k82dikrv1gcync5y1131wg7z1kxv2z2jl4nndg20bixc3398h58"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))           ; for vignettes
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-ggplot2" ,r-ggplot2)
       ("r-lattice" ,r-lattice)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/release/bioc/html/vsn.html")
    (synopsis "Variance stabilization and calibration for microarray data")
    ;; The wording below adds the articles that were missing in "have similar
    ;; format" and "incorporates data calibration step".
    (description
     "The package implements a method for normalising microarray intensities,
and works for single- and multiple-color arrays. It can also be used for data
from other technologies, as long as they have a similar format. The method
uses a robust variant of the maximum-likelihood estimator for an
additive-multiplicative error model and affine calibration. The model
incorporates a data calibration step (a.k.a. normalization), a model for the
dependence of the variance on the mean intensity and a variance stabilizing
data transformation. Differences between transformed intensities are
analogous to \"normalized log-ratios\". However, in contrast to the latter,
their variance is independent of the mean, and they are usually more sensitive
and specific in detecting differential transcription.")
    (license license:artistic2.0)))
9684
;; Bioconductor "mzID": XML-based parser for mzIdentML files.
(define-public r-mzid
  (package
    (name "r-mzid")
    (version "1.26.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "mzID" version))
              (sha256
               (base32
                "0y50lzkdamkpz67f6r5whp246qsxpbammjil7g8vjprx0c4jk5n5"))))
    (properties
     `((upstream-name . "mzID")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-doparallel" ,r-doparallel)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-plyr" ,r-plyr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/mzID")
    (synopsis "Parser for mzIdentML files")
    (description
     "This package provides a parser for mzIdentML files implemented using the
XML package. The parser tries to be general and able to handle all types of
mzIdentML files with the drawback of having less pretty output than a vendor
specific parser.")
    (license license:gpl2+)))
9716
;; Bioconductor "pcaMethods": several PCA variants sharing one result type.
(define-public r-pcamethods
  (package
    (name "r-pcamethods")
    (version "1.80.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "pcaMethods" version))
              (sha256
               (base32
                "10cww4jxyynkwxbbsx804nwac31j0hh8dgisygld0q663gaxkgni"))))
    (properties
     `((upstream-name . "pcaMethods")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-mass" ,r-mass)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/hredestig/pcamethods")
    (synopsis "Collection of PCA methods")
    (description
     "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
for missing value estimation is included for comparison. BPCA, PPCA and
NipalsPCA may be used to perform PCA on incomplete data as well as for
accurate missing value estimation. A set of methods for printing and plotting
the results is also provided. All PCA methods make use of the same data
structure (pcaRes) to provide a common interface to the PCA results.")
    (license license:gpl3+)))
9746
;; Bioconductor "MSnbase": base classes and functions for MS-based
;; proteomics data.
(define-public r-msnbase
  (package
    (name "r-msnbase")
    (version "2.14.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "MSnbase" version))
              (sha256
               (base32
                "17vlv9gh41s1hp043b7j1jfqiw52alh1misjzy1kxl0g90rld00l"))))
    (properties
     `((upstream-name . "MSnbase")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-digest" ,r-digest)
       ("r-ggplot2" ,r-ggplot2)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-maldiquant" ,r-maldiquant)
       ("r-mass" ,r-mass)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-pcamethods" ,r-pcamethods)
       ("r-plyr" ,r-plyr)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-vsn" ,r-vsn)
       ("r-xml" ,r-xml)))
    (home-page "https://github.com/lgatto/MSnbase")
    (synopsis "Base functions and classes for MS-based proteomics")
    (description
     "This package provides basic plotting, data manipulation and processing
of mass spectrometry based proteomics data.")
    (license license:artistic2.0)))
9791
;; Bioconductor "MSnID": utilities for handling MS/MS identification results.
(define-public r-msnid
  (package
    (name "r-msnid")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnID" version))
       (sha256
        (base32
         "0dwa6j2nqb3223a8g4f453aznjh69wngrpvdi12iy69j1psbbjcc"))))
    (properties `((upstream-name . "MSnID")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-data-table" ,r-data-table)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-msnbase" ,r-msnbase)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-r-cache" ,r-r-cache)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)))
    (home-page "https://bioconductor.org/packages/MSnID")
    (synopsis "Utilities for LC-MSn proteomics identifications")
    ;; "optimizes" agrees with the singular subject "it"; the text previously
    ;; read "it assesses ... and optimize".
    (description
     "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
from mzIdentML (leveraging the mzID package) or text files. After collating
the search results from multiple datasets it assesses their identification
quality and optimizes filtering criteria to achieve the maximum number of
identifications while not exceeding a specified false discovery rate. It also
contains a number of utilities to explore the MS/MS results and assess missed
and irregular enzymatic cleavages, mass measurement accuracy, etc.")
    (license license:artistic2.0)))
9830
;; CRAN "Seurat": toolkit for QC, analysis and exploration of single cell
;; RNA-seq data.
(define-public r-seurat
  (package
    (name "r-seurat")
    (version "3.1.5")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "Seurat" version))
              (sha256
               (base32
                "1lbq2pqhb6ih6iqawlnzdh05zff71pwbw1cpfv2sld3pd7kz0zkm"))))
    (properties `((upstream-name . "Seurat")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-cluster" ,r-cluster)
       ("r-cowplot" ,r-cowplot)
       ("r-fitdistrplus" ,r-fitdistrplus)
       ("r-future" ,r-future)
       ("r-future-apply" ,r-future-apply)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-ggridges" ,r-ggridges)
       ("r-httr" ,r-httr)
       ("r-ica" ,r-ica)
       ("r-igraph" ,r-igraph)
       ("r-irlba" ,r-irlba)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-leiden" ,r-leiden)
       ("r-lmtest" ,r-lmtest)
       ("r-mass" ,r-mass)
       ("r-matrix" ,r-matrix)
       ("r-patchwork" ,r-patchwork)
       ("r-pbapply" ,r-pbapply)
       ("r-plotly" ,r-plotly)
       ("r-png" ,r-png)
       ("r-rann" ,r-rann)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rcpp" ,r-rcpp)
       ("r-rcppannoy" ,r-rcppannoy)
       ("r-rcppeigen" ,r-rcppeigen)
       ("r-rcppprogress" ,r-rcppprogress)
       ("r-reticulate" ,r-reticulate)
       ("r-rlang" ,r-rlang)
       ("r-rocr" ,r-rocr)
       ("r-rsvd" ,r-rsvd)
       ("r-rtsne" ,r-rtsne)
       ("r-scales" ,r-scales)
       ("r-sctransform" ,r-sctransform)
       ("r-tsne" ,r-tsne)
       ("r-uwot" ,r-uwot)))
    (home-page "http://www.satijalab.org/seurat")
    ;; A synopsis is a short noun phrase; it should not be a full sentence
    ;; that opens with the package's own name.
    (synopsis "R toolkit for single cell genomics")
    (description
     "This package is an R package designed for QC, analysis, and
exploration of single cell RNA-seq data. It easily enables widely-used
analytical techniques, including the identification of highly variable genes,
dimensionality reduction; PCA, ICA, t-SNE, standard unsupervised clustering
algorithms; density clustering, hierarchical clustering, k-means, and the
discovery of differentially expressed genes and markers.")
    (license license:gpl3)))
9891
;; Bioconductor "aroma.light": normalization and visualization methods for
;; microarray data.
(define-public r-aroma-light
  (package
    (name "r-aroma-light")
    (version "3.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "aroma.light" version))
              (sha256
               (base32
                "19y5f2minx2pp73zdh43v1qkwpkaxygkl8cwlnwja15i46s0bcyc"))))
    (properties
     `((upstream-name . "aroma.light")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-matrixstats" ,r-matrixstats)
       ("r-r-methodss3" ,r-r-methodss3)
       ("r-r-oo" ,r-r-oo)
       ("r-r-utils" ,r-r-utils)))
    (home-page "https://github.com/HenrikBengtsson/aroma.light")
    (synopsis "Methods for normalization and visualization of microarray data")
    (description
     "This package provides methods for microarray analysis that take basic
data types such as matrices and lists of vectors. These methods can be used
standalone, be utilized in other packages, or be wrapped up in higher-level
classes.")
    (license license:gpl2+)))
9918
;; Bioconductor "DESeq": differential expression testing for count data.
(define-public r-deseq
  (package
    (name "r-deseq")
    (version "1.39.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DESeq" version))
              (sha256
               (base32
                "047hph5aqmjnz1aqprziw0smdn5lf96hmwpnvqrxv1j2yfvcf3h1"))))
    (properties
     `((upstream-name . "DESeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-lattice" ,r-lattice)
       ("r-locfit" ,r-locfit)
       ("r-mass" ,r-mass)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://www-huber.embl.de/users/anders/DESeq/")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides tools for estimating variance-mean dependence in
count data from high-throughput genetic sequencing assays and for testing for
differential expression based on a model using the negative binomial
distribution.")
    (license license:gpl3+)))
9949
;; Bioconductor "EDASeq": exploratory analysis and normalization of RNA-Seq
;; read data.
(define-public r-edaseq
  (package
    (name "r-edaseq")
    (version "2.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "EDASeq" version))
              (sha256
               (base32
                "12gzxjh73qshlwvsf92lbrf4bi199kxg2snrkprh1z4yqf7bjfm4"))))
    (properties
     `((upstream-name . "EDASeq")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-aroma-light" ,r-aroma-light)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq" ,r-deseq)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)))
    (home-page "https://github.com/drisso/EDASeq")
    (synopsis "Exploratory data analysis and normalization for RNA-Seq")
    (description
     "This package provides support for numerical and graphical summaries of
RNA-Seq genomic read data. Provided within-lane normalization procedures to
adjust for GC-content effect (or other gene-level effects) on read counts:
loess robust local regression, global-scaling, and full-quantile
normalization. Between-lane normalization procedures to adjust for
distributional differences between lanes (e.g., sequencing depth):
global-scaling and full-quantile normalization.")
    (license license:artistic2.0)))
9990
;; Bioconductor "interactiveDisplayBase": basic methods for Shiny-based
;; displays of Bioconductor objects.
(define-public r-interactivedisplaybase
  (package
    (name "r-interactivedisplaybase")
    (version "1.26.3")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "interactiveDisplayBase" version))
              (sha256
               (base32
                "1x5vipqa4pgwpd62c1c58shnlpv3zyzzpf4wdwr00q1swkdb7wv3"))))
    (properties `((upstream-name . "interactiveDisplayBase")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-dt" ,r-dt)
       ("r-shiny" ,r-shiny)))
    (home-page "https://bioconductor.org/packages/interactiveDisplayBase")
    (synopsis "Base package for web displays of Bioconductor objects")
    (description
     "This package contains the basic methods needed to generate interactive
Shiny-based display methods for Bioconductor objects.")
    (license license:artistic2.0)))
10017
;; Bioconductor "AnnotationHub": client for the AnnotationHub web resource.
(define-public r-annotationhub
  (package
    (name "r-annotationhub")
    (version "2.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationHub" version))
              (sha256
               (base32
                "0r4xzf93bm9cpys5cg70wg0b8hxli80hvqwgh4hzbd45yyf5c4wz"))))
    (properties
     `((upstream-name . "AnnotationHub")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocfilecache" ,r-biocfilecache)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biocversion" ,r-biocversion)
       ("r-curl" ,r-curl)
       ("r-dplyr" ,r-dplyr)
       ("r-httr" ,r-httr)
       ("r-interactivedisplaybase" ,r-interactivedisplaybase)
       ("r-rappdirs" ,r-rappdirs)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioconductor.org/packages/AnnotationHub")
    (synopsis "Client to access AnnotationHub resources")
    (description
     "This package provides a client for the Bioconductor AnnotationHub web
resource. The AnnotationHub web resource provides a central location where
genomic files (e.g. VCF, bed, wig) and other resources from standard
locations (e.g. UCSC, Ensembl) can be discovered. The resource includes
metadata about each resource, e.g., a textual description, tags, and date of
modification. The client creates and manages a local cache of files retrieved
by the user, helping with quick and reproducible access.")
    (license license:artistic2.0)))
10058
;; Bioconductor "fastseg": segmentation of microarray and sequencing data.
(define-public r-fastseg
  (package
    (name "r-fastseg")
    (version "1.34.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "fastseg" version))
              (sha256
               (base32
                "1d48n245pzmvcpsz93lxb4frqh222gfhpmlvm0sb74skn16way63"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://www.bioinf.jku.at/software/fastseg/index.html")
    (synopsis "Fast segmentation algorithm for genetic sequencing data")
    (description
     "Fastseg implements a very fast and efficient segmentation algorithm.
It can segment data from DNA microarrays and data from next generation
sequencing for example to detect copy number segments. Further it can segment
data from RNA microarrays like tiling arrays to identify transcripts. Most
generally, it can segment data given as a matrix or as a vector. Various data
formats can be used as input to fastseg like expression set objects for
microarrays or GRanges for sequencing data.")
    (license license:lgpl2.0+)))
10088
;; Bioconductor "KEGGREST": client for the KEGG REST server.
(define-public r-keggrest
  (package
    (name "r-keggrest")
    (version "1.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "KEGGREST" version))
       (sha256
        (base32
         "0q76w17fya2x0z7mvyhkk5kqh07flldgih13ma44vhcy1bdlm6j1"))))
    (properties `((upstream-name . "KEGGREST")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-httr" ,r-httr)
       ("r-png" ,r-png)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/KEGGREST")
    (synopsis "Client-side REST access to KEGG")
    ;; The opening used to read "This package provides a package that
    ;; provides"; the duplicated clause is dropped.
    (description
     "This package provides a client interface to the
@dfn{Kyoto Encyclopedia of Genes and Genomes} (KEGG) REST server.")
    (license license:artistic2.0)))
10114
;; Bioconductor "gage": generally applicable gene-set enrichment for pathway
;; analysis.
(define-public r-gage
  (package
    (name "r-gage")
    (version "2.37.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gage" version))
              (sha256
               (base32
                "1zfaas4x6g7wiml6cmxa7b4f43az9s0lrw80k6sf7c96hsh1jijr"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-graph" ,r-graph)
       ("r-keggrest" ,r-keggrest)))
    ;; The URL is assembled from two pieces only to keep the line short.
    (home-page
     (string-append "https://bmcbioinformatics.biomedcentral.com/"
                    "articles/10.1186/1471-2105-10-161"))
    (synopsis "Generally applicable gene-set enrichment for pathway analysis")
    (description
     "GAGE is a published method for gene set (enrichment or GSEA) or pathway
analysis. GAGE is generally applicable independent of microarray or RNA-Seq
data attributes including sample sizes, experimental designs, assay platforms,
and other types of heterogeneity. The gage package provides functions for
basic GAGE analysis, result processing and presentation. In addition, it
provides demo microarray data and commonly used gene set data based on KEGG
pathways and GO terms. These functions and data are also useful for gene set
analysis using other methods.")
    (license license:gpl2+)))
10144
;; Bioconductor "GenomicFiles": parallel computation distributed by file or
;; by range.
(define-public r-genomicfiles
  (package
    (name "r-genomicfiles")
    (version "1.24.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicFiles" version))
              (sha256
               (base32
                "1k3824pzf9fdqvcv6cz2742q3mabpmncrc72hwa21ac8wy1b04n4"))))
    (properties
     `((upstream-name . "GenomicFiles")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/GenomicFiles")
    (synopsis "Distributed computing by file or by range")
    (description
     "This package provides infrastructure for parallel computations
distributed by file or by range. User defined mapper and reducer functions
provide added flexibility for data combination and manipulation.")
    (license license:artistic2.0)))
10177
;; Bioconductor "ComplexHeatmap": arranging multiple heatmaps with custom
;; annotation graphics.
(define-public r-complexheatmap
  (package
    (name "r-complexheatmap")
    (version "2.4.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ComplexHeatmap" version))
              (sha256
               (base32
                "01jxxwxhf9n8baxgja4rb592p5210s4ppd7a5b4xby5aalhzkr0l"))))
    (properties `((upstream-name . "ComplexHeatmap")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-circlize" ,r-circlize)
       ("r-clue" ,r-clue)
       ("r-colorspace" ,r-colorspace)
       ("r-getoptlong" ,r-getoptlong)
       ("r-globaloptions" ,r-globaloptions)
       ("r-png" ,r-png)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://github.com/jokergoo/ComplexHeatmap")
    (synopsis "Making Complex Heatmaps")
    (description
     "Complex heatmaps are efficient to visualize associations between
different sources of data sets and reveal potential structures. This package
provides a highly flexible way to arrange multiple heatmaps and supports
self-defined annotation graphics.")
    (license license:gpl2+)))
10211
;; Bioconductor "DirichletMultinomial": Dirichlet-multinomial mixture models
;; for microbial metagenomic data.
(define-public r-dirichletmultinomial
  (package
    (name "r-dirichletmultinomial")
    (version "1.30.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DirichletMultinomial" version))
              (sha256
               (base32
                "1m9dsrddrllb2i88qzik1867iv9mggrgdkn0dlp8sq7gl69vmalb"))))
    (properties `((upstream-name . "DirichletMultinomial")))
    (build-system r-build-system)
    (inputs `(("gsl" ,gsl)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/DirichletMultinomial")
    (synopsis "Dirichlet-Multinomial mixture models for microbiome data")
    (description
     "Dirichlet-multinomial mixture models can be used to describe variability
in microbial metagenomic data. This package is an interface to code
originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2):
1-15.")
    (license license:lgpl3)))
10240
;; Bioconductor "ensembldb": creating and querying Ensembl-based annotation
;; databases.
(define-public r-ensembldb
  (package
    (name "r-ensembldb")
    (version "2.12.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ensembldb" version))
       (sha256
        (base32
         "1vvchc04nshxc768fp31rxb603aj3hmq8xlh5qabcwf2c3z9719g"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-curl" ,r-curl)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/jotsetung/ensembldb")
    (synopsis "Utilities to create and use Ensembl-based annotation databases")
    ;; Grammar corrected relative to the earlier text: subject/verb agreement
    ;; ("annotations ... are", "functionality and data are") and gerunds after
    ;; "in addition to" and "allows".
    (description
     "The package provides functions to create and use transcript-centric
annotation databases/packages. The annotations for the databases are directly
fetched from Ensembl using their Perl API. The functionality and data are
similar to that of the TxDb packages from the @code{GenomicFeatures} package,
but, in addition to retrieving all gene/transcript models and annotations from
the database, the @code{ensembldb} package also provides a filter framework
that allows retrieving annotations for specific entries like genes encoded on
a chromosome region or transcript models of lincRNA genes.")
    ;; No version specified
    (license license:lgpl3+)))
10285
;; Bioconductor "OrganismDbi": unified interface over several annotation
;; packages.
(define-public r-organismdbi
  (package
    (name "r-organismdbi")
    (version "1.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "OrganismDbi" version))
       (sha256
        (base32
         "194h5576inq44qr666snzq0ygnc77rk5ljkn9bn8zs6x6gb3cwaw"))))
    (properties `((upstream-name . "OrganismDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-graph" ,r-graph)
       ("r-iranges" ,r-iranges)
       ("r-rbgl" ,r-rbgl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/OrganismDbi")
    (synopsis "Software to enable the smooth interfacing of database packages")
    ;; "a select method" is singular to agree with its article; the text
    ;; previously read "a select methods".
    (description "The package enables a simple unified interface to several
annotation packages each of which has its own schema by taking advantage of
the fact that each of these packages implements a select method.")
    (license license:artistic2.0)))
10317
;; Bioconductor "biovizBase": shared utilities, color schemes and conventions
;; for genomic data visualization.
(define-public r-biovizbase
  (package
    (name "r-biovizbase")
    (version "1.36.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biovizBase" version))
              (sha256
               (base32
                "1vq2mxa2jkljgw75zqjdkyml0ppi5dspvwj4cznfhi31cq8ds0qh"))))
    (properties
     `((upstream-name . "biovizBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-dichromat" ,r-dichromat)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rlang" ,r-rlang)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/biovizBase")
    (synopsis "Basic graphic utilities for visualization of genomic data")
    (description
     "The biovizBase package is designed to provide a set of utilities, color
schemes and conventions for genomic data. It serves as the base for various
high-level packages for biological data visualization. This saves development
effort and encourages consistency.")
    (license license:artistic2.0)))
10359
;; Bioconductor "ggbio": grammar-of-graphics visualization for genomic data.
(define-public r-ggbio
  (package
    (name "r-ggbio")
    (version "1.36.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ggbio" version))
              (sha256
               (base32
                "11ggnqjq42fi2hm9xlvrrlr2xhy4kglvl1a0mycp1s4v67lxw5h5"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; R/GGbio-class.R misspells "fetchable"; the issue is tracked at
         ;; https://github.com/tengfei/ggbio/issues/117 and the correction
         ;; is expected in the next upstream release.
         (add-after 'unpack 'fix-typo
           (lambda _
             (substitute* "R/GGbio-class.R"
               (("fechable") "fetchable"))
             #t)))))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggally" ,r-ggally)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)
       ("r-gtable" ,r-gtable)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-organismdbi" ,r-organismdbi)
       ("r-reshape2" ,r-reshape2)
       ("r-rlang" ,r-rlang)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "http://www.tengfei.name/ggbio/")
    (synopsis "Visualization tools for genomic data")
    (description
     "The ggbio package extends and specializes the grammar of graphics for
biological data. The graphics are designed to answer common scientific
questions, in particular those often asked of high throughput genomics data.
All core Bioconductor data structures are supported, where appropriate. The
package supports detailed views of particular genomic regions, as well as
genome-wide overviews. Supported overviews include ideograms and grand linear
views. High-level plots include sequence fragment length, edge-linked
interval to data view, mismatch pileup, and several splicing summaries.")
    (license license:artistic2.0)))
10424
;; Bioconductor "gQTLBase": storage and interrogation of QTL archives.
(define-public r-gqtlbase
  (package
    (name "r-gqtlbase")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gQTLBase" version))
              (sha256
               (base32
                "06xvzp4fn3qfa46ggg8kxi267gbyd821vvx4040173xkqxpr0g5j"))))
    (properties
     `((upstream-name . "gQTLBase")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-bit" ,r-bit)
       ("r-doparallel" ,r-doparallel)
       ("r-ff" ,r-ff)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/gQTLBase")
    (synopsis "Infrastructure for eQTL, mQTL and similar studies")
    (description
     "The purpose of this package is to simplify the storage and interrogation
of @dfn{quantitative trait loci} (QTL) archives, such as eQTL, mQTL, dsQTL,
and more.")
    (license license:artistic2.0)))
10461
;; Bioconductor package "snpStats".  zlib is a plain input, while the R-level
;; dependencies (including r-zlibbioc) are propagated.
(define-public r-snpstats
  (package
    (name "r-snpstats")
    (version "1.38.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "snpStats" version))
              (sha256
               (base32
                "1qv3nqqr30d3n66mawqd9dbl95dl89r4bcjvkc5iassy1yrwr8wq"))))
    (properties '((upstream-name . "snpStats")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-matrix" ,r-matrix)
       ("r-survival" ,r-survival)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/snpStats")
    (synopsis "Methods for SNP association studies")
    (description
     "This package provides classes and statistical methods for large
@dfn{single-nucleotide polymorphism} (SNP) association studies. This extends
the earlier snpMatrix package, allowing for uncertainty in genotypes.")
    (license license:gpl3)))
10488
;; Bioconductor annotation package "Homo.sapiens"; the dotted upstream
;; spelling is recorded in the upstream-name property.
(define-public r-homo-sapiens
  (package
    (name "r-homo-sapiens")
    (version "1.3.1")
    (source (origin
              (method url-fetch)
              ;; This tarball is located under "data/annotation/" instead of
              ;; "bioc/", so ask bioconductor-uri for the annotation flavour
              ;; of the URI rather than spelling the location out by hand.
              (uri (bioconductor-uri "Homo.sapiens" version 'annotation))
              (sha256
               (base32
                "151vj7h5p1c8yd5swrchk46z469p135wk50hvkl0nhgndvy0jj01"))))
    (properties
     `((upstream-name . "Homo.sapiens")))
    (build-system r-build-system)
    ;; The annotation packages this package gives access to, plus the R
    ;; infrastructure packages; all of them are propagated.
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)
       ("r-go-db" ,r-go-db)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-txdb-hsapiens-ucsc-hg19-knowngene" ,r-txdb-hsapiens-ucsc-hg19-knowngene)
       ("r-organismdbi" ,r-organismdbi)
       ("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/Homo.sapiens/")
    (synopsis "Annotation package for the Homo.sapiens object")
    (description
     "This package contains the Homo.sapiens object to access data from
several related annotation packages.")
    (license license:artistic2.0)))
10520
;; Bioconductor package "erma" (epigenomic road map adventures).
(define-public r-erma
  (package
    (name "r-erma")
    (version "1.4.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "erma" version))
              (sha256
               (base32
                "1ccfbq0r48sr3h8050w8zv8402h7nx09adr0xdyqlg7kwp9vd2l3"))))
    (build-system r-build-system)
    ;; R packages that are propagated to users of this package.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/erma")
    (synopsis "Epigenomic road map adventures")
    (description
     "The epigenomics road map describes locations of epigenetic marks in DNA
from a variety of cell types. Of interest are locations of histone
modifications, sites of DNA methylation, and regions of accessible chromatin.
This package presents a selection of elements of the road map including
metadata and outputs of the ChromImpute procedure applied to ENCODE cell lines
by Ernst and Kellis.")
    (license license:artistic2.0)))
10560
;; Bioconductor package "ldblock": data structures for linkage
;; disequilibrium measures.
(define-public r-ldblock
  (package
    (name "r-ldblock")
    (version "1.18.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ldblock" version))
              (sha256
               (base32
                "0plw00n2zfgh029ab41dnydzgv2yxrapjp770147rx9pff4dngrv"))))
    (build-system r-build-system)
    ;; R packages that are propagated to users of this package.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-ensdb-hsapiens-v75" ,r-ensdb-hsapiens-v75)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-httr" ,r-httr)
       ("r-matrix" ,r-matrix)
       ("r-rsamtools" ,r-rsamtools)
       ("r-snpstats" ,r-snpstats)
       ("r-variantannotation" ,r-variantannotation)))
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/ldblock")
    (synopsis "Data structures for linkage disequilibrium measures in populations")
    (description
     "This package defines data structures for @dfn{linkage
disequilibrium} (LD) measures in populations. Its purpose is to simplify
handling of existing population-level data for the purpose of flexibly
defining LD blocks.")
    (license license:artistic2.0)))
10594
;; Bioconductor package "gQTLstats"; the mixed-case upstream spelling is
;; recorded in the upstream-name property.
(define-public r-gqtlstats
  (package
    (name "r-gqtlstats")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gQTLstats" version))
              (sha256
               (base32
                "1jjqfpjp93nmxjn757j5mzcax96bzcqdd1gr3rsdxg7ap008l2w7"))))
    (properties '((upstream-name . "gQTLstats")))
    (build-system r-build-system)
    ;; R packages that are propagated to users of this package; among them
    ;; are r-erma, r-gqtlbase, r-homo-sapiens and r-snpstats.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-beeswarm" ,r-beeswarm)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-erma" ,r-erma)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gqtlbase" ,r-gqtlbase)
       ("r-hardyweinberg" ,r-hardyweinberg)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-limma" ,r-limma)
       ("r-mgcv" ,r-mgcv)
       ("r-plotly" ,r-plotly)
       ("r-reshape2" ,r-reshape2)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-snpstats" ,r-snpstats)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/gQTLstats")
    (synopsis "Computationally efficient analysis for eQTL and allied studies")
    (description
     "This package provides tools for the computationally efficient analysis
of @dfn{quantitative trait loci} (QTL) data, including eQTL, mQTL, dsQTL, etc.
The software in this package aims to support refinements and functional
interpretation of members of a collection of association statistics on a
family of feature/genome hypotheses.")
    (license license:artistic2.0)))
10650
;; Bioconductor package "Gviz"; the capitalised upstream spelling is recorded
;; in the upstream-name property.
(define-public r-gviz
  (package
    (name "r-gviz")
    (version "1.32.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Gviz" version))
              (sha256
               (base32
                "0cgkp0ciyy2qykqgh3vzp5mx9b4vsvacjh2jnsj3wldiapzlz08a"))))
    (properties '((upstream-name . "Gviz")))
    (build-system r-build-system)
    ;; R packages that are propagated to users of this package.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-digest" ,r-digest)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/Gviz")
    (synopsis "Plotting data and annotation information along genomic coordinates")
    (description
     "Genomic data analyses requires integrated visualization of known genomic
information and new experimental data. Gviz uses the biomaRt and the
rtracklayer packages to perform live annotation queries to Ensembl and UCSC
and translates this to e.g. gene/transcript structures in viewports of the
grid graphics package. This results in genomic information plotted together
with your data.")
    (license license:artistic2.0)))
10699
;; Bioconductor package "gwascat": tools for the EMBL-EBI GWAS catalog.
(define-public r-gwascat
  (package
    (name "r-gwascat")
    (version "2.20.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "gwascat" version))
              (sha256
               (base32
                "1cq5cmdrf0a0arr841yvkh6d8drc15p7mif1afr215l1s3y2dwd4"))))
    (build-system r-build-system)
    ;; R packages that are propagated to users of this package.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/gwascat")
    (synopsis "Tools for data in the EMBL-EBI GWAS catalog")
    (description
     "This package provides tools for representing and modeling data in the
EMBL-EBI GWAS catalog.")
    (license license:artistic2.0)))
10732
;; Bioconductor package "Sushi"; the capitalised upstream spelling is
;; recorded in the upstream-name property.
(define-public r-sushi
  (package
    (name "r-sushi")
    (version "1.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Sushi" version))
       (sha256
        (base32 "17j3d5qjq5nbv99by5mq8rwr0jgh2jyyfn2nwxmwgzlmk3lgi1rb"))))
    (properties '((upstream-name . "Sushi")))
    (build-system r-build-system)
    ;; Only two R packages are propagated.
    (propagated-inputs
     `(("r-biomart" ,r-biomart)
       ("r-zoo" ,r-zoo)))
    (home-page "https://bioconductor.org/packages/Sushi")
    (synopsis "Tools for visualizing genomics data")
    (description
     "This package provides flexible, quantitative, and integrative genomic
visualizations for publication-quality multi-panel figures.")
    (license license:gpl2+)))
10754
;; Bioconductor package "FitHiC"; the mixed-case upstream spelling is
;; recorded in the upstream-name property.
(define-public r-fithic
  (package
    (name "r-fithic")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "FitHiC" version))
       (sha256
        (base32 "1dffkdxm08wq4kjd9j2v2625x3p6vbrk33a2zx94pwpgkghr72yp"))))
    (properties '((upstream-name . "FitHiC")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-fdrtool" ,r-fdrtool)
       ("r-rcpp" ,r-rcpp)))
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/FitHiC")
    (synopsis "Confidence estimation for intra-chromosomal contact maps")
    (description
     "Fit-Hi-C is a tool for assigning statistical confidence estimates to
intra-chromosomal contact maps produced by genome-wide genome architecture
assays such as Hi-C.")
    (license license:gpl2+)))
10780
;; Bioconductor package "HiTC"; the mixed-case upstream spelling is recorded
;; in the upstream-name property.
(define-public r-hitc
  (package
    (name "r-hitc")
    (version "1.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "HiTC" version))
       (sha256
        (base32 "1jx2pfa7sbdz7xi466lz1h5xv126g56z73n0a5l2wrq28k47qaxy"))))
    (properties '((upstream-name . "HiTC")))
    (build-system r-build-system)
    ;; R packages that are propagated to users of this package.
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rtracklayer" ,r-rtracklayer)))
    (home-page "https://bioconductor.org/packages/HiTC")
    (synopsis "High throughput chromosome conformation capture analysis")
    (description
     "The HiTC package was developed to explore high-throughput \"C\" data
such as 5C or Hi-C. Dedicated R classes as well as standard methods for
quality controls, normalization, visualization, and further analysis are also
provided.")
    (license license:artistic2.0)))
10809
;; Bioconductor package "HDF5Array".  zlib is a plain input; the R-level
;; dependencies, including r-rhdf5 and r-rhdf5lib, are propagated.
(define-public r-hdf5array
  (package
    (name "r-hdf5array")
    (version "1.16.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "HDF5Array" version))
              (sha256
               (base32
                "01767v90nl0499jcicpxngbbs0af5p9c5aasi5va01w3v5bnqddn"))))
    (properties '((upstream-name . "HDF5Array")))
    (build-system r-build-system)
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-rhdf5" ,r-rhdf5)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/HDF5Array")
    (synopsis "HDF5 back end for DelayedArray objects")
    (description
     "This package provides an array-like container for convenient
access and manipulation of HDF5 datasets. It supports delayed operations and
block processing.")
    (license license:artistic2.0)))
10839
;; Bioconductor package "Rhdf5lib".  The bundled Windows binaries and the
;; bundled HDF5 tarball are removed from the source; a custom phase unpacks
;; the sources of hdf5-1.10 instead and points Makevars at the static
;; libraries of the "hdf5" input.
(define-public r-rhdf5lib
  (package
    (name "r-rhdf5lib")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhdf5lib" version))
              (sha256
               (base32
                "09ylwyk6a8sdrmi1mx7vpycpykqlqylmwa973g6jrcmk0h0qfa4w"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled binaries
                  (for-each delete-file-recursively
                            '("src/wininclude/"
                              "src/winlib-4.9.3/"
                              "src/winlib-8.3.0/"))
                  (delete-file "src/hdf5small_cxx_hl_1.10.6.tar.gz")
                  #t))))
    (properties '((upstream-name . "Rhdf5lib")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Store directory of the "hdf5" input; its static libraries
             ;; are referenced from Makevars below.
             (let ((hdf5 (assoc-ref inputs "hdf5")))
               (for-each delete-file '("configure" "configure.ac"))
               ;; Do not make other packages link with the proprietary libsz.
               ;; Each libsz.a entry is swapped for another libhdf5.a entry,
               ;; which keeps the number of "%s" placeholders unchanged.
               (substitute* "R/zzz.R"
                 (("'\"%s/libhdf5.a\" \"%s/libsz.a\" -lz'")
                  "'\"%s/libhdf5.a\" \"%s/libhdf5.a\" -lz'")
                 (("'\"%s/libhdf5_cpp.a\" \"%s/libhdf5.a\" \"%s/libsz.a\" -lz'")
                  "'\"%s/libhdf5_cpp.a\" \"%s/libhdf5.a\" \"%s/libhdf5.a\" -lz'")
                 (("'%s/libhdf5_hl.a %s/libhdf5.a %s/libsz.a -lz'")
                  "'%s/libhdf5_hl.a %s/libhdf5.a %s/libhdf5.a -lz'")
                 (("'%s/libhdf5_hl_cpp.a %s/libhdf5_hl.a %s/libhdf5_cpp.a %s/libhdf5.a %s/libsz.a -lz'")
                  "'%s/libhdf5_hl_cpp.a %s/libhdf5_hl.a %s/libhdf5_cpp.a %s/libhdf5.a %s/libhdf5.a -lz'"))
               (with-directory-excursion "src"
                 ;; Unpack the HDF5 sources and give them a fixed name.
                 (invoke "tar" "xvf" (assoc-ref inputs "hdf5-source"))
                 (rename-file (string-append "hdf5-"
                                             ,(package-version hdf5-1.10))
                              "hdf5")
                 ;; Remove timestamp and host system information to make
                 ;; the build reproducible.
                 (substitute* "hdf5/src/libhdf5.settings.in"
                   (("Configured on: @CONFIG_DATE@")
                    "Configured on: Guix")
                   (("Uname information:.*")
                    "Uname information: Linux\n")
                   ;; Remove unnecessary store reference.
                   (("C Compiler:.*")
                    "C Compiler: GCC\n"))
                 ;; The configure script was deleted above, so fill in the
                 ;; Makevars template by hand.
                 (rename-file "Makevars.in" "Makevars")
                 (substitute* "Makevars"
                   (("@ZLIB_LIB@") "-lz")
                   (("@ZLIB_INCLUDE@") "")
                   (("HDF5_CXX_LIB=.*")
                    (string-append "HDF5_CXX_LIB=" hdf5
                                   "/lib/libhdf5_cpp.a\n"))
                   (("HDF5_LIB=.*")
                    (string-append "HDF5_LIB=" hdf5
                                   "/lib/libhdf5.a\n"))
                   (("HDF5_CXX_INCLUDE=.*")
                    "HDF5_CXX_INCLUDE=./hdf5/c++/src\n")
                   (("HDF5_INCLUDE=.*")
                    "HDF5_INCLUDE=./hdf5/src\n")
                   (("HDF5_HL_INCLUDE=.*")
                    "HDF5_HL_INCLUDE=./hdf5/hl/src\n")
                   (("HDF5_HL_CXX_INCLUDE=.*")
                    "HDF5_HL_CXX_INCLUDE=./hdf5/hl/c++/src\n")
                   (("HDF5_HL_LIB=.*")
                    (string-append "HDF5_HL_LIB=" hdf5
                                   "/lib/libhdf5_hl.a\n"))
                   (("HDF5_HL_CXX_LIB=.*")
                    (string-append "HDF5_HL_CXX_LIB=" hdf5
                                   "/lib/libhdf5_hl_cpp.a\n"))
                   ;; szip is non-free software
                   (("cp \"\\$\\{SZIP_LIB\\}.*") "")
                   (("PKG_LIBS =.*") "PKG_LIBS = -lz -lhdf5\n")))
               #t))))))
    (inputs `(("zlib" ,zlib)))
    (propagated-inputs `(("hdf5" ,hdf5-1.10)))
    (native-inputs
     `(("hdf5-source" ,(package-source hdf5-1.10))
       ("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/Rhdf5lib")
    (synopsis "HDF5 library as an R package")
    (description
     "This package provides C and C++ HDF5 libraries for use in R
packages.")
    (license license:artistic2.0)))
10928
;; Bioconductor package "beachmat": a C++ class interface for matrix types.
(define-public r-beachmat
  (package
    (name "r-beachmat")
    (version "2.4.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "beachmat" version))
              (sha256
               (base32
                "1vl6jbf9ia78cm4ikdb8vz04jv4b46zhvg5i006c63a9pzw7zhxi"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-matrix" ,r-matrix)))
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/beachmat")
    (synopsis "Compiling Bioconductor to handle each matrix type")
    (description
     "This package provides a consistent C++ class interface for a
variety of commonly used matrix types, including sparse and HDF5-backed
matrices.")
    (license license:gpl3)))
10953
;; Bioconductor package "SingleCellExperiment"; the mixed-case upstream
;; spelling is recorded in the upstream-name property.
(define-public r-singlecellexperiment
  (package
    (name "r-singlecellexperiment")
    (version "1.10.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "SingleCellExperiment" version))
              (sha256
               (base32
                "092wvk11n7pa234vlwhxm3gdi4k3sbnz1splhxalbdhz3jf02zfp"))))
    (properties '((upstream-name . "SingleCellExperiment")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/SingleCellExperiment")
    (synopsis "S4 classes for single cell data")
    (description
     "This package defines an S4 class for storing data from
single-cell experiments. This includes specialized methods to store and
retrieve spike-in information, dimensionality reduction coordinates and size
factors for each cell, along with the usual metadata for genes and
libraries.")
    (license license:gpl3)))
10982
;; Bioconductor package "scater": single-cell analysis toolkit.
(define-public r-scater
  (package
    (name "r-scater")
    (version "1.16.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "scater" version))
       (sha256
        (base32 "1pa5wvgjb30rw1vsjwbnn07ss3sc5n8ck5d7khdby4r2s9177s33"))))
    (build-system r-build-system)
    ;; R packages that are propagated to users of this package.
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocneighbors" ,r-biocneighbors)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biocsingular" ,r-biocsingular)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-matrix" ,r-matrix)
       ("r-rcpp" ,r-rcpp)
       ("r-rlang" ,r-rlang)
       ("r-s4vectors" ,r-s4vectors)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-viridis" ,r-viridis)))
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/davismcc/scater")
    (synopsis "Single-cell analysis toolkit for gene expression data in R")
    (description
     "This package provides a collection of tools for doing
various analyses of single-cell RNA-seq gene expression data, with a focus on
quality control.")
    (license license:gpl2+)))
11019
;; Bioconductor package "scran": methods for single-cell RNA-Seq analysis.
(define-public r-scran
  (package
    (name "r-scran")
    (version "1.16.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "scran" version))
              (sha256
               (base32
                "1gm4ys4aq8h1pn45k1rxk384wjyf55izivw8kgxbrflj6j4xvvsv"))))
    (build-system r-build-system)
    ;; R packages that are propagated to users of this package; r-scater is
    ;; among them.
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-bh" ,r-bh)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocneighbors" ,r-biocneighbors)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biocsingular" ,r-biocsingular)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dqrng" ,r-dqrng)
       ("r-edger" ,r-edger)
       ("r-igraph" ,r-igraph)
       ("r-iranges" ,r-iranges)
       ("r-limma" ,r-limma)
       ("r-matrix" ,r-matrix)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scater" ,r-scater)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-statmod" ,r-statmod)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/scran")
    (synopsis "Methods for single-cell RNA-Seq data analysis")
    (description
     "This package implements a variety of low-level analyses of
single-cell RNA-seq data. Methods are provided for normalization of
cell-specific biases, assignment of cell cycle phase, and detection of highly
variable and significantly correlated genes.")
    (license license:gpl3)))
11062
;; Bioconductor package "DelayedMatrixStats"; the mixed-case upstream
;; spelling is recorded in the upstream-name property.
(define-public r-delayedmatrixstats
  (package
    (name "r-delayedmatrixstats")
    (version "1.10.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DelayedMatrixStats" version))
              (sha256
               (base32
                "0i6gw8058gw9yajya3w9qq6l3p634fq9sv8dh8ifigxz0k9b98r6"))))
    (properties '((upstream-name . "DelayedMatrixStats")))
    (build-system r-build-system)
    ;; R packages that are propagated to users of this package.
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-delayedarray" ,r-delayedarray)
       ("r-hdf5array" ,r-hdf5array)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-matrixstats" ,r-matrixstats)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/PeteHaitch/DelayedMatrixStats")
    (synopsis "Functions that apply to rows and columns of DelayedMatrix objects")
    (description
     "This package provides a port of the @code{matrixStats} API for use with
@code{DelayedMatrix} objects from the @code{DelayedArray} package. It
contains high-performing functions operating on rows and columns of
@code{DelayedMatrix} objects, e.g. @code{colMedians}, @code{rowMedians},
@code{colRanks}, @code{rowRanks}, @code{colSds}, and @code{rowSds}. Functions
are optimized per data type and for subsetted calculations such that both
memory usage and processing time is minimized.")
    (license license:expat)))
11098
;; "phangorn" is fetched from CRAN (cran-uri), unlike the Bioconductor
;; packages defined around it.
(define-public r-phangorn
  (package
    (name "r-phangorn")
    (version "2.5.5")
    (source (origin
              (method url-fetch)
              (uri (cran-uri "phangorn" version))
              (sha256
               (base32
                "0ihkaykqjmf80d8wrk3saphxvnv58zma6pd13633bd3cwanc33f5"))))
    (build-system r-build-system)
    ;; R packages that are propagated to users of this package.
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-fastmatch" ,r-fastmatch)
       ("r-igraph" ,r-igraph)
       ("r-magrittr" ,r-magrittr)
       ("r-matrix" ,r-matrix)
       ("r-quadprog" ,r-quadprog)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/KlausVigo/phangorn")
    (synopsis "Phylogenetic analysis in R")
    (description
     "Phangorn is a package for phylogenetic analysis in R. It supports
estimation of phylogenetic trees and networks using Maximum Likelihood,
Maximum Parsimony, distance methods and Hadamard conjugation.")
    (license license:gpl2+)))
11126
;; "dropbead" is built from a pinned git commit.  The version string is
;; "0-" followed by the revision, a dot, and the first seven characters of
;; the commit.
(define-public r-dropbead
  (let ((revision "2")
        (commit "d746c6f3b32110428ea56d6a0001ce52a251c247"))
    (package
      (name "r-dropbead")
      (version (git-version "0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/rajewsky-lab/dropbead")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0sbzma49aiiyw8b0jpr7fnhzys9nsqmp4hy4hdz1gzyg1lhnca26"))))
      (build-system r-build-system)
      ;; R packages that are propagated to users of this package.
      (propagated-inputs
       `(("r-ggplot2" ,r-ggplot2)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-gridextra" ,r-gridextra)
         ("r-gplots" ,r-gplots)
         ("r-plyr" ,r-plyr)))
      (home-page "https://github.com/rajewsky-lab/dropbead")
      (synopsis "Basic exploration and analysis of Drop-seq data")
      (description
       "This package offers a quick and straight-forward way to
explore and perform basic analysis of single cell sequencing data coming from
droplet sequencing. It has been particularly tailored for Drop-seq.")
      (license license:gpl3))))
11156
;; Private (not exported) variant of htslib, pinned to a commit of the
;; lomereiter/htslib repository and used as the "htslib" input of sambamba.
;; Everything not overridden here is inherited from htslib.
(define htslib-for-sambamba
  (let ((commit "2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5"))
    (package
      (inherit htslib)
      (name "htslib-for-sambamba")
      ;; The version ends in the first nine characters of the commit.
      (version (string-append "1.3.1-1." (string-take commit 9)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/lomereiter/htslib")
                      (commit commit)))
                ;; The checkout is named after "htslib", not after this
                ;; package's own name.
                (file-name (git-file-name "htslib" version))
                (sha256
                 (base32
                  "0g38g8s3npr0gjm9fahlbhiskyfws9l5i0x1ml3rakzj7az5l9c9"))))
      ;; autoconf and automake are added in front of htslib's own native
      ;; inputs.
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ,@(package-native-inputs htslib))))))
11177
;; sambamba is built from a git tag with the GNU build system, minus the
;; configure phase.  The Makefile is patched to use ldc2 and to link against
;; the htslib and lz4 inputs instead of the static libraries it names.
(define-public sambamba
  (package
    (name "sambamba")
    (version "0.7.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/lomereiter/sambamba")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "111h05b60pj8dxbidiamy4imc92x2962b3lmb7wgysl6lx064qis"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test target
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'fix-ldc-version
           (lambda _
             ;; Replace the "/usr/bin/env ..." reference with the python3
             ;; found in PATH.
             (substitute* "gen_ldc_version_info.py"
               (("/usr/bin/env.*") (which "python3")))
             (substitute* "Makefile"
               ;; We use ldc2 instead of ldmd2 to compile sambamba.
               (("\\$\\(shell which ldmd2\\)") (which "ldc2")))
             #t))
         ;; Despite its name, this phase only copies the BioD checkout.
         (add-after 'unpack 'place-biod-and-undead
           (lambda* (#:key inputs #:allow-other-keys)
             (copy-recursively (assoc-ref inputs "biod") "BioD")
             #t))
         (add-after 'unpack 'unbundle-prerequisites
           (lambda _
             ;; Link with -lhts and -llz4 and drop the targets that would
             ;; build the static copies.
             (substitute* "Makefile"
               (("htslib/libhts.a lz4/lib/liblz4.a")
                "-L-lhts -L-llz4")
               ((" lz4-static htslib-static") ""))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install the versioned binary under the plain name "sambamba".
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bin)
               (copy-file (string-append "bin/sambamba-" ,version)
                          (string-append bin "/sambamba"))
               #t))))))
    (native-inputs
     `(("ldc" ,ldc)
       ("rdmd" ,rdmd)
       ("python" ,python)
       ;; BioD sources, pinned to a commit; copied into the build tree by
       ;; the 'place-biod-and-undead phase.
       ("biod"
        ,(let ((commit "7969eb0a847b05874e83ffddead26e193ece8101"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/biod/BioD")
                   (commit commit)))
             (file-name (git-file-name "biod" (string-take commit 9)))
             (sha256
              (base32
               "0mjxsmbmv0jxl3pq21p8j5r829d648if8q58ka50b2956lc6qkpm")))))))
    (inputs
     `(("lz4" ,lz4)
       ("htslib" ,htslib-for-sambamba)))
    (home-page "https://lomereiter.github.io/sambamba/")
    (synopsis "Tools for working with SAM/BAM data")
    (description
     "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
working with SAM and BAM files. Current parallelised functionality is
an important subset of samtools functionality, including view, index,
sort, markdup, and depth.")
    (license license:gpl2+)))
11254
;; Ritornello is built from a git tag with the GNU build system, minus the
;; configure phase; the single binary is installed by a custom phase.
(define-public ritornello
  (package
    (name "ritornello")
    (version "2.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/KlugerLab/Ritornello")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1xahvq215qld7x1w8vpa5zbrsj6p9crb9shqa2x89sb0aaxa02jk"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-samtools-references
           (lambda _
             ;; Include sam.h via the "samtools/" prefix.
             (substitute* '("src/SamStream.h"
                            "src/FLD.cpp")
               (("<sam.h>") "<samtools/sam.h>"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bin)
               (install-file "bin/Ritornello" bin)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("fftw" ,fftw)
       ("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/KlugerLab/Ritornello")
    (synopsis "Control-free peak caller for ChIP-seq data")
    (description
     "Ritornello is a ChIP-seq peak calling algorithm based on
signal processing that can accurately call binding events without the need to
do a pair total DNA input or IgG control sample. It has been tested for use
with narrow binding events such as transcription factor ChIP-seq.")
    (license license:gpl3+)))
11299
;; Trim Galore is a single script: there is no build phase, the configure
;; phase is replaced by a patch to the script, and the install phase copies
;; the script into bin/.
(define-public trim-galore
  (package
    (name "trim-galore")
    (version "0.6.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/FelixKrueger/TrimGalore")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1y31wbxwkm9xqzr5zv1pk5q418whnmlmgmfyxxpnl12h83m2i9iv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda _
             ;; Trim Galore tries to figure out what version of Python
             ;; cutadapt is using by looking at the shebang.  Of course that
             ;; doesn't work, because cutadapt is wrapped in a shell script.
             (substitute* "trim_galore"
               (("my \\$python_return.*")
                "my $python_return = \"Python 3.999\";\n"))
             #t))
         (delete 'build)
         (add-after 'unpack 'hardcode-tool-references
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Refer to the external tools by the absolute file names of
             ;; the corresponding inputs.
             (let ((cutadapt-dir (assoc-ref inputs "cutadapt"))
                   (gzip-dir (assoc-ref inputs "gzip"))
                   (pigz-dir (assoc-ref inputs "pigz")))
               (substitute* "trim_galore"
                 (("\\$path_to_cutadapt = 'cutadapt'")
                  (string-append "$path_to_cutadapt = '"
                                 cutadapt-dir "/bin/cutadapt'"))
                 (("\\$compression_path = \"gzip\"")
                  (string-append "$compression_path = \""
                                 gzip-dir "/bin/gzip\""))
                 (("\"gunzip")
                  (string-append "\"" gzip-dir "/bin/gunzip"))
                 (("\"pigz")
                  (string-append "\"" pigz-dir "/bin/pigz")))
               #t)))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bin)
               (install-file "trim_galore" bin)
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl" ,perl)
       ("pigz" ,pigz)
       ("cutadapt" ,cutadapt)))
    ;; NOTE(review): the source is a git checkout and none of the phases
    ;; above calls unzip, so this input may be unneeded -- confirm before
    ;; removing.
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
    (synopsis "Wrapper around Cutadapt and FastQC")
    (description
     "Trim Galore! is a wrapper script to automate quality and
adapter trimming as well as quality control, with some added functionality to
remove biased methylation positions for RRBS sequence files.")
    (license license:gpl3+)))
11369
;; GESS is a set of Python 2 sources that is installed verbatim.  GESS.py is
;; made executable, given a shebang, wrapped so that it finds its modules,
;; and finally exposed through a symlink in bin/.
(define-public gess
  (package
    (name "gess")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://compbio.uthscsa.edu/"
                                  "GESS_Web/files/"
                                  "gess-" version ".src.tar.gz"))
              (sha256
               (base32
                "0hyk403kxscclzfs24pvdgiv0wm03kjcziqdrp5w46cb049gz0d7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bin    (string-append out "/bin/"))
                    ;; Name the site-packages directory after the
                    ;; interpreter that is actually listed in the inputs
                    ;; (python-2), not after the default "python" package.
                    (target (string-append
                             out "/lib/python"
                             ,(version-major+minor
                               (package-version python-2))
                             "/site-packages/gess/")))
               (mkdir-p target)
               (copy-recursively "." target)
               ;; Make GESS.py executable
               (chmod (string-append target "GESS.py") #o555)
               ;; Add Python shebang to the top and make Matplotlib
               ;; usable.
               (substitute* (string-append target "GESS.py")
                 (("\"\"\"Description:" line)
                  (string-append "#!" (which "python")
                                 "\nimport matplotlib\nmatplotlib.use('Agg')\n"
                                 line)))
               ;; Make sure GESS has all modules in its path
               (wrap-script (string-append target "GESS.py")
                 `("PYTHONPATH" ":" = (,target ,(getenv "PYTHONPATH"))))
               (mkdir-p bin)
               (symlink (string-append target "GESS.py")
                        (string-append bin "GESS.py"))
               #t))))))
    (inputs
     `(("python" ,python-2)
       ("python2-pysam" ,python2-pysam)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-networkx" ,python2-networkx)
       ("python2-biopython" ,python2-biopython)
       ("guile" ,guile-3.0)))           ; for the script wrapper
    (home-page "https://compbio.uthscsa.edu/GESS_Web/")
    (synopsis "Detect exon-skipping events from raw RNA-seq data")
    (description
     "GESS is an implementation of a novel computational method to detect de
novo exon-skipping events directly from raw RNA-seq data without the prior
knowledge of gene annotation information. GESS stands for the graph-based
exon-skipping scanner detection scheme.")
    (license license:bsd-3)))
11434
;; PHYLIP is built from its "src" directory with "Makefile.unx".  The make
;; flags include the "install" target; the custom install phase then copies
;; every file found under "../exe" into the output's bin directory.
(define-public phylip
  (package
    (name "phylip")
    (version "3.696")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://evolution.gs.washington.edu/phylip/"
                           "download/phylip-" version ".tar.gz"))
       (sha256
        (base32
         "01jar1rayhr2gba2pgbw49m56rc5z4p5wn3ds0m188hrlln4a2nd"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags (list "-f" "Makefile.unx" "install")
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _
             ;; The makefile lives in the "src" sub-directory.
             (chdir "src")
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin"))
                    (built  (find-files "../exe" ".*")))
               (mkdir-p bindir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         built))
             #t)))))
    (home-page "http://evolution.genetics.washington.edu/phylip/")
    (synopsis "Tools for inferring phylogenies")
    (description "PHYLIP (the PHYLogeny Inference Package) is a package of
programs for inferring phylogenies (evolutionary trees).")
    (license license:bsd-2)))
11471
;; The Integrative Modeling Platform, built with CMake against Python 2.
(define-public imp
  (package
    (name "imp")
    (version "2.6.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://integrativemodeling.org/"
                           version "/download/imp-" version ".tar.gz"))
       (sha256
        (base32
         "0lxqx7vh79d771svr611dkilp6sn30qrbw8zvscbrm37v38d2j6h"))))
    (build-system cmake-build-system)
    (arguments
     `(;; FIXME: Some tests fail because they produce warnings, others fail
       ;; because the PYTHONPATH does not include the modeller's directory.
       #:tests? #f))
    ;; SWIG is a code generator that is run on the build machine only, so it
    ;; is a native input rather than a regular input.
    (native-inputs
     `(("swig" ,swig)))
    (inputs
     `(("boost" ,boost)
       ("gsl" ,gsl)
       ("hdf5" ,hdf5)
       ("fftw" ,fftw)
       ("python" ,python-2)))
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pandas" ,python2-pandas)
       ("python2-scikit-learn" ,python2-scikit-learn)
       ("python2-networkx" ,python2-networkx)))
    (home-page "https://integrativemodeling.org")
    (synopsis "Integrative modeling platform")
    (description "IMP's broad goal is to contribute to a comprehensive
structural characterization of biomolecules ranging in size and complexity
from small peptides to large macromolecular assemblies, by integrating data
from diverse biochemical and biophysical experiments. IMP provides a C++ and
Python toolbox for solving complex modeling problems, and a number of
applications for tackling some common problems in a user-friendly way.")
    ;; IMP is largely available under the GNU Lesser GPL; see the file
    ;; COPYING.LGPL for the full text of this license.  Some IMP modules are
    ;; available under the GNU GPL (see the file COPYING.GPL).
    (license (list license:lgpl2.1+
                   license:gpl3+))))
11515
;; TADbit is built with the Python build system using Python 2.  A single
;; extra phase adjusts the checkout so that setup.py can run and installs the
;; bash completions into the package output.
(define-public tadbit
  (package
    (name "tadbit")
    (version "0.2.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/3DGenomes/TADbit")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "07g3aj648prmsvxp9caz5yl41k0y0647vxh0f5p3w8376mfiljd0"))))
    (build-system python-build-system)
    (arguments
     `(;; Tests are included and must be run after installation, but
       ;; they are incomplete and thus cannot be run.
       #:tests? #f
       #:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-problems-with-setup.py
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((completion-dir (string-append (assoc-ref outputs "out")
                                                  "/etc/bash_completion.d")))
               ;; setup.py opens these files for writing
               (for-each (lambda (file)
                           (chmod file #o664))
                         '("_pytadbit/_version.py"
                           "README.rst"))

               ;; Don't attempt to install the bash completions to
               ;; the home directory.
               (rename-file "extras/.bash_completion"
                            "extras/tadbit")
               (substitute* "setup.py"
                 (("\\(path.expanduser\\('~'\\)")
                  (string-append "(\"" completion-dir "\""))
                 (("extras/\\.bash_completion")
                  "extras/tadbit")))
             #t)))))
    (inputs
     ;; TODO: add Chimera for visualization
     `(("imp" ,imp)
       ("mcl" ,mcl)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-matplotlib" ,python2-matplotlib)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://3dgenomes.github.io/TADbit/")
    (synopsis "Analyze, model, and explore 3C-based data")
    (description
     "TADbit is a complete Python library to deal with all steps to analyze,
model, and explore 3C-based data. With TADbit the user can map FASTQ files to
obtain raw interaction binned matrices (Hi-C like matrices), normalize and
correct interaction matrices, identify and compare the so-called
@dfn{Topologically Associating Domains} (TADs), build 3D models from the
interaction matrices, and finally, extract structural properties from the
models. TADbit is complemented by TADkit for visualizing 3D models.")
    (license license:gpl3+)))
11574
;; kentUtils: only the freely usable parts of the upstream tree are kept (see
;; the source snippet), and only the "utils" target is built.
(define-public kentutils
  (package
    (name "kentutils")
    ;; 302.1.0 is out, but the only difference is the inclusion of
    ;; pre-built binaries.
    (version "302.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ENCODE-DCC/kentUtils")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0n1wbyjpzii2b9qhyp9r1q76j623cggpg3y8fmw78ld3z4y7ivha"))
       (modules '((guix build utils)
                  (srfi srfi-26)
                  (ice-9 ftw)))
       (snippet
        '(begin
           ;; Only the contents of the specified directories are free
           ;; for all uses, so we remove the rest.  "hg/autoSql" and
           ;; "hg/autoXml" are nominally free, but they depend on a
           ;; library that is built from the sources in "hg/lib",
           ;; which is nonfree.
           (let* ((keep       '("." ".."
                                "utils" "lib" "inc" "tagStorm"
                                "parasol" "htslib"))
                  (unwanted?  (lambda (entry)
                                (not (member entry keep))))
                  (directory? (lambda (file)
                                (eq? 'directory (stat:type (stat file))))))
             ;; Plain files directly under "src" are left alone; only
             ;; directories that are not on the keep list are removed.
             (for-each (lambda (entry)
                         (let ((path (string-append "src/" entry)))
                           (when (directory? path)
                             (delete-file-recursively path))))
                       (scandir "src" unwanted?)))
           ;; Only make the utils target, not the userApps target,
           ;; because that requires libraries we won't build.
           (substitute* "Makefile"
             ((" userApps") " utils"))
           ;; Only build libraries that are free.
           (substitute* "src/makefile"
             (("DIRS =.*") "DIRS =\n")
             (("cd jkOwnLib.*") "")
             ((" hgLib") "")
             (("cd hg.*") ""))
           (substitute* "src/utils/makefile"
             ;; These tools depend on "jkhgap.a", which is part of the
             ;; nonfree "src/hg/lib" directory.
             (("raSqlQuery") "")
             (("pslLiftSubrangeBlat") "")

             ;; Do not build UCSC tools, which may require nonfree
             ;; components.
             (("ALL_APPS =.*") "ALL_APPS = $(UTILS_APPLIST)\n"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There is no global test target and the test target for
       ;; individual tools depends on input files that are not
       ;; included.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-permissions
           (lambda _
             (make-file-writable "src/inc/localEnvironment.mk")
             #t))
         (add-after 'unpack 'fix-paths
           (lambda _
             ;; Use the "echo" found on PATH instead of /bin/echo.
             (substitute* "Makefile"
               (("/bin/echo") (which "echo")))
             #t))
         (add-after 'unpack 'prepare-samtabix
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Copy the samtabix checkout into the build tree.
             (let ((samtabix (assoc-ref inputs "samtabix")))
               (copy-recursively samtabix "samtabix"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (copy-recursively "bin" bin))
             #t)))))
    (native-inputs
     `(("samtabix"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://genome-source.cse.ucsc.edu/samtabix.git")
                 (commit "10fd107909c1ac4d679299908be4262a012965ba")))
           (sha256
            (base32
             "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma"))))))
    (inputs
     `(("zlib" ,zlib)
       ("tcsh" ,tcsh)
       ("perl" ,perl)
       ("libpng" ,libpng)
       ("mariadb" ,mariadb "lib")
       ("mariadb-dev" ,mariadb "dev")
       ("openssl" ,openssl-1.0)))
    (home-page "https://genome.cse.ucsc.edu/index.html")
    (synopsis "Assorted bioinformatics utilities")
    (description "This package provides the kentUtils, a selection of
bioinformatics utilities used in combination with the UCSC genome
browser.")
    ;; Only a subset of the sources are released under a non-copyleft
    ;; free software license.  All other sources are removed in a
    ;; snippet.  See this bug report for an explanation of how the
    ;; license statements apply:
    ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
    (license (license:non-copyleft
              "http://genome.ucsc.edu/license/"
              "The contents of this package are free for all uses."))))
11691
;; F-Seq is built with Ant from a pinned Git commit.  Bundled jar archives
;; are removed from the source and the launcher script is patched to refer to
;; the Java runtime and the commons-cli jar from the package inputs.
(define-public f-seq
  (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
        (revision "1"))
    (package
      (name "f-seq")
      ;; Same "1.1-REVISION.COMMIT7" string as before, now produced by the
      ;; standard helper.
      (version (git-version "1.1" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/aboyle/F-seq")
                      (commit commit)))
                ;; Name the checkout like the other Git-based packages in
                ;; this file.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
                (modules '((guix build utils)))
                ;; Remove bundled Java library archives.
                (snippet
                 '(begin
                    (for-each delete-file (find-files "lib" ".*"))
                    #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; no tests included
         #:phases
         (modify-phases %standard-phases
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((target (assoc-ref outputs "out"))
                      (bin (string-append target "/bin"))
                      (doc (string-append target "/share/doc/f-seq"))
                      (lib (string-append target "/lib")))
                 (mkdir-p target)
                 (mkdir-p doc)
                 ;; Point the launcher at absolute file names: the "java"
                 ;; executable found on PATH, the commons-cli jar from the
                 ;; inputs, and this package's own bin directory.
                 (substitute* "bin/linux/fseq"
                   (("java") (which "java"))
                   (("\\$REALDIR/../lib/commons-cli-1.1.jar")
                    (string-append (assoc-ref inputs "java-commons-cli")
                                   "/share/java/commons-cli.jar"))
                   (("REALDIR=.*")
                    (string-append "REALDIR=" bin "\n")))
                 (install-file "README.txt" doc)
                 (install-file "bin/linux/fseq" bin)
                 (install-file "build~/fseq.jar" lib)
                 (copy-recursively "lib" lib)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("java-commons-cli" ,java-commons-cli)))
      (home-page "http://fureylab.web.unc.edu/software/fseq/")
      (synopsis "Feature density estimator for high-throughput sequence tags")
      (description
       "F-Seq is a software package that generates a continuous tag sequence
density estimation allowing identification of biologically meaningful sites
such as transcription factor binding sites (ChIP-seq) or regions of open
chromatin (DNase-seq). Output can be displayed directly in the UCSC Genome
Browser.")
      (license license:gpl3+))))
11750
;; Bismark is a collection of scripts; nothing is configured or compiled.
;; The bundled plotly.js is replaced by a copy minified at build time from the
;; "plotly.js" input, and the scripts are installed by hand.
(define-public bismark
  (package
    (name "bismark")
    (version "0.20.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/Bismark")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0xchm3rgilj6vfjnyzfzzymfd7djr64sbrmrvs3njbwi66jqbzw9"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:modules ((guix build utils)
                  (ice-9 popen)
                  (srfi srfi-26)
                  (guix build perl-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled plotly.js is minified.
         (add-after 'unpack 'replace-plotly.js
           (lambda* (#:key inputs #:allow-other-keys)
             (let* ((file (assoc-ref inputs "plotly.js"))
                    (installed "plotly/plotly.js")
                    (minified (open-pipe* OPEN_READ "uglify-js" file)))
               (call-with-output-file installed
                 (cut dump-port minified <>))
               ;; Close the pipe to reap the child process, and fail the
               ;; build if the minifier did not exit successfully; otherwise
               ;; a truncated plotly.js would be installed silently.
               (unless (eqv? 0 (status:exit-val (close-pipe minified)))
                 (error "uglify-js failed to minify" file)))
             #t))
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (share (string-append out "/share/bismark"))
                    (docdir (string-append out "/share/doc/bismark"))
                    (docs '("Docs/Bismark_User_Guide.html"))
                    (scripts '("bismark"
                               "bismark_genome_preparation"
                               "bismark_methylation_extractor"
                               "bismark2bedGraph"
                               "bismark2report"
                               "coverage2cytosine"
                               "deduplicate_bismark"
                               "filter_non_conversion"
                               "bam2nuc"
                               "bismark2summary"
                               "NOMe_filtering")))
               ;; bismark2report looks for plotly next to itself; point it
               ;; at the share directory instead.
               (substitute* "bismark2report"
                 (("\\$RealBin/plotly")
                  (string-append share "/plotly")))
               (mkdir-p share)
               (mkdir-p docdir)
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin))
                         scripts)
               (for-each (lambda (file) (install-file file docdir))
                         docs)
               (copy-recursively "Docs/Images" (string-append docdir "/Images"))
               (copy-recursively "plotly"
                                 (string-append share "/plotly"))

               ;; Fix references to gunzip
               (substitute* (map (lambda (file)
                                   (string-append bin "/" file))
                                 scripts)
                 (("\"gunzip -c")
                  (string-append "\"" (assoc-ref inputs "gzip")
                                 "/bin/gunzip -c")))
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl-carp" ,perl-carp)
       ("perl-getopt-long" ,perl-getopt-long)))
    (native-inputs
     `(("plotly.js"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/plotly/plotly.js/"
                               "v1.39.4/dist/plotly.js"))
           (sha256
            (base32 "138mwsr4nf5qif4mrxx286mpnagxd1xwl6k8aidrjgknaqg88zyr"))))
       ("uglify-js" ,uglify-js)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/bismark/")
    (synopsis "Map bisulfite treated sequence reads and analyze methylation")
    (description "Bismark is a program to map bisulfite treated sequencing
reads to a genome of interest and perform methylation calls in a single step.
The output can be easily imported into a genome viewer, such as SeqMonk, and
enables a researcher to analyse the methylation levels of their samples
straight away. Its main features are:

@itemize
@item Bisulfite mapping and methylation calling in one single step
@item Supports single-end and paired-end read alignments
@item Supports ungapped and gapped alignments
@item Alignment seed length, number of mismatches etc are adjustable
@item Output discriminates between cytosine methylation in CpG, CHG
and CHH context
@end itemize\n")
    (license license:gpl3+)))
11855
;; PAML is compiled from its "src" directory; the resulting tools and the
;; documentation directory are installed by hand.
(define-public paml
  (package
    (name "paml")
    (version "4.9e")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://abacus.gene.ucl.ac.uk/software/"
                                  "paml" version ".tgz"))
              (sha256
               (base32
                "13zf6h9fiqghwhch2h06x1zdr6s42plsnqahflp5g7myr3han3s6"))
              (modules '((guix build utils)))
              ;; Remove Windows binaries
              (snippet
               '(begin
                  (for-each delete-file (find-files "." "\\.exe$"))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda _
             ;; Replace the hard-coded shell location, then enter the
             ;; directory that the remaining phases operate in.
             (substitute* "src/BFdriver.c"
               (("/bin/bash") (which "bash")))
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((tools '("baseml" "basemlg" "codeml"
                            "pamp" "evolver" "yn00" "chi2"))
                   (bin    (string-append (assoc-ref outputs "out") "/bin"))
                   (docdir (string-append (assoc-ref outputs "out")
                                          "/share/doc/paml")))
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin)) tools)
               ;; The current directory is "src", so the documentation is
               ;; one level up.
               (copy-recursively "../doc" docdir)
               #t))))))
    (home-page "http://abacus.gene.ucl.ac.uk/software/paml.html")
    (synopsis "Phylogenetic analysis by maximum likelihood")
    (description "PAML (for Phylogenetic Analysis by Maximum Likelihood)
contains a few programs for model fitting and phylogenetic tree reconstruction
using nucleotide or amino-acid sequence data.")
    ;; GPLv3 only
    (license license:gpl3)))
11903
;; Kallisto is built with CMake.  Its build files are patched so that the
;; "hts" library from the inputs is linked instead of the copy under
;; ext/htslib.
(define-public kallisto
  (package
    (name "kallisto")
    (version "0.44.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pachterlab/kallisto")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0nj382jiywqnpgvyhichajpkkh5r0bapn43f4dx40zdaq5v4m40m"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-htslib
           (lambda _
             ;; Wrap the ExternalProject_Add block in a condition that is
             ;; never true and comment out the include directory that
             ;; refers to htslib_PREFIX.
             (substitute* "CMakeLists.txt"
               (("^ExternalProject_Add" directive)
                (string-append "if (NEVER)\n" directive))
               (("^\\)")
                ")\nendif(NEVER)")
               (("include_directories\\(\\$\\{htslib_PREFIX.*" directive)
                (string-append "# " directive)))
             ;; Link against "hts" rather than the static archive from the
             ;; ext/htslib directory.
             (substitute* "src/CMakeLists.txt"
               (((string-append
                  "target_link_libraries\\(kallisto kallisto_core pthread "
                  "\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/../ext/htslib/libhts.a\\)"))
                "target_link_libraries(kallisto kallisto_core pthread hts)")
               (("include_directories\\(\\.\\./ext/htslib\\)") ""))
             #t)))))
    (inputs
     `(("hdf5" ,hdf5)
       ("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://pachterlab.github.io/kallisto/")
    (synopsis "Near-optimal RNA-Seq quantification")
    (description
     "Kallisto is a program for quantifying abundances of transcripts from
RNA-Seq data, or more generally of target sequences using high-throughput
sequencing reads. It is based on the novel idea of pseudoalignment for
rapidly determining the compatibility of reads with targets, without the need
for alignment. Pseudoalignment of reads preserves the key information needed
for quantification, and kallisto is therefore not only fast, but also as
accurate as existing quantification tools.")
    (license license:bsd-2)))
11952
;; libgff: a GFF/GTF parser library built with CMake; it has no test suite.
(define-public libgff
  (package
    (name "libgff")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Kingsford-Group/libgff")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0n6vfjnq7a2mianipscbshrvbncss8z4zkgkbjw754p9043nfkps"))))
    (build-system cmake-build-system)
    (arguments
     ;; No tests included.
     '(#:tests? #f))
    (home-page "https://github.com/Kingsford-Group/libgff")
    (synopsis "Parser library for reading/writing GFF files")
    (description "This is a simple \"libraryfication\" of the GFF/GTF parsing
code that is used in the Cufflinks codebase. The goal of this library is to
provide this functionality without the necessity of drawing in a heavy-weight
dependency like SeqAn.")
    (license (license:x11-style "https://www.boost.org/LICENSE_1_0.txt"))))
11975
;; Sailfish is built with CMake.  Its build files are patched so that Boost,
;; Jellyfish, libdivsufsort and Eigen come from the package inputs, and the
;; RapMap sources are unpacked from the "rapmap" input instead of being
;; downloaded.
(define-public sailfish
  (package
    (name "sailfish")
    (version "0.10.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/kingsfordgroup/sailfish")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1amcc5hqvsl42hg4x19bi9vy47cl874s0lw1fmi0hwsdk9i8c03v"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled headers for eigen3.
           (delete-file-recursively "include/eigen3/")
           #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (let ((boost (assoc-ref %build-inputs "boost")))
         (list (string-append "-DBOOST_INCLUDEDIR=" boost "/include/")
               (string-append "-DBOOST_LIBRARYDIR=" boost "/lib/")
               (string-append "-DBoost_LIBRARIES="
                              "-lboost_iostreams "
                              "-lboost_filesystem "
                              "-lboost_system "
                              "-lboost_thread "
                              "-lboost_timer "
                              "-lboost_chrono "
                              "-lboost_program_options")
               "-DBoost_FOUND=TRUE"
               ;; Don't download RapMap---we already have it!
               "-DFETCHED_RAPMAP=1"))
       ;; Tests must be run after installation and the location of the test
       ;; data file must be overridden.  But the tests fail.  It looks like
       ;; they are not really meant to be run.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.53\\.0") "#"))
             #t))
         (add-after 'unpack 'do-not-assign-to-macro
           (lambda _
             (substitute* "include/spdlog/details/format.cc"
               (("const unsigned CHAR_WIDTH = 1;") ""))
             #t))
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((rapmap      (assoc-ref inputs "rapmap"))
                   (scratch     "/tmp/rapmap")
                   (src-dir     "external/install/src/rapmap/")
                   (include-dir "external/install/include/rapmap/"))
               ;; Unpack the "rapmap" input into a scratch directory, then
               ;; copy its sources and headers to where the build expects
               ;; them.
               (mkdir-p scratch)
               (invoke "tar" "xf" rapmap
                       "-C" scratch
                       "--strip-components=1")
               (mkdir-p src-dir)
               (mkdir-p include-dir)
               (for-each (lambda (source-file)
                           (install-file source-file src-dir))
                         (find-files (string-append scratch "/src")
                                     "\\.(c|cpp)"))
               (copy-recursively (string-append scratch "/include")
                                 include-dir))
             #t))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((jellyfish     (assoc-ref inputs "jellyfish"))
                   (libdivsufsort (assoc-ref inputs "libdivsufsort"))
                   (eigen         (assoc-ref inputs "eigen")))
               ;; Drop the includes of jellyfish/config.h.
               (substitute* '("src/SailfishIndexer.cpp"
                              "src/SailfishUtils.cpp"
                              "src/SailfishQuantify.cpp"
                              "src/FASTAParser.cpp"
                              "include/PCA.hpp"
                              "include/SailfishUtils.hpp"
                              "include/SailfishIndex.hpp"
                              "include/CollapsedEMOptimizer.hpp"
                              "src/CollapsedEMOptimizer.cpp")
                 (("#include \"jellyfish/config.h\"") ""))
               ;; Refer to the headers and libraries of the inputs rather
               ;; than to files below external/install.
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
                  (string-append jellyfish
                                 "/include/jellyfish-" ,(package-version jellyfish)))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
                  (string-append jellyfish
                                 "/lib/libjellyfish-2.0.a"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                  (string-append libdivsufsort
                                 "/lib/libdivsufsort.so"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                  (string-append libdivsufsort
                                 "/lib/libdivsufsort64.so")))
               (substitute* "CMakeLists.txt"
                 ;; Don't prefer static libs
                 (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
                 (("find_package\\(Jellyfish.*") "")
                 (("ExternalProject_Add\\(libjellyfish") "message(")
                 (("ExternalProject_Add\\(libgff") "message(")
                 (("ExternalProject_Add\\(libsparsehash") "message(")
                 (("ExternalProject_Add\\(libdivsufsort") "message("))

               ;; Ensure that Eigen headers can be found
               (setenv "CPLUS_INCLUDE_PATH"
                       (string-append eigen
                                      "/include/eigen3:"
                                      (or (getenv "CPLUS_INCLUDE_PATH") ""))))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("eigen" ,eigen)
       ("jemalloc" ,jemalloc)
       ("jellyfish" ,jellyfish)
       ("sparsehash" ,sparsehash)
       ("rapmap" ,(origin
                    (method git-fetch)
                    (uri (git-reference
                          (url "https://github.com/COMBINE-lab/RapMap")
                          (commit (string-append "sf-v" version))))
                    (file-name (string-append "rapmap-sf-v" version "-checkout"))
                    (sha256
                     (base32
                      "1hv79l5i576ykv5a1srj2p0q36yvyl5966m0fcy2lbi169ipjakf"))
                    (modules '((guix build utils)))
                    ;; These files are expected to be excluded.
                    (snippet
                     '(begin (delete-file-recursively "include/spdlog")
                             (for-each delete-file '("include/xxhash.h"
                                                     "src/xxhash.c"))
                             #t))))
       ("libdivsufsort" ,libdivsufsort)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://www.cs.cmu.edu/~ckingsf/software/sailfish/")
    (synopsis "Mapping-based isoform quantification from RNA-Seq reads")
    (description "Sailfish is a tool for genomic transcript quantification
from RNA-seq data. It requires a set of target transcripts (either from a
reference or de-novo assembly) to quantify. All you need to run sailfish is a
fasta file containing your reference transcripts and a (set of) fasta/fastq
file(s) containing your reads.")
    (license license:gpl3+)))
12125
;; Private (not exported) variant of libstadenio, fetched from the
;; COMBINE-lab repository.
(define libstadenio-for-salmon
  (package
    (name "libstadenio")
    (version "1.14.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/COMBINE-lab/staden-io_lib")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1x8kxxqxl892vwfbprlbyfwkkv7c34ggkc94892x9x0g37x5nbwx"))))
    (build-system gnu-build-system)
    (arguments
     ;; Running the tests in parallel is not supported.
     '(#:parallel-tests? #f))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     ;; Perl is needed for the tests.
     `(("perl" ,perl)))
    (home-page "https://github.com/COMBINE-lab/staden-io_lib")
    (synopsis "General purpose trace and experiment file library")
    (description "This package provides a library of file reading and writing
code to provide a general purpose Trace file (and Experiment File) reading
interface.

The following file formats are supported:

@enumerate
@item SCF trace files
@item ABI trace files
@item ALF trace files
@item ZTR trace files
@item SFF trace archives
@item SRF trace archives
@item Experiment files
@item Plain text files
@item SAM/BAM sequence files
@item CRAM sequence files
@end enumerate\n")
    (license license:bsd-3)))
12166
;; Salmon is built against the libraries listed in `inputs': the phases
;; below neutralise the CMake rules that would otherwise download or build
;; cereal, libgff, TBB, libdivsufsort and staden io_lib, and copy the RapMap
;; sources into the place where the build expects to find them.
(define-public salmon
  (package
    (name "salmon")
    (version "0.13.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/COMBINE-lab/salmon")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1i2z4aivicmiixdz9bxalp7vmfzi3k92fxa63iqa8kgvfw5a4aq5"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled headers for eigen3.
                  (delete-file-recursively "include/eigen3/")
                  #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (list (string-append "-DBOOST_INCLUDEDIR="
                            (assoc-ref %build-inputs "boost")
                            "/include/")
             (string-append "-DBOOST_LIBRARYDIR="
                            (assoc-ref %build-inputs "boost")
                            "/lib/")
             (string-append "-DBoost_LIBRARIES="
                            "-lboost_iostreams "
                            "-lboost_filesystem "
                            "-lboost_system "
                            "-lboost_thread "
                            "-lboost_timer "
                            "-lboost_chrono "
                            "-lboost_program_options")
             "-DBoost_FOUND=TRUE"
             "-DTBB_LIBRARIES=tbb tbbmalloc"
             ;; Don't download RapMap---we already have it!
             "-DFETCHED_RAPMAP=1")
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda _
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.59\\.0") "#"))
             #t))
         (add-after 'unpack 'do-not-phone-home
           (lambda _
             ;; Replace the call that produces the version message with an
             ;; empty string literal.
             (substitute* "src/Salmon.cpp"
               (("getVersionMessage\\(\\)") "\"\""))
             #t))
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((src "external/install/src/rapmap/")
                   (include "external/install/include/rapmap/")
                   (rapmap (assoc-ref inputs "rapmap")))
               (mkdir-p src)
               (mkdir-p include)
               (copy-recursively (string-append rapmap "/src") src)
               (copy-recursively (string-append rapmap "/include") include)
               ;; Remove these files and the spdlog directory from the
               ;; copied RapMap tree.
               (for-each delete-file
                         '("external/install/include/rapmap/xxhash.h"
                           "external/install/include/rapmap/FastxParser.hpp"
                           "external/install/include/rapmap/concurrentqueue.h"
                           "external/install/include/rapmap/FastxParserThreadUtils.hpp"
                           "external/install/src/rapmap/FastxParser.cpp"
                           "external/install/src/rapmap/xxhash.c"))
               (delete-file-recursively
                "external/install/include/rapmap/spdlog"))
             #t))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "CMakeLists.txt"
               ;; Don't prefer static libs
               (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
               (("set\\(TBB_LIBRARIES") "message(")
               ;; Don't download anything
               (("DOWNLOAD_COMMAND") "DOWNLOAD_COMMAND echo")
               (("externalproject_add\\(libcereal") "message(")
               (("externalproject_add\\(libgff") "message(")
               (("externalproject_add\\(libtbb") "message(")
               (("externalproject_add\\(libdivsufsort") "message(")
               (("externalproject_add\\(libstadenio") "message(")
               (("externalproject_add_step\\(") "message("))
             (substitute* "src/CMakeLists.txt"
               (("add_dependencies") "#")
               ;; Point the static archives expected under
               ;; external/install/lib at the shared libraries of the
               ;; corresponding inputs.
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libstaden-read.a")
                (string-append (assoc-ref inputs "libstadenio-for-salmon")
                               "/lib/libstaden-read.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort64.so"))
               (("lib/libdivsufsort.a") "/lib/libdivsufsort.so"))

             ;; Ensure that all headers can be found.  Only prepend the
             ;; existing value when there is one: an empty element in
             ;; CPLUS_INCLUDE_PATH makes the compiler search the current
             ;; working directory.
             (let ((eigen-headers (string-append (assoc-ref inputs "eigen")
                                                 "/include/eigen3"))
                   (current (getenv "CPLUS_INCLUDE_PATH")))
               (setenv "CPLUS_INCLUDE_PATH"
                       (if (and current (not (string-null? current)))
                           (string-append current ":" eigen-headers)
                           eigen-headers)))
             #t))
         ;; CMAKE_INSTALL_PREFIX does not exist when the tests are
         ;; run.  It only exists after the install phase.
         (add-after 'unpack 'fix-tests
           (lambda _
             (substitute* "src/CMakeLists.txt"
               (("DTOPLEVEL_DIR=\\$\\{CMAKE_INSTALL_PREFIX")
                "DTOPLEVEL_DIR=${GAT_SOURCE_DIR"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bzip2" ,bzip2)
       ("cereal" ,cereal)
       ("eigen" ,eigen)
       ;; RapMap sources tagged for this Salmon release; they are copied
       ;; into the build tree by the 'prepare-rapmap phase.
       ("rapmap" ,(origin
                    (method git-fetch)
                    (uri (git-reference
                          (url "https://github.com/COMBINE-lab/RapMap")
                          (commit (string-append "salmon-v" version))))
                    (file-name (string-append "rapmap-salmon-v" version "-checkout"))
                    (sha256
                     (base32
                      "1biplxf0csc7a8h1wf219b0vmjkvw6wk2zylhdklb577kgmihdms"))))
       ("jemalloc" ,jemalloc)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("libdivsufsort" ,libdivsufsort)
       ("libstadenio-for-salmon" ,libstadenio-for-salmon)
       ("xz" ,xz)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/COMBINE-lab/salmon")
    (synopsis "Quantification from RNA-seq reads using lightweight alignments")
    (description "Salmon is a program to produce highly-accurate,
transcript-level quantification estimates from RNA-seq data. Salmon achieves
its accuracy and speed via a number of different innovations, including the
use of lightweight alignments (accurate but fast-to-compute proxies for
traditional read alignments) and massively-parallel stochastic collapsed
variational inference.")
    (license license:gpl3+)))
12311
(define-public python-loompy
  (package
    (name "python-loompy")
    (version "2.0.17")
    ;; The tarball on Pypi does not include the tests.
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/linnarsson-lab/loompy")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "12a5kjgiikapv93wahfw0frszx1lblnppyz3vs5gy8fgmgngra07"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run the test suite with pytest, with the source directory on
         ;; PYTHONPATH.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             ;; Honour #:tests? so that the test suite can be disabled.
             (when tests?
               (let ((here (getcwd))
                     (existing (getenv "PYTHONPATH")))
                 ;; Prepend the current directory; do not fail (or leave a
                 ;; dangling separator) when PYTHONPATH is unset or empty.
                 (setenv "PYTHONPATH"
                         (if (and existing (not (string-null? existing)))
                             (string-append here ":" existing)
                             here)))
               (invoke "pytest" "tests"))
             #t)))))
    (propagated-inputs
     `(("python-h5py" ,python-h5py)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-scipy" ,python-scipy)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/linnarsson-lab/loompy")
    (synopsis "Work with .loom files for single-cell RNA-seq data")
    (description "The loom file format is an efficient format for very large
omics datasets, consisting of a main matrix, optional additional layers, a
variable number of row and column annotations. Loom also supports sparse
graphs. This library makes it easy to work with @file{.loom} files for
single-cell RNA-seq data.")
    (license license:bsd-3)))
12352
;; We cannot use the latest commit because it requires Java 9.
(define-public java-forester
  (let ((commit "86b07efe302d5094b42deed9260f719a4c4ac2e6")
        (revision "1"))
    (package
      (name "java-forester")
      (version (string-append "0-" revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/cmzmasek/forester")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0vxavc1yrf84yrnf20dq26hi0lglidk8d382xrxsy4qmlbjd276z"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Delete bundled jars and pre-built classes
             (delete-file-recursively "forester/java/resources")
             (delete-file-recursively "forester/java/classes")
             (for-each delete-file
                       (find-files "forester/java/" "\\.jar$"))
             ;; Delete bundled applications
             (delete-file-recursively "forester_applications")
             #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:jdk ,icedtea-8
         #:modules ((guix build ant-build-system)
                    (guix build utils)
                    (guix build java-utils)
                    (sxml simple)
                    (sxml transform))
         #:phases
         (modify-phases %standard-phases
           ;; The Java sources and build.xml live in a subdirectory.
           (add-after 'unpack 'chdir
             (lambda _
               (chdir "forester/java")
               #t))
           ;; Rewrite build.xml through SXML, dropping every <unjar> element.
           (add-after 'chdir 'fix-dependencies
             (lambda _
               (chmod "build.xml" #o664)
               (let ((rewritten
                      (pre-post-order
                       (with-input-from-file "build.xml"
                         (lambda ()
                           (xml->sxml #:trim-whitespace? #t)))
                       `(;; Remove all unjar tags to avoid repacking classes.
                         (unjar . ,(lambda _ '()))
                         (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                         (*text* . ,(lambda (_ txt) txt))))))
                 (call-with-output-file "build.xml.new"
                   (lambda (port)
                     (sxml->xml rewritten port))))
               (rename-file "build.xml.new" "build.xml")
               #t))
           ;; FIXME: itext is difficult to package as it depends on a few
           ;; unpackaged libraries.
           (add-after 'chdir 'remove-dependency-on-unpackaged-itext
             (lambda _
               (delete-file
                "src/org/forester/archaeopteryx/PdfExporter.java")
               ;; The one remaining use of PdfExporter becomes an exception.
               (substitute* "src/org/forester/archaeopteryx/MainFrame.java"
                 (("pdf_written_to = PdfExporter.*")
                  "throw new IOException(\"PDF export is not available.\");"))
               #t))
           ;; There is no install target
           (replace 'install (install-jars ".")))))
      (propagated-inputs
       `(("java-commons-codec" ,java-commons-codec)
         ("java-openchart2" ,java-openchart2)))
      (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
      (synopsis "Phylogenomics libraries for Java")
      (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
      (license license:lgpl2.1+))))
12428
;; Forester 1.005, built from the sources jar published on Maven Central.
(define-public java-forester-1.005
  (package
    (name "java-forester")
    (version "1.005")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://repo1.maven.org/maven2/"
                           "org/biojava/thirdparty/forester/"
                           version "/forester-" version "-sources.jar"))
       (file-name (string-append name "-" version ".jar"))
       (sha256
        (base32
         "04r8qv4rk3p71z4ajrvp11py1z46qrx0047j3zzs79s6lnsm3lcv"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:jdk ,icedtea-8
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform))
       #:phases
       (modify-phases %standard-phases
         ;; Generate a top-level build.xml from src/build.xml without the
         ;; <unjar> elements, and make it refer to local copies of the two
         ;; resource files provided as native inputs.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((rewritten
                    (pre-post-order
                     (with-input-from-file "src/build.xml"
                       (lambda ()
                         (xml->sxml #:trim-whitespace? #t)))
                     `(;; Remove all unjar tags to avoid repacking classes.
                       (unjar . ,(lambda _ '()))
                       (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                       (*text* . ,(lambda (_ txt) txt))))))
               (call-with-output-file "build.xml"
                 (lambda (port)
                   (sxml->xml rewritten port))))
             (copy-file (assoc-ref inputs "synth_look_and_feel_1.xml")
                        "synth_look_and_feel_1.xml")
             (copy-file (assoc-ref inputs "phyloxml.xsd")
                        "phyloxml.xsd")
             (substitute* "build.xml"
               (("../resources/synth_laf/synth_look_and_feel_1.xml")
                "synth_look_and_feel_1.xml")
               (("../resources/phyloxml_schema/1.10/phyloxml.xsd")
                "phyloxml.xsd"))
             #t))
         ;; FIXME: itext is difficult to package as it depends on a few
         ;; unpackaged libraries.
         (add-after 'unpack 'remove-dependency-on-unpackaged-itext
           (lambda _
             (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
             ;; Throw instead of calling PdfExporter; the inserted "/*" and
             ;; the "*/" substituted below comment out the remainder of the
             ;; original statement.
             (substitute*
                 '("src/org/forester/archaeopteryx/MainFrame.java"
                   "src/org/forester/archaeopteryx/MainFrameApplication.java")
               (("pdf_written_to = PdfExporter.*")
                "throw new IOException(\"PDF export is not available.\"); /*")
               ((".getPrintSizeX\\(\\), getOptions\\(\\).getPrintSizeY\\(\\) \\);")
                "*/")
               (("getCurrentTreePanel\\(\\).getHeight\\(\\) \\);")
                "*/"))
             #t))
         (add-after 'unpack 'delete-pre-built-classes
           (lambda _
             (delete-file-recursively "src/classes")
             #t))
         ;; There is no install target
         (replace 'install (install-jars ".")))))
    (propagated-inputs
     `(("java-commons-codec" ,java-commons-codec)
       ("java-openchart2" ,java-openchart2)))
    ;; The source archive does not contain the resources.
    (native-inputs
     `(("phyloxml.xsd"
        ,(origin
           (method url-fetch)
           (uri (string-append
                 "https://raw.githubusercontent.com/cmzmasek/forester/"
                 "b61cc2dcede0bede317db362472333115756b8c6/"
                 "forester/resources/phyloxml_schema/1.10/phyloxml.xsd"))
           (file-name (string-append name "-phyloxml-" version ".xsd"))
           (sha256
            (base32
             "1zxc4m8sn4n389nqdnpxa8d0k17qnr3pm2y5y6g6vh4k0zm52npv"))))
       ("synth_look_and_feel_1.xml"
        ,(origin
           (method url-fetch)
           (uri (string-append
                 "https://raw.githubusercontent.com/cmzmasek/forester/"
                 "29e04321615da6b35c1e15c60e52caf3f21d8e6a/"
                 "forester/java/classes/resources/"
                 "synth_look_and_feel_1.xml"))
           (file-name (string-append name "-synth-look-and-feel-" version ".xml"))
           (sha256
            (base32
             "1gv5602gv4k7y7713y75a4jvj7i9s7nildsbdl7n9q10sc2ikg8h"))))))
    (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
    (synopsis "Phylogenomics libraries for Java")
    (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
    (license license:lgpl2.1+)))
12524
;; Core module of BioJava, built with Ant from the "biojava-core"
;; subdirectory of the BioJava repository.
(define-public java-biojava-core
  (package
    (name "java-biojava-core")
    (version "4.2.11")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             (commit (string-append "biojava-" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1bvryh2bpsvash8ln79cmc9sqm8qw72hz4xzwqxcrjm8ssxszhqk"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-core.jar"
       #:source-dir "biojava-core/src/main/java/"
       #:test-dir "biojava-core/src/test"
       ;; These tests seem to require internet access.
       #:test-exclude
       (list "**/SearchIOTest.java"
             "**/BlastXMLParserTest.java"
             "**/GenbankCookbookTest.java"
             "**/GenbankProxySequenceReaderTest.java")
       #:phases
       (modify-phases %standard-phases
         ;; Copy the main resources into the class output directory.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-core/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the test resources and the test class directory.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-core/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "http://biojava.org")
    (synopsis "Core libraries of Java framework for processing biological data")
    (description "BioJava is a project dedicated to providing a Java framework
for processing biological data. It provides analytical and statistical
routines, parsers for common file formats, reference implementations of
popular algorithms, and allows the manipulation of sequences and 3D
structures. The goal of the biojava project is to facilitate rapid
application development for bioinformatics.

This package provides the core libraries.")
    (license license:lgpl2.1+)))
12580
;; The "biojava-phylo" module of the BioJava checkout used by
;; java-biojava-core.  The build system, native inputs and home page are
;; the inherited ones, so they are not repeated here.
(define-public java-biojava-phylo
  (package
    (inherit java-biojava-core)
    (name "java-biojava-phylo")
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the main resources into the class output directory.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the test resources and the test class directory.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-forester" ,java-forester)))
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
12616
;; The "biojava-alignment" module of the BioJava checkout used by
;; java-biojava-core.  The build system, native inputs and home page are
;; the inherited ones, so they are not repeated here.
(define-public java-biojava-alignment
  (package
    (inherit java-biojava-core)
    (name "java-biojava-alignment")
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the main resources into the class output directory.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the test resources and the test class directory.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-biojava-phylo" ,java-biojava-phylo)
       ("java-forester" ,java-forester)))
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
12660
;; Same recipe as java-biojava-core, applied to the "biojava-4.0.0" tag.
(define-public java-biojava-core-4.0
  (package
    (inherit java-biojava-core)
    (name "java-biojava-core")
    (version "4.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/biojava/biojava")
             (commit (string-append "biojava-" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "13675f6y9aqi7bi2lk3s1z7a22ynccjiqwa8izh7p97xi9wsfmd8"))))))
12674
;; The "biojava-phylo" module built from the java-biojava-core-4.0 source,
;; against java-forester-1.005.  The build system, native inputs and home
;; page are the inherited ones, so they are not repeated here.
(define-public java-biojava-phylo-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-phylo")
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the main resources into the class output directory.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the test resources and the test class directory.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-forester" ,java-forester-1.005)))
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
12710
;; The "biojava-alignment" module built from the java-biojava-core-4.0
;; source.  The build system, native inputs and home page are the inherited
;; ones, so they are not repeated here.
(define-public java-biojava-alignment-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-alignment")
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the main resources into the class output directory.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         ;; Likewise for the test resources and the test class directory.
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-phylo" ,java-biojava-phylo-4.0)
       ("java-forester" ,java-forester-1.005)))
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
12754
;; Drop-seq tools: the bundled jars are deleted from the source and replaced
;; at build time by symlinks to the jars of the "java-" inputs.
(define-public dropseq-tools
  (package
    (name "dropseq-tools")
    (version "1.13")
    (source
     (origin
       (method url-fetch)
       (uri "http://mccarrolllab.com/download/1276/")
       (file-name (string-append "dropseq-tools-" version ".zip"))
       (sha256
        (base32
         "0yrffckxqk5l8b5xb6z4laq157zd9mdypr2p4b4vq2bhjzi1sj0s"))
       ;; Delete bundled libraries
       (modules '((guix build utils)))
       (snippet
        '(begin
           (for-each delete-file (find-files "jar/lib" "\\.jar$"))
           (delete-file-recursively "3rdParty")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; test data are not included
       #:test-target "test"
       #:build-target "all"
       #:source-dir "public/src/"
       #:jdk ,icedtea-8
       #:make-flags
       (list (string-append "-Dpicard.executable.dir="
                            (assoc-ref %build-inputs "java-picard")
                            "/share/java/"))
       #:modules ((ice-9 match)
                  (srfi srfi-1)
                  (guix build utils)
                  (guix build java-utils)
                  (guix build ant-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: fails with "java.io.FileNotFoundException:
         ;; /gnu/store/…-dropseq-tools-1.13/share/java/lib/biojava-alignment.jar"
         (delete 'generate-jar-indices)
         ;; All dependencies must be linked to "lib", because that's where
         ;; they will be searched for when the Class-Path property of the
         ;; manifest is computed.
         (add-after 'unpack 'record-references
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "jar/lib")
             ;; Collect the jars of every input whose label starts with
             ;; "java-", except "java-testng".
             (let ((jars
                    (append-map
                     (match-lambda
                       ((label . directory)
                        (if (and directory
                                 (string-prefix? "java-" label)
                                 (not (string=? label "java-testng")))
                            (find-files directory "\\.jar$")
                            '())))
                     inputs)))
               (for-each (lambda (jar)
                           (symlink jar
                                    (string-append "jar/lib/"
                                                   (basename jar))))
                         jars))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (javadir (string-append prefix "/share/java/"))
                    (libdir (string-append javadir "/lib/"))
                    (wrappers
                     '("BAMTagHistogram"
                       "BAMTagofTagCounts"
                       "BaseDistributionAtReadPosition"
                       "CollapseBarcodesInPlace"
                       "CollapseTagWithContext"
                       "ConvertToRefFlat"
                       "CreateIntervalsFiles"
                       "DetectBeadSynthesisErrors"
                       "DigitalExpression"
                       "Drop-seq_alignment.sh"
                       "FilterBAM"
                       "FilterBAMByTag"
                       "GatherGeneGCLength"
                       "GatherMolecularBarcodeDistributionByGene"
                       "GatherReadQualityMetrics"
                       "PolyATrimmer"
                       "ReduceGTF"
                       "SelectCellsByNumTranscripts"
                       "SingleCellRnaSeqMetricsCollector"
                       "TagBamWithReadSequenceExtended"
                       "TagReadWithGeneExon"
                       "TagReadWithInterval"
                       "TrimStartingSequence"
                       "ValidateReference")))
               (for-each mkdir-p (list bindir javadir libdir))
               (install-file "dist/dropseq.jar" javadir)
               (for-each (lambda (wrapper)
                           (chmod wrapper #o555)
                           (install-file wrapper bindir))
                         wrappers)
               ;; In the installed scripts, use the absolute file name of
               ;; "java" and point jar_deploy_dir at the installed jar
               ;; directory.
               (substitute* (map (lambda (wrapper)
                                   (string-append bindir "/" wrapper))
                                 wrappers)
                 (("^java") (which "java"))
                 (("jar_deploy_dir=.*")
                  (string-append "jar_deploy_dir=" javadir "\n"))))
             #t))
         ;; FIXME: We do this after stripping jars because we don't want it to
         ;; copy all these jars and strip them.  We only want to install
         ;; links.  Arguably, this is a problem with the ant-build-system.
         (add-after 'strip-jar-timestamps 'install-links
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (javadir (string-append prefix "/share/java/"))
                    (libdir (string-append javadir "/lib/")))
               ;; Recreate each link from jar/lib in the output, pointing at
               ;; the same target.
               (for-each (lambda (link)
                           (symlink (readlink link)
                                    (string-append libdir (basename link))))
                         (find-files "jar/lib" "\\.jar$")))
             #t)))))
    (inputs
     `(("jdk" ,icedtea-8)
       ("java-picard" ,java-picard-2.10.3)
       ("java-log4j-1.2-api" ,java-log4j-1.2-api)
       ("java-commons-math3" ,java-commons-math3)
       ("java-commons-jexl2" ,java-commons-jexl-2)
       ("java-commons-collections4" ,java-commons-collections4)
       ("java-commons-lang2" ,java-commons-lang)
       ("java-commons-io" ,java-commons-io)
       ("java-snappy-1.0.3-rc3" ,java-snappy-1)
       ("java-guava" ,java-guava)
       ("java-la4j" ,java-la4j)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-alignment" ,java-biojava-alignment-4.0)
       ("java-jdistlib" ,java-jdistlib)
       ("java-simple-xml" ,java-simple-xml)
       ("java-snakeyaml" ,java-snakeyaml)))
    (native-inputs
     `(("unzip" ,unzip)
       ("java-testng" ,java-testng)))
    (home-page "http://mccarrolllab.com/dropseq/")
    (synopsis "Tools for Drop-seq analyses")
    (description "Drop-seq is a technology to enable biologists to
analyze RNA expression genome-wide in thousands of individual cells at
once. This package provides tools to perform Drop-seq analyses.")
    (license license:expat)))
12895
;; PiGx RNA-seq pipeline, built from the upstream release tarball.
(define-public pigx-rnaseq
  (package
    (name "pigx-rnaseq")
    (version "0.0.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_rnaseq/"
                           "releases/download/v" version
                           "/pigx_rnaseq-" version ".tar.gz"))
       (sha256
        (base32
         "0z3hr120wk2vrlmlpz1vp3n9wy3rq4y2mnzh2vf08qgqn2xfdwcw"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-tests? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; "test.sh" runs STAR, which requires excessive amounts of memory.
         (add-after 'unpack 'disable-resource-intensive-test
           (lambda _
             (substitute* "Makefile.in"
               ;; Keep the trim_galore entry but drop whatever follows it on
               ;; the line; remove the other two entries.
               (("(^ tests/test_trim_galore/test.sh).*" _ kept) kept)
               (("^ tests/test_multiqc/test.sh") "")
               (("^ test.sh") ""))
             #t)))))
    (inputs
     `(("coreutils" ,coreutils)
       ("sed" ,sed)
       ("gzip" ,gzip)
       ("snakemake" ,snakemake)
       ("fastqc" ,fastqc)
       ("multiqc" ,multiqc)
       ("star" ,star)
       ("trim-galore" ,trim-galore)
       ("htseq" ,htseq)
       ("samtools" ,samtools)
       ("r-minimal" ,r-minimal)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-gprofiler" ,r-gprofiler)
       ("r-deseq2" ,r-deseq2)
       ("r-dt" ,r-dt)
       ("r-knitr" ,r-knitr)
       ("r-pheatmap" ,r-pheatmap)
       ("r-corrplot" ,r-corrplot)
       ("r-reshape2" ,r-reshape2)
       ("r-plotly" ,r-plotly)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-crosstalk" ,r-crosstalk)
       ("r-tximport" ,r-tximport)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rjson" ,r-rjson)
       ("salmon" ,salmon)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for RNA sequencing experiments")
    (description "PiGX RNAseq is an analysis pipeline for preprocessing and
reporting for RNA sequencing experiments. It is easy to use and produces high
quality reports. The inputs are reads files from the sequencing experiment,
and a configuration file which describes the experiment. In addition to
quality control of the experiment, the pipeline produces a differential
expression report comparing samples in an easily configurable manner.")
    (license license:gpl3+)))
12964
;; PiGx ChIP-seq pipeline, built from the upstream release tarball.
(define-public pigx-chipseq
  (package
    (name "pigx-chipseq")
    (version "0.0.42")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/"
                           "releases/download/v" version
                           "/pigx_chipseq-" version ".tar.gz"))
       (sha256
        (base32
         "0xbvgqpk32a8iczhvac56cacr46rdkqb0allhhpvmj940idf72bi"))))
    (build-system gnu-build-system)
    ;; parts of the tests rely on access to the network
    (arguments
     '(#:tests? #f))
    (inputs
     `(("grep" ,grep)
       ("coreutils" ,coreutils)
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-chipseq" ,r-chipseq)
       ("r-data-table" ,r-data-table)
       ("r-dplyr" ,r-dplyr)
       ("r-genomation" ,r-genomation)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-tibble" ,r-tibble)
       ("r-tidyr" ,r-tidyr)
       ("r-jsonlite" ,r-jsonlite)
       ("r-heatmaply" ,r-heatmaply)
       ("r-htmlwidgets" ,r-htmlwidgets)
       ("r-ggplot2" ,r-ggplot2)
       ("r-plotly" ,r-plotly)
       ("r-rmarkdown" ,r-rmarkdown)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-magic" ,python-magic)
       ("python-xlrd" ,python-xlrd)
       ("trim-galore" ,trim-galore)
       ("macs" ,macs)
       ("multiqc" ,multiqc)
       ("perl" ,perl)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("idr" ,idr)
       ("snakemake" ,snakemake)
       ("samtools" ,samtools)
       ("bedtools" ,bedtools)
       ("kentutils" ,kentutils)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for ChIP sequencing experiments")
    (description "PiGX ChIPseq is an analysis pipeline for preprocessing, peak
calling and reporting for ChIP sequencing experiments. It is easy to use and
produces high quality reports. The inputs are reads files from the sequencing
experiment, and a configuration file which describes the experiment. In
addition to quality control of the experiment, the pipeline enables to set up
multiple peak calling analysis and allows the generation of a UCSC track hub
in an easily configurable manner.")
    (license license:gpl3+)))
13034
;; PiGx BSseq, built from the upstream release tarball with the GNU build
;; system.
(define-public pigx-bsseq
  (package
    (name "pigx-bsseq")
    (version "0.0.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/"
                           "releases/download/v" version
                           "/pigx_bsseq-" version ".tar.gz"))
       (sha256
        (base32 "0l97wvkq4diq8lcarraj33bby1zzf0w804jwi8mlc5qddp8idwhy"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; The readr package is picky about timezones, so before the
         ;; tests run we fix TZ and point TZDIR at the zoneinfo database
         ;; of the "tzdata" input.
         (add-before 'check 'set-timezone
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((zoneinfo (string-append (assoc-ref inputs "tzdata")
                                            "/share/zoneinfo")))
               (setenv "TZ" "UTC+1")
               (setenv "TZDIR" zoneinfo)
               #t))))))
    ;; tzdata is only looked up by the 'set-timezone phase above.
    (native-inputs
     `(("tzdata" ,tzdata)))
    (inputs
     `(("coreutils" ,coreutils)
       ("sed" ,sed)
       ("grep" ,grep)
       ("r-minimal" ,r-minimal)
       ("r-annotationhub" ,r-annotationhub)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-methylkit" ,r-methylkit)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-bookdown" ,r-bookdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggbio" ,r-ggbio)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("snakemake" ,snakemake)
       ("bismark" ,bismark)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("trim-galore" ,trim-galore)
       ("cutadapt" ,cutadapt)
       ("samtools" ,samtools)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Bisulfite sequencing pipeline from fastq to methylation reports")
    (description "PiGx BSseq is a data processing pipeline for raw fastq read
data of bisulfite experiments; it produces reports on aggregate methylation
and coverage and can be used to produce information on differential
methylation and segmentation.")
    (license license:gpl3+)))
13093
;; PiGx scRNAseq, built from the upstream release tarball with the GNU
;; build system using its default phases.
(define-public pigx-scrnaseq
  (package
    (name "pigx-scrnaseq")
    (version "1.1.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx_scrnaseq/"
                           "releases/download/v" version
                           "/pigx_scrnaseq-" version ".tar.gz"))
       (sha256
        (base32 "1d5l3gywypi67yz9advxq5xkgfhr4733gj0bwnngm723i3hdf5w9"))))
    (build-system gnu-build-system)
    (inputs
     `(("coreutils" ,coreutils)
       ("perl" ,perl)
       ("fastqc" ,fastqc)
       ("flexbar" ,flexbar)
       ;; The "java" label is bound to the IcedTea 8 package.
       ("java" ,icedtea-8)
       ("jellyfish" ,jellyfish)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-pandas" ,python-pandas)
       ("python-magic" ,python-magic)
       ("python-numpy" ,python-numpy)
       ("python-loompy" ,python-loompy)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("samtools" ,samtools)
       ("snakemake" ,snakemake)
       ("star" ,star)
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dplyr" ,r-dplyr)
       ("r-dropbead" ,r-dropbead)
       ("r-dt" ,r-dt)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hdf5array" ,r-hdf5array)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rtsne" ,r-rtsne)
       ("r-scater" ,r-scater)
       ("r-scran" ,r-scran)
       ("r-seurat" ,r-seurat)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-stringr" ,r-stringr)
       ("r-yaml" ,r-yaml)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for single-cell RNA sequencing experiments")
    (description "PiGX scRNAseq is an analysis pipeline for preprocessing and
quality control for single cell RNA sequencing experiments. The inputs are
read files from the sequencing experiment, and a configuration file which
describes the experiment. It produces processed files for downstream analysis
and interactive quality reports. The pipeline is designed to work with UMI
based methods.")
    (license license:gpl3+)))
13159
;; PiGx umbrella package: takes the four individual PiGx pipeline packages
;; as inputs.
(define-public pigx
  (package
    (name "pigx")
    (version "0.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/BIMSBbioinfo/pigx/"
                           "releases/download/v" version
                           "/pigx-" version ".tar.gz"))
       (sha256
        (base32 "1i5njdy1clj5ncw45d16p7mwmqvb1ilikl9n797pxklc3f4s7mq7"))))
    (build-system gnu-build-system)
    (inputs
     `(("python" ,python)
       ;; The individual pipelines.
       ("pigx-bsseq" ,pigx-bsseq)
       ("pigx-chipseq" ,pigx-chipseq)
       ("pigx-rnaseq" ,pigx-rnaseq)
       ("pigx-scrnaseq" ,pigx-scrnaseq)))
    (home-page "http://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipelines for genomics")
    (description "PiGx is a collection of genomics pipelines. It includes the
following pipelines:

@itemize
@item PiGx BSseq for raw fastq read data of bisulfite experiments
@item PiGx RNAseq for RNAseq samples
@item PiGx scRNAseq for single cell dropseq analysis
@item PiGx ChIPseq for reads from ChIPseq experiments
@end itemize

All pipelines are easily configured with a simple sample sheet and a
descriptive settings file. The result is a set of comprehensive, interactive
HTML reports with interesting findings about your samples.")
    (license license:gpl3+)))
13195
;; Genrich peak caller, built from the upstream git tag "v<version>".
(define-public genrich
  (package
    (name "genrich")
    (version "0.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/jsh58/Genrich")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0x0q6z0208n3cxzqjla4rgjqpyqgwpmz27852lcvzkzaigymq4zp"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The standard install phase is replaced: copy the built
         ;; "Genrich" executable into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "Genrich" bin)
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/jsh58/Genrich")
    (synopsis "Detecting sites of genomic enrichment")
    (description "Genrich is a peak-caller for genomic enrichment
assays (e.g. ChIP-seq, ATAC-seq). It analyzes alignment files generated
following the assay and produces a file detailing peaks of significant
enrichment.")
    (license license:expat)))
13228
;; Mantis, pinned to a specific upstream commit; the version string is
;; derived from the base version "0", the revision and the commit id.
(define-public mantis
  (let ((revision "1")
        (commit "4ffd171632c2cb0056a86d709dfd2bf21bc69b84"))
    (package
      (name "mantis")
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/splatlab/mantis")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0iqbr0dhmlc8mzpirmm2s4pkzkwdgrcx50yx6cv3wlr2qi064p55"))))
      (build-system cmake-build-system)
      (arguments
       '(#:tests? #f))                  ; there are none
      (inputs
       `(("sdsl-lite" ,sdsl-lite)
         ("openssl" ,openssl)
         ("zlib" ,zlib)))
      (home-page "https://github.com/splatlab/mantis")
      (synopsis "Large-scale sequence-search index data structure")
      (description "Mantis is a space-efficient data structure that can be
used to index thousands of raw-read genomics experiments and facilitate
large-scale sequence searches on those experiments. Mantis uses counting
quotient filters instead of Bloom filters, enabling rapid index builds and
queries, small indexes, and exact results, i.e., no false positives or
negatives. Furthermore, Mantis is also a colored de Bruijn graph
representation, so it supports fast graph traversal and other topological
analyses in addition to large-scale sequence-level searches.")
      ;; uses __uint128_t and inline assembly
      (supported-systems '("x86_64-linux"))
      (license license:bsd-3))))
13263
;; R package "diversitree", fetched from CRAN.
(define-public r-diversitree
  (package
    (name "r-diversitree")
    (version "0.9-13")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "diversitree" version))
       (sha256
        (base32
         "00vi4klywi35hd170ksjv3xja3hqqbkcidcnrrlpgv4179k0azix"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (inputs `(("fftw" ,fftw) ("gsl" ,gsl)))
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-desolve" ,r-desolve)
       ("r-rcpp" ,r-rcpp)
       ("r-subplex" ,r-subplex)))
    (home-page "https://www.zoology.ubc.ca/prog/diversitree")
    (synopsis "Comparative 'phylogenetic' analyses of diversification")
    ;; The sentence listing the "SSE" models now ends with a period; it
    ;; previously ran straight into "Other included methods".
    (description "This package contains a number of comparative \"phylogenetic\"
methods, mostly focusing on analysing diversification and character evolution.
Contains implementations of \"BiSSE\" (Binary State Speciation and Extinction)
and its unresolved tree extensions, \"MuSSE\" (Multiple State Speciation and
Extinction), \"QuaSSE\", \"GeoSSE\", and \"BiSSE-ness\". Other included methods
include Markov models of discrete and continuous trait evolution and constant
rate speciation and extinction.")
    (license license:gpl2+)))
13294
(define-public sjcount
  ;; There is no tag for version 3.2, nor is there a release archive.
  (let ((commit "292d3917cadb3f6834c81e509c30e61cd7ead6e5")
        (revision "1"))
    (package
      (name "sjcount")
      (version (git-version "3.2" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/pervouchine/sjcount-full")
                      (commit commit)))
                ;; Use the shared helper for naming the checkout, like the
                ;; other git-fetch origins in this file, instead of
                ;; assembling "NAME-VERSION-checkout" by hand.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0gdgj35j249f04rqgq8ymcc1xg1vi9kzbajnjqpaq2wpbh8bl234"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; requires a 1.4G test file
         #:make-flags
         (list (string-append "SAMTOOLS_DIR="
                              (assoc-ref %build-inputs "samtools")
                              "/lib/"))
         #:phases
         (modify-phases %standard-phases
           ;; Instead of running a configure script, patch the makefile:
           ;; take the samtools headers from the "samtools" input and add
           ;; -lpthread after -lz.
           (replace 'configure
             (lambda* (#:key inputs #:allow-other-keys)
               (substitute* "makefile"
                 (("-I \\$\\{SAMTOOLS_DIR\\}")
                  (string-append "-I" (assoc-ref inputs "samtools")
                                 "/include/samtools"))
                 (("-lz ") "-lz -lpthread "))
               #t))
           ;; Copy the three built programs into the output's bin
           ;; directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (for-each (lambda (tool)
                           (install-file tool
                                         (string-append (assoc-ref outputs "out")
                                                        "/bin")))
                         '("j_count" "b_count" "sjcount"))
               #t)))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/pervouchine/sjcount-full/")
      (synopsis "Annotation-agnostic splice junction counting pipeline")
      (description "Sjcount is a utility for fast quantification of splice
junctions in RNA-seq data. It is annotation-agnostic and offset-aware. This
version does count multisplits.")
      (license license:gpl3+))))
13345
;; Minimap2, built from the upstream release tarball.
(define-public minimap2
  (package
    (name "minimap2")
    (version "2.17")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/lh3/minimap2/"
                           "releases/download/v" version "/"
                           "minimap2-" version ".tar.bz2"))
       (sha256
        (base32 "0hi7i9pzxhvjj44khzzzj1lrn5gb5837arr4wgln7k1k5n4ci2mn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:make-flags
       ;; The second make argument depends on the system being built for:
       ;; "all" on x86_64, "arm_neon=1" on armhf and aarch64, and
       ;; "sse2only=1" everywhere else.
       (list "CC=gcc"
             (let ((target ,(or (%current-target-system)
                                (%current-system))))
               (cond
                ((string-prefix? "x86_64" target) "all")
                ((or (string-prefix? "armhf" target)
                     (string-prefix? "aarch64" target))
                 "arm_neon=1")
                (else "sse2only=1"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Install the executable and its manual page by hand.
         ;; 'install-file' creates the target directories as needed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (install-file "minimap2" (string-append out "/bin"))
               (install-file "minimap2.1"
                             (string-append out "/share/man/man1")))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://lh3.github.io/minimap2/")
    (synopsis "Pairwise aligner for genomic and spliced nucleotide sequences")
    (description "Minimap2 is a versatile sequence alignment program that
aligns DNA or mRNA sequences against a large reference database. Typical use
cases include:

@enumerate
@item mapping PacBio or Oxford Nanopore genomic reads to the human genome;
@item finding overlaps between long reads with error rate up to ~15%;
@item splice-aware alignment of PacBio Iso-Seq or Nanopore cDNA or Direct RNA
reads against a reference genome;
@item aligning Illumina single- or paired-end reads;
@item assembly-to-assembly alignment;
@item full-genome alignment between two closely related species with
divergence below ~15%.
@end enumerate\n")
    (license license:expat)))
13404
;; Miniasm, built from the upstream git tag "v<version>".
(define-public miniasm
  (package
    (name "miniasm")
    (version "0.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lh3/miniasm")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "04dv5wv8bhsw1imxwyd438bnn9kby7svp44nbcz8lsadzjjci5gs"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (arguments
     '(#:tests? #f                      ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Copy both built programs into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (for-each (lambda (program)
                           (install-file program bin))
                         '("miniasm" "minidot"))
               #t))))))
    (home-page "https://github.com/lh3/miniasm")
    (synopsis "Ultrafast de novo assembly for long noisy reads")
    (description "Miniasm is a very fast OLC-based de novo assembler for noisy
long reads. It takes all-vs-all read self-mappings (typically by minimap) as
input and outputs an assembly graph in the GFA format. Different from
mainstream assemblers, miniasm does not have a consensus step. It simply
concatenates pieces of read sequences to generate the final unitig sequences.
Thus the per-base error rate is similar to the raw input reads.")
    (license license:expat)))
13441
;; R package "ciRcus", built from the upstream git tag "v<version>".
(define-public r-circus
  (package
    (name "r-circus")
    (version "0.1.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BIMSBbioinfo/ciRcus")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0jhjn3ilb057hbf6yzrihj13ifxxs32y7nkby8l3lkm28dg4p97h"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biomart" ,r-biomart)
       ("r-data-table" ,r-data-table)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hash" ,r-hash)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rmysql" ,r-rmysql)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/BIMSBbioinfo/ciRcus")
    (synopsis "Annotation, analysis and visualization of circRNA data")
    ;; Spelling of "features" corrected (was "featrues").
    (description "Circus is an R package for annotation, analysis and
visualization of circRNA data. Users can annotate their circRNA candidates
with host genes, gene features they are spliced from, and discriminate between
known and yet unknown splice junctions. Circular-to-linear ratios of circRNAs
can be calculated, and a number of descriptive plots easily generated.")
    (license license:artistic2.0)))
13483
(define-public gffread
  ;; We cannot use the tagged release because it is not in sync with gclib.
  ;; See https://github.com/gpertea/gffread/issues/26
  (let ((commit "ba7535fcb3cea55a6e5a491d916e93b454e87fd0")
        (revision "1"))
    (package
      (name "gffread")
      (version (git-version "0.9.12" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffread")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1dl2nbcg96lxpd0drg48ssa8343nf7pw9s9mkrc4mjjmfwsin3ki"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:make-flags
         (list "GCLDIR=gclib")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Copy the "gclib-source" input into a "gclib" directory of
           ;; the build tree, which is where GCLDIR above points.
           (add-after 'unpack 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir-p "gclib")
               (copy-recursively (assoc-ref inputs "gclib-source") "gclib")
               #t))
           ;; There is no install target
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "gffread" bin))
               #t)))))
      (native-inputs
       `(("gclib-source"
          ;; 'let*' is required: VERSION is computed from the gclib COMMIT
          ;; and REVISION bound just above it, not from the gffread
          ;; bindings of the enclosing 'let'.  The checkout is pinned to a
          ;; commit, so its name carries the revision and commit id.
          ,(let* ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
                  (revision "1")
                  (version (git-version "0.10.3" revision commit)))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib")
                     (commit commit)))
               (file-name (git-file-name "gclib" version))
               (sha256
                (base32
                 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffread/")
      (synopsis "Parse and convert GFF/GTF files")
      (description
       "This package provides a GFF/GTF file parsing utility providing format
conversions, region filtering, FASTA sequence extraction and more.")
      ;; gffread is under Expat, but gclib is under Artistic 2.0
      (license (list license:expat
                     license:artistic2.0)))))
13544
(define-public find-circ
  ;; The last release was in 2015. The license was clarified in 2017, so we
  ;; take the latest commit.
  (let ((revision "1")
        (commit "8655dca54970fcf7e92e22fbf57e1188724dda7d"))
    (package
      (name "find-circ")
      (version (git-version "1.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/marvin-jens/find_circ")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0p77pbqbclqr4srms34y1b9b4njybfpjiknc11ki84f3p8skb3cg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:phases
         ;; There is no actual build system.
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           ;; Install every script into the output's bin directory and
           ;; wrap it so that the PYTHONPATH seen at build time is
           ;; prepended to PYTHONPATH when the script runs.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
                     (python-path (getenv "PYTHONPATH")))
                 (for-each
                  (lambda (script)
                    (install-file script bin)
                    (wrap-program (string-append bin "/" script)
                      (list "PYTHONPATH" ":" 'prefix (list python-path))))
                  '("cmp_bed.py"
                    "find_circ.py"
                    "maxlength.py"
                    "merge_bed.py"
                    "unmapped2anchors.py")))
               #t)))))
      (inputs
       `(("python2" ,python-2)
         ("python2-pysam" ,python2-pysam)
         ("python2-numpy" ,python2-numpy)))
      (home-page "https://github.com/marvin-jens/find_circ")
      (synopsis "circRNA detection from RNA-seq reads")
      (description "This package provides tools to detect head-to-tail
spliced (back-spliced) sequencing reads, indicative of circular RNA (circRNA)
in RNA-seq data.")
      (license license:gpl3))))
13596
;; Scanpy, fetched from PyPI.
(define-public python-scanpy
  (package
    (name "python-scanpy")
    (version "1.4.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "scanpy" version))
       (sha256
        (base32
         "0s2b6cvaigx4wzw3850qb93sjwwxbzh22kpbp498zklc5rjpbz4l"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Remove the test files listed below, put the source tree at the
         ;; front of PYTHONPATH and run pytest on what remains.
         (replace 'check
           (lambda _
             ;; These tests require Internet access.
             (delete-file-recursively "scanpy/tests/notebooks")
             (delete-file "scanpy/tests/test_clustering.py")
             (delete-file "scanpy/tests/test_datasets.py")

             ;; TODO: I can't get the plotting tests to work, even with Xvfb.
             (delete-file "scanpy/tests/test_plotting.py")
             (delete-file "scanpy/tests/test_preprocessing.py")
             (delete-file "scanpy/tests/test_read_10x.py")

             (setenv "PYTHONPATH"
                     (string-append (getcwd) ":"
                                    (getenv "PYTHONPATH")))
             (invoke "pytest")
             #t)))))
    (propagated-inputs
     `(("python-anndata" ,python-anndata)
       ("python-h5py" ,python-h5py)
       ("python-igraph" ,python-igraph)
       ("python-joblib" ,python-joblib)
       ("python-legacy-api-wrap" ,python-legacy-api-wrap)
       ("python-louvain" ,python-louvain)
       ("python-matplotlib" ,python-matplotlib)
       ("python-natsort" ,python-natsort)
       ("python-networkx" ,python-networkx)
       ("python-numba" ,python-numba)
       ("python-packaging" ,python-packaging)
       ("python-pandas" ,python-pandas)
       ("python-patsy" ,python-patsy)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-scipy" ,python-scipy)
       ("python-seaborn" ,python-seaborn)
       ("python-statsmodels" ,python-statsmodels)
       ("python-tables" ,python-tables)
       ("python-tqdm" ,python-tqdm)
       ("python-umap-learn" ,python-umap-learn)))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("python-setuptools-scm" ,python-setuptools-scm)))
    (home-page "https://github.com/theislab/scanpy")
    ;; A synopsis is a short phrase, not a sentence: no trailing period and
    ;; no title case.
    (synopsis "Single-cell analysis in Python")
    (description "Scanpy is a scalable toolkit for analyzing single-cell gene
expression data. It includes preprocessing, visualization, clustering,
pseudotime and trajectory inference and differential expression testing. The
Python-based implementation efficiently deals with datasets of more than one
million cells.")
    (license license:bsd-3)))
13661
;; BBKNN, fetched from PyPI; its tests are currently disabled.
(define-public python-bbknn
  (package
    (name "python-bbknn")
    (version "1.3.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "bbknn" version))
       (sha256
        (base32 "1jbsh01f57zj4bhvjr3jh4532zznqd6nccmgrl3qi9gnhkf7c4y0"))))
    (build-system python-build-system)
    (arguments
     ;; TODO: Enable after migration to scikit-learn.
     '(#:tests? #f))
    (propagated-inputs
     `(("python-annoy" ,python-annoy)
       ("python-cython" ,python-cython)
       ("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-umap-learn" ,python-umap-learn)))
    (home-page "https://github.com/Teichlab/bbknn")
    (synopsis "Batch balanced KNN")
    (description "BBKNN is a batch effect removal tool that can be directly
used in the Scanpy workflow. It serves as an alternative to
@code{scanpy.api.pp.neighbors()}, with both functions creating a neighbour
graph for subsequent use in clustering, pseudotime and UMAP visualisation. If
technical artifacts are present in the data, they will make it challenging to
link corresponding cell types across different batches. BBKNN actively
combats this effect by splitting your data into batches and finding a smaller
number of neighbours for each cell within each of the groups. This helps
create connections between analogous cells in different batches without
altering the counts or PCA space.")
    (license license:expat)))
13695
;; gffcompare, pinned to a specific upstream commit and built together with
;; a checkout of gclib.
(define-public gffcompare
  (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41")
        (revision "1"))
    (package
      (name "gffcompare")
      (version (git-version "0.10.15" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffcompare/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0cp5qpxdhw4mxpya5dld8wi3jk00zyklm6rcri426wydinrnfmkg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Copy the "gclib-source" input into a "gclib" directory next
           ;; to the gffcompare source tree before building.
           (add-before 'build 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir "../gclib")
               (copy-recursively
                (assoc-ref inputs "gclib-source") "../gclib")
               #t))
           ;; Copy the built executable into the output's bin directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "gffcompare" bin)
                 #t))))))
      (native-inputs
       `(("gclib-source"                ; see 'README.md' of gffcompare
          ;; This must be 'let*', not 'let': VERSION has to be computed
          ;; from the gclib COMMIT and REVISION bound right here.  With a
          ;; plain 'let' the initializer would see the gffcompare bindings
          ;; of the enclosing 'let', and the gclib checkout name would
          ;; carry gffcompare's commit id.
          ,(let* ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
                  (revision "1")
                  (name "gclib")
                  (version (git-version "0.10.3" revision commit)))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib/")
                     (commit commit)))
               (file-name (git-file-name name version))
               (sha256
                (base32 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffcompare/")
      (synopsis "Tool for comparing or classifying transcripts of RNA-Seq")
      (description
       "@code{gffcompare} is a tool that can:
@enumerate
@item compare and evaluate the accuracy of RNA-Seq transcript assemblers
(Cufflinks, Stringtie);
@item collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g.
resulted from assembly of different samples);
@item classify transcripts from one or multiple GTF/GFF3 files as they relate to
reference transcripts provided in an annotation file (also in GTF/GFF3 format).
@end enumerate")
      (license
       (list
        license:expat                   ;license for gffcompare
        license:artistic2.0)))))        ;license for gclib
13758
;; C++ interval tree library, pinned to a specific upstream commit.
(define-public intervaltree
  (let ((commit "b90527f9e6d51cd36ecbb50429e4524d3a418ea5")
        (revision "1"))
    (package
      (name "intervaltree")
      (version (git-version "0.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/intervaltree/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0rgv6q5fl4x5d74n6p5wvdna6zmbdbqpb4jqqh6vq3670gn08xad"))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f                    ; No tests.
         ;; PREFIX is set to the package output; DESTDIR is passed
         ;; explicitly as well.
         #:make-flags
         (let ((out (assoc-ref %outputs "out")))
           (list (string-append "PREFIX=" out)
                 "DESTDIR=\"\""))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure))))       ; There is no configure phase.
      (home-page "https://github.com/ekg/intervaltree")
      (synopsis "Minimal C++ interval tree implementation")
      (description "An interval tree can be used to efficiently find a set of
numeric intervals overlapping or containing another interval. This library
provides a basic implementation of an interval tree using C++ templates,
allowing the insertion of arbitrary types into the tree.")
      (license license:expat))))
13788
;; Python interval tree library, fetched from PyPI.
(define-public python-intervaltree
  (package
    (name "python-intervaltree")
    (version "3.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "intervaltree" version))
       (sha256
        (base32 "0wz234g6irlm4hivs2qzmnywk0ss06ckagwh15nflkyb3p462kyb"))))
    (build-system python-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; pytest seems to have a check to make sure the user is testing
         ;; their checked-out code and not an installed, potentially
         ;; out-of-date copy. This is harmless here, since we just installed
         ;; the package, so we disable the check to avoid skipping tests
         ;; entirely.
         (add-before 'check 'import-mismatch-error-workaround
           (lambda _
             (setenv "PY_IGNORE_IMPORTMISMATCH" "1")
             #t)))))
    (propagated-inputs
     `(("python-sortedcontainers" ,python-sortedcontainers)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/chaimleib/intervaltree")
    (synopsis "Editable interval tree data structure")
    (description
     "This package provides a mutable, self-balancing interval tree
implementation for Python. Queries may be by point, by range overlap, or by
range envelopment. This library was designed to allow tagging text and time
intervals, where the intervals include the lower bound but not the upper
bound.")
    (license license:asl2.0)))
13826
(define-public python-pypairix
  (package
    (name "python-pypairix")
    (version "0.3.7")
    ;; The tarball on pypi does not include the makefile to build the
    ;; programs.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/4dn-dcic/pairix")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1snr3lrmsld8sy77ng6ba6wcmd33xjccf1l2f3m6pi29xis9nd6p"))))
    (build-system python-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Run "make" before the Python build, then copy the contents of
         ;; the "bin" directory into the output after the Python install.
         (add-before 'build 'build-programs
           (lambda _
             (invoke "make")))
         (add-after 'install 'install-programs
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (copy-recursively "bin" bin)
               #t))))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/4dn-dcic/pairix")
    (synopsis "Support for querying pairix-indexed bgzipped text files")
    (description
     "Pypairix is a Python module for fast querying on a pairix-indexed
bgzipped text file that contains a pair of genomic coordinates per line.")
    (license license:expat)))
13863
;; pyfaidx, fetched from PyPI and built with the Python build system's
;; default phases.
(define-public python-pyfaidx
  (package
    (name "python-pyfaidx")
    (version "0.5.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyfaidx" version))
       (sha256
        (base32 "038xi3a6zvrxbyyfpp64ka8pcjgsdq4fgw9cl5lpxbvmm1bzzw2q"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-six" ,python-six)))
    (home-page "http://mattshirley.com")
    (synopsis "Random access to fasta subsequences")
    (description
     "This package provides procedures for efficient pythonic random access to
fasta subsequences.")
    (license license:bsd-3)))
13884
;; Python 2 variant, derived mechanically from PYTHON-PYFAIDX.
(define-public python2-pyfaidx
  (package-with-python2 python-pyfaidx))
13887
;; Support library for the "cool" storage format; the release tarball is
;; taken from PyPI.
(define-public python-cooler
  (package
    (name "python-cooler")
    (version "0.8.7")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "cooler" version))
       (sha256
        (base32
         "01g6gqix9ba27sappz6nfyiwabzrlf8i5fn8kwcz8ra356cq9crp"))))
    (build-system python-build-system)
    ;; Needed at build time only.
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-pytest" ,python-pytest)))
    ;; Python libraries that are propagated to users of this package.
    (propagated-inputs
     `(("python-asciitree" ,python-asciitree)
       ("python-biopython" ,python-biopython)
       ("python-click" ,python-click)
       ("python-cytoolz" ,python-cytoolz)
       ("python-dask" ,python-dask)
       ("python-h5py" ,python-h5py)
       ("python-multiprocess" ,python-multiprocess)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pyfaidx" ,python-pyfaidx)
       ("python-pypairix" ,python-pypairix)
       ("python-pysam" ,python-pysam)
       ("python-pyyaml" ,python-pyyaml)
       ("python-scipy" ,python-scipy)
       ("python-simplejson" ,python-simplejson)))
    (home-page "https://github.com/mirnylab/cooler")
    (synopsis "Sparse binary format for genomic interaction matrices")
    (description
     "Cooler is a support library for a sparse, compressed, binary persistent
storage format, called @code{cool}, used to store genomic interaction data,
such as Hi-C contact matrices.")
    (license license:bsd-3)))
13926
;; Helper package providing the HiCMatrix class.
(define-public python-hicmatrix
  (package
    (name "python-hicmatrix")
    (version "12")
    (source
     (origin
       ;; Fetched from git because version 12 is not available on PyPI.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/HiCMatrix")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1xhdyx16f3brgxgxybixdi64ki8nbbkq5vk4h9ahi11pzpjfn1pj"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace the pin on cooler 0.8.5 with a pin on the 0.8 series, in
         ;; both files that declare the requirement.
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* (list "requirements.txt" "setup.py")
               (("cooler *=+ *0.8.5")
                "cooler==0.8.*"))
             #t)))))
    (propagated-inputs
     `(("python-cooler" ,python-cooler)
       ("python-intervaltree" ,python-intervaltree)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-scipy" ,python-scipy)
       ("python-tables" ,python-tables)))
    (home-page "https://github.com/deeptools/HiCMatrix/")
    (synopsis "HiCMatrix class for HiCExplorer and pyGenomeTracks")
    (description
     "This helper package implements the @code{HiCMatrix} class for
the HiCExplorer and pyGenomeTracks packages.")
    (license license:gpl3+)))
13966
;; Tools to process, analyze and visualize Hi-C data.
(define-public python-hicexplorer
  (package
    (name "python-hicexplorer")
    (version "2.1.4")
    (source
     (origin
       ;; The latest version is not available on Pypi.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/HiCExplorer")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0q5gpbzmrkvygqgw524q36b4nrivcmyi5v194vsx0qw7b3gcmq08"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Turn every exact "==" version pin in setup.py into a ">=" lower
         ;; bound.
         (add-after 'unpack 'loosen-up-requirements
           (lambda _
             (substitute* "setup.py"
               (("==") ">="))
             #t)))))
    (propagated-inputs
     `(("python-biopython" ,python-biopython)
       ("python-configparser" ,python-configparser)
       ("python-cooler" ,python-cooler)
       ("python-future" ,python-future)
       ("python-intervaltree" ,python-intervaltree)
       ("python-jinja2" ,python-jinja2)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)
       ("python-six" ,python-six)
       ("python-tables" ,python-tables)
       ("python-unidecode" ,python-unidecode)))
    (home-page "https://hicexplorer.readthedocs.io")
    (synopsis "Process, analyze and visualize Hi-C data")
    (description
     "HiCExplorer is a powerful and easy to use set of tools to process,
normalize and visualize Hi-C data. HiCExplorer facilitates the creation of
contact matrices, correction of contacts, TAD detection, A/B compartments,
merging, reordering of chromosomes, conversion from different formats
including cooler and detection of long-range contacts. Moreover, it allows
the visualization of multiple contact matrices along with other types of data
like genes, compartments, ChIP-seq coverage tracks (and in general any type of
genomic scores), long range contacts and the visualization of viewpoints.")
    (license license:gpl3)))
14019
;; Program and library for plotting genome browser tracks; the release
;; tarball is taken from PyPI.
(define-public python-pygenometracks
  (package
    (name "python-pygenometracks")
    (version "3.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyGenomeTracks" version))
       (sha256
        (base32
         "16laa0wnf4qn9fb9ych4w1vqhqwjss70v0y0f6wp4gwqfrlgac0f"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         ;; Accept matplotlib 3.1.1 or later instead of exactly 3.1.1.
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               (("matplotlib ==3.1.1")
                "matplotlib >=3.1.1"))
             #t)))))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (propagated-inputs
     `(("python-future" ,python-future)
       ("python-gffutils" ,python-gffutils)
       ("python-hicmatrix" ,python-hicmatrix)
       ("python-intervaltree" ,python-intervaltree)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-tqdm" ,python-tqdm)))
    (home-page "https://pygenometracks.readthedocs.io")
    (synopsis "Program and library to plot beautiful genome browser tracks")
    (description
     "This package aims to produce high-quality genome browser tracks that
are highly customizable. Currently, it is possible to plot: bigwig, bed (many
options), bedgraph, links (represented as arcs), and Hi-C matrices.
pyGenomeTracks can make plots with or without Hi-C data.")
    (license license:gpl3+)))
14062
;; Converter between .hic and .cool files; the release tarball is taken
;; from PyPI.
(define-public python-hic2cool
  (package
    (name "python-hic2cool")
    (version "0.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "hic2cool" version))
       (sha256
        (base32
         "0xy6mhfns2lzib1kcr6419jjp6pmh0qx8z8na55lmiwn0ds8q9cl"))))
    (build-system python-build-system)
    ;; No tests are included, so the check phase is disabled.
    (arguments
     '(#:tests? #f))
    (propagated-inputs
     `(("python-cooler" ,python-cooler)))
    (home-page "https://github.com/4dn-dcic/hic2cool")
    (synopsis "Converter for .hic and .cool files")
    (description
     "This package provides a converter between @code{.hic} files (from
juicer) and single-resolution or multi-resolution @code{.cool} files (for
cooler). Both @code{hic} and @code{cool} files describe Hi-C contact
matrices.")
    (license license:expat)))
14086
;; R package "poRe", downloaded from SourceForge.
(define-public r-pore
  (package
    (name "r-pore")
    (version "0.24")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/rpore/" version
                           "/poRe_" version ".tar.gz"))
       (sha256
        (base32
         "0pih9nljbv8g4x8rkk29i7aqq681b782r5s5ynp4nw9yzqnmmksv"))))
    ;; The upstream name is spelled differently from the package name.
    (properties `((upstream-name . "poRe")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bit64" ,r-bit64)
       ("r-data-table" ,r-data-table)
       ("r-rhdf5" ,r-rhdf5)
       ("r-shiny" ,r-shiny)
       ("r-svdialogs" ,r-svdialogs)))
    (home-page "https://sourceforge.net/projects/rpore/")
    (synopsis "Visualize Nanopore sequencing data")
    (description
     "This package provides graphical user interfaces to organize and visualize Nanopore
sequencing data.")
    ;; This is free software, but it is unclear which license variant
    ;; applies; see <https://github.com/mw55309/poRe_docs/issues/10>.
    (license license:bsd-3)))
14115
;; Built from a fixed git commit; the version string is derived from the
;; base version, the revision and the commit.
(define-public r-xbioc
  (let ((commit "6ff0670a37ab3036aaf1d94aa4b208310946b0b5")
        (revision "1"))
    (package
      (name "r-xbioc")
      (version (git-version "0.1.16" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/renozao/xbioc")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0w8bsq5myiwkfhh83nm6is5ichiyvwa1axx2szvxnzq39x6knf66"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)
         ("r-assertthat" ,r-assertthat)
         ("r-biobase" ,r-biobase)
         ("r-biocmanager" ,r-biocmanager)
         ("r-digest" ,r-digest)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-reshape2" ,r-reshape2)
         ("r-stringr" ,r-stringr)))
      (home-page "https://github.com/renozao/xbioc/")
      (synopsis "Extra base functions for Bioconductor")
      (description
       "This package provides extra utility functions to perform
common tasks in the analysis of omics data, leveraging and enhancing features
provided by Bioconductor packages.")
      (license license:gpl3+))))
14148
;; Built from a fixed git commit; the version string is derived from the
;; base version, the revision and the commit.
(define-public r-cssam
  (let ((commit "9ec58c982fa551af0d80b1a266890d92954833f2")
        (revision "1"))
    (package
      (name "r-cssam")
      (version (git-version "1.4" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/csSAM")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "128syf9v39gk0z3ip000qpsjbg6l1siyq6c8b0hz41dzg5achyb3"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-formula" ,r-formula)
         ("r-ggplot2" ,r-ggplot2)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-rngtools" ,r-rngtools)
         ("r-scales" ,r-scales)))
      (home-page "https://github.com/shenorrLab/csSAM/")
      (synopsis "Cell type-specific statistical analysis of microarray")
      (description
       "This package implements the method csSAM that computes
cell-specific differential expression from measured cell proportions using
SAM.")
      ;; Any version of the LGPL applies.
      (license license:lgpl2.1+))))
14179
;; Built from a fixed git commit; the version string is derived from the
;; base version, the revision and the commit.
(define-public r-bseqsc
  (let ((commit "fef3f3e38dcf3df37103348b5780937982b43b98")
        (revision "1"))
    (package
      (name "r-bseqsc")
      (version (git-version "1.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/bseqsc")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1prw13wa20f7wlc3gkkls66n1kxz8d28qrb8icfqdwdnnv8w5qg8"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-abind" ,r-abind)
         ("r-annotationdbi" ,r-annotationdbi)
         ("r-biobase" ,r-biobase)
         ("r-cssam" ,r-cssam)
         ("r-dplyr" ,r-dplyr)
         ("r-e1071" ,r-e1071)
         ("r-edger" ,r-edger)
         ("r-ggplot2" ,r-ggplot2)
         ("r-nmf" ,r-nmf)
         ("r-openxlsx" ,r-openxlsx)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-preprocesscore" ,r-preprocesscore)
         ("r-rngtools" ,r-rngtools)
         ("r-scales" ,r-scales)
         ("r-stringr" ,r-stringr)
         ("r-xbioc" ,r-xbioc)))
      (home-page "https://github.com/shenorrLab/bseqsc")
      (synopsis "Deconvolution of bulk sequencing experiments using single cell data")
      (description
       "BSeq-sc is a bioinformatics analysis pipeline that
leverages single-cell sequencing data to estimate cell type proportion and
cell type-specific gene expression differences from RNA-seq data from bulk
tissue samples. This is a companion package to the publication \"A
single-cell transcriptomic map of the human and mouse pancreas reveals inter-
and intra-cell population structure.\" Baron et al. Cell Systems (2016)
@url{https://www.ncbi.nlm.nih.gov/pubmed/27667365}.")
      (license license:gpl2+))))
14224
(define-public porechop
  ;; Upstream recommends installing from a clone of the git repository, see
  ;; https://github.com/rrwick/Porechop#installation, so a fixed commit is
  ;; packaged here.
  (let ((revision "1")
        (commit "289d5dca4a5fc327f97b3f8cecb68ecaf1014861"))
    (package
      (name "porechop")
      (version (git-version "0.2.3" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Porechop")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "05ps43gig0d3ia9x5lj84lb00hbsl6ba9n7y7jz927npxbr2ym23"))))
      (build-system python-build-system)
      (home-page "https://github.com/rrwick/porechop")
      (synopsis "Finding, trimming or splitting adapters, in Oxford Nanopore reads")
      (description
       "The porechop package is a tool for finding and removing adapters from Oxford
Nanopore reads. Adapters on the ends of reads are trimmed off, and when a read
has an adapter in its middle, it is treated as chimeric and chopped into
separate reads. Porechop performs thorough alignments to effectively find
adapters, even at low sequence identity. Porechop also supports demultiplexing
of Nanopore reads that were barcoded with the Native Barcoding Kit, PCR
Barcoding Kit or Rapid Barcoding Kit.")
      (license license:gpl3+))))
14254
(define-public poretools
  ;; The latest release dates from 2016 and the latest commit from 2017.
  ;; Upstream recommends installing from a clone of the git repository, see
  ;; https://poretools.readthedocs.io/en/latest/content/installation.html
  (let ((revision "1")
        (commit "e426b1f09e86ac259a00c261c79df91510777407"))
    (package
      (name "poretools")
      (version (git-version "0.6.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/poretools")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0bglj833wxpp3cq430p1d3xp085ls221js2y90w7ir2x5ay8l7am"))))
      (build-system python-build-system)
      ;; Python >=2.7, <3.0 is required, so the package is built with
      ;; Python 2 and uses the Python 2 variants of its dependencies.
      (arguments
       `(#:python ,python-2))
      (inputs
       `(("hdf5" ,hdf5)))
      (propagated-inputs
       `(("python-dateutil" ,python2-dateutil)
         ("python-h5py" ,python2-h5py)
         ("python-matplotlib" ,python2-matplotlib)
         ("python-pandas" ,python2-pandas)
         ("python-seaborn" ,python2-seaborn)))
      (home-page "https://poretools.readthedocs.io")
      (synopsis "Toolkit for working with nanopore sequencing data")
      (description
       "The MinION from Oxford Nanopore Technologies is a nanopore sequencer.
This @code{poretools} package is a flexible toolkit for exploring datasets
generated by nanopore sequencing devices for the purposes of quality control and
downstream analysis. Poretools operates directly on the native FAST5, a variant
of the Hierarchical Data Format (HDF5) standard.")
      (license license:expat))))
14293
;; R package "AbsFilterGSEA", downloaded from CRAN.
(define-public r-absfiltergsea
  (package
    (name "r-absfiltergsea")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "AbsFilterGSEA" version))
       (sha256
        (base32
         "15srxkxsvn38kd5frdrwfdf0ad8gskrd0h01wmdf9hglq8fjrp7w"))))
    ;; The upstream name is spelled differently from the package name.
    (properties `((upstream-name . "AbsFilterGSEA")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-deseq" ,r-deseq)
       ("r-limma" ,r-limma)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)))
    (home-page "https://cran.r-project.org/web/packages/AbsFilterGSEA/")
    (synopsis "Improved false positive control of gene-permuting with absolute filtering")
    (description
     "This package provides a function that performs gene-permuting of a gene-set
enrichment analysis (GSEA) calculation with or without the absolute filtering.
Without filtering, users can perform (original) two-tailed or one-tailed
absolute GSEA.")
    (license license:gpl2)))
14320
;; Peak finder distributed as a set of shell, R and Perl scripts.
(define-public jamm
  (package
    (name "jamm")
    (version "1.0.7.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/mahmoudibrahim/JAMM")
             (commit (string-append "JAMMv" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0bsa5mf9n9q5jz7mmacrra41l7r8rac5vgsn6wv1fb52ya58b970"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         ;; The configure and build phases are dropped; the custom install
         ;; phase below copies the scripts into place.
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (libexec (string-append out "/libexec/jamm"))
                    (entry-points (list "JAMM.sh" "SignalGenerator.sh"))
                    ;; "bin" directories that go on the PATH of the wrapped
                    ;; entry points.
                    (tool-dirs
                     (map (lambda (input)
                            (string-append (assoc-ref inputs input) "/bin"))
                          '("coreutils" "gawk" "perl" "r-minimal")))
                    (perl5lib (getenv "PERL5LIB"))
                    (r-libs-site (getenv "R_LIBS_SITE")))
               ;; Make the entry points look for the helper scripts in the
               ;; libexec directory of the output.
               (substitute* entry-points
                 (("^sPath=.*")
                  (string-append "sPath=\"" libexec "\"\n")))
               ;; Helper scripts: R first, then Perl.
               (for-each (lambda (helper)
                           (install-file helper libexec))
                         '("bincalculator.r"
                           "peakfinder.r"
                           "peakhelper.r"
                           "signalmaker.r"
                           "xcorr.r"
                           "xcorrhelper.r"
                           "peakfilter.pl"
                           "readshifter.pl"))
               ;; Install each entry point as an executable and wrap it with
               ;; the environment captured above.
               (for-each
                (lambda (entry-point)
                  (chmod entry-point #o555)
                  (install-file entry-point bin)
                  (wrap-program (string-append bin "/" entry-point)
                    `("PATH" ":" prefix ,tool-dirs)
                    `("PERL5LIB" ":" prefix (,perl5lib))
                    `("R_LIBS_SITE" ":" prefix (,r-libs-site))))
                entry-points)
               #t))))))
    (inputs
     `(("bash" ,bash)
       ("coreutils" ,coreutils)
       ("gawk" ,gawk)
       ("perl" ,perl)
       ("r-minimal" ,r-minimal)
       ;;("r-parallel" ,r-parallel)
       ("r-signal" ,r-signal)
       ("r-mclust" ,r-mclust)))
    (home-page "https://github.com/mahmoudibrahim/JAMM")
    (synopsis "Peak finder for NGS datasets")
    (description
     "JAMM is a peak finder for next generation sequencing datasets (ChIP-Seq,
ATAC-Seq, DNase-Seq, etc.) that can integrate replicates and assign peak
boundaries accurately. JAMM is applicable to both broad and narrow
datasets.")
    (license license:gpl3+)))
14394
;; Haskell implementation of a domain-specific language for NGS data
;; processing.
(define-public ngless
  (package
    (name "ngless")
    (version "1.1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://gitlab.com/ngless/ngless.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1wim8wpqyff080dfcazynrmjwqas38m24m0v350w245mmhrapdma"))))
    (build-system haskell-build-system)
    (arguments
     `(;; The haddock phase fails with:
       ;;   NGLess/CmdArgs.hs:20:1: error: parse error on input import
       ;;   import Options.Applicative
       #:haddock? #f
       #:phases
       (modify-phases %standard-phases
         ;; Record the versions of the packaged tools in the Makefile, then
         ;; have make generate NGLess/Dependencies/Versions.hs.
         (add-after 'unpack 'create-Versions.hs
           (lambda _
             (substitute* "Makefile"
               (("BWA_VERSION = .*")
                (string-append "BWA_VERSION = "
                               ,(package-version bwa) "\n"))
               (("SAM_VERSION = .*")
                (string-append "SAM_VERSION = "
                               ,(package-version samtools) "\n"))
               (("PRODIGAL_VERSION = .*")
                (string-append "PRODIGAL_VERSION = "
                               ,(package-version prodigal) "\n"))
               (("MINIMAP2_VERSION = .*")
                (string-append "MINIMAP2_VERSION = "
                               ,(package-version minimap2) "\n")))
             (invoke "make" "NGLess/Dependencies/Versions.hs")
             #t))
         ;; Run hpack once Versions.hs has been created.
         (add-after 'create-Versions.hs 'create-cabal-file
           (lambda _
             (invoke "hpack")
             #t))
         ;; These tools are expected to be installed alongside ngless, so
         ;; link each one into the output as "ngless-VERSION-TOOL".
         (add-after 'install 'link-tools
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each
                (lambda (tool)
                  (symlink (string-append (assoc-ref inputs tool)
                                          "/bin/" tool)
                           (string-append bin "ngless-" ,version
                                          "-" tool)))
                '("prodigal" "minimap2" "samtools" "bwa"))
               #t))))))
    (native-inputs
     `(("ghc-hpack" ,ghc-hpack)
       ("ghc-quickcheck" ,ghc-quickcheck)
       ("ghc-test-framework" ,ghc-test-framework)
       ("ghc-test-framework-hunit" ,ghc-test-framework-hunit)
       ("ghc-test-framework-quickcheck2" ,ghc-test-framework-quickcheck2)
       ("ghc-test-framework-th" ,ghc-test-framework-th)))
    (inputs
     `(("prodigal" ,prodigal)
       ("bwa" ,bwa)
       ("samtools" ,samtools)
       ("minimap2" ,minimap2)
       ("ghc-aeson" ,ghc-aeson)
       ("ghc-ansi-terminal" ,ghc-ansi-terminal)
       ("ghc-async" ,ghc-async)
       ("ghc-atomic-write" ,ghc-atomic-write)
       ("ghc-bytestring-lexing" ,ghc-bytestring-lexing)
       ("ghc-conduit" ,ghc-conduit)
       ("ghc-conduit-algorithms" ,ghc-conduit-algorithms)
       ("ghc-conduit-extra" ,ghc-conduit-extra)
       ("ghc-configurator" ,ghc-configurator)
       ("ghc-convertible" ,ghc-convertible)
       ("ghc-data-default" ,ghc-data-default)
       ("ghc-diagrams-core" ,ghc-diagrams-core)
       ("ghc-diagrams-lib" ,ghc-diagrams-lib)
       ("ghc-diagrams-svg" ,ghc-diagrams-svg)
       ("ghc-double-conversion" ,ghc-double-conversion)
       ("ghc-edit-distance" ,ghc-edit-distance)
       ("ghc-either" ,ghc-either)
       ("ghc-errors" ,ghc-errors)
       ("ghc-extra" ,ghc-extra)
       ("ghc-filemanip" ,ghc-filemanip)
       ("ghc-file-embed" ,ghc-file-embed)
       ("ghc-gitrev" ,ghc-gitrev)
       ("ghc-hashtables" ,ghc-hashtables)
       ("ghc-http-conduit" ,ghc-http-conduit)
       ("ghc-inline-c" ,ghc-inline-c)
       ("ghc-inline-c-cpp" ,ghc-inline-c-cpp)
       ("ghc-intervalmap" ,ghc-intervalmap)
       ("ghc-missingh" ,ghc-missingh)
       ("ghc-optparse-applicative" ,ghc-optparse-applicative)
       ("ghc-regex" ,ghc-regex)
       ("ghc-safe" ,ghc-safe)
       ("ghc-safeio" ,ghc-safeio)
       ("ghc-strict" ,ghc-strict)
       ("ghc-tar" ,ghc-tar)
       ("ghc-tar-conduit" ,ghc-tar-conduit)
       ("ghc-unliftio" ,ghc-unliftio)
       ("ghc-unliftio-core" ,ghc-unliftio-core)
       ("ghc-vector" ,ghc-vector)
       ("ghc-yaml" ,ghc-yaml)
       ("ghc-zlib" ,ghc-zlib)))
    (propagated-inputs
     `(("r-r6" ,r-r6)
       ("r-hdf5r" ,r-hdf5r)
       ("r-iterators" ,r-iterators)
       ("r-itertools" ,r-itertools)
       ("r-matrix" ,r-matrix)))
    (home-page "https://gitlab.com/ngless/ngless")
    (synopsis "DSL for processing next-generation sequencing data")
    (description
     "Ngless is a domain-specific language for
@dfn{next-generation sequencing} (NGS) data processing.")
    (license license:expat)))
14515
(define-public filtlong
  ;; Upstream recommends installing from a clone of the git repository, see
  ;; https://github.com/rrwick/Filtlong#installation, and the latest release
  ;; is more than nine months old.
  (let ((revision "1")
        (commit "d1bb46dfe8bc7efe6257b5ce222c04bfe8aedaab"))
    (package
      (name "filtlong")
      (version (git-version "0.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Filtlong")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1xr92r820x8qlkcr3b57iw223yq8vjgyi42jr79w2xgw47qzr575"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Install the program into "bin" and the two helper scripts
           ;; into "share/filtlong/scripts".
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (script-dir (string-append out
                                                 "/share/filtlong/scripts")))
                 (install-file "bin/filtlong" bin)
                 (for-each (lambda (script)
                             (install-file script script-dir))
                           '("scripts/histogram.py"
                             "scripts/read_info_histograms.sh")))
               #t))
           ;; Wrap the installed histogram.py with the PYTHONPATH of the
           ;; build environment.
           (add-after 'install 'wrap-program
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let ((pythonpath (getenv "PYTHONPATH"))
                     (histogram (string-append
                                 (assoc-ref outputs "out")
                                 "/share/filtlong/scripts/histogram.py")))
                 (wrap-program histogram
                   `("PYTHONPATH" ":" prefix (,pythonpath))))
               #t))
           ;; Replace "awk" in the shell script with the absolute file name
           ;; of gawk.
           (add-before 'check 'patch-tests
             (lambda _
               (let ((gawk (which "gawk")))
                 (substitute* "scripts/read_info_histograms.sh"
                   (("awk") gawk)))
               #t)))))
      (inputs
       `(("gawk" ,gawk)                 ;for read_info_histograms.sh
         ("python" ,python-2)           ;required for histogram.py
         ("zlib" ,zlib)))
      (home-page "https://github.com/rrwick/Filtlong/")
      (synopsis "Tool for quality filtering of Nanopore and PacBio data")
      (description
       "The Filtlong package is a tool for filtering long reads by quality.
It can take a set of long reads and produce a smaller, better subset. It uses
both read length (longer is better) and read identity (higher is better) when
choosing which reads pass the filter.")
      (license (list license:gpl3       ;filtlong
                     license:asl2.0))))) ;histogram.py
14575
(define-public nanopolish
  ;; The recommended way to install is to clone the git repository
  ;; <https://github.com/jts/nanopolish#installing-a-particular-release>.
  ;; Also, the differences between release and current version seem to be
  ;; significant.
  (let ((commit "6331dc4f15b9dfabb954ba3fae9d76b6c3ca6377")
        (revision "1"))
    (package
      (name "nanopolish")
      (version (git-version "0.11.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/jts/nanopolish")
               (commit commit)
               (recursive? #t)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "15ikl3d37y49pwd7vx36xksgsqajhf24q7qqsnpl15dqqyy5qgbc"))
         (modules '((guix build utils)))
         ;; Remove the "htslib" directory from the checkout; htslib is
         ;; provided as an input instead.
         (snippet
          '(begin
             (delete-file-recursively "htslib")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags
         `("HDF5=noinstall" "EIGEN=noinstall" "HTS=noinstall" "CC=gcc")
         #:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           ;; Put the "include/eigen3" directory of the eigen input at the
           ;; front of CPATH.
           (add-after 'unpack 'find-eigen
             (lambda* (#:key inputs #:allow-other-keys)
               (setenv "CPATH"
                       (string-append (assoc-ref inputs "eigen")
                                      "/include/eigen3:"
                                      (or (getenv "CPATH") "")))
               #t))
           (delete 'configure)
           ;; Install the program into "bin" and every file found under
           ;; "scripts" into "share/nanopolish/scripts".
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (scripts (string-append out "/share/nanopolish/scripts")))
                 (install-file "nanopolish" bin)
                 (for-each (lambda (file) (install-file file scripts))
                           (find-files "scripts" ".*"))
                 #t)))
           ;; Wrap the installed Python and Perl scripts with the library
           ;; search paths of the build environment.  The scripts must be
           ;; looked up below the output directory, not below the absolute
           ;; path "/share/nanopolish/scripts", and the wrapped value has to
           ;; be read from the environment.
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (scripts (string-append out "/share/nanopolish/scripts"))
                      ;; Wrap every script matching PATTERN so that VARIABLE
                      ;; is prefixed with its build-time value.  Nothing is
                      ;; wrapped when VARIABLE is unset.
                      (wrap-scripts
                       (lambda (variable pattern)
                         (let ((value (getenv variable)))
                           (when value
                             (for-each
                              (lambda (file)
                                (wrap-program file
                                  `(,variable ":" prefix (,value))))
                              (find-files scripts pattern)))))))
                 (wrap-scripts "PYTHONPATH" "\\.py$")
                 (wrap-scripts "PERL5LIB" "\\.pl$")
                 #t))))))
      (inputs
       `(("eigen" ,eigen)
         ("hdf5" ,hdf5)
         ("htslib" ,htslib)
         ("perl" ,perl)
         ("python" ,python-wrapper)
         ("python-biopython" ,python-biopython)
         ("python-numpy" ,python-numpy)
         ("python-pysam" ,python-pysam)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-scipy" ,python-scipy)
         ("zlib" ,zlib)))
      (home-page "https://github.com/jts/nanopolish")
      (synopsis "Signal-level analysis of Oxford Nanopore sequencing data")
      (description
       "This package analyses the Oxford Nanopore sequencing data at signal-level.
Nanopolish can calculate an improved consensus sequence for a draft genome
assembly, detect base modifications, call SNPs (Single nucleotide
polymorphisms) and indels with respect to a reference genome and more.")
      (license license:expat))))
14655
;; Python library and command-line toolkit for copy number analysis.
(define-public cnvkit
  (package
    (name "cnvkit")
    (version "0.9.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/etal/cnvkit")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0g2f78k68yglmj4fsfmgs8idqv3di9aj53fg0ld0hqljg8chhh82"))))
    (build-system python-build-system)
    (propagated-inputs
     `(;; Python packages
       ("python-biopython" ,python-biopython)
       ("python-future" ,python-future)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-reportlab" ,python-reportlab)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-pyfaidx" ,python-pyfaidx)
       ("python-scipy" ,python-scipy)
       ;; R packages
       ("r-dnacopy" ,r-dnacopy)))
    (home-page "https://cnvkit.readthedocs.org/")
    (synopsis "Copy number variant detection from targeted DNA sequencing")
    (description
     "CNVkit is a Python library and command-line software toolkit to infer
and visualize copy number from high-throughput DNA sequencing data. It is
designed for use with hybrid capture, including both whole-exome and custom
target panels, and short-read sequencing platforms such as Illumina and Ion
Torrent.")
    (license license:asl2.0)))
14691
;; Cython wrapper for FIt-SNE, built from a tagged git checkout.
(define-public python-pyfit-sne
  (package
    (name "python-pyfit-sne")
    (version "1.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/KlugerLab/pyFIt-SNE")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "13wh3qkzs56azmmgnxib6xfr29g7xh09sxylzjpni5j0pp0rc5qw"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)))
    (inputs
     `(("fftw" ,fftw)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://github.com/KlugerLab/pyFIt-SNE")
    (synopsis "FFT-accelerated Interpolation-based t-SNE")
    (description
     "t-Stochastic Neighborhood Embedding (t-SNE) is a highly successful
method for dimensionality reduction and visualization of high dimensional
datasets. A popular implementation of t-SNE uses the Barnes-Hut algorithm to
approximate the gradient at each iteration of gradient descent. This package
is a Cython wrapper for FIt-SNE.")
    (license license:bsd-4)))
14721
;; BBMap and the accompanying BBTools, built with Ant using OpenJDK 11.
;; Besides the jar, the build compiles a JNI helper library and installs the
;; shell wrappers and the documentation by hand.
(define-public bbmap
  (package
    (name "bbmap")
    (version "35.82")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/bbmap/BBMap_" version ".tar.gz"))
              (sha256
               (base32
                "1q4rfhxcb6z3gm8zg2davjz98w22lkf4hm9ikxz9kdl93pil3wkd"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,openjdk11
       #:build-target "dist"
       #:tests? #f                      ; there are none
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils))
       ;; Pass the location of mpi.jar from the "java-openmpi" input as the
       ;; "mpijar" property.
       #:make-flags
       (list (string-append "-Dmpijar="
                            (assoc-ref %build-inputs "java-openmpi")
                            "/lib/mpi.jar"))
       #:phases
       (modify-phases %standard-phases
         ;; The JNI library has its own makefile in the "jni" directory.
         (add-after 'build 'build-jni-library
           (lambda _
             (with-directory-excursion "jni"
               (invoke "make" "-f" "makefile.linux"))))
         ;; There is no install target
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-scripts-and-documentation
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Refer to awk by the file name found on PATH at build time.
             (substitute* "calcmem.sh"
               (("\\| awk ") (string-append "| " (which "awk") " ")))
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (docdir (string-append prefix "/share/doc/bbmap"))
                    (jnidir (string-append prefix "/lib/jni"))
                    (wrappers (find-files "." "\\.sh$")))
               ;; Point every wrapper script at the installed jar, JNI
               ;; directory, documentation directory and java executable.
               (substitute* wrappers
                 (("\\$DIR\"\"docs") docdir)
                 (("^CP=.*")
                  (string-append "CP=" prefix "/share/java/BBTools.jar\n"))
                 (("^NATIVELIBDIR.*")
                  (string-append "NATIVELIBDIR=" jnidir "\n"))
                 (("CMD=\"java")
                  (string-append "CMD=\"" (which "java"))))
               (for-each (lambda (wrapper)
                           (install-file wrapper bindir))
                         wrappers)

               ;; Install JNI library
               (install-file "jni/libbbtoolsjni.so" jnidir)

               ;; Install documentation
               (install-file "docs/readme.txt" docdir)
               (copy-recursively "docs/guides" docdir))
             #t)))))
    (inputs
     `(("gawk" ,gawk)
       ("java-eclipse-jdt-core" ,java-eclipse-jdt-core)
       ("java-eclipse-jdt-compiler-apt" ,java-eclipse-jdt-compiler-apt)
       ("java-openmpi" ,java-openmpi)))
    (home-page "https://sourceforge.net/projects/bbmap/")
    (synopsis "Aligner and other tools for short sequencing reads")
    (description
     "This package provides bioinformatic tools to align, deduplicate,
reformat, filter and normalize DNA and RNA-seq data. It includes the
following tools: BBMap, a short read aligner for DNA and RNA-seq data; BBNorm,
a kmer-based error-correction and normalization tool; Dedupe, a tool to
simplify assemblies by removing duplicate or contained subsequences that share
a target percent identity; Reformat, to convert reads between
fasta/fastq/scarf/fasta+qual/sam, interleaved/paired, and ASCII-33/64, at over
500 MB/s; and BBDuk, a tool to filter, trim, or mask reads with kmer matches
to an artifact/contaminant file.")
    (license license:bsd-3)))
14797
;; Velvet assembler.  There is no configure script, so that phase is
;; dropped, and the binaries and manuals are installed by hand.
(define-public velvet
  (package
    (name "velvet")
    (version "1.2.10")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://www.ebi.ac.uk/~zerbino/velvet/"
                                  "velvet_" version ".tgz"))
              (sha256
               (base32
                "0h3njwy66p6bx14r3ar1byb0ccaxmxka4c65rn4iybyiqa4d8kc8"))
              ;; Delete bundled libraries
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file "Manual.pdf")
                  (delete-file-recursively "third-party")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags '("OPENMP=t")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; The bundled zlib is removed by the source snippet, so include
         ;; the header by its plain name instead.
         (add-after 'unpack 'fix-zlib-include
           (lambda _
             (substitute* "src/binarySequences.c"
               (("../third-party/zlib-1.2.3/zlib.h") "zlib.h"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin"))
                    (docdir (string-append prefix "/share/doc/velvet")))
               (mkdir-p bindir)
               (mkdir-p docdir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("velveth" "velvetg"))
               ;; NOTE(review): "Manual.pdf" is deleted by the source
               ;; snippet, so it is presumably regenerated during the build
               ;; with the TeX Live native input -- confirm.
               (for-each (lambda (manual)
                           (install-file manual docdir))
                         '("Manual.pdf" "Columbus_manual.pdf"))
               #t))))))
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (native-inputs
     `(("texlive" ,(texlive-union (list texlive-latex-graphics
                                        texlive-latex-hyperref)))))
    (home-page "https://www.ebi.ac.uk/~zerbino/velvet/")
    (synopsis "Nucleic acid sequence assembler for very short reads")
    (description
     "Velvet is a de novo genomic assembler specially designed for short read
sequencing technologies, such as Solexa or 454. Velvet currently takes in
short read sequences, removes errors then produces high quality unique
contigs. It then uses paired read information, if available, to retrieve the
repeated areas between contigs.")
    (license license:gpl2+)))
14855
;; Velocyto, fetched from PyPI and built with the Python build system
;; without any custom phases.
(define-public python-velocyto
  (package
    (name "python-velocyto")
    (version "0.17.17")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "velocyto" version))
       (sha256
        (base32
         "0fgygyzqgrq32dv6a00biq1p1cwi6kbl5iqblxq1kklj6b2mzmhs"))))
    (build-system python-build-system)
    ;; Libraries that are propagated to users of this package.
    (propagated-inputs
     `(("python-click" ,python-click)
       ("python-cython" ,python-cython)
       ("python-h5py" ,python-h5py)
       ("python-loompy" ,python-loompy)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numba" ,python-numba)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-scipy" ,python-scipy)))
    ;; Build-time only.
    (native-inputs
     `(("python-joblib" ,python-joblib)))
    (home-page "https://github.com/velocyto-team/velocyto.py")
    (synopsis "RNA velocity analysis for single cell RNA-seq data")
    (description
     "Velocyto is a library for the analysis of RNA velocity. Velocyto
includes a command line tool and an analysis pipeline.")
    (license license:bsd-2)))
14888
;; Arriba gene fusion detector.  The release tarball has no configure script
;; and no install target: the 'configure' phase patches the Makefile and the
;; helper script instead, and the 'install' phase copies the three programs
;; into "bin".
(define-public arriba
  (package
    (name "arriba")
    (version "1.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/suhrig/arriba/releases/"
                           "download/v" version "/arriba_v" version ".tar.gz"))
       (sha256
        (base32
         "0jx9656ry766vb8z08m1c3im87b0c82qpnjby9wz4kcz8vn87dx2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Use the htslib input (headers and shared library) in place
             ;; of the $(HTSLIB) locations referenced by the Makefile.
             (let ((htslib (assoc-ref inputs "htslib")))
               (substitute* "Makefile"
                 (("-I\\$\\(HTSLIB\\)/htslib")
                  (string-append "-I" htslib "/include/htslib"))
                 ((" \\$\\(HTSLIB\\)/libhts.a")
                  (string-append " " htslib "/lib/libhts.so"))))
             ;; Replace the bare STAR and samtools commands with the file
             ;; names found on PATH at build time.
             (substitute* "run_arriba.sh"
               (("^STAR ") (string-append (which "STAR") " "))
               (("samtools --version-only")
                (string-append (which "samtools") " --version-only"))
               (("samtools index")
                (string-append (which "samtools") " index"))
               (("samtools sort")
                (string-append (which "samtools") " sort")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "arriba" bin)
               (install-file "run_arriba.sh" bin)
               (install-file "draw_fusions.R" bin)
               ;; Prefix R_LIBS_SITE with its build-time value for the R
               ;; script.
               (wrap-program (string-append bin "/draw_fusions.R")
                 `("R_LIBS_SITE" ":" prefix (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (inputs
     `(("htslib" ,htslib)
       ("r-minimal" ,r-minimal)
       ("r-circlize" ,r-circlize)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("samtools" ,samtools)
       ("star" ,star)
       ("zlib" ,zlib)))
    (home-page "https://github.com/suhrig/arriba")
    ;; No trailing whitespace in the synopsis.
    (synopsis "Gene fusion detection from RNA-Seq data")
    (description
     "Arriba is a command-line tool for the detection of gene fusions from
RNA-Seq data. It was developed for the use in a clinical research setting.
Therefore, short runtimes and high sensitivity were important design criteria.
It is based on the fast STAR aligner and the post-alignment runtime is
typically just around two minutes. In contrast to many other fusion detection
tools which build on STAR, Arriba does not require to reduce the
@code{alignIntronMax} parameter of STAR to detect small deletions.")
    ;; All code is under the Expat license with the exception of
    ;; "draw_fusions.R", which is under GPLv3.
    (license (list license:expat license:gpl3))))
14954
;; AdapterRemoval is built straight from its Makefile: the configure phase
;; is dropped and the installation prefix is passed as a make flag.
(define-public adapterremoval
  (package
    (name "adapterremoval")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/MikkelSchubert/adapterremoval")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1nf3ki5pfzalhrx2fr1y6pfqfi133yj2m7q4fj9irf5fb94bapwr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       (list "COLOR_BUILD=no"
             (string-append "PREFIX=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://adapterremoval.readthedocs.io/")
    (synopsis "Rapid sequence adapter trimming, identification, and read merging")
    (description
     "This program searches for and removes remnant adapter sequences from
@dfn{High-Throughput Sequencing} (HTS) data and (optionally) trims low quality
bases from the 3' end of reads following adapter removal. AdapterRemoval can
analyze both single end and paired end data, and can be used to merge
overlapping paired-ended reads into (longer) consensus sequences.
Additionally, the AdapterRemoval may be used to recover a consensus adapter
sequence for paired-ended data, for which this information is not available.")
    (license license:gpl3+)))
14991
;; pplacer, built with OCaml 4.07.  COMMIT is only spliced into the patched
;; "myocamlbuild.ml" to stand in for the output of "git describe"; the
;; sources themselves are fetched from the "v" + VERSION reference.
(define-public pplacer
  (let ((commit "807f6f3"))
    (package
      (name "pplacer")
      ;; The commit should be updated with each version change.
      (version "1.1.alpha19")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/matsen/pplacer")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "11ppbbbx20p2g9wj3ff64dhnarb12q79v7qh4rk0gj6lkbz4n7cn"))))
      (build-system ocaml-build-system)
      (arguments
       `(#:ocaml ,ocaml-4.07
         #:findlib ,ocaml4.07-findlib
         #:modules ((guix build ocaml-build-system)
                    (guix build utils)
                    (ice-9 ftw))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Patch the ocamlbuild plugin: add the "-unsafe-string" compile
           ;; flag, define small string-splitting helpers in front of
           ;; "run_and_read", and use them to post-process the output of
           ;; "ocamlfind list".
           (add-after 'unpack 'fix-build-with-latest-ocaml
             (lambda _
               (substitute* "myocamlbuild.ml"
                 (("dep \\[\"c_pam\"\\]" m)
                  (string-append "flag [\"ocaml\"; \"compile\"] (A \"-unsafe-string\");\n"
                                 m))
                 (("let run_and_read" m)
                  (string-append "
let split s ch =
  let x = ref [] in
  let rec go s =
    let pos = String.index s ch in
    x := (String.before s pos)::!x;
    go (String.after s (pos + 1))
  in
  try go s
  with Not_found -> !x
let split_nl s = split s '\\n'
let before_space s =
  try String.before s (String.index s ' ')
  with Not_found -> s

" m))
                 (("run_and_read \"ocamlfind list \\| cut -d' ' -f1\"" m)
                  (string-append "List.map before_space (split_nl & " m ")"))
                 ((" blank_sep_strings &") "")
                 ((" Lexing.from_string &") ""))
               #t))
           ;; Unpack the cddlib source tarball into a scratch directory and
           ;; copy its "lib-src" C sources and headers over "cdd_src".
           (add-after 'unpack 'replace-bundled-cddlib
             (lambda* (#:key inputs #:allow-other-keys)
               (let* ((tarball (assoc-ref inputs "cddlib-src"))
                      (scratch "cddlib_guix"))
                 (mkdir scratch)
                 (with-directory-excursion scratch
                   (invoke "tar" "xvf" tarball))
                 ;; Entries 0 and 1 of the scandir result are "." and "..",
                 ;; so entry 2 is the first real entry of the scratch
                 ;; directory.
                 (let ((lib-src
                        (string-append scratch "/"
                                       (list-ref (scandir scratch) 2)
                                       "/lib-src")))
                   (for-each make-file-writable (find-files "cdd_src" ".*"))
                   (for-each
                    (lambda (source-file)
                      (copy-file source-file
                                 (string-append "cdd_src/"
                                                (basename source-file))))
                    (find-files lib-src ".*[ch]$")))
                 #t)))
           (add-after 'unpack 'fix-makefile
             (lambda _
               ;; Remove system calls to 'git'.
               (substitute* "Makefile"
                 (("^DESCRIPT:=pplacer-.*")
                  (string-append
                   "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n")))
               (substitute* "myocamlbuild.ml"
                 (("git describe --tags --long .*\\\" with")
                  (string-append
                   "echo -n v" ,version "-" ,commit "\" with")))
               #t))
           ;; Installation is a plain copy of the "bin" directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (copy-recursively "bin"
                                 (string-append (assoc-ref outputs "out")
                                                "/bin"))
               #t)))))
      (native-inputs
       `(("cddlib-src" ,(package-source cddlib))
         ("ocamlbuild" ,(package-with-ocaml4.07 ocamlbuild))
         ("pkg-config" ,pkg-config)))
      (inputs
       `(("zlib" ,zlib "static")
         ("gsl" ,gsl)
         ("ocaml-ounit" ,(package-with-ocaml4.07 ocaml-ounit))
         ("ocaml-batteries" ,(package-with-ocaml4.07 ocaml-batteries))
         ("ocaml-camlzip" ,(package-with-ocaml4.07 camlzip))
         ("ocaml-csv" ,(package-with-ocaml4.07 ocaml-csv))
         ("ocaml-sqlite3" ,ocaml4.07-sqlite3)
         ("ocaml-xmlm" ,(package-with-ocaml4.07 ocaml-xmlm))
         ("ocaml-mcl" ,(package-with-ocaml4.07 ocaml-mcl))
         ("ocaml-gsl" ,ocaml4.07-gsl-1)))
      (propagated-inputs
       `(("pplacer-scripts" ,pplacer-scripts)))
      (synopsis "Phylogenetic placement of biological sequences")
      (description
       "Pplacer places query sequences on a fixed reference phylogenetic tree
to maximize phylogenetic likelihood or posterior probability according to a
reference alignment. Pplacer is designed to be fast, to give useful
information about uncertainty, and to offer advanced visualization and
downstream analysis.")
      (home-page "https://matsen.fhcrc.org/pplacer/")
      (license license:gpl3))))
15107
;; This package is installed alongside 'pplacer'.  It is a separate package so
;; that it can use the python-build-system for the scripts that are
;; distributed alongside the main OCaml binaries.  It inherits the remaining
;; fields (source, version, ...) from 'pplacer' and is built with Python 2.
(define pplacer-scripts
  (package
    (inherit pplacer)
    (name "pplacer-scripts")
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; The Python scripts live in the "scripts" sub-directory.
         (add-after 'unpack 'enter-scripts-dir
           (lambda _ (chdir "scripts") #t))
         (replace 'check
           (lambda _ (invoke "python" "-m" "unittest" "discover" "-v") #t))
         ;; Put the external tools on PATH for the scripts being wrapped:
         ;; hmmer and infernal for "refpkg_align.py", hmmer only for
         ;; "hrefpkg_query.py".
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((bin (string-append (assoc-ref outputs "out") "/bin"))
                    (hmmer-bin (string-append (assoc-ref inputs "hmmer")
                                              "/bin"))
                    (infernal-bin (string-append (assoc-ref inputs "infernal")
                                                 "/bin")))
               (let ((path (string-append hmmer-bin ":" infernal-bin)))
                 (wrap-program (string-append bin "/refpkg_align.py")
                   `("PATH" ":" prefix (,path))))
               (wrap-program (string-append bin "/hrefpkg_query.py")
                 `("PATH" ":" prefix (,hmmer-bin))))
             #t)))))
    (inputs
     `(("infernal" ,infernal)
       ("hmmer" ,hmmer)))
    (propagated-inputs
     `(("python-biopython" ,python2-biopython)
       ("taxtastic" ,taxtastic)))
    (synopsis "Pplacer Python scripts")))
15146
;; CheckM from PyPI, built with Python 2 and with the test phase disabled.
(define-public python2-checkm-genome
  (package
    (name "python2-checkm-genome")
    (version "1.0.13")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "checkm-genome" version))
       (sha256
        (base32
         "0bm8gpxjmzxsxxl8lzwqhgx8g1dlnmp6znz7wv3hgb0gdjbf9dzz"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; some tests are interactive
       #:python ,python-2))
    ;; All dependencies are the Python 2 variants.
    (propagated-inputs
     `(("python-dendropy" ,python2-dendropy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://pypi.org/project/Checkm/")
    (synopsis "Assess the quality of putative genome bins")
    (description
     "CheckM provides a set of tools for assessing the quality of genomes
recovered from isolates, single cells, or metagenomes. It provides robust
estimates of genome completeness and contamination by using collocated sets of
genes that are ubiquitous and single-copy within a phylogenetic lineage.
Assessment of genome quality can also be examined using plots depicting key
genomic characteristics (e.g., GC, coding density) which highlight sequences
outside the expected distributions of a typical genome. CheckM also provides
tools for identifying genome bins that are likely candidates for merging based
on marker set compatibility, similarity in genomic characteristics, and
proximity within a reference genome.")
    (license license:gpl3+)))
15182
;; UMI-tools, fetched from PyPI (where the project is named "umi_tools") and
;; built with the Python build system; Cython is a build-time input.
(define-public umi-tools
  (package
    (name "umi-tools")
    (version "1.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "umi_tools" version))
       (sha256
        (base32
         "08y3vz1vcx09whmbsn722lcs6jl9wyrh9i4p3k8j4cb1i32bij4a"))))
    (build-system python-build-system)
    (inputs
     `(("python-pandas" ,python-pandas)
       ("python-future" ,python-future)
       ("python-scipy" ,python-scipy)
       ("python-matplotlib" ,python-matplotlib)
       ("python-regex" ,python-regex)
       ("python-pysam" ,python-pysam)))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (home-page "https://github.com/CGATOxford/UMI-tools")
    ;; UMI stands for "unique molecular identifier", as spelled out in the
    ;; description below.
    (synopsis "Tools for analyzing unique molecular identifiers")
    (description "This package provides tools for dealing with @dfn{Unique
Molecular Identifiers} (UMIs) and @dfn{Random Molecular Tags} (RMTs) in
genetic sequences. There are six tools: the @code{extract} and
@code{whitelist} commands are used to prepare a fastq containing UMIs @code{+/-}
cell barcodes for alignment. The remaining commands, @code{group},
@code{dedup}, and @code{count}/@code{count_tab}, are used to identify PCR
duplicates using the UMIs and perform different levels of analysis depending
on the needs of the user.")
    (license license:expat)))
15215
;; ataqv is built straight from its Makefile: there is no configure phase,
;; and the installation prefix and the Boost and htslib locations are
;; passed as make flags.
(define-public ataqv
  (package
    (name "ataqv")
    (version "1.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ParkerLab/ataqv")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "031xr6jx1aprh26y5b1lv3gzrlmzg4alfl73vvshymx8cq8asrqi"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       (list (string-append "prefix=" (assoc-ref %outputs "out"))
             (string-append "BOOST_ROOT="
                            (assoc-ref %build-inputs "boost"))
             (string-append "HTSLIB_ROOT="
                            (assoc-ref %build-inputs "htslib")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (native-inputs
     `(("lcov" ,lcov)))
    (inputs
     `(("boost" ,boost)
       ("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("zlib" ,zlib)))
    (home-page "https://github.com/ParkerLab/ataqv")
    (synopsis "Toolkit for quality control and visualization of ATAC-seq data")
    (description "This package provides a toolkit for measuring and comparing
ATAC-seq results. It was written to make it easier to spot differences that
might be caused by ATAC-seq library prep or sequencing. The main program,
@code{ataqv}, examines aligned reads and reports some basic metrics.")
    (license license:gpl3+)))
15256
;; PSIplot, taken from the upstream Git repository at the "v" + VERSION
;; reference and built with the R build system.
(define-public r-psiplot
  (package
    (name "r-psiplot")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/kcha/psiplot")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "08438h16cfry5kqh3y9hs8q1b1a8bxhblsm75knviz5r6q0n1jxh"))))
    (build-system r-build-system)
    ;; R package dependencies, propagated to users of this package.
    (propagated-inputs
     `(("r-mass" ,r-mass)
       ("r-dplyr" ,r-dplyr)
       ("r-tidyr" ,r-tidyr)
       ("r-purrr" ,r-purrr)
       ("r-readr" ,r-readr)
       ("r-magrittr" ,r-magrittr)
       ("r-ggplot2" ,r-ggplot2)))
    (home-page "https://github.com/kcha/psiplot")
    (synopsis "Plot percent spliced-in values of alternatively-spliced exons")
    (description
     "PSIplot is an R package for generating plots of @dfn{percent
spliced-in} (PSI) values of alternatively-spliced exons that were computed by
vast-tools, an RNA-Seq pipeline for alternative splicing analysis. The plots
are generated using @code{ggplot2}.")
    (license license:expat)))
15287
;; ont_fast5_api, taken from the upstream Git repository at the
;; "release_" + VERSION reference.
(define-public python-ont-fast5-api
  (package
    (name "python-ont-fast5-api")
    (version "1.4.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nanoporetech/ont_fast5_api")
             (commit (string-append "release_" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "03cbq4zbbwhll8ml2m9k8sa31mirsvcbjkrq1yna0kkzz9fad5fm"))))
    (build-system python-build-system)
    ;; Python libraries propagated to users of this package.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-six" ,python-six)
       ("python-h5py" ,python-h5py)
       ("python-progressbar33" ,python-progressbar33)))
    (home-page "https://github.com/nanoporetech/ont_fast5_api")
    (synopsis "Interface to HDF5 files of the Oxford Nanopore fast5 file format")
    (description
     "This package provides a concrete implementation of the fast5 file schema
using the generic @code{h5py} library, plain-named methods to interact with
and reflect the fast5 file schema, and tools to convert between
@code{multi_read} and @code{single_read} formats.")
    (license license:mpl2.0)))
15316
;; tbsp is packaged from a pinned Git commit; the package version is derived
;; from the base version "1.0.0", REVISION and COMMIT via 'git-version'.
(define-public tbsp
  (let ((revision "1")
        (commit "ec8fff4410cfb13a677dbbb95cbbc60217e64907"))
    (package
      (name "tbsp")
      (version (git-version "1.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/phoenixding/tbsp")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "025ym14x8gbd6hb55lsinqj6f5qzw36i10klgs7ldzxxd7s39ki1"))))
      (build-system python-build-system)
      (arguments '(#:tests? #f))        ; no tests included
      (inputs
       `(("python-matplotlib" ,python-matplotlib)
         ("python-networkx" ,python-networkx)
         ("python-numpy" ,python-numpy)
         ("python-pybigwig" ,python-pybigwig)
         ("python-biopython" ,python-biopython)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-scipy" ,python-scipy)))
      (home-page "https://github.com/phoenixding/tbsp/")
      (synopsis "SNP-based trajectory inference")
      (description
       "Several studies focus on the inference of developmental and response
trajectories from single cell RNA-Seq (scRNA-Seq) data. A number of
computational methods, often referred to as pseudo-time ordering, have been
developed for this task. CRISPR has also been used to reconstruct lineage
trees by inserting random mutations. The tbsp package implements an
alternative method to detect significant, cell type specific sequence
mutations from scRNA-Seq data.")
      (license license:expat))))
15354
;; tabixpp is built against the htslib package: the bundled "htslib"
;; directory is removed from the source, and the 'build' phase passes the
;; locations of the htslib input to make explicitly.
(define-public tabixpp
  (package
    (name "tabixpp")
    (version "1.1.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ekg/tabixpp")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "1k2a3vbq96ic4lw72iwp5s3mwwc4xhdffjj584yn6l9637q9j1yd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  (delete-file-recursively "htslib") #t))))
    (build-system gnu-build-system)
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         ;; The build phase needs overriding the location of htslib.
         (replace 'build
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib-ref (assoc-ref inputs "htslib")))
               (invoke "make"
                       (string-append "HTS_LIB=" htslib-ref "/lib/libhts.a")
                       (string-append "INCLUDES= -I" htslib-ref "/include/htslib")
                       "HTS_HEADERS="   ; No need to check for headers here.
                       ;; The library search path must name the "lib"
                       ;; directory of htslib, not its header directory.
                       (string-append "LIBPATH=-L. -L" htslib-ref "/lib")))))
         ;; There is no install target; copy the single program by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "tabix++" bin))
             #t)))))
    (home-page "https://github.com/ekg/tabixpp")
    (synopsis "C++ wrapper around tabix project")
    (description "This is a C++ wrapper around the Tabix project which abstracts
some of the details of opening and jumping in tabix-indexed files.")
    (license license:expat)))
15399
;; smithwaterman is packaged from a pinned Git commit.  Both the static
;; library and the default "all" target are requested from make, and the
;; resulting program and "libsw.a" are installed by hand.
(define-public smithwaterman
  (let ((commit "2610e259611ae4cde8f03c72499d28f03f6d38a7"))
    (package
      (name "smithwaterman")
      (version (git-version "0.0.0" "2" commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/smithwaterman/")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "0i9d8zrxpiracw3mxzd9siybpy62p06rqz9mc2w93arajgbk45bs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags '("libsw.a" "all")
         #:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((prefix (assoc-ref outputs "out")))
                 (install-file "smithwaterman"
                               (string-append prefix "/bin"))
                 (install-file "libsw.a"
                               (string-append prefix "/lib")))
               #t)))))
      (home-page "https://github.com/ekg/smithwaterman")
      (synopsis "Implementation of the Smith-Waterman algorithm")
      (description "Implementation of the Smith-Waterman algorithm.")
      ;; The licensing terms are unclear: https://github.com/ekg/smithwaterman/issues/9.
      (license (list license:gpl2 license:expat)))))
15433
;; multichoose is built from its Makefile without a configure step; the two
;; resulting programs are copied into "bin" by hand.
(define-public multichoose
  (package
    (name "multichoose")
    (version "1.0.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ekg/multichoose/")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "0ci5fqvmpamwgxvmyd79ygj6n3bnbl3vc7b6h1sxz58186sm3pfs"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; Tests require node.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               ;; TODO: There are Python modules for these programs too.
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("multichoose" "multipermute")))
             #t)))))
    (home-page "https://github.com/ekg/multichoose")
    (synopsis "Efficient loopless multiset combination generation algorithm")
    (description "This library implements an efficient loopless multiset
combination generation algorithm which is (approximately) described in
\"Loopless algorithms for generating permutations, combinations, and other
combinatorial configurations.\", G. Ehrlich - Journal of the ACM (JACM),
1973. (Algorithm 7.)")
    (license license:expat)))
15467
;; fsom is packaged from a pinned Git commit and built from its Makefile
;; without a configure step; the "fsom" file is installed into "bin" by
;; hand.
(define-public fsom
  (let ((commit "a6ef318fbd347c53189384aef7f670c0e6ce89a3"))
    (package
      (name "fsom")
      (version (git-version "0.0.0" "1" commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/fsom/")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "0gw1lpvr812pywg9y546x0h1hhj261xwls41r6kqhddjlrcjc0pi"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (install-file "fsom"
                             (string-append (assoc-ref outputs "out")
                                            "/bin"))
               #t)))))
      (home-page "https://github.com/ekg/fsom")
      (synopsis "Manage SOM (Self-Organizing Maps) neural networks")
      (description "A tiny C library for managing SOM (Self-Organizing Maps)
neural networks.")
      (license license:gpl3))))
15497
;; fastahack is built from its Makefile without a configure step; the
;; "fastahack" program is installed into "bin" by hand.
(define-public fastahack
  (package
    (name "fastahack")
    (version "1.0.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ekg/fastahack/")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "0rp1blskhzxf7vbh253ibpxbgl9wwgyzf1wbkxndi08d3j4vcss9"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; Unclear how to run tests: https://github.com/ekg/fastahack/issues/15
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "fastahack"
                           (string-append (assoc-ref outputs "out")
                                          "/bin"))
             #t)))))
    (home-page "https://github.com/ekg/fastahack")
    (synopsis "Indexing and sequence extraction from FASTA files")
    (description "Fastahack is a small application for indexing and
extracting sequences and subsequences from FASTA files. The included library
provides a FASTA reader and indexer that can be embedded into applications
which would benefit from directly reading subsequences from FASTA files. The
library automatically handles index file generation and use.")
    (license (list license:expat license:gpl2))))
15529
;; vcflib: library and command-line utilities for VCF files.
;;
;; The release tarball bundles several submodules.  They are deleted from the
;; source by the snippet below and re-populated at build time from the sources
;; of the corresponding packages (see the 'unpack-submodule-sources phase).
(define-public vcflib
  (package
    (name "vcflib")
    (version "1.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/vcflib/vcflib/releases/"
                           "download/v" version
                           "/vcflib-" version "-src.tar.gz"))
       (sha256
        (base32 "14zzrg8hg8cq9cvq2wdvp21j7nmxxkjrbagw2apd2yqv2kyx42lm"))
       (modules '((guix build utils)))
       (snippet
        `(begin
           ;; Remove the bundled copies of the submodules.
           (for-each delete-file-recursively
                     '("fastahack" "filevercmp" "fsom" "googletest" "intervaltree"
                       "libVCFH" "multichoose" "smithwaterman" "tabixpp"))
           #t))))
    (build-system gnu-build-system)
    (inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (native-inputs
     `(;; Submodules.
       ;; This package builds against the .o files so we need to extract the source.
       ("fastahack-src" ,(package-source fastahack))
       ("filevercmp-src" ,(package-source filevercmp))
       ("fsom-src" ,(package-source fsom))
       ("intervaltree-src" ,(package-source intervaltree))
       ("multichoose-src" ,(package-source multichoose))
       ("smithwaterman-src" ,(package-source smithwaterman))
       ("tabixpp-src" ,(package-source tabixpp))))
    (arguments
     `(;; No tests.  With tests disabled the 'check phase does nothing, so it
       ;; does not need to be deleted separately.
       #:tests? #f
       #:make-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list (string-append "HTS_LIB=" htslib "/lib/libhts.a")
               (string-append "HTS_INCLUDES= -I" htslib "/include/htslib")
               ;; The linker search path must name the directory holding
               ;; libhts (the same "/lib" used for HTS_LIB above), not the
               ;; header directory.
               (string-append "HTS_LDFLAGS= -L" htslib "/lib" " -lhts")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Re-create each submodule directory removed by the source snippet
         ;; and fill it from the matching "*-src" input, which is either a
         ;; directory (copied) or an archive (extracted with tar).
         (add-after 'unpack 'unpack-submodule-sources
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((unpack (lambda (source target)
                             (mkdir target)
                             (with-directory-excursion target
                               (if (file-is-directory? (assoc-ref inputs source))
                                   (copy-recursively (assoc-ref inputs source) ".")
                                   (invoke "tar" "xvf"
                                           (assoc-ref inputs source)
                                           "--strip-components=1"))))))
               (and
                (unpack "fastahack-src" "fastahack")
                (unpack "filevercmp-src" "filevercmp")
                (unpack "fsom-src" "fsom")
                (unpack "intervaltree-src" "intervaltree")
                (unpack "multichoose-src" "multichoose")
                (unpack "smithwaterman-src" "smithwaterman")
                (unpack "tabixpp-src" "tabixpp")))))
         ;; Build tabixpp first, against the htslib input instead of a bundled
         ;; "htslib" directory, then build vcflib itself.
         (replace 'build
           (lambda* (#:key inputs make-flags #:allow-other-keys)
             (let ((htslib (assoc-ref inputs "htslib")))
               (with-directory-excursion "tabixpp"
                 (substitute* "Makefile"
                   (("-Ihtslib") (string-append "-I" htslib "/include/htslib"))
                   ;; Point the library search path at the directory that
                   ;; actually contains libhts.
                   (("-Lhtslib") (string-append "-L" htslib "/lib"))
                   (("htslib/htslib") (string-append htslib "/include/htslib")))
                 (invoke "make"
                         (string-append "HTS_LIB=" htslib "/lib/libhts.a")))
               (apply invoke "make" "CC=gcc" "CFLAGS=-Itabixpp" make-flags))))
         ;; Install every file found under "bin" plus the static library.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
                   (lib (string-append (assoc-ref outputs "out") "/lib")))
               (for-each (lambda (file)
                           (install-file file bin))
                         (find-files "bin" ".*"))
               ;; The header files in src/ do not interface libvcflib,
               ;; therefore they are left out.
               (install-file "libvcflib.a" lib))
             #t)))))
    (home-page "https://github.com/vcflib/vcflib/")
    (synopsis "Library for parsing and manipulating VCF files")
    (description "Vcflib provides methods to manipulate and interpret
sequence variation as it can be described by VCF. It is both an API for parsing
and operating on records of genomic variation as it can be described by the VCF
format, and a collection of command-line utilities for executing complex
manipulations on VCF files.")
    (license license:expat)))
15628
;; freebayes: haplotype-based variant detector, built from a fixed commit.
;;
;; The checkout does not contain the contents of its submodules, so vcflib
;; (with its own submodules) and the test helpers are unpacked from separate
;; inputs before building.
(define-public freebayes
  (let ((commit "3ce827d8ebf89bb3bdc097ee0fe7f46f9f30d5fb")
        (revision "1")
        (version "1.0.2"))
    (package
      (name "freebayes")
      (version (git-version version revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/freebayes")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "1sbzwmcbn78ybymjnhwk7qc5r912azy5vqz2y7y81616yc3ba2a2"))))
      (build-system gnu-build-system)
      (inputs
       `(("bamtools" ,bamtools)
         ("htslib" ,htslib)
         ("zlib" ,zlib)))
      (native-inputs
       `(("bc" ,bc)                     ; Needed for running tests.
         ("samtools" ,samtools)         ; Needed for running tests.
         ("parallel" ,parallel)         ; Needed for running tests.
         ("perl" ,perl)                 ; Needed for running tests.
         ("procps" ,procps)             ; Needed for running tests.
         ("python" ,python-2)           ; Needed for running tests.
         ("vcflib-src" ,(package-source vcflib))
         ;; These are submodules for the vcflib version used in freebayes.
         ;; This package builds against the .o files so we need to extract the source.
         ("tabixpp-src" ,(package-source tabixpp))
         ("smithwaterman-src" ,(package-source smithwaterman))
         ("multichoose-src" ,(package-source multichoose))
         ("fsom-src" ,(package-source fsom))
         ("filevercmp-src" ,(package-source filevercmp))
         ("fastahack-src" ,(package-source fastahack))
         ("intervaltree-src" ,(package-source intervaltree))
         ;; These submodules are needed to run the tests.
         ("bash-tap-src" ,(package-source bash-tap))
         ("test-simple-bash-src"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ingydotnet/test-simple-bash/")
                   (commit "124673ff204b01c8e96b7fc9f9b32ee35d898acc")))
             (file-name "test-simple-bash-src-checkout")
             (sha256
              (base32 "043plp6z0x9yf7mdpky1fw7zcpwn1p47px95w9mh16603zqqqpga"))))))
      (arguments
       `(#:make-flags
         (list "CC=gcc"
               (string-append "BAMTOOLS_ROOT="
                              (assoc-ref %build-inputs "bamtools")))
         #:test-target "test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Replace the "grep -P" invocation in the test script with an
           ;; equivalent "grep -E" one.
           (add-after 'unpack 'fix-tests
             (lambda _
               (substitute* "test/t/01_call_variants.t"
                 (("grep -P \"\\(\\\\t500\\$\\|\\\\t11000\\$\\|\\\\t1000\\$\\)\"")
                  "grep -E ' (500|11000|1000)$'"))
               #t))
           ;; Populate the submodule directories from the "*-src" inputs,
           ;; each of which is either a directory (copied) or an archive
           ;; (extracted with tar).
           (add-after 'unpack 'unpack-submodule-sources
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((unpack (lambda (source target)
                               ;; The vcflib source has its submodule
                               ;; directories removed by a snippet, so the
                               ;; target may not exist yet; create it rather
                               ;; than fail when entering it.
                               (mkdir-p target)
                               (with-directory-excursion target
                                 (if (file-is-directory? (assoc-ref inputs source))
                                     (copy-recursively (assoc-ref inputs source) ".")
                                     (invoke "tar" "xvf"
                                             (assoc-ref inputs source)
                                             "--strip-components=1"))))))
                 (and
                  (unpack "vcflib-src" "vcflib")
                  (unpack "fastahack-src" "vcflib/fastahack")
                  (unpack "filevercmp-src" "vcflib/filevercmp")
                  (unpack "fsom-src" "vcflib/fsom")
                  (unpack "intervaltree-src" "vcflib/intervaltree")
                  (unpack "multichoose-src" "vcflib/multichoose")
                  (unpack "smithwaterman-src" "vcflib/smithwaterman")
                  (unpack "tabixpp-src" "vcflib/tabixpp")
                  (unpack "test-simple-bash-src" "test/test-simple-bash")
                  (unpack "bash-tap-src" "test/bash-tap")))))
           (add-after 'unpack-submodule-sources 'fix-makefiles
             (lambda _
               ;; We don't have the .git folder to get the version tag from.
               (substitute* "vcflib/Makefile"
                 ;; The pattern consumes the line's trailing newline, so the
                 ;; replacement has to put it back; otherwise the next line
                 ;; of the Makefile is appended to this assignment.
                 (("^GIT_VERSION.*")
                  (string-append "GIT_VERSION = v" ,version "\n")))
               (substitute* "src/Makefile"
                 (("-I\\$\\(BAMTOOLS_ROOT\\)/src")
                  "-I$(BAMTOOLS_ROOT)/include/bamtools"))
               #t))
           ;; Build the bundled tabixpp and vcflib before freebayes itself.
           (add-before 'build 'build-tabixpp-and-vcflib
             (lambda* (#:key inputs make-flags #:allow-other-keys)
               (with-directory-excursion "vcflib"
                 (with-directory-excursion "tabixpp"
                   (apply invoke "make"
                          (string-append "HTS_LIB="
                                         (assoc-ref inputs "htslib")
                                         "/lib/libhts.a")
                          make-flags))
                 (apply invoke "make"
                        "CFLAGS=-Itabixpp"
                        "all"
                        make-flags))))
           ;; Install the two executables into the "bin" directory of the
           ;; "out" output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "bin/freebayes" bin)
                 (install-file "bin/bamleftalign" bin))
               #t)))))
      (home-page "https://github.com/ekg/freebayes")
      (synopsis "Haplotype-based variant detector")
      (description "FreeBayes is a Bayesian genetic variant detector designed to
find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms),
indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and
complex events (composite insertion and substitution events) smaller than the
length of a short-read sequencing alignment.")
      (license license:expat))))
15749
;; samblaster: duplicate marking for read-id grouped paired-end SAM files.
(define-public samblaster
  (package
    (name "samblaster")
    (version "0.1.24")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/GregoryFaust/samblaster")
             ;; The tag is the version prefixed with "v.".
             (commit (string-append "v." version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0iv2ddfw8363vb2x8gr3p8g88whb6mb9m0pf71i2cqsbv6jghap7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure phase.
         (delete 'configure)
         ;; Install the "samblaster" file into the "bin" directory of the
         ;; "out" output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "samblaster" bin))
             #t)))))
    (home-page "https://github.com/GregoryFaust/samblaster")
    (synopsis "Mark duplicates in paired-end SAM files")
    (description "Samblaster is a fast and flexible program for marking
duplicates in read-id grouped paired-end SAM files. It can also optionally
output discordant read pairs and/or split read mappings to separate SAM files,
and/or unmapped/clipped reads to a separate FASTQ file. When marking
duplicates, samblaster will require approximately 20MB of memory per 1M read
pairs.")
    (license license:expat)))
15783
;; r-velocyto: RNA velocity estimation in R, packaged from a fixed commit of
;; the upstream repository.
(define-public r-velocyto
  (let ((commit "d7790346cb99f49ab9c2b23ba70dcf9d2c9fc350")
        (revision "1"))
    (package
      (name "r-velocyto")
      (version (git-version "0.6" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/velocyto-team/velocyto.R")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "16wqf70j7rd7pay2q513iyz12i8n9vrpg1bisah4lddbcpx5dz1n"))))
      (build-system r-build-system)
      (inputs
       `(("boost" ,boost)))
      (propagated-inputs
       `(("r-hdf5r" ,r-hdf5r)
         ("r-mass" ,r-mass)
         ("r-mgcv" ,r-mgcv)
         ("r-pcamethods" ,r-pcamethods)
         ("r-rcpp" ,r-rcpp)
         ("r-rcpparmadillo" ,r-rcpparmadillo)
         ;; Suggested packages
         ("r-rtsne" ,r-rtsne)
         ("r-cluster" ,r-cluster)
         ("r-abind" ,r-abind)
         ("r-h5" ,r-h5)
         ("r-biocgenerics" ,r-biocgenerics)
         ("r-genomicalignments" ,r-genomicalignments)
         ("r-rsamtools" ,r-rsamtools)
         ("r-edger" ,r-edger)
         ("r-igraph" ,r-igraph)))
      (home-page "https://velocyto.org")
      (synopsis "RNA velocity estimation in R")
      (description
       "This package provides basic routines for estimation of gene-specific
transcriptional derivatives and visualization of the resulting velocity
patterns.")
      (license license:gpl3))))
15827
;; methyldackel: methylation metrics extractor for BS-seq alignments.
(define-public methyldackel
  (package
    (name "methyldackel")
    (version "0.4.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dpryan79/MethylDackel")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "10gh8k0ca92kywnrw5pkacq3g6r8s976s12k8jhp8g3g49q9a97g"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       ;; "prefix" is set to the "bin" directory of the output itself.
       (list "CC=gcc"
             (string-append "prefix="
                            (assoc-ref %outputs "out") "/bin/"))
       #:phases
       (modify-phases %standard-phases
         ;; Instead of running a configure script, patch the Makefile so that
         ;; the install rule creates $(prefix) before installing into it.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("install MethylDackel \\$\\(prefix\\)" match)
                (string-append "install -d $(prefix); " match)))
             #t)))))
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    ;; Needed for tests
    (native-inputs
     `(("python" ,python-wrapper)))
    (home-page "https://github.com/dpryan79/MethylDackel")
    (synopsis "Universal methylation extractor for BS-seq experiments")
    (description
     "MethylDackel will process a coordinate-sorted and indexed BAM or CRAM
file containing some form of BS-seq alignments and extract per-base
methylation metrics from them. MethylDackel requires an indexed fasta file
containing the reference genome as well.")
    ;; See https://github.com/dpryan79/MethylDackel/issues/85
    (license license:expat)))
15871
(define-public python-gffutils
  ;; The latest release is more than a year older than the latest commit,
  ;; so a fixed commit is packaged instead.
  (let ((commit "4034c54600813b1402945e12faa91b3a53162cf1")
        (revision "1"))
    (package
      (name "python-gffutils")
      (version (git-version "0.9" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/daler/gffutils")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1rwafjdnbir5wnk0ap06ww4lra3p5frhy7mfs03rlldgfnwxymsn"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           ;; Make every file whose name matches "\\.gz" writable right after
           ;; unpacking.
           (add-after 'unpack 'make-gz-files-writable
             (lambda _
               (for-each make-file-writable
                         (find-files "." "\\.gz"))
               #t))
           ;; Run the test suite with nosetests, passing the attribute
           ;; selector "!slow".
           (replace 'check
             (lambda _
               ;; Tests need to access the HOME directory
               (setenv "HOME" "/tmp")
               (invoke "nosetests" "-a" "!slow"))))))
      (propagated-inputs
       `(("python-argcomplete" ,python-argcomplete)
         ("python-argh" ,python-argh)
         ("python-biopython" ,python-biopython)
         ("python-pybedtools" ,python-pybedtools)
         ("python-pyfaidx" ,python-pyfaidx)
         ("python-simplejson" ,python-simplejson)
         ("python-six" ,python-six)))
      (native-inputs
       `(("python-nose" ,python-nose)))
      (home-page "https://github.com/daler/gffutils")
      (synopsis "Tool for manipulation of GFF and GTF files")
      (description
       "python-gffutils is a Python package for working with and manipulating
the GFF and GTF format files typically used for genomic annotations. The
files are loaded into a SQLite database, allowing much more complex
manipulation of hierarchical features (e.g., genes, transcripts, and exons)
than is possible with plain-text methods alone.")
      (license license:expat))))
15922
;; libsbml: library for reading, writing and validating SBML files.
(define-public libsbml
  (package
    (name "libsbml")
    (version "5.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/sbml/libsbml/"
                           version "/stable/libSBML-"
                           version "-core-src.tar.gz"))
       (sha256
        (base32
         "0slkagrk3nfi2qsksv6b1brj6zhx4bj4bkib2sdycvrcd10ql2lh"))))
    (build-system cmake-build-system)
    (arguments
     `(#:test-target "test"
       #:configure-flags
       ;; Pass the location of the libxml2 input's shared library and headers
       ;; to CMake explicitly.
       (let ((libxml2 (assoc-ref %build-inputs "libxml2")))
         (list "-DWITH_CHECK=ON"
               (string-append "-DLIBXML_LIBRARY="
                              libxml2 "/lib/libxml2.so")
               (string-append "-DLIBXML_INCLUDE_DIR="
                              libxml2 "/include/libxml2")))))
    (propagated-inputs
     `(("libxml2" ,libxml2)))
    (native-inputs
     `(("check" ,check)
       ("swig" ,swig)))
    (home-page "http://sbml.org/Software/libSBML")
    (synopsis "Process SBML files and data streams")
    (description "LibSBML is a library to help you read, write, manipulate,
translate, and validate SBML files and data streams. The @dfn{Systems Biology
Markup Language} (SBML) is an interchange format for computer models of
biological processes. SBML is useful for models of metabolism, cell
signaling, and more. It continues to be evolved and expanded by an
international community.")
    (license license:lgpl2.1+)))
15960
(define-public grocsvs
  ;; The last release is out of date and new features have been added, so a
  ;; fixed commit is packaged.
  (let ((commit "ecd956a65093a0b2c41849050e4512d46fecea5d")
        (revision "1"))
    (package
      (name "grocsvs")
      (version (git-version "0.2.6.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/grocsvs/grocsvs")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "14505725gr7qxc17cxxf0k6lzcwmgi64pija4mwf29aw70qn35cc"))
         (patches (search-patches "grocsvs-dont-use-admiral.patch"))))
      (build-system python-build-system)
      (arguments
       `(#:tests? #f                    ; No test suite.
         #:python ,python-2))           ; Only python-2 supported.
      (inputs
       `(("python2-h5py" ,python2-h5py)
         ("python2-ipython-cluster-helper" ,python2-ipython-cluster-helper)
         ("python2-networkx" ,python2-networkx)
         ("python2-psutil" ,python2-psutil)
         ("python2-pandas" ,python2-pandas)
         ("python2-pybedtools" ,python2-pybedtools)
         ("python2-pyfaidx" ,python2-pyfaidx)
         ("python2-pygraphviz" ,python2-pygraphviz)
         ("python2-pysam" ,python2-pysam)
         ("python2-scipy" ,python2-scipy)))
      (home-page "https://github.com/grocsvs/grocsvs")
      (synopsis "Genome-wide reconstruction of complex structural variants")
      (description
       "@dfn{Genome-wide Reconstruction of Complex Structural Variants}
(GROC-SVs) is a software pipeline for identifying large-scale structural
variants, performing sequence assembly at the breakpoints, and reconstructing
the complex structural variants using the long-fragment information from the
10x Genomics platform.")
      (license license:expat))))