gnu: Add go-github-com-emirpasic-gods-containers.
[jackhill/guix/guix.git] / gnu / packages / bioinformatics.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2015, 2016, 2017, 2018 Ben Woodcroft <donttrustben@gmail.com>
4 ;;; Copyright © 2015, 2016 Pjotr Prins <pjotr.guix@thebird.nl>
5 ;;; Copyright © 2015 Andreas Enge <andreas@enge.fr>
6 ;;; Copyright © 2016, 2020 Roel Janssen <roel@gnu.org>
7 ;;; Copyright © 2016, 2017, 2018, 2019, 2020 Efraim Flashner <efraim@flashner.co.il>
8 ;;; Copyright © 2016, 2020 Marius Bakke <mbakke@fastmail.com>
9 ;;; Copyright © 2016, 2018 Raoul Bonnal <ilpuccio.febo@gmail.com>
10 ;;; Copyright © 2017, 2018 Tobias Geerinckx-Rice <me@tobias.gr>
11 ;;; Copyright © 2017 Arun Isaac <arunisaac@systemreboot.net>
12 ;;; Copyright © 2018 Joshua Sierles, Nextjournal <joshua@nextjournal.com>
13 ;;; Copyright © 2018 Gábor Boskovits <boskovits@gmail.com>
14 ;;; Copyright © 2018, 2019 Mădălin Ionel Patrașcu <madalinionel.patrascu@mdc-berlin.de>
15 ;;; Copyright © 2019 Maxim Cournoyer <maxim.cournoyer@gmail.com>
16 ;;; Copyright © 2019 Brian Leung <bkleung89@gmail.com>
17 ;;; Copyright © 2019 Brett Gilio <brettg@gnu.org>
18 ;;; Copyright © 2020 Björn Höfling <bjoern.hoefling@bjoernhoefling.de>
19 ;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
20 ;;; Copyright © 2020 Pierre Langlois <pierre.langlois@gmx.com>
21 ;;;
22 ;;; This file is part of GNU Guix.
23 ;;;
24 ;;; GNU Guix is free software; you can redistribute it and/or modify it
25 ;;; under the terms of the GNU General Public License as published by
26 ;;; the Free Software Foundation; either version 3 of the License, or (at
27 ;;; your option) any later version.
28 ;;;
29 ;;; GNU Guix is distributed in the hope that it will be useful, but
30 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
31 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32 ;;; GNU General Public License for more details.
33 ;;;
34 ;;; You should have received a copy of the GNU General Public License
35 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
36
37 (define-module (gnu packages bioinformatics)
38 #:use-module ((guix licenses) #:prefix license:)
39 #:use-module (guix packages)
40 #:use-module (guix utils)
41 #:use-module (guix download)
42 #:use-module (guix git-download)
43 #:use-module (guix hg-download)
44 #:use-module (guix build-system ant)
45 #:use-module (guix build-system gnu)
46 #:use-module (guix build-system cmake)
47 #:use-module (guix build-system go)
48 #:use-module (guix build-system haskell)
49 #:use-module (guix build-system meson)
50 #:use-module (guix build-system ocaml)
51 #:use-module (guix build-system perl)
52 #:use-module (guix build-system python)
53 #:use-module (guix build-system r)
54 #:use-module (guix build-system ruby)
55 #:use-module (guix build-system scons)
56 #:use-module (guix build-system trivial)
57 #:use-module (guix deprecation)
58 #:use-module (gnu packages)
59 #:use-module (gnu packages autotools)
60 #:use-module (gnu packages algebra)
61 #:use-module (gnu packages base)
62 #:use-module (gnu packages bash)
63 #:use-module (gnu packages bison)
64 #:use-module (gnu packages bioconductor)
65 #:use-module (gnu packages boost)
66 #:use-module (gnu packages check)
67 #:use-module (gnu packages code)
68 #:use-module (gnu packages compression)
69 #:use-module (gnu packages cpio)
70 #:use-module (gnu packages cran)
71 #:use-module (gnu packages curl)
72 #:use-module (gnu packages documentation)
73 #:use-module (gnu packages databases)
74 #:use-module (gnu packages datastructures)
75 #:use-module (gnu packages dlang)
76 #:use-module (gnu packages file)
77 #:use-module (gnu packages flex)
78 #:use-module (gnu packages gawk)
79 #:use-module (gnu packages gcc)
80 #:use-module (gnu packages gd)
81 #:use-module (gnu packages golang)
82 #:use-module (gnu packages glib)
83 #:use-module (gnu packages graph)
84 #:use-module (gnu packages graphviz)
85 #:use-module (gnu packages groff)
86 #:use-module (gnu packages gtk)
87 #:use-module (gnu packages guile)
88 #:use-module (gnu packages guile-xyz)
89 #:use-module (gnu packages haskell-check)
90 #:use-module (gnu packages haskell-web)
91 #:use-module (gnu packages haskell-xyz)
92 #:use-module (gnu packages image)
93 #:use-module (gnu packages imagemagick)
94 #:use-module (gnu packages java)
95 #:use-module (gnu packages java-compression)
96 #:use-module (gnu packages jemalloc)
97 #:use-module (gnu packages linux)
98 #:use-module (gnu packages lisp-xyz)
99 #:use-module (gnu packages logging)
100 #:use-module (gnu packages machine-learning)
101 #:use-module (gnu packages man)
102 #:use-module (gnu packages maths)
103 #:use-module (gnu packages mpi)
104 #:use-module (gnu packages ncurses)
105 #:use-module (gnu packages ocaml)
106 #:use-module (gnu packages pcre)
107 #:use-module (gnu packages parallel)
108 #:use-module (gnu packages pdf)
109 #:use-module (gnu packages perl)
110 #:use-module (gnu packages perl-check)
111 #:use-module (gnu packages pkg-config)
112 #:use-module (gnu packages popt)
113 #:use-module (gnu packages protobuf)
114 #:use-module (gnu packages python)
115 #:use-module (gnu packages python-compression)
116 #:use-module (gnu packages python-science)
117 #:use-module (gnu packages python-web)
118 #:use-module (gnu packages python-xyz)
119 #:use-module (gnu packages readline)
120 #:use-module (gnu packages ruby)
121 #:use-module (gnu packages serialization)
122 #:use-module (gnu packages shells)
123 #:use-module (gnu packages sphinx)
124 #:use-module (gnu packages statistics)
125 #:use-module (gnu packages swig)
126 #:use-module (gnu packages tbb)
127 #:use-module (gnu packages tex)
128 #:use-module (gnu packages texinfo)
129 #:use-module (gnu packages textutils)
130 #:use-module (gnu packages time)
131 #:use-module (gnu packages tls)
132 #:use-module (gnu packages vim)
133 #:use-module (gnu packages web)
134 #:use-module (gnu packages xml)
135 #:use-module (gnu packages xorg)
136 #:use-module (srfi srfi-1)
137 #:use-module (ice-9 match))
138
(define-public aragorn
  (package
    (name "aragorn")
    (version "1.2.38")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://mbio-serv2.mbioekol.lu.se/ARAGORN/Downloads/aragorn"
             version ".tgz"))
       (sha256
        (base32 "09i1rg716smlbnixfm7q1ml2mfpaa2fpn3hwjg625ysmfwwy712b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests exist to run
       #:phases
       (modify-phases %standard-phases
         ;; The compiler is called by hand below, so nothing is configured.
         (delete 'configure)
         (replace 'build
           (lambda _
             ;; Compile the versioned C source into a binary named "aragorn".
             (let ((c-source (string-append "aragorn" ,version ".c")))
               (invoke "gcc" "-O3" "-ffast-math" "-finline-functions"
                       "-o" "aragorn" c-source))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Install the executable and its man page under the output.
             (let ((prefix (assoc-ref outputs "out")))
               (install-file "aragorn" (string-append prefix "/bin"))
               (install-file "aragorn.1"
                             (string-append prefix "/share/man/man1")))
             #t)))))
    (home-page "http://mbio-serv2.mbioekol.lu.se/ARAGORN")
    (synopsis "Detect tRNA, mtRNA and tmRNA genes in nucleotide sequences")
    (description
     "Aragorn identifies transfer RNA, mitochondrial RNA and
transfer-messenger RNA from nucleotide sequences, based on homology to known
tRNA consensus sequences and RNA structure. It also outputs the secondary
structure of the predicted RNA.")
    (license license:gpl2)))
183
(define-public bamm
  (package
    (name "bamm")
    (version "1.7.3")
    (source (origin
              (method git-fetch)
              ;; BamM is not available on pypi.
              (uri (git-reference
                    (url "https://github.com/Ecogenomics/BamM")
                    (commit version)
                    (recursive? #t)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1p83ahi984ipslxlg4yqy1gdnya9rkn1v71z8djgxkm9d2chw4c5"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Delete bundled htslib.
                  (delete-file-recursively "c/htslib-1.3.1")
                  #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; BamM is Python 2 only.
       ;; Do not use bundled libhts.  Do use the bundled libcfu because it has
       ;; been modified from its original form.
       #:configure-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list "--with-libhts-lib" (string-append htslib "/lib")
               "--with-libhts-inc" (string-append htslib "/include/htslib")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'autogen
           (lambda _
             ;; Regenerate the build machinery of the C component in "c/".
             (with-directory-excursion "c"
               (let ((sh (which "sh")))
                 (for-each make-file-writable (find-files "." ".*"))
                 ;; Use autogen so that 'configure' works.
                 (substitute* "autogen.sh" (("/bin/sh") sh))
                 (setenv "CONFIG_SHELL" sh)
                 (invoke "./autogen.sh")))
             #t))
         (delete 'build)
         ;; Run tests after installation so compilation only happens once.
         (delete 'check)
         (add-after 'install 'wrap-executable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make the installed "bamm" script see the build-time PATH.
             (let* ((out (assoc-ref outputs "out"))
                    (path (getenv "PATH")))
               (wrap-program (string-append out "/bin/bamm")
                 `("PATH" ":" prefix (,path))))
             #t))
         (add-after 'wrap-executable 'post-install-check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (setenv "PATH"
                     (string-append (assoc-ref outputs "out")
                                    "/bin:"
                                    (getenv "PATH")))
             ;; Prepend the installed site-packages directory to PYTHONPATH.
             ;; The helper asks the python input for its version instead of
             ;; slicing a fixed number of characters off its store file
             ;; name, which only worked for five-character versions such as
             ;; "2.7.9" and produced a bogus directory for e.g. "2.7.17".
             ;; It also copes with PYTHONPATH being unset.
             (add-installed-pythonpath inputs outputs)
             ;; There are 2 errors printed, but they are safe to ignore:
             ;; 1) [E::hts_open_format] fail to open file ...
             ;; 2) samtools view: failed to open ...
             (invoke "nosetests")
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("zlib" ,zlib)
       ("python-nose" ,python2-nose)
       ("python-pysam" ,python2-pysam)))
    (inputs
     `(("htslib" ,htslib-1.3)    ; At least one test fails on htslib-1.4+.
       ("samtools" ,samtools)
       ("bwa" ,bwa)
       ("grep" ,grep)
       ("sed" ,sed)
       ("coreutils" ,coreutils)))
    (propagated-inputs
     `(("python-numpy" ,python2-numpy)))
    (home-page "https://ecogenomics.github.io/BamM/")
    (synopsis "Metagenomics-focused BAM file manipulator")
    (description
     "BamM is a C library, wrapped in python, to efficiently generate and
parse BAM files, specifically for the analysis of metagenomic data. For
instance, it implements several methods to assess contig-wise read coverage.")
    (license license:lgpl3+)))
278
(define-public bamtools
  (package
    (name "bamtools")
    (version "2.5.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/pezmaster31/bamtools")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0nfb2ypcx9959xnbz6wxh6py3xfizgmg8nrknxl95c507m9hmq8b"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; there is no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-ldflags
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Hand the linker an rpath entry for OUT/lib/bamtools.
             (let ((libdir (string-append (assoc-ref outputs "out")
                                          "/lib/bamtools")))
               (setenv "LDFLAGS" (string-append "-Wl,-rpath=" libdir)))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/pezmaster31/bamtools")
    (synopsis "C++ API and command-line toolkit for working with BAM data")
    (description
     "BamTools provides both a C++ API and a command-line toolkit for handling
BAM files.")
    (license license:expat)))
312
(define-public bcftools
  (package
    (name "bcftools")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/samtools/bcftools/"
                           "releases/download/"
                           version "/bcftools-" version ".tar.bz2"))
       (sha256
        (base32 "1j3h638i8kgihzyrlnpj82xg1b23sijibys9hvwari3fy7kd0dkg"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the htslib copy shipped inside the tarball.
           (delete-file-recursively "htslib-1.9")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:configure-flags '("--enable-libgsl")
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'patch-tests
           (lambda _
             ;; Point the test driver at the bash found in PATH rather than
             ;; at the hard-coded /bin/bash.
             (let ((bash (which "bash")))
               (substitute* "test/test.pl"
                 (("/bin/bash") bash)))
             #t)))))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (native-inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)))
    (home-page "https://samtools.github.io/bcftools/")
    (synopsis "Utilities for variant calling and manipulating VCFs and BCFs")
    (description
     "BCFtools is a set of utilities that manipulate variant calls in the
Variant Call Format (VCF) and its binary counterpart BCF. All commands work
transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed.")
    ;; The sources are dual MIT/GPL, but becomes GPL-only when USE_GPL=1.
    (license (list license:gpl3+ license:expat))))
356
(define-public bedops
  (package
    (name "bedops")
    (version "2.4.35")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bedops/bedops")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0mmgsgwz5r9w76hzgxkxc9s9lkdhhaf7vr6i02b09vbswvs1fyqx"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f
       #:make-flags (list (string-append "BINDIR=" %output "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'unpack-tarballs
           (lambda _
             ;; FIXME: Bedops bundles tarballs of lightly patched copies of
             ;; jansson, zlib, and bzip2.  Stock libraries cannot be
             ;; substituted because at least zlib is patched to add a C++
             ;; function definition (deflateInit2cpp).  Until upstream
             ;; supports linking against system libraries, the in-tree
             ;; copies of all three have to be built.
             ;;
             ;; Upstream discussion:
             ;; https://github.com/bedops/bedops/issues/124

             ;; Extract the tarballs up front so that shebang patching
             ;; applies to their contents.
             (with-directory-excursion "third-party"
               (for-each (lambda (tarball)
                           (invoke "tar" "xvf" tarball))
                         '("jansson-2.6.tar.bz2"
                           "zlib-1.2.7.tar.bz2"
                           "bzip2-1.0.6.tar.bz2")))
             ;; Keep the Makefile from extracting them a second time.
             (substitute* "system.mk/Makefile.linux"
               (("^\tbzcat .*") "\t@echo \"not unpacking\"\n")
               (("\\./configure") "CONFIG_SHELL=bash ./configure"))
             (substitute* "third-party/zlib-1.2.7/Makefile.in"
               (("^SHELL=.*$") "SHELL=bash\n"))
             #t)))))
    (home-page "https://github.com/bedops/bedops")
    (synopsis "Tools for high-performance genomic feature operations")
    (description
     "BEDOPS is a suite of tools to address common questions raised in genomic
studies---mostly with regard to overlap and proximity relationships between
data sets. It aims to be scalable and flexible, facilitating the efficient
and accurate analysis and management of large-scale genomic data.

BEDOPS provides tools that perform highly efficient and scalable Boolean and
other set operations, statistical calculations, archiving, conversion and
other management of genomic data of arbitrary scale. Tasks can be easily
split by chromosome for distributing whole-genome analyses across a
computational cluster.")
    (license license:gpl2+)))
416
(define-public bedtools
  (package
    (name "bedtools")
    (version "2.29.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/releases/"
                           "download/v" version "/"
                           "bedtools-" version ".tar.gz"))
       (sha256
        (base32 "0m3hk6548846w83a9s5drsczvy67n2azx41kj71n03klb2gbzwg3"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; The configure phase is dropped; the install prefix is handed to
         ;; make through the "prefix" variable instead.
         (delete 'configure))
       #:make-flags
       (list (string-append "prefix=" (assoc-ref %outputs "out")))
       #:test-target "test"))
    (inputs
     `(("samtools" ,samtools)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python" ,python-wrapper)))
    (home-page "https://github.com/arq5x/bedtools2")
    (synopsis "Tools for genome analysis and arithmetic")
    (description
     "Collectively, the bedtools utilities are a swiss-army knife of tools for
a wide-range of genomics analysis tasks. The most widely-used tools enable
genome arithmetic: that is, set theory on the genome. For example, bedtools
allows one to intersect, merge, count, complement, and shuffle genomic
intervals from multiple files in widely-used genomic file formats such as BAM,
BED, GFF/GTF, VCF.")
    (license license:expat)))
452
;; Ribotaper expects fewer columns than the files written by later bedtools
;; releases contain, hence this older version.
(define-public bedtools-2.18
  (package
    (inherit bedtools)
    (name "bedtools")
    (version "2.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/arq5x/bedtools2/"
                           "releases/download/v" version
                           "/bedtools-" version ".tar.gz"))
       (sha256
        (base32 "11rvca19ncg03kxd0wzlfx5ws7r3nisd0z8s9j9n182d8ksp2pxz"))))
    (arguments
     '(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy everything found under the build tree's "bin"
             ;; directory into OUT/bin/.
             (let ((target (string-append (assoc-ref outputs "out") "/bin/"))
                   (programs (find-files "bin" ".*")))
               (for-each (lambda (program)
                           (install-file program target))
                         programs))
             #t)))))))
479
(define-public pbbam
  (package
    (name "pbbam")
    (version "0.23.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/pbbam")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0h9gkrpf2lrxklxp72xfl5bi3h5zcm5hprrya9gf0hr3xwlbpp0x"))))
    (build-system meson-build-system)
    (arguments
     `(;; TODO: tests/pbbam_test cannot be linked
       ;; ld: tests/59830eb@@pbbam_test@exe/src_test_Accuracy.cpp.o:
       ;; undefined reference to symbol '_ZTIN7testing4TestE'
       ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
       ;; error adding symbols: DSO missing from command line
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'find-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; gtest_main ships no pkg-config file, so the dependency
             ;; lookup fails.  Ask the C++ compiler object to locate the
             ;; library instead.
             (let ((gtest (assoc-ref inputs "googletest")))
               (substitute* "tests/meson.build"
                 (("pbbam_gtest_dep = dependency\\('gtest_main'.*")
                  (format #f "cpp = meson.get_compiler('cpp')
pbbam_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                          gtest))))
             #t)))))
    ;; These libraries are listed as "Required" in the pkg-config file.
    (propagated-inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (inputs
     `(("boost" ,boost)
       ("samtools" ,samtools)))
    (native-inputs
     `(("googletest" ,googletest)
       ("pkg-config" ,pkg-config)
       ("python" ,python-wrapper)))     ; for tests
    (home-page "https://github.com/PacificBiosciences/pbbam")
    (synopsis "Work with PacBio BAM files")
    (description
     "The pbbam software package provides components to create, query, and
edit PacBio BAM files and associated indices. These components include a core
C++ library, bindings for additional languages, and command-line utilities.
This library is not intended to be used as a general-purpose BAM utility - all
input and output BAMs must adhere to the PacBio BAM format specification.
Non-PacBio BAMs will cause exceptions to be thrown.")
    (license license:bsd-3)))
535
(define-public blasr-libcpp
  (package
    (name "blasr-libcpp")
    (version "5.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/blasr_libcpp")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0cn5l42zyq67sj0g2imqkhayz2iqvv0a1pgpbmlq0qynjmsrbfd2"))))
    (build-system meson-build-system)
    (arguments
     `(;; TODO: unittest/libblasr_unittest cannot be linked
       ;; ld: unittest/df08227@@libblasr_unittest@exe/alignment_utils_FileUtils_gtest.cpp.o:
       ;; undefined reference to symbol
       ;; '_ZN7testing8internal9DeathTest6CreateEPKcPKNS0_2REES3_iPPS1_'
       ;; ld: /gnu/store/...-googletest-1.8.0/lib/libgtest.so:
       ;; error adding symbols: DSO missing from command line
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Insert the hdf5 and hdf5_cpp libraries at the head of the
             ;; dependency list declared in meson.build.
             (let ((hdf5-dir (assoc-ref inputs "hdf5")))
               (substitute* "meson.build"
                 (("libblasr_deps = \\[" opening)
                  (string-append
                   opening
                   (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                           hdf5-dir hdf5-dir)))))
             #t))
         (add-after 'unpack 'find-googletest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; gtest_main ships no pkg-config file, so the dependency
             ;; lookup fails.  Ask the C++ compiler object to locate the
             ;; library instead.
             (let ((gtest (assoc-ref inputs "googletest")))
               (substitute* "unittest/meson.build"
                 (("libblasr_gtest_dep = dependency\\('gtest_main'.*")
                  (format #f "cpp = meson.get_compiler('cpp')
libblasr_gtest_dep = cpp.find_library('gtest_main', dirs : '~a')\n"
                          gtest))))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("hdf5" ,hdf5)
       ("pbbam" ,pbbam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("googletest" ,googletest)
       ("pkg-config" ,pkg-config)))
    (home-page "https://github.com/PacificBiosciences/blasr_libcpp")
    (synopsis "Library for analyzing PacBio genomic sequences")
    (description
     "This package provides three libraries used by applications for analyzing
PacBio genomic sequences. This library contains three sub-libraries: pbdata,
hdf and alignment.")
    (license license:bsd-3)))
597
(define-public blasr
  (package
    (name "blasr")
    (version "5.3.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/PacificBiosciences/blasr")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1skgy2mvz8gsgfh1gc2nfgwvpyzb1hpmp2cf2773h5wsj8nw22kl"))))
    (build-system meson-build-system)
    (arguments
     `(;; The test suite needs the "cram" executable, which is not packaged.
       #:tests? #f
       #:configure-flags '("-Dtests=false")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Insert the hdf5 and hdf5_cpp libraries at the head of the
             ;; dependency list declared in meson.build.
             (let ((hdf5-dir (assoc-ref inputs "hdf5")))
               (substitute* "meson.build"
                 (("blasr_deps = \\[" opening)
                  (string-append
                   opening
                   (format #f "cpp.find_library('hdf5', dirs : '~a'), \
cpp.find_library('hdf5_cpp', dirs : '~a'), "
                           hdf5-dir hdf5-dir)))))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("blasr-libcpp" ,blasr-libcpp)
       ("hdf5" ,hdf5)
       ("pbbam" ,pbbam)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/PacificBiosciences/blasr")
    (synopsis "PacBio long read aligner")
    (description
     "Blasr is a genomic sequence aligner for processing PacBio long reads.")
    (license license:bsd-3)))
642
(define-public ribotaper
  (package
    (name "ribotaper")
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://ohlerlab.mdc-berlin.de/"
                           "files/RiboTaper/RiboTaper_Version_"
                           version ".tar.gz"))
       (sha256
        (base32 "0ykjbps1y3z3085q94npw8i9x5gldc6shy8vlc08v76zljsm07hv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Pin R_LIBS_SITE for each installed script to the value it
             ;; has in the build environment.
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin/"))
                   (r-libs (getenv "R_LIBS_SITE")))
               (for-each
                (lambda (script)
                  (wrap-program (string-append bindir script)
                    `("R_LIBS_SITE" ":" = (,r-libs))))
                '("create_annotations_files.bash"
                  "create_metaplots.bash"
                  "Ribotaper_ORF_find.sh"
                  "Ribotaper.sh")))
             #t)))))
    (inputs
     `(("bedtools" ,bedtools-2.18)
       ("samtools" ,samtools-0.1)
       ("r-minimal" ,r-minimal)
       ("r-foreach" ,r-foreach)
       ("r-xnomial" ,r-xnomial)
       ("r-domc" ,r-domc)
       ("r-multitaper" ,r-multitaper)
       ("r-seqinr" ,r-seqinr)))
    (home-page "https://ohlerlab.mdc-berlin.de/software/RiboTaper_126/")
    (synopsis "Define translated ORFs using ribosome profiling data")
    (description
     "Ribotaper is a method for defining translated @dfn{open reading
frames} (ORFs) using ribosome profiling (ribo-seq) data. This package
provides the Ribotaper pipeline.")
    (license license:gpl3+)))
687
(define-public ribodiff
  (package
    (name "ribodiff")
    (version "0.2.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ratschlab/RiboDiff")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0x75nlp7qnmm64jasbi6l21f2cy99r2cjyl6b4hr8zf2bq22drnz"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-setup.py
           (lambda _
             ;; Add a "scripts=" entry right after the "packages=" line,
             ;; reusing that line's leading text, so that TE.py is
             ;; installed as an executable script.
             (substitute* "setup.py"
               (("^(.*)packages=.*" whole lead)
                (string-append whole "\n"
                               lead "scripts=['scripts/TE.py'],\n")))
             #t)))))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)))
    (inputs
     `(("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-scipy" ,python2-scipy)
       ("python-statsmodels" ,python2-statsmodels)))
    (home-page "https://public.bmi.inf.ethz.ch/user/zhongy/RiboDiff/")
    (synopsis "Detect translation efficiency changes from ribosome footprints")
    (description "RiboDiff is a statistical tool that detects the protein
translational efficiency change from Ribo-Seq (ribosome footprinting) and
RNA-Seq data. It uses a generalized linear model to detect genes showing
difference in translational profile taking mRNA abundance into account. It
facilitates us to decipher the translational regulation that behave
independently with transcriptional regulation.")
    (license license:gpl3+)))
732
(define-public bioawk
  (package
    (name "bioawk")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lh3/bioawk")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1pxc3zdnirxbf9a0az698hd8xdik7qkhypm7v6hn922x8y9qmspm"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests are available to run
       ;; Bison has to generate its files before the remaining targets can
       ;; be built, so the build is kept sequential.
       #:parallel-build? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; nothing to configure
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (man1 (string-append prefix "/share/man/man1")))
               ;; The man page is installed under the name "bioawk.1".
               (mkdir-p man1)
               (copy-file "awk.1" (string-append man1 "/bioawk.1"))
               (install-file "bioawk" (string-append prefix "/bin")))
             #t)))))
    (native-inputs
     `(("bison" ,bison)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/lh3/bioawk")
    (synopsis "AWK with bioinformatics extensions")
    (description "Bioawk is an extension to Brian Kernighan's awk, adding the
support of several common biological data formats, including optionally gzip'ed
BED, GFF, SAM, VCF, FASTA/Q and TAB-delimited formats with column names. It
also adds a few built-in functions and a command line option to use TAB as the
input/output delimiter. When the new functionality is not used, bioawk is
intended to behave exactly the same as the original BWK awk.")
    (license license:x11)))
776
(define-public python-pybedtools
  (package
    (name "python-pybedtools")
    (version "0.8.1")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "pybedtools" version))
              (sha256
               (base32
                "14w5i40gi25clrr7h4wa2pcpnyipya8hrqi7nq77553zc5wf0df0"))))
    (build-system python-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (srfi srfi-1)
                  (srfi srfi-26)
                  (guix build utils)
                  (guix build python-build-system))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             ;; Tests are disabled by renaming them so that the test runner
             ;; no longer collects them.
             (substitute* "pybedtools/test/test_scripts.py"
               ;; This test freezes.
               (("def test_intron_exon_reads")
                "def _do_not_test_intron_exon_reads")
               ;; This test fails in the Python 2 build.
               (("def test_venn_mpl")
                "def _do_not_test_venn_mpl"))
             (substitute* "pybedtools/test/test_helpers.py"
               ;; Requires internet access.
               (("def test_chromsizes")
                "def _do_not_test_chromsizes")
               ;; Broken as a result of the workaround used in the check phase
               ;; (see: https://github.com/daler/pybedtools/issues/192).
               (("def test_getting_example_beds")
                "def _do_not_test_getting_example_beds"))
             ;; This issue still occurs on python2
             (substitute* "pybedtools/test/test_issues.py"
               (("def test_issue_303")
                "def _test_issue_303"))
             #t))
         ;; TODO: Remove this phase once it is part of PYTHON-BUILD-SYSTEM.
         ;; Force the Cythonization of C++ files to guard against compilation
         ;; problems.
         (add-after 'unpack 'remove-cython-generated-files
           (lambda _
             ;; Delete every C/C++ file whose name, minus its extension,
             ;; matches a .pyx file minus its extension.
             (let ((cython-sources (map (cut string-drop-right <> 4)
                                        (find-files "." "\\.pyx$")))
                   (c/c++-files (find-files "." "\\.(c|cpp|cxx)$")))
               (define (strip-extension filename)
                 (string-take filename (string-index-right filename #\.)))
               (define (cythonized? c/c++-file)
                 (member (strip-extension c/c++-file) cython-sources))
               (for-each delete-file (filter cythonized? c/c++-files))
               #t)))
         (add-after 'remove-cython-generated-files 'generate-cython-extensions
           (lambda _
             (invoke "python" "setup.py" "cythonize")))
         ;; The check phase works around
         ;; https://github.com/daler/pybedtools/issues/192 by running the
         ;; tests from a copy of the test directory.
         (replace 'check
           (lambda _
             (let* ((cwd (getcwd))
                    (build-root-directory (string-append cwd "/build/"))
                    ;; The "lib*" and "scripts*" sub-directories of build/
                    ;; are looked up by prefix.
                    (build (string-append
                            build-root-directory
                            (find (cut string-prefix? "lib" <>)
                                  (scandir build-root-directory))))
                    (scripts (string-append
                              build-root-directory
                              (find (cut string-prefix? "scripts" <>)
                                    (scandir build-root-directory)))))
               (setenv "PYTHONPATH"
                       (string-append build ":" (getenv "PYTHONPATH")))
               ;; Executable scripts such as 'intron_exon_reads.py' must be
               ;; available in the PATH.
               (setenv "PATH"
                       (string-append scripts ":" (getenv "PATH"))))
             ;; The tests need to be run from elsewhere...
             (mkdir-p "/tmp/test")
             (copy-recursively "pybedtools/test" "/tmp/test")
             (with-directory-excursion "/tmp/test"
               (invoke "pytest" "-v" "--doctest-modules")))))))
    (propagated-inputs
     `(("bedtools" ,bedtools)
       ("samtools" ,samtools)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pysam" ,python-pysam)
       ("python-pyyaml" ,python-pyyaml)))
    (native-inputs
     `(("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-cython" ,python-cython)
       ("kentutils" ,kentutils)         ; for bedGraphToBigWig
       ("python-six" ,python-six)
       ;; For the test suite.
       ("python-pytest" ,python-pytest)
       ("python-psutil" ,python-psutil)))
    (home-page "https://pythonhosted.org/pybedtools/")
    (synopsis "Python wrapper for BEDtools programs")
    (description
     "pybedtools is a Python wrapper for Aaron Quinlan's BEDtools programs,
which are widely used for genomic interval manipulation or \"genome algebra\".
pybedtools extends BEDTools by offering feature-level manipulations from within
Python.")
    (license license:gpl2+)))
884
;; Python 2 build of python-pybedtools: the converted package is inherited
;; as-is, with python2-pathlib placed in front of its native inputs.
(define-public python2-pybedtools
  (let ((base (package-with-python2 python-pybedtools)))
    (package
      (inherit base)
      (native-inputs
       (cons `("python2-pathlib" ,python2-pathlib)
             (package-native-inputs base))))))
892
;; BIOM format utilities, built from the upstream Git tag.  The C files are
;; removed from the source, USE_CYTHON is set before the build, and two tests
;; are skipped via an injected decorator.
(define-public python-biom-format
  (package
    (name "python-biom-format")
    (version "2.1.7")
    (source
     (origin
       (method git-fetch)
       ;; Use GitHub as source because PyPI distribution does not contain
       ;; test data: https://github.com/biocore/biom-format/issues/693
       (uri (git-reference
             (url "https://github.com/biocore/biom-format")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1rna16lyk5aqhnv0dp77wwaplias93f1vw28ad3jmyw6hwkai05v"))
       (modules '((guix build utils)))
       (snippet '(begin
                   ;; Delete generated C files.  The pattern is anchored with
                   ;; "$" so that only names ending in ".c" are removed; an
                   ;; unanchored "\\.c" also matches names that merely contain
                   ;; ".c" (for example ".coveragerc" or "*.csv").
                   (for-each delete-file (find-files "." "\\.c$"))
                   #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Export USE_CYTHON=1 for the subsequent phases.
         (add-after 'unpack 'use-cython
           (lambda _ (setenv "USE_CYTHON" "1") #t))
         ;; Insert a skip decorator, at the same indentation, in front of the
         ;; two test definitions named below.
         (add-after 'unpack 'disable-broken-tests
           (lambda _
             (substitute* "biom/tests/test_cli/test_validate_table.py"
               (("^(.+)def test_invalid_hdf5" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m)))
             (substitute* "biom/tests/test_table.py"
               (("^(.+)def test_from_hdf5_issue_731" m indent)
                (string-append indent
                               "@npt.dec.skipif(True, msg='Guix')\n"
                               m)))
             #t))
         ;; Give the installed gzip files mode 644 before the
         ;; 'reset-gzip-timestamps' phase runs.  The pattern is anchored so
         ;; that only names ending in ".gz" are touched.
         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (for-each (lambda (file) (chmod file #o644))
                         (find-files out "\\.gz$"))
               #t))))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-flake8" ,python-flake8)
       ("python-future" ,python-future)
       ("python-click" ,python-click)
       ("python-h5py" ,python-h5py)
       ;; FIXME: Upgrade to pandas 1.0 when
       ;; https://github.com/biocore/biom-format/issues/837 is resolved.
       ("python-pandas" ,python-pandas-0.25)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-nose" ,python-nose)))
    (home-page "http://www.biom-format.org")
    (synopsis "Biological Observation Matrix (BIOM) format utilities")
    (description
     "The BIOM file format is designed to be a general-use format for
representing counts of observations e.g. operational taxonomic units, KEGG
orthology groups or lipid types, in one or more biological samples
e.g. microbiome samples, genomes, metagenomes.")
    (license license:bsd-3)
    (properties `((python2-variant . ,(delay python2-biom-format))))))
963
;; Python 2 variant of python-biom-format.  It inherits the converted package
;; and appends one phase that edits "setup.py".
(define-public python2-biom-format
  (let* ((python3-base (strip-python2-variant python-biom-format))
         (python2-base (package-with-python2 python3-base)))
    (package
      (inherit python2-base)
      (arguments
       (substitute-keyword-arguments (package-arguments python2-base)
         ((#:phases inherited-phases)
          `(modify-phases ,inherited-phases
             ;; pyqi is unmaintained, so turn the statement that adds it to
             ;; the install requirements into a no-op.
             (add-after 'unpack 'remove-pyqi
               (lambda _
                 (substitute* "setup.py"
                   (("install_requires.append\\(\"pyqi\"\\)") "pass"))
                 #t)))))))))
978
;; Pairtools, fetched from the upstream Git tag "v<version>".
(define-public python-pairtools
  (package
    (name "python-pairtools")
    (version "0.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/mirnylab/pairtools")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0gr8y13q7sd6yai6df4aavl2470n1f9s3cib6r473z4hr8hcbwmc"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace the hard-coded "/bin/bash" in the merge and sort modules
         ;; with the bash found in PATH.
         (add-after 'unpack 'fix-references
           (lambda _
             (let ((bash (which "bash")))
               (substitute* '("pairtools/pairtools_merge.py"
                              "pairtools/pairtools_sort.py")
                 (("/bin/bash") bash)))
             #t))
         ;; Run pytest from "/tmp" after adding the installed package to the
         ;; Python path.
         (replace 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (add-installed-pythonpath inputs outputs)
             (with-directory-excursion "/tmp"
               (invoke "pytest" "-v")))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)
       ("python-pytest" ,python-pytest)))
    (inputs
     `(("python" ,python-wrapper)))
    (propagated-inputs
     `(("htslib" ,htslib)               ; for bgzip, looked up in PATH
       ("samtools" ,samtools)           ; looked up in PATH
       ("lz4" ,lz4)                     ; for lz4c
       ("python-click" ,python-click)
       ("python-numpy" ,python-numpy)))
    (home-page "https://github.com/mirnylab/pairtools")
    (synopsis "Process mapped Hi-C data")
    (description
     "Pairtools is a simple and fast command-line framework to
process sequencing data from a Hi-C experiment. Process pair-end sequence
alignments and perform the following operations:

@itemize
@item detect ligation junctions (a.k.a. Hi-C pairs) in aligned paired-end
sequences of Hi-C DNA molecules
@item sort @code{.pairs} files for downstream analyses
@item detect, tag and remove PCR/optical duplicates
@item generate extensive statistics of Hi-C datasets
@item select Hi-C pairs given flexibly defined criteria
@item restore @code{.sam} alignments from Hi-C pairs.
@end itemize
")
    (license license:expat)))
1036
;; BioPerl with a minimal set of inputs.  The input list is bound outside the
;; package so that the names of its transitive target inputs can be computed
;; once and spliced into the build-side wrapping phase.
(define-public bioperl-minimal
  (let* ((runtime-inputs
          `(("perl-module-build" ,perl-module-build)
            ("perl-data-stag" ,perl-data-stag)
            ("perl-libwww" ,perl-libwww)
            ("perl-uri" ,perl-uri)))
         (transitive-input-names
          (map (lambda (input)
                 (package-name (cadr input)))
               (delete-duplicates
                (append-map (lambda (input)
                              (package-transitive-target-inputs (cadr input)))
                            runtime-inputs)))))
    (package
      (name "bioperl-minimal")
      (version "1.7.0")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/bioperl/bioperl-live")
               ;; The tag is "release-" followed by the version with every
               ;; dot turned into a dash.
               (commit (string-append
                        "release-"
                        (string-map (lambda (c)
                                      (if (char=? c #\.)
                                          #\- c))
                                    version)))))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0wl8yvzcls59pwwk6m8ahy87pwg6nnibzy5cldbvmcwg2x2w7783"))))
      (build-system perl-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               ;; Make sure all executables in "bin" find the required Perl
               ;; modules at runtime.  As the PERL5LIB variable contains also
               ;; the paths of native inputs, we pick the transitive target
               ;; inputs from %build-inputs.
               (let* ((prefix-dir (assoc-ref outputs "out"))
                      (bin-dir (string-append prefix-dir "/bin/"))
                      (site-perl (string-append prefix-dir
                                                "/lib/perl5/site_perl"))
                      (input-dirs (map (lambda (name)
                                         (assoc-ref %build-inputs name))
                                       ',transitive-input-names))
                      (perl5lib (string-join (cons site-perl input-dirs)
                                             ":")))
                 (for-each (lambda (script)
                             (wrap-program script
                               (list "PERL5LIB" ":" 'prefix
                                     (list perl5lib))))
                           (find-files bin-dir "\\.pl$"))
                 #t))))))
      (inputs runtime-inputs)
      (native-inputs
       `(("perl-test-most" ,perl-test-most)))
      (home-page "https://metacpan.org/release/BioPerl")
      (synopsis "Bioinformatics toolkit")
      (description
       "BioPerl is the product of a community effort to produce Perl code which
is useful in biology. Examples include Sequence objects, Alignment objects
and database searching objects. These objects not only do what they are
advertised to do in the documentation, but they also interact - Alignment
objects are made from the Sequence objects, Sequence objects have access to
Annotation and SeqFeature objects and databases, Blast objects can be
converted to Alignment objects, and so on. This means that the objects
provide a coordinated and extensible framework to do computational biology.")
      (license license:perl-license))))
1102
;; Biopython, taken from the PyPI tarball.
(define-public python-biopython
  (package
    (name "python-biopython")
    (version "1.70")
    (source
     (origin
       (method url-fetch)
       ;; use PyPi rather than biopython.org to ease updating
       (uri (pypi-uri "biopython" version))
       (sha256
        (base32 "0nz4n9d2y2dg849gn1z0vjlkwcpzzkzy3fij7x94a6ixy2c54z2a"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Some tests require a home directory to be set, so point HOME at
         ;; "/tmp" before the 'check' phase.
         (add-before 'check 'set-home
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://biopython.org/")
    (synopsis "Tools for biological computation in Python")
    (description
     "Biopython is a set of tools for biological computation including parsers
for bioinformatics files into Python data structures; interfaces to common
bioinformatics programs; a standard sequence class and tools for performing
common operations on them; code to perform data classification; code for
dealing with alignments; code making it easy to split up parallelizable tasks
into separate processes; and more.")
    (license (license:non-copyleft "http://www.biopython.org/DIST/LICENSE"))))
1133
;; Python 2 variant, derived mechanically from python-biopython.
(define-public python2-biopython
  (package-with-python2
   python-biopython))
1136
;; fastalite, taken from the PyPI tarball; its test suite is not run.
(define-public python-fastalite
  (package
    (name "python-fastalite")
    (version "0.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "fastalite" version))
       (sha256
        (base32 "1qli6pxp77i9xn2wfciq2zaxhl82bdxb33cpzqzj1z25yd036wqj"))))
    (build-system python-build-system)
    ;; Test data is not distributed.
    (arguments
     '(#:tests? #f))
    (home-page "https://github.com/nhoffman/fastalite")
    (synopsis "Simplest possible FASTA parser")
    (description
     "This library implements a FASTA and a FASTQ parser without
relying on a complex dependency tree.")
    (license license:expat)))
1156
;; Python 2 variant, derived mechanically from python-fastalite.
(define-public python2-fastalite
  (package-with-python2
   python-fastalite))
1159
(define-public bpp-core
  ;; Upstream's last release dates from 2014 and cloning the Git repository
  ;; is the recommended way to install from source, hence the pinned commit.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((changeset "7d8bced0d1a87291ea8dd7046b7fb5ff9c35c582"))
    (package
      (name "bpp-core")
      ;; The version ends with the first seven characters of the commit.
      (version (string-append "2.2.0-1." (string-take changeset 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-core")
               (commit changeset)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "10djsq5vlnkilv436gnmh4irpk49v29pa69r6xiryg32xmvn909j"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "C++ libraries for Bioinformatics")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. It is
Object Oriented and is designed to be both easy to use and computer efficient.
Bio++ intends to help programmers to write computer expensive programs, by
providing them a set of re-usable tools.")
      (license license:cecill-c))))
1189
(define-public bpp-phyl
  ;; Upstream's last release dates from 2014 and cloning the Git repository
  ;; is the recommended way to install from source, hence the pinned commit.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((changeset "0c07167b629f68b569bf274d1ad0c4af83276ae2"))
    (package
      (name "bpp-phyl")
      ;; The version ends with the first seven characters of the commit.
      (version (string-append "2.2.0-1." (string-take changeset 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-phyl")
               (commit changeset)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1ssjgchzwj3iai26kyly7gwkdv8sk59nqhkb1wpap3sf5m6kyllh"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f
         ;; Build in the source tree: with an out-of-source build the test
         ;; data is not copied into the build directory and the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ phylogenetic Library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides phylogenetics-related modules.")
      (license license:cecill-c))))
1223
(define-public bpp-popgen
  ;; Upstream's last release dates from 2014 and cloning the Git repository
  ;; is the recommended way to install from source, hence the pinned commit.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((changeset "e472bac9b1a148803895d747cd6d0c5904f85d9f"))
    (package
      (name "bpp-popgen")
      ;; The version ends with the first seven characters of the commit.
      (version (string-append "2.2.0-1." (string-take changeset 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-popgen")
               (commit changeset)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0yn82dzn1n5629nzja68xfrhi655709rjanyryb36vzkmymy6dw5"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f
         ;; There are no tests.
         #:tests? #f))
      (inputs
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ population genetics library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides population genetics-related modules.")
      (license license:cecill-c))))
1255
(define-public bpp-seq
  ;; Upstream's last release dates from 2014 and cloning the Git repository
  ;; is the recommended way to install from source, hence the pinned commit.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((changeset "6cfa07965ce152e5598a89df2fa80a75973bfa33"))
    (package
      (name "bpp-seq")
      ;; The version ends with the first seven characters of the commit.
      (version (string-append "2.2.0-1." (string-take changeset 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "http://biopp.univ-montp2.fr/git/bpp-seq")
               (commit changeset)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "1nys5jq7jqvdg40d91wsmj3q2yzy4276cp7sp44n67p468f27zf2"))))
      (build-system cmake-build-system)
      (arguments
       '(#:parallel-build? #f
         ;; Build in the source tree: with an out-of-source build the test
         ;; data is not copied into the build directory and the tests fail.
         #:out-of-source? #f))
      (inputs
       `(("bpp-core" ,bpp-core)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bio++ sequence library")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
library provides sequence-related modules.")
      (license license:cecill-c))))
1288
(define-public bppsuite
  ;; The last release was in 2014 and the recommended way to install from source
  ;; is to clone the git repository, so we do this.
  ;; http://biopp.univ-montp2.fr/wiki/index.php/Main_Page
  (let ((commit "c516147f57aa50961121cd505bed52cd7603698b"))
    (package
      (name "bppsuite")
      ;; The version ends with the first seven characters of the commit.
      (version (string-append "2.2.0-1." (string-take commit 7)))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "http://biopp.univ-montp2.fr/git/bppsuite")
                      (commit commit)))
                (file-name (string-append name "-" version "-checkout"))
                (sha256
                 (base32
                  "1y87pxvw0jxjizhq2dr9g2r91md45k1p9ih2sl1yy1y3p934l2kb"))))
      (build-system cmake-build-system)
      (arguments
       `(#:parallel-build? #f
         #:tests? #f))                  ; There are no tests.
      (native-inputs
       `(("groff" ,groff)
         ("man-db" ,man-db)
         ("texinfo" ,texinfo)))
      (inputs
       ;; Each input needs its own label: a repeated label makes label-based
       ;; lookups return only the first entry.
       `(("bpp-core" ,bpp-core)
         ("bpp-seq" ,bpp-seq)
         ("bpp-phyl" ,bpp-phyl)
         ("bpp-popgen" ,bpp-popgen)))
      (home-page "http://biopp.univ-montp2.fr")
      (synopsis "Bioinformatics tools written with the Bio++ libraries")
      (description
       "Bio++ is a set of C++ libraries for Bioinformatics, including sequence
analysis, phylogenetics, molecular evolution and population genetics. This
package provides command line tools using the Bio++ library.")
      (license license:cecill-c))))
1326
;; NCBI BLAST+, built from the upstream source tarball with the bundled
;; bzip2, zlib and pcre removed, and split into "out", "lib" and "include"
;; outputs.
(define-public blast+
  (package
    (name "blast+")
    (version "2.10.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/"
             version "/ncbi-blast-" version "+-src.tar.gz"))
       (sha256
        (base32 "11kvrrl0mcwww6530r55hccpg3x3msmhr3051fwnjbq8rzg2j1qi"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled bzip2, zlib and pcre.
           (delete-file-recursively "c++/src/util/compress/bzip2")
           (delete-file-recursively "c++/src/util/compress/zlib")
           (delete-file-recursively "c++/src/util/regexp")
           (substitute* "c++/src/util/compress/Makefile.in"
             (("bzip2 zlib api") "api"))
           ;; Remove useless msbuild directory
           (delete-file-recursively
            "c++/src/build-system/project_tree_builder/msbuild")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There are two(!) tests for this massive library, and both fail with
       ;; "unparsable timing stats".
       ;; ERR [127] -- [serial/datatool] datatool.sh (unparsable timing stats)
       ;; ERR [127] -- [serial/datatool] datatool_xml.sh (unparsable timing stats)
       #:tests? #f
       #:out-of-source? #t
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; $HOME needs to be set at some point during the configure phase
         (add-before 'configure 'set-HOME
           (lambda _
             (setenv "HOME" "/tmp")
             #t))
         ;; All later phases operate from the "c++" sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "c++")
             #t))
         (add-after 'enter-dir 'fix-build-system
           (lambda _
             ;; Return the replacement text for the tool named CMD: a fixed
             ;; invocation for "date", otherwise the result of 'which'.  When
             ;; 'which' finds nothing, print a warning and return #f.
             (define (which* cmd)
               (if (string=? cmd "date")
                   ;; make call to "date" deterministic
                   "date -d @0"
                   (or (which cmd)
                       (begin
                         (format (current-error-port)
                                 "WARNING: Unable to find absolute path for ~s~%"
                                 cmd)
                         #f))))

             ;; Rewrite hardcoded paths to various tools
             (substitute* (append '("src/build-system/configure.ac"
                                    "src/build-system/configure"
                                    "src/build-system/helpers/run_with_lock.c"
                                    "scripts/common/impl/if_diff.sh"
                                    "scripts/common/impl/run_with_lock.sh"
                                    "src/build-system/Makefile.configurables.real"
                                    "src/build-system/Makefile.in.top"
                                    "src/build-system/Makefile.meta.gmake=no"
                                    "src/build-system/Makefile.meta.in"
                                    "src/build-system/Makefile.meta_l"
                                    "src/build-system/Makefile.meta_p"
                                    "src/build-system/Makefile.meta_r"
                                    "src/build-system/Makefile.mk.in"
                                    "src/build-system/Makefile.requirements"
                                    "src/build-system/Makefile.rules_with_autodep.in")
                                  (find-files "scripts/common/check" "\\.sh$"))
               ;; Keep the original text when no replacement is available.
               (("(/usr/bin/|/bin/)([a-z][-_.a-z]*)" all dir cmd)
                (or (which* cmd) all)))

             (substitute* (find-files "src/build-system" "^config.*")
               (("LN_S=/bin/\\$LN_S") (string-append "LN_S=" (which "ln")))
               (("^PATH=.*") ""))

             ;; rewrite "/var/tmp" in check script
             (substitute* "scripts/common/check/check_make_unix.sh"
               (("/var/tmp") "/tmp"))

             ;; do not reset PATH
             (substitute* (find-files "scripts/common/impl/" "\\.sh$")
               (("^ *PATH=.*") "")
               (("action=/bin/") "action=")
               (("export PATH") ":"))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((prefix-dir (assoc-ref outputs "out"))
                   (lib-dir (string-append (assoc-ref outputs "lib")
                                           "/lib"))
                   (include-dir (string-append (assoc-ref outputs "include")
                                               "/include/ncbi-tools++")))
               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure.orig"
                       (string-append "--with-build-root=" (getcwd) "/build")
                       (string-append "--prefix=" prefix-dir)
                       (string-append "--libdir=" lib-dir)
                       (string-append "--includedir=" include-dir)
                       (string-append "--with-bz2="
                                      (assoc-ref inputs "bzip2"))
                       (string-append "--with-z="
                                      (assoc-ref inputs "zlib"))
                       (string-append "--with-pcre="
                                      (assoc-ref inputs "pcre"))
                       ;; Each library is built twice by default, once
                       ;; with "-static" in its name, and again
                       ;; without.
                       "--without-static"
                       "--with-dll")
               #t))))))
    (outputs '("out"                    ; 21 MB
               "lib"                    ; 226 MB
               "include"))              ; 33 MB
    (inputs
     `(("bzip2" ,bzip2)
       ("lmdb" ,lmdb)
       ("zlib" ,zlib)
       ("pcre" ,pcre)
       ("perl" ,perl)
       ("python" ,python-wrapper)))
    (native-inputs
     `(("cpio" ,cpio)))
    (home-page "https://blast.ncbi.nlm.nih.gov")
    (synopsis "Basic local alignment search tool")
    (description
     "BLAST is a popular method of performing a DNA or protein sequence
similarity search, using heuristics to produce results quickly. It also
calculates an “expect value” that estimates how many matches would have
occurred at a given score by chance, which can aid a user in judging how much
confidence to have in an alignment.")
    ;; Most of the sources are in the public domain, with the following
    ;; exceptions:
    ;; * Expat:
    ;; * ./c++/include/util/bitset/
    ;; * ./c++/src/html/ncbi_menu*.js
    ;; * Boost license:
    ;; * ./c++/include/util/impl/floating_point_comparison.hpp
    ;; * LGPL 2+:
    ;; * ./c++/include/dbapi/driver/odbc/unix_odbc/
    ;; * ASL 2.0:
    ;; * ./c++/src/corelib/teamcity_*
    (license (list license:public-domain
                   license:expat
                   license:boost1.0
                   license:lgpl2.0+
                   license:asl2.0))))
1477
;; BLESS, built from the SourceForge tarball with several bundled
;; directories removed.  There is no configure script and no "check"
;; target; installation is done by hand.
(define-public bless
  (package
    (name "bless")
    (version "1p02")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bless-ec/bless.v"
                                  version ".tgz"))
              (sha256
               (base32
                "0rm0gw2s18dqwzzpl3c2x1z05ni2v0xz5dmfk3d33j6g4cgrlrdd"))
              (modules '((guix build utils)))
              (snippet
               `(begin
                  ;; Remove bundled boost, pigz, google-sparsehash, zlib, and
                  ;; the .git directory.
                  ;; FIXME: also remove bundled sources for murmurhash3 and
                  ;; kmc once packaged.
                  (delete-file-recursively "boost")
                  (delete-file-recursively "pigz")
                  (delete-file-recursively "google-sparsehash")
                  (delete-file-recursively "zlib")
                  (delete-file-recursively ".git")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list (string-append "ZLIB="
                            (assoc-ref %build-inputs "zlib:static")
                            "/lib/libz.a")
             (string-append "LDFLAGS="
                            (string-join '("-lboost_filesystem"
                                           "-lboost_system"
                                           "-lboost_iostreams"
                                           "-lz"
                                           "-fopenmp"))))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled pigz directory is deleted by the source snippet, so
         ;; drop the Makefile command that would enter it and run make.
         (add-after 'unpack 'do-not-build-bundled-pigz
           (lambda _
             (substitute* "Makefile"
               (("cd pigz/pigz-2.3.3; make") ""))
             #t))
         ;; Hard-code the kmc binary (installed into this package's own
         ;; output by the 'install' phase) and the pigz input's binary.
         (add-after 'unpack 'patch-paths-to-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (substitute* "parse_args.cpp"
               (("kmc_binary = .*")
                (string-append "kmc_binary = \""
                               (assoc-ref outputs "out")
                               "/bin/kmc\";"))
               (("pigz_binary = .*")
                (string-append "pigz_binary = \""
                               (assoc-ref inputs "pigz")
                               "/bin/pigz\";")))
             #t))
         ;; Copy the two executables into the output's "bin" directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each (lambda (file)
                           (install-file file bin))
                         '("bless" "kmc/bin/kmc"))
               #t)))
         (delete 'configure))))
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("openmpi" ,openmpi)
       ("boost" ,boost)
       ("sparsehash" ,sparsehash)
       ("pigz" ,pigz)
       ("zlib:static" ,zlib "static")
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://sourceforge.net/p/bless-ec/wiki/Home/")
    (synopsis "Bloom-filter-based error correction tool for NGS reads")
    (description
     "@dfn{Bloom-filter-based error correction solution for high-throughput
sequencing reads} (BLESS) is a correction tool for genomic reads produced by
@dfn{Next-generation sequencing} (NGS) that uses a single minimum-sized bloom
filter. BLESS produces accurate correction results with much less
memory compared with previous solutions and is also able to tolerate a higher
false-positive rate. BLESS can extend reads like DNA assemblers to correct
errors at the end of reads.")
    (license license:gpl3+)))
1562
;; Bowtie 2, fetched from the upstream Git tag "v<version>".  There is no
;; configure script, and the test suite is a Perl script run by hand.
(define-public bowtie
  (package
    (name "bowtie")
    (version "2.3.4.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BenLangmead/bowtie2")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1zl3cf327y2p7p03cavymbh7b00djc7lncfaqih33n96iy9q8ibp"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Pin BUILD_HOST and BUILD_TIME to constants so that the build
           ;; is deterministic.
           (substitute* "Makefile"
             (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
             (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:make-flags
       (list "allall"
             "WITH_TBB=1"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Run the simple test script against the binaries in the build
         ;; directory.
         (replace 'check
           (lambda _
             (invoke "perl"
                     "scripts/test/simple_tests.pl"
                     "--bowtie2=./bowtie2"
                     "--bowtie2-build=./bowtie2-build")
             #t)))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)
       ("python" ,python-wrapper)))
    (native-inputs
     `(("perl" ,perl)
       ("perl-clone" ,perl-clone)
       ("perl-test-deep" ,perl-test-deep)
       ("perl-test-simple" ,perl-test-simple)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://bowtie-bio.sourceforge.net/bowtie2/index.shtml")
    (synopsis "Fast and sensitive nucleotide sequence read aligner")
    (description
     "Bowtie 2 is a fast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads
of about 50 up to 100s or 1,000s of characters, and particularly good at
aligning to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the
genome with an FM Index to keep its memory footprint small: for the human
genome, its memory footprint is typically around 3.2 GB. Bowtie 2 supports
gapped, local, and paired-end alignment modes.")
    (license license:gpl3+)))
1621
;; Bowtie 1, built from the SourceForge source archive.  There is no
;; configure script and no "check" target.
(define-public bowtie1
  (package
    (name "bowtie1")
    (version "1.2.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "mirror://sourceforge/bowtie-bio/bowtie/"
                                  version "/bowtie-src-x86_64.zip"))
              (sha256
               (base32
                "0vmiqdhc9dzyfy9sh6vgi7k9xy2hiw8g87vbamnc6cgpm179zsa4"))
              (modules '((guix build utils)))
              ;; The snippet ends with an explicit #t, like the other
              ;; snippets in this file, rather than with the unspecified
              ;; value of 'substitute*'.
              (snippet
               '(begin
                  (substitute* "Makefile"
                    ;; replace BUILD_HOST and BUILD_TIME for deterministic build
                    (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
                    (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:make-flags
       (list "all"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "http://bowtie-bio.sourceforge.net/index.shtml")
    (synopsis "Fast aligner for short nucleotide sequence reads")
    (description
     "Bowtie is a fast, memory-efficient short read aligner. It aligns short
DNA sequences (reads) to the human genome at a rate of over 25 million 35-bp
reads per hour. Bowtie indexes the genome with a Burrows-Wheeler index to
keep its memory footprint small: typically about 2.2 GB for the human
genome (2.9 GB for paired-end).")
    (license license:artistic2.0)))
1661
;; TopHat, built from the upstream tarball with the bundled SeqAn and
;; samtools removed; the sources are patched to use the samtools input.
(define-public tophat
  (package
    (name "tophat")
    (version "2.1.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://ccb.jhu.edu/software/tophat/downloads/tophat-"
             version ".tar.gz"))
       (sha256
        (base32 "19add02kv2xhd6ihd779dr7x35ggym3jqr0m5c4315i1yfb0p11p"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled SeqAn and samtools
           (delete-file-recursively "src/SeqAn-1.4.2")
           (delete-file-recursively "src/samtools-0.1.18")
           #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-samtools
           (lambda _
             ;; Strip every reference to the bundled samtools program and
             ;; library from the Makefile template.
             (substitute* "src/Makefile.in"
               (("(noinst_LIBRARIES = )\\$\\(SAMLIB\\)" _ lhs) lhs)
               (("\\$\\(SAMPROG\\): \\$\\(SAMLIB\\)") "")
               (("SAMPROG = samtools_0\\.1\\.18") "")
               (("\\$\\(samtools_0_1_18_SOURCES\\)") "")
               (("am__EXEEXT_1 = samtools_0\\.1\\.18\\$\\(EXEEXT\\)") ""))
             ;; Call the samtools found in PATH instead of the bundled one.
             (substitute* '("src/common.cpp"
                            "src/tophat.py")
               (("samtools_0.1.18") (which "samtools")))
             ;; Include the samtools headers from the "samtools/"
             ;; sub-directory of the include path.
             (substitute* '("src/common.h"
                            "src/bam2fastx.cpp")
               (("#include \"bam.h\"") "#include <samtools/bam.h>")
               (("#include \"sam.h\"") "#include <samtools/sam.h>"))
             (substitute* '("src/bwt_map.h"
                            "src/map2gtf.h"
                            "src/align_status.h")
               (("#include <bam.h>") "#include <samtools/bam.h>")
               (("#include <sam.h>") "#include <samtools/sam.h>"))
             #t)))))
    (native-inputs
     `(("gcc" ,gcc-5)))                 ;; doesn't build with later versions
    (inputs
     `(("boost" ,boost)
       ("bowtie" ,bowtie)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python-2)
       ("samtools" ,samtools-0.1)
       ("seqan" ,seqan-1)
       ("zlib" ,zlib)))
    (home-page "https://ccb.jhu.edu/software/tophat/index.shtml")
    (synopsis "Spliced read mapper for RNA-Seq data")
    (description
     "TopHat is a fast splice junction mapper for nucleotide sequence
reads produced by the RNA-Seq method. It aligns RNA-Seq reads to
mammalian-sized genomes using the ultra high-throughput short read
aligner Bowtie, and then analyzes the mapping results to identify
splice junctions between exons.")
    ;; TopHat is released under the Boost Software License, Version 1.0
    ;; See https://github.com/infphilo/tophat/issues/11#issuecomment-121589893
    (license license:boost1.0)))
1729
;; BWA, built from the GitHub release tarball.  There is no configure script
;; and no "check" target; the install phase copies the files by hand.
(define-public bwa
  (package
    (name "bwa")
    (version "0.7.17")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/lh3/bwa/releases/download/v"
             version "/bwa-" version ".tar.bz2"))
       (sha256
        (base32 "1zfhv2zg9v1icdlq4p9ssc8k01mca5d1bd87w71py2swfi74s6yy"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; Install each file into its own directory of the output; the two
         ;; lists below are matched up element by element.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix-dir (assoc-ref outputs "out"))
                    (bin-dir (string-append prefix-dir "/bin"))
                    (lib-dir (string-append prefix-dir "/lib"))
                    (doc-dir (string-append prefix-dir "/share/doc/bwa"))
                    (man-dir (string-append prefix-dir "/share/man/man1")))
               (for-each install-file
                         '("bwa" "libbwa.a" "README.md" "bwa.1")
                         (list bin-dir lib-dir doc-dir man-dir)))
             #t)))))
    (inputs `(("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "http://bio-bwa.sourceforge.net/")
    (synopsis "Burrows-Wheeler sequence aligner")
    (description
     "BWA is a software package for mapping low-divergent sequences against a
large reference genome, such as the human genome. It consists of three
algorithms: BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is
designed for Illumina sequence reads up to 100bp, while the rest two for
longer sequences ranged from 70bp to 1Mbp. BWA-MEM and BWA-SW share similar
features such as long-read support and split alignment, but BWA-MEM, which is
the latest, is generally recommended for high-quality queries as it is faster
and more accurate. BWA-MEM also has better performance than BWA-backtrack for
70-100bp Illumina reads.")
    (license license:gpl3+)))
1778
;; BWA-PSSM: a modification of BWA.  Every field that is not overridden
;; below is inherited from the 'bwa' package.
(define-public bwa-pssm
  (package (inherit bwa)
    (name "bwa-pssm")
    (version "0.5.11")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pkerpedjiev/bwa-pssm")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "076c4q0cdqz8jgylb067y9zmvxglppnzi3qiscn0xiypgc6lgb5r"))))
    (build-system gnu-build-system)
    (inputs
     `(("gdsl" ,gdsl)
       ("zlib" ,zlib)
       ("perl" ,perl)))
    (home-page "http://bwa-pssm.binf.ku.dk/")
    (synopsis "Burrows-Wheeler transform-based probabilistic short read mapper")
    (description
     "BWA-PSSM is a probabilistic short genomic sequence read aligner based on
the use of @dfn{position specific scoring matrices} (PSSM). Like many of the
existing aligners it is fast and sensitive. Unlike most other aligners,
however, it is also adaptable in the sense that one can direct the alignment
based on known biases within the data set. It is coded as a modification of
the original BWA alignment program and shares the genome index structure as
well as many of the command line options.")
    (license license:gpl3+)))
1808
;; bwa-meth: BS-Seq read alignment on top of BWA.  The script is patched so
;; that it invokes "bwa" by the file name found at build time.
(define-public bwa-meth
  (package
    (name "bwa-meth")
    (version "0.2.2")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/brentp/bwa-meth")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "17j31i7zws5j7mhsq9x3qgkxly6mlmrgwhfq0qbflgxrmx04yaiz"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'keep-references-to-bwa
           (lambda _
             (substitute* "bwameth.py"
               ;; Replace the bare "bwa" command with the result of
               ;; (which "bwa"), keeping the sub-command that follows.
               (("bwa (mem|index)" _ command)
                (string-append (which "bwa") " " command))
               ;; There's an ill-advised check for "samtools" on PATH.
               (("^checkX.*") ""))
             #t)))))
    (inputs
     `(("bwa" ,bwa)))
    (native-inputs
     `(("python-toolshed" ,python-toolshed)))
    (home-page "https://github.com/brentp/bwa-meth")
    (synopsis "Fast and accurate alignment of BS-Seq reads")
    (description
     "BWA-Meth works for single-end reads and for paired-end reads from the
directional protocol (most common). It uses the method employed by
methylcoder and Bismark of in silico conversion of all C's to T's in both
reference and reads. It recovers the original read (needed to tabulate
methylation) by attaching it as a comment which BWA appends as a tag to the
read. It performs favorably to existing aligners gauged by number of on and
off-target reads for a capture method that targets CpG-rich region.")
    (license license:expat)))
1849
;; bx-python, fetched from PyPI.  The test suite is disabled because the
;; test data are not part of the tarball.
(define-public python-bx-python
  (package
    (name "python-bx-python")
    (version "0.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "bx-python" version))
       (sha256
        (base32 "11kksg2rbzihpmcid823xvg42xi88m7sz58rzk29abybkxy0rszs"))))
    (build-system python-build-system)
    (arguments
     ;; Tests fail because test data are not included
     '(#:tests? #f))
    (native-inputs
     `(("python-lzo" ,python-lzo)
       ("python-nose" ,python-nose)
       ("python-cython" ,python-cython)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-six" ,python-six)))
    (home-page "https://github.com/bxlab/bx-python")
    (synopsis "Tools for manipulating biological data")
    (description
     "bx-python provides tools for manipulating biological data, particularly
multiple sequence alignments.")
    (license license:expat)))
1878
;; Python 2 variant derived from python-bx-python.
(define-public python2-bx-python (package-with-python2 python-bx-python))
1881
;; Pysam: Python wrapper around the SAMtools C API.  The bundled htslib is
;; removed from the source and the build is pointed at the "htslib" input
;; instead.
(define-public python-pysam
  (package
    (name "python-pysam")
    (version "0.15.1")
    (source (origin
              (method git-fetch)
              ;; Test data is missing on PyPi.
              (uri (git-reference
                    (url "https://github.com/pysam-developers/pysam")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1vj367w6xbn9bpmksm162l1aipf7cj97h1q83y7jcpm33ihwpf7x"))
              (modules '((guix build utils)))
              (snippet '(begin
                          ;; Drop bundled htslib. TODO: Also remove samtools
                          ;; and bcftools.
                          (delete-file-recursively "htslib")
                          #t))))
    (build-system python-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (srfi srfi-26)
                  (guix build python-build-system)
                  (guix build utils))
       #:phases
       (modify-phases %standard-phases
         (add-before 'build 'set-flags
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Make the build use the external htslib from the inputs.
             (setenv "HTSLIB_MODE" "external")
             (setenv "HTSLIB_LIBRARY_DIR"
                     (string-append (assoc-ref inputs "htslib") "/lib"))
             (setenv "HTSLIB_INCLUDE_DIR"
                     (string-append (assoc-ref inputs "htslib") "/include"))
             (setenv "LDFLAGS" "-lncurses")
             (setenv "CFLAGS" "-D_CURSES_LIB=1")
             #t))
         (replace 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             ;; Honor #:tests? so that the test suite can be switched off
             ;; like with the standard 'check' phase.
             (when tests?
               ;; This file contains tests that require a connection to the
               ;; internet.
               (delete-file "tests/tabix_test.py")
               ;; FIXME: This test fails
               (delete-file "tests/AlignmentFile_test.py")
               ;; Add first subdirectory of "build" directory to PYTHONPATH.
               (setenv "PYTHONPATH"
                       (string-append
                        (getenv "PYTHONPATH")
                        ":" (getcwd) "/build/"
                        (car (scandir "build"
                                      (negate (cut string-prefix? "." <>))))))
               ;; Step out of source dir so python does not import from CWD.
               (with-directory-excursion "tests"
                 (setenv "HOME" "/tmp")
                 (invoke "make" "-C" "pysam_data")
                 (invoke "make" "-C" "cbcf_data")
                 ;; Running nosetests without explicitly asking for a single
                 ;; process leads to a crash. Running with multiple processes
                 ;; fails because the tests are not designed to run in parallel.

                 ;; FIXME: tests keep timing out on some systems.
                 (invoke "nosetests" "-v" "--processes" "1")))
             #t)))))
    (propagated-inputs
     `(("htslib" ,htslib)))    ; Included from installed header files.
    (inputs
     `(("ncurses" ,ncurses)
       ("curl" ,curl)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-cython" ,python-cython)
       ;; Dependencies below are for tests only.
       ("samtools" ,samtools)
       ("bcftools" ,bcftools)
       ("python-nose" ,python-nose)))
    (home-page "https://github.com/pysam-developers/pysam")
    (synopsis "Python bindings to the SAMtools C API")
    (description
     "Pysam is a Python module for reading and manipulating files in the
SAM/BAM format. Pysam is a lightweight wrapper of the SAMtools C API. It
also includes an interface for tabix.")
    (license license:expat)))
1964
;; Python 2 variant derived from python-pysam.
(define-public python2-pysam (package-with-python2 python-pysam))
1967
;; twobitreader, fetched from its Git repository at the tag named after the
;; version.  No tests are run because none are included.
(define-public python-twobitreader
  (package
    (name "python-twobitreader")
    (version "3.1.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/benjschiller/twobitreader")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1qbxvv1h58cismbk1anpjrkpghsaiy64a11ir3lhy6qch6xf8n62"))))
    (build-system python-build-system)
    (arguments
     ;; Tests are not included
     '(#:tests? #f))
    (native-inputs
     `(("python-sphinx" ,python-sphinx)))
    (home-page "https://github.com/benjschiller/twobitreader")
    (synopsis "Python library for reading .2bit files")
    (description
     "twobitreader is a Python library for reading .2bit files as used by the
UCSC genome browser.")
    (license license:artistic2.0)))
1992
;; Python 2 variant derived from python-twobitreader.
(define-public python2-twobitreader (package-with-python2 python-twobitreader))
1995
;; plastid, fetched from PyPI.  Tests are disabled because some of the test
;; files are not included.
(define-public python-plastid
  (package
    (name "python-plastid")
    (version "0.4.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "plastid" version))
       (sha256
        (base32 "0l24dd3q66if8yj042m4s0g95n6acn7im1imqd3p6h8ns43kxhj8"))))
    (build-system python-build-system)
    (arguments
     ;; Some test files are not included.
     `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)
       ("python-biopython" ,python-biopython)
       ("python-twobitreader" ,python-twobitreader)
       ("python-termcolor" ,python-termcolor)))
    (home-page "https://github.com/joshuagryphon/plastid")
    (synopsis "Python library for genomic analysis")
    (description
     "plastid is a Python library for genomic analysis – in particular,
high-throughput sequencing data – with an emphasis on simplicity.")
    (license license:bsd-3)))
2028
;; Python 2 variant derived from python-plastid.
(define-public python2-plastid (package-with-python2 python-plastid))
2031
;; TEToolkit (TEtranscripts and TEcount), built with Python 2.  The scripts
;; are patched to call external tools by the file names found at build time
;; and are wrapped so that R_LIBS_SITE is set when they run.
(define-public tetoolkit
  (package
    (name "tetoolkit")
    (version "2.0.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/mhammell-laboratory/tetoolkit")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1yzi0kfpzip8zpjb82x1ik6h22yzfyjiz2dv85v6as2awwqvk807"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; not guaranteed to work with Python 3
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-writable
           (lambda _
             (for-each make-file-writable (find-files "."))
             #t))
         (add-after 'unpack 'patch-invocations
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Substitute the results of 'which' for the bare command
             ;; names used in the scripts and modules.
             (substitute* '("bin/TEtranscripts"
                            "bin/TEcount")
               (("'sort ")
                (string-append "'" (which "sort") " "))
               (("'rm -f ")
                (string-append "'" (which "rm") " -f "))
               (("'Rscript'") (string-append "'" (which "Rscript") "'")))
             (substitute* "TEToolkit/IO/ReadInputs.py"
               (("BamToBED") (which "bamToBed")))
             (substitute* "TEToolkit/Normalization.py"
               (("\"Rscript\"")
                (string-append "\"" (which "Rscript") "\"")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure the executables find R packages.
             (let ((out (assoc-ref outputs "out")))
               (for-each
                (lambda (script)
                  (wrap-program (string-append out "/bin/" script)
                    `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
                '("TEtranscripts"
                  "TEcount")))
             #t)))))
    (inputs
     `(("coreutils" ,coreutils)
       ("bedtools" ,bedtools)
       ("python-argparse" ,python2-argparse)
       ("python-pysam" ,python2-pysam)
       ("r-minimal" ,r-minimal)
       ("r-deseq2" ,r-deseq2)))
    (home-page "https://github.com/mhammell-laboratory/tetoolkit")
    (synopsis "Transposable elements in differential enrichment analysis")
    (description
     "This is a package for including transposable elements in differential
enrichment analysis of sequencing datasets. TEtranscripts and TEcount take
RNA-seq (and similar data) and annotate reads to both genes and transposable
elements. TEtranscripts then performs differential analysis using DESeq2.
Note that TEtranscripts and TEcount rely on specially curated GTF files, which
are not included due to their size.")
    (license license:gpl3+)))
2097
;; CD-HIT, built from the release tarball.  The build is driven by make
;; flags alone; references to __DATE__ and __TIME__ are patched out of the
;; sources.
(define-public cd-hit
  (package
    (name "cd-hit")
    (version "4.6.8")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/weizhongli/cdhit"
                           "/releases/download/V" version
                           "/cd-hit-v" version
                           "-2017-0621-source.tar.gz"))
       (sha256
        (base32 "1b4mwm2520ixjbw57sil20f9iixzw4bkdqqwgg1fc3pzm6rz4zmn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags
       (list
        ;; Executables are copied directly to the PREFIX.
        (string-append "PREFIX=" (assoc-ref %outputs "out") "/bin")
        ;; Support longer sequences (e.g. Pacbio sequences)
        "MAX_SEQ=60000000")
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script
         (delete 'configure)
         ;; Remove sources of non-determinism
         (add-after 'unpack 'be-timeless
           (lambda _
             (substitute* "cdhit-utility.c++"
               ((" \\(built on \" __DATE__ \"\\)") ""))
             (substitute* "cdhit-common.c++"
               (("__DATE__") "\"0\"")
               (("\", %s, \" __TIME__ \"\\\\n\", date") ""))
             #t))
         ;; The "install" target does not create the target directory.
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bindir))
             #t)))))
    (inputs
     `(("perl" ,perl)))
    (home-page "http://weizhongli-lab.org/cd-hit/")
    (synopsis "Cluster and compare protein or nucleotide sequences")
    (description
     "CD-HIT is a program for clustering and comparing protein or nucleotide
sequences. CD-HIT is designed to be fast and handle extremely large
databases.")
    ;; The manual says: "It can be copied under the GNU General Public License
    ;; version 2 (GPLv2)."
    (license license:gpl2)))
2148
;; CLIPper, fetched from its Git repository at the tag named after the
;; version.
(define-public clipper
  (package
    (name "clipper")
    (version "2.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/YeoLab/clipper")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1bcag4lb5bkzsj2vg7lrq24aw6yfgq275ifrbhd82l7kqgbbjbkv"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; The 'reset-gzip-timestamps' phase can only do its work on
             ;; writable files, so make every installed .gz file writable.
             (for-each make-file-writable
                       (find-files (assoc-ref outputs "out") "\\.gz$"))
             #t)))))
    (native-inputs
     `(("python-setuptools-git" ,python-setuptools-git)
       ("python-mock" ,python-mock)     ; for tests
       ("python-nose" ,python-nose)     ; for tests
       ("python-pytz" ,python-pytz)))   ; for tests
    (inputs
     `(("htseq" ,htseq)
       ("python-pybedtools" ,python-pybedtools)
       ("python-cython" ,python-cython)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-matplotlib" ,python-matplotlib)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)))
    (home-page "https://github.com/YeoLab/clipper")
    (synopsis "CLIP peak enrichment recognition")
    (description
     "CLIPper is a tool to define peaks in CLIP-seq datasets.")
    (license license:gpl2)))
2194
;; CodingQuarry, fetched from SourceForge.  Installation is done by hand,
;; and the "QuarryFiles" directory is advertised through QUARRY_PATH.
(define-public codingquarry
  (package
    (name "codingquarry")
    (version "2.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://sourceforge/codingquarry/CodingQuarry_v"
             version ".tar.gz"))
       (sha256
        (base32 "0115hkjflsnfzn36xppwf9h9avfxlavr43djqmshkkzbgjzsz60i"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Documentation first, then the data directory, then the
               ;; executable and the helper script.
               (install-file "INSTRUCTIONS.pdf"
                             (string-append out "/share/doc/codingquarry"))
               (copy-recursively "QuarryFiles"
                                 (string-append out "/QuarryFiles"))
               (for-each (lambda (program)
                           (install-file program (string-append out "/bin")))
                         '("CodingQuarry"
                           "CufflinksGTF_to_CodingQuarryGFF3.py")))
             #t)))))
    (native-inputs
     `(("python" ,python-2)))           ; Only Python 2 is supported
    (inputs
     `(("openmpi" ,openmpi)))
    (native-search-paths
     (list (search-path-specification
            (variable "QUARRY_PATH")
            (files '("QuarryFiles")))))
    (home-page "https://sourceforge.net/projects/codingquarry/")
    (synopsis "Fungal gene predictor")
    (description "CodingQuarry is a highly accurate, self-training GHMM fungal
gene predictor designed to work with assembled, aligned RNA-seq transcripts.")
    (license license:gpl3+)))
2235
;; COUGER, distributed as a zip archive.  Nothing is configured or built:
;; the "src" directory and the "couger" script are copied to the output,
;; and the script is wrapped with the build-time PYTHONPATH.
(define-public couger
  (package
    (name "couger")
    (version "1.8.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://couger.oit.duke.edu/static/assets/COUGER"
             version ".zip"))
       (sha256
        (base32 "04p2b14nmhzxw5h72mpzdhalv21bx4w9b87z0wpw0xzxpysyncmq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((prefix (assoc-ref outputs "out"))
                    (bindir (string-append prefix "/bin")))
               (copy-recursively "src" (string-append prefix "/src"))
               (mkdir bindir)
               ;; Append the output directory (the parent of "src") to the
               ;; script's module lookup path.
               (substitute* "couger"
                 (("from argparse")
                  (string-append "import sys\nsys.path.append(\""
                                 prefix "\")\nfrom argparse")))
               (install-file "couger" bindir))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Make sure 'couger' runs with the correct PYTHONPATH.
             (wrap-program (string-append (assoc-ref outputs "out")
                                          "/bin/couger")
               (list "PYTHONPATH" ":" 'prefix
                     (list (getenv "PYTHONPATH"))))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("python" ,python-2)
       ("python2-pillow" ,python2-pillow)
       ("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-matplotlib" ,python2-matplotlib)))
    (propagated-inputs
     `(("r-minimal" ,r-minimal)
       ("libsvm" ,libsvm)
       ("randomjungle" ,randomjungle)))
    (home-page "http://couger.oit.duke.edu")
    (synopsis "Identify co-factors in sets of genomic regions")
    (description
     "COUGER can be applied to any two sets of genomic regions bound by
paralogous TFs (e.g., regions derived from ChIP-seq experiments) to identify
putative co-factors that provide specificity to each TF. The framework
determines the genomic targets uniquely-bound by each TF, and identifies a
small set of co-factors that best explain the in vivo binding differences
between the two TFs.

COUGER uses classification algorithms (support vector machines and random
forests) with features that reflect the DNA binding specificities of putative
co-factors. The features are generated either from high-throughput TF-DNA
binding data (from protein binding microarray experiments), or from large
collections of DNA motifs.")
    (license license:gpl3+)))
2306
;; Clustal-Omega, built from the release tarball with the default phases of
;; the GNU build system.
(define-public clustal-omega
  (package
    (name "clustal-omega")
    (version "1.2.4")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.clustal.org/omega/clustal-omega-"
                           version ".tar.gz"))
       (sha256
        (base32 "1vm30mzncwdv881vrcwg11vzvrsmwy4wg80j5i0lcfk6dlld50w6"))))
    (build-system gnu-build-system)
    (inputs
     `(("argtable" ,argtable)))
    (home-page "http://www.clustal.org/omega/")
    (synopsis "Multiple sequence aligner for protein and DNA/RNA")
    (description
     "Clustal-Omega is a general purpose multiple sequence alignment (MSA)
program for protein and DNA/RNA. It produces high quality MSAs and is capable
of handling data-sets of hundreds of thousands of sequences in reasonable
time.")
    (license license:gpl2+)))
2329
;; CrossMap, fetched from PyPI and built with the default phases of the
;; Python build system.
(define-public crossmap
  (package
    (name "crossmap")
    (version "0.3.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "CrossMap" version))
       (sha256
        (base32 "1sb2f2qbxya4fzw3yjl09vbrs8vfmw22zrygrvz004sf9gb1vkan"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-nose" ,python-nose)))
    (inputs
     `(("python-bx-python" ,python-bx-python)
       ("python-numpy" ,python-numpy)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("zlib" ,zlib)))
    (home-page "http://crossmap.sourceforge.net/")
    (synopsis "Convert genome coordinates between assemblies")
    (description
     "CrossMap is a program for conversion of genome coordinates or annotation
files between different genome assemblies. It supports most commonly used
file formats including SAM/BAM, Wiggle/BigWig, BED, GFF/GTF, VCF.")
    (license license:gpl2+)))
2357
;; dnaio, fetched from PyPI and built with the default phases of the Python
;; build system.
(define-public python-dnaio
  (package
    (name "python-dnaio")
    (version "0.3")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "dnaio" version))
              (sha256
               (base32
                "0f16m7hdlm0fz1n7y5asy0v9ghyrq17ni1p9iybq22ddzyd49r27"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-xopen" ,python-xopen)))
    (home-page "https://github.com/marcelm/dnaio/")
    (synopsis "Read FASTA and FASTQ files efficiently")
    (description
     "dnaio is a Python library for fast parsing of FASTQ and also FASTA
files. The code was previously part of the cutadapt tool.")
    (license license:expat)))
2380
;; deeptoolsintervals, fetched from PyPI and built with the default phases
;; of the Python build system.
(define-public python-deeptoolsintervals
  (package
    (name "python-deeptoolsintervals")
    (version "0.1.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "deeptoolsintervals" version))
       (sha256
        (base32 "1xnl80nblysj6dylj4683wgrfa425rkx4dp5k65hvwdns9pw753x"))))
    (build-system python-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/deeptools/deeptools_intervals")
    (synopsis "Create GTF-based interval trees with associated meta-data")
    (description
     "This package provides a Python module creating/accessing GTF-based
interval trees with associated meta-data. It is primarily used by the
@code{deeptools} package.")
    (license license:expat)))
2401
;; deepTools, fetched from its Git repository at the tag named after the
;; version.
(define-public python-deeptools
  (package
    (name "python-deeptools")
    (version "3.4.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/deepTools")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0l09vyynz6s6w7fnyd94rpys4a6aja6kp4gli64pngdxdz3md1nl"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-numpydoc" ,python-numpydoc)
       ("python-py2bit" ,python-py2bit)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)
       ("python-deeptoolsintervals" ,python-deeptoolsintervals)
       ("python-plotly" ,python-plotly-2.4.1)))
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-nose" ,python-nose)))
    (home-page "https://pypi.org/project/deepTools/")
    (synopsis "Useful tools for exploring deep sequencing data")
    (description
     "This package addresses the challenge of handling large amounts
of data that are now routinely generated from DNA sequencing centers.
@code{deepTools} contains useful modules to process the mapped reads data for
multiple quality checks, creating normalized coverage files in standard bedGraph
and bigWig file formats, that allow comparison between different files. Finally,
using such normalized and standardized files, deepTools can create many
publication-ready visualizations to identify enrichments and for functional
annotations of the genome.")
    ;; The file deeptools/cm.py is licensed under the BSD license. The
    ;; remainder of the code is licensed under the MIT license.
    (license (list license:bsd-3 license:expat))))
2442
;; Deprecated name 'deeptools', kept as an alias for python-deeptools.
(define-deprecated deeptools
  python-deeptools)
2444
;; Cutadapt, fetched from PyPI and built with the default phases of the
;; Python build system.
(define-public cutadapt
  (package
    (name "cutadapt")
    (version "2.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "cutadapt" version))
       (sha256
        (base32 "1vqmsfkm6llxzmsz9wcfcvzx9a9f8iabvwik2rbyn7nc4wm25z89"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-setuptools-scm" ,python-setuptools-scm)))
    (inputs
     `(("python-dnaio" ,python-dnaio)
       ("python-xopen" ,python-xopen)))
    (home-page "https://cutadapt.readthedocs.io/en/stable/")
    (synopsis "Remove adapter sequences from nucleotide sequencing reads")
    (description
     "Cutadapt finds and removes adapter sequences, primers, poly-A tails and
other types of unwanted sequence from high-throughput sequencing reads.")
    (license license:expat)))
2469
;; libBigWig, fetched from its Git repository at the tag named after the
;; version.  The build is driven by make flags alone, so the configure
;; phase is dropped.
(define-public libbigwig
  (package
    (name "libbigwig")
    (version "0.4.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dpryan79/libBigWig")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "09693dmf1scdac5pyq6qyn8b4mcipvnmc370k9a5z41z81m3dcsj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:tests? #f                      ; tests require access to the web
       #:make-flags
       (list "CC=gcc"
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (native-inputs
     `(("doxygen" ,doxygen)
       ;; Needed for the tests.
       ("python" ,python-2)))
    (inputs
     `(("zlib" ,zlib)
       ("curl" ,curl)))
    (home-page "https://github.com/dpryan79/libBigWig")
    (synopsis "C library for handling bigWig files")
    (description
     "This package provides a C library for parsing local and remote BigWig
files.")
    (license license:expat)))
2506
;; pyBigWig, fetched from PyPI.  The bundled libBigWig sources are deleted
;; and "BigWig" is added to the libraries listed in setup.py.
(define-public python-pybigwig
  (package
    (name "python-pybigwig")
    (version "0.3.17")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyBigWig" version))
       (sha256
        (base32 "157x6v48y299zm382krf1dw08fdxg95im8lnabhp5vc94s04zxj1"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled libBigWig sources
           (delete-file-recursively "libBigWig")
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'link-with-libBigWig
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Prepend "BigWig" to the list assigned to "libs".
             (substitute* "setup.py"
               (("libs=\\[") "libs=[\"BigWig\", "))
             #t)))))
    (inputs
     `(("libbigwig" ,libbigwig)
       ("zlib" ,zlib)
       ("curl" ,curl)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://github.com/dpryan79/pyBigWig")
    (synopsis "Access bigWig files in Python using libBigWig")
    (description
     "This package provides Python bindings to the libBigWig library for
accessing bigWig files.")
    (license license:expat)))
2544
;; Python 2 variant derived from python-pybigwig.
(define-public python2-pybigwig (package-with-python2 python-pybigwig))
2547
;; DendroPy, built with the default phases of the Python build system.
(define-public python-dendropy
  (package
    (name "python-dendropy")
    (version "4.4.0")
    (source (origin
              (method git-fetch)
              ;; Source from GitHub so that tests are included.
              (uri (git-reference
                    (url "https://github.com/jeetsukumaran/DendroPy")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "097hfyv2kaf4x92i4rjx0paw2cncxap48qivv8zxng4z7nhid0x9"))))
    (build-system python-build-system)
    (home-page "https://dendropy.org/")
    (synopsis "Library for phylogenetics and phylogenetic computing")
    (description
     "DendroPy is a library for phylogenetics and phylogenetic computing: reading,
writing, simulation, processing and manipulation of phylogenetic
trees (phylogenies) and characters.")
    (license license:bsd-3)))
2571
;; Python 2 variant of python-dendropy.  It renames one test so that it is
;; not collected, and appends the arguments of the derived package.
(define-public python2-dendropy
  (let ((python2-variant (package-with-python2 python-dendropy)))
    (package
      (inherit python2-variant)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'remove-failing-test
             (lambda _
               ;; This test fails when the full test suite is run, as documented
               ;; at https://github.com/jeetsukumaran/DendroPy/issues/74
               (substitute* "tests/test_dataio_nexml_reader_tree_list.py"
                 (("test_collection_comments_and_annotations")
                  "do_not_test_collection_comments_and_annotations"))
               #t)))
         ,@(package-arguments python2-variant))))))
2588
;; py2bit, fetched from PyPI and built with the default phases of the Python
;; build system.
(define-public python-py2bit
  (package
    (name "python-py2bit")
    (version "0.3.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "py2bit" version))
              (sha256
               (base32
                "1vw2nvw1yrl7ikkqsqs1pg239yr5nspvd969r1x9arms1k25a1a5"))))
    (build-system python-build-system)
    (home-page "https://github.com/dpryan79/py2bit")
    (synopsis "Access 2bit files using lib2bit")
    (description
     "This package provides Python bindings for lib2bit to access 2bit files
with Python.")
    (license license:expat)))
2607
;; Delly, fetched from its Git repository.  The bundled "src/htslib" is
;; deleted from the source; "htslib" is listed among the inputs.
(define-public delly
  (package
    (name "delly")
    (version "0.7.9")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dellytools/delly")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "034jqsxswy9gqdh2zkgc1js99qkv75ks4xvzgmh0284sraagv61z"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file-recursively "src/htslib")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       #:make-flags
       (list "PARALLEL=1"       ; Allow parallel execution at run-time.
             (string-append "prefix=" (assoc-ref %outputs "out")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (add-after 'install 'install-templates
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy the "excludeTemplates" directory below share/delly.
             (let* ((out (assoc-ref outputs "out"))
                    (destination (string-append out
                                                "/share/delly/templates")))
               (mkdir-p destination)
               (copy-recursively "excludeTemplates" destination))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("htslib" ,htslib)
       ("zlib" ,zlib)
       ("bzip2" ,bzip2)))
    (home-page "https://github.com/dellytools/delly")
    (synopsis "Integrated structural variant prediction method")
    (description
     "Delly is an integrated structural variant prediction method
that can discover and genotype deletions, tandem duplications, inversions and
translocations at single-nucleotide resolution in short-read massively parallel
sequencing data. It uses paired-ends and split-reads to sensitively and
accurately delineate genomic rearrangements throughout the genome.")
    (license license:gpl3+)))
2654
;; DIAMOND sequence aligner, built with CMake from the upstream Git tag.
(define-public diamond
  (package
    (name "diamond")
    (version "0.9.30")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bbuchfink/diamond")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0k6f3kb6cniw11xw6763kkbs1sl0yack7xsy7q5fl5v170ssphq4"))))
    (build-system cmake-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Strip the "-march=native" flag from the CMake build description.
         (add-after 'unpack 'remove-native-compilation
           (lambda _
             (substitute* "CMakeLists.txt"
               (("-march=native") ""))
             #t)))))
    (synopsis "Accelerated BLAST compatible local sequence aligner")
    (description
     "DIAMOND is a BLAST-compatible local aligner for mapping protein and
translated DNA query sequences against a protein reference database (BLASTP
and BLASTX alignment mode). The speedup over BLAST is up to 20,000 on short
reads at a typical sensitivity of 90-99% relative to BLAST depending on the
data and settings.")
    (home-page "https://github.com/bbuchfink/diamond")
    (license license:agpl3+)))
2688
;; Discrover motif finder.  The build also processes LaTeX documentation,
;; hence the TeX Live union among the native inputs.
(define-public discrover
  (package
    (name "discrover")
    (version "1.6.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/maaskola/discrover")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "173fwi2vb6a5kp406hm3jj6j7v4whww796f2qcygp4rpvamh307y"))))
    (build-system cmake-build-system)
    (native-inputs
     `(("texlive" ,(texlive-union (list texlive-fonts-cm
                                        texlive-fonts-amsfonts

                                        texlive-latex-doi
                                        texlive-latex-examplep
                                        texlive-latex-hyperref
                                        texlive-latex-ms
                                        texlive-latex-natbib
                                        ;; style files used by natbib
                                        texlive-bibtex
                                        ;; tikz
                                        texlive-latex-pgf
                                        texlive-latex-verbatimbox)))
       ("imagemagick" ,imagemagick)))
    (inputs
     `(("boost" ,boost)
       ("cairo" ,cairo)
       ("rmath-standalone" ,rmath-standalone)))
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-latex-errors
           (lambda _
             ;; The bibliography is edited with the default port encoding
             ;; unset.
             (with-fluids ((%default-port-encoding #f))
               (substitute* "doc/references.bib"
                 (("\\{S\\}illanp[^,]+,")
                  "{S}illanp{\\\"a}{\\\"a},")))
             ;; XXX: pdflatex keeps complaining about these characters, which
             ;; reach the manual through the generated
             ;; discrover-cli-help.txt, so spell them out in ASCII.
             (substitute* "src/hmm/cli.cpp"
               (("µ") "mu")
               (("η") "eta")
               (("≤") "<="))
             ;; The optional argument looks like a syntax error.
             (substitute* "doc/discrover-manual.tex"
               (("theverbbox\\[t\\]") "theverbbox"))
             #t))
         ;; Add "#include <random>" right after the include guard of two
         ;; headers.
         (add-after 'unpack 'add-missing-includes
           (lambda _
             (substitute* "src/executioninformation.hpp"
               (("#define EXECUTIONINFORMATION_HPP" guard)
                (string-append guard "\n#include <random>")))
             (substitute* "src/plasma/fasta.hpp"
               (("#define FASTA_HPP" guard)
                (string-append guard "\n#include <random>")))
             #t))
         ;; FIXME: texlive-union does not handle fonts correctly and expects
         ;; to be able to generate fonts in the home directory, so provide a
         ;; writable HOME.
         (add-before 'build 'setenv-HOME
           (lambda _
             (setenv "HOME" "/tmp")
             #t)))))
    (synopsis "Discover discriminative nucleotide sequence motifs")
    (description "Discrover is a motif discovery method to find binding sites
of nucleic acid binding proteins.")
    (home-page "https://dorina.mdc-berlin.de/public/rajewsky/discrover/")
    (license license:gpl3+)))
2761
;; EIGENSOFT population genetics tools, built from the Makefile in "src".
(define-public eigensoft
  (package
    (name "eigensoft")
    (version "7.2.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/DReichLab/EIG")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1c141fqvhnzibmnf22sv23vbmzm20kjjyrib44cfh75wyndp2d9k"))
              (modules '((guix build utils)))
              ;; Throw away the pre-built binaries and leave an empty "bin"
              ;; directory in their place.
              (snippet
               '(begin
                  (delete-file-recursively "bin")
                  (mkdir "bin")
                  #t))))
    (build-system gnu-build-system)
    (inputs
     `(("gsl" ,gsl)
       ("lapack" ,lapack)
       ("openblas" ,openblas)
       ("perl" ,perl)
       ("gfortran" ,gfortran "lib")))
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; Nothing needs configuring, but the Makefile lives in a
         ;; sub-directory, so enter it here.
         (replace 'configure
           (lambda _
             (chdir "src")
             #t))
         ;; The upstream install target merely copies the executables to the
         ;; "bin" directory of the build root; move them to the output.
         (add-after 'install 'actually-install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/bin")))
               (for-each (lambda (program)
                           (install-file program target))
                         (find-files "../bin" ".*"))
               #t))))))
    (synopsis "Tools for population genetics")
    (description "The EIGENSOFT package provides tools for population
genetics and stratification correction. EIGENSOFT implements methods commonly
used in population genetics analyses such as PCA, computation of Tracy-Widom
statistics, and finding related individuals in structured populations. It
comes with a built-in plotting script and supports multiple file formats and
quantitative phenotypes.")
    (home-page "https://github.com/DReichLab/EIG")
    ;; The eigensoft tools themselves are under the Expat license, but they
    ;; are linked with the GNU Scientific Library (GSL), so the effective
    ;; license is the GPL.
    (license license:gpl3+)))
2820
;; Entrez Direct: the Perl and shell front ends.  The Go helpers come from
;; the separate EDIRECT-GO-PROGRAMS package and are symlinked into bin/.
(define-public edirect
  (package
    (name "edirect")
    (version "13.3.20200128")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "ftp://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/versions/"
             version "/edirect-" version ".tar.gz"))
       (sha256
        (base32
         "093zp7klv81ph0y8mm8d78a9hnpfxbv2kdym70gzdf3vz176rw33"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (delete-file "Mozilla-CA.tar.gz")
           ;; The original Go library does not have any license, so point
           ;; the import at a different unidecode package.
           (substitute* "rchive.go"
             (("github.com/fiam/gounidecode/unidecode")
              "golang.org/rainycape/unidecode"))
           #t))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         ;; The stock check is dropped; a simple check runs after install
         ;; (see the 'check phase added at the end).
         (delete 'check)
         (add-after 'unpack 'patch-programs
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Ignore errors about missing xtract.Linux and rchive.Linux.
             (substitute* "pm-refresh"
               (("cat \\\"\\$target")
                "grep ^[[:digit:]] \"$target"))
             #t))
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (go-programs (assoc-ref inputs "edirect-go-programs")))
               ;; Install the scripts shipped in the tarball.
               (for-each
                (lambda (script)
                  (install-file script bin))
                '("archive-pubmed" "asp-cp" "asp-ls" "download-ncbi-data"
                  "download-pubmed" "edirect.pl" "efetch" "epost" "esearch"
                  "fetch-pubmed" "ftp-cp" "ftp-ls" "has-asp" "index-pubmed"
                  "pm-prepare" "pm-refresh" "pm-stash" "pm-collect"
                  "pm-index" "pm-invert" "pm-merge" "pm-promote"))
               ;; Expose the Go binaries under their short names.
               (symlink (string-append go-programs "/bin/xtract.Linux")
                        (string-append bin "/xtract"))
               (symlink (string-append go-programs "/bin/rchive.Linux")
                        (string-append bin "/rchive")))
             #t))
         (add-after 'install 'wrap-program
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure everything can run in a pure environment.
             (let ((out (assoc-ref outputs "out"))
                   (perl5lib (getenv "PERL5LIB")))
               (for-each
                (lambda (program)
                  (wrap-program program
                    `("PERL5LIB" ":" prefix (,perl5lib)))
                  (wrap-program program
                    `("PATH" ":" prefix (,(string-append out "/bin")
                                         ,(dirname (which "sed"))
                                         ,(dirname (which "gzip"))
                                         ,(dirname (which "grep"))
                                         ,(dirname (which "perl"))
                                         ,(dirname (which "uname"))))))
                (find-files out ".")))
             #t))
         ;; Run the wrapped edirect.pl once as a check.
         (add-after 'wrap-program 'check
           (lambda* (#:key outputs #:allow-other-keys)
             (invoke (string-append (assoc-ref outputs "out")
                                    "/bin/edirect.pl")
                     "-filter" "-help")
             #t)))))
    (inputs
     `(("edirect-go-programs" ,edirect-go-programs)
       ("perl-html-parser" ,perl-html-parser)
       ("perl-encode-locale" ,perl-encode-locale)
       ("perl-file-listing" ,perl-file-listing)
       ("perl-html-tagset" ,perl-html-tagset)
       ("perl-html-tree" ,perl-html-tree)
       ("perl-http-cookies" ,perl-http-cookies)
       ("perl-http-date" ,perl-http-date)
       ("perl-http-message" ,perl-http-message)
       ("perl-http-negotiate" ,perl-http-negotiate)
       ("perl-lwp-mediatypes" ,perl-lwp-mediatypes)
       ("perl-lwp-protocol-https" ,perl-lwp-protocol-https)
       ("perl-net-http" ,perl-net-http)
       ("perl-uri" ,perl-uri)
       ("perl-www-robotrules" ,perl-www-robotrules)
       ("perl-xml-simple" ,perl-xml-simple)
       ("perl" ,perl)))
    (home-page "https://www.ncbi.nlm.nih.gov/books/NBK179288/")
    (synopsis "Tools for accessing the NCBI's set of databases")
    (description
     "Entrez Direct (EDirect) is a method for accessing the National Center
for Biotechnology Information's (NCBI) set of interconnected
databases (publication, sequence, structure, gene, variation, expression,
etc.) from a terminal. Functions take search terms from command-line
arguments. Individual operations are combined to build multi-step queries.
Record retrieval and formatting normally complete the process.

EDirect also provides an argument-driven function that simplifies the
extraction of data from document summaries or other results that are returned
in structured XML format. This can eliminate the need for writing custom
software to answer ad hoc questions.")
    (native-search-paths
     ;; Ideally this should be set for LWP somewhere.
     (list (search-path-specification
            (variable "PERL_LWP_SSL_CA_FILE")
            (file-type 'regular)
            (separator #f)
            (files '("/etc/ssl/certs/ca-certificates.crt")))))
    (license license:public-domain)))
2936
;; The Go programs of Entrez Direct, built from the same source as EDIRECT
;; (this package inherits from it) but with the Go build system.
(define-public edirect-go-programs
  (package
    (inherit edirect)
    (name "edirect-go-programs")
    (build-system go-build-system)
    (arguments
     `(#:install-source? #f
       #:tests? #f                      ; no tests
       #:import-path "ncbi.nlm.nih.gov/entrez/edirect"
       #:phases
       (modify-phases %standard-phases
         (replace 'build
           (lambda* (#:key import-path #:allow-other-keys)
             (with-directory-excursion (string-append "src/" import-path)
               ;; Each entry holds the trailing arguments of one
               ;; "go build -v -x" command; they run in the order listed.
               (for-each
                (lambda (build-args)
                  (apply invoke "go" "build" "-v" "-x" build-args))
                '(("j2x.go")
                  ("t2x.go")
                  ("-o" "xtract.Linux" "xtract.go" "common.go")
                  ("-o" "rchive.Linux" "rchive.go" "common.go")
                  ("-o" "symbols.Linux" "s2p.go")))
               #t)))
         (replace 'install
           (lambda* (#:key outputs import-path #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
                   (build-dir (string-append "src/" import-path "/")))
               (for-each (lambda (program)
                           (format #t "installing ~a~%" program)
                           (install-file (string-append build-dir program)
                                         bin))
                         '("j2x" "t2x" "symbols.Linux" "xtract.Linux"
                           "rchive.Linux"))
               #t))))))
    ;; Override the input fields of the inherited package.
    (native-inputs '())
    (propagated-inputs '())
    (inputs
     `(("go-github-com-fatih-color" ,go-github-com-fatih-color)
       ("go-github-com-fogleman-gg" ,go-github-com-fogleman-gg)
       ("go-github-com-gedex-inflector" ,go-github-com-gedex-inflector)
       ("go-github-com-golang-freetype" ,go-github-com-golang-freetype)
       ("go-github-com-klauspost-cpuid" ,go-github-com-klauspost-cpuid)
       ("go-github-com-pbnjay-memory" ,go-github-com-pbnjay-memory)
       ("go-github-com-surgebase-porter2" ,go-github-com-surgebase-porter2)
       ("go-golang-org-rainycape-unidecode" ,go-golang-org-rainycape-unidecode)
       ("go-golang-org-x-image" ,go-golang-org-x-image)
       ("go-golang-org-x-text" ,go-golang-org-x-text)))))
2980
;; Exonerate pairwise sequence alignment tool, built from the release
;; tarball with the GNU build system.
(define-public exonerate
  (package
    (name "exonerate")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append
         "http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/"
         "exonerate-" version ".tar.gz"))
       (sha256
        (base32
         "0hj0m9xygiqsdxvbg79wq579kbrx1mdrabi2bzqz2zn9qwfjcjgq"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f)) ; Building in parallel fails on some machines.
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("glib" ,glib)))
    (home-page
     "https://www.ebi.ac.uk/about/vertebrate-genomics/software/exonerate")
    (synopsis "Generic tool for biological sequence alignment")
    ;; The description used to read "using a many alignment models"; the
    ;; stray article has been removed.
    (description
     "Exonerate is a generic tool for pairwise sequence comparison. It allows
the alignment of sequences using many alignment models, either exhaustive
dynamic programming or a variety of heuristics.")
    (license license:gpl3)))
3010
;; eXpress, built with CMake against the bamtools, boost and protobuf
;; packages listed in the inputs.
(define-public express
  (package
    (name "express")
    (version "1.5.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/adarob/eXpress")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "18nb22n7x820fzjngf4qgyb3mspqkw7xyk7v7s5ps6wfrd8qwscb"))))
    (build-system cmake-build-system)
    (inputs
     `(("boost" ,boost)
       ("bamtools" ,bamtools)
       ("protobuf" ,protobuf)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-shared-boost-libs-and-set-bamtools-paths
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((bamtools (assoc-ref inputs "bamtools")))
               ;; Top-level build file: turn off static Boost and point the
               ;; bamtools include directory at the input.
               (substitute* "CMakeLists.txt"
                 (("set\\(Boost_USE_STATIC_LIBS ON\\)")
                  "set(Boost_USE_STATIC_LIBS OFF)")
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/bamtools/include")
                  (string-append bamtools "/include/bamtools")))
               ;; src/ build file: use the bamtools library directory of the
               ;; input and the shared protobuf library.
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./bamtools/lib")
                  (string-append bamtools "/lib"))
                 (("libprotobuf.a") "libprotobuf.so")))
             #t))
         ;; Remove the update_check.h include and the check_version call.
         (add-after 'unpack 'remove-update-check
           (lambda _
             (substitute* "src/main.cpp"
               (("#include \"update_check.h\"") "")
               (("check_version\\(PACKAGE_VERSION\\);") ""))
             #t)))))
    (synopsis "Streaming quantification for high-throughput genomic sequencing")
    (description
     "eXpress is a streaming tool for quantifying the abundances of a set of
target sequences from sampled subsequences. Example applications include
transcript-level RNA-Seq quantification, allele-specific/haplotype expression
analysis (from RNA-Seq), transcription factor binding quantification in
ChIP-Seq, and analysis of metagenomic data.")
    (home-page "http://bio.math.berkeley.edu/eXpress")
    (license license:artistic2.0)))
3061
;; Express Beta Diversity, built from the "source" sub-directory of the
;; checkout.
(define-public express-beta-diversity
  (package
    (name "express-beta-diversity")
    (version "1.0.8")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dparks1134/ExpressBetaDiversity")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0s0yzg5c21349rh7x4w9266jsvnp7j1hp9cf8sk32hz8nvrj745x"))))
    (build-system gnu-build-system)
    (inputs
     `(("python" ,python-2)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'enter-source
           (lambda _
             (chdir "source")
             #t))
         ;; The check runs the freshly built executable with "-u".
         (replace 'check
           (lambda _
             (invoke "../bin/ExpressBetaDiversity" "-u")
             #t))
         ;; Install the conversion script and the executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((target (string-append (assoc-ref outputs "out") "/bin")))
               (for-each (lambda (file)
                           (install-file file target))
                         '("../scripts/convertToEBD.py"
                           "../bin/ExpressBetaDiversity"))
               #t))))))
    (synopsis "Taxon- and phylogenetic-based beta diversity measures")
    (description
     "Express Beta Diversity (EBD) calculates ecological beta diversity
(dissimilarity) measures between biological communities. EBD implements a
variety of diversity measures including those that make use of phylogenetic
similarity of community members.")
    (home-page "https://github.com/dparks1134/ExpressBetaDiversity")
    (license license:gpl3+)))
3099
;; FastTree is distributed as a single C file, so there is nothing to unpack
;; or configure: the 'build phase compiles the source file directly, once
;; without and once with OpenMP.
(define-public fasttree
  (package
    (name "fasttree")
    (version "2.1.10")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://www.microbesonline.org/fasttree/FastTree-"
                    version ".c"))
              (sha256
               (base32
                "0vcjdvy1j4m702vmak4svbfkrpcw63k7wymfksjp9a982zy8kjsl"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (delete 'unpack)
         (delete 'configure)
         (replace 'build
           (lambda* (#:key source #:allow-other-keys)
             ;; Compile SOURCE into the executable OUTPUT.  EXTRA-FLAGS are
             ;; placed in front of the flags shared by both variants, which
             ;; yields the same gcc command lines as spelling them out twice.
             (define (build-variant output . extra-flags)
               (apply invoke "gcc"
                      (append extra-flags
                              (list "-O3"
                                    "-finline-functions"
                                    "-funroll-loops"
                                    "-Wall"
                                    "-o" output
                                    source
                                    "-lm"))))
             (build-variant "FastTree")
             ;; Second executable, built with OpenMP enabled.
             (build-variant "FastTreeMP" "-DOPENMP" "-fopenmp")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "FastTree" bin)
               (install-file "FastTreeMP" bin)
               #t))))))
    (home-page "http://www.microbesonline.org/fasttree")
    (synopsis "Infers approximately-maximum-likelihood phylogenetic trees")
    ;; Grammar fix: the text used to say "a million of sequences".
    (description
     "FastTree can handle alignments with up to a million sequences in a
reasonable amount of time and memory. For large alignments, FastTree is
100-1,000 times faster than PhyML 3.0 or RAxML 7.")
    (license license:gpl2+)))
3155
;; FASTX-Toolkit, built from the release tarball with the GNU build system.
(define-public fastx-toolkit
  (package
    (name "fastx-toolkit")
    (version "0.0.14")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/agordon/fastx_toolkit/releases/download/"
             version "/fastx_toolkit-" version ".tar.bz2"))
       (sha256
        (base32
         "01jqzw386873sr0pjp1wr4rn8fsga2vxs1qfmicvx1pjr72007wy"))))
    (build-system gnu-build-system)
    (native-inputs
     ;; GCC 6 is requested explicitly because the package doesn't build with
     ;; later versions.
     `(("gcc" ,gcc-6)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("libgtextutils" ,libgtextutils)))
    (synopsis "Tools for FASTA/FASTQ file preprocessing")
    (description
     "The FASTX-Toolkit is a collection of command line tools for Short-Reads
FASTA/FASTQ files preprocessing.

Next-Generation sequencing machines usually produce FASTA or FASTQ files,
containing multiple short-reads sequences. The main processing of such
FASTA/FASTQ files is mapping the sequences to reference genomes. However, it
is sometimes more productive to preprocess the files before mapping the
sequences to the genome---manipulating the sequences to produce better mapping
results. The FASTX-Toolkit tools perform some of these preprocessing tasks.")
    (home-page "http://hannonlab.cshl.edu/fastx_toolkit/")
    (license license:agpl3+)))
3188
;; Flexbar, built with CMake.  The 'check and 'install phases are replaced
;; with hand-written ones.
(define-public flexbar
  (package
    (name "flexbar")
    (version "3.4.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/seqan/flexbar")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1pq9sxvdnldl14libk234m72dqhwgzs3acgl943wchwdqlcsi5r2"))))
    (build-system cmake-build-system)
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("seqan" ,seqan)))
    (inputs
     `(("tbb" ,tbb)
       ("zlib" ,zlib)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Strip " -march=native" from the compiler flags.
         (add-after 'unpack 'do-not-tune-to-CPU
           (lambda _
             (substitute* "src/CMakeLists.txt"
               ((" -march=native") ""))
             #t))
         ;; Put the current directory at the front of PATH, then run the
         ;; test script from the source tree.
         (replace 'check
           (lambda _
             (let ((path (string-append (getcwd) ":" (getenv "PATH"))))
               (setenv "PATH" path))
             (with-directory-excursion "../source/test"
               (invoke "bash" "flexbar_test.sh"))
             #t))
         ;; Only the "flexbar" executable is installed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "flexbar"
                           (string-append (assoc-ref outputs "out")
                                          "/bin/"))
             #t)))))
    (synopsis "Barcode and adapter removal tool for sequencing platforms")
    (description
     "Flexbar preprocesses high-throughput nucleotide sequencing data
efficiently. It demultiplexes barcoded runs and removes adapter sequences.
Moreover, trimming and filtering features are provided. Flexbar increases
read mapping rates and improves genome and transcriptome assemblies. It
supports next-generation sequencing data in fasta/q and csfasta/q format from
Illumina, Roche 454, and the SOLiD platform.")
    (home-page "https://github.com/seqan/flexbar")
    (license license:bsd-3)))
3239
;; FragGeneScan gene predictor.  The wrapper script and the C program are
;; patched to look for the training files under share/fraggenescan/train,
;; and the installed wrapper is exercised on the bundled examples in place of
;; a regular test suite.
(define-public fraggenescan
  (package
    (name "fraggenescan")
    (version "1.30")
    (source
     (origin
       (method url-fetch)
       (uri
        (string-append "mirror://sourceforge/fraggenescan/"
                       "FragGeneScan" version ".tar.gz"))
       (sha256
        (base32 "158dcnwczgcyhwm4qlx19sanrwgdpzf6bn2y57mbpx55lkgz1mzj"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'build 'patch-paths
           (lambda* (#:key outputs #:allow-other-keys)
             ;; SHARE ends with a slash, so "train/" is appended to it
             ;; directly in both substitutions below.
             (let* ((out (assoc-ref outputs "out"))
                    (share (string-append out "/share/fraggenescan/")))
               (substitute* "run_FragGeneScan.pl"
                 ;; Refer to the external commands by absolute file name.
                 (("system\\(\"rm")
                  (string-append "system(\"" (which "rm")))
                 (("system\\(\"mv")
                  (string-append "system(\"" (which "mv")))
                 (("\\\"awk") (string-append "\"" (which "awk")))
                 ;; This script and other programs expect the training files
                 ;; to be in the non-standard location bin/train/XXX.  Change
                 ;; this to be share/fraggenescan/train/XXX instead.
                 (("^\\$train.file = \\$dir.*")
                  (string-append "$train_file = \""
                                 share
                                 "train/\".$FGS_train_file;")))
               (substitute* "run_hmm.c"
                 ;; Previously "/train/" was appended here, which produced
                 ;; ".../fraggenescan//train/" with a doubled slash.
                 (("^ strcat\\(train_dir, \\\"train/\\\"\\);")
                  (string-append " strcpy(train_dir, \"" share "train/\");"))))
             #t))
         (replace 'build
           (lambda _
             (invoke "make" "clean")
             (invoke "make" "fgs")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (share (string-append out "/share/fraggenescan/train")))
               (install-file "run_FragGeneScan.pl" bin)
               (install-file "FragGeneScan" bin)
               (copy-recursively "train" share))
             #t))
         (delete 'check)
         (add-after 'install 'post-install-check
           ;; In lieu of 'make check', run one of the examples and check that
           ;; the output files get created.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (frag (string-append bin "run_FragGeneScan.pl")))
               ;; Test complete genome.
               (invoke frag
                       "-genome=./example/NC_000913.fna"
                       "-out=./test2"
                       "-complete=1"
                       "-train=complete")
               (unless (and (file-exists? "test2.faa")
                            (file-exists? "test2.ffn")
                            (file-exists? "test2.gff")
                            (file-exists? "test2.out"))
                 (error "Expected files do not exist."))
               ;; Test incomplete sequences.
               (invoke frag
                       "-genome=./example/NC_000913-fgs.ffn"
                       "-out=out"
                       "-complete=0"
                       "-train=454_30")
               #t))))))
    (inputs
     `(("perl" ,perl)
       ("python" ,python-2))) ;not compatible with python 3.
    (home-page "https://sourceforge.net/projects/fraggenescan/")
    (synopsis "Finds potentially fragmented genes in short reads")
    (description
     "FragGeneScan is a program for predicting bacterial and archaeal genes in
short and error-prone DNA sequencing reads. It can also be applied to predict
genes in incomplete assemblies or complete genomes.")
    ;; GPL3+ according to private correspondence with the authors.
    (license license:gpl3+)))
3329
;; Fxtract.  The "util" directory of the checkout is replaced with a separate
;; checkout of ctSkennerton's util repository, pinned to UTIL-COMMIT.
(define-public fxtract
  (let ((util-commit "776ca85a18a47492af3794745efcb4a905113115"))
    (package
      (name "fxtract")
      (version "2.3")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ctSkennerton/fxtract")
               (commit version)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0hab3gpwf4w9s87qlbswq6ws1qqybh4dcqk79q1ahyldzai5fgp5"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags (list
                       (string-append "PREFIX=" (assoc-ref %outputs "out"))
                       "CC=gcc")
         #:test-target "fxtract_test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Swap the "util" directory for the contents of the
           ;; "ctskennerton-util" input.
           (add-before 'build 'copy-util
             (lambda* (#:key inputs #:allow-other-keys)
               (rmdir "util")
               (copy-recursively (assoc-ref inputs "ctskennerton-util") "util")
               #t))
           ;; Do not use make install as this requires additional dependencies.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "fxtract" bin)
                 #t))))))
      (inputs
       `(("pcre" ,pcre)
         ("zlib" ,zlib)))
      (native-inputs
       ;; ctskennerton-util is licensed under GPL2.
       `(("ctskennerton-util"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ctSkennerton/util")
                   (commit util-commit)))
             ;; The store name used to be misspelled "ctstennerton-util-".
             (file-name (string-append
                         "ctskennerton-util-" util-commit "-checkout"))
             (sha256
              (base32
               "0cls1hd4vgj3f36fpzzg4xc77d6f3hpc60cbpfmn2gdr7ykzzad7"))))))
      (home-page "https://github.com/ctSkennerton/fxtract")
      (synopsis "Extract sequences from FASTA and FASTQ files")
      (description
       "Fxtract extracts sequences from a protein or nucleotide fastx (FASTA
or FASTQ) file given a subsequence. It uses a simple substring search for
basic tasks but can change to using POSIX regular expressions, PCRE, hash
lookups or multi-pattern searching as required. By default fxtract looks in
the sequence of each record but can also be told to look in the header,
comment or quality sections.")
      ;; 'util' requires SSE instructions.
      (supported-systems '("x86_64-linux"))
      (license license:expat))))
3394
;; GEMMA, built directly from its Makefile.  The make flags depend on the
;; system the package is built for.
(define-public gemma
  (package
    (name "gemma")
    (version "0.98")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/xiangzhou/GEMMA")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1s3ncnbn45r2hh1cvrqky1kbqq6546biypr4f5mkw1kqlrgyh0yg"))))
    (inputs
     `(("eigen" ,eigen)
       ("gfortran" ,gfortran "lib")
       ("gsl" ,gsl)
       ("lapack" ,lapack)
       ("openblas" ,openblas)
       ("zlib" ,zlib)))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags
       '(,@(match (%current-system)
             ("x86_64-linux"
              '("FORCE_DYNAMIC=1"))
             ("i686-linux"
              '("FORCE_DYNAMIC=1" "FORCE_32BIT=1"))
             (_
              '("FORCE_DYNAMIC=1" "NO_INTEL_COMPAT=1"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'find-eigen
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Ensure that Eigen headers can be found.  The Eigen directory
             ;; is put in front of whatever CPLUS_INCLUDE_PATH already
             ;; holds, rather than replacing it, so directories already on
             ;; the search path are kept.
             (let ((eigen-include (string-append (assoc-ref inputs "eigen")
                                                 "/include/eigen3"))
                   (existing (getenv "CPLUS_INCLUDE_PATH")))
               (setenv "CPLUS_INCLUDE_PATH"
                       (if (and existing (not (string-null? existing)))
                           (string-append eigen-include ":" existing)
                           eigen-include)))
             #t))
         ;; Create the "bin" directory before the build; the 'install phase
         ;; below picks up "bin/gemma" from it.
         (add-before 'build 'bin-mkdir
           (lambda _
             (mkdir-p "bin")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (install-file "bin/gemma"
                             (string-append
                              out "/bin")))
             #t)))
       #:tests? #f)) ; no tests included yet
    (home-page "https://github.com/xiangzhou/GEMMA")
    (synopsis "Tool for genome-wide efficient mixed model association")
    (description
     "Genome-wide Efficient Mixed Model Association (GEMMA) provides a
standard linear mixed model resolver with application in genome-wide
association studies (GWAS).")
    (license license:gpl3)))
3454
;; GRIT, a Python 2 package whose C extension sources are regenerated with
;; Cython during the build.
(define-public grit
  (package
    (name "grit")
    (version "2.0.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nboley/grit")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1l5v8vfvfbrpmgnrvbrbv40d0arhxcnmxgv2f1mlcqfa3q6bkqm9"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python2-cython)))
    (inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-networkx" ,python2-networkx)))
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'generate-from-cython-sources
           (lambda _
             ;; Removing the bundled C files forces them to be generated
             ;; afresh from the pyx sources.
             (for-each delete-file
                       '("grit/sparsify_support_fns.c"
                         "grit/call_peaks_support_fns.c"))
             (substitute* "setup.py"
               (("Cython.Setup") "Cython.Build"))
             #t)))))
    ;; As of 2020-01-21 the canonical home page <http://grit-bio.org> times
    ;; out, so the repository is used instead.
    (home-page "https://github.com/nboley/grit")
    (synopsis "Tool for integrative analysis of RNA-seq type assays")
    (description
     "GRIT is designed to use RNA-seq, TES, and TSS data to build and quantify
full length transcript models. When none of these data sources are available,
GRIT can be run by providing a candidate set of TES or TSS sites. In
addition, GRIT can merge in reference junctions and gene boundaries. GRIT can
also be run in quantification mode, where it uses a provided GTF file and just
estimates transcript expression.")
    (license license:gpl3+)))
3499
;; HISAT ships no configure script and no install target; the Makefile is
;; patched for determinism and the executables are installed by hand.
(define-public hisat
  (package
    (name "hisat")
    (version "0.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "http://ccb.jhu.edu/software/hisat/downloads/hisat-"
                    version "-beta-source.zip"))
              (sha256
               (base32
                "1k381ydranqxp09yf2y7w1d0chz5d59vb6jchi89hbb0prq19lk5"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no check target
       #:make-flags '("allall"
                      ;; Disable unsupported `popcnt' instructions on
                      ;; architectures other than x86_64
                      ,@(if (string-prefix? "x86_64"
                                            (or (%current-target-system)
                                                (%current-system)))
                            '()
                            '("POPCNT_CAPABILITY=0")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'patch-sources
           (lambda _
             ;; XXX Cannot use snippet because zip files are not supported
             (substitute* "Makefile"
               (("^CC = .*$") "CC = gcc")
               (("^CPP = .*$") "CPP = g++")
               ;; replace BUILD_HOST and BUILD_TIME for deterministic build
               (("-DBUILD_HOST=.*") "-DBUILD_HOST=\"\\\"guix\\\"\"")
               (("-DBUILD_TIME=.*") "-DBUILD_TIME=\"\\\"0\\\"\""))
             (substitute* '("hisat-build" "hisat-inspect")
               (("/usr/bin/env") (which "env")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Copy every hisat executable found in the build tree to
             ;; the "bin" directory of the output.
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (programs
                     (find-files
                      "."
                      "hisat(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (lambda (program)
                           (install-file program bin))
                         programs))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)))
    (inputs
     `(("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    ;; Non-portable SSE instructions are used so building fails on platforms
    ;; other than x86_64.
    (supported-systems '("x86_64-linux"))
    (home-page "https://ccb.jhu.edu/software/hisat/index.shtml")
    (synopsis "Hierarchical indexing for spliced alignment of transcripts")
    (description
     "HISAT is a fast and sensitive spliced alignment program for mapping
RNA-seq reads. In addition to one global FM index that represents a whole
genome, HISAT uses a large set of small FM indexes that collectively cover the
whole genome. These small indexes (called local indexes) combined with
several alignment strategies enable effective alignment of RNA-seq reads, in
particular, reads spanning multiple exons.")
    (license license:gpl3+)))
3566
;; HISAT2 has no configure script and no install target; executables and
;; the HTML manual are installed by a custom 'install' phase.
(define-public hisat2
  (package
    (name "hisat2")
    (version "2.0.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://ftp.ccb.jhu.edu/pub/infphilo/hisat2"
                           "/downloads/hisat2-" version "-source.zip"))
       (sha256
        (base32
         "0lywnr8kijwsc2aw10dwxic0n0yvip6fl3rjlvc8zzwahamy4x7g"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags (list "CC=gcc" "CXX=g++" "allall")
       #:modules ((guix build gnu-build-system)
                  (guix build utils)
                  (srfi srfi-26))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'make-deterministic
           (lambda _
             ;; Do not embed the build date.
             (substitute* "Makefile"
               (("`date`") "0"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    (doc (string-append out "/share/doc/hisat2/"))
                    (programs
                     (find-files
                      "."
                      "hisat2(-(build|align|inspect)(-(s|l)(-debug)*)*)*$")))
               (for-each (cut install-file <> bin) programs)
               (mkdir-p doc)
               (install-file "doc/manual.inc.html" doc))
             #t)))))
    (native-inputs
     `(("unzip" ,unzip)                 ; needed for archive from ftp
       ("perl" ,perl)
       ("pandoc" ,ghc-pandoc)))         ; for documentation
    (home-page "https://ccb.jhu.edu/software/hisat2/index.shtml")
    (synopsis "Graph-based alignment of genomic sequencing reads")
    (description "HISAT2 is a fast and sensitive alignment program for mapping
next-generation sequencing reads (both DNA and RNA) to a population of human
genomes (as well as to a single reference genome). In addition to using one
global @dfn{graph FM} (GFM) index that represents a population of human
genomes, HISAT2 uses a large set of small GFM indexes that collectively cover
the whole genome. These small indexes, combined with several alignment
strategies, enable rapid and accurate alignment of sequencing reads. This new
indexing scheme is called a @dfn{Hierarchical Graph FM index} (HGFM).")
    ;; HISAT2 contains files from Bowtie2, which is released under
    ;; GPLv2 or later. The HISAT2 source files are released under
    ;; GPLv3 or later.
    (license license:gpl3+)))
3624
;; HMMER is built with the stock GNU build system; only the list of
;; supported systems is restricted.
(define-public hmmer
  (package
    (name "hmmer")
    (version "3.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/hmmer/hmmer-"
                           version
                           ".tar.gz"))
       (sha256
        (base32
         "171bivy6xhgjsz5nv53n81pc3frnwz29ylblawk2bv46szwjjqd5"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    ;; hmmer uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (home-page "http://hmmer.org/")
    (synopsis "Biosequence analysis using profile hidden Markov models")
    (description
     "HMMER is used for searching sequence databases for homologs of protein
sequences, and for making protein sequence alignments. It implements methods
using probabilistic models called profile hidden Markov models (profile
HMMs).")
    (license license:bsd-3)))
3650
;; HTSeq is fetched from PyPI and built with the Python build system.
(define-public htseq
  (package
    (name "htseq")
    (version "0.9.1")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "HTSeq" version))
       (sha256
        (base32
         "11flgb1381xdhk43bzbfm3vhnszkpqg6jk76rpa5xd1zbrvvlnxg"))))
    (build-system python-build-system)
    (native-inputs
     `(("python-cython" ,python-cython)))
    (inputs
     `(("python-pysam" ,python-pysam)
       ("python-matplotlib" ,python-matplotlib)))
    ;; Numpy needs to be propagated when htseq is used as a Python library.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (home-page "https://htseq.readthedocs.io/")
    (synopsis "Analysing high-throughput sequencing data with Python")
    (description
     "HTSeq is a Python package that provides infrastructure to process data
from high-throughput sequencing assays.")
    (license license:gpl3+)))
3676
;; Variant of htseq obtained by passing it to 'package-with-python2'.
(define-public python2-htseq
  (package-with-python2 htseq))
3679
;; HTSJDK 2.3.0, built with ant.  The pre-built jars shipped in "lib" are
;; removed by the source snippet and the dependencies are taken from the
;; package inputs instead.
(define-public java-htsjdk
  (package
    (name "java-htsjdk")
    (version "2.3.0") ; last version without build dependency on gradle
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1b178ixcabanm834ydjl3jiakpyxdmki32hqfv2abrzn3rcwa28i"))
              (modules '((guix build utils)))
              (snippet
               ;; Delete pre-built binaries
               '(begin
                  (delete-file-recursively "lib")
                  (mkdir-p "lib")
                  #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; tests require Internet access
       #:jdk ,icedtea-8
       #:make-flags
       (list (string-append "-Ddist=" (assoc-ref %outputs "out")
                            "/share/java/htsjdk/"))
       #:build-target "all"
       #:phases
       (modify-phases %standard-phases
         ;; The build phase also installs the jars
         (delete 'install))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3729
;; HTSJDK 2.14.3.  The upstream build.xml is deleted after unpacking and
;; the test sources are removed because they need scalatest.
(define-public java-htsjdk-latest
  (package
    (name "java-htsjdk")
    (version "2.14.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk")
                    (commit version)))
              (file-name (string-append name "-" version "-checkout"))
              (sha256
               (base32
                "1lmya1fdjy03mz6zmdmd86j9v9vfhqb3952mqq075navx1i6g4bc"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f ; tests require Scala
       #:jdk ,icedtea-8
       #:jar-name "htsjdk.jar"
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _ (delete-file "build.xml") #t))
         ;; The tests require the scalatest package.
         (add-after 'unpack 'remove-tests
           (lambda _ (delete-file-recursively "src/test") #t)))))
    (inputs
     `(("java-ngs" ,java-ngs)
       ("java-snappy-1" ,java-snappy-1)
       ("java-commons-compress" ,java-commons-compress)
       ("java-commons-logging-minimal" ,java-commons-logging-minimal)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-xz" ,java-xz)))
    (native-inputs
     `(("java-junit" ,java-junit)))
    (home-page "http://samtools.github.io/htsjdk/")
    (synopsis "Java API for high-throughput sequencing data (HTS) formats")
    (description
     "HTSJDK is an implementation of a unified Java library for accessing
common file formats, such as SAM and VCF, used for high-throughput
sequencing (HTS) data. There are also a number of useful utilities for
manipulating HTS data.")
    (license license:expat)))
3772
;; This is needed for picard 2.10.3
;;
;; Only the version and the source differ from java-htsjdk-latest; the
;; name, build system and build arguments are inherited unchanged.
(define-public java-htsjdk-2.10.1
  (package (inherit java-htsjdk-latest)
    (version "2.10.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/samtools/htsjdk")
                    (commit version)))
              ;; Spell out the package name here: NAME is no longer bound as
              ;; a field in this definition.
              (file-name (string-append "java-htsjdk-" version "-checkout"))
              (sha256
               (base32
                "1kxh7slm2pm3x9p6jxa1wqsq9a31dhiiflhxnxqcisan4k3rwia2"))))))
3799
;; This version matches java-htsjdk 2.3.0. Later versions also require a more
;; recent version of java-htsjdk, which depends on gradle.
(define-public java-picard
  (package
    (name "java-picard")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard")
             (commit version)))
       (file-name (string-append "java-picard-" version "-checkout"))
       (sha256
        (base32
         "1ll7mf4r3by92w2nhlmpa591xd1f46xlkwh59mq6fvbb5pdwzvx6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete pre-built binaries.
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           (substitute* "build.xml"
             ;; Remove build-time dependency on git.
             (("failifexecutionfails=\"true\"")
              "failifexecutionfails=\"false\"")
             ;; Use our htsjdk.
             (("depends=\"compile-htsjdk, ")
              "depends=\"")
             (("depends=\"compile-htsjdk-tests, ")
              "depends=\"")
             ;; Build picard-lib.jar before building picard.jar
             (("name=\"picard-jar\" depends=\"" line)
              (string-append line "picard-lib-jar, ")))
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:build-target "picard-jar"
       #:test-target "test"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:make-flags
       (list (string-append "-Dhtsjdk_lib_dir="
                            (assoc-ref %build-inputs "java-htsjdk")
                            "/share/java/htsjdk/")
             "-Dhtsjdk-classes=dist/tmp"
             (string-append "-Dhtsjdk-version="
                            ,(package-version java-htsjdk)))
       #:jdk ,icedtea-8
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         (add-after 'unpack 'use-our-htsjdk
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Point the build at the jars of the "java-htsjdk" input.
             (let ((htsjdk (assoc-ref inputs "java-htsjdk")))
               (substitute* "build.xml"
                 (("\\$\\{htsjdk\\}/lib")
                  (string-append htsjdk "/share/java/htsjdk/"))))
             #t))
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\""))
             #t))
         (replace 'install (install-jars "dist")))))
    (inputs
     `(("java-htsjdk" ,java-htsjdk)
       ("java-guava" ,java-guava)))
    (native-inputs
     `(("java-testng" ,java-testng)))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
3879
;; This is needed for dropseq-tools
(define-public java-picard-2.10.3
  (package
    (name "java-picard")
    (version "2.10.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard")
             (commit version)))
       (file-name (string-append "java-picard-" version "-checkout"))
       (sha256
        (base32
         "1ajlx31l6i1k3y2rhnmgq07sz99g2czqfqgkr9mihmdjp3gwjhvi"))))
    (build-system ant-build-system)
    (arguments
     `(#:jar-name "picard.jar"
       ;; Tests require jacoco:coverage.
       #:tests? #f
       #:jdk ,icedtea-8
       #:main-class "picard.cmdline.PicardCommandLine"
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform)
                  (sxml xpath))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: this phase fails with "duplicate entry: htsjdk/samtools/AbstractBAMFileIndex$1.class"
         (delete 'generate-jar-indices)
         (add-after 'unpack 'remove-useless-build.xml
           (lambda _ (delete-file "build.xml") #t))
         ;; This is necessary to ensure that htsjdk is found when using
         ;; picard.jar as an executable.
         (add-before 'build 'edit-classpath-in-manifest
           (lambda* (#:key inputs #:allow-other-keys)
             ;; FIXME: We're breaking the line early with a dummy path to
             ;; ensure that the store reference isn't broken apart and can
             ;; still be found by the reference scanner.
             (let ((msg (format #f "\
Class-Path: /~a \
~a/share/java/htsjdk.jar${line.separator}${line.separator}"
                                ;; maximum line length is 70
                                (string-tabulate (const #\b) 57)
                                (assoc-ref inputs "java-htsjdk"))))
               (chmod "build.xml" #o664)
               ;; Rewrite build.xml: every "target" element whose name
               ;; attribute is "manifest" gets two extra child tasks.
               (call-with-output-file "build.xml.new"
                 (lambda (port)
                   (sxml->xml
                    (pre-post-order
                     (with-input-from-file "build.xml"
                       (lambda _ (xml->sxml #:trim-whitespace? #t)))
                     `((target . ,(lambda (tag . kids)
                                    (let ((name ((sxpath '(name *text*))
                                                 (car kids))))
                                      (if (member "manifest" name)
                                          `(,tag ,@kids
                                                 (replaceregexp
                                                  (@ (file "${manifest.file}")
                                                     (match "\\r\\n\\r\\n")
                                                     (replace "${line.separator}")))
                                                 (echo
                                                  (@ (message ,msg)
                                                     (file "${manifest.file}")
                                                     (append "true"))))
                                          `(,tag ,@kids)))))
                       (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                       (*text* . ,(lambda (_ txt) txt))))
                    port)))
               (rename-file "build.xml.new" "build.xml")
               #t))))))
    (propagated-inputs
     `(("java-htsjdk" ,java-htsjdk-2.10.1)))
    (native-inputs
     `(("java-testng" ,java-testng)
       ("java-guava" ,java-guava)))
    (home-page "http://broadinstitute.github.io/picard/")
    (synopsis "Tools for manipulating high-throughput sequencing data and formats")
    (description "Picard is a set of Java command line tools for manipulating
high-throughput sequencing (HTS) data and formats. Picard is implemented
using the HTSJDK Java library to support accessing file formats that are
commonly used for high-throughput sequencing data such as SAM, BAM, CRAM and
VCF.")
    (license license:expat)))
3969
;; This is the last version of Picard to provide net.sf.samtools
(define-public java-picard-1.113
  (package (inherit java-picard)
    (name "java-picard")
    (version "1.113")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/broadinstitute/picard")
             (commit version)))
       (file-name (string-append "java-picard-" version "-checkout"))
       (sha256
        (base32
         "0lkpvin2fz3hhly4l02kk56fqy8lmlgyzr9kmvljk6ry6l1hw973"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete pre-built binaries.
           (delete-file-recursively "lib")
           (mkdir-p "lib")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:build-target "picard-jar"
       #:test-target "test"
       ;; FIXME: the class path at test time is wrong.
       ;; [testng] Error: A JNI error has occurred, please check your installation and try again
       ;; [testng] Exception in thread "main" java.lang.NoClassDefFoundError: com/beust/jcommander/ParameterException
       #:tests? #f
       #:jdk ,icedtea-8
       ;; This is only used for tests.
       #:make-flags
       (list "-Dsamjdk.intel_deflater_so_path=lib/jni/libIntelDeflater.so")
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: This phase fails.
         (delete 'generate-jar-indices)
         ;; Do not use bundled ant bzip2.
         (add-after 'unpack 'use-ant-bzip
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((ant (assoc-ref inputs "ant")))
               (substitute* "build.xml"
                 (("\\$\\{lib\\}/apache-ant-1.8.2-bzip2.jar")
                  (string-append ant "/lib/ant.jar"))))
             #t))
         (add-after 'unpack 'make-test-target-independent
           (lambda _
             (substitute* "build.xml"
               (("name=\"test\" depends=\"compile, ")
                "name=\"test\" depends=\"compile-tests, ")
               (("name=\"compile\" depends=\"compile-src, compile-tests\"")
                "name=\"compile\" depends=\"compile-src\""))
             #t))
         (add-after 'unpack 'fix-deflater-path
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make the default deflater path point into this package's
             ;; output instead of being null.
             (let ((out (assoc-ref outputs "out")))
               (substitute* "src/java/net/sf/samtools/Defaults.java"
                 (("getStringProperty\\(\"intel_deflater_so_path\", null\\)")
                  (string-append "getStringProperty(\"intel_deflater_so_path\", \""
                                 out
                                 "/lib/jni/libIntelDeflater.so"
                                 "\")"))))
             #t))
         ;; Build the deflater library, because we've previously deleted the
         ;; pre-built one. This can only be built with access to the JDK
         ;; sources.
         (add-after 'build 'build-jni
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "lib/jni")
             (mkdir-p "jdk-src")
             (invoke "tar" "--strip-components=1" "-C" "jdk-src"
                     "-xf" (assoc-ref inputs "jdk-src"))
             (invoke "javah" "-jni"
                     "-classpath" "classes"
                     "-d" "lib/"
                     "net.sf.samtools.util.zip.IntelDeflater")
             (with-directory-excursion "src/c/inteldeflater"
               (invoke "gcc" "-I../../../lib" "-I."
                       (string-append "-I" (assoc-ref inputs "jdk")
                                      "/include/linux")
                       "-I../../../jdk-src/src/share/native/common/"
                       "-I../../../jdk-src/src/solaris/native/common/"
                       "-c" "-O3" "-fPIC" "IntelDeflater.c")
               (invoke "gcc" "-shared"
                       "-o" "../../../lib/jni/libIntelDeflater.so"
                       "IntelDeflater.o" "-lz" "-lstdc++"))
             #t))
         ;; We can only build everything else after building the JNI library.
         (add-after 'build-jni 'build-rest
           (lambda* (#:key make-flags #:allow-other-keys)
             (apply invoke "ant" "all" make-flags)
             #t))
         (add-before 'build 'set-JAVA6_HOME
           (lambda _
             (setenv "JAVA6_HOME" (getenv "JAVA_HOME"))
             #t))
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-jni-lib
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (jni (string-append out "/lib/jni")))
               (mkdir-p jni)
               (install-file "lib/jni/libIntelDeflater.so" jni)
               #t))))))
    (inputs
     `(("java-snappy-1" ,java-snappy-1)
       ("java-commons-jexl-2" ,java-commons-jexl-2)
       ("java-cofoja" ,java-cofoja)
       ("ant" ,ant)                     ; for bzip2 support at runtime
       ("zlib" ,zlib)))
    (native-inputs
     `(("ant-apache-bcel" ,ant-apache-bcel)
       ("ant-junit" ,ant-junit)
       ("java-testng" ,java-testng)
       ("java-commons-bcel" ,java-commons-bcel)
       ("java-jcommander" ,java-jcommander)
       ("jdk" ,icedtea-8 "jdk")
       ("jdk-src" ,(car (assoc-ref (package-native-inputs icedtea-8) "jdk-drop")))))))
4087
;; FastQC is built with ant.  The jar file names in build.xml are replaced
;; with paths into the package inputs, and the "bin" directory is installed
;; by hand because there is no installation target.
(define-public fastqc
  (package
    (name "fastqc")
    (version "0.11.5")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://www.bioinformatics.babraham.ac.uk/"
                           "projects/fastqc/fastqc_v"
                           version "_source.zip"))
       (sha256
        (base32
         "18rrlkhcrxvvvlapch4dpj6xc6mpayzys8qfppybi8jrpgx5cc5f"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:build-target "build"
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "build.xml"
               (("jbzip2-0.9.jar")
                (string-append (assoc-ref inputs "java-jbzip2")
                               "/share/java/jbzip2.jar"))
               (("sam-1.103.jar")
                (string-append (assoc-ref inputs "java-picard-1.113")
                               "/share/java/sam-1.112.jar"))
               (("cisd-jhdf5.jar")
                (string-append (assoc-ref inputs "java-cisd-jhdf5")
                               "/share/java/sis-jhdf5.jar")))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (share (string-append out "/share/fastqc/"))
                    ;; SHARE already ends with a slash; do not add another
                    ;; one, or the symlink target below contains "//".
                    (exe (string-append share "fastqc")))
               (for-each mkdir-p (list bin share))
               (copy-recursively "bin" share)
               ;; Hard-code the java executable of the "java" input in the
               ;; launcher script.
               (substitute* exe
                 (("my \\$java_bin = 'java';")
                  (string-append "my $java_bin = '"
                                 (assoc-ref inputs "java")
                                 "/bin/java';")))
               (chmod exe #o555)
               (symlink exe (string-append bin "/fastqc"))
               #t))))))
    (inputs
     `(("java" ,icedtea)
       ("perl" ,perl)                   ; needed for the wrapper script
       ("java-cisd-jhdf5" ,java-cisd-jhdf5)
       ("java-picard-1.113" ,java-picard-1.113)
       ("java-jbzip2" ,java-jbzip2)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/fastqc/")
    (synopsis "Quality control tool for high throughput sequence data")
    (description
     "FastQC aims to provide a simple way to do some quality control
checks on raw sequence data coming from high throughput sequencing
pipelines. It provides a modular set of analyses which you can use to
give a quick impression of whether your data has any problems of which
you should be aware before doing any further analysis.

The main functions of FastQC are:

@itemize
@item Import of data from BAM, SAM or FastQ files (any variant);
@item Providing a quick overview to tell you in which areas there may
be problems;
@item Summary graphs and tables to quickly assess your data;
@item Export of results to an HTML based permanent report;
@item Offline operation to allow automated generation of reports
without running the interactive application.
@end itemize\n")
    (license license:gpl3+)))
4166
;; fastp has no configure script; BINDIR is passed to make and the
;; directory is created before the 'install' phase runs.
(define-public fastp
  (package
    (name "fastp")
    (version "0.14.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/OpenGene/fastp")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1r6ms5zbf5rps4rgp4z73nczadl00b5rqylw8f684isfz27dp0xh"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:make-flags
       (list (string-append "BINDIR=" (assoc-ref %outputs "out") "/bin"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'install 'create-target-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (mkdir-p bin))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/OpenGene/fastp/")
    (synopsis "All-in-one FastQ preprocessor")
    (description
     "Fastp is a tool designed to provide fast all-in-one preprocessing for
FastQ files. This tool has multi-threading support to afford high
performance.")
    (license license:expat)))
4202
;; HTSlib release tarball, built with the stock GNU build system.
(define-public htslib
  (package
    (name "htslib")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "16ljv43sc3fxmv63w7b2ff8m1s7h89xhazwmbm1bicz8axq8fjz0"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)))
    (inputs
     `(("curl" ,curl)
       ("openssl" ,openssl)))
    ;; This is referred to in the pkg-config file as a required library.
    (propagated-inputs
     `(("zlib" ,zlib)))
    (home-page "https://www.htslib.org")
    (synopsis "C library for reading/writing high-throughput sequencing data")
    (description
     "HTSlib is a C library for reading/writing high-throughput sequencing
data. It also provides the @command{bgzip}, @command{htsfile}, and
@command{tabix} utilities.")
    ;; Files under cram/ are released under the modified BSD license;
    ;; the rest is released under the Expat license
    (license (list license:expat license:bsd-3))))
4233
;; This package should be removed once no packages rely upon it.
;; It inherits everything from htslib except the version and the source.
(define htslib-1.3
  (package
    (inherit htslib)
    (version "1.3.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/samtools/htslib/releases/download/"
             version "/htslib-" version ".tar.bz2"))
       (sha256
        (base32
         "1rja282fwdc25ql6izkhdyh8ppw8x2fs0w0js78zgkmqjlikmma9"))))))
4247
;; IDR is built with the Python build system; the generated C file shipped
;; in the repository is deleted by the source snippet.
(define-public idr
  (package
    (name "idr")
    (version "2.0.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/nboley/idr")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "04j876h6z444v2q79drxx283d3k5snd72kj895wbalnl42206x9g"))
       ;; Delete generated C code.
       (snippet
        '(begin (delete-file "idr/inv_cdf.c") #t))))
    (build-system python-build-system)
    ;; There is only one test ("test_inv_cdf.py") and it tests features that
    ;; are no longer part of this package. It also asserts False, which
    ;; causes the tests to always fail.
    (arguments `(#:tests? #f))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (propagated-inputs
     `(("python-scipy" ,python-scipy)
       ("python-sympy" ,python-sympy)
       ("python-numpy" ,python-numpy)
       ("python-matplotlib" ,python-matplotlib)))
    (home-page "https://github.com/nboley/idr")
    (synopsis "Tool to measure the irreproducible discovery rate (IDR)")
    (description
     "The IDR (Irreproducible Discovery Rate) framework is a unified approach
to measure the reproducibility of findings identified from replicate
experiments and provide highly stable thresholds based on reproducibility.")
    (license license:gpl2+)))
4283
;; Jellyfish is split into three outputs; the Ruby and Python bindings are
;; directed to their own outputs through configure flags.
(define-public jellyfish
  (package
    (name "jellyfish")
    (version "2.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/gmarcais/Jellyfish/"
                           "releases/download/v" version
                           "/jellyfish-" version ".tar.gz"))
       (sha256
        (base32
         "1k4pc3fvv6w1km2yph4m5sd78fbxp21d6xyzgmy0gjihzc6mb249"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for library
               "ruby"                   ;for Ruby bindings
               "python"))               ;for Python bindings
    (arguments
     `(#:configure-flags
       (let ((ruby (assoc-ref %outputs "ruby"))
             (python (assoc-ref %outputs "python")))
         (list (string-append "--enable-ruby-binding=" ruby)
               (string-append "--enable-python-binding=" python)))
       #:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-SHELL-variable
           (lambda _
             ;; generator_manager.hpp either uses /bin/sh or $SHELL
             ;; to run tests.
             (setenv "SHELL" (which "bash"))
             #t)))))
    (native-inputs
     `(("bc" ,bc)
       ("time" ,time)
       ("ruby" ,ruby)
       ("python" ,python-2)
       ("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)))
    (home-page "http://www.genome.umd.edu/jellyfish.html")
    (synopsis "Tool for fast counting of k-mers in DNA")
    (description
     "Jellyfish is a tool for fast, memory-efficient counting of k-mers in
DNA. A k-mer is a substring of length k, and counting the occurrences of all
such substrings is a central step in many analyses of DNA sequence. Jellyfish
is a command-line program that reads FASTA and multi-FASTA files containing
DNA sequences. It outputs its k-mer counts in a binary format, which can be
translated into a human-readable text format using the @code{jellyfish dump}
command, or queried for specific k-mers with @code{jellyfish query}.")
    ;; JELLYFISH seems to be 64-bit only.
    (supported-systems '("x86_64-linux" "aarch64-linux" "mips64el-linux"))
    ;; The combined work is published under the GPLv3 or later. Individual
    ;; files such as lib/jsoncpp.cpp are released under the Expat license.
    (license (list license:gpl3+ license:expat))))
4337
;; khmer is built with the Python build system.  The bundled copies of
;; zlib, bzip2 and seqan are removed by the source snippet.
(define-public khmer
  (package
    (name "khmer")
    (version "3.0.0a3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dib-lab/khmer")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "01l4jczglkl7yfhgvzx8j0df7k54bk1r8sli9ll16i1mis0d8f37"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete the bundled zlib, bzip2 and seqan.  This package lists
           ;; zlib, bzip2 and seqan-1 among its inputs instead.
           ;;
           ;; We do not replace the bundled MurmurHash as the canonical
           ;; repository for this code 'SMHasher' is unsuitable for providing
           ;; a library. See
           ;; https://lists.gnu.org/archive/html/guix-devel/2016-06/msg00977.html
           (delete-file-recursively "third-party/zlib")
           (delete-file-recursively "third-party/bzip2")
           (delete-file-recursively "third-party/seqan")
           ;; Enable linking against libz and libbz2 and drop the bundled
           ;; include directories from setup.cfg.
           (substitute* "setup.cfg"
             (("# libraries = z,bz2")
              "libraries = z,bz2")
             (("include:third-party/zlib:third-party/bzip2")
              "include:"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'set-cc
           (lambda _ (setenv "CC" "gcc") #t))

         (add-before 'reset-gzip-timestamps 'make-files-writable
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Make sure .gz files are writable so that the
             ;; 'reset-gzip-timestamps' phase can do its work.
             (let ((out (assoc-ref outputs "out")))
               (for-each make-file-writable
                         (find-files out "\\.gz$"))
               #t))))))
    (native-inputs
     `(("python-cython" ,python-cython)
       ("python-pytest" ,python-pytest)
       ("python-pytest-runner" ,python-pytest-runner)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ("seqan" ,seqan-1)
       ("python-screed" ,python-screed)
       ("python-bz2file" ,python-bz2file)))
    (home-page "https://khmer.readthedocs.org/")
    (synopsis "K-mer counting, filtering and graph traversal library")
    (description "The khmer software is a set of command-line tools for
working with DNA shotgun sequencing data from genomes, transcriptomes,
metagenomes and single cells. Khmer can make de novo assemblies faster, and
sometimes better. Khmer can also identify and fix problems with shotgun
data.")
    ;; When building on i686, armhf and mips64el, we get the following error:
    ;; error: ['khmer', 'khmer.tests', 'oxli'] require 64-bit operating system
    (supported-systems '("x86_64-linux" "aarch64-linux"))
    (license license:bsd-3)))
4407
(define-public kaiju
  (package
    (name "kaiju")
    (version "1.6.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/bioinformatics-centre/kaiju")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "119pzi0ddzv9mjg4wwa6han0cwr3k3ssn7kirvsjfcq05mi5ka0x"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; Run the build from the "src" sub-directory.
         (add-before 'build 'move-to-src-dir
           (lambda _
             (chdir "src")
             #t))
         ;; Install by copying the top-level "bin" directory into the output.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (target (string-append out "/bin")))
               (mkdir-p target)
               ;; Leave "src" again; "bin" sits next to it.
               (chdir "..")
               (copy-recursively "bin" target)
               #t))))))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "http://kaiju.binf.ku.dk/")
    (synopsis "Fast and sensitive taxonomic classification for metagenomics")
    (description "Kaiju is a program for sensitive taxonomic classification
of high-throughput sequencing reads from metagenomic whole genome sequencing
experiments.")
    (license license:gpl3+)))
4445
(define-public macs
  (package
    (name "macs")
    (version "2.2.6")
    (source (origin
              ;; The PyPi tarball does not contain tests.
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/taoliu/MACS")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1c5gxr0mk6hkd4vclf0k00wvyvzw2vrmk52c85338p7aqjwg6n15"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace the stock 'check phase with one that runs after
         ;; 'install, once the installed modules have been added to the
         ;; Python search path.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key tests? inputs outputs #:allow-other-keys)
             ;; Honor #:tests? so that the test suite can be switched off
             ;; like the standard 'check phase allows.
             (when tests?
               (add-installed-pythonpath inputs outputs)
               (invoke "pytest" "-v"))
             #t)))))
    (inputs
     `(("python-numpy" ,python-numpy)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/taoliu/MACS/")
    (synopsis "Model based analysis for ChIP-Seq data")
    (description
     "MACS is an implementation of a ChIP-Seq analysis algorithm for
identifying transcript factor binding sites named Model-based Analysis of
ChIP-Seq (MACS). MACS captures the influence of genome complexity to evaluate
the significance of enriched ChIP regions and it improves the spatial
resolution of binding sites through combining the information of both
sequencing tag position and orientation.")
    (license license:bsd-3)))
4483
(define-public mafft
  (package
    (name "mafft")
    (version "7.394")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "https://mafft.cbrc.jp/alignment/software/mafft-" version
                    "-without-extensions-src.tgz"))
              (file-name (string-append name "-" version ".tgz"))
              (sha256
               (base32
                "0bacjkxfg944p5khhyh5rd4y7wkjc9qk4v2jjj442sqlq0f8ar7b"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f ; no automated tests, though there are tests in the read me
       #:make-flags (let ((out (assoc-ref %outputs "out")))
                      (list (string-append "PREFIX=" out)
                            (string-append "BINDIR="
                                           (string-append out "/bin"))))
       #:phases
       (modify-phases %standard-phases
         ;; The Makefile and the sources patched below live in "core".
         (add-after 'unpack 'enter-dir
           (lambda _ (chdir "core") #t))
         (add-after 'enter-dir 'patch-makefile
           (lambda _
             ;; on advice from the MAFFT authors, there is no need to
             ;; distribute mafft-profile, mafft-distance, or
             ;; mafft-homologs.rb as they are too "specialised".
             (substitute* "Makefile"
               ;; remove mafft-homologs.rb from SCRIPTS
               (("^SCRIPTS = mafft mafft-homologs.rb")
                "SCRIPTS = mafft")
               ;; remove mafft-homologs from MANPAGES
               (("^MANPAGES = mafft.1 mafft-homologs.1")
                "MANPAGES = mafft.1")
               ;; remove mafft-distance from PROGS
               (("^PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance")
                "PROGS = dvtditr dndfast7 dndblast sextet5")
               ;; remove mafft-profile from PROGS.  The pattern has to name
               ;; "f2cl", the program kept by the replacement text, or it
               ;; can never match the line it is meant to rewrite.
               (("splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap")
                "splittbfast disttbfast tbfast f2cl mccaskillwrap")
               ;; comment out the clean-up commands for the removed programs
               (("^rm -f mafft-profile mafft-profile.exe") "#")
               (("^rm -f mafft-distance mafft-distance.exe") "#")
               ;; do not install MAN pages in libexec folder
               (("^\t\\$\\(INSTALL\\) -m 644 \\$\\(MANPAGES\\) \
\\$\\(DESTDIR\\)\\$\\(LIBDIR\\)") "#"))
             #t))
         (add-after 'enter-dir 'patch-paths
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Replace bare tool names with the absolute file names that
             ;; 'which' finds for them.
             (substitute* '("pairash.c"
                            "mafft.tmpl")
               (("perl") (which "perl"))
               (("([\"`| ])awk" _ prefix)
                (string-append prefix (which "awk")))
               (("grep") (which "grep")))
             #t))
         (delete 'configure)
         (add-after 'install 'wrap-programs
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Prefix PATH with the coreutils "bin" directory for every
             ;; file installed under "bin".
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (path (string-append
                           (assoc-ref %build-inputs "coreutils") "/bin:")))
               (for-each (lambda (file)
                           (wrap-program file
                             `("PATH" ":" prefix (,path))))
                         (find-files bin)))
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("ruby" ,ruby)
       ("gawk" ,gawk)
       ("grep" ,grep)
       ("coreutils" ,coreutils)))
    (home-page "http://mafft.cbrc.jp/alignment/software/")
    (synopsis "Multiple sequence alignment program")
    (description
     "MAFFT offers a range of multiple alignment methods for nucleotide and
protein sequences. For instance, it offers L-INS-i (accurate; for alignment
of <~200 sequences) and FFT-NS-2 (fast; for alignment of <~30,000
sequences).")
    (license (license:non-copyleft
              "http://mafft.cbrc.jp/alignment/software/license.txt"
              "BSD-3 with different formatting"))))
4569
(define-public mash
  (package
    (name "mash")
    (version "2.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/marbl/mash")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "049hwcc059p2fd9vwndn63laifvvsi0wmv84i6y1fr79k15dxwy6"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Drop the bundled copy of kseq.
           ;; TODO: Also delete bundled murmurhash and open bloom filter.
           (delete-file "src/mash/kseq.h")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; No tests.
       #:configure-flags
       (let ((directory (lambda (input)
                          (assoc-ref %build-inputs input))))
         (list (string-append "--with-capnp=" (directory "capnproto"))
               (string-append "--with-gsl=" (directory "gsl"))))
       #:make-flags (list "CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; The bundled kseq.h was deleted in the snippet; include the copy
         ;; under "htslib/" instead.
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* '("src/mash/Sketch.cpp"
                            "src/mash/CommandFind.cpp"
                            "src/mash/CommandScreen.cpp")
               (("^#include \"kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         (add-after 'fix-includes 'use-c++14
           (lambda _
             ;; capnproto 0.7 requires c++14 to build
             (substitute* '("configure.ac" "Makefile.in")
               (("c\\+\\+11") "c++14"))
             #t)))))
    (native-inputs
     `(("autoconf" ,autoconf)
       ;; Capnproto and htslib are statically embedded in the final
       ;; application.  Therefore we also list their licenses, below.
       ("capnproto" ,capnproto)
       ("htslib" ,htslib)))
    (inputs
     `(("gsl" ,gsl)
       ("zlib" ,zlib)))
    (supported-systems '("x86_64-linux"))
    (home-page "https://mash.readthedocs.io")
    (synopsis "Fast genome and metagenome distance estimation using MinHash")
    (description "Mash is a fast sequence distance estimator that uses the
MinHash algorithm and is designed to work with genomes and metagenomes in the
form of assemblies or reads.")
    (license (list license:bsd-3          ; Mash
                   license:expat          ; HTSlib and capnproto
                   license:public-domain  ; MurmurHash 3
                   license:cpl1.0))))     ; Open Bloom Filter
4635
(define-public metabat
  (package
    (name "metabat")
    (version "2.12.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://bitbucket.org/berkeleylab/metabat.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0hyg2smw1nz69mfvjpk45xyyychmda92c80a0cv7baji84ri4iyn"))
       (patches (search-patches "metabat-fix-compilation.patch"))))
    (build-system scons-build-system)
    (arguments
     `(#:scons ,scons-python2
       #:scons-flags
       (let ((out   (assoc-ref %outputs "out"))
             (boost (assoc-ref %build-inputs "boost")))
         (list (string-append "PREFIX=" out)
               (string-append "BOOST_ROOT=" boost)))
       #:tests? #f                ;; Tests are run during the build phase.
       #:phases
       (modify-phases %standard-phases
         ;; Rewrite the "bam/..." includes to the "samtools/" and "htslib/"
         ;; header locations.
         (add-after 'unpack 'fix-includes
           (lambda _
             (substitute* "src/BamUtils.h"
               (("^#include \"bam/bam\\.h\"")
                "#include \"samtools/bam.h\"")
               (("^#include \"bam/sam\\.h\"")
                "#include \"samtools/sam.h\""))
             (substitute* "src/KseqReader.h"
               (("^#include \"bam/kseq\\.h\"")
                "#include \"htslib/kseq.h\""))
             #t))
         ;; Point SConstruct at the htslib and samtools inputs.
         (add-after 'unpack 'fix-scons
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib   (assoc-ref inputs "htslib"))
                   (samtools (assoc-ref inputs "samtools")))
               (substitute* "SConstruct"
                 (("^htslib_dir += 'samtools'")
                  (string-append "htslib_dir = '" htslib "'"))
                 (("^samtools_dir = 'samtools'")
                  (string-append "samtools_dir = '" samtools "'"))
                 (("^findStaticOrShared\\('bam', hts_lib")
                  (string-append "findStaticOrShared('bam', '"
                                 samtools "/lib'"))
                 ;; Do not distribute README.
                 (("^env\\.Install\\(idir_prefix, 'README\\.md'\\)") ""))
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("perl" ,perl)
       ("samtools" ,samtools)
       ("htslib" ,htslib)
       ("boost" ,boost)))
    ;; The source code contains inline assembly.
    (supported-systems '("x86_64-linux" "i686-linux"))
    (home-page "https://bitbucket.org/berkeleylab/metabat")
    (synopsis
     "Reconstruction of single genomes from complex microbial communities")
    (description
     "Grouping large genomic fragments assembled from shotgun metagenomic
sequences to deconvolute complex microbial communities, or metagenome binning,
enables the study of individual organisms and their interactions. MetaBAT is
an automated metagenome binning software, which integrates empirical
probabilistic distances of genome abundance and tetranucleotide frequency.")
    (license (license:non-copyleft "file://license.txt"
                                   "See license.txt in the distribution."))))
4708
(define-public minced
  (package
    (name "minced")
    (version "0.3.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ctSkennerton/minced")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1f5h9him0gd355cnx7p6pnxpknhckd4g0v62mg8zyhfbx9as25fv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-before 'check 'fix-test
           (lambda _
             ;; Fix test for latest version.
             (substitute* "t/Aquifex_aeolicus_VF5.expected"
               (("minced:0.1.6") "minced:0.2.0"))
             #t))
         (replace 'install              ; No install target.
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Upstream ships a wrapper script that works out its own
             ;; location before running the JAR.  All file names are known
             ;; here, so write a minimal wrapper instead; that also avoids
             ;; a dependency on coreutils.
             (let* ((bin     (string-append (assoc-ref outputs "out") "/bin"))
                    (wrapper (string-append bin "/minced"))
                    (shell   (string-append (assoc-ref inputs "bash")
                                            "/bin/sh"))
                    (java    (string-append (assoc-ref inputs "jre")
                                            "/bin/java")))
               (install-file "minced.jar" bin)
               (call-with-output-file wrapper
                 (lambda (port)
                   (display (string-append "#!" shell "\n\n"
                                           java " -jar "
                                           bin "/minced.jar \"$@\"\n")
                            port)))
               (chmod wrapper #o555)
               #t))))))
    (inputs
     `(("bash" ,bash)
       ("jre" ,icedtea "out")))
    (native-inputs
     `(("jdk" ,icedtea "jdk")))
    (home-page "https://github.com/ctSkennerton/minced")
    (synopsis "Mining CRISPRs in Environmental Datasets")
    (description
     "MinCED is a program to find Clustered Regularly Interspaced Short
Palindromic Repeats (CRISPRs) in DNA sequences. It can be used for
unassembled metagenomic reads, but is mainly designed for full genomes and
assembled metagenomic sequence.")
    (license license:gpl3+)))
4765
(define-public miso
  (package
    (name "miso")
    (version "0.5.4")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "misopy" version))
       (sha256
        (base32
         "1z3x0vd8ma7pdrnywj7i3kgwl89sdkwrrn62zl7r5calqaq2hyip"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           (substitute* "setup.py"
             ;; Without setuptools the executables are not installed.
             (("distutils.core") "setuptools")
             ;; Compile and link with "gcc" rather than "cc".
             (("^defines")
              "cc.set_executables(
compiler='gcc',
compiler_so='gcc',
linker_exe='gcc',
linker_so='gcc -shared'); defines"))
           #t))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; only Python 2 is supported
       #:tests? #f))                    ; no "test" target
    (native-inputs
     `(("python-mock" ,python2-mock)    ; for tests
       ("python-pytz" ,python2-pytz)))  ; for tests
    (inputs
     `(("samtools" ,samtools)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)
       ("python-matplotlib" ,python2-matplotlib)))
    (home-page "https://www.genes.mit.edu/burgelab/miso/index.html")
    (synopsis "Mixture of Isoforms model for RNA-Seq isoform quantitation")
    (description
     "MISO (Mixture-of-Isoforms) is a probabilistic framework that quantitates
the expression level of alternatively spliced genes from RNA-Seq data, and
identifies differentially regulated isoforms or exons across samples. By
modeling the generative process by which reads are produced from isoforms in
RNA-Seq, the MISO model uses Bayesian inference to compute the probability
that a read originated from a particular isoform.")
    (license license:gpl2)))
4813
(define-public muscle
  (package
    (name "muscle")
    (version "3.8.1551")
    (source
     (origin
       (method url-fetch/tarbomb)
       (uri (string-append
             "http://www.drive5.com/muscle/muscle_src_"
             version ".tar.gz"))
       (sha256
        (base32
         "0bj8kj7sdizy3987zx6w7axihk40fk8rn76mpbqqjcnd64i5a367"))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags (list "LDLIBS = -lm")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; No test suite exists; merely check that the freshly built
         ;; program starts.
         (replace 'check
           (lambda _
             (invoke "./muscle" "-version")
             #t))
         ;; Copy the single executable into the output's "bin" directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "muscle"
                           (string-append (assoc-ref outputs "out")
                                          "/bin"))
             #t)))))
    (home-page "http://www.drive5.com/muscle")
    (synopsis "Multiple sequence alignment program")
    (description
     "MUSCLE aims to be a fast and accurate multiple sequence alignment
program for nucleotide and protein sequences.")
    ;; License information found in 'muscle -h' and usage.cpp.
    (license license:public-domain)))
4848
(define-public newick-utils
  ;; Packaged from a git commit because there are no recent releases.
  (let ((commit "da121155a977197cab9fbb15953ca1b40b11eb87"))
    (package
      (name "newick-utils")
      (version (string-append "1.6-1." (string-take commit 8)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/tjunier/newick_utils")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1hkw21rq1mwf7xp0rmbb2gqc0i6p11108m69i7mr7xcjl268pxnb"))))
      (build-system gnu-build-system)
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("libtool" ,libtool)))
      (inputs
       ;; XXX: TODO: Enable Lua and Guile bindings.
       ;; https://github.com/tjunier/newick_utils/issues/13
       `(("libxml2" ,libxml2)
         ("flex" ,flex)
         ("bison" ,bison)))
      (home-page "https://github.com/tjunier/newick_utils")
      (synopsis "Programs for working with newick format phylogenetic trees")
      (description
       "Newick-utils is a suite of utilities for processing phylogenetic trees
in Newick format. Functions include re-rooting, extracting subtrees,
trimming, pruning, condensing, drawing (ASCII graphics or SVG).")
      (license license:bsd-3))))
4882
(define-public orfm
  (package
    (name "orfm")
    (version "0.7.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/wwood/OrfM/releases/download/v"
             version "/orfm-" version ".tar.gz"))
       (sha256
        (base32
         "16iigyr2gd8x0imzkk1dr3k5xsds9bpmwg31ayvjg0f4pir9rwqr"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("ruby-bio-commandeer" ,ruby-bio-commandeer)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby" ,ruby)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/wwood/OrfM")
    (synopsis "Simple and not slow open reading frame (ORF) caller")
    (description
     "An ORF caller finds stretches of DNA that, when translated, are not
interrupted by stop codons. OrfM finds and prints these ORFs.")
    (license license:lgpl3+)))
4907
(define-public python2-pbcore
  (package
    (name "python2-pbcore")
    (version "1.2.10")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pbcore" version))
       (sha256
        (base32
         "1kjmv891d6qbpp4shhhvkl02ff4q5xlpnls2513sm2cjcrs52f1i"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ;pbcore < 2.0 requires Python 2.7
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'remove-sphinx-dependency
           (lambda _
             ;; Only the documentation tests need Sphinx, and those are
             ;; not run; besides, it would pull in python2-sphinx, which is
             ;; no longer maintained.
             (substitute* "requirements-dev.txt"
               (("^sphinx") ""))
             #t)))))
    (native-inputs
     `(("python-nose" ,python2-nose)
       ("python-pyxb" ,python2-pyxb)))
    (propagated-inputs
     `(("python-cython" ,python2-cython)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-h5py" ,python2-h5py)))
    (home-page "https://pacificbiosciences.github.io/pbcore/")
    (synopsis "Library for reading and writing PacBio data files")
    (description
     "The pbcore package provides Python APIs for interacting with PacBio data
files and writing bioinformatics applications.")
    (license license:bsd-3)))
4944
(define-public python2-warpedlmm
  (package
    (name "python2-warpedlmm")
    (version "0.21")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "WarpedLMM" version ".zip"))
       (sha256
        (base32
         "1agfz6zqa8nc6cw47yh0s3y14gkpa9wqazwcj7mwwj3ffnw39p3j"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; requires Python 2.7
       #:tests? #f                      ; test data are not included
       #:phases
       (modify-phases %standard-phases
         ;; Import "weave" as a module of its own instead of taking it
         ;; from scipy.
         (add-after 'unpack 'use-weave
           (lambda _
             (substitute* "warpedlmm/util/linalg.py"
               (("from scipy import linalg, weave")
                "from scipy import linalg\nimport weave"))
             #t)))))
    (native-inputs
     `(("python-mock" ,python2-mock)
       ("python-nose" ,python2-nose)
       ("unzip" ,unzip)))
    (propagated-inputs
     `(("python-scipy" ,python2-scipy)
       ("python-numpy" ,python2-numpy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-fastlmm" ,python2-fastlmm)
       ("python-pandas" ,python2-pandas)
       ("python-pysnptools" ,python2-pysnptools)
       ("python-weave" ,python2-weave)))
    (home-page "https://github.com/PMBio/warpedLMM")
    (synopsis "Implementation of warped linear mixed models")
    (description
     "WarpedLMM is a Python implementation of the warped linear mixed model,
which automatically learns an optimal warping function (or transformation) for
the phenotype as it models the data.")
    (license license:asl2.0)))
4987
(define-public pbtranscript-tofu
  (let ((commit "8f5467fe6a4472bcfb4226c8720993c8507adfe4"))
    (package
      (name "pbtranscript-tofu")
      (version (string-append "2.2.3." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/PacificBiosciences/cDNA_primer")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32
           "1lgnpi35ihay42qx0b6yl3kkgra723i413j33kvs0kvs61h82w0f"))
         (modules '((guix build utils)))
         (snippet
          '(begin
             ;; Get rid of the bundled Cython source tarball.
             (delete-file "pbtranscript-tofu/pbtranscript/Cython-0.20.1.tar.gz")
             #t))))
      (build-system python-build-system)
      (arguments
       `(#:python ,python-2
         ;; FIXME: Tests fail with "No such file or directory:
         ;; pbtools/pbtranscript/modified_bx_intervals/intersection_unique.so"
         #:tests? #f
         #:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'enter-directory
             (lambda _
               (chdir "pbtranscript-tofu/pbtranscript/")
               #t))
           ;; This setup.py hack causes a build error with setuptools 18.0
           ;; and later, so turn its condition into a constant false.
           (add-after 'enter-directory 'patch-setuppy
             (lambda _
               (substitute* "setup.py"
                 (("if 'setuptools.extension' in sys.modules:")
                  "if False:"))
               #t)))))
      (native-inputs
       `(("python-cython" ,python2-cython)
         ("python-nose" ,python2-nose)))
      (inputs
       `(("python-numpy" ,python2-numpy)
         ("python-bx-python" ,python2-bx-python)
         ("python-networkx" ,python2-networkx)
         ("python-scipy" ,python2-scipy)
         ("python-pbcore" ,python2-pbcore)
         ("python-h5py" ,python2-h5py)))
      (home-page "https://github.com/PacificBiosciences/cDNA_primer")
      (synopsis "Analyze transcriptome data generated with the Iso-Seq protocol")
      (description
       "pbtranscript-tofu contains scripts to analyze transcriptome data
generated using the PacBio Iso-Seq protocol.")
      (license license:bsd-3))))
5044
(define-public prank
  (package
    (name "prank")
    (version "170427")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://wasabiapp.org/download/prank/prank.source."
             version ".tgz"))
       (sha256
        (base32
         "0nc8g9c5rkdxcir46s0in9ci1sxwzbjibxrvkksf22ybnplvagk2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; This phase runs from the top of the unpacked source, before
         ;; "src" is entered, hence the "src/Makefile" file name.
         (add-after 'unpack 'remove-m64-flag
           ;; Prank will build with the correct 'bit-ness' without this flag
           ;; and this allows building on 32-bit machines.
           (lambda _
             (substitute* "src/Makefile"
               (("-m64") ""))
             #t))
         (add-after 'remove-m64-flag 'enter-src-dir
           (lambda _
             (chdir "src")
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out  (assoc-ref outputs "out"))
                    (bin  (string-append out "/bin"))
                    (man  (string-append out "/share/man/man1"))
                    ;; "bin" directories of the helper programs, joined
                    ;; into a single search path for the wrapper.
                    (path (string-append
                           (assoc-ref %build-inputs "mafft") "/bin:"
                           (assoc-ref %build-inputs "exonerate") "/bin:"
                           (assoc-ref %build-inputs "bppsuite") "/bin")))
               (install-file "prank" bin)
               (wrap-program (string-append bin "/prank")
                 `("PATH" ":" prefix (,path)))
               (install-file "prank.1" man)
               #t))))))
    (inputs
     `(("mafft" ,mafft)
       ("exonerate" ,exonerate)
       ("bppsuite" ,bppsuite)))
    (home-page "http://wasabiapp.org/software/prank/")
    (synopsis "Probabilistic multiple sequence alignment program")
    (description
     "PRANK is a probabilistic multiple sequence alignment program for DNA,
codon and amino-acid sequences. It is based on a novel algorithm that treats
insertions correctly and avoids over-estimation of the number of deletion
events. In addition, PRANK borrows ideas from maximum likelihood methods used
in phylogenetics and correctly takes into account the evolutionary distances
between sequences. Lastly, PRANK allows for defining a potential structure
for sequences to be aligned and then, simultaneously with the alignment,
predicts the locations of structural units in the sequences.")
    (license license:gpl2+)))
5102
(define-public proteinortho
  (package
    (name "proteinortho")
    (version "6.0.14")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://gitlab.com/paulklemm_PHD/proteinortho.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0pmy617zy2z2w6hjqxjhf3rzikf5n3mpia80ysq8233vfr7wrzff"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete the pre-built scripts shipped with the source.
           (delete-file-recursively "src/BUILD/")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           ;; Without a configure script the install location has to be
           ;; written into the Makefile itself.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               (substitute* "Makefile"
                 (("INSTALLDIR=.*")
                  (string-append "INSTALLDIR=" out "/bin\n")))
               #t)))
         (add-before 'install 'make-install-directory
           ;; 'make install' does not create the install directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (mkdir-p (string-append (assoc-ref outputs "out") "/bin"))
             #t))
         ;; Wrap the main program and every installed .pl/.py script so
         ;; that the build-time PATH is prefixed to PATH when they run.
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out         (assoc-ref outputs "out"))
                    (search-path (getenv "PATH"))
                    (scripts     (cons (string-append out "/bin/proteinortho")
                                       (find-files out "\\.(pl|py)$"))))
               (for-each (lambda (script)
                           (wrap-script script
                             `("PATH" ":" prefix (,search-path))))
                         scripts)
               #t))))))
    (native-inputs
     `(("which" ,which)))
    (inputs
     `(("guile" ,guile-3.0)             ; for wrap-script
       ("diamond" ,diamond)
       ("perl" ,perl)
       ("python" ,python-wrapper)
       ("blast+" ,blast+)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (home-page "http://www.bioinf.uni-leipzig.de/Software/proteinortho")
    (synopsis "Detect orthologous genes across species")
    (description
     "Proteinortho is a tool to detect orthologous genes across different
species. For doing so, it compares similarities of given gene sequences and
clusters them to find significant groups. The algorithm was designed to handle
large-scale data and can be applied to hundreds of species at once.")
    (license license:gpl3+)))
5168
(define-public pyicoteo
  (package
    (name "pyicoteo")
    (version "2.0.7")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://bitbucket.org/regulatorygenomicsupf/pyicoteo.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0hz5g8d25lbjy1wpscr490l0lmyvaix893hhax4fxnh1h9w34w8p"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; does not work with Python 3
       #:tests? #f))                    ; there are no tests
    (inputs
     `(("python2-matplotlib" ,python2-matplotlib)))
    (home-page "https://bitbucket.org/regulatorygenomicsupf/pyicoteo")
    (synopsis "Analyze high-throughput genetic sequencing data")
    ;; The tool count in the text must agree with the number of @item
    ;; entries in the list below (currently seven).
    (description
     "Pyicoteo is a suite of tools for the analysis of high-throughput genetic
sequencing data. It works with genomic coordinates. There are currently seven
different command-line tools:

@enumerate
@item pyicoregion: for generating exploratory regions automatically;
@item pyicoenrich: for differential enrichment between two conditions;
@item pyicoclip: for calling CLIP-Seq peaks without a control;
@item pyicos: for genomic coordinates manipulation;
@item pyicoller: for peak calling on punctuated ChIP-Seq;
@item pyicount: to count how many reads from N experiment files overlap in a
region file;
@item pyicotrocol: to combine operations from pyicoteo.
@end enumerate\n")
    (license license:gpl3+)))
5207
(define-public prodigal
  (package
    (name "prodigal")
    ;; When updating this package, look for a new home page:
    ;; https://github.com/hyattpd/Prodigal/issues/36#issuecomment-536617588
    (version "2.6.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/hyattpd/Prodigal")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1fs1hqk83qjbjhrvhw6ni75zakx5ki1ayy3v6wwkn3xvahc9hi5s"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       ;; Pass the output's "bin" directory to make as INSTALLDIR.
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list (string-append "INSTALLDIR=" out "/bin")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (home-page "https://github.com/hyattpd/Prodigal")
    (synopsis "Protein-coding gene prediction for Archaea and Bacteria")
    (description
     "Prodigal runs smoothly on finished genomes, draft genomes, and
metagenomes, providing gene predictions in GFF3, Genbank, or Sequin table
format. It runs quickly, in an unsupervised fashion, handles gaps, handles
partial genes, and identifies translation initiation sites.")
    (license license:gpl3+)))
5240
(define-public roary
  (package
    (name "roary")
    (version "3.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "mirror://cpan/authors/id/A/AJ/AJPAGE/Bio-Roary-"
             version ".tar.gz"))
       (sha256
        (base32
         "0qxrds9wx7cfhlkihrp6697kx0flhhxymap9fwan0b3rbdhcnmff"))))
    (build-system perl-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'check
           (lambda _
             ;; The tests are not run by default, so we run each test file
             ;; directly.  The scripts and modules of the source tree are
             ;; put on PATH and PERL5LIB first.
             (setenv "PATH" (string-append (getcwd) "/bin" ":"
                                           (getenv "PATH")))
             (setenv "PERL5LIB" (string-append (getcwd) "/lib" ":"
                                               (getenv "PERL5LIB")))
             (for-each (lambda (file)
                         (display file)(display "\n")
                         (invoke "perl" file))
                       (find-files "t" ".*\\.t$"))
             #t))
         (replace 'install
           ;; There is no 'install' target in the Makefile, so copy the
           ;; "bin" and "lib" directories into the output by hand.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (perl (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bin)
               (mkdir-p perl)
               (copy-recursively "bin" bin)
               (copy-recursively "lib" perl)
               #t)))
         (add-after 'install 'wrap-programs
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (perl5lib (getenv "PERL5LIB"))
                    (path (getenv "PATH")))
               ;; Wrap every installed script whose name does not end in
               ;; "R".  FIND-FILES runs on the "bin" directory of the source
               ;; tree and returns names such as "bin/roary", which are
               ;; appended to the output directory.
               (for-each (lambda (prog)
                           (let ((binary (string-append out "/" prog)))
                             (wrap-program binary
                               `("PERL5LIB" ":" prefix
                                 (,(string-append perl5lib ":" out
                                                  "/lib/perl5/site_perl")))
                               `("PATH" ":" prefix
                                 (,(string-append path ":" out "/bin"))))))
                         (find-files "bin" ".*[^R]$"))
               ;; The R plotting script gets its own wrapper, with the R
               ;; library path and coreutils on PATH.
               (let ((file
                      (string-append out "/bin/roary-create_pan_genome_plots.R"))
                     (r-site-lib (getenv "R_LIBS_SITE"))
                     (coreutils-path
                      (string-append (assoc-ref inputs "coreutils") "/bin")))
                 (wrap-program file
                   `("R_LIBS_SITE" ":" prefix
                     (,(string-append r-site-lib ":" out "/site-library/")))
                   `("PATH" ":" prefix
                     (,(string-append coreutils-path ":" out "/bin"))))))
             #t)))))
    (native-inputs
     `(("perl-env-path" ,perl-env-path)
       ("perl-test-files" ,perl-test-files)
       ("perl-test-most" ,perl-test-most)
       ("perl-test-output" ,perl-test-output)))
    (inputs
     `(("perl-array-utils" ,perl-array-utils)
       ("bioperl" ,bioperl-minimal)
       ("perl-digest-md5-file" ,perl-digest-md5-file)
       ("perl-exception-class" ,perl-exception-class)
       ("perl-file-find-rule" ,perl-file-find-rule)
       ("perl-file-grep" ,perl-file-grep)
       ("perl-file-slurper" ,perl-file-slurper)
       ("perl-file-which" ,perl-file-which)
       ("perl-graph" ,perl-graph)
       ("perl-graph-readwrite" ,perl-graph-readwrite)
       ("perl-log-log4perl" ,perl-log-log4perl)
       ("perl-moose" ,perl-moose)
       ("perl-perlio-utf8_strict" ,perl-perlio-utf8_strict)
       ("perl-text-csv" ,perl-text-csv)
       ("bedtools" ,bedtools)
       ("cd-hit" ,cd-hit)
       ("blast+" ,blast+)
       ("mcl" ,mcl)
       ("parallel" ,parallel)
       ("prank" ,prank)
       ("mafft" ,mafft)
       ("fasttree" ,fasttree)
       ("grep" ,grep)
       ("sed" ,sed)
       ("gawk" ,gawk)
       ("r-minimal" ,r-minimal)
       ("r-ggplot2" ,r-ggplot2)
       ("coreutils" ,coreutils)))
    (home-page "https://sanger-pathogens.github.io/Roary/")
    (synopsis "High speed stand-alone pan genome pipeline")
    (description
     "Roary is a high speed stand alone pan genome pipeline, which takes
annotated assemblies in GFF3 format (produced by the Prokka program) and
calculates the pan genome. Using a standard desktop PC, it can analyse
datasets with thousands of samples, without compromising the quality of the
results. 128 samples can be analysed in under 1 hour using 1 GB of RAM and a
single processor. Roary is not intended for metagenomics or for comparing
extremely diverse sets of genomes.")
    (license license:gpl3)))
5357
(define-public raxml
  (package
    (name "raxml")
    (version "8.2.12")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/stamatak/standard-RAxML")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1jqjzhch0rips0vp04prvb8vmc20c5pdmsqn8knadcf91yy859fh"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; There are no tests.
       ;; Use 'standard' Makefile rather than SSE or AVX ones.
       #:make-flags (list "-f" "Makefile.HYBRID.gcc")
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           ;; Install the executable and provide it under the shorter name
           ;; "raxml" as well.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (executable "raxmlHPC-HYBRID"))
               (install-file executable bin)
               ;; SYMLINK takes the link target first and the link name
               ;; second.  The link has to be created inside BIN; a bare
               ;; "raxml" would end up in the build directory and never
               ;; reach the output.  A relative target keeps the link valid
               ;; within the output directory.
               (symlink executable (string-append bin "/raxml")))
             #t)))))
    (inputs
     `(("openmpi" ,openmpi)))
    (home-page "https://cme.h-its.org/exelixis/web/software/raxml/index.html")
    (synopsis "Randomized Axelerated Maximum Likelihood phylogenetic trees")
    (description
     "RAxML is a tool for phylogenetic analysis and post-analysis of large
phylogenies.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:gpl2+)))
5398
(define-public rsem
  (package
    (name "rsem")
    (version "1.3.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deweylab/RSEM")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1jlq11d1p8qp64w75yj8cnbbd1a93viq10pzsbwal7vdn8fg13j1"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; remove bundled copy of boost and samtools
           (delete-file-recursively "boost")
           (delete-file-recursively "samtools-1.3")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; Build against the boost and htslib inputs instead of the bundled
       ;; copies deleted by the source snippet.
       #:make-flags
       (let ((boost (assoc-ref %build-inputs "boost"))
             (htslib (assoc-ref %build-inputs "htslib")))
         (list (string-append "BOOST=" boost "/include/")
               (string-append "SAMHEADERS=" htslib "/include/htslib/sam.h")
               (string-append "SAMLIBS=" htslib "/lib/libhts.a")))
       #:phases
       (modify-phases %standard-phases
         ;; No "configure" script.
         ;; Do not build bundled samtools library.
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               (("^all : \\$\\(PROGRAMS\\).*") "all: $(PROGRAMS)\n")
               (("^\\$\\(SAMLIBS\\).*") ""))
             #t))
         (replace 'install
           ;; Copy every file matching "rsem-.*" to bin/ and the Perl helper
           ;; module to the site_perl directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/"))
                    (perldir (string-append out "/lib/perl5/site_perl")))
               (mkdir-p bindir)
               (mkdir-p perldir)
               (for-each (lambda (program)
                           (install-file program bindir))
                         (find-files "." "rsem-.*"))
               (install-file "rsem_perl_utils.pm" perldir))
             #t))
         (add-after 'install 'wrap-program
           ;; Let the listed scripts find the Perl module installed above.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (site-perl (string-append out "/lib/perl5/site_perl")))
               (for-each (lambda (script)
                           (wrap-program (string-append out "/bin/" script)
                             `("PERL5LIB" ":" prefix (,site-perl))))
                         '("rsem-calculate-expression"
                           "rsem-control-fdr"
                           "rsem-generate-data-matrix"
                           "rsem-generate-ngvector"
                           "rsem-plot-transcript-wiggles"
                           "rsem-prepare-reference"
                           "rsem-run-ebseq"
                           "rsem-run-prsem-testing-procedure")))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("r-minimal" ,r-minimal)
       ("perl" ,perl)
       ("htslib" ,htslib-1.3)
       ("zlib" ,zlib)))
    (home-page "http://deweylab.biostat.wisc.edu/rsem/")
    (synopsis "Estimate gene expression levels from RNA-Seq data")
    (description
     "RSEM is a software package for estimating gene and isoform expression
levels from RNA-Seq data. The RSEM package provides a user-friendly
interface, supports threads for parallel computation of the EM algorithm,
single-end and paired-end read data, quality scores, variable-length reads and
RSPD estimation. In addition, it provides posterior mean and 95% credibility
interval estimates for expression levels. For visualization, it can generate
BAM and Wiggle files in both transcript-coordinate and genomic-coordinate.")
    (license license:gpl3+)))
5487
(define-public rseqc
  (package
    (name "rseqc")
    (version "3.0.1")
    (source
     (origin
       (method url-fetch)
       ;; Release tarballs are fetched from the SourceForge mirror.
       (uri (string-append "mirror://sourceforge/rseqc/RSeQC-"
                           version ".tar.gz"))
       (sha256
        (base32 "0gbb9iyb7swiv5455fm5rg98r7l6qn27v564yllqjd574hncpx6m"))))
    (build-system python-build-system)
    (inputs
     `(("python-cython" ,python-cython)
       ("python-bx-python" ,python-bx-python)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-numpy" ,python-numpy)
       ("zlib" ,zlib)))
    (native-inputs
     `(("python-nose" ,python-nose)))
    (home-page "http://rseqc.sourceforge.net/")
    (synopsis "RNA-seq quality control package")
    (description
     "RSeQC provides a number of modules that can comprehensively evaluate
high throughput sequence data, especially RNA-seq data. Some basic modules
inspect sequence quality, nucleotide composition bias, PCR bias and GC bias,
while RNA-seq specific modules evaluate sequencing saturation, mapped reads
distribution, coverage uniformity, strand specificity, etc.")
    (license license:gpl3+)))
5520
(define-public seek
  ;; There are no release tarballs.  According to the installation
  ;; instructions at http://seek.princeton.edu/installation.jsp, the latest
  ;; stable release is identified by this changeset ID.
  (let ((changeset "2329130")
        (revision "1"))
    (package
      (name "seek")
      (version (string-append "0-" revision "." changeset))
      (source
       (origin
         (method hg-fetch)
         (uri (hg-reference
               (url "https://bitbucket.org/libsleipnir/sleipnir")
               (changeset changeset)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "0qrvilwh18dpbhkf92qvxbmay0j75ra3jg2wrhz67gf538zzphsx"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((srfi srfi-1)
                    (guix build gnu-build-system)
                    (guix build utils))
         #:phases
         (let* ((tools '("SeekMiner"
                         "SeekEvaluator"
                         "SeekPrep"
                         "Distancer"
                         "Data2DB"
                         "PCL2Bin"))
                ;; Run "make" with MAKE-ARGS inside each "tools/<name>"
                ;; directory listed above.
                (make-in-each-tool
                 (lambda (make-args)
                   (for-each (lambda (tool)
                               (with-directory-excursion
                                   (string-append "tools/" tool)
                                 (apply invoke "make" make-args)))
                             tools))))
           (modify-phases %standard-phases
             (replace 'bootstrap
               (lambda _
                 ;; Replace the "/usr/bin/env ..." interpreter reference
                 ;; with the perl found on PATH before generating the
                 ;; build system.
                 (substitute* "gen_tools_am"
                   (("/usr/bin/env.*") (which "perl")))
                 (invoke "bash" "gen_auto")
                 #t))
             (add-after 'build 'build-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (make-in-each-tool make-flags)
                 #t))
             (add-after 'install 'install-additional-tools
               (lambda* (#:key make-flags #:allow-other-keys)
                 (make-in-each-tool (append make-flags '("install")))
                 #t))))))
      (inputs
       `(("gsl" ,gsl)
         ("boost" ,boost)
         ("libsvm" ,libsvm)
         ("readline" ,readline)
         ("gengetopt" ,gengetopt)
         ("log4cpp" ,log4cpp)))
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ("perl" ,perl)))
      (home-page "http://seek.princeton.edu")
      (synopsis "Gene co-expression search engine")
      (description
       "SEEK is a computational gene co-expression search engine. SEEK provides
biologists with a way to navigate the massive human expression compendium that
now contains thousands of expression datasets. SEEK returns a robust ranking
of co-expressed genes in the biological area of interest defined by the user's
query genes. It also prioritizes thousands of expression datasets according
to the user's query of interest.")
      (license license:cc-by3.0))))
5593
(define-public samtools
  (package
    (name "samtools")
    (version "1.9")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "10ilqbmm7ri8z431sn90lvbjwizd0hhkf9rcqw8j823hf26nhgq8"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled htslib.
           (delete-file-recursively "htslib-1.9")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:modules ((ice-9 ftw)
                  (ice-9 regex)
                  (guix build gnu-build-system)
                  (guix build utils))
       #:configure-flags (list "--with-ncurses")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-tests
           (lambda _
             ;; The test script calls out to /bin/bash
             (substitute* "test/test.pl"
               (("/bin/bash") (which "bash")))
             #t))
         (add-after 'install 'install-library
           ;; Ship the static library libbam.a in the output's lib
           ;; directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (libdir (string-append out "/lib")))
               (install-file "libbam.a" libdir)
               #t)))
         (add-after 'install 'install-headers
           ;; Copy every "*.h" file of the top-level source directory to
           ;; include/samtools/.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (incdir (string-append out "/include/samtools/"))
                    (header? (lambda (name) (string-match "\\.h$" name))))
               (for-each (lambda (header)
                           (install-file header incdir))
                         (scandir "." header?))
               #t))))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs
     `(("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (home-page "http://samtools.sourceforge.net")
    (synopsis "Utilities to efficiently manipulate nucleotide sequence alignments")
    (description
     "Samtools implements various utilities for post-processing nucleotide
sequence alignments in the SAM, BAM, and CRAM formats, including indexing,
variant calling (in conjunction with bcftools), and a simple alignment
viewer.")
    (license license:expat)))
5655
(define-public samtools-0.1
  ;; This is the most recent version of the 0.1 line of samtools.  Its input
  ;; and output formats differ greatly from those used and produced by
  ;; samtools 1.x, and it is still used in many bioinformatics pipelines.
  (package
    (inherit samtools)
    (version "0.1.19")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/samtools/samtools/"
                           version "/samtools-" version ".tar.bz2"))
       (sha256
        (base32 "1m33xsfwz0s8qi45lylagfllqg7fphf4dr0780rsvw75av9wk06h"))))
    (arguments
     `(#:tests? #f                      ;no "check" target
       #:make-flags (list "LIBCURSES=-lncurses")
       ;; Reuse the arguments of samtools, adjusting only its phases.
       ,@(substitute-keyword-arguments (package-arguments samtools)
           ((#:phases phases)
            `(modify-phases ,phases
               (replace 'install
                 ;; Only the "samtools" executable is copied to bin/ here.
                 (lambda* (#:key outputs #:allow-other-keys)
                   (let* ((out (assoc-ref outputs "out"))
                          (bindir (string-append out "/bin")))
                     (mkdir-p bindir)
                     (install-file "samtools" bindir)
                     #t)))
               (delete 'patch-tests)
               (delete 'configure))))))))
5686
(define-public mosaik
  (let ((commit "5c25216d3522d6a33e53875cd76a6d65001e4e67"))
    (package
      (name "mosaik")
      (version "2.2.30")
      (source
       (origin
         ;; There are no release tarballs nor tags.
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/wanpinglee/MOSAIK")
               (commit commit)))
         (file-name (string-append name "-" version))
         (sha256
          (base32 "17gj3s07cm77r41z92awh0bim7w7q7fbn0sf5nkqmcm1vw052qgw"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no tests
         #:make-flags (list "CC=gcc")
         #:phases
         (modify-phases %standard-phases
           ;; Instead of configuring, enter the "src" directory so that the
           ;; following phases run there.
           (replace 'configure
             (lambda _
               (chdir "src")
               #t))
           (replace 'install
             ;; Copy the "bin" directory, a sibling of "src", to the output.
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin")))
                 (mkdir-p bindir)
                 (copy-recursively "../bin" bindir)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("zlib:static" ,zlib "static")
         ("zlib" ,zlib)))
      (supported-systems '("x86_64-linux"))
      (home-page "https://github.com/wanpinglee/MOSAIK")
      (synopsis "Map nucleotide sequence reads to reference genomes")
      (description
       "MOSAIK is a program for mapping second and third-generation sequencing
reads to a reference genome. MOSAIK can align reads generated by all the
major sequencing technologies, including Illumina, Applied Biosystems SOLiD,
Roche 454, Ion Torrent and Pacific BioSciences SMRT.")
      ;; MOSAIK is released under the GPLv2+ with the exception of third-party
      ;; code released into the public domain:
      ;; 1. fastlz by Ariya Hidayat - http://www.fastlz.org/
      ;; 2. MD5 implementation - RSA Data Security, RFC 1321
      (license (list license:gpl2+ license:public-domain)))))
5734
(define-public ngs-sdk
  (package
    (name "ngs-sdk")
    (version "2.10.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/ngs")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1ix51c25hjn57w93qmwzw80xh2i34wx8j2hn7szh8p6w8i3az5qa"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         ;; Build from the "ngs-sdk" sub-directory of the checkout.
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "ngs-sdk")
             #t))
         (replace 'configure
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((prefix (assoc-ref outputs "out"))
                   (build-dir (string-append (getcwd) "/build")))
               ;; Allow 'konfigure.perl' to find 'package.prl'.
               (setenv "PERL5LIB"
                       (string-append ".:" (getenv "PERL5LIB")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure"
                       (string-append "--build-prefix=" build-dir)
                       (string-append "--prefix=" prefix))
               #t))))))
    (native-inputs `(("perl" ,perl)))
    ;; According to the test
    ;;   unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i)
    ;; in ngs-sdk/setup/konfigure.perl
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ngs")
    (synopsis "API for accessing Next Generation Sequencing data")
    (description
     "NGS is a domain-specific API for accessing reads, alignments and pileups
produced from Next Generation Sequencing. The API itself is independent from
any particular back-end implementation, and supports use of multiple back-ends
simultaneously.")
    (license license:public-domain)))
5782
(define-public java-ngs
  (package
    (inherit ngs-sdk)
    (name "java-ngs")
    (arguments
     ;; Same arguments as ngs-sdk, with an explicit #:modules list in front
     ;; and the 'enter-dir phase switched to the "ngs-java" sub-directory.
     (substitute-keyword-arguments
         `(#:modules ((guix build gnu-build-system)
                      (guix build utils)
                      (srfi srfi-1)
                      (srfi srfi-26))
           ,@(package-arguments ngs-sdk))
       ((#:phases phases)
        `(modify-phases ,phases
           (replace 'enter-dir
             (lambda _
               (chdir "ngs-java")
               #t))))))
    (inputs
     `(("jdk" ,icedtea "jdk")
       ("ngs-sdk" ,ngs-sdk)))
    (synopsis "Java bindings for NGS SDK")))
5800
(define-public ncbi-vdb
  (package
    (name "ncbi-vdb")
    (version "2.10.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/ncbi-vdb")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "0m8hlxscidsfqm9x9fyi62q6lpf1dv5115kgjjgnrkl49q9c27m6"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       #:make-flags '("HAVE_HDF5=1")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'make-files-writable
           (lambda _
             (for-each make-file-writable (find-files "." ".*"))
             #t))
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (setenv "PERL5LIB"
                     (string-append (getcwd) "/setup:"
                                    (getenv "PERL5LIB")))
             #t))
         ;; See https://github.com/ncbi/ncbi-vdb/issues/14
         (add-after 'unpack 'patch-krypto-flags
           (lambda _
             (substitute* "libs/krypto/Makefile"
               (("-Wa,-march=generic64\\+aes") "")
               (("-Wa,-march=generic64\\+sse4") ""))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; Override include path for libmagic
               (substitute* "setup/package.prl"
                 (("name => 'magic', Include => '/usr/include'")
                  (string-append "name=> 'magic', Include => '"
                                 (assoc-ref inputs "libmagic")
                                 "/include" "'")))

               ;; Install kdf5 library (needed by sra-tools)
               (substitute* "build/Makefile.install"
                 (("LIBRARIES_TO_INSTALL =")
                  "LIBRARIES_TO_INSTALL = kdf5.$(VERSION_LIBX) kdf5.$(VERSION_SHLX)"))

               ;; Add "-msse2" to the compiler flags.
               (substitute* "build/Makefile.env"
                 (("CFLAGS =" prefix)
                  (string-append prefix "-msse2 ")))

               ;; Override search path for ngs-java
               (substitute* "setup/package.prl"
                 (("/usr/local/ngs/ngs-java")
                  (assoc-ref inputs "java-ngs")))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" out)
                       "--debug"
                       (string-append "--with-xml2-prefix="
                                      (assoc-ref inputs "libxml2"))
                       (string-append "--with-ngs-sdk-prefix="
                                      (assoc-ref inputs "ngs-sdk"))
                       (string-append "--with-hdf5-prefix="
                                      (assoc-ref inputs "hdf5")))
               #t)))
         (add-after 'install 'install-interfaces
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (ilib (string-append out "/ilib")))
               ;; Install interface libraries.  On i686 the interface
               ;; libraries are installed to "linux/gcc/i386", so we need to
               ;; use the Linux architecture name ("i386") instead of the
               ;; target system prefix ("i686").
               (mkdir ilib)
               (copy-recursively (string-append "build/ncbi-vdb/linux/gcc/"
                                                ,(system->linux-architecture
                                                  (or (%current-target-system)
                                                      (%current-system)))
                                                "/rel/ilib")
                                 ilib)
               ;; Install interface headers
               (copy-recursively "interfaces"
                                 (string-append out "/include"))
               #t)))
         ;; These files are needed by sra-tools.
         (add-after 'install 'install-configuration-files
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (kfg (string-append out "/kfg")))
               (mkdir kfg)
               (install-file "libs/kfg/default.kfg" kfg)
               (install-file "libs/kfg/certs.kfg" kfg))
             #t)))))
    (inputs
     `(("libxml2" ,libxml2)
       ("ngs-sdk" ,ngs-sdk)
       ("java-ngs" ,java-ngs)
       ("libmagic" ,file)
       ("hdf5" ,hdf5)))
    (native-inputs `(("perl" ,perl)))
    ;; NCBI-VDB requires SSE capability.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "https://github.com/ncbi/ncbi-vdb")
    (synopsis "Database engine for genetic information")
    (description
     "The NCBI-VDB library implements a highly compressed columnar data
warehousing engine that is most often used to store genetic information.
Databases are stored in a portable image within the file system, and can be
accessed/downloaded on demand across HTTP.")
    (license license:public-domain)))
5918
(define-public plink
  (package
    (name "plink")
    (version "1.07")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "http://pngu.mgh.harvard.edu/~purcell/plink/dist/plink-"
             version "-src.zip"))
       (sha256
        (base32 "0as8gxm4pjyc8dxmm1sl873rrd7wn5qs0l29nqfnl31x8i467xaa"))
       (patches (search-patches "plink-1.07-unclobber-i.patch"
                                "plink-endian-detection.patch"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (let ((lapack (assoc-ref %build-inputs "lapack")))
         (list (string-append "LIB_LAPACK=" lapack "/lib/liblapack.so")
               "WITH_LAPACK=1"
               "FORCE_DYNAMIC=1"
               ;; disable phoning home
               "WITH_WEBCHECK="))
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         (replace 'install
           ;; Copy the "plink" executable to the output's bin directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "plink" bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "http://pngu.mgh.harvard.edu/~purcell/plink/")
    (synopsis "Whole genome association analysis toolset")
    (description
     "PLINK is a whole genome association analysis toolset, designed to
perform a range of basic, large-scale analyses in a computationally efficient
manner. The focus of PLINK is purely on analysis of genotype/phenotype data,
so there is no support for steps prior to this (e.g. study design and
planning, generating genotype or CNV calls from raw data). Through
integration with gPLINK and Haploview, there is some support for the
subsequent visualization, annotation and storage of results.")
    ;; Code is released under GPLv2, except for fisher.h, which is under
    ;; LGPLv2.1+
    (license (list license:gpl2 license:lgpl2.1+))))
5971
(define-public plink-ng
  (package
    (inherit plink)
    (name "plink-ng")
    (version "1.90b4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/chrchang/plink-ng")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "02npdwgkpfkdnhw819rhj5kw02a5k5m90b14zq9zzya4hyg929c0"))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no "check" target
       #:make-flags
       (list "BLASFLAGS=-llapack -lopenblas"
             "CFLAGS=-Wall -O2 -DDYNAMIC_ZLIB=1"
             "ZLIB=-lz"
             "-f" "Makefile.std")
       #:phases
       (modify-phases %standard-phases
         ;; Build from the "1.9" sub-directory of the checkout.
         (add-after 'unpack 'chdir
           (lambda _
             (chdir "1.9")
             #t))
         (delete 'configure)            ; no "configure" script
         (replace 'install
           ;; Copy the "plink" executable to the output's bin directory.
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "plink" bindir)
               #t))))))
    (inputs
     `(("zlib" ,zlib)
       ("lapack" ,lapack)
       ("openblas" ,openblas)))
    (home-page "https://www.cog-genomics.org/plink/")
    (license license:gpl3+)))
6009
(define-public smithlab-cpp
  (let ((revision "1")
        (commit "728a097bec88c6f4b8528b685932049e660eff2e"))
    (package
      (name "smithlab-cpp")
      (version (string-append "0." revision "." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/smithlab_cpp")
               (commit commit)))
         (file-name (string-append name "-" version "-checkout"))
         (sha256
          (base32 "0d476lmj312xk77kr9fzrv7z1bv96yfyx0w7y62ycmnfbx32ll74"))))
      (build-system gnu-build-system)
      (arguments
       `(#:modules ((guix build gnu-build-system)
                    (guix build utils)
                    (srfi srfi-26))
         #:tests? #f                    ;no "check" target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'use-samtools-headers
             ;; Refer to the samtools header as "samtools/sam.h".
             (lambda _
               (substitute* '("SAM.cpp"
                              "SAM.hpp")
                 (("sam.h") "samtools/sam.h"))
               #t))
           (replace 'install
             ;; Install all object files to lib/ and all "*.hpp" headers to
             ;; include/smithlab-cpp/.
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (libdir (string-append out "/lib"))
                      (incdir (string-append out "/include/smithlab-cpp")))
                 (mkdir-p libdir)
                 (mkdir-p incdir)
                 (for-each (cut install-file <> libdir)
                           (find-files "." "\\.o$"))
                 (for-each (cut install-file <> incdir)
                           (find-files "." "\\.hpp$")))
               #t)))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/smithlabcode/smithlab_cpp")
      (synopsis "C++ helper library for functions used in Smith lab projects")
      (description
       "Smithlab CPP is a C++ library that includes functions used in many of
the Smith lab bioinformatics projects, such as a wrapper around Samtools data
structures, classes for genomic regions, mapped sequencing reads, etc.")
      (license license:gpl3+))))
6062
(define-public preseq
  (package
    (name "preseq")
    (version "2.0.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/smithlabcode/preseq/"
                           "releases/download/v" version
                           "/preseq_v" version ".tar.bz2"))
       (sha256
        (base32 "149x9xmk1wy1gff85325yfzqc0qk4sgp1w6gbyj9cnji4x1dszbl"))
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Remove bundled samtools.
           (delete-file-recursively "samtools")
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; Point the Makefile at the samtools and smithlab-cpp inputs and
       ;; build only the "preseq" program.
       #:make-flags
       (let ((out (assoc-ref %outputs "out"))
             (samtools (assoc-ref %build-inputs "samtools"))
             (smithlab-cpp (assoc-ref %build-inputs "smithlab-cpp")))
         (list (string-append "PREFIX=" out)
               (string-append "LIBBAM=" samtools "/lib/libbam.a")
               (string-append "SMITHLAB_CPP=" smithlab-cpp "/lib")
               "PROGS=preseq"
               "INCLUDEDIRS=$(SMITHLAB_CPP)/../include/smithlab-cpp $(SAMTOOLS_DIR)"))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("gsl" ,gsl)
       ("samtools" ,samtools-0.1)
       ("smithlab-cpp" ,smithlab-cpp)
       ("zlib" ,zlib)))
    (home-page "http://smithlabresearch.org/software/preseq/")
    (synopsis "Program for analyzing library complexity")
    (description
     "The preseq package is aimed at predicting and estimating the complexity
of a genomic sequencing library, equivalent to predicting and estimating the
number of redundant reads from a given sequencing depth and how many will be
expected from additional sequencing using an initial sequencing experiment.
The estimates can then be used to examine the utility of further sequencing,
optimize the sequencing depth, or to screen multiple libraries to avoid low
complexity samples.")
    (license license:gpl3+)))
6112
(define-public python-screed
  (package
    (name "python-screed")
    (version "1.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "screed" version))
       (sha256
        (base32
         "148vcb7w2wr6a4w6vs2bsxanbqibxfk490zbcbg4m61s8669zdjx"))))
    (build-system python-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Tests must be run after installation, as the "screed" command does
         ;; not exist right after building.
         (delete 'check)
         (add-after 'install 'check
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; Put the freshly installed site-packages directory on
             ;; PYTHONPATH.  The helper derives the "major.minor" version
             ;; by splitting the interpreter's version string; slicing a
             ;; fixed number of characters off the store file name goes
             ;; wrong as soon as the version is not exactly five characters
             ;; long (e.g. "2.7.17" or "3.8.10").
             (add-installed-pythonpath inputs outputs)
             ;; Make the installed "screed" command available to the tests.
             (setenv "PATH"
                     (string-append (assoc-ref outputs "out") "/bin:"
                                    (getenv "PATH")))
             (invoke "python" "setup.py" "test")
             #t)))))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("python-pytest-cov" ,python-pytest-cov)
       ("python-pytest-runner" ,python-pytest-runner)))
    (inputs
     `(("python-bz2file" ,python-bz2file)))
    (home-page "https://github.com/dib-lab/screed/")
    (synopsis "Short read sequence database utilities")
    (description "Screed parses FASTA and FASTQ files and generates databases.
Values such as sequence name, sequence description, sequence quality and the
sequence itself can be retrieved from these databases.")
    (license license:bsd-3)))
6156
;; Variant of python-screed obtained by passing it through
;; 'package-with-python2'.
(define-public python2-screed (package-with-python2 python-screed))
6159
(define-public sra-tools
  (package
    (name "sra-tools")
    (version "2.10.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ncbi/sra-tools")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1cr2mijkfs5sm35ffjs6861qsd1qkgnhnbavdv65zg5d655abbjf"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-build? #f             ; not supported
       #:tests? #f                      ; no "check" target
       ;; All three make variables point into the ncbi-vdb input, so look it
       ;; up once.
       #:make-flags
       (let ((vdb (assoc-ref %build-inputs "ncbi-vdb")))
         (list (string-append "DEFAULT_CRT=" vdb "/kfg/certs.kfg")
               (string-append "DEFAULT_KFG=" vdb "/kfg/default.kfg")
               ;; The library sub-directory is chosen on the host side from
               ;; the system the package is being built for.
               (string-append "VDB_LIBDIR=" vdb
                              ,(if (string-prefix? "x86_64"
                                                   (or (%current-target-system)
                                                       (%current-system)))
                                   "/lib64"
                                   "/lib32"))))
       #:phases
       (modify-phases %standard-phases
         (add-before 'configure 'set-perl-search-path
           (lambda _
             ;; Work around "dotless @INC" build failure.
             (setenv "PERL5LIB"
                     (string-append (getcwd) "/setup:"
                                    (getenv "PERL5LIB")))
             #t))
         (replace 'configure
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((prefix  (assoc-ref outputs "out"))
                   (vdb     (assoc-ref inputs "ncbi-vdb"))
                   (fuse    (assoc-ref inputs "fuse"))
                   (magic   (assoc-ref inputs "libmagic"))
                   (ngs-sdk (assoc-ref inputs "ngs-sdk"))
                   (hdf5    (assoc-ref inputs "hdf5")))
               ;; The build system expects a directory containing the sources
               ;; and raw build output of ncbi-vdb, including files that are
               ;; not installed.  Since we are building against an installed
               ;; version of ncbi-vdb, the following modifications are needed.
               (substitute* "setup/konfigure.perl"
                 ;; Make the configure script look for the "ilib" directory of
                 ;; "ncbi-vdb" without first checking for the existence of a
                 ;; matching library in its "lib" directory.
                 (("^ my \\$f = File::Spec->catdir\\(\\$libdir, \\$lib\\);")
                  "my $f = File::Spec->catdir($ilibdir, $ilib);")
                 ;; Look for interface libraries in ncbi-vdb's "ilib"
                 ;; directory.
                 (("my \\$ilibdir = File::Spec->catdir\\(\\$builddir, 'ilib'\\);")
                  "my $ilibdir = File::Spec->catdir($dir, 'ilib');"))

               ;; Dynamic linking
               (substitute* "tools/copycat/Makefile"
                 (("smagic-static") "lmagic"))
               (substitute* "tools/driver-tool/utf8proc/Makefile"
                 (("CC\\?=gcc") "myCC=gcc")
                 (("\\(CC\\)") "(myCC)"))

               ;; The 'configure' script doesn't recognize things like
               ;; '--enable-fast-install'.
               (invoke "./configure"
                       (string-append "--build-prefix=" (getcwd) "/build")
                       (string-append "--prefix=" prefix)
                       "--debug"
                       (string-append "--with-fuse-prefix=" fuse)
                       (string-append "--with-magic-prefix=" magic)
                       ;; TODO: building with libxml2 fails with linker
                       ;; errors, so this flag stays disabled:
                       ;;   (string-append "--with-xml2-prefix="
                       ;;                  (assoc-ref inputs "libxml2"))
                       (string-append "--with-ncbi-vdb-sources=" vdb)
                       (string-append "--with-ncbi-vdb-build=" vdb)
                       (string-append "--with-ngs-sdk-prefix=" ngs-sdk)
                       (string-append "--with-hdf5-prefix=" hdf5))
               #t))))))
    (native-inputs `(("perl" ,perl)))
    (inputs
     `(("ngs-sdk" ,ngs-sdk)
       ("ncbi-vdb" ,ncbi-vdb)
       ("libmagic" ,file)
       ("fuse" ,fuse)
       ("hdf5" ,hdf5-1.10)
       ("zlib" ,zlib)
       ("python" ,python-wrapper)))
    (home-page
     "https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software")
    (synopsis "Tools and libraries for reading and writing sequencing data")
    (description
     "The SRA Toolkit from NCBI is a collection of tools and libraries for
reading of sequencing files from the Sequence Read Archive (SRA) database and
writing files into the .sra format.")
    (license license:public-domain)))
6264
(define-public seqan
  (package
    (name "seqan")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/seqan/seqan/releases/"
                           "download/seqan-v" version
                           "/seqan-library-" version ".tar.xz"))
       (sha256
        (base32 "19a1rlxx03qy1i1iriicly68w64yjxbv24g9gdywnfmq998v35yx"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("xz" ,xz)))
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let* ((tarball  (assoc-ref %build-inputs "source"))
                (tar-dir  (assoc-ref %build-inputs "tar"))
                (xz-dir   (assoc-ref %build-inputs "xz"))
                (headers  (string-append (assoc-ref %outputs "out")
                                         "/include"))
                (docs     (string-append (assoc-ref %outputs "doc")
                                         "/share")))
           ;; Only tar and xz are needed to unpack the release tarball.
           (setenv "PATH" (string-append tar-dir "/bin:" xz-dir "/bin"))
           (invoke "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           ;; Headers go to "out", everything under share/ goes to "doc".
           (copy-recursively "include" headers)
           (copy-recursively "share" docs)
           #t))))
    (home-page "http://www.seqan.de")
    (synopsis "Library for nucleotide sequence analysis")
    (description
     "SeqAn is a C++ library of efficient algorithms and data structures for
the analysis of sequences with the focus on biological data. It contains
algorithms and data structures for string representation and their
manipulation, online and indexed string search, efficient I/O of
bioinformatics file formats, sequence alignment, and more.")
    (license license:bsd-3)))
6309
(define-public seqan-1
  (package
    (inherit seqan)
    (name "seqan")
    (version "1.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://packages.seqan.de/seqan-library/"
                           "seqan-library-" version ".tar.bz2"))
       (sha256
        (base32 "05s3wrrwn50f81aklfm65i4a749zag1vr8z03k21xm0pdxy47yvp"))))
    ;; The documentation is 7.8MB and the includes are 3.6MB heavy, so it
    ;; makes sense to split the outputs.
    (outputs '("out" "doc"))
    (build-system trivial-build-system)
    (native-inputs
     `(("source" ,source)
       ("tar" ,tar)
       ("bzip2" ,bzip2)))
    (arguments
     `(#:modules ((guix build utils))
       #:builder
       (begin
         (use-modules (guix build utils))
         (let* ((tarball   (assoc-ref %build-inputs "source"))
                (tar-dir   (assoc-ref %build-inputs "tar"))
                (bzip2-dir (assoc-ref %build-inputs "bzip2"))
                (headers   (string-append (assoc-ref %outputs "out")
                                          "/include"))
                (docs      (string-append (assoc-ref %outputs "doc")
                                          "/share")))
           ;; This release is a .tar.bz2, so bzip2 is put on PATH next to tar.
           (setenv "PATH" (string-append tar-dir "/bin:" bzip2-dir "/bin"))
           (invoke "tar" "xvf" tarball)
           (chdir (string-append "seqan-library-" ,version))
           ;; Headers go to "out", everything under share/ goes to "doc".
           (copy-recursively "include" headers)
           (copy-recursively "share" docs)
           #t))))))
6344
(define-public seqmagick
  (package
    (name "seqmagick")
    (version "0.7.0")
    (source (origin
              (method url-fetch)
              (uri (pypi-uri "seqmagick" version))
              (sha256
               (base32
                "12bfyp8nqi0hd36rmj450aygafp01qy3hkbvlwn3bk39pyjjkgg5"))))
    (build-system python-build-system)
    (native-inputs `(("python-nose" ,python-nose)))
    (inputs `(("python-biopython" ,python-biopython)))
    (synopsis "Tools for converting and modifying sequence files")
    (description
     "Bioinformaticians often have to convert sequence files between formats
and do little manipulations on them, and it's not worth writing scripts for
that. Seqmagick is a utility to expose the file format conversion in
BioPython in a convenient way. Instead of having a big mess of scripts, there
is one that takes arguments.")
    (home-page "https://github.com/fhcrc/seqmagick")
    (license license:gpl3)))
6370
(define-public seqtk
  (package
    (name "seqtk")
    (version "1.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lh3/seqtk")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1bfzlqa84b5s1qi22blmmw2s8xdyp9h9ydcq22pfjhh5gab3yz6l"))))
    (build-system gnu-build-system)
    (inputs `(("zlib" ,zlib)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (delete 'configure)
         ;; There are no tests, so running the freshly built binary serves
         ;; as a sanity check.
         (replace 'check
           (lambda _
             (invoke "./seqtk" "seq")
             #t))
         ;; Install the single "seqtk" executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "seqtk" bindir)
               #t))))))
    (home-page "https://github.com/lh3/seqtk")
    (synopsis "Toolkit for processing biological sequences in FASTA/Q format")
    (description
     "Seqtk is a fast and lightweight tool for processing sequences in the
FASTA or FASTQ format. It parses both FASTA and FASTQ files which can be
optionally compressed by gzip.")
    (license license:expat)))
6406
(define-public snap-aligner
  (package
    (name "snap-aligner")
    (version "1.0beta.18")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/amplab/snap")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "01w3qq4wm07z73vky0cfwlmrbf50n3w722cxrlzxfi99mnb808d8"))))
    (build-system gnu-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; The build is driven by a plain Makefile, so the configure phase
         ;; is dropped.
         (delete 'configure)
         ;; Run the "unit_tests" executable from the build tree.
         (replace 'check (lambda _ (invoke "./unit_tests") #t))
         ;; Copy the two executables into the output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "snap-aligner" bin)
               (install-file "SNAPCommand" bin)
               #t))))))
    ;; zlib is a library rather than a build-time tool, so it is declared as
    ;; a regular input, like the other zlib users in this file.
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://snap.cs.berkeley.edu/")
    (synopsis "Short read DNA sequence aligner")
    (description
     "SNAP is a fast and accurate aligner for short DNA reads. It is
optimized for modern read lengths of 100 bases or higher, and takes advantage
of these reads to align data quickly through a hash-based indexing scheme.")
    ;; 32-bit systems are not supported by the unpatched code.
    ;; Following the bug reports https://github.com/amplab/snap/issues/68 and
    ;; https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=812378 we see that
    ;; systems without a lot of memory cannot make good use of this program.
    (supported-systems '("x86_64-linux"))
    (license license:asl2.0)))
6447
(define-public sortmerna
  (package
    (name "sortmerna")
    (version "2.1b")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/biocore/sortmerna")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0j3mbz4n25738yijmjbr5r4fyvkgm8v5vn3sshyfvmyqf5q9byqf"))))
    (build-system gnu-build-system)
    (outputs '("out"                    ;for binaries
               "db"))                   ;for sequence databases
    (inputs `(("zlib" ,zlib)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((bindir (string-append (assoc-ref outputs "out") "/bin"))
                    (dbdir  (string-append (assoc-ref outputs "db")
                                           "/share/sortmerna/rRNA_databases"))
                    (install-db (lambda (file)
                                  (install-file file dbdir))))
               ;; The two executables go to "out" ...
               (install-file "sortmerna" bindir)
               (install-file "indexdb_rna" bindir)
               ;; ... and the bundled FASTA databases go to "db".
               (for-each install-db
                         (find-files "rRNA_databases" ".*fasta"))
               #t))))))
    (home-page "https://bioinfo.lifl.fr/RNA/sortmerna/")
    (synopsis "Biological sequence analysis tool for NGS reads")
    (description
     "SortMeRNA is a biological sequence analysis tool for filtering, mapping
and operational taxonomic unit (OTU) picking of next generation
sequencing (NGS) reads. The core algorithm is based on approximate seeds and
allows for fast and sensitive analyses of nucleotide sequences. The main
application of SortMeRNA is filtering rRNA from metatranscriptomic data.")
    ;; The source includes x86 specific code
    (supported-systems '("x86_64-linux" "i686-linux"))
    (license license:lgpl3)))
6494
(define-public star
  (package
    (name "star")
    (version "2.7.3a")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/alexdobin/STAR")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1hgiqw5qhs0pc1xazzihcfd92na02xyq2kb469z04y1v51kpvvjq"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (substitute* "source/Makefile"
                    (("/bin/rm") "rm"))
                  ;; Remove pre-built binaries and bundled htslib sources.
                  (delete-file-recursively "bin/MacOSX_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64")
                  (delete-file-recursively "bin/Linux_x86_64_static")
                  (delete-file-recursively "source/htslib")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     '(#:tests? #f                      ;no check target
       #:make-flags '("STAR")
       #:phases
       (modify-phases %standard-phases
         ;; The Makefile and sources live in the "source" sub-directory.
         (add-after 'unpack 'enter-source-dir
           (lambda _ (chdir "source") #t))
         ;; Replace the value of COMPILATION_TIME_PLACE with a fixed string.
         (add-after 'enter-source-dir 'make-reproducible
           (lambda _
             (substitute* "Makefile"
               (("(COMPILATION_TIME_PLACE=\")(.*)(\")" _ pre mid post)
                (string-append pre "Built with Guix" post)))
             #t))
         ;; See https://github.com/alexdobin/STAR/pull/562
         (add-after 'enter-source-dir 'add-missing-header
           (lambda _
             (substitute* "SoloReadFeature_inputRecords.cpp"
               (("#include \"binarySearch2.h\"" h)
                (string-append h "\n#include <math.h>")))
             #t))
         (add-after 'enter-source-dir 'do-not-use-bundled-htslib
           (lambda _
             ;; Drop the "htslib" prerequisite from the Depend.list rule; the
             ;; bundled copy was removed by the source snippet.
             (substitute* "Makefile"
               (("(Depend.list: \\$\\(SOURCES\\) parametersDefault\\.xxd) htslib"
                 _ prefix) prefix))
             ;; Turn quoted includes of the bundled headers into
             ;; angle-bracket includes.
             (substitute* '("BAMfunctions.cpp"
                            "signalFromBAM.h"
                            "bam_cat.h"
                            "bam_cat.c"
                            "STAR.cpp"
                            "bamRemoveDuplicates.cpp")
               (("#include \"htslib/([^\"]+\\.h)\"" _ header)
                (string-append "#include <" header ">")))
             ;; The dot before "h" is escaped so that it only matches a
             ;; literal ".h" suffix, like the pattern above.
             (substitute* "IncludeDefine.h"
               (("\"htslib/(htslib/[^\"]+\\.h)\"" _ header)
                (string-append "<" header ">")))
             #t))
         ;; Install the "STAR" executable by hand.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (install-file "STAR" bin))
             #t))
         (delete 'configure))))
    (native-inputs
     `(("xxd" ,xxd)))
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://github.com/alexdobin/STAR")
    (synopsis "Universal RNA-seq aligner")
    (description
     "The Spliced Transcripts Alignment to a Reference (STAR) software is
based on a previously undescribed RNA-seq alignment algorithm that uses
sequential maximum mappable seed search in uncompressed suffix arrays followed
by seed clustering and stitching procedure. In addition to unbiased de novo
detection of canonical junctions, STAR can discover non-canonical splices and
chimeric (fusion) transcripts, and is also capable of mapping full-length RNA
sequences.")
    ;; Only 64-bit systems are supported according to the README.
    (supported-systems '("x86_64-linux" "mips64el-linux"))
    ;; STAR is licensed under GPLv3 or later; htslib is MIT-licensed.
    (license license:gpl3+)))
6582
(define-public starlong
  (package
    (inherit star)
    (name "starlong")
    (arguments
     (substitute-keyword-arguments (package-arguments star)
       ;; Build the "STARlong" make target instead of the inherited one.
       ((#:make-flags inherited-flags)
        '(list "STARlong"))
       ((#:phases star-phases)
        `(modify-phases ,star-phases
           ;; Allow extra long sequence reads.
           (add-after 'unpack 'make-extra-long
             (lambda _
               (substitute* "source/IncludeDefine.h"
                 (("(#define DEF_readNameLengthMax ).*" _ definition)
                  (string-append definition "900000\n")))
               #t))
           ;; The executable produced by this variant is named "STARlong".
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out    (assoc-ref outputs "out"))
                      (bindir (string-append out "/bin/")))
                 (install-file "STARlong" bindir))
               #t))))))))
6604
(define-public subread
  (package
    (name "subread")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/subread/subread-"
                           version "/subread-" version "-source.tar.gz"))
       (sha256
        (base32 "0ah0n4jx6ksk2m2j7xk385x2qzmk1y4rfc6a4mfrdqrlq721w99i"))))
    (build-system gnu-build-system)
    (inputs `(("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ;no "check" target
       ;; The CC and CCFLAGS variables are set to contain a lot of x86_64
       ;; optimizations by default, so we override these flags such that x86_64
       ;; flags are only added when the build target is an x86_64 system.
       #:make-flags
       (list (let* ((system ,(or (%current-target-system)
                                 (%current-system)))
                    (common '("-ggdb" "-fomit-frame-pointer"
                              "-ffast-math" "-funroll-loops"
                              "-fmessage-length=0"
                              "-O9" "-Wall" "-DMAKE_FOR_EXON"
                              "-DMAKE_STANDALONE"
                              "-DSUBREAD_VERSION=\\\"${SUBREAD_VERSION}\\\""))
                    (x86-64-only '("-mmmx" "-msse" "-msse2" "-msse3"))
                    ;; Pick the flag list first, then join it exactly once.
                    (selected (if (string-prefix? "x86_64" system)
                                  (append common x86-64-only)
                                  common)))
               (string-append "CCFLAGS=" (string-join selected)))
             "-f" "Makefile.Linux"
             "CC=gcc ${CCFLAGS}")
       #:phases
       (modify-phases %standard-phases
         ;; no "configure" script
         (delete 'configure)
         ;; The Makefile is in the "src" sub-directory.
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "src")
             #t))
         ;; Copy everything from the top-level "bin" directory into the
         ;; output's bin directory.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (mkdir-p bindir)
               (copy-recursively "../bin" bindir))
             #t)))))
    (home-page "http://bioinf.wehi.edu.au/subread-package/")
    (synopsis "Tool kit for processing next-gen sequencing data")
    (description
     "The subread package contains the following tools: subread aligner, a
general-purpose read aligner; subjunc aligner: detecting exon-exon junctions
and mapping RNA-seq reads; featureCounts: counting mapped reads for genomic
features; exactSNP: a SNP caller that discovers SNPs by testing signals
against local background noises.")
    (license license:gpl3+)))
6659
(define-public stringtie
  (package
    (name "stringtie")
    (version "1.2.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://ccb.jhu.edu/software/stringtie/dl/"
                           "stringtie-" version ".tar.gz"))
       (sha256
        (base32 "1cqllsc1maq4kh92isi8yadgzbmnf042hlnalpk3y59aph1z3bfz"))
       (modules '((guix build utils)))
       ;; Delete the samtools copy shipped in the tarball.
       (snippet
        '(begin
           (delete-file-recursively "samtools-0.1.18")
           #t))))
    (build-system gnu-build-system)
    (inputs
     `(("samtools" ,samtools-0.1)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ;no test suite
       #:phases
       (modify-phases %standard-phases
         ;; no configure script
         (delete 'configure)
         (add-after 'unpack 'remove-duplicate-typedef
           (lambda _
             ;; This typedef conflicts with the typedef in
             ;; glibc-2.25/include/bits/types.h
             (substitute* "gclib/GThreads.h"
               (("typedef long long __intmax_t;") ""))
             #t))
         (add-before 'build 'use-system-samtools
           (lambda _
             ;; Remove the libbam.a prerequisite from the "stringtie" rule.
             (substitute* "Makefile"
               (("stringtie: \\$\\{BAM\\}/libbam\\.a")
                "stringtie: "))
             ;; Include the samtools headers from the <samtools/...>
             ;; directory instead of the quoted local names.
             (substitute* '("gclib/GBam.h"
                            "gclib/GBam.cpp")
               (("#include \"(bam|sam|kstring).h\"" _ header)
                (string-append "#include <samtools/" header ".h>")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin/")))
               (install-file "stringtie" bindir)
               #t))))))
    (home-page "http://ccb.jhu.edu/software/stringtie/")
    (synopsis "Transcript assembly and quantification for RNA-Seq data")
    (description
     "StringTie is a fast and efficient assembler of RNA-Seq sequence
alignments into potential transcripts. It uses a novel network flow algorithm
as well as an optional de novo assembly step to assemble and quantitate
full-length transcripts representing multiple splice variants for each gene
locus. Its input can include not only the alignments of raw reads used by
other transcript assemblers, but also alignments of longer sequences that have
been assembled from those reads. To identify differentially expressed genes
between experiments, StringTie's output can be processed either by the
Cuffdiff or Ballgown programs.")
    (license license:artistic2.0)))
6721
(define-public taxtastic
  (package
    (name "taxtastic")
    (version "0.8.11")
    (source
     (origin
       ;; The Pypi version does not include tests.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/fhcrc/taxtastic")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1sv8mkg64jn7zdwf1jj71c16686yrwxk0apb1l8sjszy9p166g0p"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'python37-compatibility
           (lambda _
             ;; Import "errno" directly and use it in place of "os.errno".
             (substitute* "taxtastic/utils.py"
               (("import csv") "import csv, errno")
               (("os.errno") "errno"))
             #t))
         (add-after 'unpack 'prepare-directory
           (lambda _
             ;; The git checkout must be writable for tests.
             (for-each make-file-writable (find-files "."))
             ;; This test fails, but the error is not caught by the test
             ;; framework, so the tests fail...
             (substitute* "tests/test_taxit.py"
               (("self.cmd_fails\\(''\\)")
                "self.cmd_fails('nothing')"))
             ;; This version file is expected to be created with git describe.
             (mkdir-p "taxtastic/data")
             (with-output-to-file "taxtastic/data/ver"
               (lambda ()
                 (display ,version)))
             #t))
         (replace 'check
           ;; Note, this fails to run with "-v" as it tries to write to a
           ;; closed output stream.
           (lambda _
             (invoke "python" "-m" "unittest")
             #t)))))
    (propagated-inputs
     `(("python-sqlalchemy" ,python-sqlalchemy)
       ("python-decorator" ,python-decorator)
       ("python-biopython" ,python-biopython)
       ("python-pandas" ,python-pandas)
       ("python-psycopg2" ,python-psycopg2)
       ("python-fastalite" ,python-fastalite)
       ("python-pyyaml" ,python-pyyaml)
       ("python-six" ,python-six)
       ("python-jinja2" ,python-jinja2)
       ("python-dendropy" ,python-dendropy)))
    (home-page "https://github.com/fhcrc/taxtastic")
    (synopsis "Tools for taxonomic naming and annotation")
    (description
     "Taxtastic is software written in python used to build and maintain
reference packages i.e. collections of reference trees, reference alignments,
profiles, and associated taxonomic information.")
    (license license:gpl3+)))
6782
(define-public vcftools
  (package
    (name "vcftools")
    (version "0.1.16")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://github.com/vcftools/vcftools/releases/download/v"
             version "/vcftools-" version ".tar.gz"))
       (sha256
        (base32 "1qqlx7flfv7axrjwkaz6njkscsl1d0jw98ns8d8bh1n1hd1pgz6v"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       ;; PREFIX and MANDIR both point into the single output, so look it up
       ;; once.
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "CFLAGS=-O2"             ; override "-m64" flag
               (string-append "PREFIX=" out)
               (string-append "MANDIR=" out "/share/man/man1")))))
    (native-inputs `(("pkg-config" ,pkg-config)))
    (inputs
     `(("perl" ,perl)
       ("zlib" ,zlib)))
    (home-page "https://vcftools.github.io/")
    (synopsis "Tools for working with VCF files")
    (description
     "VCFtools is a program package designed for working with VCF files, such
as those generated by the 1000 Genomes Project. The aim of VCFtools is to
provide easily accessible methods for working with complex genetic variation
data in the form of VCF files.")
    ;; The license is declared as LGPLv3 in the README and
    ;; at https://vcftools.github.io/license.html
    (license license:lgpl3)))
6818
(define-public infernal
  (package
    (name "infernal")
    (version "1.1.3")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://eddylab.org/software/infernal/"
                           "infernal-" version ".tar.gz"))
       (sha256
        (base32 "0pm8bm3s6nfa0av4x6m6h27lsg12b3lz3jm0fyh1mc77l2isd61v"))))
    (build-system gnu-build-system)
    (native-inputs
     `(("perl" ,perl)
       ("python" ,python)))             ; for tests
    ;; Infernal 1.1.3 requires VMX or SSE capability for parallel instructions.
    (supported-systems '("i686-linux" "x86_64-linux"))
    (home-page "http://eddylab.org/infernal/")
    (synopsis "Inference of RNA alignments")
    (description "Infernal (\"INFERence of RNA ALignment\") is a tool for
searching DNA sequence databases for RNA structure and sequence similarities.
It is an implementation of a special case of profile stochastic context-free
grammars called @dfn{covariance models} (CMs). A CM is like a sequence
profile, but it scores a combination of sequence consensus and RNA secondary
structure consensus, so in many cases, it is more capable of identifying RNA
homologs that conserve their secondary structure more than their primary
sequence.")
    (license license:bsd-3)))
6847
(define-public r-scde
  (package
    (name "r-scde")
    (version "1.99.2")
    ;; Fetched from the upstream git repository, using the version string as
    ;; the commit reference.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/hms-dbmi/scde")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "10na2gyka24mszdxf92wz9h2c13hdf1ww30c68gfsw53lvvhhhxb"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-mgcv" ,r-mgcv)
       ("r-rook" ,r-rook)
       ("r-rjson" ,r-rjson)
       ("r-cairo" ,r-cairo)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-edger" ,r-edger)
       ("r-quantreg" ,r-quantreg)
       ("r-nnet" ,r-nnet)
       ("r-rmtstat" ,r-rmtstat)
       ("r-extremes" ,r-extremes)
       ("r-pcamethods" ,r-pcamethods)
       ("r-biocparallel" ,r-biocparallel)
       ("r-flexmix" ,r-flexmix)))
    (synopsis "R package for analyzing single-cell RNA-seq data")
    (description "The SCDE package implements a set of statistical methods for
analyzing single-cell RNA-seq data. SCDE fits individual error models for
single-cell RNA-seq measurements. These models can then be used for
assessment of differential expression between groups of cells, as well as
other types of analysis. The SCDE package also contains the pagoda framework
which applies pathway and gene set overdispersion analysis to identify aspects
of transcriptional heterogeneity among single cells.")
    (home-page "https://hms-dbmi.github.io/scde/")
    ;; See https://github.com/hms-dbmi/scde/issues/38
    (license license:gpl2)))
6889
(define-public r-centipede
  (package
    (name "r-centipede")
    (version "1.2")
    ;; The tarball is downloaded from R-Forge.
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://download.r-forge.r-project.org/"
                           "src/contrib/CENTIPEDE_" version ".tar.gz"))
       (sha256
        (base32 "1hsx6qgwr0i67fhy9257zj7s0ppncph2hjgbia5nn6nfmj0ax6l9"))))
    (build-system r-build-system)
    (synopsis "Predict transcription factor binding sites")
    (description
     "CENTIPEDE applies a hierarchical Bayesian mixture model to infer regions
of the genome that are bound by particular transcription factors. It starts
by identifying a set of candidate binding sites, and then aims to classify the
sites according to whether each site is bound or not bound by a transcription
factor. CENTIPEDE is an unsupervised learning algorithm that discriminates
between two different types of motif instances using as much relevant
information as possible.")
    (home-page "http://centipede.uchicago.edu/")
    (license (list license:gpl2+ license:gpl3+))))
6913
(define-public r-genefilter
  (package
    (name "r-genefilter")
    (version "1.70.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "genefilter" version))
              (sha256
               (base32
                "1sbbrnq6p90fri0ik6aq2zw26kasw63nyiy7xkzrj6vgyq7x258g"))))
    (build-system r-build-system)
    ;; R packages this package pulls into the user's environment.
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-survival" ,r-survival)))
    ;; Build-time only.
    (native-inputs
     `(("gfortran" ,gfortran)
       ("r-knitr" ,r-knitr)))
    (synopsis "Filter genes from high-throughput experiments")
    (description
     "This package provides basic functions for filtering genes from
high-throughput sequencing experiments.")
    (home-page "https://bioconductor.org/packages/genefilter")
    (license license:artistic2.0)))
6941
(define-public r-deseq2
  (package
    (name "r-deseq2")
    (version "1.28.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DESeq2" version))
              (sha256
               (base32
                "0xh12c2skr0bbv893p05gvbismkcnqw8zwh7yz4wmycgajfzg2pp"))))
    ;; Record the mixed-case Bioconductor name alongside the lower-case
    ;; package name.
    (properties `((upstream-name . "DESeq2")))
    (build-system r-build-system)
    ;; Build-time only.
    (native-inputs `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iranges" ,r-iranges)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides functions to estimate variance-mean dependence in
count data from high-throughput nucleotide sequencing assays and test for
differential expression based on a model using the negative binomial
distribution.")
    (home-page "https://bioconductor.org/packages/DESeq2")
    (license license:lgpl3+)))
6979
(define-public r-dexseq
  (package
    (name "r-dexseq")
    (version "1.34.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "DEXSeq" version))
              (sha256
               (base32
                "1m03awaw06mfv5gszq23k5apsqqzjqa5rcwp20y4xbpl7bywpsyl"))))
    ;; Record the mixed-case Bioconductor name alongside the lower-case
    ;; package name.
    (properties `((upstream-name . "DEXSeq")))
    (build-system r-build-system)
    ;; Build-time only.
    (native-inputs `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biomart" ,r-biomart)
       ("r-deseq2" ,r-deseq2)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-statmod" ,r-statmod)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (synopsis "Inference of differential exon usage in RNA-Seq")
    (description
     "This package is focused on finding differential exon usage using RNA-seq
exon counts between samples with different experimental designs. It provides
functions that allows the user to make the necessary statistical tests based
on a model that uses the negative binomial distribution to estimate the
variance between biological replicates and generalized linear models for
testing. The package also provides functions for the visualization and
exploration of the results.")
    (home-page "https://bioconductor.org/packages/DEXSeq")
    (license license:gpl3+)))
7024
;; AnnotationForge from Bioconductor: code for building annotation database
;; packages.
(define-public r-annotationforge
  (package
    (name "r-annotationforge")
    (version "1.30.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "AnnotationForge" version))
              (sha256
               (base32
                "1a2155jxbwc6qr3dcyvp850grhdr7czc7bs3s87ff4sgdl0jp3jw"))))
    (build-system r-build-system)
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "AnnotationForge")))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)))
    (synopsis "Code for building annotation database packages")
    (description
     "This package provides code for generating Annotation packages and their
databases. Packages produced are intended to be used with AnnotationDbi.")
    (home-page "https://bioconductor.org/packages/AnnotationForge")
    (license license:artistic2.0)))
7056
;; RBGL from Bioconductor: an R interface to the Boost graph library.
(define-public r-rbgl
  (package
    (name "r-rbgl")
    (version "1.64.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "RBGL" version))
       (sha256
        (base32
         "079599a6xn2i7snfn2vgshkw0c00rrfhj44pvi03ap8id29bkayy"))))
    ;; Upstream spells the name in upper case.
    (properties `((upstream-name . "RBGL")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bh" ,r-bh)
       ("r-graph" ,r-graph)))
    ;; Use the same host name as the other Bioconductor packages in this
    ;; file (no "www." prefix).
    (home-page "https://bioconductor.org/packages/RBGL")
    (synopsis "Interface to the Boost graph library")
    (description
     "This package provides a fairly extensive and comprehensive interface to
the graph algorithms contained in the Boost library.")
    (license license:artistic2.0)))
7079
;; GSEABase from Bioconductor: data structures and methods for gene set
;; enrichment.
(define-public r-gseabase
  (package
    (name "r-gseabase")
    (version "1.50.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GSEABase" version))
              (sha256
               (base32
                "1k4faj53cwvqijad8cf7fcghzxcv9shlbpl8n73bsncc8k192y2j"))))
    (build-system r-build-system)
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "GSEABase")))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-graph" ,r-graph)
       ("r-xml" ,r-xml)))
    (synopsis "Gene set enrichment data structures and methods")
    (description
     "This package provides classes and methods to support @dfn{Gene Set
Enrichment Analysis} (GSEA).")
    (home-page "https://bioconductor.org/packages/GSEABase")
    (license license:artistic2.0)))
7108
;; Category from Bioconductor: tools for category analysis.
(define-public r-category
  (package
    (name "r-category")
    (version "2.54.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Category" version))
              (sha256
               (base32
                "1grspdzk5a4vidnxwcd1jmy1vcn494aydsp3vydx235yv7iqac1b"))))
    (build-system r-build-system)
    ;; Upstream capitalizes the name.
    (properties '((upstream-name . "Category")))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-graph" ,r-graph)
       ("r-gseabase" ,r-gseabase)
       ("r-matrix" ,r-matrix)
       ("r-rbgl" ,r-rbgl)
       ("r-dbi" ,r-dbi)))
    (synopsis "Category analysis")
    (description
     "This package provides a collection of tools for performing category
analysis.")
    (home-page "https://bioconductor.org/packages/Category")
    (license license:artistic2.0)))
7139
;; GOstats from Bioconductor: tools for manipulating GO and microarray data.
(define-public r-gostats
  (package
    (name "r-gostats")
    (version "2.54.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GOstats" version))
              (sha256
               (base32
                "00q39cyv4l28r6s9gjnd0qhl7h80vmwp4lpmchisqzj44xzyics9"))))
    (build-system r-build-system)
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "GOstats")))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationforge" ,r-annotationforge)
       ("r-biobase" ,r-biobase)
       ("r-category" ,r-category)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-rgraphviz" ,r-rgraphviz)
       ("r-rbgl" ,r-rbgl)))
    (synopsis "Tools for manipulating GO and microarrays")
    (description
     "This package provides a set of tools for interacting with GO and
microarray data. A variety of basic manipulation tools for graphs, hypothesis
testing and other simple calculations.")
    (home-page "https://bioconductor.org/packages/GOstats")
    (license license:artistic2.0)))
7170
;; ShortRead from Bioconductor: FASTQ input and manipulation tools.
(define-public r-shortread
  (package
    (name "r-shortread")
    (version "1.46.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "ShortRead" version))
              (sha256
               (base32
                "0l4kdln69y6yhln0xiv2jmpxg05fjcglln406p43a2bqvk2lr03d"))))
    (build-system r-build-system)
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "ShortRead")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hwriter" ,r-hwriter)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    ;; The only non-R input of this package.
    (inputs
     `(("zlib" ,zlib)))
    (synopsis "FASTQ input and manipulation tools")
    (description
     "This package implements sampling, iteration, and input of FASTQ files.
It includes functions for filtering and trimming reads, and for generating a
quality assessment report. Data are represented as
@code{DNAStringSet}-derived objects, and easily manipulated for a diversity of
purposes. The package also contains legacy support for early single-end,
ungapped alignment formats.")
    (home-page "https://bioconductor.org/packages/ShortRead")
    (license license:artistic2.0)))
7212
;; systemPipeR from Bioconductor: next generation sequencing workflow and
;; reporting environment.
(define-public r-systempiper
  (package
    (name "r-systempiper")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "systemPipeR" version))
              (sha256
               (base32
                "01ilhlrvy28jfdyxjria4024yryj5zgympgqznw17g3y3az78kk2"))))
    (build-system r-build-system)
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "systemPipeR")))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-annotate" ,r-annotate)
       ("r-assertthat" ,r-assertthat)
       ("r-batchtools" ,r-batchtools)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq2" ,r-deseq2)
       ("r-dot" ,r-dot)
       ("r-edger" ,r-edger)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-go-db" ,r-go-db)
       ("r-gostats" ,r-gostats)
       ("r-limma" ,r-limma)
       ("r-magrittr" ,r-magrittr)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rjson" ,r-rjson)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rsvg" ,r-rsvg)
       ("r-shortread" ,r-shortread)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-yaml" ,r-yaml)
       ("r-variantannotation" ,r-variantannotation)))
    (synopsis "Next generation sequencing workflow and reporting environment")
    (description
     "This R package provides tools for building and running automated
end-to-end analysis workflows for a wide range of @dfn{next generation
sequence} (NGS) applications such as RNA-Seq, ChIP-Seq, VAR-Seq and Ribo-Seq.
Important features include a uniform workflow interface across different NGS
applications, automated report generation, and support for running both R and
command-line software, such as NGS aligners or peak/variant callers, on local
computers or compute clusters. Efficient handling of complex sample sets and
experimental designs is facilitated by a consistently implemented sample
annotation infrastructure.")
    (home-page "https://github.com/tgirke/systemPipeR")
    (license license:artistic2.0)))
7265
;; groHMM from Bioconductor: a GRO-seq analysis pipeline.
(define-public r-grohmm
  (package
    (name "r-grohmm")
    (version "1.22.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "groHMM" version))
              (sha256
               (base32
                "04z9qq1xwdsaxbqhyld37w0ybvzly9pc1hcyrnwdbyjwd7n1fncb"))))
    (build-system r-build-system)
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "groHMM")))
    (propagated-inputs
     `(("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-mass" ,r-mass)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "GRO-seq analysis pipeline")
    (description
     "This package provides a pipeline for the analysis of GRO-seq data.")
    (home-page "https://github.com/Kraus-Lab/groHMM")
    (license license:gpl3+)))
7292
;; VSEARCH: sequence search tools for metagenomics, built from a Git
;; checkout of the upstream "v<version>" tag.
(define-public vsearch
  (package
    (name "vsearch")
    (version "2.9.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/torognes/vsearch")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0vhrpjfdf75ba04b24xknp41790cvcgwl0vgpy7qbzj5xh2521ss"))
       (patches (search-patches "vsearch-unbundle-cityhash.patch"))
       (snippet
        '(begin
           ;; Remove bundled cityhash sources. The vsearch source is adjusted
           ;; for this in the patch.
           (for-each delete-file
                     '("src/city.h"
                       "src/citycrc.h"
                       "src/city.cc"))
           #t))))
    (build-system gnu-build-system)
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)))
    (inputs
     `(("zlib" ,zlib)
       ("bzip2" ,bzip2)
       ;; Replaces the bundled copy deleted by the source snippet.
       ("cityhash" ,cityhash)))
    (home-page "https://github.com/torognes/vsearch")
    (synopsis "Sequence search tools for metagenomics")
    (description
     "VSEARCH supports DNA sequence searching, clustering, chimera detection,
dereplication, pairwise alignment, shuffling, subsampling, sorting and
masking. The tool takes advantage of parallelism in the form of SIMD
vectorization as well as multiple threads to perform accurate alignments at
high speed. VSEARCH uses an optimal global aligner (full dynamic programming
Needleman-Wunsch).")
    ;; vsearch uses non-portable SSE intrinsics so building fails on other
    ;; platforms.
    (supported-systems '("x86_64-linux"))
    ;; Dual licensed; also includes public domain source.
    (license (list license:gpl3 license:bsd-2))))
7338
;; ParDRe: parallel tool to remove duplicate DNA reads.  There is no
;; configure step and no install target, so the single "ParDRe" binary is
;; copied into the output's bin directory by hand.
(define-public pardre
  (package
    (name "pardre")
    ;; The source of 1.1.5 changed in place, so we append "-1" to the version.
    (version "1.1.5-1")
    (source (origin
              (method url-fetch)
              ;; The upstream version is spelled out here because the
              ;; `version' field above carries the extra "-1" suffix.
              (uri (string-append "mirror://sourceforge/pardre/ParDRe-rel"
                                  "1.1.5" ".tar.gz"))
              (sha256
               (base32
                "17j73nc0viq4f6qj50nrndsrif5d6b71q8fl87m54psiv0ilns2b"))))
    (build-system gnu-build-system)
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; no tests included
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (install-file "ParDRe" bin)
               #t))))))
    (home-page "https://sourceforge.net/projects/pardre/")
    (synopsis "Parallel tool to remove duplicate DNA reads")
    (description
     "ParDRe is a parallel tool to remove duplicate genetic sequence reads.
Duplicate reads can be seen as identical or nearly identical sequences with
some mismatches. This tool lets users avoid the analysis of unnecessary
reads, reducing the time of subsequent procedures with the
dataset (e.g. assemblies, mappings, etc.). The tool is implemented with MPI
in order to exploit the parallel capabilities of multicore clusters. It is
faster than multithreaded counterparts (end of 2015) for the same number of
cores and, thanks to the message-passing technology, it can be executed on
clusters.")
    (license license:gpl3+)))
7379
;; bio-kseq gem: Ruby bindings for the kseq.h FASTA/Q parser.  The test
;; suite is run through the "spec" target.
(define-public ruby-bio-kseq
  (package
    (name "ruby-bio-kseq")
    (version "0.0.2")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-kseq" version))
              (sha256
               (base32
                "1xyaha46khb5jc6wzkbf7040jagac49jbimn0vcrzid0j8jdikrz"))))
    (build-system ruby-build-system)
    (arguments
     '(#:test-target "spec"))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("bundler" ,bundler)
       ("ruby-rspec" ,ruby-rspec)
       ("ruby-rake-compiler" ,ruby-rake-compiler)))
    (home-page "https://github.com/gusevfe/bio-kseq")
    (synopsis "Ruby bindings for the kseq.h FASTA/Q parser")
    (description
     "@code{Bio::Kseq} provides ruby bindings to the @code{kseq.h} FASTA and
FASTQ parsing code. It provides a fast iterator over sequences and their
quality scores.")
    (license license:expat)))
7407
;; bio-locus gem: tool for fast querying of genome locations.
(define-public bio-locus
  (package
    (name "bio-locus")
    (version "0.0.7")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-locus" version))
              (sha256
               (base32
                "02vmrxyimkj9sahsp4zhfhnmbvz6dbbqz1y01vglf8cbwvkajfl0"))))
    (build-system ruby-build-system)
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (home-page "https://github.com/pjotrp/bio-locus")
    (synopsis "Tool for fast querying of genome locations")
    (description
     "Bio-locus is a tabix-like tool for fast querying of genome
locations. Many file formats in bioinformatics contain records that
start with a chromosome name and a position for a SNP, or a start-end
position for indels. Bio-locus allows users to store this chr+pos or
chr+pos+alt information in a database.")
    (license license:expat)))
7431
;; bio-blastxmlparser gem: fast BLAST XML parser, usable as a library and as
;; a command line utility.
(define-public bio-blastxmlparser
  (package
    (name "bio-blastxmlparser")
    (version "2.0.4")
    (source (origin
              (method url-fetch)
              (uri (rubygems-uri "bio-blastxmlparser" version))
              (sha256
               (base32
                "1wf4qygcmdjgcqm6flmvsagfr1gs9lf63mj32qv3z1f481zc5692"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-bio-logger" ,ruby-bio-logger)
       ("ruby-nokogiri" ,ruby-nokogiri)))
    ;; RSpec is a test framework, so it is declared as a native input, like
    ;; the other Ruby packages in this file do.
    (native-inputs
     `(("ruby-rspec" ,ruby-rspec)))
    (synopsis "Fast big data BLAST XML parser and library")
    (description
     "Very fast parallel big-data BLAST XML file parser which can be used as
command line utility. Use blastxmlparser to: Parse BLAST XML; filter output;
generate FASTA, JSON, YAML, RDF, JSON-LD, HTML, CSV, tabular output etc.")
    (home-page "https://github.com/pjotrp/blastxmlparser")
    (license license:expat)))
7455
;; BioRuby ("bio" gem): Ruby library, shell and utilities for
;; bioinformatics.  Hard-coded tool paths in the test files are rewritten
;; before the build to whatever `which' finds in the build environment.
(define-public bioruby
  (package
    (name "bioruby")
    (version "1.5.2")
    (source
     (origin
       (method url-fetch)
       (uri (rubygems-uri "bio" version))
       (sha256
        (base32
         "1d56amdsjv1mag7m6gv2w0xij8hqx1v5xbdjsix8sp3yp36m7938"))))
    (build-system ruby-build-system)
    (propagated-inputs
     `(("ruby-libxml" ,ruby-libxml)))
    (native-inputs
     `(("which" ,which)))               ; required for test phase
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'build 'patch-test-command
           (lambda _
             ;; All three patterns apply to the same file; the clauses are
             ;; tried on each line in the order written.
             (substitute* "test/functional/bio/test_command.rb"
               (("/bin/sh") (which "sh"))
               (("/bin/ls") (which "ls"))
               (("which") (which "which")))
             ;; Plain quoted list of file names.  A "," between the strings
             ;; would be read as an unquote inside this quasiquoted form.
             (substitute* '("test/functional/bio/test_command.rb"
                            "test/data/command/echoarg2.sh")
               (("/bin/echo") (which "echo")))
             #t)))))
    (synopsis "Ruby library, shell and utilities for bioinformatics")
    (description "BioRuby comes with a comprehensive set of Ruby development
tools and libraries for bioinformatics and molecular biology. BioRuby has
components for sequence analysis, pathway analysis, protein modelling and
phylogenetic analysis; it supports many widely used data formats and provides
easy access to databases, external programs and public web services, including
BLAST, KEGG, GenBank, MEDLINE and GO.")
    (home-page "http://bioruby.org/")
    ;; Code is released under Ruby license, except for setup
    ;; (LGPLv2.1+) and scripts in samples (which have GPL2 and GPL2+)
    (license (list license:ruby license:lgpl2.1+ license:gpl2+ ))))
7498
;; biocViews from Bioconductor: helper for categorizing packages in a
;; Bioconductor repository.
(define-public r-biocviews
  (package
    (name "r-biocviews")
    (version "1.56.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biocViews" version))
       (sha256
        (base32
         "0zcx8gha3x3jc0ra6ii6wwq2vfsmffrrnilknbq8h5xjrl55m6ci"))))
    (build-system r-build-system)
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "biocViews")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocmanager" ,r-biocmanager)
       ("r-graph" ,r-graph)
       ("r-rbgl" ,r-rbgl)
       ("r-rcurl" ,r-rcurl)
       ("r-xml" ,r-xml)
       ("r-runit" ,r-runit)))
    (synopsis "Bioconductor package categorization helper")
    (description "The purpose of biocViews is to create HTML pages that
categorize packages in a Bioconductor package repository according to keywords,
also known as views, in a controlled vocabulary.")
    (home-page "https://bioconductor.org/packages/biocViews")
    (license license:artistic2.0)))
7526
;; BiocStyle from Bioconductor: standard formatting styles for Bioconductor
;; documents.
(define-public r-biocstyle
  (package
    (name "r-biocstyle")
    (version "2.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocStyle" version))
       (sha256
        (base32
         "07rjl2n4sazdg581zh7w3yykzphgr2gpz41ba4ryqs7347vh9nbf"))))
    (build-system r-build-system)
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "BiocStyle")))
    ;; Note that r-knitr is listed both here and among the propagated
    ;; inputs below.
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-biocmanager" ,r-biocmanager)
       ("r-bookdown" ,r-bookdown)
       ("r-knitr" ,r-knitr)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-yaml" ,r-yaml)))
    (synopsis "Bioconductor formatting styles")
    (description "This package provides standard formatting styles for
Bioconductor PDF and HTML documents. Package vignettes illustrate use and
functionality.")
    (home-page "https://bioconductor.org/packages/BiocStyle")
    (license license:artistic2.0)))
7554
;; BiocCheck from Bioconductor: Bioconductor-specific package checks.  An
;; extra phase after `install' symlinks the package's scripts into the
;; output's bin directory.
(define-public r-bioccheck
  (package
    (name "r-bioccheck")
    (version "1.24.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "BiocCheck" version))
              (sha256
               (base32
                "1p9ys18sn1crxw1iac2jdgqvwpb5hjd9nfxx0qn0ncrv0b550mny"))))
    ;; Upstream spells the name in mixed case.
    (properties
     `((upstream-name . "BiocCheck")))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; This package can be used by calling BiocCheck(<package>) from
         ;; within R, or by running R CMD BiocCheck <package>. This phase
         ;; makes sure the latter works. For this to work, the BiocCheck
         ;; script must be somewhere on the PATH (not the R bin directory).
         (add-after 'install 'install-bioccheck-subcommand
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (dest-dir (string-append out "/bin"))
                    ;; No trailing slash: the file names appended below
                    ;; already start with "/", so this avoids "script//..."
                    ;; in the symlink targets.
                    (script-dir
                     (string-append out "/site-library/BiocCheck/script")))
               (mkdir-p dest-dir)
               (symlink (string-append script-dir "/checkBadDeps.R")
                        (string-append dest-dir "/checkBadDeps.R"))
               (symlink (string-append script-dir "/BiocCheck")
                        (string-append dest-dir "/BiocCheck")))
             #t)))))
    (propagated-inputs
     `(("r-codetools" ,r-codetools)
       ("r-graph" ,r-graph)
       ("r-httr" ,r-httr)
       ("r-knitr" ,r-knitr)
       ("r-optparse" ,r-optparse)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biocviews" ,r-biocviews)
       ("r-stringdist" ,r-stringdist)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/BiocCheck")
    (synopsis "Executes Bioconductor-specific package checks")
    (description "This package contains tools to perform additional quality
checks on R packages that are to be submitted to the Bioconductor repository.")
    (license license:artistic2.0)))
7603
;; S4Vectors from Bioconductor: S4 implementation of vectors and lists.
(define-public r-s4vectors
  (package
    (name "r-s4vectors")
    (version "0.26.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "S4Vectors" version))
       (sha256
        (base32
         "1ddr3ngyczx332zw9ai1a6h7442lgrbfcj8vrhvbkdkzqwj14xmb"))))
    (build-system r-build-system)
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "S4Vectors")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (synopsis "S4 implementation of vectors and lists")
    (description
     "The S4Vectors package defines the @code{Vector} and @code{List} virtual
classes and a set of generic functions that extend the semantic of ordinary
vectors and lists in R. Package developers can easily implement vector-like
or list-like objects as concrete subclasses of @code{Vector} or @code{List}.
In addition, a few low-level concrete subclasses of general interest (e.g.
@code{DataFrame}, @code{Rle}, and @code{Hits}) are implemented in the
S4Vectors package itself.")
    (home-page "https://bioconductor.org/packages/S4Vectors")
    (license license:artistic2.0)))
7630
;; IRanges from Bioconductor: infrastructure for manipulating intervals on
;; sequences.
(define-public r-iranges
  (package
    (name "r-iranges")
    (version "2.22.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "IRanges" version))
       (sha256
        (base32
         "1y24jw62806wp2afiyj1x6n00gj7d3323klqdypra9q43pg1w49d"))))
    (build-system r-build-system)
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "IRanges")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Infrastructure for manipulating intervals on sequences")
    (description
     "This package provides efficient low-level and highly reusable S4 classes
for storing ranges of integers, RLE vectors (Run-Length Encoding), and, more
generally, data that can be organized sequentially (formally defined as
@code{Vector} objects), as well as views on these @code{Vector} objects.
Efficient list-like classes are also provided for storing big collections of
instances of the basic classes. All classes in the package use consistent
naming and share the same rich and consistent \"Vector API\" as much as
possible.")
    (home-page "https://bioconductor.org/packages/IRanges")
    (license license:artistic2.0)))
7659
;; GenomeInfoDbData: species and taxonomy ID look-up tables for GenomeInfoDb.
;; This is an annotation data package, so its tarball URL is built by hand.
(define-public r-genomeinfodbdata
  (package
    (name "r-genomeinfodbdata")
    (version "1.2.0")
    (source
     (origin
       (method url-fetch)
       ;; We cannot use bioconductor-uri here because this tarball is
       ;; located under "data/annotation/" instead of "bioc/".
       (uri (string-append "https://bioconductor.org/packages/release/"
                           "data/annotation/src/contrib/GenomeInfoDbData_"
                           version ".tar.gz"))
       (sha256
        (base32
         "0di6nlqpsyqf693k2na65ayqldih563x3zfrczpqc5q2hl5kg35c"))))
    (build-system r-build-system)
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "GenomeInfoDbData")))
    (synopsis "Species and taxonomy ID look up tables for GenomeInfoDb")
    (description "This package contains data for mapping between NCBI taxonomy
ID and species. It is used by functions in the GenomeInfoDb package.")
    (home-page "https://bioconductor.org/packages/GenomeInfoDbData")
    (license license:artistic2.0)))
7682
;; GenomeInfoDb from Bioconductor: utilities for manipulating chromosome
;; identifiers.
(define-public r-genomeinfodb
  (package
    (name "r-genomeinfodb")
    (version "1.24.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomeInfoDb" version))
       (sha256
        (base32
         "1cqs53p4m5q1dr59war72bccphy01ilw4xra24fmngrv4x32rznd"))))
    (build-system r-build-system)
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "GenomeInfoDb")))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodbdata" ,r-genomeinfodbdata)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-s4vectors" ,r-s4vectors)))
    (synopsis "Utilities for manipulating chromosome identifiers")
    (description
     "This package contains data and functions that define and allow
translation between different chromosome sequence naming conventions (e.g.,
\"chr1\" versus \"1\"), including a function that attempts to place sequence
names in their natural, rather than lexicographic, order.")
    (home-page "https://bioconductor.org/packages/GenomeInfoDb")
    (license license:artistic2.0)))
7712
;; edgeR from Bioconductor: empirical analysis of digital gene expression
;; data.
(define-public r-edger
  (package
    (name "r-edger")
    (version "3.30.3")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "edgeR" version))
              (sha256
               (base32
                "1z9bkg08rgqn3jm2s4ndbj641w33wl8jd3j6m5if6h2nnw6011ic"))))
    ;; Upstream spells the name in mixed case.
    (properties `((upstream-name . "edgeR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-limma" ,r-limma)
       ("r-locfit" ,r-locfit)
       ("r-rcpp" ,r-rcpp)
       ("r-statmod" ,r-statmod))) ;for estimateDisp
    (home-page "http://bioinf.wehi.edu.au/edgeR")
    (synopsis "EdgeR does empirical analysis of digital gene expression data")
    (description "This package can do differential expression analysis of
RNA-seq expression profiles with biological replication. It implements a range
of statistical methodology based on the negative binomial distributions,
including empirical Bayes estimation, exact tests, generalized linear models
and quasi-likelihood tests. It can be applied to differential signal analysis of
other types of genomic data that produce counts, including ChIP-seq, SAGE and
CAGE.")
    (license license:gpl2+)))
7740
;; VariantAnnotation from Bioconductor: annotation of genetic variants.
(define-public r-variantannotation
  (package
    (name "r-variantannotation")
    (version "1.34.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "VariantAnnotation" version))
       (sha256
        (base32
         "09y6ymwky839nb0y7y93w810hk9mvwqn7595q1276c28dkddiqvw"))))
    (build-system r-build-system)
    ;; Upstream spells the name in mixed case.
    (properties '((upstream-name . "VariantAnnotation")))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)
       ("r-zlibbioc" ,r-zlibbioc)))
    (synopsis "Package for annotation of genetic variants")
    (description "This R package can annotate variants, compute amino acid
coding changes and predict coding outcomes.")
    (home-page "https://bioconductor.org/packages/VariantAnnotation")
    (license license:artistic2.0)))
7779
;; limma from Bioconductor: linear models for microarray and RNA-seq data.
;; This definition declares no inputs and no upstream-name property.
(define-public r-limma
  (package
    (name "r-limma")
    (version "3.44.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "limma" version))
       (sha256
        (base32
         "09fnqxx4rzq5n447aqg2l6y0idfwgz2jxz99sifxsr2q8afzbcj6"))))
    (build-system r-build-system)
    (synopsis "Package for linear models for microarray and RNA-seq data")
    (description "This package can be used for the analysis of gene expression
studies, especially the use of linear models for analysing designed experiments
and the assessment of differential expression. The analysis methods apply to
different technologies, including microarrays, RNA-seq, and quantitative PCR.")
    (home-page "http://bioinf.wehi.edu.au/limma")
    (license license:gpl2+)))
7798
;; XVector from Bioconductor: representation and manipulation of external
;; sequences.  References to zlibbioc are stripped from the package metadata
;; after unpacking, and zlib is supplied as a regular input instead.
(define-public r-xvector
  (package
    (name "r-xvector")
    (version "0.28.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "XVector" version))
              (sha256
               (base32
                "11h1hszv4798q1gbx8r6zf8vlaqx4v9ql0lbh2xaxybp66a03pvc"))))
    ;; Upstream spells the name in mixed case.
    (properties
     `((upstream-name . "XVector")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-zlib
           (lambda _
             ;; Drop zlibbioc from DESCRIPTION and its import from NAMESPACE.
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/XVector")
    (synopsis "Representation and manipulation of external sequences")
    (description
     "This package provides memory efficient S4 classes for storing sequences
\"externally\" (behind an R external pointer, or on disk).")
    (license license:artistic2.0)))
7834
;; GenomicRanges from Bioconductor: representation and manipulation of
;; genomic intervals.
(define-public r-genomicranges
  (package
    (name "r-genomicranges")
    (version "1.40.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "GenomicRanges" version))
              (sha256
               (base32
                "0wn1zr2qq0rpv9z2wialgizn6xzdlcjg1w2kif67n53svz6vk2x1"))))
    ;; Upstream spells the name in mixed case.
    (properties
     `((upstream-name . "GenomicRanges")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/GenomicRanges")
    (synopsis "Representation and manipulation of genomic intervals")
    (description
     "This package provides tools to efficiently represent and manipulate
genomic annotations and alignments. This ability plays a central role when it
comes to analyzing high-throughput sequencing data (a.k.a. NGS data). The
GenomicRanges package defines general purpose containers for storing and
manipulating genomic intervals and variables defined along a genome.")
    (license license:artistic2.0)))
7865
;; Biobase from Bioconductor: base functions for Bioconductor.
(define-public r-biobase
  (package
    (name "r-biobase")
    (version "2.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biobase" version))
       (sha256
        (base32
         "13p3kgnxm7hbn8cy289kbhaiyfa6rxx7l1pbvajwqzbay3cxznqp"))))
    (build-system r-build-system)
    ;; Upstream capitalizes the name.
    (properties '((upstream-name . "Biobase")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)))
    (synopsis "Base functions for Bioconductor")
    (description
     "This package provides functions that are needed by many other packages
on Bioconductor or which replace R functions.")
    (home-page "https://bioconductor.org/packages/Biobase")
    (license license:artistic2.0)))
7887
;; AnnotationDbi from Bioconductor.  r-knitr is only a native (build-time)
;; input; everything else is propagated.
(define-public r-annotationdbi
  (package
    (name "r-annotationdbi")
    (version "1.50.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationDbi" version))
       (sha256 (base32 "00pd8lsdppxlmx0l65phw0jhsm0qkwjc4wsdxpvgc31iiz9yslbj"))))
    (build-system r-build-system)
    (properties '((upstream-name . "AnnotationDbi")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-dbi" ,r-dbi)
       ("r-iranges" ,r-iranges)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/AnnotationDbi")
    (synopsis "Annotation database interface")
    (description
     "This package provides user interface and database connection code for
annotation data packages using SQLite data storage.")
    (license license:artistic2.0)))
7916
;; biomaRt from Bioconductor.
(define-public r-biomart
  (package
    (name "r-biomart")
    (version "2.44.1")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "biomaRt" version))
              (sha256
               (base32
                "0np4nh3gj60mgb6312z7x0z9fg5bhrhw872sp3dzgmqc8q8b84iz"))))
    ;; The upstream name has mixed case.
    (properties
     `((upstream-name . "biomaRt")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocfilecache" ,r-biocfilecache)
       ("r-httr" ,r-httr)
       ("r-openssl" ,r-openssl)
       ("r-progress" ,r-progress)
       ("r-rappdirs" ,r-rappdirs)
       ("r-stringr" ,r-stringr)
       ("r-xml" ,r-xml)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/biomaRt")
    (synopsis "Interface to BioMart databases")
    ;; Texinfo's @url takes the URL as its first argument and the displayed
    ;; text as its second; the two were previously swapped.
    (description
     "biomaRt provides an interface to a growing collection of databases
implementing the @url{http://www.biomart.org, BioMart software suite}. The
package enables retrieval of large amounts of data in a uniform way without
the need to know the underlying database schemas or write complex SQL queries.
Examples of BioMart databases are Ensembl, COSMIC, Uniprot, HGNC, Gramene,
Wormbase and dbSNP mapped to Ensembl. These major databases give biomaRt
users direct access to a diverse set of data and enable a wide range of
powerful online queries from gene annotation to database mining.")
    (license license:artistic2.0)))
7953
;; BiocParallel from Bioconductor.  The license field lists two licenses.
(define-public r-biocparallel
  (package
    (name "r-biocparallel")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BiocParallel" version))
       (sha256 (base32 "1lsg5xm5j3ly5k60fidzhkgqc9jgscyfkqngjvd9qp6yfzvsb82g"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BiocParallel")))
    (propagated-inputs
     `(("r-futile-logger" ,r-futile-logger)
       ("r-snow" ,r-snow)
       ("r-bh" ,r-bh)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/BiocParallel")
    (synopsis "Bioconductor facilities for parallel evaluation")
    (description
     "This package provides modified versions and novel implementation of
functions for parallel evaluation, tailored to use with Bioconductor
objects.")
    (license (list license:gpl2+ license:gpl3+))))
7980
;; Biostrings from Bioconductor.
(define-public r-biostrings
  (package
    (name "r-biostrings")
    (version "2.56.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Biostrings" version))
       (sha256 (base32 "0imhfz7dg8b3l5qzipjranqqshdsg2x6zc49drlhn8sc7j40cvi8"))))
    (build-system r-build-system)
    (properties '((upstream-name . "Biostrings")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-crayon" ,r-crayon)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/Biostrings")
    (synopsis "String objects and algorithms for biological sequences")
    (description
     "This package provides memory efficient string containers, string
matching algorithms, and other utilities, for fast manipulation of large
biological sequences or sets of sequences.")
    (license license:artistic2.0)))
8007
;; Rsamtools from Bioconductor, built against the zlib input instead of the
;; zlibbioc R package.
(define-public r-rsamtools
  (package
    (name "r-rsamtools")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rsamtools" version))
       (sha256 (base32 "0z01z0s71f941k7sns46nyabps28c69d6jxx6sppjpc6h4vrw0vq"))))
    (build-system r-build-system)
    (properties '((upstream-name . "Rsamtools")))
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Strip every mention of zlibbioc from the package metadata right
         ;; after unpacking.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               (("zlibbioc, ") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-bitops" ,r-bitops)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rhtslib" ,r-rhtslib)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/release/bioc/html/Rsamtools.html")
    (synopsis "Interface to samtools, bcftools, and tabix")
    (description
     "This package provides an interface to the @code{samtools},
@code{bcftools}, and @code{tabix} utilities for manipulating SAM (Sequence
Alignment / Map), FASTA, binary variant call (BCF) and compressed indexed
tab-delimited (tabix) files.")
    (license license:expat)))
8052
;; DelayedArray from Bioconductor.
(define-public r-delayedarray
  (package
    (name "r-delayedarray")
    (version "0.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DelayedArray" version))
       (sha256 (base32 "1lz7a0rrlfv3w44n073mk8pw39z7lfs0njdxp5vpp0rdsmvdf1qk"))))
    (build-system r-build-system)
    (properties '((upstream-name . "DelayedArray")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-matrixstats" ,r-matrixstats)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/DelayedArray")
    (synopsis "Delayed operations on array-like objects")
    (description
     "Wrapping an array-like object (typically an on-disk object) in a
@code{DelayedArray} object allows one to perform common array operations on it
without loading the object in memory. In order to reduce memory usage and
optimize performance, operations on the object are either delayed or executed
using a block processing mechanism. Note that this also works on in-memory
array-like objects like @code{DataFrame} objects (typically with Rle columns),
@code{Matrix} objects, and ordinary arrays and data frames.")
    (license license:artistic2.0)))
8085
;; SummarizedExperiment from Bioconductor.
(define-public r-summarizedexperiment
  (package
    (name "r-summarizedexperiment")
    (version "1.18.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SummarizedExperiment" version))
       (sha256 (base32 "1raw5ycigr6gjgzn3kx3jls5hzww10fhnwd8c17c20hmhirf13rw"))))
    (build-system r-build-system)
    (properties '((upstream-name . "SummarizedExperiment")))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-s4vectors" ,r-s4vectors)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/SummarizedExperiment")
    (synopsis "Container for representing genomic ranges by sample")
    (description
     "The SummarizedExperiment container contains one or more assays, each
represented by a matrix-like object of numeric or other mode. The rows
typically represent genomic ranges of interest and the columns represent
samples.")
    (license license:artistic2.0)))
8118
;; GenomicAlignments from Bioconductor.
(define-public r-genomicalignments
  (package
    (name "r-genomicalignments")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicAlignments" version))
       (sha256 (base32 "0v8k6d7frm5p48cmk4zik78cw9abz4inx0zhl4zrmmx31ifyvk8d"))))
    (build-system r-build-system)
    (properties '((upstream-name . "GenomicAlignments")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/GenomicAlignments")
    (synopsis "Representation and manipulation of short genomic alignments")
    (description
     "This package provides efficient containers for storing and manipulating
short genomic alignments (typically obtained by aligning short reads to a
reference genome). This includes read counting, computing the coverage,
junction detection, and working with the nucleotide content of the
alignments.")
    (license license:artistic2.0)))
8151
;; rtracklayer from Bioconductor, built against the zlib input instead of the
;; zlibbioc R package.
(define-public r-rtracklayer
  (package
    (name "r-rtracklayer")
    (version "1.48.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rtracklayer" version))
       (sha256 (base32 "1zcgk92sidhy4y7ws9ms4nkkh2hnccfhfh53qgna0kma9jy4v5xf"))))
    (build-system r-build-system)
    (arguments
     '(#:phases
       (modify-phases %standard-phases
         ;; Strip every mention of zlibbioc from the package metadata right
         ;; after unpacking.
         (add-after 'unpack 'use-system-zlib
           (lambda _
             (substitute* "DESCRIPTION"
               ((" zlibbioc,") ""))
             (substitute* "NAMESPACE"
               (("import\\(zlibbioc\\)") ""))
             #t)))))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xml" ,r-xml)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/rtracklayer")
    (synopsis "R interface to genome browsers and their annotation tracks")
    (description
     "rtracklayer is an extensible framework for interacting with multiple
genome browsers (currently UCSC built-in) and manipulating annotation tracks
in various formats (currently GFF, BED, bedGraph, BED15, WIG, BigWig and 2bit
built-in). The user may export/import tracks to/from the supported browsers,
as well as query and modify the browser state, such as the current viewport.")
    (license license:artistic2.0)))
8198
;; GenomicFeatures from Bioconductor.
(define-public r-genomicfeatures
  (package
    (name "r-genomicfeatures")
    (version "1.40.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFeatures" version))
       (sha256 (base32 "0zm7q957g952qyfqmvf27sldsnq6sqd0wlgnqzvmxayg9pxh0l0z"))))
    (build-system r-build-system)
    (properties '((upstream-name . "GenomicFeatures")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcurl" ,r-rcurl)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/GenomicFeatures")
    (synopsis "Tools for working with transcript centric annotations")
    (description
     "This package provides a set of tools and methods for making and
manipulating transcript centric annotations. With these tools the user can
easily download the genomic locations of the transcripts, exons and cds of a
given organism, from either the UCSC Genome Browser or a BioMart
database (more sources will be supported in the future). This information is
then stored in a local database that keeps track of the relationship between
transcripts, exons, cds and genes. Flexible methods are provided for
extracting the desired features in a convenient format.")
    (license license:artistic2.0)))
8241
;; GO.db annotation package.  The tarball URL is spelled out by hand and
;; points into Bioconductor's "data/annotation" tree.
(define-public r-go-db
  (package
    (name "r-go-db")
    (version "3.7.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append
             "https://www.bioconductor.org/packages/release/data/annotation/"
             "src/contrib/GO.db_" version ".tar.gz"))
       (sha256 (base32 "0i3wcf5h3n0dawzc1hy0kv74f06j80c47n4p3g3fmrcxlhi3jpa5"))))
    (build-system r-build-system)
    (properties '((upstream-name . "GO.db")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/GO.db")
    (synopsis "Annotation maps describing the entire Gene Ontology")
    (description
     "The purpose of this GO.db annotation package is to provide detailed
information about the latest version of the Gene Ontologies.")
    (license license:artistic2.0)))
8265
;; topGO from Bioconductor.
(define-public r-topgo
  (package
    (name "r-topgo")
    (version "2.40.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "topGO" version))
       (sha256 (base32 "13rhbvn27sj75fklf1cnjaazacx8yyjlhqlnbp5zk157q6y5cwdr"))))
    (build-system r-build-system)
    (properties '((upstream-name . "topGO")))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-dbi" ,r-dbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-go-db" ,r-go-db)
       ("r-graph" ,r-graph)
       ("r-lattice" ,r-lattice)
       ("r-matrixstats" ,r-matrixstats)
       ("r-sparsem" ,r-sparsem)))
    (home-page "https://bioconductor.org/packages/topGO")
    (synopsis "Enrichment analysis for gene ontology")
    (description
     "The topGO package provides tools for testing @dfn{gene ontology} (GO)
terms while accounting for the topology of the GO graph. Different test
statistics and different methods for eliminating local similarities and
dependencies between GO terms can be implemented and applied.")
    ;; Any version of the LGPL applies.
    (license license:lgpl2.1+)))
8298
;; BSgenome from Bioconductor.
(define-public r-bsgenome
  (package
    (name "r-bsgenome")
    (version "1.56.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "BSgenome" version))
       (sha256 (base32 "1jw8r1qm9fpg2s1cw2y4np243jjxm65j2xdy2785h8fc1b02msf6"))))
    (build-system r-build-system)
    (properties '((upstream-name . "BSgenome")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/BSgenome")
    (synopsis "Infrastructure for Biostrings-based genome data packages")
    (description
     "This package provides infrastructure shared by all Biostrings-based
genome data packages and support for efficient SNP representation.")
    (license license:artistic2.0)))
8329
;; impute from Bioconductor; gfortran is a native (build-time) input.
(define-public r-impute
  (package
    (name "r-impute")
    (version "1.62.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "impute" version))
       (sha256 (base32 "161p6l1cp3wwdynkxwvg0yhrh6yv20brdlplw5w5mavn4hf1nm0h"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (home-page "https://bioconductor.org/packages/impute")
    (synopsis "Imputation for microarray data")
    (description
     "This package provides a function to impute missing gene expression
microarray data, using nearest neighbor averaging.")
    (license license:gpl2+)))
8349
;; seqPattern from Bioconductor.
(define-public r-seqpattern
  (package
    (name "r-seqpattern")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "seqPattern" version))
       (sha256 (base32 "0f1yvx2ri1557rzjx08q5bgml1cvkm8hjl8xn1qi4rjs64sy6mci"))))
    (build-system r-build-system)
    (properties '((upstream-name . "seqPattern")))
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-plotrix" ,r-plotrix)))
    (home-page "https://bioconductor.org/packages/seqPattern")
    (synopsis "Visualising oligonucleotide patterns and motif occurrences")
    (description
     "This package provides tools to visualize oligonucleotide patterns and
sequence motif occurrences across a large set of sequences centred at a common
reference point and sorted by a user defined feature.")
    (license license:gpl3+)))
8376
;; genomation from Bioconductor.
(define-public r-genomation
  (package
    (name "r-genomation")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "genomation" version))
              (sha256
               (base32
                "1cy8kqwddiha5jy6nda1al956i4wncbgjkrxwijdb08cmka2sfwh"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-data-table" ,r-data-table)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridbase" ,r-gridbase)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-plotrix" ,r-plotrix)
       ("r-plyr" ,r-plyr)
       ("r-rcpp" ,r-rcpp)
       ("r-readr" ,r-readr)
       ("r-reshape2" ,r-reshape2)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-runit" ,r-runit)
       ("r-s4vectors" ,r-s4vectors)
       ("r-seqpattern" ,r-seqpattern)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Summary, annotation and visualization of genomic data")
    ;; Reworded from the redundant "This package provides a package for ...".
    (description
     "This package provides tools for summary and annotation of genomic
intervals. Users can visualize and quantify genomic intervals over
pre-defined functional regions, such as promoters, exons, introns, etc. The
genomic intervals represent regions with a defined chromosome position, which
may be associated with a score, such as aligned reads from HT-seq experiments,
TF binding sites, methylation scores, etc. The package can use any tabular
genomic feature data as long as it has minimal information on the locations of
genomic intervals. In addition, it can use BAM or BigWig files as input.")
    (license license:artistic2.0)))
8424
;; Experiment-data companion to the genomation package.
(define-public r-genomationdata
  (package
    (name "r-genomationdata")
    (version "1.14.0")
    (source (origin
              (method url-fetch)
              ;; We cannot use bioconductor-uri here because this tarball is
              ;; located under "data/experiment/" instead of "bioc/".
              (uri (string-append "https://bioconductor.org/packages/"
                                  "release/data/experiment/src/contrib/"
                                  "genomationData_" version ".tar.gz"))
              (sha256
               (base32
                "10xyb8akjrhmak2i0mnv1agny2ipy364q9nlibyplpzc7vdb6bw7"))))
    (build-system r-build-system)
    ;; As this package provides little more than large data files, it doesn't
    ;; make sense to build substitutes.
    (arguments `(#:substitutable? #f))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioinformatics.mdc-berlin.de/genomation/")
    (synopsis "Experimental data for use with the genomation package")
    (description
     "This package contains experimental genetic data for use with the
genomation package. Included are Chip Seq, Methylation and Cage data,
downloaded from Encode.")
    (license license:gpl3+)))
8452
;; seqLogo from Bioconductor; it declares no inputs of its own.
(define-public r-seqlogo
  (package
    (name "r-seqlogo")
    (version "1.54.3")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "seqLogo" version))
              (sha256
               (base32 "09kkxir305fv8z2yyihvspkrzclxbw1cx5mvhfkrhl10rap6662j"))))
    (build-system r-build-system)
    (properties '((upstream-name . "seqLogo")))
    (home-page "https://bioconductor.org/packages/seqLogo")
    (synopsis "Sequence logos for DNA sequence alignments")
    (description
     "seqLogo takes the position weight matrix of a DNA sequence motif and
plots the corresponding sequence logo as introduced by Schneider and
Stephens (1990).")
    (license license:lgpl2.0+)))
8473
;; motifRG from Bioconductor.
(define-public r-motifrg
  (package
    (name "r-motifrg")
    (version "1.31.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "motifRG" version))
              (sha256
               (base32 "1ml6zyzlk8yjbnfhga2qnw8nl43rankvka0kc1yljxr2b66aqbhn"))))
    (build-system r-build-system)
    (properties '((upstream-name . "motifRG")))
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-iranges" ,r-iranges)
       ("r-seqlogo" ,r-seqlogo)
       ("r-xvector" ,r-xvector)))
    (home-page "https://bioconductor.org/packages/motifRG")
    (synopsis "Discover motifs in high throughput sequencing data")
    (description
     "This package provides tools for discriminative motif discovery in high
throughput genetic sequencing data sets using regression methods.")
    (license license:artistic2.0)))
8500
;; R/qtl, fetched from CRAN.
(define-public r-qtl
  (package
    (name "r-qtl")
    (version "1.46-2")
    (source
     (origin
       (method url-fetch)
       ;; Use cran-uri, like r-qtl2 in this file, instead of a hand-written
       ;; mirror URL.
       (uri (cran-uri "qtl" version))
       (sha256
        (base32
         "0rbwcnvyy96gq1dsgpxx03pv423qya26h6ws5y0blj3blfdmj83a"))))
    (build-system r-build-system)
    (home-page "https://rqtl.org/")
    (synopsis "R package for analyzing QTL experiments in genetics")
    ;; The second paragraph previously mixed verb forms ("estimates ..., to
    ;; identify ..., and to perform ..."); it now uses parallel verbs.
    (description "R/qtl is an extension library for the R statistics
system. It is used to analyze experimental crosses for identifying
genes contributing to variation in quantitative traits (so-called
quantitative trait loci, QTLs).

Using a hidden Markov model, R/qtl estimates genetic maps, identifies
genotyping errors, and performs single-QTL and two-QTL,
two-dimensional genome scans.")
    (license license:gpl3)))
8525
;; R/qtl2, fetched from CRAN through cran-uri.
(define-public r-qtl2
  (package
    (name "r-qtl2")
    (version "0.22-11")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "qtl2" version))
       (sha256
        (base32
         "0dfdzjylqzc92dcszawc8cyinxccjm3p36v9vcq9ma818pqcanmr"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-jsonlite" ,r-jsonlite)
       ("r-rcpp" ,r-rcpp)
       ("r-rcppeigen" ,r-rcppeigen)
       ("r-rsqlite" ,r-rsqlite)
       ("r-yaml" ,r-yaml)))
    (home-page "https://kbroman.org/qtl2/")
    (synopsis "Quantitative Trait Locus Mapping in Experimental Crosses")
    (description
     "This package provides a set of tools to perform @dfn{Quantitative Trait
Locus} (QTL) analysis in experimental crosses. It is a reimplementation of the
@code{R/qtl} package to better handle high-dimensional data and complex cross
designs. Broman et al. (2018) <doi:10.1534/genetics.118.301595>.")
    (license license:gpl3)))
8551
;; zlibbioc from Bioconductor; it declares no inputs.
(define-public r-zlibbioc
  (package
    (name "r-zlibbioc")
    (version "1.34.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "zlibbioc" version))
       (sha256 (base32 "0j1l052jb2cwc1nifxzwknc9csagf4f2d092zs0i95dz0rma89l0"))))
    (build-system r-build-system)
    (properties '((upstream-name . "zlibbioc")))
    (home-page "https://bioconductor.org/packages/zlibbioc")
    (synopsis "Provider for zlib-1.2.5 to R packages")
    (description "This package uses the source code of zlib-1.2.5 to create
libraries for systems that do not have these available via other means.")
    (license license:artistic2.0)))
8570
;; R4RNA, fetched from the project's own site rather than from CRAN or
;; Bioconductor.
(define-public r-r4rna
  (package
    (name "r-r4rna")
    (version "0.1.4")
    (source
     (origin
       (method url-fetch)
       ;; Download over HTTPS, matching the scheme of the home-page below
       ;; (same host); the sha256 still pins the tarball contents.
       (uri (string-append "https://www.e-rna.org/r-chie/files/R4RNA_"
                           version ".tar.gz"))
       (sha256
        (base32
         "1p0i78wh76jfgmn9jphbwwaz6yy6pipzfg08xs54cxavxg2j81p5"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-optparse" ,r-optparse)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://www.e-rna.org/r-chie/index.cgi")
    (synopsis "Analysis framework for RNA secondary structure")
    (description
     "The R4RNA package aims to be a general framework for the analysis of RNA
secondary structure and comparative analysis in R.")
    (license license:gpl3+)))
8593
;; Rhtslib from Bioconductor.
(define-public r-rhtslib
  (package
    (name "r-rhtslib")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Rhtslib" version))
              (sha256
               (base32 "186r7icrkzrni1c4n33ip7dlsfgys7hnqf0simvxrpl3yhh3ygdi"))))
    (build-system r-build-system)
    (properties '((upstream-name . "Rhtslib")))
    ;; Without this a temporary directory ends up in the Rhtslib.so binary,
    ;; which makes R abort the build.
    (arguments
     `(#:configure-flags '("--no-staged-install")))
    (propagated-inputs
     `(("curl" ,curl)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)
       ("r-knitr" ,r-knitr)))
    (home-page "https://github.com/nhayden/Rhtslib")
    (synopsis "High-throughput sequencing library as an R package")
    (description
     "This package provides the HTSlib C library for high-throughput
nucleotide sequence analysis. The package is primarily useful to developers
of other R packages who wish to make use of HTSlib.")
    (license license:lgpl2.0+)))
8625
;; bamsignals from Bioconductor.
(define-public r-bamsignals
  (package
    (name "r-bamsignals")
    (version "1.20.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "bamsignals" version))
              (sha256
               (base32 "0p858xxfv79yc8b3lq58zl9f00irvbn3czsd8wdi5040xg42m402"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/bamsignals")
    (synopsis "Extract read count signals from bam files")
    (description
     "This package efficiently obtains count vectors from indexed bam
files. It counts the number of nucleotide sequence reads in given genomic
ranges and it computes reads profiles and coverage profiles. It also handles
paired-end data.")
    (license license:gpl2+)))
8657
;; RCAS from Bioconductor.  Besides R libraries, the propagated inputs
;; include pandoc (bound to ghc-pandoc).
(define-public r-rcas
  (package
    (name "r-rcas")
    (version "1.14.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "RCAS" version))
       (sha256 (base32 "0f812pgv3ys1zv4n9sqkgm01hj4cdd0i0h85dqbhkwd94zl6cavl"))))
    (build-system r-build-system)
    (properties '((upstream-name . "RCAS")))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-bsgenome" ,r-bsgenome)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggseqlogo" ,r-ggseqlogo)
       ("r-gprofiler2" ,r-gprofiler2)
       ("r-iranges" ,r-iranges)
       ("r-pbapply" ,r-pbapply)
       ("r-pheatmap" ,r-pheatmap)
       ("r-plotly" ,r-plotly)
       ("r-plotrix" ,r-plotrix)
       ("r-proxy" ,r-proxy)
       ("r-ranger" ,r-ranger)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-s4vectors" ,r-s4vectors)
       ("pandoc" ,ghc-pandoc)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/BIMSBbioinfo/RCAS")
    (synopsis "RNA-centric annotation system")
    (description
     "RCAS aims to be a standalone RNA-centric annotation system that provides
intuitive reports and publication-ready graphics. This package provides the R
library implementing most of the pipeline's features.")
    (license license:artistic2.0)))
8706
;; Web front end for RCAS, built with the GNU build system from a GitHub
;; release tarball.
(define-public rcas-web
  (package
    (name "rcas-web")
    (version "0.1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/rcas-web/"
                                  "releases/download/v" version
                                  "/rcas-web-" version ".tar.gz"))
              (sha256
               (base32 "0wq951aj45gqki1bickg876i993lmawkp8x24agg264br5x716db"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'configure 'find-RCAS
           ;; The configure script can't find non-1.3.x versions of RCAS because
           ;; its R expression ‘1.10.1 >= 1.3.4’ evaluates to false.
           (lambda _
             (substitute* "configure"
               (("1\\.3\\.4") "0.0.0"))
             #t))
         (add-after 'install 'wrap-executable
           (lambda* (#:key inputs outputs #:allow-other-keys)
             ;; SITE-DIR maps an input label to its Guile 2.2 site directory;
             ;; PATH joins the guile-json and guile-redis ones with a colon.
             (let* ((site-dir (lambda (label)
                                (string-append (assoc-ref inputs label)
                                               "/share/guile/site/2.2")))
                    (path (string-append (site-dir "guile-json")
                                         ":"
                                         (site-dir "guile-redis")))
                    (program (string-append (assoc-ref outputs "out")
                                            "/bin/rcas-web")))
               ;; The wrapper sets both Guile search paths to PATH and
               ;; R_LIBS_SITE to the value it has at build time.
               (wrap-program program
                 `("GUILE_LOAD_PATH" ":" = (,path))
                 `("GUILE_LOAD_COMPILED_PATH" ":" = (,path))
                 `("R_LIBS_SITE" ":" = (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (inputs
     `(("r-minimal" ,r-minimal)
       ("r-rcas" ,r-rcas)
       ("guile" ,guile-2.2)
       ("guile-json" ,guile-json-1)
       ("guile-redis" ,guile2.2-redis)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/BIMSBbioinfo/rcas-web")
    (synopsis "Web interface for RNA-centric annotation system (RCAS)")
    (description "This package provides a simple web interface for the
@dfn{RNA-centric annotation system} (RCAS).")
    (license license:agpl3+)))
8757
;; Bioconductor package MutationalPatterns, built with the R build system.
(define-public r-mutationalpatterns
  (package
    (name "r-mutationalpatterns")
    (version "2.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MutationalPatterns" version))
       (sha256
        (base32 "02lyjiabyhmifycksvpcx29a0pb7z9xjw0hgg8n0sd0dy3afqhcm"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ;; The two BSgenome packages below are only suggested by upstream.
       ("r-bsgenome-hsapiens-1000g" ,r-bsgenome-hsapiens-1000genomes-hs37d5)
       ("r-bsgenome-hsapiens-ucsc-hg19" ,r-bsgenome-hsapiens-ucsc-hg19)
       ("r-genomicranges" ,r-genomicranges)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iranges" ,r-iranges)
       ("r-nmf" ,r-nmf)
       ("r-plyr" ,r-plyr)
       ("r-pracma" ,r-pracma)
       ("r-reshape2" ,r-reshape2)
       ("r-cowplot" ,r-cowplot)
       ("r-ggdendro" ,r-ggdendro)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/MutationalPatterns/")
    (synopsis "Extract and visualize mutational patterns in genomic data")
    (description "This package provides an extensive toolset for the
characterization and visualization of a wide range of mutational patterns
in SNV base substitution data.")
    (license license:expat)))
8795
;; ChIPKernels is packaged from a pinned Git commit of the upstream
;; repository; the version string embeds the revision and the first nine
;; characters of that commit.
(define-public r-chipkernels
  (let ((commit "c9cfcacb626b1221094fb3490ea7bac0fd625372")
        (revision "1"))
    (package
      (name "r-chipkernels")
      (version (string-append "1.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ManuSetty/ChIPKernels")
               (commit commit)))
         ;; Use the standard helper for naming Git checkouts instead of a
         ;; hand-built "NAME-VERSION" string, so that the store item carries
         ;; the usual "-checkout" suffix like the other Git-based packages.
         ;; The content hash below is unaffected by the file name.
         (file-name (git-file-name name version))
         (sha256
          (base32 "14bj5qhjm1hsm9ay561nfbqi9wxsa7y487df2idsaaf6z10nw4v0"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-iranges" ,r-iranges)
         ("r-xvector" ,r-xvector)
         ("r-biostrings" ,r-biostrings)
         ("r-bsgenome" ,r-bsgenome)
         ("r-gtools" ,r-gtools)
         ("r-genomicranges" ,r-genomicranges)
         ("r-sfsmisc" ,r-sfsmisc)
         ("r-kernlab" ,r-kernlab)
         ("r-s4vectors" ,r-s4vectors)
         ("r-biocgenerics" ,r-biocgenerics)))
      (home-page "https://github.com/ManuSetty/ChIPKernels")
      (synopsis "Build string kernels for DNA Sequence analysis")
      (description "ChIPKernels is an R package for building different string
kernels used for DNA Sequence analysis. A dictionary of the desired kernel
must be built and this dictionary can be used for determining kernels for DNA
Sequences.")
      (license license:gpl2+))))
8831
;; SeqGL is fetched from upstream's Git repository; the commit that is
;; checked out is named by the version string itself.
(define-public r-seqgl
  (package
    (name "r-seqgl")
    (version "1.1.4")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ManuSetty/SeqGL")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1r6ywvhxl3ffv48lgj7sbd582mcc6dha3ksgc2qjlvjrnkbj3799"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-chipkernels" ,r-chipkernels)
       ("r-genomicranges" ,r-genomicranges)
       ("r-spams" ,r-spams)
       ("r-wgcna" ,r-wgcna)
       ("r-fastcluster" ,r-fastcluster)))
    (home-page "https://github.com/ManuSetty/SeqGL")
    (synopsis "Group lasso for Dnase/ChIP-seq data")
    (description "SeqGL is a group lasso based algorithm to extract
transcription factor sequence signals from ChIP, DNase and ATAC-seq profiles.
This package presents a method which uses group lasso to discriminate between
bound and non bound genomic regions to accurately identify transcription
factors bound at the specific regions.")
    (license license:gpl2+)))
8862
;; Bioconductor package tximport; knitr is only a build-time input.
(define-public r-tximport
  (package
    (name "r-tximport")
    (version "1.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "tximport" version))
       (sha256
        (base32 "1x9959lkjl2h869rgd1b30q1idxzjkr1fyqbpndqk3kbi4q2gr40"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/tximport")
    (synopsis "Import and summarize transcript-level estimates for gene-level analysis")
    (description
     "This package provides tools to import transcript-level abundance,
estimated counts and transcript lengths, and to summarize them into matrices
for use with downstream gene-level analysis packages. Average transcript
length, weighted by sample-specific transcript abundance estimates, is
provided as a matrix which can be used as an offset for different expression
of gene-level counts.")
    (license license:gpl2+)))
8886
;; Bioconductor package rhdf5.  It propagates Rhdf5lib and additionally
;; takes zlib as a plain input.
(define-public r-rhdf5
  (package
    (name "r-rhdf5")
    (version "2.32.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "rhdf5" version))
       (sha256
        (base32 "1v6ygi0avh3gmaj2ld2nr7vww4ipw39b5kqci9w27i3ja985lb8j"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-rhdf5lib" ,r-rhdf5lib)))
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/rhdf5")
    (synopsis "HDF5 interface to R")
    (description
     "This R/Bioconductor package provides an interface between HDF5 and R.
HDF5's main features are the ability to store and access very large and/or
complex datasets and a wide variety of metadata on mass storage (disk) through
a completely portable file format. The rhdf5 package is thus suited for the
exchange of large and/or complex datasets between R and other software
package, and for letting R applications work on datasets that are larger than
the available RAM.")
    (license license:artistic2.0)))
8915
;; Bioconductor package AnnotationFilter.  The upstream name is recorded in
;; the package properties because it differs from the Guix package name.
(define-public r-annotationfilter
  (package
    (name "r-annotationfilter")
    (version "1.12.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationFilter" version))
       (sha256
        (base32 "18kh1xrhpwb48s1qj4f1v8af3jmw49pnbp5afi2myn9894hxg0cs"))))
    (properties
     `((upstream-name . "AnnotationFilter")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-genomicranges" ,r-genomicranges)
       ("r-lazyeval" ,r-lazyeval)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/Bioconductor/AnnotationFilter")
    (synopsis "Facilities for filtering Bioconductor annotation resources")
    (description
     "This package provides classes and other infrastructure to implement
filters for manipulating Bioconductor annotation resources. The filters are
used by @code{ensembldb}, @code{Organism.dplyr}, and other packages.")
    (license license:artistic2.0)))
8941
;; EMBOSS is built from the release tarball, but the build system files are
;; patched first and then regenerated with autoreconf.
(define-public emboss
  (package
    (name "emboss")
    (version "6.5.7")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "ftp://emboss.open-bio.org/pub/EMBOSS/old/"
                           (version-major+minor version) ".0/"
                           "EMBOSS-" version ".tar.gz"))
       (sha256
        (base32 "0vsmz96gc411yj2iyzdrsmg4l2n1nhgmp7vrgzlxx3xixv9xbf0q"))))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       ;; Point configure at the libharu input.
       (list (string-append "--with-hpdf="
                            (assoc-ref %build-inputs "libharu")))
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-checks
           (lambda _
             ;; CHECK_PNGDRIVER looks for libgd, libpng and zlib but expects
             ;; all three under one common prefix.  Replace the macro call
             ;; with the definitions it would make on success.
             (substitute* "configure.in"
               (("CHECK_PNGDRIVER")
                "LIBS=\"$LIBS -lgd -lpng -lz -lm\"
AC_DEFINE([PLD_png], [1], [Define to 1 if PNG support is available])
AM_CONDITIONAL(AMPNG, true)"))
             #t))
         (add-after 'fix-checks 'disable-update-check
           (lambda _
             ;; The build has no Internet access, so the update check
             ;; cannot work; drop the embossupdate invocation.
             (substitute* "Makefile.am"
               (("\\$\\(bindir\\)/embossupdate") ""))
             #t))
         ;; Regenerate the build system from the files patched above.
         (add-after 'disable-update-check 'autogen
           (lambda _
             (invoke "autoreconf" "-vif")
             #t)))))
    (inputs
     `(("perl" ,perl)
       ("libpng" ,libpng)
       ("gd" ,gd)
       ("libx11" ,libx11)
       ("libharu" ,libharu)
       ("zlib" ,zlib)))
    (native-inputs
     `(("autoconf" ,autoconf)
       ("automake" ,automake)
       ("libtool" ,libtool)
       ("pkg-config" ,pkg-config)))
    (home-page "http://emboss.sourceforge.net")
    (synopsis "Molecular biology analysis suite")
    (description "EMBOSS is the \"European Molecular Biology Open Software
Suite\". EMBOSS is an analysis package specially developed for the needs of
the molecular biology (e.g. EMBnet) user community. The software
automatically copes with data in a variety of formats and even allows
transparent retrieval of sequence data from the web. It also provides a
number of libraries for the development of software in the field of molecular
biology. EMBOSS also integrates a range of currently available packages and
tools for sequence analysis into a seamless whole.")
    (license license:gpl2+)))
9004
;; BITS is packaged from a pinned Git commit.  There is no configure script,
;; and the "install" phase just copies the "bin" directory to the output.
(define-public bits
  (let ((revision "1")
        (commit "3cc4567896d9d6442923da944beb704750a08d2d"))
    (package
      (name "bits")
      ;; No release archive has been published so far, yet the version is
      ;; 2.13.0.
      (version (string-append "2.13.0-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/bits")
               (commit commit)))
         ;; Expands to "NAME-VERSION-checkout".
         (file-name (git-file-name name version))
         (sha256
          (base32 "17n2kffk4kmhivd8c98g2vr6y1s23vbg4sxlxs689wni66797hbs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; the source ships no tests
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'remove-cuda
             (lambda _
               ;; Blank every Makefile line that mentions a "_cuda" item and
               ;; strip the line continuation that follows
               ;; "bits_test_intersections".
               (substitute* "Makefile"
                 ((".*_cuda") "")
                 (("(bits_test_intersections) \\\\" _ match) match))
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((target (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (copy-recursively "bin" target))
               #t)))))
      (inputs
       `(("gsl" ,gsl)
         ("zlib" ,zlib)))
      (home-page "https://github.com/arq5x/bits")
      (synopsis "Implementation of binary interval search algorithm")
      (description "This package provides an implementation of the
BITS (Binary Interval Search) algorithm, an approach to interval set
intersection. It is especially suited for the comparison of diverse genomic
datasets and the exploration of large datasets of genome
intervals (e.g. genes, sequence alignments).")
      (license license:gpl2))))
9050
(define-public piranha
  ;; Upstream offers no release tarball for the most recent version, so a
  ;; fixed commit is used.  That commit was already more than a year old when
  ;; this definition was written.
  (let ((revision "1")
        (commit "0466d364b71117d01e4471b74c514436cc281233"))
    (package
      (name "piranha")
      (version (string-append "1.2.1-" revision "." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/smithlabcode/piranha")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "117dc0zf20c61jam69sk4abl57ah6yi6i7qra7d7y5zrbgk12q5n"))))
      (build-system gnu-build-system)
      (arguments
       `(#:test-target "test"
         #:configure-flags
         ;; Tell configure where the bamtools headers and library live.
         (list (string-append "--with-bam_tools_headers="
                              (assoc-ref %build-inputs "bamtools")
                              "/include/bamtools")
               (string-append "--with-bam_tools_library="
                              (assoc-ref %build-inputs "bamtools")
                              "/lib/bamtools"))
         #:phases
         (modify-phases %standard-phases
           ;; Copy every file of the smithlab_cpp checkout into the
           ;; directory "src/smithlab_cpp" of the source tree.
           (add-after 'unpack 'copy-smithlab-cpp
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((sources (find-files (assoc-ref inputs "smithlab-cpp"))))
                 (for-each (lambda (source)
                             (install-file source "./src/smithlab_cpp/"))
                           sources))
               #t))
           ;; After the regular install phase, copy whatever is found under
           ;; "bin" in the build tree to the output's "bin" directory.
           (add-after 'install 'install-to-store
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((target (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (for-each (lambda (program)
                             (install-file program target))
                           (find-files "bin" ".*")))
               #t)))))
      (inputs
       `(("bamtools" ,bamtools)
         ("samtools" ,samtools-0.1)
         ("gsl" ,gsl)
         ("smithlab-cpp"
          ,(let ((commit "3723e2db438c51501d0423429ff396c3035ba46a"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/smithlabcode/smithlab_cpp")
                     (commit commit)))
               ;; Expands to "smithlab_cpp-COMMIT-checkout".
               (file-name (git-file-name "smithlab_cpp" commit))
               (sha256
                (base32
                 "0l4gvbwslw5ngziskja41c00x1r06l3yidv7y0xw9djibhykzy0g")))))))
      (native-inputs
       `(("python" ,python-2)))
      (home-page "https://github.com/smithlabcode/piranha")
      (synopsis "Peak-caller for CLIP-seq and RIP-seq data")
      (description
       "Piranha is a peak-caller for genomic data produced by CLIP-seq and
RIP-seq experiments. It takes input in BED or BAM format and identifies
regions of statistically significant read enrichment. Additional covariates
may optionally be provided to further inform the peak-calling process.")
      (license license:gpl3+))))
9117
;; PePr is fetched from PyPI and built with Python 2.
(define-public pepr
  (package
    (name "pepr")
    (version "1.0.9")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "PePr" version))
       (sha256
        (base32 "0qxjfdpl1b1y53nccws2d85f6k74zwmx8y8sd9rszcqhfayx6gdx"))))
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2               ; upstream supports Python 2 only
       #:tests? #f))                    ; the source ships no tests
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://github.com/shawnzhangyx/PePr")
    (synopsis "Peak-calling and prioritization pipeline for ChIP-Seq data")
    (description
     "PePr is a ChIP-Seq peak calling or differential binding analysis tool
that is primarily designed for data with biological replicates. It uses a
negative binomial distribution to model the read counts among the samples in
the same group, and look for consistent differences between ChIP and control
group or two ChIP groups run under different conditions.")
    (license license:gpl3+)))
9145
;; filevercmp is packaged from a pinned Git commit.  The source has no
;; configure script and no test suite, and the single program is installed
;; by hand.
(define-public filevercmp
  (let ((commit "1a9b779b93d0b244040274794d402106907b71b7"))
    (package
      (name "filevercmp")
      (version (string-append "0-1." (string-take commit 7)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/filevercmp")
               (commit commit)))
         (file-name (git-file-name name commit))
         (sha256
          (base32 "1j9vxsy0y050v59h0q1d6501fcw1kjvj0d18l1xk2zyg0jzj247c"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; nothing to run
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; no configure script
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (install-file "filevercmp"
                             (string-append (assoc-ref outputs "out")
                                            "/bin"))
               #t)))))
      (home-page "https://github.com/ekg/filevercmp")
      (synopsis "This program compares version strings")
      (description "This program compares version strings. It intends to be a
replacement for strverscmp.")
      (license license:gpl3+))))
9176
;; MultiQC is fetched from PyPI.  Its setup.py is patched before the build
;; so that the matplotlib requirement carries no version constraint.
(define-public multiqc
  (package
    (name "multiqc")
    (version "1.5")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "multiqc" version))
       (sha256
        (base32
         "02iihfl0w0hpnr4pa0sbd1y9qxrg3ycyhjp5lidkcrqh1lmzs3zy"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-jinja2" ,python-jinja2)
       ("python-simplejson" ,python-simplejson)
       ("python-pyyaml" ,python-pyyaml)
       ("python-click" ,python-click)
       ("python-spectra" ,python-spectra)
       ("python-requests" ,python-requests)
       ("python-markdown" ,python-markdown)
       ("python-lzstring" ,python-lzstring)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ;; MultiQC checks for the presence of nose at runtime.
       ("python-nose" ,python-nose)))
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               ;; MultiQC 1.5 ‘requires’ a version of python-matplotlib older
               ;; than the one in Guix, but should work fine with 2.2.2.
               ;; See <https://github.com/ewels/MultiQC/issues/725> and
               ;; <https://github.com/ewels/MultiQC/issues/732> for details.
               ;;
               ;; The pattern matches one quoted requirement string.  POSIX
               ;; extended regexps have no lazy ‘*?’ quantifier, so a negated
               ;; bracket expression is used to stop at the closing quote
               ;; instead of running on to the last quote on the line.
               (("['\"]matplotlib[^'\"]*['\"]")
                "'matplotlib'"))
             #t)))))
    (home-page "https://multiqc.info")
    (synopsis "Aggregate bioinformatics analysis reports")
    (description
     "MultiQC is a tool to aggregate bioinformatics results across many
samples into a single report. It contains modules for a large number of
common bioinformatics tools.")
    (license license:gpl3+)))
9222
;; Variant tools is fetched from a fixed Git commit and built with the
;; Python build system.
(define-public variant-tools
  (package
    (name "variant-tools")
    (version "3.1.2")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/vatlab/varianttools")
             ;; Upstream has not tagged version 3.1.2, hence the raw commit.
             (commit "813ae4a90d25b69abc8a40f4f70441fe09015249")))
       (file-name (git-file-name name version))
       (sha256
        (base32 "12ibdmksj7icyqhks4xyvd61bygk4pjmxn618kp6vgk1af01y34g"))))
    (build-system python-build-system)
    (inputs
     `(("boost" ,boost)
       ("c-blosc" ,c-blosc)
       ("gsl" ,gsl)
       ("hdf5" ,hdf5)
       ("hdf5-blosc" ,hdf5-blosc)
       ("python-cython" ,python-cython)
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-pycurl" ,python-pycurl)
       ("python-pyzmq" ,python-pyzmq)
       ("python-scipy" ,python-scipy)
       ("python-tables" ,python-tables)))
    (home-page "https://vatlab.github.io/vat-docs/")
    (synopsis "Analyze genetic variants from Next-Gen sequencing studies")
    (description
     "Variant tools is a tool for the manipulation, annotation,
selection, simulation, and analysis of variants in the context of next-gen
sequencing analysis. Unlike some other tools used for next-gen sequencing
analysis, variant tools is project based and provides a whole set of tools to
manipulate and analyze genetic variants.")
    (license license:gpl3+)))
9262
;; Bioconductor package chipseq, built with the R build system.
(define-public r-chipseq
  (package
    (name "r-chipseq")
    (version "1.38.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "chipseq" version))
       (sha256
        (base32 "0lh859s0aq73vac1phcgagf6n000qgq2xsk0bmfr61n5swifml2a"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shortread" ,r-shortread)))
    (home-page "https://bioconductor.org/packages/chipseq")
    (synopsis "Package for analyzing ChIPseq data")
    (description
     "This package provides tools for processing short read data from ChIPseq
experiments.")
    (license license:artistic2.0)))
9288
;; CopyhelpeR is a Bioconductor *experiment data* package; it carries no
;; inputs of its own.
(define-public r-copyhelper
  (package
    (name "r-copyhelper")
    (version "1.6.0")
    (source
     (origin
       (method url-fetch)
       ;; Let the helper build the experiment-data URLs instead of
       ;; hard-coding the "release" location only: tarballs leave the
       ;; "release" directory at the next Bioconductor release, and the
       ;; helper also provides the versioned location as a fallback.
       (uri (bioconductor-uri "CopyhelpeR" version 'experiment))
       (sha256
        (base32
         "0x7cyynjmxls9as2gg0iyp9x5fpalxmdjq914ss7i84i9zyk5bhq"))))
    (properties `((upstream-name . "CopyhelpeR")))
    (build-system r-build-system)
    (home-page "https://bioconductor.org/packages/CopyhelpeR/")
    (synopsis "Helper files for CopywriteR")
    (description
     "This package contains the helper files that are required to run the
Bioconductor package CopywriteR. It contains pre-assembled 1kb bin GC-content
and mappability files for the reference genomes hg18, hg19, hg38, mm9 and
mm10. In addition, it contains a blacklist filter to remove regions that
display copy number variation. Files are stored as GRanges objects from the
GenomicRanges Bioconductor package.")
    (license license:gpl2)))
9314
;; Bioconductor package CopywriteR; it propagates r-copyhelper among its
;; other R dependencies.
(define-public r-copywriter
  (package
    (name "r-copywriter")
    (version "2.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "CopywriteR" version))
       (sha256
        (base32 "0c36wpv0rygkbqpf3dwh5xmc3lr7p8lrdzsq2fbbpw04skl6i7m2"))))
    (properties `((upstream-name . "CopywriteR")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-chipseq" ,r-chipseq)
       ("r-copyhelper" ,r-copyhelper)
       ("r-data-table" ,r-data-table)
       ("r-dnacopy" ,r-dnacopy)
       ("r-futile-logger" ,r-futile-logger)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/PeeperLab/CopywriteR")
    (synopsis "Copy number information from targeted sequencing")
    (description
     "CopywriteR extracts DNA copy number information from targeted sequencing
by utilizing off-target reads. It allows for extracting uniformly distributed
copy number information, can be used without reference, and can be applied to
sequencing data obtained from various techniques including chromatin
immunoprecipitation and target enrichment on small gene panels. Thereby,
CopywriteR constitutes a widely applicable alternative to available copy
number detection tools.")
    (license license:gpl2)))
9354
;; Bioconductor package methylKit.  Besides its propagated R dependencies it
;; takes zlib as an input and knitr as a build-time input.
(define-public r-methylkit
  (package
    (name "r-methylkit")
    (version "1.14.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "methylKit" version))
       (sha256
        (base32 "1qr13d2712ypbn96ijic2z5adr5dsd61kzscx7shw6vyj360rlm5"))))
    (properties `((upstream-name . "methylKit")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-emdbook" ,r-emdbook)
       ("r-fastseg" ,r-fastseg)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-gtools" ,r-gtools)
       ("r-iranges" ,r-iranges)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-limma" ,r-limma)
       ("r-mclust" ,r-mclust)
       ("r-mgcv" ,r-mgcv)
       ("r-qvalue" ,r-qvalue)
       ("r-r-utils" ,r-r-utils)
       ("r-rcpp" ,r-rcpp)
       ("r-rhtslib" ,r-rhtslib)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-zlibbioc" ,r-zlibbioc)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))           ; needed to build the vignettes
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/al2na/methylKit")
    (synopsis
     "DNA methylation analysis from high-throughput bisulfite sequencing results")
    (description
     "MethylKit is an R package for DNA methylation analysis and annotation
from high-throughput bisulfite sequencing. The package is designed to deal
with sequencing data from @dfn{Reduced representation bisulfite
sequencing} (RRBS) and its variants, but also target-capture methods and whole
genome bisulfite sequencing. It also has functions to analyze base-pair
resolution 5hmC data from experimental protocols such as oxBS-Seq and
TAB-Seq.")
    (license license:artistic2.0)))
9403
;; Bioconductor package sva, built with the R build system.
(define-public r-sva
  (package
    (name "r-sva")
    (version "3.36.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "sva" version))
       (sha256
        (base32 "0xa1lm0k1a6nig90mab6xh4gln88rbs5l1cdr6ik6agg7jhs7ji4"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-edger" ,r-edger)
       ("r-genefilter" ,r-genefilter)
       ("r-mgcv" ,r-mgcv)
       ("r-biocparallel" ,r-biocparallel)
       ("r-matrixstats" ,r-matrixstats)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/sva")
    (synopsis "Surrogate variable analysis")
    (description
     "This package contains functions for removing batch effects and other
unwanted variation in high-throughput experiment. It also contains functions
for identifying and building surrogate variables for high-dimensional data
sets. Surrogate variables are covariates constructed directly from
high-dimensional data like gene expression/RNA sequencing/methylation/brain
imaging data that can be used in subsequent analyses to adjust for unknown,
unmodeled, or latent sources of noise.")
    (license license:artistic2.0)))
9434
;; CRAN package seqminer; zlib is its only declared input.
(define-public r-seqminer
  (package
    (name "r-seqminer")
    (version "8.0")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "seqminer" version))
       (sha256
        (base32 "00jzj8mwb0zaiwlifd41b26mrq9mzigj18nc29dydi0r42hxg16i"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (home-page "http://seqminer.genomic.codes")
    (synopsis "Read nucleotide sequence data (VCF, BCF, and METAL formats)")
    (description
     "This package provides tools to integrate nucleotide sequencing
data (variant call format, e.g. VCF or BCF) or meta-analysis results in R.")
    ;; Upstream accepts any version of the GPL.
    (license (list license:gpl2+ license:gpl3+))))
9456
;; RareMETALS2 is downloaded as a tarball from the genome.sph.umich.edu
;; wiki and built with the R build system.
(define-public r-raremetals2
  (package
    (name "r-raremetals2")
    (version "0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://genome.sph.umich.edu/w/images/"
                           "b/b7/RareMETALS2_" version ".tar.gz"))
       (sha256
        (base32 "0z5ljcgvnm06ja9lm85a3cniq7slxcy37aqqkxrdidr79an5fs4s"))))
    (properties `((upstream-name . "RareMETALS2")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-seqminer" ,r-seqminer)
       ("r-mvtnorm" ,r-mvtnorm)
       ("r-mass" ,r-mass)
       ("r-compquadform" ,r-compquadform)
       ("r-getopt" ,r-getopt)))
    (home-page "http://genome.sph.umich.edu/wiki/RareMETALS2")
    (synopsis "Analyze gene-level association tests for binary trait")
    (description
     "The R package rareMETALS2 is an extension of the R package rareMETALS.
It was designed to meta-analyze gene-level association tests for binary trait.
While rareMETALS offers a near-complete solution for meta-analysis of
gene-level tests for quantitative trait, it does not offer the optimal
solution for binary trait. The package rareMETALS2 offers improved features
for analyzing gene-level association tests in meta-analyses for binary
trait.")
    (license license:gpl3)))
9488
;; CRAN package MALDIquant; no inputs are declared for it.
(define-public r-maldiquant
  (package
    (name "r-maldiquant")
    (version "1.19.3")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "MALDIquant" version))
       (sha256
        (base32 "0b7kdz3x4sdq413h1q09l1qhcvdnnwv6fqsqwllks1cd3xy34c57"))))
    (properties `((upstream-name . "MALDIquant")))
    (build-system r-build-system)
    (home-page "https://cran.r-project.org/web/packages/MALDIquant")
    (synopsis "Quantitative analysis of mass spectrometry data")
    (description
     "This package provides a complete analysis pipeline for matrix-assisted
laser desorption/ionization-time-of-flight (MALDI-TOF) and other
two-dimensional mass spectrometry data. In addition to commonly used plotting
and processing methods it includes distinctive features, namely baseline
subtraction methods such as morphological filters (TopHat) or the
statistics-sensitive non-linear iterative peak-clipping algorithm (SNIP), peak
alignment using warping functions, handling of replicated measurements as well
as allowing spectra with different resolutions.")
    (license license:gpl3+)))
9514
;; Bioconductor package ProtGenerics; no inputs are declared for it.
(define-public r-protgenerics
  (package
    (name "r-protgenerics")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ProtGenerics" version))
       (sha256
        (base32 "14xzdh7vxss8vmrw91hcwrszdn3ikm71mah8875b2lkrkrfzbl73"))))
    (properties `((upstream-name . "ProtGenerics")))
    (build-system r-build-system)
    (home-page "https://github.com/lgatto/ProtGenerics")
    (synopsis "S4 generic functions for proteomics infrastructure")
    (description
     "This package provides S4 generic functions needed by Bioconductor
proteomics packages.")
    (license license:artistic2.0)))
9534
;; Bioconductor package mzR.  The bundled Boost sources are removed from the
;; tarball and the build is redirected to the Boost input instead.
(define-public r-mzr
  (package
    (name "r-mzr")
    (version "2.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "mzR" version))
       (sha256
        (base32 "1r8j8yiz5lcan7j4h37sza2kwczl48dxvld3da3ghjjq67cdc2cm"))
       (modules '((guix build utils)))
       ;; Drop the bundled copy of Boost.
       (snippet
        '(begin
           (delete-file-recursively "src/boost")
           #t))))
    (properties `((upstream-name . "mzR")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'use-system-boost
           (lambda _
             ;; Remove the references to the bundled Boost sources from
             ;; Makevars and, right after the "ARCH_OBJS=" text, add an
             ;; ARCH_LIBS line that links against the Boost libraries.
             (substitute* "src/Makevars"
               (("\\./boost/libs.*") "")
               (("ARCH_OBJS=" line)
                (string-append line
                               "\nARCH_LIBS=-lboost_system -lboost_regex \
-lboost_iostreams -lboost_thread -lboost_filesystem -lboost_chrono\n")))
             #t)))))
    (inputs
     `(;; Sadly the default boost package does not work here, not even with
       ;; mzR version 2.22.0.  This variant stands in for the bundled Boost
       ;; sources.
       ("boost" ,boost-for-mysql)
       ("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-ncdf4" ,r-ncdf4)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-zlibbioc" ,r-zlibbioc)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/sneumann/mzR/")
    (synopsis "Parser for mass spectrometry data files")
    (description
     "The mzR package provides a unified API to the common file formats and
parsers available for mass spectrometry data. It comes with a wrapper for the
ISB random access parser for mass spectrometry mzXML, mzData and mzML files.
The package contains the original code written by the ISB, and a subset of the
proteowizard library for mzML and mzIdentML. The netCDF reading code has
previously been used in XCMS.")
    (license license:artistic2.0)))
9590
;; Bioconductor package affyio.  It propagates zlibbioc and additionally
;; takes zlib as a plain input.
(define-public r-affyio
  (package
    (name "r-affyio")
    (version "1.58.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "affyio" version))
       (sha256
        (base32 "0j1f61409yq6hmkqrpzamfm7dx35rlq33ccs7wb1qcqx3d3nb75q"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-zlibbioc" ,r-zlibbioc)))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/bmbolstad/affyio")
    (synopsis "Tools for parsing Affymetrix data files")
    (description
     "This package provides routines for parsing Affymetrix data files based
upon file format information. The primary focus is on accessing the CEL and
CDF file formats.")
    (license license:lgpl2.0+)))
9614
;; affy, fetched from Bioconductor: exploratory analysis of Affymetrix
;; oligonucleotide arrays.  Builds on r-affyio (defined above) and takes
;; zlib as a plain input.
(define-public r-affy
  (package
    (name "r-affy")
    (version "1.66.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "affy" version))
       (sha256
        (base32 "0m6hkyjxmsf80n3anhwh9k26csxczv6v92fkb7klnchdski61pyc"))))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-affyio" ,r-affyio)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/affy")
    ;; "Affymetrix" is a proper name; capitalized to match r-affyio.
    (synopsis "Methods for Affymetrix oligonucleotide arrays")
    (description
     "This package contains functions for exploratory oligonucleotide array
analysis.")
    (license license:lgpl2.0+)))
9642
;; vsn, fetched from Bioconductor: variance stabilization and calibration
;; for microarray data.  Depends on r-affy, defined above.
(define-public r-vsn
  (package
    (name "r-vsn")
    (version "3.56.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "vsn" version))
       (sha256
        (base32 "1k82dikrv1gcync5y1131wg7z1kxv2z2jl4nndg20bixc3398h58"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))           ; for vignettes
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-ggplot2" ,r-ggplot2)
       ("r-lattice" ,r-lattice)
       ("r-limma" ,r-limma)))
    (home-page "https://bioconductor.org/packages/release/bioc/html/vsn.html")
    (synopsis "Variance stabilization and calibration for microarray data")
    (description
     "The package implements a method for normalising microarray intensities,
and works for single- and multiple-color arrays. It can also be used for data
from other technologies, as long as they have a similar format. The method uses
a robust variant of the maximum-likelihood estimator for an
additive-multiplicative error model and affine calibration. The model
incorporates a data calibration step (a.k.a. normalization), a model for the
dependence of the variance on the mean intensity and a variance stabilizing
data transformation. Differences between transformed intensities are
analogous to \"normalized log-ratios\". However, in contrast to the latter,
their variance is independent of the mean, and they are usually more sensitive
and specific in detecting differential transcription.")
    (license license:artistic2.0)))
9678
;; mzID, fetched from Bioconductor: a parser for mzIdentML files.  The
;; upstream name is mixed-case, hence the `upstream-name' property.
(define-public r-mzid
  (package
    (name "r-mzid")
    (version "1.26.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "mzID" version))
       (sha256
        (base32 "0y50lzkdamkpz67f6r5whp246qsxpbammjil7g8vjprx0c4jk5n5"))))
    (properties '((upstream-name . "mzID")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-doparallel" ,r-doparallel)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-plyr" ,r-plyr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-xml" ,r-xml)))
    (home-page "https://bioconductor.org/packages/mzID")
    (synopsis "Parser for mzIdentML files")
    (description
     "This package provides a parser for mzIdentML files implemented using the
XML package. The parser tries to be general and able to handle all types of
mzIdentML files with the drawback of having less pretty output than a vendor
specific parser.")
    (license license:gpl2+)))
9710
;; pcaMethods, fetched from Bioconductor: a collection of PCA
;; implementations sharing one result structure (pcaRes).
(define-public r-pcamethods
  (package
    (name "r-pcamethods")
    (version "1.80.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "pcaMethods" version))
       (sha256
        (base32 "10cww4jxyynkwxbbsx804nwac31j0hh8dgisygld0q663gaxkgni"))))
    (properties '((upstream-name . "pcaMethods")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-mass" ,r-mass)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/hredestig/pcamethods")
    (synopsis "Collection of PCA methods")
    (description
     "This package provides Bayesian PCA, Probabilistic PCA, Nipals PCA,
Inverse Non-Linear PCA and the conventional SVD PCA. A cluster based method
for missing value estimation is included for comparison. BPCA, PPCA and
NipalsPCA may be used to perform PCA on incomplete data as well as for
accurate missing value estimation. A set of methods for printing and plotting
the results is also provided. All PCA methods make use of the same data
structure (pcaRes) to provide a common interface to the PCA results.")
    (license license:gpl3+)))
9740
;; MSnbase, fetched from Bioconductor: base functions and classes for
;; MS-based proteomics.  Propagates r-affy, r-mzid, r-pcamethods and r-vsn,
;; all defined above, along with r-mzr.
(define-public r-msnbase
  (package
    (name "r-msnbase")
    (version "2.14.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnbase" version))
       (sha256
        (base32 "17vlv9gh41s1hp043b7j1jfqiw52alh1misjzy1kxl0g90rld00l"))))
    (properties '((upstream-name . "MSnbase")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-affy" ,r-affy)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-digest" ,r-digest)
       ("r-ggplot2" ,r-ggplot2)
       ("r-impute" ,r-impute)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-maldiquant" ,r-maldiquant)
       ("r-mass" ,r-mass)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-pcamethods" ,r-pcamethods)
       ("r-plyr" ,r-plyr)
       ("r-preprocesscore" ,r-preprocesscore)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-vsn" ,r-vsn)
       ("r-xml" ,r-xml)))
    (home-page "https://github.com/lgatto/MSnbase")
    (synopsis "Base functions and classes for MS-based proteomics")
    (description
     "This package provides basic plotting, data manipulation and processing
of mass spectrometry based proteomics data.")
    (license license:artistic2.0)))
9785
;; MSnID, fetched from Bioconductor: utilities for LC-MSn proteomics
;; identifications.  Propagates r-msnbase and r-mzid, defined above, along
;; with r-mzr.
(define-public r-msnid
  (package
    (name "r-msnid")
    (version "1.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "MSnID" version))
       (sha256
        (base32 "0dwa6j2nqb3223a8g4f453aznjh69wngrpvdi12iy69j1psbbjcc"))))
    (properties '((upstream-name . "MSnID")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-data-table" ,r-data-table)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-foreach" ,r-foreach)
       ("r-iterators" ,r-iterators)
       ("r-msnbase" ,r-msnbase)
       ("r-mzid" ,r-mzid)
       ("r-mzr" ,r-mzr)
       ("r-protgenerics" ,r-protgenerics)
       ("r-r-cache" ,r-r-cache)
       ("r-rcpp" ,r-rcpp)
       ("r-reshape2" ,r-reshape2)))
    (home-page "https://bioconductor.org/packages/MSnID")
    (synopsis "Utilities for LC-MSn proteomics identifications")
    ;; "optimizes" agrees with the singular subject ("it assesses ... and
    ;; optimizes").
    (description
     "This package extracts @dfn{tandem mass spectrometry} (MS/MS) ID data
from mzIdentML (leveraging the mzID package) or text files. After collating
the search results from multiple datasets it assesses their identification
quality and optimizes filtering criteria to achieve the maximum number of
identifications while not exceeding a specified false discovery rate. It also
contains a number of utilities to explore the MS/MS results and assess missed
and irregular enzymatic cleavages, mass measurement accuracy, etc.")
    (license license:artistic2.0)))
9824
;; Seurat, fetched from CRAN (not Bioconductor): toolkit for single cell
;; RNA-seq analysis.
(define-public r-seurat
  (package
    (name "r-seurat")
    (version "3.1.5")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "Seurat" version))
       (sha256
        (base32 "1lbq2pqhb6ih6iqawlnzdh05zff71pwbw1cpfv2sld3pd7kz0zkm"))))
    (properties '((upstream-name . "Seurat")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-cluster" ,r-cluster)
       ("r-cowplot" ,r-cowplot)
       ("r-fitdistrplus" ,r-fitdistrplus)
       ("r-future" ,r-future)
       ("r-future-apply" ,r-future-apply)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-ggridges" ,r-ggridges)
       ("r-httr" ,r-httr)
       ("r-ica" ,r-ica)
       ("r-igraph" ,r-igraph)
       ("r-irlba" ,r-irlba)
       ("r-kernsmooth" ,r-kernsmooth)
       ("r-leiden" ,r-leiden)
       ("r-lmtest" ,r-lmtest)
       ("r-mass" ,r-mass)
       ("r-matrix" ,r-matrix)
       ("r-patchwork" ,r-patchwork)
       ("r-pbapply" ,r-pbapply)
       ("r-plotly" ,r-plotly)
       ("r-png" ,r-png)
       ("r-rann" ,r-rann)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rcpp" ,r-rcpp)
       ("r-rcppannoy" ,r-rcppannoy)
       ("r-rcppeigen" ,r-rcppeigen)
       ("r-rcppprogress" ,r-rcppprogress)
       ("r-reticulate" ,r-reticulate)
       ("r-rlang" ,r-rlang)
       ("r-rocr" ,r-rocr)
       ("r-rsvd" ,r-rsvd)
       ("r-rtsne" ,r-rtsne)
       ("r-scales" ,r-scales)
       ("r-sctransform" ,r-sctransform)
       ("r-tsne" ,r-tsne)
       ("r-uwot" ,r-uwot)))
    (home-page "https://www.satijalab.org/seurat")
    ;; The synopsis must not start with the package name ("guix lint").
    (synopsis "R toolkit for single cell genomics")
    (description
     "This package provides an R toolkit for QC, analysis, and exploration
of single cell RNA-seq data. It easily enables widely-used analytical
techniques, including the identification of highly variable genes,
dimensionality reduction; PCA, ICA, t-SNE, standard unsupervised clustering
algorithms; density clustering, hierarchical clustering, k-means, and the
discovery of differentially expressed genes and markers.")
    (license license:gpl3)))
9885
;; aroma.light, fetched from Bioconductor: normalization and visualization
;; methods for microarray data.  The upstream name contains a dot, which
;; becomes a dash in the Guix package name.
(define-public r-aroma-light
  (package
    (name "r-aroma-light")
    (version "3.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "aroma.light" version))
       (sha256
        (base32 "19y5f2minx2pp73zdh43v1qkwpkaxygkl8cwlnwja15i46s0bcyc"))))
    (properties '((upstream-name . "aroma.light")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-matrixstats" ,r-matrixstats)
       ("r-r-methodss3" ,r-r-methodss3)
       ("r-r-oo" ,r-r-oo)
       ("r-r-utils" ,r-r-utils)))
    (home-page "https://github.com/HenrikBengtsson/aroma.light")
    (synopsis "Methods for normalization and visualization of microarray data")
    (description
     "This package provides methods for microarray analysis that take basic
data types such as matrices and lists of vectors. These methods can be used
standalone, be utilized in other packages, or be wrapped up in higher-level
classes.")
    (license license:gpl2+)))
9912
;; DESeq, fetched from Bioconductor: differential expression testing for
;; count data based on the negative binomial distribution.
(define-public r-deseq
  (package
    (name "r-deseq")
    (version "1.39.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DESeq" version))
       (sha256
        (base32 "047hph5aqmjnz1aqprziw0smdn5lf96hmwpnvqrxv1j2yfvcf3h1"))))
    (properties '((upstream-name . "DESeq")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genefilter" ,r-genefilter)
       ("r-geneplotter" ,r-geneplotter)
       ("r-lattice" ,r-lattice)
       ("r-locfit" ,r-locfit)
       ("r-mass" ,r-mass)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://www-huber.embl.de/users/anders/DESeq/")
    (synopsis "Differential gene expression analysis")
    (description
     "This package provides tools for estimating variance-mean dependence in
count data from high-throughput genetic sequencing assays and for testing for
differential expression based on a model using the negative binomial
distribution.")
    (license license:gpl3+)))
9943
;; EDASeq, fetched from Bioconductor: exploratory data analysis and
;; normalization for RNA-Seq.  Propagates r-aroma-light and r-deseq, both
;; defined above.
(define-public r-edaseq
  (package
    (name "r-edaseq")
    (version "2.22.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "EDASeq" version))
       (sha256
        (base32 "12gzxjh73qshlwvsf92lbrf4bi199kxg2snrkprh1z4yqf7bjfm4"))))
    (properties '((upstream-name . "EDASeq")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-aroma-light" ,r-aroma-light)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-deseq" ,r-deseq)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-shortread" ,r-shortread)))
    (home-page "https://github.com/drisso/EDASeq")
    (synopsis "Exploratory data analysis and normalization for RNA-Seq")
    ;; The second and third sentences were fragments without a subject; they
    ;; are now complete sentences listing the same procedures.
    (description
     "This package provides support for numerical and graphical summaries of
RNA-Seq genomic read data. It provides within-lane normalization procedures
to adjust for GC-content effect (or other gene-level effects) on read counts:
loess robust local regression, global-scaling, and full-quantile
normalization. It also provides between-lane normalization procedures to
adjust for distributional differences between lanes (e.g., sequencing depth):
global-scaling and full-quantile normalization.")
    (license license:artistic2.0)))
9984
;; interactiveDisplayBase, fetched from Bioconductor: base methods for
;; Shiny-based displays of Bioconductor objects.
(define-public r-interactivedisplaybase
  (package
    (name "r-interactivedisplaybase")
    (version "1.26.3")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "interactiveDisplayBase" version))
       (sha256
        (base32 "1x5vipqa4pgwpd62c1c58shnlpv3zyzzpf4wdwr00q1swkdb7wv3"))))
    (properties '((upstream-name . "interactiveDisplayBase")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-dt" ,r-dt)
       ("r-shiny" ,r-shiny)))
    (home-page "https://bioconductor.org/packages/interactiveDisplayBase")
    (synopsis "Base package for web displays of Bioconductor objects")
    (description
     "This package contains the basic methods needed to generate interactive
Shiny-based display methods for Bioconductor objects.")
    (license license:artistic2.0)))
10011
;; AnnotationHub, fetched from Bioconductor: client for the AnnotationHub
;; web resource.  Propagates r-interactivedisplaybase, defined above.
(define-public r-annotationhub
  (package
    (name "r-annotationhub")
    (version "2.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "AnnotationHub" version))
       (sha256
        (base32 "0r4xzf93bm9cpys5cg70wg0b8hxli80hvqwgh4hzbd45yyf5c4wz"))))
    (properties '((upstream-name . "AnnotationHub")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocfilecache" ,r-biocfilecache)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-biocversion" ,r-biocversion)
       ("r-curl" ,r-curl)
       ("r-dplyr" ,r-dplyr)
       ("r-httr" ,r-httr)
       ("r-interactivedisplaybase" ,r-interactivedisplaybase)
       ("r-rappdirs" ,r-rappdirs)
       ("r-rsqlite" ,r-rsqlite)
       ("r-s4vectors" ,r-s4vectors)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioconductor.org/packages/AnnotationHub")
    (synopsis "Client to access AnnotationHub resources")
    (description
     "This package provides a client for the Bioconductor AnnotationHub web
resource. The AnnotationHub web resource provides a central location where
genomic files (e.g. VCF, bed, wig) and other resources from standard
locations (e.g. UCSC, Ensembl) can be discovered. The resource includes
metadata about each resource, e.g., a textual description, tags, and date of
modification. The client creates and manages a local cache of files retrieved
by the user, helping with quick and reproducible access.")
    (license license:artistic2.0)))
10052
;; fastseg, fetched from Bioconductor: segmentation algorithm for
;; microarray and sequencing data.
(define-public r-fastseg
  (package
    (name "r-fastseg")
    (version "1.34.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "fastseg" version))
       (sha256
        (base32 "1d48n245pzmvcpsz93lxb4frqh222gfhpmlvm0sb74skn16way63"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://www.bioinf.jku.at/software/fastseg/index.html")
    (synopsis "Fast segmentation algorithm for genetic sequencing data")
    (description
     "Fastseg implements a very fast and efficient segmentation algorithm.
It can segment data from DNA microarrays and data from next generation
sequencing for example to detect copy number segments. Further it can segment
data from RNA microarrays like tiling arrays to identify transcripts. Most
generally, it can segment data given as a matrix or as a vector. Various data
formats can be used as input to fastseg like expression set objects for
microarrays or GRanges for sequencing data.")
    (license license:lgpl2.0+)))
10082
;; KEGGREST, fetched from Bioconductor: client-side access to the KEGG
;; REST server.
(define-public r-keggrest
  (package
    (name "r-keggrest")
    (version "1.28.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "KEGGREST" version))
       (sha256
        (base32 "0q76w17fya2x0z7mvyhkk5kqh07flldgih13ma44vhcy1bdlm6j1"))))
    (properties '((upstream-name . "KEGGREST")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-httr" ,r-httr)
       ("r-png" ,r-png)))
    (home-page "https://bioconductor.org/packages/KEGGREST")
    (synopsis "Client-side REST access to KEGG")
    ;; Previously read "provides a package that provides"; the duplicated
    ;; clause is removed.
    (description
     "This package provides a client interface to the
@dfn{Kyoto Encyclopedia of Genes and Genomes} (KEGG) REST server.")
    (license license:artistic2.0)))
10108
;; gage, fetched from Bioconductor: gene-set enrichment / pathway analysis.
;; Propagates r-keggrest, defined above.  The home page is the published
;; article; its URL is split across two strings to keep lines short.
(define-public r-gage
  (package
    (name "r-gage")
    (version "2.37.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gage" version))
       (sha256
        (base32 "1zfaas4x6g7wiml6cmxa7b4f43az9s0lrw80k6sf7c96hsh1jijr"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-graph" ,r-graph)
       ("r-keggrest" ,r-keggrest)))
    (home-page
     (string-append "https://bmcbioinformatics.biomedcentral.com/"
                    "articles/10.1186/1471-2105-10-161"))
    (synopsis "Generally applicable gene-set enrichment for pathway analysis")
    (description
     "GAGE is a published method for gene set (enrichment or GSEA) or pathway
analysis. GAGE is generally applicable independent of microarray or RNA-Seq
data attributes including sample sizes, experimental designs, assay platforms,
and other types of heterogeneity. The gage package provides functions for
basic GAGE analysis, result processing and presentation. In addition, it
provides demo microarray data and commonly used gene set data based on KEGG
pathways and GO terms. These functions and data are also useful for gene set
analysis using other methods.")
    (license license:gpl2+)))
10138
;; GenomicFiles, fetched from Bioconductor: infrastructure for parallel
;; computations distributed by file or by range.
(define-public r-genomicfiles
  (package
    (name "r-genomicfiles")
    (version "1.24.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "GenomicFiles" version))
       (sha256
        (base32 "1k3824pzf9fdqvcv6cz2742q3mabpmncrc72hwa21ac8wy1b04n4"))))
    (properties '((upstream-name . "GenomicFiles")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/GenomicFiles")
    (synopsis "Distributed computing by file or by range")
    (description
     "This package provides infrastructure for parallel computations
distributed by file or by range. User defined mapper and reducer functions
provide added flexibility for data combination and manipulation.")
    (license license:artistic2.0)))
10171
;; ComplexHeatmap, fetched from Bioconductor: arrangement of multiple
;; heatmaps with self-defined annotation graphics.
(define-public r-complexheatmap
  (package
    (name "r-complexheatmap")
    (version "2.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ComplexHeatmap" version))
       (sha256
        (base32 "01jxxwxhf9n8baxgja4rb592p5210s4ppd7a5b4xby5aalhzkr0l"))))
    (properties '((upstream-name . "ComplexHeatmap")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-circlize" ,r-circlize)
       ("r-clue" ,r-clue)
       ("r-colorspace" ,r-colorspace)
       ("r-getoptlong" ,r-getoptlong)
       ("r-globaloptions" ,r-globaloptions)
       ("r-png" ,r-png)
       ("r-rcolorbrewer" ,r-rcolorbrewer)))
    (home-page "https://github.com/jokergoo/ComplexHeatmap")
    (synopsis "Making Complex Heatmaps")
    (description
     "Complex heatmaps are efficient to visualize associations between
different sources of data sets and reveal potential structures. This package
provides a highly flexible way to arrange multiple heatmaps and supports
self-defined annotation graphics.")
    (license license:gpl2+)))
10205
;; DirichletMultinomial, fetched from Bioconductor: Dirichlet-multinomial
;; mixture models for microbiome data.  Takes gsl as a plain input.
(define-public r-dirichletmultinomial
  (package
    (name "r-dirichletmultinomial")
    (version "1.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DirichletMultinomial" version))
       (sha256
        (base32 "1m9dsrddrllb2i88qzik1867iv9mggrgdkn0dlp8sq7gl69vmalb"))))
    (properties '((upstream-name . "DirichletMultinomial")))
    (build-system r-build-system)
    (inputs
     `(("gsl" ,gsl)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-iranges" ,r-iranges)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/DirichletMultinomial")
    (synopsis "Dirichlet-Multinomial mixture models for microbiome data")
    (description
     "Dirichlet-multinomial mixture models can be used to describe variability
in microbial metagenomic data. This package is an interface to code
originally made available by Holmes, Harris, and Quince, 2012, PLoS ONE 7(2):
1-15.")
    (license license:lgpl3)))
10234
;; ensembldb, fetched from Bioconductor: creation and use of Ensembl-based
;; annotation databases.
(define-public r-ensembldb
  (package
    (name "r-ensembldb")
    (version "2.12.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ensembldb" version))
       (sha256
        (base32 "1vvchc04nshxc768fp31rxb603aj3hmq8xlh5qabcwf2c3z9719g"))))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-curl" ,r-curl)
       ("r-dbi" ,r-dbi)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-protgenerics" ,r-protgenerics)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rsqlite" ,r-rsqlite)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://github.com/jotsetung/ensembldb")
    (synopsis "Utilities to create and use Ensembl-based annotation databases")
    ;; Grammar fixes only: subject-verb agreement and gerunds after
    ;; "in addition to" / "allows".
    (description
     "The package provides functions to create and use transcript-centric
annotation databases/packages. The annotation for the databases is directly
fetched from Ensembl using their Perl API. The functionality and data are
similar to that of the TxDb packages from the @code{GenomicFeatures} package,
but, in addition to retrieving all gene/transcript models and annotations from
the database, the @code{ensembldb} package also provides a filter framework
that allows retrieving annotations for specific entries like genes encoded on
a chromosome region or transcript models of lincRNA genes.")
    ;; No version specified
    (license license:lgpl3+)))
10279
;; OrganismDbi, fetched from Bioconductor: a unified interface to several
;; annotation packages.
(define-public r-organismdbi
  (package
    (name "r-organismdbi")
    (version "1.30.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "OrganismDbi" version))
       (sha256
        (base32 "194h5576inq44qr666snzq0ygnc77rk5ljkn9bn8zs6x6gb3cwaw"))))
    (properties '((upstream-name . "OrganismDbi")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocmanager" ,r-biocmanager)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-graph" ,r-graph)
       ("r-iranges" ,r-iranges)
       ("r-rbgl" ,r-rbgl)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/OrganismDbi")
    (synopsis "Software to enable the smooth interfacing of database packages")
    ;; "a select methods" mixed singular and plural; now "a select method".
    (description
     "The package enables a simple unified interface to several
annotation packages each of which has its own schema by taking advantage of
the fact that each of these packages implements a select method.")
    (license license:artistic2.0)))
10311
;; biovizBase, fetched from Bioconductor: utilities, color schemes and
;; conventions for visualizing genomic data.  Propagates r-ensembldb,
;; defined above.
(define-public r-biovizbase
  (package
    (name "r-biovizbase")
    (version "1.36.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "biovizBase" version))
       (sha256
        (base32 "1vq2mxa2jkljgw75zqjdkyml0ppi5dspvwj4cznfhi31cq8ds0qh"))))
    (properties '((upstream-name . "biovizBase")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-dichromat" ,r-dichromat)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rlang" ,r-rlang)
       ("r-rsamtools" ,r-rsamtools)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "https://bioconductor.org/packages/biovizBase")
    (synopsis "Basic graphic utilities for visualization of genomic data")
    (description
     "The biovizBase package is designed to provide a set of utilities, color
schemes and conventions for genomic data. It serves as the base for various
high-level packages for biological data visualization. This saves development
effort and encourages consistency.")
    (license license:artistic2.0)))
10353
;; ggbio, fetched from Bioconductor: grammar-of-graphics visualization for
;; genomic data.  Propagates r-biovizbase, r-ensembldb and r-organismdbi,
;; all defined above.  One source file is patched after unpacking.
(define-public r-ggbio
  (package
    (name "r-ggbio")
    (version "1.36.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ggbio" version))
       (sha256
        (base32 "11ggnqjq42fi2hm9xlvrrlr2xhy4kglvl1a0mycp1s4v67lxw5h5"))))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; R/GGbio-class.R spells "fetchable" as "fechable"; see
         ;; https://github.com/tengfei/ggbio/issues/117.  The fix is
         ;; expected to be part of the next upstream release.
         (add-after 'unpack 'fix-typo
           (lambda _
             (substitute* "R/GGbio-class.R"
               (("fechable") "fetchable"))
             #t)))))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationfilter" ,r-annotationfilter)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggally" ,r-ggally)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gridextra" ,r-gridextra)
       ("r-gtable" ,r-gtable)
       ("r-hmisc" ,r-hmisc)
       ("r-iranges" ,r-iranges)
       ("r-organismdbi" ,r-organismdbi)
       ("r-reshape2" ,r-reshape2)
       ("r-rlang" ,r-rlang)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    (home-page "http://www.tengfei.name/ggbio/")
    (synopsis "Visualization tools for genomic data")
    (description
     "The ggbio package extends and specializes the grammar of graphics for
biological data. The graphics are designed to answer common scientific
questions, in particular those often asked of high throughput genomics data.
All core Bioconductor data structures are supported, where appropriate. The
package supports detailed views of particular genomic regions, as well as
genome-wide overviews. Supported overviews include ideograms and grand linear
views. High-level plots include sequence fragment length, edge-linked
interval to data view, mismatch pileup, and several splicing summaries.")
    (license license:artistic2.0)))
10418
;; gQTLBase, fetched from Bioconductor: storage and interrogation of QTL
;; archives.  Propagates r-genomicfiles, defined above.
(define-public r-gqtlbase
  (package
    (name "r-gqtlbase")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gQTLBase" version))
       (sha256
        (base32 "06xvzp4fn3qfa46ggg8kxi267gbyd821vvx4040173xkqxpr0g5j"))))
    (properties '((upstream-name . "gQTLBase")))
    (build-system r-build-system)
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (propagated-inputs
     `(("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-bit" ,r-bit)
       ("r-doparallel" ,r-doparallel)
       ("r-ff" ,r-ff)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (home-page "https://bioconductor.org/packages/gQTLBase")
    (synopsis "Infrastructure for eQTL, mQTL and similar studies")
    (description
     "The purpose of this package is to simplify the storage and interrogation
of @dfn{quantitative trait loci} (QTL) archives, such as eQTL, mQTL, dsQTL,
and more.")
    (license license:artistic2.0)))
10455
;; snpStats, fetched from Bioconductor: classes and statistical methods for
;; SNP association studies.  zlib is a plain input; r-zlibbioc is
;; propagated.
(define-public r-snpstats
  (package
    (name "r-snpstats")
    (version "1.38.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "snpStats" version))
       (sha256
        (base32 "1qv3nqqr30d3n66mawqd9dbl95dl89r4bcjvkc5iassy1yrwr8wq"))))
    (properties '((upstream-name . "snpStats")))
    (build-system r-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-matrix" ,r-matrix)
       ("r-survival" ,r-survival)
       ("r-zlibbioc" ,r-zlibbioc)))
    (home-page "https://bioconductor.org/packages/snpStats")
    (synopsis "Methods for SNP association studies")
    (description
     "This package provides classes and statistical methods for large
@dfn{single-nucleotide polymorphism} (SNP) association studies. This extends
the earlier snpMatrix package, allowing for uncertainty in genotypes.")
    (license license:gpl3)))
10482
;; Bioconductor annotation package Homo.sapiens: exposes the Homo.sapiens
;; object, which gives access to data from several related annotation
;; packages.
(define-public r-homo-sapiens
  (package
    (name "r-homo-sapiens")
    (version "1.3.1")
    (source (origin
              (method url-fetch)
              ;; This tarball is located under "data/annotation/" instead of
              ;; "bioc/", which is what the 'annotation type selects.  Using
              ;; bioconductor-uri rather than a hand-built "release" URL
              ;; keeps this in line with the other Bioconductor packages.
              (uri (bioconductor-uri "Homo.sapiens" version 'annotation))
              ;; This hash pins the tarball for the version above.
              (sha256
               (base32
                "151vj7h5p1c8yd5swrchk46z469p135wk50hvkl0nhgndvy0jj01"))))
    ;; Upstream spells the package "Homo.sapiens"; the Guix name replaces
    ;; the dot and upper-case letter.
    (properties
     `((upstream-name . "Homo.sapiens")))
    (build-system r-build-system)
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-genomicfeatures" ,r-genomicfeatures)
       ("r-go-db" ,r-go-db)
       ("r-org-hs-eg-db" ,r-org-hs-eg-db)
       ("r-txdb-hsapiens-ucsc-hg19-knowngene" ,r-txdb-hsapiens-ucsc-hg19-knowngene)
       ("r-organismdbi" ,r-organismdbi)
       ("r-annotationdbi" ,r-annotationdbi)))
    (home-page "https://bioconductor.org/packages/Homo.sapiens/")
    (synopsis "Annotation package for the Homo.sapiens object")
    (description
     "This package contains the Homo.sapiens object to access data from
several related annotation packages.")
    (license license:artistic2.0)))
10514
;; Bioconductor package erma: a selection of elements of the epigenomics
;; road map, including metadata and ChromImpute outputs.
(define-public r-erma
  (package
    (name "r-erma")
    (version "1.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "erma" version))
       ;; This hash pins the tarball for the version above.
       (sha256
        (base32
         "1ccfbq0r48sr3h8050w8zv8402h7nx09adr0xdyqlg7kwp9vd2l3"))))
    (build-system r-build-system)
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocparallel" ,r-biocparallel)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    ;; NOTE(review): knitr is presumably needed to build the vignettes --
    ;; confirm against the upstream DESCRIPTION file.
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/erma")
    (synopsis "Epigenomic road map adventures")
    (description
     "The epigenomics road map describes locations of epigenetic marks in DNA
from a variety of cell types. Of interest are locations of histone
modifications, sites of DNA methylation, and regions of accessible chromatin.
This package presents a selection of elements of the road map including
metadata and outputs of the ChromImpute procedure applied to ENCODE cell lines
by Ernst and Kellis.")
    (license license:artistic2.0)))
10554
;; Bioconductor package ldblock: data structures for linkage disequilibrium
;; (LD) measures in populations.
(define-public r-ldblock
  (package
    (name "r-ldblock")
    (version "1.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "ldblock" version))
       ;; This hash pins the tarball for the version above.
       (sha256
        (base32
         "0plw00n2zfgh029ab41dnydzgv2yxrapjp770147rx9pff4dngrv"))))
    (build-system r-build-system)
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-ensdb-hsapiens-v75" ,r-ensdb-hsapiens-v75)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-httr" ,r-httr)
       ("r-matrix" ,r-matrix)
       ("r-rsamtools" ,r-rsamtools)
       ("r-snpstats" ,r-snpstats)
       ("r-variantannotation" ,r-variantannotation)))
    ;; NOTE(review): knitr is presumably needed to build the vignettes --
    ;; confirm against the upstream DESCRIPTION file.
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/ldblock")
    (synopsis "Data structures for linkage disequilibrium measures in populations")
    (description
     "This package defines data structures for @dfn{linkage
disequilibrium} (LD) measures in populations. Its purpose is to simplify
handling of existing population-level data for the purpose of flexibly
defining LD blocks.")
    (license license:artistic2.0)))
10588
;; Bioconductor package gQTLstats: computationally efficient analysis of
;; QTL data (eQTL, mQTL, dsQTL, ...).
(define-public r-gqtlstats
  (package
    (name "r-gqtlstats")
    (version "1.20.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gQTLstats" version))
       ;; This hash pins the tarball for the version above.
       (sha256
        (base32
         "1jjqfpjp93nmxjn757j5mzcax96bzcqdd1gr3rsdxg7ap008l2w7"))))
    ;; Upstream spells the package "gQTLstats" (mixed case).
    (properties `((upstream-name . "gQTLstats")))
    (build-system r-build-system)
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-batchjobs" ,r-batchjobs)
       ("r-bbmisc" ,r-bbmisc)
       ("r-beeswarm" ,r-beeswarm)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-doparallel" ,r-doparallel)
       ("r-dplyr" ,r-dplyr)
       ("r-erma" ,r-erma)
       ("r-ffbase" ,r-ffbase)
       ("r-foreach" ,r-foreach)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-gqtlbase" ,r-gqtlbase)
       ("r-hardyweinberg" ,r-hardyweinberg)
       ("r-homo-sapiens" ,r-homo-sapiens)
       ("r-iranges" ,r-iranges)
       ("r-limma" ,r-limma)
       ("r-mgcv" ,r-mgcv)
       ("r-plotly" ,r-plotly)
       ("r-reshape2" ,r-reshape2)
       ("r-s4vectors" ,r-s4vectors)
       ("r-shiny" ,r-shiny)
       ("r-snpstats" ,r-snpstats)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-variantannotation" ,r-variantannotation)))
    ;; NOTE(review): knitr is presumably needed to build the vignettes --
    ;; confirm against the upstream DESCRIPTION file.
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/gQTLstats")
    (synopsis "Computationally efficient analysis for eQTL and allied studies")
    (description
     "This package provides tools for the computationally efficient analysis
of @dfn{quantitative trait loci} (QTL) data, including eQTL, mQTL, dsQTL, etc.
The software in this package aims to support refinements and functional
interpretation of members of a collection of association statistics on a
family of feature/genome hypotheses.")
    (license license:artistic2.0)))
10644
;; Bioconductor package Gviz: plots data and annotation information along
;; genomic coordinates.
(define-public r-gviz
  (package
    (name "r-gviz")
    (version "1.32.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Gviz" version))
       ;; This hash pins the tarball for the version above.
       (sha256
        (base32
         "0cgkp0ciyy2qykqgh3vzp5mx9b4vsvacjh2jnsj3wldiapzlz08a"))))
    ;; Upstream spells the package "Gviz" (capitalized).
    (properties `((upstream-name . "Gviz")))
    (build-system r-build-system)
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biobase" ,r-biobase)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biomart" ,r-biomart)
       ("r-biostrings" ,r-biostrings)
       ("r-biovizbase" ,r-biovizbase)
       ("r-bsgenome" ,r-bsgenome)
       ("r-digest" ,r-digest)
       ("r-ensembldb" ,r-ensembldb)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-lattice" ,r-lattice)
       ("r-latticeextra" ,r-latticeextra)
       ("r-matrixstats" ,r-matrixstats)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-xvector" ,r-xvector)))
    ;; NOTE(review): knitr is presumably needed to build the vignettes --
    ;; confirm against the upstream DESCRIPTION file.
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/Gviz")
    (synopsis "Plotting data and annotation information along genomic coordinates")
    (description
     "Genomic data analysis requires integrated visualization of known genomic
information and new experimental data. Gviz uses the biomaRt and the
rtracklayer packages to perform live annotation queries to Ensembl and UCSC
and translates this to e.g. gene/transcript structures in viewports of the
grid graphics package. This results in genomic information plotted together
with your data.")
    (license license:artistic2.0)))
10693
;; Bioconductor package gwascat: tools for representing and modeling data in
;; the EMBL-EBI GWAS catalog.
(define-public r-gwascat
  (package
    (name "r-gwascat")
    (version "2.20.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "gwascat" version))
       ;; This hash pins the tarball for the version above.
       (sha256
        (base32
         "1cq5cmdrf0a0arr841yvkh6d8drc15p7mif1afr215l1s3y2dwd4"))))
    (build-system r-build-system)
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-iranges" ,r-iranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)))
    ;; NOTE(review): knitr is presumably needed to build the vignettes --
    ;; confirm against the upstream DESCRIPTION file.
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/gwascat")
    (synopsis "Tools for data in the EMBL-EBI GWAS catalog")
    (description
     "This package provides tools for representing and modeling data in the
EMBL-EBI GWAS catalog.")
    (license license:artistic2.0)))
10726
;; Bioconductor package Sushi: genomic visualizations for multi-panel
;; figures.
(define-public r-sushi
  (package
    (name "r-sushi")
    (version "1.26.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "Sushi" version))
              ;; This hash pins the tarball for the version above.
              (sha256
               (base32
                "17j3d5qjq5nbv99by5mq8rwr0jgh2jyyfn2nwxmwgzlmk3lgi1rb"))))
    ;; Upstream spells the package "Sushi" (capitalized).
    (properties `((upstream-name . "Sushi")))
    (build-system r-build-system)
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-biomart" ,r-biomart)
       ("r-zoo" ,r-zoo)))
    (home-page "https://bioconductor.org/packages/Sushi")
    (synopsis "Tools for visualizing genomics data")
    (description
     "This package provides flexible, quantitative, and integrative genomic
visualizations for publication-quality multi-panel figures.")
    (license license:gpl2+)))
10748
;; Bioconductor package FitHiC: statistical confidence estimates for
;; intra-chromosomal contact maps (e.g. from Hi-C).
(define-public r-fithic
  (package
    (name "r-fithic")
    (version "1.14.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "FitHiC" version))
              ;; This hash pins the tarball for the version above.
              (sha256
               (base32
                "1dffkdxm08wq4kjd9j2v2625x3p6vbrk33a2zx94pwpgkghr72yp"))))
    ;; Upstream spells the package "FitHiC" (mixed case).
    (properties `((upstream-name . "FitHiC")))
    (build-system r-build-system)
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-data-table" ,r-data-table)
       ("r-fdrtool" ,r-fdrtool)
       ("r-rcpp" ,r-rcpp)))
    ;; NOTE(review): knitr is presumably needed to build the vignettes --
    ;; confirm against the upstream DESCRIPTION file.
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/FitHiC")
    (synopsis "Confidence estimation for intra-chromosomal contact maps")
    (description
     "Fit-Hi-C is a tool for assigning statistical confidence estimates to
intra-chromosomal contact maps produced by genome-wide genome architecture
assays such as Hi-C.")
    (license license:gpl2+)))
10774
;; Bioconductor package HiTC: exploration of high-throughput "C" data such
;; as 5C or Hi-C.
(define-public r-hitc
  (package
    (name "r-hitc")
    (version "1.32.0")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "HiTC" version))
              ;; This hash pins the tarball for the version above.
              (sha256
               (base32
                "1jx2pfa7sbdz7xi466lz1h5xv126g56z73n0a5l2wrq28k47qaxy"))))
    ;; Upstream spells the package "HiTC" (mixed case).
    (properties `((upstream-name . "HiTC")))
    (build-system r-build-system)
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-biostrings" ,r-biostrings)
       ("r-genomeinfodb" ,r-genomeinfodb)
       ("r-genomicranges" ,r-genomicranges)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rtracklayer" ,r-rtracklayer)))
    (home-page "https://bioconductor.org/packages/HiTC")
    (synopsis "High throughput chromosome conformation capture analysis")
    (description
     "The HiTC package was developed to explore high-throughput \"C\" data
such as 5C or Hi-C. Dedicated R classes as well as standard methods for
quality controls, normalization, visualization, and further analysis are also
provided.")
    (license license:artistic2.0)))
10803
;; Bioconductor package HDF5Array: an array-like container for HDF5
;; datasets, supporting delayed operations and block processing.
(define-public r-hdf5array
  (package
    (name "r-hdf5array")
    (version "1.16.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "HDF5Array" version))
       ;; This hash pins the tarball for the version above.
       (sha256
        (base32
         "01767v90nl0499jcicpxngbbs0af5p9c5aasi5va01w3v5bnqddn"))))
    ;; Upstream spells the package "HDF5Array" (mixed case).
    (properties `((upstream-name . "HDF5Array")))
    (build-system r-build-system)
    ;; NOTE(review): zlib is a plain (non-propagated) input, presumably
    ;; because the package's compiled code links against it -- confirm.
    (inputs
     `(("zlib" ,zlib)))
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-rhdf5" ,r-rhdf5)
       ("r-rhdf5lib" ,r-rhdf5lib)
       ("r-s4vectors" ,r-s4vectors)))
    (home-page "https://bioconductor.org/packages/HDF5Array")
    (synopsis "HDF5 back end for DelayedArray objects")
    (description "This package provides an array-like container for convenient
access and manipulation of HDF5 datasets. It supports delayed operations and
block processing.")
    (license license:artistic2.0)))
10833
;; Bioconductor package Rhdf5lib: C and C++ HDF5 libraries for use in R
;; packages.  The bundled HDF5 tarball and Windows binaries are removed from
;; the source; the build instead uses the headers from the "hdf5-source"
;; input and the static libraries from the "hdf5" input.
(define-public r-rhdf5lib
  (package
    (name "r-rhdf5lib")
    (version "1.10.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "Rhdf5lib" version))
       ;; This hash pins the tarball for the version above.
       (sha256
        (base32
         "0f45sqrvzj6x4mckalyp8366hm8v0rrmzklx3xd4gs6l2wallcn9"))
       ;; The snippet needs delete-file-recursively from (guix build utils).
       (modules '((guix build utils)))
       (snippet
        '(begin
           ;; Delete bundled binaries
           (delete-file-recursively "src/wininclude/")
           (delete-file-recursively "src/winlib-4.9.3/")
           (delete-file-recursively "src/winlib-8.3.0/")
           ;; The bundled HDF5 source tarball is dropped as well; the
           ;; 'do-not-use-bundled-hdf5 phase below unpacks the
           ;; "hdf5-source" input in its place.
           (delete-file "src/hdf5small_cxx_hl_1.10.6.tar.gz")
           #t))))
    ;; Upstream spells the package "Rhdf5lib" (capitalized).
    (properties `((upstream-name . "Rhdf5lib")))
    (build-system r-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-hdf5
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Drop the configure script and its source; "Makevars" is
             ;; generated by hand from "Makevars.in" further down.
             (for-each delete-file '("configure" "configure.ac"))
             ;; Do not make other packages link with the proprietary libsz.
             (substitute* "R/zzz.R"
               ((" \"%s/libsz.a\"") ""))
             (with-directory-excursion "src"
               ;; Unpack the HDF5 sources and give the directory a
               ;; version-independent name, "hdf5".
               (invoke "tar" "xvf" (assoc-ref inputs "hdf5-source"))
               (rename-file (string-append "hdf5-" ,(package-version hdf5-1.10))
                            "hdf5")
               ;; Remove timestamp and host system information to make
               ;; the build reproducible.
               (substitute* "hdf5/src/libhdf5.settings.in"
                 (("Configured on: @CONFIG_DATE@")
                  "Configured on: Guix")
                 (("Uname information:.*")
                  "Uname information: Linux\n")
                 ;; Remove unnecessary store reference.
                 (("C Compiler:.*")
                  "C Compiler: GCC\n"))
               ;; Fill in the Makevars template by hand: libraries come
               ;; from the "hdf5" input, headers from the unpacked source
               ;; tree under "./hdf5".
               (rename-file "Makevars.in" "Makevars")
               (substitute* "Makevars"
                 (("@ZLIB_LIB@") "-lz")
                 (("@ZLIB_INCLUDE@") "")
                 (("HDF5_CXX_LIB=.*")
                  (string-append "HDF5_CXX_LIB="
                                 (assoc-ref inputs "hdf5") "/lib/libhdf5_cpp.a\n"))
                 (("HDF5_LIB=.*")
                  (string-append "HDF5_LIB="
                                 (assoc-ref inputs "hdf5") "/lib/libhdf5.a\n"))
                 (("HDF5_CXX_INCLUDE=.*") "HDF5_CXX_INCLUDE=./hdf5/c++/src\n")
                 (("HDF5_INCLUDE=.*") "HDF5_INCLUDE=./hdf5/src\n")
                 (("HDF5_HL_INCLUDE=.*") "HDF5_HL_INCLUDE=./hdf5/hl/src\n")
                 (("HDF5_HL_CXX_INCLUDE=.*") "HDF5_HL_CXX_INCLUDE=./hdf5/hl/c++/src\n")
                 (("HDF5_HL_LIB=.*")
                  (string-append "HDF5_HL_LIB="
                                 (assoc-ref inputs "hdf5") "/lib/libhdf5_hl.a\n"))
                 (("HDF5_HL_CXX_LIB=.*")
                  (string-append "HDF5_HL_CXX_LIB="
                                 (assoc-ref inputs "hdf5") "/lib/libhdf5_hl_cpp.a\n"))
                 ;; szip is non-free software
                 (("cp \"\\$\\{SZIP_LIB\\}.*") "")
                 (("PKG_LIBS =.*") "PKG_LIBS = -lz -lhdf5\n")))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    ;; The phase above writes paths into this input's "/lib" directory into
    ;; "Makevars".
    (propagated-inputs
     `(("hdf5" ,hdf5-1.10)))
    (native-inputs
     ;; Source tarball of the same HDF5 package, unpacked into "src/hdf5".
     `(("hdf5-source" ,(package-source hdf5-1.10))
       ("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/Rhdf5lib")
    (synopsis "HDF5 library as an R package")
    (description "This package provides C and C++ HDF5 libraries for use in R
packages.")
    (license license:artistic2.0)))
10915
;; Bioconductor package beachmat: a consistent C++ class interface for
;; commonly used matrix types, including sparse and HDF5-backed matrices.
(define-public r-beachmat
  (package
    (name "r-beachmat")
    (version "2.4.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "beachmat" version))
       ;; This hash pins the tarball for the version above.
       (sha256
        (base32
         "1vl6jbf9ia78cm4ikdb8vz04jv4b46zhvg5i006c63a9pzw7zhxi"))))
    (build-system r-build-system)
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-delayedarray" ,r-delayedarray)
       ("r-matrix" ,r-matrix)))
    ;; NOTE(review): knitr is presumably needed to build the vignettes --
    ;; confirm against the upstream DESCRIPTION file.
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/beachmat")
    (synopsis "Compiling Bioconductor to handle each matrix type")
    (description "This package provides a consistent C++ class interface for a
variety of commonly used matrix types, including sparse and HDF5-backed
matrices.")
    (license license:gpl3)))
10940
;; Bioconductor package SingleCellExperiment: an S4 class for storing data
;; from single-cell experiments.
(define-public r-singlecellexperiment
  (package
    (name "r-singlecellexperiment")
    (version "1.10.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "SingleCellExperiment" version))
       ;; This hash pins the tarball for the version above.
       (sha256
        (base32
         "092wvk11n7pa234vlwhxm3gdi4k3sbnz1splhxalbdhz3jf02zfp"))))
    ;; Upstream spells the package "SingleCellExperiment" (camel case).
    (properties
     `((upstream-name . "SingleCellExperiment")))
    (build-system r-build-system)
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-biocgenerics" ,r-biocgenerics)
       ("r-s4vectors" ,r-s4vectors)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    ;; NOTE(review): knitr is presumably needed to build the vignettes --
    ;; confirm against the upstream DESCRIPTION file.
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/SingleCellExperiment")
    (synopsis "S4 classes for single cell data")
    (description "This package defines an S4 class for storing data from
single-cell experiments. This includes specialized methods to store and
retrieve spike-in information, dimensionality reduction coordinates and size
factors for each cell, along with the usual metadata for genes and
libraries.")
    (license license:gpl3)))
10969
;; Bioconductor package scater: tools for analysing single-cell RNA-seq gene
;; expression data, with a focus on quality control.
(define-public r-scater
  (package
    (name "r-scater")
    (version "1.16.2")
    (source (origin
              (method url-fetch)
              (uri (bioconductor-uri "scater" version))
              ;; This hash pins the tarball for the version above.
              (sha256
               (base32
                "1pa5wvgjb30rw1vsjwbnn07ss3sc5n8ck5d7khdby4r2s9177s33"))))
    (build-system r-build-system)
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocneighbors" ,r-biocneighbors)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biocsingular" ,r-biocsingular)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-ggbeeswarm" ,r-ggbeeswarm)
       ("r-ggplot2" ,r-ggplot2)
       ("r-matrix" ,r-matrix)
       ("r-rcpp" ,r-rcpp)
       ("r-rlang" ,r-rlang)
       ("r-s4vectors" ,r-s4vectors)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-viridis" ,r-viridis)))
    ;; NOTE(review): knitr is presumably needed to build the vignettes --
    ;; confirm against the upstream DESCRIPTION file.
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    ;; Unlike most Bioconductor packages here, the home page points at the
    ;; project's GitHub repository.
    (home-page "https://github.com/davismcc/scater")
    (synopsis "Single-cell analysis toolkit for gene expression data in R")
    (description "This package provides a collection of tools for doing
various analyses of single-cell RNA-seq gene expression data, with a focus on
quality control.")
    (license license:gpl2+)))
11006
;; Bioconductor package scran: low-level analyses of single-cell RNA-seq
;; data (normalization, cell cycle phase assignment, detection of highly
;; variable and correlated genes).
(define-public r-scran
  (package
    (name "r-scran")
    (version "1.16.0")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "scran" version))
       ;; This hash pins the tarball for the version above.
       (sha256
        (base32
         "1gm4ys4aq8h1pn45k1rxk384wjyf55izivw8kgxbrflj6j4xvvsv"))))
    (build-system r-build-system)
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-beachmat" ,r-beachmat)
       ("r-bh" ,r-bh)
       ("r-biocgenerics" ,r-biocgenerics)
       ("r-biocneighbors" ,r-biocneighbors)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biocsingular" ,r-biocsingular)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dqrng" ,r-dqrng)
       ("r-edger" ,r-edger)
       ("r-igraph" ,r-igraph)
       ("r-iranges" ,r-iranges)
       ("r-limma" ,r-limma)
       ("r-matrix" ,r-matrix)
       ("r-rcpp" ,r-rcpp)
       ("r-s4vectors" ,r-s4vectors)
       ("r-scater" ,r-scater)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-statmod" ,r-statmod)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    ;; NOTE(review): knitr is presumably needed to build the vignettes --
    ;; confirm against the upstream DESCRIPTION file.
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://bioconductor.org/packages/scran")
    (synopsis "Methods for single-cell RNA-Seq data analysis")
    (description "This package implements a variety of low-level analyses of
single-cell RNA-seq data. Methods are provided for normalization of
cell-specific biases, assignment of cell cycle phase, and detection of highly
variable and significantly correlated genes.")
    (license license:gpl3)))
11049
;; Bioconductor package DelayedMatrixStats: a port of the matrixStats API
;; for DelayedMatrix objects.
(define-public r-delayedmatrixstats
  (package
    (name "r-delayedmatrixstats")
    (version "1.10.1")
    (source
     (origin
       (method url-fetch)
       (uri (bioconductor-uri "DelayedMatrixStats" version))
       ;; This hash pins the tarball for the version above.
       (sha256
        (base32
         "046sam0rz42ph0m7jz7v3bck7d3h2mp45gzywh5dvc1qkjq6fdxx"))))
    ;; Upstream spells the package "DelayedMatrixStats" (camel case).
    (properties
     `((upstream-name . "DelayedMatrixStats")))
    (build-system r-build-system)
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-biocparallel" ,r-biocparallel)
       ("r-delayedarray" ,r-delayedarray)
       ("r-hdf5array" ,r-hdf5array)
       ("r-iranges" ,r-iranges)
       ("r-matrix" ,r-matrix)
       ("r-matrixstats" ,r-matrixstats)
       ("r-s4vectors" ,r-s4vectors)))
    ;; NOTE(review): knitr is presumably needed to build the vignettes --
    ;; confirm against the upstream DESCRIPTION file.
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/PeteHaitch/DelayedMatrixStats")
    (synopsis "Functions that apply to rows and columns of DelayedMatrix objects")
    (description
     "This package provides a port of the @code{matrixStats} API for use with
@code{DelayedMatrix} objects from the @code{DelayedArray} package. It
contains high-performing functions operating on rows and columns of
@code{DelayedMatrix} objects, e.g. @code{colMedians}, @code{rowMedians},
@code{colRanks}, @code{rowRanks}, @code{colSds}, and @code{rowSds}. Functions
are optimized per data type and for subsetted calculations such that both
memory usage and processing time are minimized.")
    (license license:expat)))
11085
;; CRAN package phangorn: phylogenetic analysis in R.
(define-public r-phangorn
  (package
    (name "r-phangorn")
    (version "2.5.5")
    (source
     (origin
       (method url-fetch)
       ;; Unlike its neighbours in this file, this package is fetched from
       ;; CRAN rather than Bioconductor.
       (uri (cran-uri "phangorn" version))
       ;; This hash pins the tarball for the version above.
       (sha256
        (base32
         "0ihkaykqjmf80d8wrk3saphxvnv58zma6pd13633bd3cwanc33f5"))))
    (build-system r-build-system)
    ;; R packages this package depends on.
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-fastmatch" ,r-fastmatch)
       ("r-igraph" ,r-igraph)
       ("r-magrittr" ,r-magrittr)
       ("r-matrix" ,r-matrix)
       ("r-quadprog" ,r-quadprog)
       ("r-rcpp" ,r-rcpp)))
    (home-page "https://github.com/KlausVigo/phangorn")
    (synopsis "Phylogenetic analysis in R")
    (description
     "Phangorn is a package for phylogenetic analysis in R. It supports
estimation of phylogenetic trees and networks using Maximum Likelihood,
Maximum Parsimony, distance methods and Hadamard conjugation.")
    (license license:gpl2+)))
11113
;; R package dropbead, built from a pinned upstream Git commit: basic
;; exploration and analysis of Drop-seq data.
(define-public r-dropbead
  (let ((revision "2")
        (commit "d746c6f3b32110428ea56d6a0001ce52a251c247"))
    (package
      (name "r-dropbead")
      ;; Yields "0-REVISION.COMMIT7", i.e. base version "0", the packaging
      ;; revision, and the first seven characters of the commit.
      (version (git-version "0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rajewsky-lab/dropbead")
               (commit commit)))
         (file-name (git-file-name name version))
         ;; This hash pins the checkout of the commit above.
         (sha256
          (base32
           "0sbzma49aiiyw8b0jpr7fnhzys9nsqmp4hy4hdz1gzyg1lhnca26"))))
      (build-system r-build-system)
      ;; R packages this package depends on.
      (propagated-inputs
       `(("r-ggplot2" ,r-ggplot2)
         ("r-rcolorbrewer" ,r-rcolorbrewer)
         ("r-gridextra" ,r-gridextra)
         ("r-gplots" ,r-gplots)
         ("r-plyr" ,r-plyr)))
      (home-page "https://github.com/rajewsky-lab/dropbead")
      (synopsis "Basic exploration and analysis of Drop-seq data")
      (description "This package offers a quick and straight-forward way to
explore and perform basic analysis of single cell sequencing data coming from
droplet sequencing. It has been particularly tailored for Drop-seq.")
      (license license:gpl3))))
11143
;; Private (non-exported) variant of htslib built from a pinned commit of
;; the lomereiter fork; it is the "htslib" input of the sambamba package.
(define htslib-for-sambamba
  (let ((commit "2f3c3ea7b301f9b45737a793c0b2dcf0240e5ee5"))
    (package
      (inherit htslib)
      (name "htslib-for-sambamba")
      ;; Base version, packaging revision 1, and the first nine characters
      ;; of the commit.
      (version (string-append "1.3.1-1." (string-take commit 9)))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/lomereiter/htslib")
               (commit commit)))
         ;; The checkout is named after plain "htslib", not after this
         ;; variant's package name.
         (file-name (git-file-name "htslib" version))
         (sha256
          (base32
           "0g38g8s3npr0gjm9fahlbhiskyfws9l5i0x1ml3rakzj7az5l9c9"))))
      ;; Autotools are added in front of the inherited native inputs.
      ;; NOTE(review): presumably because the Git checkout ships no
      ;; generated configure script -- confirm.
      (native-inputs
       `(("autoconf" ,autoconf)
         ("automake" ,automake)
         ,@(package-native-inputs htslib))))))
11164
;; Sambamba: tools for working with SAM/BAM data, written in D.  The build
;; is driven by the upstream Makefile (there is no configure step), which is
;; patched to use ldc2 and the htslib and lz4 libraries from the inputs.
(define-public sambamba
  (package
    (name "sambamba")
    (version "0.7.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/lomereiter/sambamba")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       ;; This hash pins the checkout of the tag above.
       (sha256
        (base32
         "111h05b60pj8dxbidiamy4imc92x2962b3lmb7wgysl6lx064qis"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there is no test target
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'fix-ldc-version
           (lambda _
             (let ((python3 (which "python3"))
                   (ldc2 (which "ldc2")))
               ;; Replace the "/usr/bin/env ..." interpreter reference with
               ;; the absolute path of python3.
               (substitute* "gen_ldc_version_info.py"
                 (("/usr/bin/env.*") python3))
               (substitute* "Makefile"
                 ;; We use ldc2 instead of ldmd2 to compile sambamba.
                 (("\\$\\(shell which ldmd2\\)") ldc2)))
             #t))
         (add-after 'unpack 'place-biod-and-undead
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Copy the BioD sources into the "BioD" directory of the
             ;; build tree.
             (copy-recursively (assoc-ref inputs "biod") "BioD")
             #t))
         (add-after 'unpack 'unbundle-prerequisites
           (lambda _
             ;; Link against the htslib and lz4 inputs instead of the
             ;; static archives built from the bundled copies, and drop the
             ;; targets that would build those copies.
             (substitute* "Makefile"
               (("htslib/libhts.a lz4/lib/liblz4.a")
                "-L-lhts -L-llz4")
               ((" lz4-static htslib-static") ""))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bindir)
               ;; The build produces a versioned binary; install it under
               ;; the plain name "sambamba".
               (copy-file (string-append "bin/sambamba-" ,version)
                          (string-append bindir "/sambamba"))
               #t))))))
    (native-inputs
     `(("ldc" ,ldc)
       ("rdmd" ,rdmd)
       ("python" ,python)
       ;; BioD sources at a pinned commit; copied into the build tree by the
       ;; 'place-biod-and-undead phase.
       ("biod"
        ,(let ((commit "7969eb0a847b05874e83ffddead26e193ece8101"))
           (origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/biod/BioD")
                   (commit commit)))
             (file-name (string-append "biod-"
                                       (string-take commit 9)
                                       "-checkout"))
             (sha256
              (base32
               "0mjxsmbmv0jxl3pq21p8j5r829d648if8q58ka50b2956lc6qkpm")))))))
    (inputs
     `(("lz4" ,lz4)
       ("htslib" ,htslib-for-sambamba)))
    (home-page "https://lomereiter.github.io/sambamba/")
    (synopsis "Tools for working with SAM/BAM data")
    (description "Sambamba is a high performance modern robust and
fast tool (and library), written in the D programming language, for
working with SAM and BAM files. Current parallelised functionality is
an important subset of samtools functionality, including view, index,
sort, markdup, and depth.")
    (license license:gpl2+)))
11241
;; Ritornello: a control-free peak caller for ChIP-seq data.  The upstream
;; Makefile is used directly (no configure step), and the single resulting
;; binary is installed by hand.
(define-public ritornello
  (package
    (name "ritornello")
    (version "2.0.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/KlugerLab/Ritornello")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              ;; This hash pins the checkout of the tag above.
              (sha256
               (base32
                "1xahvq215qld7x1w8vpa5zbrsj6p9crb9shqa2x89sb0aaxa02jk"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'patch-samtools-references
           (lambda _
             ;; Include the samtools header via its "samtools/" prefix.
             (substitute* (list "src/SamStream.h" "src/FLD.cpp")
               (("<sam.h>") "<samtools/sam.h>"))
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin/")))
               (mkdir-p bindir)
               (install-file "bin/Ritornello" bindir)
               #t))))))
    (inputs
     `(("samtools" ,samtools-0.1)
       ("fftw" ,fftw)
       ("boost" ,boost)
       ("zlib" ,zlib)))
    (home-page "https://github.com/KlugerLab/Ritornello")
    (synopsis "Control-free peak caller for ChIP-seq data")
    (description "Ritornello is a ChIP-seq peak calling algorithm based on
signal processing that can accurately call binding events without the need to
do a pair total DNA input or IgG control sample. It has been tested for use
with narrow binding events such as transcription factor ChIP-seq.")
    (license license:gpl3+)))
11286
;; Trim Galore!: a wrapper script around Cutadapt and FastQC.  Nothing is
;; compiled; the script is patched to call its helper tools by absolute
;; path and then installed.
(define-public trim-galore
  (package
    (name "trim-galore")
    (version "0.6.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/TrimGalore")
             (commit version)))
       (file-name (git-file-name name version))
       ;; This hash pins the checkout of the tag above.
       (sha256
        (base32
         "1y31wbxwkm9xqzr5zv1pk5q418whnmlmgmfyxxpnl12h83m2i9iv"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda _
             ;; Trim Galore tries to figure out what version of Python
             ;; cutadapt is using by looking at the shebang.  Of course that
             ;; doesn't work, because cutadapt is wrapped in a shell script.
             ;; Hard-code a Python 3 answer instead.
             (substitute* "trim_galore"
               (("my \\$python_return.*")
                "my $python_return = \"Python 3.999\";\n"))
             #t))
         (delete 'build)
         (add-after 'unpack 'hardcode-tool-references
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Absolute file name of PROGRAM in the "bin" directory of the
             ;; input labelled INPUT.
             (define (tool input program)
               (string-append (assoc-ref inputs input) "/bin/" program))
             (substitute* "trim_galore"
               (("\\$path_to_cutadapt = 'cutadapt'")
                (string-append "$path_to_cutadapt = '"
                               (tool "cutadapt" "cutadapt")
                               "'"))
               (("\\$compression_path = \"gzip\"")
                (string-append "$compression_path = \""
                               (tool "gzip" "gzip")
                               "\""))
               ;; gunzip comes from the gzip input as well.
               (("\"gunzip")
                (string-append "\"" (tool "gzip" "gunzip")))
               (("\"pigz")
                (string-append "\"" (tool "pigz" "pigz"))))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               (mkdir-p bindir)
               (install-file "trim_galore" bindir)
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl" ,perl)
       ("pigz" ,pigz)
       ("cutadapt" ,cutadapt)))
    (native-inputs
     `(("unzip" ,unzip)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/")
    (synopsis "Wrapper around Cutadapt and FastQC")
    (description "Trim Galore! is a wrapper script to automate quality and
adapter trimming as well as quality control, with some added functionality to
remove biased methylation positions for RRBS sequence files.")
    (license license:gpl3+)))
11356
;; GESS is installed as a directory of Python sources below
;; "site-packages/gess"; "bin/GESS.py" is a symlink to the wrapped entry
;; point in that directory.
(define-public gess
  (package
    (name "gess")
    (version "1.0")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://compbio.uthscsa.edu/"
                                  "GESS_Web/files/"
                                  "gess-" version ".src.tar.gz"))
              (sha256
               (base32
                "0hyk403kxscclzfs24pvdgiv0wm03kjcziqdrp5w46cb049gz0d7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no tests
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin/"))
                    ;; Derive the "lib/pythonX.Y" component from python-2,
                    ;; which is the interpreter listed in the inputs,
                    ;; rather than from the unrelated `python' variable.
                    (target (string-append
                             out "/lib/python"
                             ,(version-major+minor
                               (package-version python-2))
                             "/site-packages/gess/")))
               (mkdir-p target)
               (copy-recursively "." target)
               ;; Make GESS.py executable
               (chmod (string-append target "GESS.py") #o555)
               ;; Add Python shebang to the top and make Matplotlib
               ;; usable.
               (substitute* (string-append target "GESS.py")
                 (("\"\"\"Description:" line)
                  (string-append "#!" (which "python") "
import matplotlib
matplotlib.use('Agg')
" line)))
               ;; Make sure GESS has all modules in its path
               (wrap-script (string-append target "GESS.py")
                 `("PYTHONPATH" ":" = (,target ,(getenv "PYTHONPATH"))))
               (mkdir-p bin)
               (symlink (string-append target "GESS.py")
                        (string-append bin "GESS.py"))
               #t))))))
    (inputs
     `(("python" ,python-2)
       ("python2-pysam" ,python2-pysam)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-networkx" ,python2-networkx)
       ("python2-biopython" ,python2-biopython)
       ("guile" ,guile-3.0)))           ; for the script wrapper
    (home-page "https://compbio.uthscsa.edu/GESS_Web/")
    (synopsis "Detect exon-skipping events from raw RNA-seq data")
    (description
     "GESS is an implementation of a novel computational method to detect de
novo exon-skipping events directly from raw RNA-seq data without the prior
knowledge of gene annotation information. GESS stands for the graph-based
exon-skipping scanner detection scheme.")
    (license license:bsd-3)))
11421
;; PHYLIP is built from its "src" directory with "Makefile.unx"; the custom
;; install phase then copies everything found in "../exe" to the output's
;; "bin" directory.
(define-public phylip
  (package
    (name "phylip")
    (version "3.696")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://evolution.gs.washington.edu/phylip/"
                           "download/phylip-" version ".tar.gz"))
       (sha256
        (base32
         "01jar1rayhr2gba2pgbw49m56rc5z4p5wn3ds0m188hrlln4a2nd"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; no check target
       #:make-flags '("-f" "Makefile.unx" "install")
       #:parallel-build? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'enter-dir
           (lambda _
             (chdir "src")
             #t))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (mkdir-p bin)
               (for-each (lambda (program)
                           (install-file program bin))
                         (find-files "../exe" ".*"))
               #t))))))
    (home-page "http://evolution.genetics.washington.edu/phylip/")
    (synopsis "Tools for inferring phylogenies")
    (description "PHYLIP (the PHYLogeny Inference Package) is a package of
programs for inferring phylogenies (evolutionary trees).")
    (license license:bsd-2)))
11458
;; The Integrative Modeling Platform, built with CMake against Python 2.
;; The Python libraries listed below are propagated.
(define-public imp
  (package
    (name "imp")
    (version "2.6.2")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://integrativemodeling.org/"
                           version "/download/imp-" version ".tar.gz"))
       (sha256
        (base32
         "0lxqx7vh79d771svr611dkilp6sn30qrbw8zvscbrm37v38d2j6h"))))
    (build-system cmake-build-system)
    ;; FIXME: Some tests fail because they produce warnings, others fail
    ;; because the PYTHONPATH does not include the modeller's directory.
    (arguments
     '(#:tests? #f))
    (inputs
     `(("boost" ,boost)
       ("gsl" ,gsl)
       ("swig" ,swig)
       ("hdf5" ,hdf5)
       ("fftw" ,fftw)
       ("python" ,python-2)))
    (propagated-inputs
     `(("python2-numpy" ,python2-numpy)
       ("python2-scipy" ,python2-scipy)
       ("python2-pandas" ,python2-pandas)
       ("python2-scikit-learn" ,python2-scikit-learn)
       ("python2-networkx" ,python2-networkx)))
    (home-page "https://integrativemodeling.org")
    (synopsis "Integrative modeling platform")
    (description "IMP's broad goal is to contribute to a comprehensive
structural characterization of biomolecules ranging in size and complexity
from small peptides to large macromolecular assemblies, by integrating data
from diverse biochemical and biophysical experiments. IMP provides a C++ and
Python toolbox for solving complex modeling problems, and a number of
applications for tackling some common problems in a user-friendly way.")
    ;; Most of IMP is under the GNU Lesser GPL (full text in the file
    ;; COPYING.LGPL). Some IMP modules are under the GNU GPL instead (see
    ;; the file COPYING.GPL).
    (license (list license:lgpl2.1+
                   license:gpl3+))))
11502
;; TADbit is a Python 2 library; the only customization is a phase that
;; makes two files writable for setup.py and redirects the bash completion
;; file to the package output.
(define-public tadbit
  (package
    (name "tadbit")
    (version "0.2.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/3DGenomes/TADbit")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "07g3aj648prmsvxp9caz5yl41k0y0647vxh0f5p3w8376mfiljd0"))))
    (build-system python-build-system)
    (arguments
     `(;; Tests are included and must be run after installation, but
       ;; they are incomplete and thus cannot be run.
       #:tests? #f
       #:python ,python-2
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-problems-with-setup.py
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out")))
               ;; setup.py opens these files for writing
               (chmod "_pytadbit/_version.py" #o664)
               (chmod "README.rst" #o664)

               ;; Don't attempt to install the bash completions to
               ;; the home directory.
               (rename-file "extras/.bash_completion"
                            "extras/tadbit")
               (substitute* "setup.py"
                 (("\\(path.expanduser\\('~'\\)")
                  (string-append "(\""
                                 out
                                 "/etc/bash_completion.d\""))
                 (("extras/\\.bash_completion")
                  "extras/tadbit"))
               #t))))))
    (inputs
     ;; TODO: add Chimera for visualization
     `(("imp" ,imp)
       ("mcl" ,mcl)
       ("python2-scipy" ,python2-scipy)
       ("python2-numpy" ,python2-numpy)
       ("python2-matplotlib" ,python2-matplotlib)
       ("python2-pysam" ,python2-pysam)))
    (home-page "https://3dgenomes.github.io/TADbit/")
    (synopsis "Analyze, model, and explore 3C-based data")
    (description
     "TADbit is a complete Python library to deal with all steps to analyze,
model, and explore 3C-based data. With TADbit the user can map FASTQ files to
obtain raw interaction binned matrices (Hi-C like matrices), normalize and
correct interaction matrices, identify and compare the so-called
@dfn{Topologically Associating Domains} (TADs), build 3D models from the
interaction matrices, and finally, extract structural properties from the
models. TADbit is complemented by TADkit for visualizing 3D models.")
    (license license:gpl3+)))
11561
;; kentUtils with everything outside a small allow-list of directories
;; removed by the source snippet; see the comments in the snippet and next
;; to the license field for the rationale.
(define-public kentutils
  (package
    (name "kentutils")
    ;; 302.1.0 is out, but the only difference is the inclusion of
    ;; pre-built binaries.
    (version "302.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ENCODE-DCC/kentUtils")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0n1wbyjpzii2b9qhyp9r1q76j623cggpg3y8fmw78ld3z4y7ivha"))
       (modules '((guix build utils)
                  (srfi srfi-26)
                  (ice-9 ftw)))
       (snippet
        '(begin
           ;; Only the contents of the specified directories are free
           ;; for all uses, so we remove the rest. "hg/autoSql" and
           ;; "hg/autoXml" are nominally free, but they depend on a
           ;; library that is built from the sources in "hg/lib",
           ;; which is nonfree.
           (let ((free '("." ".."
                         "utils" "lib" "inc" "tagStorm"
                         "parasol" "htslib")))
             (for-each (lambda (entry)
                         (let ((path (string-append "src/" entry)))
                           ;; Plain files directly below "src" are kept;
                           ;; only directories are removed.
                           (when (file-is-directory? path)
                             (delete-file-recursively path))))
                       (scandir "src"
                                (lambda (entry)
                                  (not (member entry free))))))
           ;; Only make the utils target, not the userApps target,
           ;; because that requires libraries we won't build.
           (substitute* "Makefile"
             ((" userApps") " utils"))
           ;; Only build libraries that are free.
           (substitute* "src/makefile"
             (("DIRS =.*") "DIRS =\n")
             (("cd jkOwnLib.*") "")
             ((" hgLib") "")
             (("cd hg.*") ""))
           (substitute* "src/utils/makefile"
             ;; These tools depend on "jkhgap.a", which is part of the
             ;; nonfree "src/hg/lib" directory.
             (("raSqlQuery") "")
             (("pslLiftSubrangeBlat") "")

             ;; Do not build UCSC tools, which may require nonfree
             ;; components.
             (("ALL_APPS =.*") "ALL_APPS = $(UTILS_APPLIST)\n"))
           #t))))
    (build-system gnu-build-system)
    (arguments
     `(;; There is no global test target and the test target for
       ;; individual tools depends on input files that are not
       ;; included.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'fix-permissions
           (lambda _
             (make-file-writable "src/inc/localEnvironment.mk")
             #t))
         (add-after 'unpack 'fix-paths
           (lambda _
             (substitute* "Makefile"
               (("/bin/echo") (which "echo")))
             #t))
         ;; Place a copy of the samtabix sources in the build tree.
         (add-after 'unpack 'prepare-samtabix
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((samtabix (assoc-ref inputs "samtabix")))
               (copy-recursively samtabix "samtabix")
               #t)))
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin")))
               (copy-recursively "bin" bin)
               #t))))))
    (native-inputs
     `(("samtabix"
        ,(origin
           (method git-fetch)
           (uri (git-reference
                 (url "http://genome-source.cse.ucsc.edu/samtabix.git")
                 (commit "10fd107909c1ac4d679299908be4262a012965ba")))
           (sha256
            (base32
             "0c1nj64l42v395sa84n7az43xiap4i6f9n9dfz4058aqiwkhkmma"))))))
    (inputs
     `(("zlib" ,zlib)
       ("tcsh" ,tcsh)
       ("perl" ,perl)
       ("libpng" ,libpng)
       ("mariadb" ,mariadb "lib")
       ("mariadb-dev" ,mariadb "dev")
       ("openssl" ,openssl-1.0)))
    (home-page "https://genome.cse.ucsc.edu/index.html")
    (synopsis "Assorted bioinformatics utilities")
    (description "This package provides the kentUtils, a selection of
bioinformatics utilities used in combination with the UCSC genome
browser.")
    ;; Only a subset of the sources are released under a non-copyleft
    ;; free software license. All other sources are removed in a
    ;; snippet. See this bug report for an explanation of how the
    ;; license statements apply:
    ;; https://github.com/ENCODE-DCC/kentUtils/issues/12
    (license (license:non-copyleft
              "http://genome.ucsc.edu/license/"
              "The contents of this package are free for all uses."))))
11678
;; F-Seq is built with Ant from a pinned upstream commit.  The install
;; phase patches the launcher script "bin/linux/fseq" and copies it, the
;; built jar and the README into the output.
(define-public f-seq
  (let ((commit "6ccded34cff38cf432deed8503648b4a66953f9b")
        (revision "1"))
    (package
      (name "f-seq")
      ;; Same string as the former hand-built
      ;; "1.1-<revision>.<first 7 characters of commit>".
      (version (git-version "1.1" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/aboyle/F-seq")
                      (commit commit)))
                ;; Name the checkout like the other git-fetch origins in
                ;; this file.
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1nk33k0yajg2id4g59bc4szr58r2q6pdq42vgcw054m8ip9wv26h"))
                (modules '((guix build utils)))
                ;; Remove bundled Java library archives.
                (snippet
                 '(begin
                    (for-each delete-file (find-files "lib" ".*"))
                    #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; no tests included
         #:phases
         (modify-phases %standard-phases
           (replace 'install
             (lambda* (#:key inputs outputs #:allow-other-keys)
               (let* ((target (assoc-ref outputs "out"))
                      (bin (string-append target "/bin"))
                      (doc (string-append target "/share/doc/f-seq"))
                      (lib (string-append target "/lib")))
                 (mkdir-p target)
                 (mkdir-p doc)
                 ;; Point the launcher at the java executable found in
                 ;; PATH, at the commons-cli jar from the inputs, and at
                 ;; the installed "bin" directory.
                 (substitute* "bin/linux/fseq"
                   (("java") (which "java"))
                   (("\\$REALDIR/../lib/commons-cli-1.1.jar")
                    (string-append (assoc-ref inputs "java-commons-cli")
                                   "/share/java/commons-cli.jar"))
                   (("REALDIR=.*")
                    (string-append "REALDIR=" bin "\n")))
                 (install-file "README.txt" doc)
                 (install-file "bin/linux/fseq" bin)
                 (install-file "build~/fseq.jar" lib)
                 (copy-recursively "lib" lib)
                 #t))))))
      (inputs
       `(("perl" ,perl)
         ("java-commons-cli" ,java-commons-cli)))
      (home-page "http://fureylab.web.unc.edu/software/fseq/")
      (synopsis "Feature density estimator for high-throughput sequence tags")
      (description
       "F-Seq is a software package that generates a continuous tag sequence
density estimation allowing identification of biologically meaningful sites
such as transcription factor binding sites (ChIP-seq) or regions of open
chromatin (DNase-seq). Output can be displayed directly in the UCSC Genome
Browser.")
      (license license:gpl3+))))
11737
;; Bismark is a collection of scripts; nothing is compiled.  The bundled
;; plotly.js is regenerated from the "plotly.js" input with uglify-js, and
;; the scripts, documentation and plotly directory are copied into the
;; output by the custom install phase.
(define-public bismark
  (package
    (name "bismark")
    (version "0.20.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/FelixKrueger/Bismark")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0xchm3rgilj6vfjnyzfzzymfd7djr64sbrmrvs3njbwi66jqbzw9"))))
    (build-system perl-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:modules ((guix build utils)
                  (ice-9 popen)
                  (srfi srfi-26)
                  (guix build perl-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; The bundled plotly.js is minified.
         (add-after 'unpack 'replace-plotly.js
           (lambda* (#:key inputs #:allow-other-keys)
             (let* ((file (assoc-ref inputs "plotly.js"))
                    (installed "plotly/plotly.js")
                    (minified (open-pipe* OPEN_READ "uglify-js" file)))
               (call-with-output-file installed
                 (cut dump-port minified <>))
               ;; Close the pipe to reap the child process, and abort the
               ;; build when uglify-js did not exit successfully;
               ;; otherwise a truncated or empty plotly.js could be
               ;; installed without notice.
               (unless (zero? (close-pipe minified))
                 (error "uglify-js failed to process" file)))
             #t))
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (share (string-append out "/share/bismark"))
                    (docdir (string-append out "/share/doc/bismark"))
                    (docs '("Docs/Bismark_User_Guide.html"))
                    (scripts '("bismark"
                               "bismark_genome_preparation"
                               "bismark_methylation_extractor"
                               "bismark2bedGraph"
                               "bismark2report"
                               "coverage2cytosine"
                               "deduplicate_bismark"
                               "filter_non_conversion"
                               "bam2nuc"
                               "bismark2summary"
                               "NOMe_filtering")))
               ;; Make bismark2report refer to the plotly directory that
               ;; is installed below SHARE.
               (substitute* "bismark2report"
                 (("\\$RealBin/plotly")
                  (string-append share "/plotly")))
               (mkdir-p share)
               (mkdir-p docdir)
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin))
                         scripts)
               (for-each (lambda (file) (install-file file docdir))
                         docs)
               (copy-recursively "Docs/Images" (string-append docdir "/Images"))
               (copy-recursively "plotly"
                                 (string-append share "/plotly"))

               ;; Fix references to gunzip
               (substitute* (map (lambda (file)
                                   (string-append bin "/" file))
                                 scripts)
                 (("\"gunzip -c")
                  (string-append "\"" (assoc-ref inputs "gzip")
                                 "/bin/gunzip -c")))
               #t))))))
    (inputs
     `(("gzip" ,gzip)
       ("perl-carp" ,perl-carp)
       ("perl-getopt-long" ,perl-getopt-long)))
    (native-inputs
     `(("plotly.js"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/plotly/plotly.js/"
                               "v1.39.4/dist/plotly.js"))
           (sha256
            (base32 "138mwsr4nf5qif4mrxx286mpnagxd1xwl6k8aidrjgknaqg88zyr"))))
       ("uglify-js" ,uglify-js)))
    (home-page "https://www.bioinformatics.babraham.ac.uk/projects/bismark/")
    (synopsis "Map bisulfite treated sequence reads and analyze methylation")
    (description "Bismark is a program to map bisulfite treated sequencing
reads to a genome of interest and perform methylation calls in a single step.
The output can be easily imported into a genome viewer, such as SeqMonk, and
enables a researcher to analyse the methylation levels of their samples
straight away. Its main features are:

@itemize
@item Bisulfite mapping and methylation calling in one single step
@item Supports single-end and paired-end read alignments
@item Supports ungapped and gapped alignments
@item Alignment seed length, number of mismatches etc are adjustable
@item Output discriminates between cytosine methylation in CpG, CHG
and CHH context
@end itemize\n")
    (license license:gpl3+)))
11842
;; PAML is built from its "src" directory with plain make.  The install
;; phase copies a fixed list of programs to "bin" and the "doc" directory
;; to "share/doc/paml".
(define-public paml
  (package
    (name "paml")
    (version "4.9e")
    (source (origin
              (method url-fetch)
              (uri (string-append "http://abacus.gene.ucl.ac.uk/software/"
                                  "paml" version ".tgz"))
              (sha256
               (base32
                "13zf6h9fiqghwhch2h06x1zdr6s42plsnqahflp5g7myr3han3s6"))
              (modules '((guix build utils)))
              ;; Remove Windows binaries
              (snippet
               '(begin
                  (for-each delete-file (find-files "." "\\.exe$"))
                  ;; Some files in the original tarball have restrictive
                  ;; permissions, which makes repackaging fail
                  (for-each (lambda (file) (chmod file #o644)) (find-files "."))
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are no tests
       #:make-flags '("CC=gcc")
       #:phases
       (modify-phases %standard-phases
         ;; There is no configure script; use this phase to replace the
         ;; hard-coded /bin/bash and to enter the source directory.
         (replace 'configure
           (lambda _
             (substitute* "src/BFdriver.c"
               (("/bin/bash") (which "bash")))
             (chdir "src")
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((tools '("baseml" "basemlg" "codeml"
                            "pamp" "evolver" "yn00" "chi2"))
                   (bin (string-append (assoc-ref outputs "out") "/bin"))
                   (docdir (string-append (assoc-ref outputs "out")
                                          "/share/doc/paml")))
               (mkdir-p bin)
               (for-each (lambda (file) (install-file file bin)) tools)
               (copy-recursively "../doc" docdir)
               #t))))))
    (home-page "http://abacus.gene.ucl.ac.uk/software/paml.html")
    (synopsis "Phylogenetic analysis by maximum likelihood")
    (description "PAML (for Phylogenetic Analysis by Maximum Likelihood)
contains a few programs for model fitting and phylogenetic tree reconstruction
using nucleotide or amino-acid sequence data.")
    ;; GPLv3 only
    (license license:gpl3)))
11893
;; kallisto, built with CMake.  The only custom phase edits the CMake files
;; so that the htslib from the inputs is linked instead of the copy below
;; "ext/htslib".
(define-public kallisto
  (package
    (name "kallisto")
    (version "0.44.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/pachterlab/kallisto")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0nj382jiywqnpgvyhichajpkkh5r0bapn43f4dx40zdaq5v4m40m"))))
    (build-system cmake-build-system)
    (arguments
     `(#:tests? #f                      ; no "check" target
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'do-not-use-bundled-htslib
           (lambda _
             ;; Wrap the ExternalProject_Add form in a conditional that
             ;; uses the name NEVER, and comment out the include
             ;; directory that refers to htslib_PREFIX.
             (substitute* "CMakeLists.txt"
               (("^ExternalProject_Add" opening)
                (string-append "if (NEVER)\n" opening))
               (("^\\)")
                ")\nendif(NEVER)")
               (("include_directories\\(\\$\\{htslib_PREFIX.*" directive)
                (string-append "# " directive)))
             ;; Link with "hts" instead of the static archive below
             ;; "ext/htslib".
             (substitute* "src/CMakeLists.txt"
               (("target_link_libraries\\(kallisto kallisto_core pthread \
\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/../ext/htslib/libhts.a\\)")
                "target_link_libraries(kallisto kallisto_core pthread hts)")
               (("include_directories\\(\\.\\./ext/htslib\\)") ""))
             #t)))))
    (inputs
     `(("hdf5" ,hdf5)
       ("htslib" ,htslib)
       ("zlib" ,zlib)))
    (home-page "https://pachterlab.github.io/kallisto/")
    (synopsis "Near-optimal RNA-Seq quantification")
    (description
     "Kallisto is a program for quantifying abundances of transcripts from
RNA-Seq data, or more generally of target sequences using high-throughput
sequencing reads. It is based on the novel idea of pseudoalignment for
rapidly determining the compatibility of reads with targets, without the need
for alignment. Pseudoalignment of reads preserves the key information needed
for quantification, and kallisto is therefore not only fast, but also as
accurate as existing quantification tools.")
    (license license:bsd-2)))
11942
;; GFF/GTF parsing library, built with CMake from a tagged upstream
;; checkout.
(define-public libgff
  (package
    (name "libgff")
    (version "1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/Kingsford-Group/libgff")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0n6vfjnq7a2mianipscbshrvbncss8z4zkgkbjw754p9043nfkps"))))
    (build-system cmake-build-system)
    ;; No tests are included, so the check phase is disabled.
    (arguments
     '(#:tests? #f))
    (home-page "https://github.com/Kingsford-Group/libgff")
    (synopsis "Parser library for reading/writing GFF files")
    (description "This is a simple \"libraryfication\" of the GFF/GTF parsing
code that is used in the Cufflinks codebase. The goal of this library is to
provide this functionality without the necessity of drawing in a heavy-weight
dependency like SeqAn.")
    (license (license:x11-style "https://www.boost.org/LICENSE_1_0.txt"))))
11965
;; Sailfish, built with CMake.  The custom phases disable the Boost lookup,
;; unpack the RapMap sources where the build expects them, and rewrite the
;; CMake files to use jellyfish, libdivsufsort and Eigen from the inputs.
(define-public sailfish
  (package
    (name "sailfish")
    (version "0.10.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/kingsfordgroup/sailfish")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1amcc5hqvsl42hg4x19bi9vy47cl874s0lw1fmi0hwsdk9i8c03v"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled headers for eigen3.
                  (delete-file-recursively "include/eigen3/")
                  #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       (let ((boost (assoc-ref %build-inputs "boost")))
         (list (string-append "-DBOOST_INCLUDEDIR="
                              boost
                              "/include/")
               (string-append "-DBOOST_LIBRARYDIR="
                              boost
                              "/lib/")
               (string-append "-DBoost_LIBRARIES="
                              "-lboost_iostreams "
                              "-lboost_filesystem "
                              "-lboost_system "
                              "-lboost_thread "
                              "-lboost_timer "
                              "-lboost_chrono "
                              "-lboost_program_options")
               "-DBoost_FOUND=TRUE"
               ;; Don't download RapMap---we already have it!
               "-DFETCHED_RAPMAP=1"))
       ;; Tests must be run after installation and the location of the test
       ;; data file must be overridden. But the tests fail. It looks like
       ;; they are not really meant to be run.
       #:tests? #f
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda _
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.53\\.0") "#"))
             #t))
         (add-after 'unpack 'do-not-assign-to-macro
           (lambda _
             (substitute* "include/spdlog/details/format.cc"
               (("const unsigned CHAR_WIDTH = 1;") ""))
             #t))
         ;; Extract the RapMap archive to a scratch directory, then copy
         ;; its sources and headers below "external/install".
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((rapmap (assoc-ref inputs "rapmap"))
                   (src "external/install/src/rapmap/")
                   (include "external/install/include/rapmap/"))
               (mkdir-p "/tmp/rapmap")
               (invoke "tar" "xf"
                       rapmap
                       "-C" "/tmp/rapmap"
                       "--strip-components=1")
               (mkdir-p src)
               (mkdir-p include)
               (for-each (lambda (source-file)
                           (install-file source-file src))
                         (find-files "/tmp/rapmap/src" "\\.(c|cpp)"))
               (copy-recursively "/tmp/rapmap/include" include)
               #t)))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((jellyfish (assoc-ref inputs "jellyfish"))
                   (libdivsufsort (assoc-ref inputs "libdivsufsort"))
                   (eigen (assoc-ref inputs "eigen")))
               ;; Drop the include of jellyfish's config.h from every
               ;; file that has it.
               (substitute* '("src/SailfishIndexer.cpp"
                              "src/SailfishUtils.cpp"
                              "src/SailfishQuantify.cpp"
                              "src/FASTAParser.cpp"
                              "include/PCA.hpp"
                              "include/SailfishUtils.hpp"
                              "include/SailfishIndex.hpp"
                              "include/CollapsedEMOptimizer.hpp"
                              "src/CollapsedEMOptimizer.cpp")
                 (("#include \"jellyfish/config.h\"") ""))
               (substitute* "src/CMakeLists.txt"
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/include/jellyfish-2.2..")
                  (string-append jellyfish
                                 "/include/jellyfish-" ,(package-version jellyfish)))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libjellyfish-2.0.a")
                  (string-append jellyfish
                                 "/lib/libjellyfish-2.0.a"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                  (string-append libdivsufsort
                                 "/lib/libdivsufsort.so"))
                 (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                  (string-append libdivsufsort
                                 "/lib/libdivsufsort64.so")))
               (substitute* "CMakeLists.txt"
                 ;; Don't prefer static libs
                 (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
                 (("find_package\\(Jellyfish.*") "")
                 (("ExternalProject_Add\\(libjellyfish") "message(")
                 (("ExternalProject_Add\\(libgff") "message(")
                 (("ExternalProject_Add\\(libsparsehash") "message(")
                 (("ExternalProject_Add\\(libdivsufsort") "message("))

               ;; Ensure that Eigen headers can be found
               (setenv "CPLUS_INCLUDE_PATH"
                       (string-append eigen
                                      "/include/eigen3:"
                                      (or (getenv "CPLUS_INCLUDE_PATH") "")))
               #t))))))
    (inputs
     `(("boost" ,boost)
       ("eigen" ,eigen)
       ("jemalloc" ,jemalloc)
       ("jellyfish" ,jellyfish)
       ("sparsehash" ,sparsehash)
       ("rapmap" ,(origin
                    (method git-fetch)
                    (uri (git-reference
                          (url "https://github.com/COMBINE-lab/RapMap")
                          (commit (string-append "sf-v" version))))
                    (file-name (string-append "rapmap-sf-v" version "-checkout"))
                    (sha256
                     (base32
                      "1hv79l5i576ykv5a1srj2p0q36yvyl5966m0fcy2lbi169ipjakf"))
                    (modules '((guix build utils)))
                    ;; These files are expected to be excluded.
                    (snippet
                     '(begin (delete-file-recursively "include/spdlog")
                             (for-each delete-file '("include/xxhash.h"
                                                     "src/xxhash.c"))
                             #t))))
       ("libdivsufsort" ,libdivsufsort)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://www.cs.cmu.edu/~ckingsf/software/sailfish/")
    (synopsis "Mapping-based isoform quantification from RNA-Seq reads")
    (description "Sailfish is a tool for genomic transcript quantification
from RNA-seq data. It requires a set of target transcripts (either from a
reference or de-novo assembly) to quantify. All you need to run sailfish is a
fasta file containing your reference transcripts and a (set of) fasta/fastq
file(s) containing your reads.")
    (license license:gpl3+)))
12115
;; Private (non-exported) variant of libstadenio, fetched from the
;; COMBINE-lab fork of staden-io_lib.
(define libstadenio-for-salmon
  (package
    (name "libstadenio")
    (version "1.14.8")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/COMBINE-lab/staden-io_lib")
                    (commit (string-append "v" version))))
              ;; Same resulting name as the former hand-built
              ;; "<name>-<version>-checkout", spelled like the other
              ;; git-fetch origins in this file.
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1x8kxxqxl892vwfbprlbyfwkkv7c34ggkc94892x9x0g37x5nbwx"))))
    (build-system gnu-build-system)
    (arguments '(#:parallel-tests? #f)) ; not supported
    (inputs
     `(("zlib" ,zlib)))
    (native-inputs
     `(("perl" ,perl)))                 ; for tests
    (home-page "https://github.com/COMBINE-lab/staden-io_lib")
    (synopsis "General purpose trace and experiment file library")
    (description "This package provides a library of file reading and writing
code to provide a general purpose Trace file (and Experiment File) reading
interface.

The following file formats are supported:

@enumerate
@item SCF trace files
@item ABI trace files
@item ALF trace files
@item ZTR trace files
@item SFF trace archives
@item SRF trace archives
@item Experiment files
@item Plain text files
@item SAM/BAM sequence files
@item CRAM sequence files
@end enumerate\n")
    (license license:bsd-3)))
12156
;; Salmon, built with CMake against system libraries instead of the copies
;; its build system would otherwise download or bundle.
(define-public salmon
  (package
    (name "salmon")
    (version "0.13.1")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/COMBINE-lab/salmon")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1i2z4aivicmiixdz9bxalp7vmfzi3k92fxa63iqa8kgvfw5a4aq5"))
              (modules '((guix build utils)))
              (snippet
               '(begin
                  ;; Delete bundled headers for eigen3.
                  (delete-file-recursively "include/eigen3/")
                  #t))))
    (build-system cmake-build-system)
    (arguments
     `(#:configure-flags
       ;; The Boost variables are set by hand because the find_package call
       ;; for Boost is commented out in the 'do-not-look-for-boost phase.
       (list (string-append "-DBOOST_INCLUDEDIR="
                            (assoc-ref %build-inputs "boost")
                            "/include/")
             (string-append "-DBOOST_LIBRARYDIR="
                            (assoc-ref %build-inputs "boost")
                            "/lib/")
             (string-append "-DBoost_LIBRARIES="
                            "-lboost_iostreams "
                            "-lboost_filesystem "
                            "-lboost_system "
                            "-lboost_thread "
                            "-lboost_timer "
                            "-lboost_chrono "
                            "-lboost_program_options")
             "-DBoost_FOUND=TRUE"
             "-DTBB_LIBRARIES=tbb tbbmalloc"
             ;; Don't download RapMap---we already have it!
             "-DFETCHED_RAPMAP=1")
       #:phases
       (modify-phases %standard-phases
         ;; Boost cannot be found, even though it's right there.
         (add-after 'unpack 'do-not-look-for-boost
           (lambda _
             (substitute* "CMakeLists.txt"
               (("find_package\\(Boost 1\\.59\\.0") "#"))
             #t))
         ;; Replace the call that produces the version message with an
         ;; empty string literal.
         (add-after 'unpack 'do-not-phone-home
           (lambda _
             (substitute* "src/Salmon.cpp"
               (("getVersionMessage\\(\\)") "\"\""))
             #t))
         ;; Copy the RapMap sources from the "rapmap" input to the place
         ;; where the build expects the fetched copy, then remove a few
         ;; files and the spdlog directory from that copy.
         (add-after 'unpack 'prepare-rapmap
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((src "external/install/src/rapmap/")
                   (include "external/install/include/rapmap/")
                   (rapmap (assoc-ref inputs "rapmap")))
               (mkdir-p src)
               (mkdir-p include)
               (copy-recursively (string-append rapmap "/src") src)
               (copy-recursively (string-append rapmap "/include") include)
               (for-each delete-file '("external/install/include/rapmap/xxhash.h"
                                       "external/install/include/rapmap/FastxParser.hpp"
                                       "external/install/include/rapmap/concurrentqueue.h"
                                       "external/install/include/rapmap/FastxParserThreadUtils.hpp"
                                       "external/install/src/rapmap/FastxParser.cpp"
                                       "external/install/src/rapmap/xxhash.c"))
               (delete-file-recursively "external/install/include/rapmap/spdlog"))
             #t))
         (add-after 'unpack 'use-system-libraries
           (lambda* (#:key inputs #:allow-other-keys)
             (substitute* "CMakeLists.txt"
               ;; Don't prefer static libs
               (("SET\\(CMAKE_FIND_LIBRARY_SUFFIXES.*") "")
               (("set\\(TBB_LIBRARIES") "message(")
               ;; Don't download anything
               (("DOWNLOAD_COMMAND") "DOWNLOAD_COMMAND echo")
               (("externalproject_add\\(libcereal") "message(")
               (("externalproject_add\\(libgff") "message(")
               (("externalproject_add\\(libtbb") "message(")
               (("externalproject_add\\(libdivsufsort") "message(")
               (("externalproject_add\\(libstadenio") "message(")
               (("externalproject_add_step\\(") "message("))
             ;; Point the build at the shared libraries from the inputs
             ;; instead of the static archives under external/install.
             (substitute* "src/CMakeLists.txt"
               (("add_dependencies") "#")
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libstaden-read.a")
                (string-append (assoc-ref inputs "libstadenio-for-salmon")
                               "/lib/libstaden-read.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort.so"))
               (("\\$\\{GAT_SOURCE_DIR\\}/external/install/lib/libdivsufsort64.a")
                (string-append (assoc-ref inputs "libdivsufsort")
                               "/lib/libdivsufsort64.so"))
               (("lib/libdivsufsort.a") "/lib/libdivsufsort.so"))

             ;; Ensure that all headers can be found.  Only join with ":"
             ;; when CPLUS_INCLUDE_PATH already has a value: a leading
             ;; empty element would make the compiler search the current
             ;; working directory as well.
             (let ((eigen-headers (string-append (assoc-ref inputs "eigen")
                                                 "/include/eigen3"))
                   (current (getenv "CPLUS_INCLUDE_PATH")))
               (setenv "CPLUS_INCLUDE_PATH"
                       (if (and current (not (string-null? current)))
                           (string-append current ":" eigen-headers)
                           eigen-headers)))
             #t))
         ;; CMAKE_INSTALL_PREFIX does not exist when the tests are
         ;; run.  It only exists after the install phase.
         (add-after 'unpack 'fix-tests
           (lambda _
             (substitute* "src/CMakeLists.txt"
               (("DTOPLEVEL_DIR=\\$\\{CMAKE_INSTALL_PREFIX")
                "DTOPLEVEL_DIR=${GAT_SOURCE_DIR"))
             #t)))))
    (inputs
     `(("boost" ,boost)
       ("bzip2" ,bzip2)
       ("cereal" ,cereal)
       ("eigen" ,eigen)
       ;; RapMap sources at the tag matching this Salmon version; consumed
       ;; by the 'prepare-rapmap phase.
       ("rapmap" ,(origin
                    (method git-fetch)
                    (uri (git-reference
                          (url "https://github.com/COMBINE-lab/RapMap")
                          (commit (string-append "salmon-v" version))))
                    (file-name (string-append "rapmap-salmon-v" version "-checkout"))
                    (sha256
                     (base32
                      "1biplxf0csc7a8h1wf219b0vmjkvw6wk2zylhdklb577kgmihdms"))))
       ("jemalloc" ,jemalloc)
       ("libgff" ,libgff)
       ("tbb" ,tbb)
       ("libdivsufsort" ,libdivsufsort)
       ("libstadenio-for-salmon" ,libstadenio-for-salmon)
       ("xz" ,xz)
       ("zlib" ,zlib)))
    (native-inputs
     `(("pkg-config" ,pkg-config)))
    (home-page "https://github.com/COMBINE-lab/salmon")
    (synopsis "Quantification from RNA-seq reads using lightweight alignments")
    (description "Salmon is a program to produce highly-accurate,
transcript-level quantification estimates from RNA-seq data. Salmon achieves
its accuracy and speed via a number of different innovations, including the
use of lightweight alignments (accurate but fast-to-compute proxies for
traditional read alignments) and massively-parallel stochastic collapsed
variational inference.")
    (license license:gpl3+)))
12301
;; loompy, built from the Git repository so that the test suite is
;; available.
(define-public python-loompy
  (package
    (name "python-loompy")
    (version "2.0.17")
    ;; The tarball on Pypi does not include the tests.
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/linnarsson-lab/loompy")
                    (commit version)))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "12a5kjgiikapv93wahfw0frszx1lblnppyz3vs5gy8fgmgngra07"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run the test suite with pytest from the source directory.  The
         ;; phase honours #:tests? so the tests can be switched off, and it
         ;; tolerates an unset PYTHONPATH instead of failing in
         ;; string-append.
         (replace 'check
           (lambda* (#:key tests? #:allow-other-keys)
             (when tests?
               (let ((search-path (getenv "PYTHONPATH")))
                 ;; Put the checkout first so the tests import this source
                 ;; tree.
                 (setenv "PYTHONPATH"
                         (if search-path
                             (string-append (getcwd) ":" search-path)
                             (getcwd))))
               (invoke "pytest" "tests"))
             #t)))))
    (propagated-inputs
     `(("python-h5py" ,python-h5py)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-scipy" ,python-scipy)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/linnarsson-lab/loompy")
    (synopsis "Work with .loom files for single-cell RNA-seq data")
    (description "The loom file format is an efficient format for very large
omics datasets, consisting of a main matrix, optional additional layers, a
variable number of row and column annotations. Loom also supports sparse
graphs. This library makes it easy to work with @file{.loom} files for
single-cell RNA-seq data.")
    (license license:bsd-3)))
12342
12343 ;; We cannot use the latest commit because it requires Java 9.
(define-public java-forester
  (let ((commit "86b07efe302d5094b42deed9260f719a4c4ac2e6")
        (revision "1"))
    (package
      (name "java-forester")
      (version (git-version "0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/cmzmasek/forester")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0vxavc1yrf84yrnf20dq26hi0lglidk8d382xrxsy4qmlbjd276z"))
                (modules '((guix build utils)))
                (snippet
                 '(begin
                    ;; Drop everything that is bundled: jars, pre-built
                    ;; classes, and the bundled applications.
                    (delete-file-recursively "forester/java/resources")
                    (delete-file-recursively "forester/java/classes")
                    (for-each delete-file (find-files "forester/java/" "\\.jar$"))
                    (delete-file-recursively "forester_applications")
                    #t))))
      (build-system ant-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:jdk ,icedtea-8
         #:modules ((guix build ant-build-system)
                    (guix build utils)
                    (guix build java-utils)
                    (sxml simple)
                    (sxml transform))
         #:phases
         (modify-phases %standard-phases
           ;; The Java sources live in a sub-directory of the checkout.
           (add-after 'unpack 'chdir
             (lambda _
               (chdir "forester/java")
               #t))
           ;; Rewrite build.xml through SXML, dropping every <unjar>
           ;; element and copying all other elements and text unchanged.
           (add-after 'chdir 'fix-dependencies
             (lambda _
               (chmod "build.xml" #o664)
               (let ((document
                      (with-input-from-file "build.xml"
                        (lambda _ (xml->sxml #:trim-whitespace? #t))))
                     (rules
                      `(;; Remove all unjar tags to avoid repacking classes.
                        (unjar . ,(lambda _ '()))
                        (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                        (*text* . ,(lambda (_ txt) txt)))))
                 (call-with-output-file "build.xml.new"
                   (lambda (port)
                     (sxml->xml (pre-post-order document rules) port))))
               (rename-file "build.xml.new" "build.xml")
               #t))
           ;; FIXME: itext is difficult to package as it depends on a few
           ;; unpackaged libraries.
           (add-after 'chdir 'remove-dependency-on-unpackaged-itext
             (lambda _
               (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
               (substitute* "src/org/forester/archaeopteryx/MainFrame.java"
                 (("pdf_written_to = PdfExporter.*")
                  "throw new IOException(\"PDF export is not available.\");"))
               #t))
           ;; There is no install target
           (replace 'install (install-jars ".")))))
      (propagated-inputs
       `(("java-commons-codec" ,java-commons-codec)
         ("java-openchart2" ,java-openchart2)))
      (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
      (synopsis "Phylogenomics libraries for Java")
      (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
      (license license:lgpl2.1+))))
12418
;; Forester 1.005, built from the sources jar published on Maven Central.
(define-public java-forester-1.005
  (package
    (name "java-forester")
    (version "1.005")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://repo1.maven.org/maven2/"
                                  "org/biojava/thirdparty/forester/"
                                  version "/forester-" version "-sources.jar"))
              (file-name (string-append name "-" version ".jar"))
              (sha256
               (base32
                "04r8qv4rk3p71z4ajrvp11py1z46qrx0047j3zzs79s6lnsm3lcv"))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:jdk ,icedtea-8
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils)
                  (sxml simple)
                  (sxml transform))
       #:phases
       (modify-phases %standard-phases
         ;; Generate a top-level build.xml from src/build.xml without any
         ;; <unjar> elements, then place the two separately fetched
         ;; resource files next to it and point build.xml at them.
         (add-after 'unpack 'fix-dependencies
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((document
                    (with-input-from-file "src/build.xml"
                      (lambda _ (xml->sxml #:trim-whitespace? #t))))
                   (rules
                    `(;; Remove all unjar tags to avoid repacking classes.
                      (unjar . ,(lambda _ '()))
                      (*default* . ,(lambda (tag . kids) `(,tag ,@kids)))
                      (*text* . ,(lambda (_ txt) txt)))))
               (call-with-output-file "build.xml"
                 (lambda (port)
                   (sxml->xml (pre-post-order document rules) port))))
             (copy-file (assoc-ref inputs "synth_look_and_feel_1.xml")
                        "synth_look_and_feel_1.xml")
             (copy-file (assoc-ref inputs "phyloxml.xsd")
                        "phyloxml.xsd")
             (substitute* "build.xml"
               (("../resources/synth_laf/synth_look_and_feel_1.xml")
                "synth_look_and_feel_1.xml")
               (("../resources/phyloxml_schema/1.10/phyloxml.xsd")
                "phyloxml.xsd"))
             #t))
         ;; FIXME: itext is difficult to package as it depends on a few
         ;; unpackaged libraries.
         ;; The first rule throws and opens a Java block comment; the
         ;; other two rules close that comment.
         (add-after 'unpack 'remove-dependency-on-unpackaged-itext
           (lambda _
             (delete-file "src/org/forester/archaeopteryx/PdfExporter.java")
             (substitute* '("src/org/forester/archaeopteryx/MainFrame.java"
                            "src/org/forester/archaeopteryx/MainFrameApplication.java")
               (("pdf_written_to = PdfExporter.*")
                "throw new IOException(\"PDF export is not available.\"); /*")
               ((".getPrintSizeX\\(\\), getOptions\\(\\).getPrintSizeY\\(\\) \\);") "*/")
               (("getCurrentTreePanel\\(\\).getHeight\\(\\) \\);") "*/"))
             #t))
         (add-after 'unpack 'delete-pre-built-classes
           (lambda _
             (delete-file-recursively "src/classes")
             #t))
         ;; There is no install target
         (replace 'install (install-jars ".")))))
    (propagated-inputs
     `(("java-commons-codec" ,java-commons-codec)
       ("java-openchart2" ,java-openchart2)))
    ;; The source archive does not contain the resources.
    (native-inputs
     `(("phyloxml.xsd"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "b61cc2dcede0bede317db362472333115756b8c6/"
                               "forester/resources/phyloxml_schema/1.10/phyloxml.xsd"))
           (file-name (string-append name "-phyloxml-" version ".xsd"))
           (sha256
            (base32
             "1zxc4m8sn4n389nqdnpxa8d0k17qnr3pm2y5y6g6vh4k0zm52npv"))))
       ("synth_look_and_feel_1.xml"
        ,(origin
           (method url-fetch)
           (uri (string-append "https://raw.githubusercontent.com/cmzmasek/forester/"
                               "29e04321615da6b35c1e15c60e52caf3f21d8e6a/"
                               "forester/java/classes/resources/"
                               "synth_look_and_feel_1.xml"))
           (file-name (string-append name "-synth-look-and-feel-" version ".xml"))
           (sha256
            (base32
             "1gv5602gv4k7y7713y75a4jvj7i9s7nildsbdl7n9q10sc2ikg8h"))))))
    (home-page "https://sites.google.com/site/cmzmasek/home/software/forester")
    (synopsis "Phylogenomics libraries for Java")
    (description "Forester is a collection of Java libraries for
phylogenomics and evolutionary biology research. It includes support for
reading, writing, and exporting phylogenetic trees.")
    (license license:lgpl2.1+)))
12514
;; Core module of BioJava; the other java-biojava-* packages in this file
;; inherit from this definition.
(define-public java-biojava-core
  (package
    (name "java-biojava-core")
    (version "4.2.11")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/biojava/biojava")
                    (commit (string-append "biojava-" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "1bvryh2bpsvash8ln79cmc9sqm8qw72hz4xzwqxcrjm8ssxszhqk"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-core.jar"
       #:source-dir "biojava-core/src/main/java/"
       #:test-dir "biojava-core/src/test"
       ;; These tests seem to require internet access.
       #:test-exclude '("**/SearchIOTest.java"
                        "**/BlastXMLParserTest.java"
                        "**/GenbankCookbookTest.java"
                        "**/GenbankProxySequenceReaderTest.java")
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource files into the class output
         ;; directories used for the build and for the tests.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-core/src/main/resources"
                               "build/classes")
             #t))
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-core/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)))
    (native-inputs
     `(("java-junit" ,java-junit)
       ("java-hamcrest-core" ,java-hamcrest-core)))
    (home-page "https://biojava.org")
    (synopsis "Core libraries of Java framework for processing biological data")
    (description "BioJava is a project dedicated to providing a Java framework
for processing biological data. It provides analytical and statistical
routines, parsers for common file formats, reference implementations of
popular algorithms, and allows the manipulation of sequences and 3D
structures. The goal of the biojava project is to facilitate rapid
application development for bioinformatics.

This package provides the core libraries.")
    (license license:lgpl2.1+)))
12570
;; BioJava "phylo" module.  The build system, native inputs, home page,
;; source and license are taken unchanged from java-biojava-core through
;; inheritance; only the fields that differ are spelled out.
(define-public java-biojava-phylo
  (package
    (inherit java-biojava-core)
    (name "java-biojava-phylo")
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource files into the class output
         ;; directories used for the build and for the tests.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-forester" ,java-forester)))
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
12606
;; BioJava "alignment" module.  The build system, native inputs, home page,
;; source and license are taken unchanged from java-biojava-core through
;; inheritance; only the fields that differ are spelled out.
(define-public java-biojava-alignment
  (package
    (inherit java-biojava-core)
    (name "java-biojava-alignment")
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource files into the class output
         ;; directories used for the build and for the tests.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core)
       ("java-biojava-phylo" ,java-biojava-phylo)
       ("java-forester" ,java-forester)))
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
12650
;; Variant of java-biojava-core pinned to the 4.0.0 tag; everything except
;; the version and the source is inherited.
(define-public java-biojava-core-4.0
  (package
    (inherit java-biojava-core)
    (name "java-biojava-core")
    (version "4.0.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/biojava/biojava")
                    (commit (string-append "biojava-" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "13675f6y9aqi7bi2lk3s1z7a22ynccjiqwa8izh7p97xi9wsfmd8"))))))
12664
;; BioJava "phylo" module from the 4.0 series.  The build system, native
;; inputs, home page, source and license come from java-biojava-core-4.0
;; through inheritance; only the fields that differ are spelled out.
(define-public java-biojava-phylo-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-phylo")
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-phylo.jar"
       #:source-dir "biojava-phylo/src/main/java/"
       #:test-dir "biojava-phylo/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource files into the class output
         ;; directories used for the build and for the tests.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/main/resources"
                               "build/classes")
             #t))
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-phylo/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-forester" ,java-forester-1.005)))
    (synopsis "Biojava interface to the forester phylogenomics library")
    (description "The phylo module provides a biojava interface layer to the
forester phylogenomics library for constructing phylogenetic trees.")))
12700
;; BioJava "alignment" module from the 4.0 series.  The build system,
;; native inputs, home page, source and license come from
;; java-biojava-core-4.0 through inheritance; only the fields that differ
;; are spelled out.
(define-public java-biojava-alignment-4.0
  (package
    (inherit java-biojava-core-4.0)
    (name "java-biojava-alignment")
    (arguments
     `(#:jdk ,icedtea-8
       #:jar-name "biojava-alignment.jar"
       #:source-dir "biojava-alignment/src/main/java/"
       #:test-dir "biojava-alignment/src/test"
       #:phases
       (modify-phases %standard-phases
         ;; Copy the module's resource files into the class output
         ;; directories used for the build and for the tests.
         (add-before 'build 'copy-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/main/resources"
                               "build/classes")
             #t))
         (add-before 'check 'copy-test-resources
           (lambda _
             (copy-recursively "biojava-alignment/src/test/resources"
                               "build/test-classes")
             #t)))))
    (propagated-inputs
     `(("java-log4j-api" ,java-log4j-api)
       ("java-log4j-core" ,java-log4j-core)
       ("java-slf4j-api" ,java-slf4j-api)
       ("java-slf4j-simple" ,java-slf4j-simple)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-phylo" ,java-biojava-phylo-4.0)
       ("java-forester" ,java-forester-1.005)))
    (synopsis "Biojava API for genetic sequence alignment")
    (description "The alignment module of BioJava provides an API that
contains

@itemize
@item implementations of dynamic programming algorithms for sequence
alignment;
@item reading and writing of popular alignment file formats;
@item a single-, or multi- threaded multiple sequence alignment algorithm.
@end itemize\n")))
12744
;; Drop-seq tools, built with Ant from the upstream zip archive after the
;; bundled jars and third-party code have been removed from it.
(define-public dropseq-tools
  (package
    (name "dropseq-tools")
    (version "1.13")
    (source
     (origin
       (method url-fetch)
       (uri "http://mccarrolllab.com/download/1276/")
       (file-name (string-append "dropseq-tools-" version ".zip"))
       (sha256
        (base32
         "0yrffckxqk5l8b5xb6z4laq157zd9mdypr2p4b4vq2bhjzi1sj0s"))
       ;; Delete bundled libraries
       (modules '((guix build utils)))
       (snippet
        '(begin
           (for-each delete-file (find-files "jar/lib" "\\.jar$"))
           (delete-file-recursively "3rdParty")
           #t))))
    (build-system ant-build-system)
    (arguments
     `(#:tests? #f                      ; test data are not included
       #:test-target "test"
       #:build-target "all"
       #:source-dir "public/src/"
       #:jdk ,icedtea-8
       #:make-flags
       (list (string-append "-Dpicard.executable.dir="
                            (assoc-ref %build-inputs "java-picard")
                            "/share/java/"))
       #:modules ((ice-9 match)
                  (srfi srfi-1)
                  (guix build utils)
                  (guix build java-utils)
                  (guix build ant-build-system))
       #:phases
       (modify-phases %standard-phases
         ;; FIXME: fails with "java.io.FileNotFoundException:
         ;; /gnu/store/…-dropseq-tools-1.13/share/java/lib/biojava-alignment.jar"
         (delete 'generate-jar-indices)
         ;; All dependencies must be linked to "lib", because that's where
         ;; they will be searched for when the Class-Path property of the
         ;; manifest is computed.
         (add-after 'unpack 'record-references
           (lambda* (#:key inputs #:allow-other-keys)
             (mkdir-p "jar/lib")
             (let* ((wanted?
                     ;; Every "java-" input except the test framework.
                     (lambda (label)
                       (and (string-prefix? "java-" label)
                            (not (string=? label "java-testng")))))
                    (directories
                     (filter-map (match-lambda
                                   ((label . directory)
                                    (and (wanted? label) directory)))
                                 inputs))
                    (jars
                     (append-map (lambda (directory)
                                   (find-files directory "\\.jar$"))
                                 directories)))
               (for-each (lambda (jar)
                           (symlink jar
                                    (string-append "jar/lib/" (basename jar))))
                         jars))
             #t))
         ;; There is no installation target
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (share (string-append out "/share/java/"))
                    (lib (string-append share "/lib/"))
                    (scripts '("BAMTagHistogram"
                               "BAMTagofTagCounts"
                               "BaseDistributionAtReadPosition"
                               "CollapseBarcodesInPlace"
                               "CollapseTagWithContext"
                               "ConvertToRefFlat"
                               "CreateIntervalsFiles"
                               "DetectBeadSynthesisErrors"
                               "DigitalExpression"
                               "Drop-seq_alignment.sh"
                               "FilterBAM"
                               "FilterBAMByTag"
                               "GatherGeneGCLength"
                               "GatherMolecularBarcodeDistributionByGene"
                               "GatherReadQualityMetrics"
                               "PolyATrimmer"
                               "ReduceGTF"
                               "SelectCellsByNumTranscripts"
                               "SingleCellRnaSeqMetricsCollector"
                               "TagBamWithReadSequenceExtended"
                               "TagReadWithGeneExon"
                               "TagReadWithInterval"
                               "TrimStartingSequence"
                               "ValidateReference"))
                    (installed-scripts
                     (map (lambda (script)
                            (string-append bin "/" script))
                          scripts)))
               (for-each mkdir-p (list bin share lib))
               (install-file "dist/dropseq.jar" share)
               (for-each (lambda (script)
                           (chmod script #o555)
                           (install-file script bin))
                         scripts)
               ;; Make the installed wrapper scripts use the absolute path
               ;; of "java" and look for the jar in the output directory.
               (substitute* installed-scripts
                 (("^java") (which "java"))
                 (("jar_deploy_dir=.*")
                  (string-append "jar_deploy_dir=" share "\n"))))
             #t))
         ;; FIXME: We do this after stripping jars because we don't want it to
         ;; copy all these jars and strip them.  We only want to install
         ;; links.  Arguably, this is a problem with the ant-build-system.
         (add-after 'strip-jar-timestamps 'install-links
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (share (string-append out "/share/java/"))
                    (lib (string-append share "/lib/"))
                    (linked-jars (find-files "jar/lib" "\\.jar$")))
               ;; Recreate each link from jar/lib inside the output,
               ;; pointing at the original link's target.
               (for-each (lambda (jar)
                           (symlink (readlink jar)
                                    (string-append lib (basename jar))))
                         linked-jars))
             #t)))))
    (inputs
     `(("jdk" ,icedtea-8)
       ("java-picard" ,java-picard-2.10.3)
       ("java-log4j-1.2-api" ,java-log4j-1.2-api)
       ("java-commons-math3" ,java-commons-math3)
       ("java-commons-jexl2" ,java-commons-jexl-2)
       ("java-commons-collections4" ,java-commons-collections4)
       ("java-commons-lang2" ,java-commons-lang)
       ("java-commons-io" ,java-commons-io)
       ("java-snappy-1.0.3-rc3" ,java-snappy-1)
       ("java-guava" ,java-guava)
       ("java-la4j" ,java-la4j)
       ("java-biojava-core" ,java-biojava-core-4.0)
       ("java-biojava-alignment" ,java-biojava-alignment-4.0)
       ("java-jdistlib" ,java-jdistlib)
       ("java-simple-xml" ,java-simple-xml)
       ("java-snakeyaml" ,java-snakeyaml)))
    (native-inputs
     `(("unzip" ,unzip)
       ("java-testng" ,java-testng)))
    (home-page "http://mccarrolllab.com/dropseq/")
    (synopsis "Tools for Drop-seq analyses")
    (description "Drop-seq is a technology to enable biologists to
analyze RNA expression genome-wide in thousands of individual cells at
once. This package provides tools to perform Drop-seq analyses.")
    (license license:expat)))
12885
;; PiGx RNAseq pipeline, built from the upstream release tarball.
(define-public pigx-rnaseq
  (package
    (name "pigx-rnaseq")
    (version "0.0.10")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/pigx_rnaseq/"
                                  "releases/download/v" version
                                  "/pigx_rnaseq-" version ".tar.gz"))
              (sha256
               (base32
                "0z3hr120wk2vrlmlpz1vp3n9wy3rq4y2mnzh2vf08qgqn2xfdwcw"))))
    (build-system gnu-build-system)
    (arguments
     `(#:parallel-tests? #f             ; not supported
       #:phases
       (modify-phases %standard-phases
         ;; "test.sh" runs STAR, which requires excessive amounts of memory.
         (add-after 'unpack 'disable-resource-intensive-test
           (lambda _
             (substitute* "Makefile.in"
               ;; Keep only the test name on the trim_galore line; the
               ;; rest of that line is dropped.
               (("(^ tests/test_trim_galore/test.sh).*" _ m) m)
               (("^ tests/test_multiqc/test.sh") "")
               (("^ test.sh") ""))
             #t)))))
    (inputs
     `(;; Basic utilities and the workflow engine.
       ("coreutils" ,coreutils)
       ("sed" ,sed)
       ("gzip" ,gzip)
       ("snakemake" ,snakemake)
       ;; Sequencing tools.
       ("fastqc" ,fastqc)
       ("multiqc" ,multiqc)
       ("star" ,star)
       ("trim-galore" ,trim-galore)
       ("htseq" ,htseq)
       ("samtools" ,samtools)
       ;; R and the R packages.
       ("r-minimal" ,r-minimal)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggrepel" ,r-ggrepel)
       ("r-gprofiler" ,r-gprofiler)
       ("r-deseq2" ,r-deseq2)
       ("r-dt" ,r-dt)
       ("r-knitr" ,r-knitr)
       ("r-pheatmap" ,r-pheatmap)
       ("r-corrplot" ,r-corrplot)
       ("r-reshape2" ,r-reshape2)
       ("r-plotly" ,r-plotly)
       ("r-scales" ,r-scales)
       ("r-summarizedexperiment" ,r-summarizedexperiment)
       ("r-crosstalk" ,r-crosstalk)
       ("r-tximport" ,r-tximport)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rjson" ,r-rjson)
       ("salmon" ,salmon)
       ;; Pandoc and Python.
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for RNA sequencing experiments")
    (description "PiGX RNAseq is an analysis pipeline for preprocessing and
reporting for RNA sequencing experiments. It is easy to use and produces high
quality reports. The inputs are reads files from the sequencing experiment,
and a configuration file which describes the experiment. In addition to
quality control of the experiment, the pipeline produces a differential
expression report comparing samples in an easily configurable manner.")
    (license license:gpl3+)))
12954
;; PiGx ChIPseq pipeline, built from the upstream release tarball.
(define-public pigx-chipseq
  (package
    (name "pigx-chipseq")
    (version "0.0.42")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/pigx_chipseq/"
                                  "releases/download/v" version
                                  "/pigx_chipseq-" version ".tar.gz"))
              (sha256
               (base32
                "0xbvgqpk32a8iczhvac56cacr46rdkqb0allhhpvmj940idf72bi"))))
    (build-system gnu-build-system)
    ;; parts of the tests rely on access to the network
    (arguments '(#:tests? #f))
    (inputs
     `(;; Basic utilities.
       ("grep" ,grep)
       ("coreutils" ,coreutils)
       ;; R and the R packages.
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-biocparallel" ,r-biocparallel)
       ("r-biostrings" ,r-biostrings)
       ("r-chipseq" ,r-chipseq)
       ("r-data-table" ,r-data-table)
       ("r-dplyr" ,r-dplyr)
       ("r-genomation" ,r-genomation)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-tibble" ,r-tibble)
       ("r-tidyr" ,r-tidyr)
       ("r-jsonlite" ,r-jsonlite)
       ("r-heatmaply" ,r-heatmaply)
       ("r-htmlwidgets" ,r-htmlwidgets)
       ("r-ggplot2" ,r-ggplot2)
       ("r-plotly" ,r-plotly)
       ("r-rmarkdown" ,r-rmarkdown)
       ;; Python and the Python modules.
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-magic" ,python-magic)
       ("python-xlrd" ,python-xlrd)
       ;; Remaining tools.
       ("trim-galore" ,trim-galore)
       ("macs" ,macs)
       ("multiqc" ,multiqc)
       ("perl" ,perl)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("idr" ,idr)
       ("snakemake" ,snakemake)
       ("samtools" ,samtools)
       ("bedtools" ,bedtools)
       ("kentutils" ,kentutils)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for ChIP sequencing experiments")
    (description "PiGX ChIPseq is an analysis pipeline for preprocessing, peak
calling and reporting for ChIP sequencing experiments. It is easy to use and
produces high quality reports. The inputs are reads files from the sequencing
experiment, and a configuration file which describes the experiment. In
addition to quality control of the experiment, the pipeline enables to set up
multiple peak calling analysis and allows the generation of a UCSC track hub
in an easily configurable manner.")
    (license license:gpl3+)))
13024
;; PiGx pipeline for bisulfite sequencing data, built from the upstream
;; release tarball with the GNU build system.
(define-public pigx-bsseq
  (package
    (name "pigx-bsseq")
    (version "0.0.10")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/pigx_bsseq/"
                                  "releases/download/v" version
                                  "/pigx_bsseq-" version ".tar.gz"))
              (sha256
               (base32
                "0l97wvkq4diq8lcarraj33bby1zzf0w804jwi8mlc5qddp8idwhy"))))
    (build-system gnu-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (add-before 'check 'set-timezone
           ;; The readr package is picky about timezones.
           (lambda* (#:key inputs #:allow-other-keys)
             (setenv "TZ" "UTC+1")
             (setenv "TZDIR"
                     (string-append (assoc-ref inputs "tzdata")
                                    "/share/zoneinfo"))
             #t)))))
    ;; tzdata is only consulted by the 'set-timezone phase above.
    (native-inputs
     `(("tzdata" ,tzdata)))
    (inputs
     `(("coreutils" ,coreutils)
       ("sed" ,sed)
       ("grep" ,grep)
       ("r-minimal" ,r-minimal)
       ("r-annotationhub" ,r-annotationhub)
       ("r-dt" ,r-dt)
       ("r-genomation" ,r-genomation)
       ("r-methylkit" ,r-methylkit)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-bookdown" ,r-bookdown)
       ("r-ggplot2" ,r-ggplot2)
       ("r-ggbio" ,r-ggbio)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("snakemake" ,snakemake)
       ("bismark" ,bismark)
       ("fastqc" ,fastqc)
       ("bowtie" ,bowtie)
       ("trim-galore" ,trim-galore)
       ("cutadapt" ,cutadapt)
       ("samtools" ,samtools)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Bisulfite sequencing pipeline from fastq to methylation reports")
    (description "PiGx BSseq is a data processing pipeline for raw fastq read
data of bisulfite experiments; it produces reports on aggregate methylation
and coverage and can be used to produce information on differential
methylation and segmentation.")
    (license license:gpl3+)))
13083
;; PiGx pipeline for single-cell RNA sequencing data, built from the upstream
;; release tarball with the GNU build system.
(define-public pigx-scrnaseq
  (package
    (name "pigx-scrnaseq")
    (version "1.1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/pigx_scrnaseq/"
                                  "releases/download/v" version
                                  "/pigx_scrnaseq-" version ".tar.gz"))
              (sha256
               (base32
                "1d5l3gywypi67yz9advxq5xkgfhr4733gj0bwnngm723i3hdf5w9"))))
    (build-system gnu-build-system)
    (inputs
     `(("coreutils" ,coreutils)
       ("perl" ,perl)
       ("fastqc" ,fastqc)
       ("flexbar" ,flexbar)
       ("java" ,icedtea-8)
       ("jellyfish" ,jellyfish)
       ("python-wrapper" ,python-wrapper)
       ("python-pyyaml" ,python-pyyaml)
       ("python-pandas" ,python-pandas)
       ("python-magic" ,python-magic)
       ("python-numpy" ,python-numpy)
       ("python-loompy" ,python-loompy)
       ("ghc-pandoc" ,ghc-pandoc)
       ("ghc-pandoc-citeproc" ,ghc-pandoc-citeproc)
       ("samtools" ,samtools)
       ("snakemake" ,snakemake)
       ("star" ,star)
       ("r-minimal" ,r-minimal)
       ("r-argparser" ,r-argparser)
       ("r-cowplot" ,r-cowplot)
       ("r-data-table" ,r-data-table)
       ("r-delayedarray" ,r-delayedarray)
       ("r-delayedmatrixstats" ,r-delayedmatrixstats)
       ("r-dplyr" ,r-dplyr)
       ("r-dropbead" ,r-dropbead)
       ("r-dt" ,r-dt)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicfiles" ,r-genomicfiles)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hdf5array" ,r-hdf5array)
       ("r-pheatmap" ,r-pheatmap)
       ("r-rmarkdown" ,r-rmarkdown)
       ("r-rsamtools" ,r-rsamtools)
       ("r-rtracklayer" ,r-rtracklayer)
       ("r-rtsne" ,r-rtsne)
       ("r-scater" ,r-scater)
       ("r-scran" ,r-scran)
       ("r-seurat" ,r-seurat)
       ("r-singlecellexperiment" ,r-singlecellexperiment)
       ("r-stringr" ,r-stringr)
       ("r-yaml" ,r-yaml)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipeline for single-cell RNA sequencing experiments")
    (description "PiGX scRNAseq is an analysis pipeline for preprocessing and
quality control for single cell RNA sequencing experiments.  The inputs are
read files from the sequencing experiment, and a configuration file which
describes the experiment.  It produces processed files for downstream analysis
and interactive quality reports.  The pipeline is designed to work with UMI
based methods.")
    (license license:gpl3+)))
13149
;; Umbrella package: its inputs are the four individual PiGx pipelines.
(define-public pigx
  (package
    (name "pigx")
    (version "0.0.3")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://github.com/BIMSBbioinfo/pigx/"
                                  "releases/download/v" version
                                  "/pigx-" version ".tar.gz"))
              (sha256
               (base32
                "1i5njdy1clj5ncw45d16p7mwmqvb1ilikl9n797pxklc3f4s7mq7"))))
    (build-system gnu-build-system)
    (inputs
     `(("python" ,python)
       ("pigx-bsseq" ,pigx-bsseq)
       ("pigx-chipseq" ,pigx-chipseq)
       ("pigx-rnaseq" ,pigx-rnaseq)
       ("pigx-scrnaseq" ,pigx-scrnaseq)))
    (home-page "https://bioinformatics.mdc-berlin.de/pigx/")
    (synopsis "Analysis pipelines for genomics")
    (description "PiGx is a collection of genomics pipelines.  It includes the
following pipelines:

@itemize
@item PiGx BSseq for raw fastq read data of bisulfite experiments
@item PiGx RNAseq for RNAseq samples
@item PiGx scRNAseq for single cell dropseq analysis
@item PiGx ChIPseq for reads from ChIPseq experiments
@end itemize

All pipelines are easily configured with a simple sample sheet and a
descriptive settings file.  The result is a set of comprehensive, interactive
HTML reports with interesting findings about your samples.")
    (license license:gpl3+)))
13185
;; Genrich is built with plain "make"; there is no configure script and no
;; install target, so the binary is copied by hand.
(define-public genrich
  (package
    (name "genrich")
    (version "0.5")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/jsh58/Genrich")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "0x0q6z0208n3cxzqjla4rgjqpyqgwpmz27852lcvzkzaigymq4zp"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "Genrich"
                           (string-append (assoc-ref outputs "out") "/bin"))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/jsh58/Genrich")
    (synopsis "Detecting sites of genomic enrichment")
    (description "Genrich is a peak-caller for genomic enrichment
assays (e.g. ChIP-seq, ATAC-seq).  It analyzes alignment files generated
following the assay and produces a file detailing peaks of significant
enrichment.")
    (license license:expat)))
13218
;; Mantis is packaged from a pinned git commit, hence the "0" base version
;; passed to git-version.
(define-public mantis
  (let ((commit "4ffd171632c2cb0056a86d709dfd2bf21bc69b84")
        (revision "1"))
    (package
      (name "mantis")
      (version (git-version "0" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/splatlab/mantis")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0iqbr0dhmlc8mzpirmm2s4pkzkwdgrcx50yx6cv3wlr2qi064p55"))))
      (build-system cmake-build-system)
      (arguments '(#:tests? #f))        ; there are none
      (inputs
       `(("sdsl-lite" ,sdsl-lite)
         ("openssl" ,openssl)
         ("zlib" ,zlib)))
      (home-page "https://github.com/splatlab/mantis")
      (synopsis "Large-scale sequence-search index data structure")
      (description "Mantis is a space-efficient data structure that can be
used to index thousands of raw-read genomics experiments and facilitate
large-scale sequence searches on those experiments.  Mantis uses counting
quotient filters instead of Bloom filters, enabling rapid index builds and
queries, small indexes, and exact results, i.e., no false positives or
negatives.  Furthermore, Mantis is also a colored de Bruijn graph
representation, so it supports fast graph traversal and other topological
analyses in addition to large-scale sequence-level searches.")
      ;; uses __uint128_t and inline assembly
      (supported-systems '("x86_64-linux"))
      (license license:bsd-3))))
13253
;; CRAN package; gfortran is a build-time (native) input, while fftw and gsl
;; are regular inputs.
(define-public r-diversitree
  (package
    (name "r-diversitree")
    (version "0.9-13")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "diversitree" version))
       (sha256
        (base32
         "00vi4klywi35hd170ksjv3xja3hqqbkcidcnrrlpgv4179k0azix"))))
    (build-system r-build-system)
    (native-inputs
     `(("gfortran" ,gfortran)))
    (inputs
     `(("fftw" ,fftw)
       ("gsl" ,gsl)))
    (propagated-inputs
     `(("r-ape" ,r-ape)
       ("r-desolve" ,r-desolve)
       ("r-rcpp" ,r-rcpp)
       ("r-subplex" ,r-subplex)))
    (home-page "https://www.zoology.ubc.ca/prog/diversitree")
    (synopsis "Comparative 'phylogenetic' analyses of diversification")
    (description "This package contains a number of comparative \"phylogenetic\"
methods, mostly focusing on analysing diversification and character evolution.
Contains implementations of \"BiSSE\" (Binary State Speciation and Extinction)
and its unresolved tree extensions, \"MuSSE\" (Multiple State Speciation and
Extinction), \"QuaSSE\", \"GeoSSE\", and \"BiSSE-ness\".  Other included methods
include Markov models of discrete and continuous trait evolution and constant
rate speciation and extinction.")
    (license license:gpl2+)))
13284
(define-public sjcount
  ;; There is no tag for version 3.2, nor is there a release archive.
  (let ((commit "292d3917cadb3f6834c81e509c30e61cd7ead6e5")
        (revision "1"))
    (package
      (name "sjcount")
      (version (git-version "3.2" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/pervouchine/sjcount-full")
                      (commit commit)))
                ;; git-file-name yields "<name>-<version>-checkout".
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "0gdgj35j249f04rqgq8ymcc1xg1vi9kzbajnjqpaq2wpbh8bl234"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; requires a 1.4G test file
         #:make-flags
         (list (string-append "SAMTOOLS_DIR="
                              (assoc-ref %build-inputs "samtools")
                              "/lib/"))
         #:phases
         (modify-phases %standard-phases
           ;; There is no configure script; patch the makefile instead so
           ;; that it uses the samtools include directory and links with
           ;; -lpthread.
           (replace 'configure
             (lambda* (#:key inputs #:allow-other-keys)
               (substitute* "makefile"
                 (("-I \\$\\{SAMTOOLS_DIR\\}")
                  (string-append "-I" (assoc-ref inputs "samtools")
                                 "/include/samtools"))
                 (("-lz ") "-lz -lpthread "))
               #t))
           ;; Copy the three built tools into the output's bin directory.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (for-each (lambda (tool)
                           (install-file tool
                                         (string-append (assoc-ref outputs "out")
                                                        "/bin")))
                         '("j_count" "b_count" "sjcount"))
               #t)))))
      (inputs
       `(("samtools" ,samtools-0.1)
         ("zlib" ,zlib)))
      (home-page "https://github.com/pervouchine/sjcount-full/")
      (synopsis "Annotation-agnostic splice junction counting pipeline")
      (description "Sjcount is a utility for fast quantification of splice
junctions in RNA-seq data.  It is annotation-agnostic and offset-aware.  This
version does count multisplits.")
      (license license:gpl3+))))
13335
(define-public minimap2
  (package
    (name "minimap2")
    (version "2.17")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/lh3/minimap2/"
                           "releases/download/v" version "/"
                           "minimap2-" version ".tar.bz2"))
       (sha256
        (base32
         "0hi7i9pzxhvjj44khzzzj1lrn5gb5837arr4wgln7k1k5n4ci2mn"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:make-flags
       ;; The second flag is chosen from the system string: the "all" target
       ;; on x86_64, "arm_neon=1" on armhf/aarch64, and "sse2only=1"
       ;; everywhere else.
       (list "CC=gcc"
             (let ((system ,(or (%current-target-system)
                                (%current-system))))
               (cond
                ((string-prefix? "x86_64" system)
                 "all")
                ((or (string-prefix? "armhf" system)
                     (string-prefix? "aarch64" system))
                 "arm_neon=1")
                (else "sse2only=1"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (man (string-append out "/share/man/man1")))
               ;; install-file creates the target directory as needed.
               (install-file "minimap2" bin)
               (install-file "minimap2.1" man))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://lh3.github.io/minimap2/")
    (synopsis "Pairwise aligner for genomic and spliced nucleotide sequences")
    (description "Minimap2 is a versatile sequence alignment program that
aligns DNA or mRNA sequences against a large reference database.  Typical use
cases include:

@enumerate
@item mapping PacBio or Oxford Nanopore genomic reads to the human genome;
@item finding overlaps between long reads with error rate up to ~15%;
@item splice-aware alignment of PacBio Iso-Seq or Nanopore cDNA or Direct RNA
  reads against a reference genome;
@item aligning Illumina single- or paired-end reads;
@item assembly-to-assembly alignment;
@item full-genome alignment between two closely related species with
  divergence below ~15%.
@end enumerate")
    (license license:expat)))
13394
;; Miniasm is built with plain "make"; the two resulting programs are
;; installed by hand.
(define-public miniasm
  (package
    (name "miniasm")
    (version "0.3")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/lh3/miniasm")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32
                "04dv5wv8bhsw1imxwyd438bnn9kby7svp44nbcz8lsadzjjci5gs"))))
    (build-system gnu-build-system)
    (inputs
     `(("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; There are no tests.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "miniasm" bin)
               (install-file "minidot" bin)
               #t))))))
    (home-page "https://github.com/lh3/miniasm")
    (synopsis "Ultrafast de novo assembly for long noisy reads")
    (description "Miniasm is a very fast OLC-based de novo assembler for noisy
long reads.  It takes all-vs-all read self-mappings (typically by minimap) as
input and outputs an assembly graph in the GFA format.  Different from
mainstream assemblers, miniasm does not have a consensus step.  It simply
concatenates pieces of read sequences to generate the final unitig sequences.
Thus the per-base error rate is similar to the raw input reads.")
    (license license:expat)))
13431
;; ciRcus is fetched from its git repository at the release tag "v<version>".
(define-public r-circus
  (package
    (name "r-circus")
    (version "0.1.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/BIMSBbioinfo/ciRcus")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0jhjn3ilb057hbf6yzrihj13ifxxs32y7nkby8l3lkm28dg4p97h"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-annotationdbi" ,r-annotationdbi)
       ("r-annotationhub" ,r-annotationhub)
       ("r-biomart" ,r-biomart)
       ("r-data-table" ,r-data-table)
       ("r-dbi" ,r-dbi)
       ("r-genomicfeatures" ,r-genomicfeatures)
       ("r-genomicranges" ,r-genomicranges)
       ("r-ggplot2" ,r-ggplot2)
       ("r-hash" ,r-hash)
       ("r-iranges" ,r-iranges)
       ("r-rcolorbrewer" ,r-rcolorbrewer)
       ("r-rmysql" ,r-rmysql)
       ("r-s4vectors" ,r-s4vectors)
       ("r-stringr" ,r-stringr)
       ("r-summarizedexperiment" ,r-summarizedexperiment)))
    (native-inputs
     `(("r-knitr" ,r-knitr)))
    (home-page "https://github.com/BIMSBbioinfo/ciRcus")
    (synopsis "Annotation, analysis and visualization of circRNA data")
    (description "Circus is an R package for annotation, analysis and
visualization of circRNA data.  Users can annotate their circRNA candidates
with host genes, gene features they are spliced from, and discriminate between
known and yet unknown splice junctions.  Circular-to-linear ratios of circRNAs
can be calculated, and a number of descriptive plots easily generated.")
    (license license:artistic2.0)))
13473
(define-public gffread
  ;; We cannot use the tagged release because it is not in sync with gclib.
  ;; See https://github.com/gpertea/gffread/issues/26
  (let ((commit "ba7535fcb3cea55a6e5a491d916e93b454e87fd0")
        (revision "1"))
    (package
      (name "gffread")
      (version (git-version "0.9.12" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffread")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1dl2nbcg96lxpd0drg48ssa8343nf7pw9s9mkrc4mjjmfwsin3ki"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:make-flags
         ;; Points the makefile at the gclib copy placed in the source tree
         ;; by the 'copy-gclib-source phase below.
         (list "GCLDIR=gclib")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir-p "gclib")
               (copy-recursively (assoc-ref inputs "gclib-source") "gclib")
               #t))
           ;; There is no install target
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin")))
                 (install-file "gffread" bin))
               #t)))))
      (native-inputs
       `(("gclib-source"
          ,(let ((version "0.10.3")
                 (commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f"))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib")
                     (commit commit)))
               (file-name (git-file-name "gclib" version))
               (sha256
                (base32
                 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffread/")
      (synopsis "Parse and convert GFF/GTF files")
      (description
       "This package provides a GFF/GTF file parsing utility providing format
conversions, region filtering, FASTA sequence extraction and more.")
      ;; gffread is under Expat, but gclib is under Artistic 2.0
      (license (list license:expat
                     license:artistic2.0)))))
13534
(define-public find-circ
  ;; The last release was in 2015.  The license was clarified in 2017, so we
  ;; take the latest commit.
  (let ((commit "8655dca54970fcf7e92e22fbf57e1188724dda7d")
        (revision "1"))
    (package
      (name "find-circ")
      (version (git-version "1.2" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/marvin-jens/find_circ")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0p77pbqbclqr4srms34y1b9b4njybfpjiknc11ki84f3p8skb3cg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; there are none
         #:phases
         ;; There is no actual build system.
         (modify-phases %standard-phases
           (delete 'configure)
           (delete 'build)
           ;; Install each script and wrap it so that it runs with the
           ;; PYTHONPATH value seen at build time.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (path (getenv "PYTHONPATH")))
                 (for-each (lambda (script)
                             (install-file script bin)
                             (wrap-program (string-append bin "/" script)
                               `("PYTHONPATH" ":" prefix (,path))))
                           '("cmp_bed.py"
                             "find_circ.py"
                             "maxlength.py"
                             "merge_bed.py"
                             "unmapped2anchors.py")))
               #t)))))
      (inputs
       `(("python2" ,python-2)
         ("python2-pysam" ,python2-pysam)
         ("python2-numpy" ,python2-numpy)))
      (home-page "https://github.com/marvin-jens/find_circ")
      (synopsis "circRNA detection from RNA-seq reads")
      (description "This package provides tools to detect head-to-tail
spliced (back-spliced) sequencing reads, indicative of circular RNA (circRNA)
in RNA-seq data.")
      (license license:gpl3))))
13586
;; Scanpy from PyPI.  The 'check phase is replaced so that a subset of the
;; test files can be removed before running pytest.
(define-public python-scanpy
  (package
    (name "python-scanpy")
    (version "1.4.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "scanpy" version))
       (sha256
        (base32
         "0s2b6cvaigx4wzw3850qb93sjwwxbzh22kpbp498zklc5rjpbz4l"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         (replace 'check
           (lambda _
             ;; These tests require Internet access.
             (delete-file-recursively "scanpy/tests/notebooks")
             (delete-file "scanpy/tests/test_clustering.py")
             (delete-file "scanpy/tests/test_datasets.py")

             ;; TODO: I can't get the plotting tests to work, even with Xvfb.
             (delete-file "scanpy/tests/test_plotting.py")
             (delete-file "scanpy/tests/test_preprocessing.py")
             (delete-file "scanpy/tests/test_read_10x.py")

             ;; Put the source checkout first on the module search path.
             (setenv "PYTHONPATH"
                     (string-append (getcwd) ":"
                                    (getenv "PYTHONPATH")))
             (invoke "pytest")
             #t)))))
    (propagated-inputs
     `(("python-anndata" ,python-anndata)
       ("python-h5py" ,python-h5py)
       ("python-igraph" ,python-igraph)
       ("python-joblib" ,python-joblib)
       ("python-legacy-api-wrap" ,python-legacy-api-wrap)
       ("python-louvain" ,python-louvain)
       ("python-matplotlib" ,python-matplotlib)
       ("python-natsort" ,python-natsort)
       ("python-networkx" ,python-networkx)
       ("python-numba" ,python-numba)
       ("python-packaging" ,python-packaging)
       ("python-pandas" ,python-pandas)
       ("python-patsy" ,python-patsy)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-scipy" ,python-scipy)
       ("python-seaborn" ,python-seaborn)
       ("python-statsmodels" ,python-statsmodels)
       ("python-tables" ,python-tables)
       ("python-tqdm" ,python-tqdm)
       ("python-umap-learn" ,python-umap-learn)))
    (native-inputs
     `(("python-pytest" ,python-pytest)
       ("python-setuptools-scm" ,python-setuptools-scm)))
    (home-page "https://github.com/theislab/scanpy")
    (synopsis "Single-Cell Analysis in Python")
    (description "Scanpy is a scalable toolkit for analyzing single-cell gene
expression data.  It includes preprocessing, visualization, clustering,
pseudotime and trajectory inference and differential expression testing.  The
Python-based implementation efficiently deals with datasets of more than one
million cells.")
    (license license:bsd-3)))
13651
;; BBKNN from PyPI; its tests are currently disabled (see the TODO below).
(define-public python-bbknn
  (package
    (name "python-bbknn")
    (version "1.3.6")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "bbknn" version))
       (sha256
        (base32
         "1jbsh01f57zj4bhvjr3jh4532zznqd6nccmgrl3qi9gnhkf7c4y0"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f))          ; TODO: Enable after migration to scikit-learn.
    (propagated-inputs
     `(("python-annoy" ,python-annoy)
       ("python-cython" ,python-cython)
       ("python-numpy" ,python-numpy)
       ("python-scipy" ,python-scipy)
       ("python-umap-learn" ,python-umap-learn)))
    (home-page "https://github.com/Teichlab/bbknn")
    (synopsis "Batch balanced KNN")
    (description "BBKNN is a batch effect removal tool that can be directly
used in the Scanpy workflow.  It serves as an alternative to
@code{scanpy.api.pp.neighbors()}, with both functions creating a neighbour
graph for subsequent use in clustering, pseudotime and UMAP visualisation.  If
technical artifacts are present in the data, they will make it challenging to
link corresponding cell types across different batches.  BBKNN actively
combats this effect by splitting your data into batches and finding a smaller
number of neighbours for each cell within each of the groups.  This helps
create connections between analogous cells in different batches without
altering the counts or PCA space.")
    (license license:expat)))
13685
;; gffcompare is packaged from a pinned commit and built against a gclib
;; source checkout placed next to its own source directory.
(define-public gffcompare
  (let ((commit "be56ef4349ea3966c12c6397f85e49e047361c41")
        (revision "1"))
    (package
      (name "gffcompare")
      (version (git-version "0.10.15" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/gpertea/gffcompare/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0cp5qpxdhw4mxpya5dld8wi3jk00zyklm6rcri426wydinrnfmkg"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Copy the gclib sources to "../gclib" before building.
           (add-before 'build 'copy-gclib-source
             (lambda* (#:key inputs #:allow-other-keys)
               (mkdir "../gclib")
               (copy-recursively
                (assoc-ref inputs "gclib-source") "../gclib")
               #t))
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "gffcompare" bin)
                 #t))))))
      (native-inputs
       `(("gclib-source"                ; see 'README.md' of gffcompare
          ,(let ((commit "54917d0849c1e83cfb057b5f712e5cb6a35d948f")
                 (revision "1")
                 (name "gclib")
                 (version (git-version "0.10.3" revision commit)))
             (origin
               (method git-fetch)
               (uri (git-reference
                     (url "https://github.com/gpertea/gclib/")
                     (commit commit)))
               (file-name (git-file-name name version))
               (sha256
                (base32 "0b51lc0b8syrv7186fd7n8f15rwnf264qgfmm2palrwks1px24mr")))))))
      (home-page "https://github.com/gpertea/gffcompare/")
      (synopsis "Tool for comparing or classifying transcripts of RNA-Seq")
      (description
       "@code{gffcompare} is a tool that can:
@enumerate
@item compare and evaluate the accuracy of RNA-Seq transcript assemblers
(Cufflinks, Stringtie);
@item collapse (merge) duplicate transcripts from multiple GTF/GFF3 files (e.g.
resulted from assembly of different samples);
@item classify transcripts from one or multiple GTF/GFF3 files as they relate to
reference transcripts provided in an annotation file (also in GTF/GFF3 format).
@end enumerate")
      (license
       (list
        license:expat                   ;license for gffcompare
        license:artistic2.0)))))        ;license for gclib
13748
;; C++ interval tree library packaged from a pinned commit.  PREFIX is
;; pointed at the package output and DESTDIR is passed as an empty quoted
;; string.
(define-public intervaltree
  (let ((commit "b90527f9e6d51cd36ecbb50429e4524d3a418ea5"))
    (package
      (name "intervaltree")
      (version (git-version "0.0.0" "1" commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/intervaltree/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "0rgv6q5fl4x5d74n6p5wvdna6zmbdbqpb4jqqh6vq3670gn08xad"))))
      (build-system gnu-build-system)
      (arguments
       '(#:tests? #f                    ; No tests.
         #:make-flags (list (string-append "PREFIX=" (assoc-ref %outputs "out"))
                            "DESTDIR=\"\"")
         #:phases
         (modify-phases %standard-phases
           (delete 'configure))))       ; There is no configure phase.
      (home-page "https://github.com/ekg/intervaltree")
      (synopsis "Minimal C++ interval tree implementation")
      (description "An interval tree can be used to efficiently find a set of
numeric intervals overlapping or containing another interval.  This library
provides a basic implementation of an interval tree using C++ templates,
allowing the insertion of arbitrary types into the tree.")
      (license license:expat))))
13778
;; Python interval tree library from PyPI.
(define-public python-intervaltree
  (package
    (name "python-intervaltree")
    (version "3.0.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "intervaltree" version))
       (sha256
        (base32
         "0wz234g6irlm4hivs2qzmnywk0ss06ckagwh15nflkyb3p462kyb"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; pytest seems to have a check to make sure the user is testing
         ;; their checked-out code and not an installed, potentially
         ;; out-of-date copy.  This is harmless here, since we just installed
         ;; the package, so we disable the check to avoid skipping tests
         ;; entirely.
         (add-before 'check 'import-mismatch-error-workaround
           (lambda _
             (setenv "PY_IGNORE_IMPORTMISMATCH" "1")
             #t)))))
    (propagated-inputs
     `(("python-sortedcontainers" ,python-sortedcontainers)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://github.com/chaimleib/intervaltree")
    (synopsis "Editable interval tree data structure")
    (description
     "This package provides a mutable, self-balancing interval tree
implementation for Python.  Queries may be by point, by range overlap, or by
range envelopment.  This library was designed to allow tagging text and time
intervals, where the intervals include the lower bound but not the upper
bound.")
    (license license:asl2.0)))
13816
(define-public python-pypairix
  (package
    (name "python-pypairix")
    (version "0.3.7")
    ;; The tarball on pypi does not include the makefile to build the
    ;; programs.
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/4dn-dcic/pairix")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1snr3lrmsld8sy77ng6ba6wcmd33xjccf1l2f3m6pi29xis9nd6p"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Run "make" before the Python build, then copy the resulting
         ;; "bin" directory into the output after the Python install.
         (add-before 'build 'build-programs
           (lambda _ (invoke "make")))
         (add-after 'install 'install-programs
           (lambda* (#:key outputs #:allow-other-keys)
             (copy-recursively "bin" (string-append
                                      (assoc-ref outputs "out")
                                      "/bin"))
             #t)))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://github.com/4dn-dcic/pairix")
    (synopsis "Support for querying pairix-indexed bgzipped text files")
    (description
     "Pypairix is a Python module for fast querying on a pairix-indexed
bgzipped text file that contains a pair of genomic coordinates per line.")
    (license license:expat)))
13853
;; pyfaidx from PyPI; python-six is propagated to users of this package.
(define-public python-pyfaidx
  (package
    (name "python-pyfaidx")
    (version "0.5.8")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyfaidx" version))
       (sha256
        (base32
         "038xi3a6zvrxbyyfpp64ka8pcjgsdq4fgw9cl5lpxbvmm1bzzw2q"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-six" ,python-six)))
    (home-page "http://mattshirley.com")
    (synopsis "Random access to fasta subsequences")
    (description
     "This package provides procedures for efficient pythonic random access to
fasta subsequences.")
    (license license:bsd-3)))
13874
;; Python 2 variant of python-pyfaidx, derived with `package-with-python2'.
(define-public python2-pyfaidx
  (package-with-python2 python-pyfaidx))
13877
;; Cooler, fetched from PyPI and built with the Python build system.
(define-public python-cooler
  (package
    (name "python-cooler")
    (version "0.8.7")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "cooler" version))
       (sha256
        (base32
         "01g6gqix9ba27sappz6nfyiwabzrlf8i5fn8kwcz8ra356cq9crp"))))
    (build-system python-build-system)
    ;; Python libraries propagated to users of this package.
    (propagated-inputs
     `(("python-asciitree" ,python-asciitree)
       ("python-biopython" ,python-biopython)
       ("python-click" ,python-click)
       ("python-cytoolz" ,python-cytoolz)
       ("python-dask" ,python-dask)
       ("python-h5py" ,python-h5py)
       ("python-multiprocess" ,python-multiprocess)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pyfaidx" ,python-pyfaidx)
       ("python-pypairix" ,python-pypairix)
       ("python-pysam" ,python-pysam)
       ("python-pyyaml" ,python-pyyaml)
       ("python-scipy" ,python-scipy)
       ("python-simplejson" ,python-simplejson)))
    ;; Build-time only inputs.
    (native-inputs
     `(("python-mock" ,python-mock)
       ("python-pytest" ,python-pytest)))
    (home-page "https://github.com/mirnylab/cooler")
    (synopsis "Sparse binary format for genomic interaction matrices")
    (description
     "Cooler is a support library for a sparse, compressed, binary persistent
storage format, called @code{cool}, used to store genomic interaction data,
such as Hi-C contact matrices.")
    (license license:bsd-3)))
13916
;; HiCMatrix, built from the upstream git tag because the release is not
;; published on PyPI.
(define-public python-hicmatrix
  (package
    (name "python-hicmatrix")
    (version "12")
    (source
     (origin
       ;; Version 12 is not available on pypi.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/HiCMatrix")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1xhdyx16f3brgxgxybixdi64ki8nbbkq5vk4h9ahi11pzpjfn1pj"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Replace the exact pin on cooler 0.8.5 with "0.8.*" in both
         ;; requirement files.
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* '("requirements.txt"
                            "setup.py")
               ;; The dots are escaped so that the pattern matches only
               ;; the literal version string "0.8.5"; an unescaped "."
               ;; matches any character.
               (("cooler *=+ *0\\.8\\.5")
                "cooler==0.8.*"))
             #t)))))
    (propagated-inputs
     `(("python-cooler" ,python-cooler)
       ("python-intervaltree" ,python-intervaltree)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-scipy" ,python-scipy)
       ("python-tables" ,python-tables)))
    (home-page "https://github.com/deeptools/HiCMatrix/")
    (synopsis "HiCMatrix class for HiCExplorer and pyGenomeTracks")
    (description
     "This helper package implements the @code{HiCMatrix} class for
the HiCExplorer and pyGenomeTracks packages.")
    (license license:gpl3+)))
13956
;; HiCExplorer, built from the upstream git tag.
(define-public python-hicexplorer
  (package
    (name "python-hicexplorer")
    (version "2.1.4")
    (source
     (origin
       ;; The latest version is not available on Pypi.
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/deeptools/HiCExplorer")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0q5gpbzmrkvygqgw524q36b4nrivcmyi5v194vsx0qw7b3gcmq08"))))
    (build-system python-build-system)
    (arguments
     `(#:phases
       (modify-phases %standard-phases
         ;; Rewrite every "==" in setup.py to ">=", turning exact version
         ;; pins into lower bounds.
         (add-after 'unpack 'loosen-up-requirements
           (lambda _
             (substitute* "setup.py"
               (("==")
                ">="))
             #t)))))
    (propagated-inputs
     `(("python-biopython" ,python-biopython)
       ("python-configparser" ,python-configparser)
       ("python-cooler" ,python-cooler)
       ("python-future" ,python-future)
       ("python-intervaltree" ,python-intervaltree)
       ("python-jinja2" ,python-jinja2)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-scipy" ,python-scipy)
       ("python-six" ,python-six)
       ("python-tables" ,python-tables)
       ("python-unidecode" ,python-unidecode)))
    (home-page "https://hicexplorer.readthedocs.io")
    (synopsis "Process, analyze and visualize Hi-C data")
    (description
     "HiCExplorer is a powerful and easy to use set of tools to process,
normalize and visualize Hi-C data. HiCExplorer facilitates the creation of
contact matrices, correction of contacts, TAD detection, A/B compartments,
merging, reordering or chromosomes, conversion from different formats
including cooler and detection of long-range contacts. Moreover, it allows
the visualization of multiple contact matrices along with other types of data
like genes, compartments, ChIP-seq coverage tracks (and in general any type of
genomic scores), long range contacts and the visualization of viewpoints.")
    (license license:gpl3)))
14009
;; pyGenomeTracks, fetched from PyPI and built with the Python build system.
(define-public python-pygenometracks
  (package
    (name "python-pygenometracks")
    (version "3.3")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "pyGenomeTracks" version))
       (sha256
        (base32
         "16laa0wnf4qn9fb9ych4w1vqhqwjss70v0y0f6wp4gwqfrlgac0f"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         ;; Turn the exact pin on matplotlib 3.1.1 into a lower bound.
         (add-after 'unpack 'relax-requirements
           (lambda _
             (substitute* "setup.py"
               ;; The dots are escaped so that the pattern matches only
               ;; the literal version string "3.1.1"; an unescaped "."
               ;; matches any character.
               (("matplotlib ==3\\.1\\.1")
                "matplotlib >=3.1.1"))
             #t)))))
    (propagated-inputs
     `(("python-future" ,python-future)
       ("python-gffutils" ,python-gffutils)
       ("python-hicmatrix" ,python-hicmatrix)
       ("python-intervaltree" ,python-intervaltree)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-pybigwig" ,python-pybigwig)
       ("python-pysam" ,python-pysam)
       ("python-tqdm" ,python-tqdm)))
    (native-inputs
     `(("python-pytest" ,python-pytest)))
    (home-page "https://pygenometracks.readthedocs.io")
    (synopsis "Program and library to plot beautiful genome browser tracks")
    (description
     "This package aims to produce high-quality genome browser tracks that
are highly customizable. Currently, it is possible to plot: bigwig, bed (many
options), bedgraph, links (represented as arcs), and Hi-C matrices.
pyGenomeTracks can make plots with or without Hi-C data.")
    (license license:gpl3+)))
14052
;; hic2cool, fetched from PyPI and built with the Python build system.
(define-public python-hic2cool
  (package
    (name "python-hic2cool")
    (version "0.4.2")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "hic2cool" version))
       (sha256
        (base32
         "0xy6mhfns2lzib1kcr6419jjp6pmh0qx8z8na55lmiwn0ds8q9cl"))))
    (build-system python-build-system)
    ;; The tests are disabled because none are included.
    (arguments
     '(#:tests? #f))
    (propagated-inputs
     `(("python-cooler" ,python-cooler)))
    (home-page "https://github.com/4dn-dcic/hic2cool")
    (synopsis "Converter for .hic and .cool files")
    (description
     "This package provides a converter between @code{.hic} files (from
juicer) and single-resolution or multi-resolution @code{.cool} files (for
cooler). Both @code{hic} and @code{cool} files describe Hi-C contact
matrices.")
    (license license:expat)))
14076
;; poRe, fetched from the SourceForge mirror and built with the R build
;; system.
(define-public r-pore
  (package
    (name "r-pore")
    (version "0.24")
    (source
     (origin
       (method url-fetch)
       ;; The version appears both as a directory and in the tarball name.
       (uri (string-append "mirror://sourceforge/rpore/" version
                           "/poRe_" version ".tar.gz"))
       (sha256
        (base32
         "0pih9nljbv8g4x8rkk29i7aqq681b782r5s5ynp4nw9yzqnmmksv"))))
    ;; Records the upstream spelling of the package name.
    (properties
     `((upstream-name . "poRe")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-bit64" ,r-bit64)
       ("r-data-table" ,r-data-table)
       ("r-rhdf5" ,r-rhdf5)
       ("r-shiny" ,r-shiny)
       ("r-svdialogs" ,r-svdialogs)))
    (home-page "https://sourceforge.net/projects/rpore/")
    (synopsis "Visualize Nanopore sequencing data")
    (description
     "This package provides graphical user interfaces to organize and visualize Nanopore
sequencing data.")
    ;; This is free software, but which license variant applies is unclear;
    ;; see <https://github.com/mw55309/poRe_docs/issues/10>.
    (license license:bsd-3)))
14105
;; xbioc, built from a pinned git commit; the version string is derived from
;; the base version, the revision and the commit.
(define-public r-xbioc
  (let ((commit "6ff0670a37ab3036aaf1d94aa4b208310946b0b5")
        (revision "1"))
    (package
      (name "r-xbioc")
      (version (git-version "0.1.16" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/renozao/xbioc")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0w8bsq5myiwkfhh83nm6is5ichiyvwa1axx2szvxnzq39x6knf66"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-annotationdbi" ,r-annotationdbi)
         ("r-assertthat" ,r-assertthat)
         ("r-biobase" ,r-biobase)
         ("r-biocmanager" ,r-biocmanager)
         ("r-digest" ,r-digest)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-reshape2" ,r-reshape2)
         ("r-stringr" ,r-stringr)))
      (home-page "https://github.com/renozao/xbioc/")
      (synopsis "Extra base functions for Bioconductor")
      (description
       "This package provides extra utility functions to perform
common tasks in the analysis of omics data, leveraging and enhancing features
provided by Bioconductor packages.")
      (license license:gpl3+))))
14138
;; csSAM, built from a pinned git commit; the version string is derived from
;; the base version, the revision and the commit.
(define-public r-cssam
  (let ((commit "9ec58c982fa551af0d80b1a266890d92954833f2")
        (revision "1"))
    (package
      (name "r-cssam")
      (version (git-version "1.4" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/csSAM")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "128syf9v39gk0z3ip000qpsjbg6l1siyq6c8b0hz41dzg5achyb3"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-formula" ,r-formula)
         ("r-ggplot2" ,r-ggplot2)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-rngtools" ,r-rngtools)
         ("r-scales" ,r-scales)))
      (home-page "https://github.com/shenorrLab/csSAM/")
      (synopsis "Cell type-specific statistical analysis of microarray")
      (description
       "This package implements the method csSAM that computes
cell-specific differential expression from measured cell proportions using
SAM.")
      ;; Any version of the LGPL.
      (license license:lgpl2.1+))))
14169
;; BSeq-sc, built from a pinned git commit; the version string is derived
;; from the base version, the revision and the commit.
(define-public r-bseqsc
  (let ((commit "fef3f3e38dcf3df37103348b5780937982b43b98")
        (revision "1"))
    (package
      (name "r-bseqsc")
      (version (git-version "1.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/shenorrLab/bseqsc")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1prw13wa20f7wlc3gkkls66n1kxz8d28qrb8icfqdwdnnv8w5qg8"))))
      (build-system r-build-system)
      (propagated-inputs
       `(("r-abind" ,r-abind)
         ("r-annotationdbi" ,r-annotationdbi)
         ("r-biobase" ,r-biobase)
         ("r-cssam" ,r-cssam)
         ("r-dplyr" ,r-dplyr)
         ("r-e1071" ,r-e1071)
         ("r-edger" ,r-edger)
         ("r-ggplot2" ,r-ggplot2)
         ("r-nmf" ,r-nmf)
         ("r-openxlsx" ,r-openxlsx)
         ("r-pkgmaker" ,r-pkgmaker)
         ("r-plyr" ,r-plyr)
         ("r-preprocesscore" ,r-preprocesscore)
         ("r-rngtools" ,r-rngtools)
         ("r-scales" ,r-scales)
         ("r-stringr" ,r-stringr)
         ("r-xbioc" ,r-xbioc)))
      (home-page "https://github.com/shenorrLab/bseqsc")
      (synopsis "Deconvolution of bulk sequencing experiments using single cell data")
      (description
       "BSeq-sc is a bioinformatics analysis pipeline that
leverages single-cell sequencing data to estimate cell type proportion and
cell type-specific gene expression differences from RNA-seq data from bulk
tissue samples. This is a companion package to the publication \"A
single-cell transcriptomic map of the human and mouse pancreas reveals inter-
and intra-cell population structure.\" Baron et al. Cell Systems (2016)
@url{https://www.ncbi.nlm.nih.gov/pubmed/27667365}.")
      (license license:gpl2+))))
14214
;; Porechop is built from a pinned git commit because upstream recommends
;; installing from a clone of the repository:
;; https://github.com/rrwick/Porechop#installation
(define-public porechop
  (let ((revision "1")
        (commit "289d5dca4a5fc327f97b3f8cecb68ecaf1014861"))
    (package
      (name "porechop")
      (version (git-version "0.2.3" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Porechop")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "05ps43gig0d3ia9x5lj84lb00hbsl6ba9n7y7jz927npxbr2ym23"))))
      (build-system python-build-system)
      (home-page "https://github.com/rrwick/porechop")
      (synopsis "Finding, trimming or splitting adapters, in Oxford Nanopore reads")
      (description
       "The porechop package is a tool for finding and removing adapters from Oxford
Nanopore reads. Adapters on the ends of reads are trimmed off, and when a read
has an adapter in its middle, it is treated as chimeric and chopped into
separate reads. Porechop performs thorough alignments to effectively find
adapters, even at low sequence identity. Porechop also supports demultiplexing
of Nanopore reads that were barcoded with the Native Barcoding Kit, PCR
Barcoding Kit or Rapid Barcoding Kit.")
      (license license:gpl3+))))
14244
;; The latest poretools release dates from 2016 and the latest commit from
;; 2017.  Upstream recommends installing from a clone of the repository, so a
;; pinned git commit is used:
;; https://poretools.readthedocs.io/en/latest/content/installation.html
(define-public poretools
  (let ((revision "1")
        (commit "e426b1f09e86ac259a00c261c79df91510777407"))
    (package
      (name "poretools")
      (version (git-version "0.6.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/arq5x/poretools")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0bglj833wxpp3cq430p1d3xp085ls221js2y90w7ir2x5ay8l7am"))))
      (build-system python-build-system)
      ;; Poretools needs Python >=2.7, <3.0, so both the interpreter and the
      ;; Python dependencies below are the Python 2 variants.
      (arguments
       `(#:python ,python-2))
      (inputs
       `(("hdf5" ,hdf5)))
      (propagated-inputs
       `(("python-dateutil" ,python2-dateutil)
         ("python-h5py" ,python2-h5py)
         ("python-matplotlib" ,python2-matplotlib)
         ("python-pandas" ,python2-pandas)
         ("python-seaborn" ,python2-seaborn)))
      (home-page "https://poretools.readthedocs.io")
      (synopsis "Toolkit for working with nanopore sequencing data")
      (description
       "The MinION from Oxford Nanopore Technologies is a nanopore sequencer.
This @code{poretools} package is a flexible toolkit for exploring datasets
generated by nanopore sequencing devices for the purposes of quality control and
downstream analysis. Poretools operates directly on the native FAST5, a variant
of the Hierarchical Data Format (HDF5) standard.")
      (license license:expat))))
14283
;; AbsFilterGSEA, fetched from CRAN and built with the R build system.
(define-public r-absfiltergsea
  (package
    (name "r-absfiltergsea")
    (version "1.5.1")
    (source
     (origin
       (method url-fetch)
       (uri (cran-uri "AbsFilterGSEA" version))
       (sha256
        (base32
         "15srxkxsvn38kd5frdrwfdf0ad8gskrd0h01wmdf9hglq8fjrp7w"))))
    ;; Records the upstream spelling of the package name.
    (properties
     `((upstream-name . "AbsFilterGSEA")))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-biobase" ,r-biobase)
       ("r-deseq" ,r-deseq)
       ("r-limma" ,r-limma)
       ("r-rcpp" ,r-rcpp)
       ("r-rcpparmadillo" ,r-rcpparmadillo)))
    (home-page "https://cran.r-project.org/web/packages/AbsFilterGSEA/")
    (synopsis "Improved false positive control of gene-permuting with absolute filtering")
    (description
     "This package provides a function that performs gene-permuting of a gene-set
enrichment analysis (GSEA) calculation with or without the absolute filtering.
Without filtering, users can perform (original) two-tailed or one-tailed
absolute GSEA.")
    (license license:gpl2)))
14310
;; JAMM consists of shell, R and Perl scripts.  Nothing is configured or
;; compiled; a custom install phase copies the scripts into place and wraps
;; the two shell entry points.
(define-public jamm
  (package
    (name "jamm")
    (version "1.0.7.6")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/mahmoudibrahim/JAMM")
             (commit (string-append "JAMMv" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0bsa5mf9n9q5jz7mmacrra41l7r8rac5vgsn6wv1fb52ya58b970"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'build)
         (replace 'install
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (libexec-dir (string-append out "/libexec/jamm"))
                    (bin-dir (string-append out "/bin"))
                    ;; Shell entry points, installed to bin and wrapped.
                    (launchers '("JAMM.sh"
                                 "SignalGenerator.sh"))
                    ;; Helper scripts, installed to libexec.
                    (helpers '("bincalculator.r"
                               "peakfinder.r"
                               "peakhelper.r"
                               "signalmaker.r"
                               "xcorr.r"
                               "xcorrhelper.r"
                               ;; Perl scripts
                               "peakfilter.pl"
                               "readshifter.pl"))
                    ;; "bin" directories of the tools the launchers are
                    ;; given on PATH.
                    (tool-dirs (map (lambda (input)
                                      (string-append (assoc-ref inputs input)
                                                     "/bin"))
                                    '("coreutils" "gawk" "perl"
                                      "r-minimal"))))
               ;; Point the launchers' sPath variable at the directory
               ;; that receives the helper scripts.
               (substitute* launchers
                 (("^sPath=.*")
                  (string-append "sPath=\"" libexec-dir "\"\n")))
               (for-each (lambda (helper)
                           (install-file helper libexec-dir))
                         helpers)
               (for-each
                (lambda (launcher)
                  (chmod launcher #o555)
                  (install-file launcher bin-dir)
                  ;; Prefix PATH with the tool directories, and prefix
                  ;; PERL5LIB and R_LIBS_SITE with their build-time
                  ;; values.
                  (wrap-program (string-append bin-dir "/" launcher)
                    `("PATH" ":" prefix ,tool-dirs)
                    `("PERL5LIB" ":" prefix (,(getenv "PERL5LIB")))
                    `("R_LIBS_SITE" ":" prefix (,(getenv "R_LIBS_SITE")))))
                launchers))
             #t)))))
    (inputs
     `(("bash" ,bash)
       ("coreutils" ,coreutils)
       ("gawk" ,gawk)
       ("perl" ,perl)
       ("r-minimal" ,r-minimal)
       ;;("r-parallel" ,r-parallel)
       ("r-signal" ,r-signal)
       ("r-mclust" ,r-mclust)))
    (home-page "https://github.com/mahmoudibrahim/JAMM")
    (synopsis "Peak finder for NGS datasets")
    (description
     "JAMM is a peak finder for next generation sequencing datasets (ChIP-Seq,
ATAC-Seq, DNase-Seq, etc.) that can integrate replicates and assign peak
boundaries accurately. JAMM is applicable to both broad and narrow
datasets.")
    (license license:gpl3+)))
14384
;; NGLess, built with the Haskell build system from the upstream git tag.
(define-public ngless
  (package
    (name "ngless")
    (version "1.1.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://gitlab.com/ngless/ngless.git")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "1wim8wpqyff080dfcazynrmjwqas38m24m0v350w245mmhrapdma"))))
    (build-system haskell-build-system)
    (arguments
     `(;; The haddock phase fails with:
       ;;   NGLess/CmdArgs.hs:20:1: error: parse error on input import
       ;;   import Options.Applicative
       #:haddock? #f
       #:phases
       (modify-phases %standard-phases
         ;; Write the versions of the packaged tools into the Makefile,
         ;; then have make generate NGLess/Dependencies/Versions.hs.
         (add-after 'unpack 'create-Versions.hs
           (lambda _
             (substitute* "Makefile"
               (("BWA_VERSION = .*")
                (string-append "BWA_VERSION = "
                               ,(package-version bwa) "\n"))
               (("SAM_VERSION = .*")
                (string-append "SAM_VERSION = "
                               ,(package-version samtools) "\n"))
               (("PRODIGAL_VERSION = .*")
                (string-append "PRODIGAL_VERSION = "
                               ,(package-version prodigal) "\n"))
               (("MINIMAP2_VERSION = .*")
                (string-append "MINIMAP2_VERSION = "
                               ,(package-version minimap2) "\n")))
             (invoke "make" "NGLess/Dependencies/Versions.hs")
             #t))
         ;; Run hpack after Versions.hs has been created.
         (add-after 'create-Versions.hs 'create-cabal-file
           (lambda _
             (invoke "hpack")
             #t))
         ;; These tools are expected to be installed alongside ngless, so
         ;; link each one into the output under the name
         ;; "ngless-VERSION-TOOL".
         (add-after 'install 'link-tools
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin/")))
               (for-each
                (lambda (tool)
                  (symlink (string-append (assoc-ref inputs tool)
                                          "/bin/" tool)
                           (string-append bin "ngless-" ,version
                                          "-" tool)))
                '("prodigal" "minimap2" "samtools" "bwa"))
               #t))))))
    (inputs
     `(("prodigal" ,prodigal)
       ("bwa" ,bwa)
       ("samtools" ,samtools)
       ("minimap2" ,minimap2)
       ("ghc-aeson" ,ghc-aeson)
       ("ghc-ansi-terminal" ,ghc-ansi-terminal)
       ("ghc-async" ,ghc-async)
       ("ghc-atomic-write" ,ghc-atomic-write)
       ("ghc-bytestring-lexing" ,ghc-bytestring-lexing)
       ("ghc-conduit" ,ghc-conduit)
       ("ghc-conduit-algorithms" ,ghc-conduit-algorithms)
       ("ghc-conduit-extra" ,ghc-conduit-extra)
       ("ghc-configurator" ,ghc-configurator)
       ("ghc-convertible" ,ghc-convertible)
       ("ghc-data-default" ,ghc-data-default)
       ("ghc-diagrams-core" ,ghc-diagrams-core)
       ("ghc-diagrams-lib" ,ghc-diagrams-lib)
       ("ghc-diagrams-svg" ,ghc-diagrams-svg)
       ("ghc-double-conversion" ,ghc-double-conversion)
       ("ghc-edit-distance" ,ghc-edit-distance)
       ("ghc-either" ,ghc-either)
       ("ghc-errors" ,ghc-errors)
       ("ghc-extra" ,ghc-extra)
       ("ghc-filemanip" ,ghc-filemanip)
       ("ghc-file-embed" ,ghc-file-embed)
       ("ghc-gitrev" ,ghc-gitrev)
       ("ghc-hashtables" ,ghc-hashtables)
       ("ghc-http-conduit" ,ghc-http-conduit)
       ("ghc-inline-c" ,ghc-inline-c)
       ("ghc-inline-c-cpp" ,ghc-inline-c-cpp)
       ("ghc-intervalmap" ,ghc-intervalmap)
       ("ghc-missingh" ,ghc-missingh)
       ("ghc-optparse-applicative" ,ghc-optparse-applicative)
       ("ghc-regex" ,ghc-regex)
       ("ghc-safe" ,ghc-safe)
       ("ghc-safeio" ,ghc-safeio)
       ("ghc-strict" ,ghc-strict)
       ("ghc-tar" ,ghc-tar)
       ("ghc-tar-conduit" ,ghc-tar-conduit)
       ("ghc-unliftio" ,ghc-unliftio)
       ("ghc-unliftio-core" ,ghc-unliftio-core)
       ("ghc-vector" ,ghc-vector)
       ("ghc-yaml" ,ghc-yaml)
       ("ghc-zlib" ,ghc-zlib)))
    (propagated-inputs
     `(("r-r6" ,r-r6)
       ("r-hdf5r" ,r-hdf5r)
       ("r-iterators" ,r-iterators)
       ("r-itertools" ,r-itertools)
       ("r-matrix" ,r-matrix)))
    (native-inputs
     `(("ghc-hpack" ,ghc-hpack)
       ("ghc-quickcheck" ,ghc-quickcheck)
       ("ghc-test-framework" ,ghc-test-framework)
       ("ghc-test-framework-hunit" ,ghc-test-framework-hunit)
       ("ghc-test-framework-quickcheck2" ,ghc-test-framework-quickcheck2)
       ("ghc-test-framework-th" ,ghc-test-framework-th)))
    (home-page "https://gitlab.com/ngless/ngless")
    (synopsis "DSL for processing next-generation sequencing data")
    (description
     "Ngless is a domain-specific language for
@dfn{next-generation sequencing} (NGS) data processing.")
    (license license:expat)))
14505
;; Filtlong is built from a pinned git commit: upstream recommends
;; installing from a clone of the repository
;; (https://github.com/rrwick/Filtlong#installation), and the latest release
;; is more than nine months old.
(define-public filtlong
  (let ((revision "1")
        (commit "d1bb46dfe8bc7efe6257b5ce222c04bfe8aedaab"))
    (package
      (name "filtlong")
      (version (git-version "0.2.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/rrwick/Filtlong")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "1xr92r820x8qlkcr3b57iw223yq8vjgyi42jr79w2xgw47qzr575"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Install the binary to bin and the two helper scripts to
           ;; share/filtlong/scripts.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin-dir (string-append out "/bin"))
                      (script-dir (string-append out
                                                 "/share/filtlong/scripts")))
                 (install-file "bin/filtlong" bin-dir)
                 (for-each (lambda (script)
                             (install-file script script-dir))
                           '("scripts/histogram.py"
                             "scripts/read_info_histograms.sh")))
               #t))
           ;; Wrap the installed histogram.py so that PYTHONPATH is
           ;; prefixed with its build-time value.
           (add-after 'install 'wrap-program
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((pythonpath (getenv "PYTHONPATH"))
                     (histogram (string-append
                                 (assoc-ref outputs "out")
                                 "/share/filtlong/scripts/histogram.py")))
                 (wrap-program histogram
                   `("PYTHONPATH" ":" prefix (,pythonpath))))
               #t))
           ;; Replace "awk" in the shell script with the file name that
           ;; `which' finds for gawk.  The phase is registered before
           ;; `check'.
           (add-before 'check 'patch-tests
             (lambda _
               (substitute* "scripts/read_info_histograms.sh"
                 (("awk")
                  (which "gawk")))
               #t)))))
      (inputs
       `(("gawk" ,gawk)                 ;for read_info_histograms.sh
         ("python" ,python-2)           ;required for histogram.py
         ("zlib" ,zlib)))
      (home-page "https://github.com/rrwick/Filtlong/")
      (synopsis "Tool for quality filtering of Nanopore and PacBio data")
      (description
       "The Filtlong package is a tool for filtering long reads by quality.
It can take a set of long reads and produce a smaller, better subset. It uses
both read length (longer is better) and read identity (higher is better) when
choosing which reads pass the filter.")
      (license (list license:gpl3       ;filtlong
                     license:asl2.0)))));histogram.py
14565
;; Nanopolish is built from a pinned git commit: upstream recommends
;; installing from a clone of the repository
;; <https://github.com/jts/nanopolish#installing-a-particular-release>, and
;; the differences between the release and the current version seem to be
;; significant.
(define-public nanopolish
  (let ((commit "6331dc4f15b9dfabb954ba3fae9d76b6c3ca6377")
        (revision "1"))
    (package
      (name "nanopolish")
      (version (git-version "0.11.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/jts/nanopolish")
               (commit commit)
               (recursive? #t)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "15ikl3d37y49pwd7vx36xksgsqajhf24q7qqsnpl15dqqyy5qgbc"))
         (modules '((guix build utils)))
         ;; Remove the "htslib" directory from the checkout; htslib is
         ;; taken from the inputs instead.
         (snippet
          '(begin
             (delete-file-recursively "htslib")
             #t))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags
         `("HDF5=noinstall" "EIGEN=noinstall" "HTS=noinstall" "CC=gcc")
         #:tests? #f                    ; no check target
         #:phases
         (modify-phases %standard-phases
           ;; Put the "include/eigen3" directory of the eigen input at
           ;; the front of CPATH.
           (add-after 'unpack 'find-eigen
             (lambda* (#:key inputs #:allow-other-keys)
               (setenv "CPATH"
                       (string-append (assoc-ref inputs "eigen")
                                      "/include/eigen3:"
                                      (or (getenv "CPATH") "")))
               #t))
           (delete 'configure)
           ;; Install the binary to bin and every file found under
           ;; "scripts" to share/nanopolish/scripts.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let* ((out (assoc-ref outputs "out"))
                      (bin (string-append out "/bin"))
                      (scripts (string-append out "/share/nanopolish/scripts")))

                 (install-file "nanopolish" bin)
                 (for-each (lambda (file) (install-file file scripts))
                           (find-files "scripts" ".*"))
                 #t)))
           ;; Wrap the installed Python and Perl scripts so that they
           ;; find their libraries.  The scripts are searched for under
           ;; the package output, and each wrapper prefixes the variable
           ;; with its build-time value.
           (add-after 'install 'wrap-programs
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((scripts (string-append (assoc-ref outputs "out")
                                             "/share/nanopolish/scripts"))
                     ;; Wrap every file in FILES so that VARIABLE is
                     ;; prefixed with its current value.  Do nothing
                     ;; when VARIABLE is unset, since there is then no
                     ;; value to prefix.
                     (wrap-all
                      (lambda (files variable)
                        (let ((value (getenv variable)))
                          (when value
                            (for-each
                             (lambda (file)
                               (wrap-program file
                                 `(,variable ":" prefix (,value))))
                             files))))))
                 (wrap-all (find-files scripts "\\.py$") "PYTHONPATH")
                 (wrap-all (find-files scripts "\\.pl$") "PERL5LIB")
                 #t))))))
      (inputs
       `(("eigen" ,eigen)
         ("hdf5" ,hdf5)
         ("htslib" ,htslib)
         ("perl" ,perl)
         ("python" ,python-wrapper)
         ("python-biopython" ,python-biopython)
         ("python-numpy" ,python-numpy)
         ("python-pysam" ,python-pysam)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-scipy" ,python-scipy)
         ("zlib" ,zlib)))
      (home-page "https://github.com/jts/nanopolish")
      (synopsis "Signal-level analysis of Oxford Nanopore sequencing data")
      (description
       "This package analyses the Oxford Nanopore sequencing data at signal-level.
Nanopolish can calculate an improved consensus sequence for a draft genome
assembly, detect base modifications, call SNPs (Single nucleotide
polymorphisms) and indels with respect to a reference genome and more.")
      (license license:expat))))
14645
;; CNVkit, built from the upstream git tag with the Python build system.
(define-public cnvkit
  (package
    (name "cnvkit")
    (version "0.9.5")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/etal/cnvkit")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0g2f78k68yglmj4fsfmgs8idqv3di9aj53fg0ld0hqljg8chhh82"))))
    (build-system python-build-system)
    (propagated-inputs
     `(;; Python packages
       ("python-biopython" ,python-biopython)
       ("python-future" ,python-future)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numpy" ,python-numpy)
       ("python-reportlab" ,python-reportlab)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-pyfaidx" ,python-pyfaidx)
       ("python-scipy" ,python-scipy)
       ;; R packages
       ("r-dnacopy" ,r-dnacopy)))
    (home-page "https://cnvkit.readthedocs.org/")
    (synopsis "Copy number variant detection from targeted DNA sequencing")
    (description
     "CNVkit is a Python library and command-line software toolkit to infer
and visualize copy number from high-throughput DNA sequencing data. It is
designed for use with hybrid capture, including both whole-exome and custom
target panels, and short-read sequencing platforms such as Illumina and Ion
Torrent.")
    (license license:asl2.0)))
14681
;; pyFIt-SNE, built from the upstream git tag with the Python build system.
(define-public python-pyfit-sne
  (package
    (name "python-pyfit-sne")
    (version "1.0.1")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/KlugerLab/pyFIt-SNE")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "13wh3qkzs56azmmgnxib6xfr29g7xh09sxylzjpni5j0pp0rc5qw"))))
    (build-system python-build-system)
    ;; numpy is propagated to users of this package; fftw is a regular
    ;; input and cython is needed only at build time.
    (propagated-inputs
     `(("python-numpy" ,python-numpy)))
    (inputs
     `(("fftw" ,fftw)))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (home-page "https://github.com/KlugerLab/pyFIt-SNE")
    (synopsis "FFT-accelerated Interpolation-based t-SNE")
    (description
     "t-Stochastic Neighborhood Embedding (t-SNE) is a highly successful
method for dimensionality reduction and visualization of high dimensional
datasets. A popular implementation of t-SNE uses the Barnes-Hut algorithm to
approximate the gradient at each iteration of gradient descent. This package
is a Cython wrapper for FIt-SNE.")
    (license license:bsd-4)))
14711
;; BBMap and the accompanying BBTools, built with Ant on OpenJDK 11.  The
;; jars, the shell wrappers, the JNI library and the documentation are all
;; installed by custom phases.
(define-public bbmap
  (package
    (name "bbmap")
    (version "35.82")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/bbmap/BBMap_"
                           version ".tar.gz"))
       (sha256
        (base32 "1q4rfhxcb6z3gm8zg2davjz98w22lkf4hm9ikxz9kdl93pil3wkd"))))
    (build-system ant-build-system)
    (arguments
     `(#:jdk ,openjdk11
       #:build-target "dist"
       #:tests? #f                      ; there are none
       #:make-flags
       (list (string-append "-Dmpijar="
                            (assoc-ref %build-inputs "java-openmpi")
                            "/lib/mpi.jar"))
       #:modules ((guix build ant-build-system)
                  (guix build utils)
                  (guix build java-utils))
       #:phases
       (modify-phases %standard-phases
         ;; The JNI library has its own makefile in the "jni" directory.
         (add-after 'build 'build-jni-library
           (lambda _
             (with-directory-excursion "jni"
               (invoke "make" "-f" "makefile.linux"))))
         ;; There is no install target
         (replace 'install (install-jars "dist"))
         (add-after 'install 'install-scripts-and-documentation
           (lambda* (#:key outputs #:allow-other-keys)
             ;; Use the absolute file name of "awk" in the memory helper.
             (substitute* "calcmem.sh"
               (("\\| awk ") (string-append "| " (which "awk") " ")))
             (let* ((output   (assoc-ref outputs "out"))
                    (bindir   (string-append output "/bin"))
                    (docdir   (string-append output "/share/doc/bbmap"))
                    (jnidir   (string-append output "/lib/jni"))
                    (wrappers (find-files "." "\\.sh$")))
               ;; Make every wrapper script refer to the installed
               ;; documentation, jar and JNI directory, and to "java" by
               ;; absolute file name.
               (substitute* wrappers
                 (("\\$DIR\"\"docs") docdir)
                 (("^CP=.*")
                  (string-append "CP=" output "/share/java/BBTools.jar\n"))
                 (("^NATIVELIBDIR.*")
                  (string-append "NATIVELIBDIR=" jnidir "\n"))
                 (("CMD=\"java")
                  (string-append "CMD=\"" (which "java"))))
               (for-each (lambda (wrapper)
                           (install-file wrapper bindir))
                         wrappers)

               ;; Install JNI library
               (install-file "jni/libbbtoolsjni.so" jnidir)

               ;; Install documentation
               (install-file "docs/readme.txt" docdir)
               (copy-recursively "docs/guides" docdir))
             #t)))))
    (inputs
     `(("gawk" ,gawk)
       ("java-eclipse-jdt-core" ,java-eclipse-jdt-core)
       ("java-eclipse-jdt-compiler-apt" ,java-eclipse-jdt-compiler-apt)
       ("java-openmpi" ,java-openmpi)))
    (home-page "https://sourceforge.net/projects/bbmap/")
    (synopsis "Aligner and other tools for short sequencing reads")
    (description
     "This package provides bioinformatic tools to align, deduplicate,
reformat, filter and normalize DNA and RNA-seq data. It includes the
following tools: BBMap, a short read aligner for DNA and RNA-seq data; BBNorm,
a kmer-based error-correction and normalization tool; Dedupe, a tool to
simplify assemblies by removing duplicate or contained subsequences that share
a target percent identity; Reformat, to convert reads between
fasta/fastq/scarf/fasta+qual/sam, interleaved/paired, and ASCII-33/64, at over
500 MB/s; and BBDuk, a tool to filter, trim, or mask reads with kmer matches
to an artifact/contaminant file.")
    (license license:bsd-3)))
14787
;; Velvet short-read assembler.  The source snippet removes the pre-built
;; manual and the bundled third-party code, so the build uses the zlib input
;; instead of the bundled copy.
(define-public velvet
  (package
    (name "velvet")
    (version "1.2.10")
    (source (origin
              (method url-fetch)
              (uri (string-append "https://www.ebi.ac.uk/~zerbino/velvet/"
                                  "velvet_" version ".tgz"))
              (sha256
               (base32
                "0h3njwy66p6bx14r3ar1byb0ccaxmxka4c65rn4iybyiqa4d8kc8"))
              ;; Delete the pre-built manual and the bundled libraries.
              (modules '((guix build utils)))
              (snippet
               '(begin
                  (delete-file "Manual.pdf")
                  (delete-file-recursively "third-party")
                  #t))))
    (build-system gnu-build-system)
    (arguments
     `(#:make-flags '("OPENMP=t")
       #:test-target "test"
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (add-after 'unpack 'fix-zlib-include
           (lambda _
             ;; The bundled zlib was removed by the source snippet, so
             ;; include the header from the zlib input instead.  The dots
             ;; are escaped so that only the literal file name matches.
             (substitute* "src/binarySequences.c"
               (("\\.\\./third-party/zlib-1\\.2\\.3/zlib\\.h") "zlib.h"))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (doc (string-append out "/share/doc/velvet")))
               (mkdir-p bin)
               (mkdir-p doc)
               (install-file "velveth" bin)
               (install-file "velvetg" bin)
               ;; NOTE(review): "Manual.pdf" is deleted from the source, so
               ;; it is presumably regenerated during the build with the
               ;; texlive native input -- confirm against the Makefile.
               (install-file "Manual.pdf" doc)
               (install-file "Columbus_manual.pdf" doc)
               #t))))))
    (inputs
     `(("openmpi" ,openmpi)
       ("zlib" ,zlib)))
    (native-inputs
     `(("texlive" ,(texlive-union (list texlive-latex-graphics
                                        texlive-latex-hyperref)))))
    (home-page "https://www.ebi.ac.uk/~zerbino/velvet/")
    (synopsis "Nucleic acid sequence assembler for very short reads")
    (description
     "Velvet is a de novo genomic assembler specially designed for short read
sequencing technologies, such as Solexa or 454. Velvet currently takes in
short read sequences, removes errors then produces high quality unique
contigs. It then uses paired read information, if available, to retrieve the
repeated areas between contigs.")
    (license license:gpl2+)))
14845
;; Velocyto, taken from the PyPI release tarball and built with the Python
;; build system.
(define-public python-velocyto
  (package
    (name "python-velocyto")
    (version "0.17.17")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "velocyto" version))
       (sha256
        (base32 "0fgygyzqgrq32dv6a00biq1p1cwi6kbl5iqblxq1kklj6b2mzmhs"))))
    (build-system python-build-system)
    ;; These are propagated, i.e. installed alongside this package.
    (propagated-inputs
     `(("python-click" ,python-click)
       ("python-cython" ,python-cython)
       ("python-h5py" ,python-h5py)
       ("python-loompy" ,python-loompy)
       ("python-matplotlib" ,python-matplotlib)
       ("python-numba" ,python-numba)
       ("python-numpy" ,python-numpy)
       ("python-pandas" ,python-pandas)
       ("python-pysam" ,python-pysam)
       ("python-scikit-learn" ,python-scikit-learn)
       ("python-scipy" ,python-scipy)))
    (native-inputs
     `(("python-joblib" ,python-joblib)))
    (home-page "https://github.com/velocyto-team/velocyto.py")
    (synopsis "RNA velocity analysis for single cell RNA-seq data")
    (description
     "Velocyto is a library for the analysis of RNA velocity. Velocyto
includes a command line tool and an analysis pipeline.")
    (license license:bsd-2)))
14878
;; Arriba gene fusion detector.  There is no configure script: the
;; 'configure' phase instead patches the Makefile to use the htslib input and
;; makes "run_arriba.sh" call STAR and samtools by absolute file name.
(define-public arriba
  (package
    (name "arriba")
    (version "1.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/suhrig/arriba/releases/"
                           "download/v" version "/arriba_v" version ".tar.gz"))
       (sha256
        (base32
         "0jx9656ry766vb8z08m1c3im87b0c82qpnjby9wz4kcz8vn87dx2"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda* (#:key inputs #:allow-other-keys)
             ;; Replace the $(HTSLIB) references with the headers and the
             ;; shared library of the htslib input.
             (let ((htslib (assoc-ref inputs "htslib")))
               (substitute* "Makefile"
                 (("-I\\$\\(HTSLIB\\)/htslib")
                  (string-append "-I" htslib "/include/htslib"))
                 ((" \\$\\(HTSLIB\\)/libhts.a")
                  (string-append " " htslib "/lib/libhts.so"))))
             ;; Refer to the external tools by absolute file name so that
             ;; the script does not depend on the user's PATH.
             (substitute* "run_arriba.sh"
               (("^STAR ") (string-append (which "STAR") " "))
               (("samtools --version-only")
                (string-append (which "samtools") " --version-only"))
               (("samtools index")
                (string-append (which "samtools") " index"))
               (("samtools sort")
                (string-append (which "samtools") " sort")))
             #t))
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "arriba" bin)
               (install-file "run_arriba.sh" bin)
               (install-file "draw_fusions.R" bin)
               ;; Wrap the R script with the R_LIBS_SITE value of the build
               ;; environment.
               (wrap-program (string-append bin "/draw_fusions.R")
                 `("R_LIBS_SITE" ":" prefix (,(getenv "R_LIBS_SITE")))))
             #t)))))
    (inputs
     `(("htslib" ,htslib)
       ("r-minimal" ,r-minimal)
       ("r-circlize" ,r-circlize)
       ("r-genomicalignments" ,r-genomicalignments)
       ("r-genomicranges" ,r-genomicranges)
       ("samtools" ,samtools)
       ("star" ,star)
       ("zlib" ,zlib)))
    (home-page "https://github.com/suhrig/arriba")
    ;; No trailing whitespace here; 'guix lint' complains about it.
    (synopsis "Gene fusion detection from RNA-Seq data")
    (description
     "Arriba is a command-line tool for the detection of gene fusions from
RNA-Seq data. It was developed for the use in a clinical research setting.
Therefore, short runtimes and high sensitivity were important design criteria.
It is based on the fast STAR aligner and the post-alignment runtime is
typically just around two minutes. In contrast to many other fusion detection
tools which build on STAR, Arriba does not require to reduce the
@code{alignIntronMax} parameter of STAR to detect small deletions.")
    ;; All code is under the Expat license with the exception of
    ;; "draw_fusions.R", which is under GPLv3.
    (license (list license:expat license:gpl3))))
14944
;; AdapterRemoval, built straight from its Makefile: the 'configure' phase is
;; dropped and the installation prefix is handed over as a make flag.
(define-public adapterremoval
  (package
    (name "adapterremoval")
    (version "2.3.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/MikkelSchubert/adapterremoval")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "1nf3ki5pfzalhrx2fr1y6pfqfi133yj2m7q4fj9irf5fb94bapwr"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       (let ((out (assoc-ref %outputs "out")))
         (list "COLOR_BUILD=no"
               (string-append "PREFIX=" out)))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (inputs
     `(("zlib" ,zlib)))
    (home-page "https://adapterremoval.readthedocs.io/")
    (synopsis "Rapid sequence adapter trimming, identification, and read merging")
    (description
     "This program searches for and removes remnant adapter sequences from
@dfn{High-Throughput Sequencing} (HTS) data and (optionally) trims low quality
bases from the 3' end of reads following adapter removal. AdapterRemoval can
analyze both single end and paired end data, and can be used to merge
overlapping paired-ended reads into (longer) consensus sequences.
Additionally, the AdapterRemoval may be used to recover a consensus adapter
sequence for paired-ended data, for which this information is not available.")
    (license license:gpl3+)))
14981
;; Pplacer, built with OCaml 4.07.  COMMIT is spliced into the version string
;; that replaces the 'git describe' invocation in "myocamlbuild.ml".
(define-public pplacer
  (let ((commit "807f6f3"))
    (package
      (name "pplacer")
      ;; The commit should be updated with each version change.
      (version "1.1.alpha19")
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/matsen/pplacer")
               (commit (string-append "v" version))))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "11ppbbbx20p2g9wj3ff64dhnarb12q79v7qh4rk0gj6lkbz4n7cn"))))
      (build-system ocaml-build-system)
      (arguments
       `(#:ocaml ,ocaml-4.07
         #:findlib ,ocaml4.07-findlib
         #:modules ((guix build ocaml-build-system)
                    (guix build utils)
                    (ice-9 ftw))
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           ;; Patch the ocamlbuild plugin: add the "-unsafe-string" compile
           ;; flag and insert string helpers in front of 'run_and_read', then
           ;; rewrite the "ocamlfind list" call to use them.
           (add-after 'unpack 'fix-build-with-latest-ocaml
             (lambda _
               (substitute* "myocamlbuild.ml"
                 (("dep \\[\"c_pam\"\\]" all)
                  (string-append
                   "flag [\"ocaml\"; \"compile\"] (A \"-unsafe-string\");\n"
                   all))
                 (("let run_and_read" all)
                  (string-append "
let split s ch =
let x = ref [] in
let rec go s =
let pos = String.index s ch in
x := (String.before s pos)::!x;
go (String.after s (pos + 1))
in
try go s
with Not_found -> !x
let split_nl s = split s '\\n'
let before_space s =
try String.before s (String.index s ' ')
with Not_found -> s

" all))
                 (("run_and_read \"ocamlfind list \\| cut -d' ' -f1\"" all)
                  (string-append "List.map before_space (split_nl & "
                                 all ")"))
                 ((" blank_sep_strings &") "")
                 ((" Lexing.from_string &") ""))
               #t))
           ;; Overwrite the C sources in "cdd_src" with those found in the
           ;; "lib-src" directory of the cddlib source tarball.
           (add-after 'unpack 'replace-bundled-cddlib
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((tarball (assoc-ref inputs "cddlib-src"))
                     (scratch "cddlib_guix"))
                 (mkdir scratch)
                 (with-directory-excursion scratch
                   (invoke "tar" "xvf" tarball))
                 ;; The directory can only be listed once the tarball has
                 ;; been extracted.  The first two entries returned by
                 ;; 'scandir' are "." and "..", so the third one is the
                 ;; extracted top-level directory.
                 (let ((lib-src (string-append
                                 scratch "/"
                                 (list-ref (scandir scratch) 2)
                                 "/lib-src")))
                   (for-each make-file-writable (find-files "cdd_src" ".*"))
                   (for-each (lambda (source-file)
                               (copy-file source-file
                                          (string-append
                                           "cdd_src/"
                                           (basename source-file))))
                             (find-files lib-src ".*[ch]$")))
                 #t)))
           (add-after 'unpack 'fix-makefile
             (lambda _
               ;; Remove system calls to 'git'.
               (substitute* "Makefile"
                 (("^DESCRIPT:=pplacer-.*")
                  (string-append
                   "DESCRIPT:=pplacer-$(shell uname)-v" ,version "\n")))
               (substitute* "myocamlbuild.ml"
                 (("git describe --tags --long .*\\\" with")
                  (string-append
                   "echo -n v" ,version "-" ,commit "\" with")))
               #t))
           ;; The built executables end up in "bin"; copy them to the output.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((target (string-append (assoc-ref outputs "out")
                                            "/bin")))
                 (copy-recursively "bin" target))
               #t)))))
      (native-inputs
       `(("cddlib-src" ,(package-source cddlib))
         ("ocamlbuild" ,(package-with-ocaml4.07 ocamlbuild))
         ("pkg-config" ,pkg-config)))
      (inputs
       `(("zlib" ,zlib "static")
         ("gsl" ,gsl)
         ("ocaml-ounit" ,(package-with-ocaml4.07 ocaml-ounit))
         ("ocaml-batteries" ,(package-with-ocaml4.07 ocaml-batteries))
         ("ocaml-camlzip" ,(package-with-ocaml4.07 camlzip))
         ("ocaml-csv" ,(package-with-ocaml4.07 ocaml-csv))
         ("ocaml-sqlite3" ,ocaml4.07-sqlite3)
         ("ocaml-xmlm" ,(package-with-ocaml4.07 ocaml-xmlm))
         ("ocaml-mcl" ,(package-with-ocaml4.07 ocaml-mcl))
         ("ocaml-gsl" ,ocaml4.07-gsl-1)))
      (propagated-inputs
       `(("pplacer-scripts" ,pplacer-scripts)))
      (home-page "https://matsen.fhcrc.org/pplacer/")
      (synopsis "Phylogenetic placement of biological sequences")
      (description
       "Pplacer places query sequences on a fixed reference phylogenetic tree
to maximize phylogenetic likelihood or posterior probability according to a
reference alignment. Pplacer is designed to be fast, to give useful
information about uncertainty, and to offer advanced visualization and
downstream analysis.")
      (license license:gpl3))))
15097
;; This package is installed alongside 'pplacer'.  It is a separate package so
;; that it can use the python-build-system for the scripts that are
;; distributed alongside the main OCaml binaries.
;;
;; It inherits every field it does not override (source, version, home page,
;; description, license, ...) from 'pplacer'.
(define pplacer-scripts
  (package
    (inherit pplacer)
    (name "pplacer-scripts")
    (build-system python-build-system)
    (arguments
     `(#:python ,python-2
       #:phases
       (modify-phases %standard-phases
         ;; The Python scripts live in the "scripts" sub-directory.
         (add-after 'unpack 'enter-scripts-dir
           (lambda _ (chdir "scripts") #t))
         (replace 'check
           (lambda _ (invoke "python" "-m" "unittest" "discover" "-v") #t))
         ;; Put the directories of the external programs on PATH for the
         ;; two wrapped scripts.
         (add-after 'install 'wrap-executables
           (lambda* (#:key inputs outputs #:allow-other-keys)
             (let* ((bin      (string-append (assoc-ref outputs "out")
                                             "/bin"))
                    (hmmer    (string-append (assoc-ref inputs "hmmer")
                                             "/bin"))
                    (infernal (string-append (assoc-ref inputs "infernal")
                                             "/bin")))
               ;; "refpkg_align.py" gets both HMMER and Infernal on PATH.
               (wrap-program (string-append bin "/refpkg_align.py")
                 `("PATH" ":" prefix (,(string-append hmmer ":" infernal))))
               ;; "hrefpkg_query.py" only gets HMMER.
               (wrap-program (string-append bin "/hrefpkg_query.py")
                 `("PATH" ":" prefix (,hmmer))))
             #t)))))
    (inputs
     `(("infernal" ,infernal)
       ("hmmer" ,hmmer)))
    (propagated-inputs
     `(("python-biopython" ,python2-biopython)
       ("taxtastic" ,taxtastic)))
    (synopsis "Pplacer Python scripts")))
15136
;; CheckM, from the PyPI release; built with Python 2 and with the test suite
;; disabled.
(define-public python2-checkm-genome
  (package
    (name "python2-checkm-genome")
    (version "1.0.13")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "checkm-genome" version))
       (sha256
        (base32 "0bm8gpxjmzxsxxl8lzwqhgx8g1dlnmp6znz7wv3hgb0gdjbf9dzz"))))
    (build-system python-build-system)
    (arguments
     `(#:tests? #f                      ; some tests are interactive
       #:python ,python-2))
    ;; The labels say "python-" but the packages are the Python 2 variants.
    (propagated-inputs
     `(("python-dendropy" ,python2-dendropy)
       ("python-matplotlib" ,python2-matplotlib)
       ("python-numpy" ,python2-numpy)
       ("python-pysam" ,python2-pysam)
       ("python-scipy" ,python2-scipy)))
    (home-page "https://pypi.org/project/Checkm/")
    (synopsis "Assess the quality of putative genome bins")
    (description
     "CheckM provides a set of tools for assessing the quality of genomes
recovered from isolates, single cells, or metagenomes. It provides robust
estimates of genome completeness and contamination by using collocated sets of
genes that are ubiquitous and single-copy within a phylogenetic lineage.
Assessment of genome quality can also be examined using plots depicting key
genomic characteristics (e.g., GC, coding density) which highlight sequences
outside the expected distributions of a typical genome. CheckM also provides
tools for identifying genome bins that are likely candidates for merging based
on marker set compatibility, similarity in genomic characteristics, and
proximity within a reference genome.")
    (license license:gpl3+)))
15172
;; UMI-tools, from the PyPI release tarball (which is named "umi_tools").
(define-public umi-tools
  (package
    (name "umi-tools")
    (version "1.0.0")
    (source
     (origin
       (method url-fetch)
       (uri (pypi-uri "umi_tools" version))
       (sha256
        (base32
         "08y3vz1vcx09whmbsn722lcs6jl9wyrh9i4p3k8j4cb1i32bij4a"))))
    (build-system python-build-system)
    (inputs
     `(("python-pandas" ,python-pandas)
       ("python-future" ,python-future)
       ("python-scipy" ,python-scipy)
       ("python-matplotlib" ,python-matplotlib)
       ("python-regex" ,python-regex)
       ("python-pysam" ,python-pysam)))
    (native-inputs
     `(("python-cython" ,python-cython)))
    (home-page "https://github.com/CGATOxford/UMI-tools")
    ;; UMI stands for "unique molecular identifier", as spelled out in the
    ;; description below.
    (synopsis "Tools for analyzing unique molecular identifiers")
    (description "This package provides tools for dealing with @dfn{Unique
Molecular Identifiers} (UMIs) and @dfn{Random Molecular Tags} (RMTs) in
genetic sequences. There are six tools: the @code{extract} and
@code{whitelist} commands are used to prepare a fastq containing UMIs @code{+/-}
cell barcodes for alignment. The remaining commands, @code{group},
@code{dedup}, and @code{count}/@code{count_tab}, are used to identify PCR
duplicates using the UMIs and perform different levels of analysis depending
on the needs of the user.")
    (license license:expat)))
15205
;; ataqv, built from its Makefile without a 'configure' phase.  The
;; installation prefix and the Boost and htslib directories are passed as
;; make flags.
(define-public ataqv
  (package
    (name "ataqv")
    (version "1.0.0")
    ;; HOME-PAGE comes before SOURCE so that the origin can refer to it.
    (home-page "https://github.com/ParkerLab/ataqv")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url home-page)
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32 "031xr6jx1aprh26y5b1lv3gzrlmzg4alfl73vvshymx8cq8asrqi"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       (let ((input-directory (lambda (label)
                                (assoc-ref %build-inputs label))))
         (list (string-append "prefix=" (assoc-ref %outputs "out"))
               (string-append "BOOST_ROOT=" (input-directory "boost"))
               (string-append "HTSLIB_ROOT=" (input-directory "htslib"))))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure))))
    (native-inputs
     `(("lcov" ,lcov)))
    (inputs
     `(("boost" ,boost)
       ("htslib" ,htslib)
       ("ncurses" ,ncurses)
       ("zlib" ,zlib)))
    (synopsis "Toolkit for quality control and visualization of ATAC-seq data")
    (description "This package provides a toolkit for measuring and comparing
ATAC-seq results. It was written to make it easier to spot differences that
might be caused by ATAC-seq library prep or sequencing. The main program,
@code{ataqv}, examines aligned reads and reports some basic metrics.")
    (license license:gpl3+)))
15246
;; PSIplot R package, fetched from the upstream Git repository (the same URL
;; as HOME-PAGE) at the tag "v" followed by VERSION.
(define-public r-psiplot
  (package
    (name "r-psiplot")
    (version "2.3.0")
    ;; HOME-PAGE comes before SOURCE so that the origin can refer to it.
    (home-page "https://github.com/kcha/psiplot")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url home-page)
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "08438h16cfry5kqh3y9hs8q1b1a8bxhblsm75knviz5r6q0n1jxh"))))
    (build-system r-build-system)
    (propagated-inputs
     `(("r-mass" ,r-mass)
       ("r-dplyr" ,r-dplyr)
       ("r-tidyr" ,r-tidyr)
       ("r-purrr" ,r-purrr)
       ("r-readr" ,r-readr)
       ("r-magrittr" ,r-magrittr)
       ("r-ggplot2" ,r-ggplot2)))
    (synopsis "Plot percent spliced-in values of alternatively-spliced exons")
    (description
     "PSIplot is an R package for generating plots of @dfn{percent
spliced-in} (PSI) values of alternatively-spliced exons that were computed by
vast-tools, an RNA-Seq pipeline for alternative splicing analysis. The plots
are generated using @code{ggplot2}.")
    (license license:expat)))
15277
;; Oxford Nanopore fast5 API, fetched from the upstream Git repository (the
;; same URL as HOME-PAGE) at the tag "release_" followed by VERSION.
(define-public python-ont-fast5-api
  (package
    (name "python-ont-fast5-api")
    (version "1.4.4")
    ;; HOME-PAGE comes before SOURCE so that the origin can refer to it.
    (home-page "https://github.com/nanoporetech/ont_fast5_api")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url home-page)
             (commit (string-append "release_" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32 "03cbq4zbbwhll8ml2m9k8sa31mirsvcbjkrq1yna0kkzz9fad5fm"))))
    (build-system python-build-system)
    (propagated-inputs
     `(("python-numpy" ,python-numpy)
       ("python-six" ,python-six)
       ("python-h5py" ,python-h5py)
       ("python-progressbar33" ,python-progressbar33)))
    (synopsis "Interface to HDF5 files of the Oxford Nanopore fast5 file format")
    (description
     "This package provides a concrete implementation of the fast5 file schema
using the generic @code{h5py} library, plain-named methods to interact with
and reflect the fast5 file schema, and tools to convert between
@code{multi_read} and @code{single_read} formats.")
    (license license:mpl2.0)))
15306
;; tbsp, pinned to a specific upstream commit.  The version string is derived
;; from the base version "1.0.0", REVISION and COMMIT with 'git-version'.
(define-public tbsp
  (let ((revision "1")
        (commit "ec8fff4410cfb13a677dbbb95cbbc60217e64907"))
    (package
      (name "tbsp")
      (version (git-version "1.0.0" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/phoenixding/tbsp")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "025ym14x8gbd6hb55lsinqj6f5qzw36i10klgs7ldzxxd7s39ki1"))))
      (build-system python-build-system)
      (arguments
       '(#:tests? #f))                  ; no tests included
      (inputs
       `(("python-matplotlib" ,python-matplotlib)
         ("python-networkx" ,python-networkx)
         ("python-numpy" ,python-numpy)
         ("python-pybigwig" ,python-pybigwig)
         ("python-biopython" ,python-biopython)
         ("python-scikit-learn" ,python-scikit-learn)
         ("python-scipy" ,python-scipy)))
      (home-page "https://github.com/phoenixding/tbsp/")
      (synopsis "SNP-based trajectory inference")
      (description
       "Several studies focus on the inference of developmental and response
trajectories from single cell RNA-Seq (scRNA-Seq) data. A number of
computational methods, often referred to as pseudo-time ordering, have been
developed for this task. CRISPR has also been used to reconstruct lineage
trees by inserting random mutations. The tbsp package implements an
alternative method to detect significant, cell type specific sequence
mutations from scRNA-Seq data.")
      (license license:expat))))
15344
;; tabixpp, a C++ wrapper around tabix.  The bundled htslib copy is removed
;; from the source and the build is pointed at the htslib input instead.
(define-public tabixpp
  (package
    (name "tabixpp")
    (version "1.1.0")
    (source (origin
              (method git-fetch)
              (uri (git-reference
                    (url "https://github.com/ekg/tabixpp")
                    (commit (string-append "v" version))))
              (file-name (git-file-name name version))
              (sha256
               (base32 "1k2a3vbq96ic4lw72iwp5s3mwwc4xhdffjj584yn6l9637q9j1yd"))
              (modules '((guix build utils)))
              ;; Remove the bundled htslib.
              (snippet
               `(begin
                  (delete-file-recursively "htslib") #t))))
    (build-system gnu-build-system)
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    (arguments
     `(#:tests? #f                      ; There are no tests to run.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         ;; The build phase needs overriding the location of htslib.
         (replace 'build
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((htslib-ref (assoc-ref inputs "htslib")))
               (invoke "make"
                       (string-append "HTS_LIB=" htslib-ref "/lib/libhts.a")
                       (string-append "INCLUDES= -I" htslib-ref "/include/htslib")
                       "HTS_HEADERS="   ; No need to check for headers here.
                       ;; LIBPATH holds linker search directories, so it
                       ;; must name htslib's "lib" directory, not "include".
                       (string-append "LIBPATH=-L. -L" htslib-ref "/lib")))))
         ;; Only the "tabix++" executable is installed.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
               (install-file "tabix++" bin))
             #t)))))
    (home-page "https://github.com/ekg/tabixpp")
    (synopsis "C++ wrapper around tabix project")
    (description "This is a C++ wrapper around the Tabix project which abstracts
some of the details of opening and jumping in tabix-indexed files.")
    (license license:expat)))
15389
;; smithwaterman, pinned to an upstream commit.  Both the "libsw.a" and "all"
;; make targets are built; the executable and the static library are then
;; installed by hand.
(define-public smithwaterman
  (let ((commit "2610e259611ae4cde8f03c72499d28f03f6d38a7"))
    (package
      (name "smithwaterman")
      (version (git-version "0.0.0" "2" commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/smithwaterman/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0i9d8zrxpiracw3mxzd9siybpy62p06rqz9mc2w93arajgbk45bs"))))
      (build-system gnu-build-system)
      (arguments
       `(#:make-flags '("libsw.a" "all")
         #:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((prefix-dir (assoc-ref outputs "out")))
                 (install-file "smithwaterman"
                               (string-append prefix-dir "/bin"))
                 (install-file "libsw.a"
                               (string-append prefix-dir "/lib")))
               #t)))))
      (home-page "https://github.com/ekg/smithwaterman")
      (synopsis "Implementation of the Smith-Waterman algorithm")
      (description "Implementation of the Smith-Waterman algorithm.")
      ;; The licensing terms are unclear: https://github.com/ekg/smithwaterman/issues/9.
      (license (list license:gpl2 license:expat)))))
15423
;; multichoose; the "multichoose" and "multipermute" programs are installed
;; by a custom 'install' phase.
(define-public multichoose
  (package
    (name "multichoose")
    (version "1.0.3")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ekg/multichoose/")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0ci5fqvmpamwgxvmyd79ygj6n3bnbl3vc7b6h1sxz58186sm3pfs"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; Tests require node.
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bindir (string-append (assoc-ref outputs "out") "/bin")))
               ;; TODO: There are Python modules for these programs too.
               (for-each (lambda (program)
                           (install-file program bindir))
                         '("multichoose" "multipermute")))
             #t)))))
    (home-page "https://github.com/ekg/multichoose")
    (synopsis "Efficient loopless multiset combination generation algorithm")
    (description "This library implements an efficient loopless multiset
combination generation algorithm which is (approximately) described in
\"Loopless algorithms for generating permutations, combinations, and other
combinatorial configurations.\", G. Ehrlich - Journal of the ACM (JACM),
1973. (Algorithm 7.)")
    (license license:expat)))
15457
;; fsom, pinned to an upstream commit; only the "fsom" executable is
;; installed.
(define-public fsom
  (let ((commit "a6ef318fbd347c53189384aef7f670c0e6ce89a3"))
    (package
      (name "fsom")
      (version (git-version "0.0.0" "1" commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ekg/fsom/")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "0gw1lpvr812pywg9y546x0h1hhj261xwls41r6kqhddjlrcjc0pi"))))
      (build-system gnu-build-system)
      (arguments
       `(#:tests? #f                    ; There are no tests to run.
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)          ; There is no configure phase.
           (replace 'install
             (lambda* (#:key outputs #:allow-other-keys)
               (install-file "fsom"
                             (string-append (assoc-ref outputs "out")
                                            "/bin"))
               #t)))))
      (home-page "https://github.com/ekg/fsom")
      (synopsis "Manage SOM (Self-Organizing Maps) neural networks")
      (description "A tiny C library for managing SOM (Self-Organizing Maps)
neural networks.")
      (license license:gpl3))))
15487
;; fastahack, fetched at the tag "v" followed by VERSION; only the
;; "fastahack" executable is installed.
(define-public fastahack
  (package
    (name "fastahack")
    (version "1.0.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/ekg/fastahack/")
             (commit (string-append "v" version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0rp1blskhzxf7vbh253ibpxbgl9wwgyzf1wbkxndi08d3j4vcss9"))))
    (build-system gnu-build-system)
    (arguments
     ;; Unclear how to run tests: https://github.com/ekg/fastahack/issues/15
     `(#:tests? #f
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (install-file "fastahack"
                           (string-append (assoc-ref outputs "out")
                                          "/bin"))
             #t)))))
    (home-page "https://github.com/ekg/fastahack")
    (synopsis "Indexing and sequence extraction from FASTA files")
    (description "Fastahack is a small application for indexing and
extracting sequences and subsequences from FASTA files. The included library
provides a FASTA reader and indexer that can be embedded into applications
which would benefit from directly reading subsequences from FASTA files. The
library automatically handles index file generation and use.")
    (license (list license:expat license:gpl2))))
15519
(define-public vcflib
  ;; The release tarball has several bundled directories removed by the
  ;; source snippet.  Seven of them are re-created at build time from the
  ;; sources of the corresponding packages, because the build needs their
  ;; object files rather than installed libraries.
  (package
    (name "vcflib")
    (version "1.0.1")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "https://github.com/vcflib/vcflib/releases/"
                           "download/v" version
                           "/vcflib-" version "-src.tar.gz"))
       (sha256
        (base32 "14zzrg8hg8cq9cvq2wdvp21j7nmxxkjrbagw2apd2yqv2kyx42lm"))
       (modules '((guix build utils)))
       (snippet
        ;; Remove the bundled directories from the tarball.
        `(begin
           (for-each delete-file-recursively
                     '("fastahack" "filevercmp" "fsom" "googletest" "intervaltree"
                       "libVCFH" "multichoose" "smithwaterman" "tabixpp"))
           #t))))
    (build-system gnu-build-system)
    (inputs
     `(("htslib" ,htslib)
       ("perl" ,perl)
       ("python" ,python)
       ("zlib" ,zlib)))
    (native-inputs
     `(;; Submodules.
       ;; This package builds against the .o files so we need to extract the source.
       ("fastahack-src" ,(package-source fastahack))
       ("filevercmp-src" ,(package-source filevercmp))
       ("fsom-src" ,(package-source fsom))
       ("intervaltree-src" ,(package-source intervaltree))
       ("multichoose-src" ,(package-source multichoose))
       ("smithwaterman-src" ,(package-source smithwaterman))
       ("tabixpp-src" ,(package-source tabixpp))))
    (arguments
     `(#:tests? #f                      ; no tests
       #:make-flags
       (let ((htslib (assoc-ref %build-inputs "htslib")))
         (list (string-append "HTS_LIB=" htslib "/lib/libhts.a")
               (string-append "HTS_INCLUDES= -I" htslib "/include/htslib")
               ;; The library search path has to be the directory that holds
               ;; libhts (the same one HTS_LIB points into), not the header
               ;; directory.
               (string-append "HTS_LDFLAGS= -L" htslib "/lib -lhts")))
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)
         (delete 'check)
         (add-after 'unpack 'unpack-submodule-sources
           ;; Populate each submodule directory from the input labelled
           ;; "<name>-src".  A directory input is copied; anything else is
           ;; treated as a tarball and extracted without its top-level
           ;; directory.
           (lambda* (#:key inputs #:allow-other-keys)
             (let ((unpack (lambda (source target)
                             (let ((store-item (assoc-ref inputs source)))
                               ;; The snippet deleted these directories, so
                               ;; each one is created afresh.
                               (mkdir target)
                               (with-directory-excursion target
                                 (if (file-is-directory? store-item)
                                     (copy-recursively store-item ".")
                                     (invoke "tar" "xvf" store-item
                                             "--strip-components=1")))))))
               (for-each (lambda (submodule)
                           (unpack (string-append submodule "-src") submodule))
                         '("fastahack" "filevercmp" "fsom" "intervaltree"
                           "multichoose" "smithwaterman" "tabixpp"))
               #t)))
         (replace 'build
           ;; Build tabixpp first against the htslib input, then the main
           ;; tree with tabixpp on the include path.
           (lambda* (#:key inputs make-flags #:allow-other-keys)
             (let ((htslib (assoc-ref inputs "htslib")))
               (with-directory-excursion "tabixpp"
                 (substitute* "Makefile"
                   (("-Ihtslib") (string-append "-I" htslib "/include/htslib"))
                   ;; Point the linker at the directory containing libhts.
                   (("-Lhtslib") (string-append "-L" htslib "/lib"))
                   (("htslib/htslib") (string-append htslib "/include/htslib")))
                 (invoke "make"
                         (string-append "HTS_LIB=" htslib "/lib/libhts.a")))
               (apply invoke "make" "CC=gcc" "CFLAGS=-Itabixpp" make-flags))))
         (replace 'install
           ;; Install everything found under bin/ plus the static library.
           (lambda* (#:key outputs #:allow-other-keys)
             (let ((bin (string-append (assoc-ref outputs "out") "/bin"))
                   (lib (string-append (assoc-ref outputs "out") "/lib")))
               (for-each (lambda (file)
                           (install-file file bin))
                         (find-files "bin" ".*"))
               ;; The header files in src/ do not interface libvcflib,
               ;; therefore they are left out.
               (install-file "libvcflib.a" lib))
             #t)))))
    (home-page "https://github.com/vcflib/vcflib/")
    (synopsis "Library for parsing and manipulating VCF files")
    (description "Vcflib provides methods to manipulate and interpret
sequence variation as it can be described by VCF. It is both an API for parsing
and operating on records of genomic variation as it can be described by the VCF
format, and a collection of command-line utilities for executing complex
manipulations on VCF files.")
    (license license:expat)))
15618
(define-public freebayes
  ;; Built from a pinned commit.  The vcflib tree, its submodules and the two
  ;; test helper submodules are supplied as separate source inputs and
  ;; unpacked into place before the build.
  (let ((commit "3ce827d8ebf89bb3bdc097ee0fe7f46f9f30d5fb")
        (revision "1")
        (version "1.0.2"))
    (package
      (name "freebayes")
      (version (git-version version revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/ekg/freebayes")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32 "1sbzwmcbn78ybymjnhwk7qc5r912azy5vqz2y7y81616yc3ba2a2"))))
      (build-system gnu-build-system)
      (inputs
       `(("bamtools" ,bamtools)
         ("htslib" ,htslib)
         ("zlib" ,zlib)))
      (native-inputs
       `(("bc" ,bc)                     ; Needed for running tests.
         ("samtools" ,samtools)         ; Needed for running tests.
         ("parallel" ,parallel)         ; Needed for running tests.
         ("perl" ,perl)                 ; Needed for running tests.
         ("procps" ,procps)             ; Needed for running tests.
         ("python" ,python-2)           ; Needed for running tests.
         ("vcflib-src" ,(package-source vcflib))
         ;; These are submodules for the vcflib version used in freebayes.
         ;; This package builds against the .o files so we need to extract the source.
         ("tabixpp-src" ,(package-source tabixpp))
         ("smithwaterman-src" ,(package-source smithwaterman))
         ("multichoose-src" ,(package-source multichoose))
         ("fsom-src" ,(package-source fsom))
         ("filevercmp-src" ,(package-source filevercmp))
         ("fastahack-src" ,(package-source fastahack))
         ("intervaltree-src" ,(package-source intervaltree))
         ;; These submodules are needed to run the tests.
         ("bash-tap-src" ,(package-source bash-tap))
         ("test-simple-bash-src"
          ,(origin
             (method git-fetch)
             (uri (git-reference
                   (url "https://github.com/ingydotnet/test-simple-bash/")
                   (commit "124673ff204b01c8e96b7fc9f9b32ee35d898acc")))
             (file-name "test-simple-bash-src-checkout")
             (sha256
              (base32 "043plp6z0x9yf7mdpky1fw7zcpwn1p47px95w9mh16603zqqqpga"))))))
      (arguments
       `(#:make-flags
         (list "CC=gcc"
               (string-append "BAMTOOLS_ROOT="
                              (assoc-ref %build-inputs "bamtools")))
         #:test-target "test"
         #:phases
         (modify-phases %standard-phases
           (delete 'configure)
           (add-after 'unpack 'fix-tests
             ;; Replace the "grep -P" invocation in this test script with a
             ;; "grep -E" one.
             (lambda _
               (substitute* "test/t/01_call_variants.t"
                 (("grep -P \"\\(\\\\t500\\$\\|\\\\t11000\\$\\|\\\\t1000\\$\\)\"")
                  "grep -E ' (500|11000|1000)$'"))
               #t))
           (add-after 'unpack 'unpack-submodule-sources
             ;; Unpack every "<label>-src" input into its target directory.
             ;; A directory input is copied; anything else is treated as a
             ;; tarball and extracted without its top-level directory.
             (lambda* (#:key inputs #:allow-other-keys)
               (let ((unpack (lambda (source target)
                               (let ((store-item (assoc-ref inputs source)))
                                 ;; The vcflib source has its submodule
                                 ;; directories removed by its snippet, so
                                 ;; the target may not exist yet; mkdir-p is
                                 ;; a no-op when it already does.
                                 (mkdir-p target)
                                 (with-directory-excursion target
                                   (if (file-is-directory? store-item)
                                       (copy-recursively store-item ".")
                                       (invoke "tar" "xvf" store-item
                                               "--strip-components=1")))))))
                 ;; Order matters: "vcflib" must be populated before the
                 ;; directories nested inside it.
                 (for-each (lambda (entry)
                             (unpack (car entry) (cdr entry)))
                           '(("vcflib-src" . "vcflib")
                             ("fastahack-src" . "vcflib/fastahack")
                             ("filevercmp-src" . "vcflib/filevercmp")
                             ("fsom-src" . "vcflib/fsom")
                             ("intervaltree-src" . "vcflib/intervaltree")
                             ("multichoose-src" . "vcflib/multichoose")
                             ("smithwaterman-src" . "vcflib/smithwaterman")
                             ("tabixpp-src" . "vcflib/tabixpp")
                             ("test-simple-bash-src" . "test/test-simple-bash")
                             ("bash-tap-src" . "test/bash-tap")))
                 #t)))
           (add-after 'unpack-submodule-sources 'fix-makefiles
             (lambda _
               ;; We don't have the .git folder to get the version tag from.
               (substitute* "vcflib/Makefile"
                 (("^GIT_VERSION.*")
                  ;; End the replacement with a newline so that the next
                  ;; Makefile line is not joined onto this one.
                  (string-append "GIT_VERSION = v" ,version "\n")))
               (substitute* "src/Makefile"
                 (("-I\\$\\(BAMTOOLS_ROOT\\)/src")
                  "-I$(BAMTOOLS_ROOT)/include/bamtools"))
               #t))
           (add-before 'build 'build-tabixpp-and-vcflib
             ;; Build tabixpp against the static htslib, then vcflib's "all"
             ;; target with tabixpp on the include path.
             (lambda* (#:key inputs make-flags #:allow-other-keys)
               (with-directory-excursion "vcflib"
                 (with-directory-excursion "tabixpp"
                   (apply invoke "make"
                          (string-append "HTS_LIB="
                                         (assoc-ref inputs "htslib")
                                         "/lib/libhts.a")
                          make-flags))
                 (apply invoke "make"
                        "CFLAGS=-Itabixpp"
                        "all"
                        make-flags))))
           (replace 'install
             ;; Only the two executables are installed.
             (lambda* (#:key outputs #:allow-other-keys)
               (let ((bin (string-append (assoc-ref outputs "out") "/bin")))
                 (install-file "bin/freebayes" bin)
                 (install-file "bin/bamleftalign" bin))
               #t)))))
      (home-page "https://github.com/ekg/freebayes")
      (synopsis "Haplotype-based variant detector")
      (description "FreeBayes is a Bayesian genetic variant detector designed to
find small polymorphisms, specifically SNPs (single-nucleotide polymorphisms),
indels (insertions and deletions), MNPs (multi-nucleotide polymorphisms), and
complex events (composite insertion and substitution events) smaller than the
length of a short-read sequencing alignment.")
      (license license:expat))))
15739
(define-public samblaster
  ;; The configure phase is removed and the install phase copies the
  ;; "samblaster" executable into the output's bin directory.
  (package
    (name "samblaster")
    (version "0.1.24")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/GregoryFaust/samblaster")
             (commit (string-append "v." version))))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "0iv2ddfw8363vb2x8gr3p8g88whb6mb9m0pf71i2cqsbv6jghap7"))))
    (build-system gnu-build-system)
    (arguments
     `(#:tests? #f                      ; there are none
       #:phases
       (modify-phases %standard-phases
         (delete 'configure)            ; There is no configure phase.
         (replace 'install
           (lambda* (#:key outputs #:allow-other-keys)
             (let* ((out    (assoc-ref outputs "out"))
                    (bindir (string-append out "/bin")))
               (install-file "samblaster" bindir)
               #t))))))
    (home-page "https://github.com/GregoryFaust/samblaster")
    (synopsis "Mark duplicates in paired-end SAM files")
    (description "Samblaster is a fast and flexible program for marking
duplicates in read-id grouped paired-end SAM files. It can also optionally
output discordant read pairs and/or split read mappings to separate SAM files,
and/or unmapped/clipped reads to a separate FASTQ file. When marking
duplicates, samblaster will require approximately 20MB of memory per 1M read
pairs.")
    (license license:expat)))
15773
(define-public r-velocyto
  ;; Packaged from a pinned upstream commit; the version string is derived
  ;; from "0.6", the revision and that commit.
  (let ((commit "d7790346cb99f49ab9c2b23ba70dcf9d2c9fc350")
        (revision "1"))
    (package
      (name "r-velocyto")
      (version (git-version "0.6" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/velocyto-team/velocyto.R")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "16wqf70j7rd7pay2q513iyz12i8n9vrpg1bisah4lddbcpx5dz1n"))))
      (build-system r-build-system)
      (inputs `(("boost" ,boost)))
      (propagated-inputs
       `(("r-hdf5r" ,r-hdf5r)
         ("r-mass" ,r-mass)
         ("r-mgcv" ,r-mgcv)
         ("r-pcamethods" ,r-pcamethods)
         ("r-rcpp" ,r-rcpp)
         ("r-rcpparmadillo" ,r-rcpparmadillo)
         ;; Suggested packages
         ("r-rtsne" ,r-rtsne)
         ("r-cluster" ,r-cluster)
         ("r-abind" ,r-abind)
         ("r-h5" ,r-h5)
         ("r-biocgenerics" ,r-biocgenerics)
         ("r-genomicalignments" ,r-genomicalignments)
         ("r-rsamtools" ,r-rsamtools)
         ("r-edger" ,r-edger)
         ("r-igraph" ,r-igraph)))
      (home-page "https://velocyto.org")
      (synopsis "RNA velocity estimation in R")
      (description
       "This package provides basic routines for estimation of gene-specific
transcriptional derivatives and visualization of the resulting velocity
patterns.")
      (license license:gpl3))))
15817
(define-public methyldackel
  ;; The Makefile's "prefix" variable is set to the output's bin directory,
  ;; and the configure phase is replaced by a Makefile edit that creates
  ;; that directory before the executable is installed into it.
  (package
    (name "methyldackel")
    (version "0.4.0")
    (source
     (origin
       (method git-fetch)
       (uri (git-reference
             (url "https://github.com/dpryan79/MethylDackel")
             (commit version)))
       (file-name (git-file-name name version))
       (sha256
        (base32
         "10gh8k0ca92kywnrw5pkacq3g6r8s976s12k8jhp8g3g49q9a97g"))))
    (build-system gnu-build-system)
    (arguments
     `(#:test-target "test"
       #:make-flags
       (list "CC=gcc"
             (string-append "prefix="
                            (assoc-ref %outputs "out") "/bin/"))
       #:phases
       (modify-phases %standard-phases
         (replace 'configure
           (lambda _
             (substitute* "Makefile"
               ;; Prepend an "install -d" of the prefix to the matched
               ;; install command.
               (("install MethylDackel \\$\\(prefix\\)" install-command)
                (string-append "install -d $(prefix); " install-command)))
             #t)))))
    (inputs
     `(("htslib" ,htslib)
       ("zlib" ,zlib)))
    ;; Needed for tests
    (native-inputs
     `(("python" ,python-wrapper)))
    (home-page "https://github.com/dpryan79/MethylDackel")
    (synopsis "Universal methylation extractor for BS-seq experiments")
    (description
     "MethylDackel will process a coordinate-sorted and indexed BAM or CRAM
file containing some form of BS-seq alignments and extract per-base
methylation metrics from them. MethylDackel requires an indexed fasta file
containing the reference genome as well.")
    ;; See https://github.com/dpryan79/MethylDackel/issues/85
    (license license:expat)))
15861
(define-public python-gffutils
  ;; The latest release is more than a year older than the latest commit.
  (let ((commit "4034c54600813b1402945e12faa91b3a53162cf1")
        (revision "1"))
    (package
      (name "python-gffutils")
      (version (git-version "0.9" revision commit))
      (source (origin
                (method git-fetch)
                (uri (git-reference
                      (url "https://github.com/daler/gffutils")
                      (commit commit)))
                (file-name (git-file-name name version))
                (sha256
                 (base32
                  "1rwafjdnbir5wnk0ap06ww4lra3p5frhy7mfs03rlldgfnwxymsn"))))
      (build-system python-build-system)
      (arguments
       `(#:phases
         (modify-phases %standard-phases
           (add-after 'unpack 'make-gz-files-writable
             ;; Make every file whose name matches "\\.gz" writable.
             (lambda _
               (let ((gz-files (find-files "." "\\.gz")))
                 (for-each make-file-writable gz-files)
                 #t)))
           (replace 'check
             (lambda _
               ;; Tests need to access the HOME directory
               (setenv "HOME" "/tmp")
               ;; Run nosetests with the attribute selector "!slow".
               (invoke "nosetests" "-a" "!slow"))))))
      (propagated-inputs
       `(("python-argcomplete" ,python-argcomplete)
         ("python-argh" ,python-argh)
         ("python-biopython" ,python-biopython)
         ("python-pybedtools" ,python-pybedtools)
         ("python-pyfaidx" ,python-pyfaidx)
         ("python-simplejson" ,python-simplejson)
         ("python-six" ,python-six)))
      (native-inputs
       `(("python-nose" ,python-nose)))
      (home-page "https://github.com/daler/gffutils")
      (synopsis "Tool for manipulation of GFF and GTF files")
      (description
       "python-gffutils is a Python package for working with and manipulating
the GFF and GTF format files typically used for genomic annotations. The
files are loaded into a SQLite database, allowing much more complex
manipulation of hierarchical features (e.g., genes, transcripts, and exons)
than is possible with plain-text methods alone.")
      (license license:expat))))
15912
(define-public libsbml
  ;; Configured with WITH_CHECK enabled and with explicit paths to the
  ;; libxml2 shared library and headers taken from the libxml2 input.
  (package
    (name "libsbml")
    (version "5.18.0")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "mirror://sourceforge/sbml/libsbml/"
                           version "/stable/libSBML-"
                           version "-core-src.tar.gz"))
       (sha256
        (base32
         "0slkagrk3nfi2qsksv6b1brj6zhx4bj4bkib2sdycvrcd10ql2lh"))))
    (build-system cmake-build-system)
    (arguments
     `(#:test-target "test"
       #:configure-flags
       (let ((libxml2 (assoc-ref %build-inputs "libxml2")))
         (list "-DWITH_CHECK=ON"
               (string-append "-DLIBXML_LIBRARY=" libxml2 "/lib/libxml2.so")
               (string-append "-DLIBXML_INCLUDE_DIR=" libxml2
                              "/include/libxml2")))))
    (propagated-inputs
     `(("libxml2" ,libxml2)))
    (native-inputs
     `(("check" ,check)
       ("swig" ,swig)))
    (home-page "http://sbml.org/Software/libSBML")
    (synopsis "Process SBML files and data streams")
    (description "LibSBML is a library to help you read, write, manipulate,
translate, and validate SBML files and data streams. The @dfn{Systems Biology
Markup Language} (SBML) is an interchange format for computer models of
biological processes. SBML is useful for models of metabolism, cell
signaling, and more. It continues to be evolved and expanded by an
international community.")
    (license license:lgpl2.1+)))
15950
(define-public grocsvs
  ;; The last release is out of date and new features have been added.
  ;; A patch from the Guix patch directory is applied to the checkout.
  (let ((commit "ecd956a65093a0b2c41849050e4512d46fecea5d")
        (revision "1"))
    (package
      (name "grocsvs")
      (version (git-version "0.2.6.1" revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/grocsvs/grocsvs")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32 "14505725gr7qxc17cxxf0k6lzcwmgi64pija4mwf29aw70qn35cc"))
         (patches (search-patches "grocsvs-dont-use-admiral.patch"))))
      (build-system python-build-system)
      (arguments
       `(#:tests? #f                    ; No test suite.
         #:python ,python-2))           ; Only python-2 supported.
      (inputs
       `(("python2-h5py" ,python2-h5py)
         ("python2-ipython-cluster-helper" ,python2-ipython-cluster-helper)
         ("python2-networkx" ,python2-networkx)
         ("python2-psutil" ,python2-psutil)
         ("python2-pandas" ,python2-pandas)
         ("python2-pybedtools" ,python2-pybedtools)
         ("python2-pyfaidx" ,python2-pyfaidx)
         ("python2-pygraphviz" ,python2-pygraphviz)
         ("python2-pysam" ,python2-pysam)
         ("python2-scipy" ,python2-scipy)))
      (home-page "https://github.com/grocsvs/grocsvs")
      (synopsis "Genome-wide reconstruction of complex structural variants")
      (description
       "@dfn{Genome-wide Reconstruction of Complex Structural Variants}
(GROC-SVs) is a software pipeline for identifying large-scale structural
variants, performing sequence assembly at the breakpoints, and reconstructing
the complex structural variants using the long-fragment information from the
10x Genomics platform.")
      (license license:expat))))